#!/usr/bin/python
"""CGI script that republishes the Joy of Tech feed as RSS with inline images.

For every item of the upstream feed the comic's archive page is looked up to
find the image filename (cached in a small text database), and a new RSS
document embedding the image is printed to standard output.

NOTE(review): this file was recovered from a copy in which everything between
'<' and '>' had been stripped.  The image regex and the XML tags in the RSS
strings below are reconstructions -- confirm them against a pristine copy.
"""
# permalink: http://www.geekculture.com/joyoftech/joyarchives/711b.html
# image url: http://www.geekculture.com/joyoftech/joyimages/711b.???

import re
import urllib
from contextlib import closing
from time import time, gmtime, strftime
from xml.dom import minidom

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

# Cache file: one "permalink-url,image-filename" record per line.
IMGDB = 'jotrss-db.txt'

# NOTE(review): reconstructed pattern (the original was lost).  Captures the
# filename of the first <img> whose src points into the joyimages directory.
_IMG_URL_PATTERN = re.compile(
    r'<img\b[^>]*\bsrc\s*=\s*["\']?[^"\'>\s]*joyimages/([^"\'>\s/]+)',
    re.IGNORECASE)


def GetImageFilenameFromPage(url):
    """Download the archive page at *url* and return the comic image filename.

    Returns the string 'none' when no matching image is found; GetImageData
    treats that value as "no image at this location".
    """
    with closing(urlopen(url)) as opener:
        # Decode leniently: only the ASCII image URL matters here.
        content = opener.read().decode('utf-8', 'replace')
    match = _IMG_URL_PATTERN.search(content)
    if match is None:
        return 'none'
    return match.group(1)


def LoadImageFilenames(imgFilenames):
    """Merge the cached url -> filename records from IMGDB into *imgFilenames*.

    The dictionary is updated in place.  If the cache file cannot be read or
    contains a malformed line, *imgFilenames* is left exactly as it was.
    """
    loaded = {}
    try:
        with open(IMGDB, 'r') as imgdb:
            for line in imgdb:
                # Raises ValueError unless the line has exactly one comma.
                url, fname = line.split(',')
                if url != '':
                    loaded[url] = fname.rstrip('\r\n')
    except (IOError, ValueError):
        # The tuple is required: "except IOError, ValueError" would catch
        # only IOError.  Records are staged in a scratch dict so a failure
        # halfway through the file cannot leave partial entries behind.
        return
    imgFilenames.update(loaded)


def AppendImageFilename(url, fname):
    """Append one "url,fname" record to the IMGDB cache file."""
    with open(IMGDB, 'a') as imgdb:
        imgdb.write('%s,%s\n' % (url, fname))


def GetImageFilename(url, imgFilenames):
    """Return the image filename for *url*, consulting the cache first.

    On a cache miss the page is fetched, and the result (including 'none')
    is stored both in *imgFilenames* and in the IMGDB file.
    """
    if url in imgFilenames:
        return imgFilenames[url]
    fname = GetImageFilenameFromPage(url)
    AppendImageFilename(url, fname)
    imgFilenames[url] = fname
    return fname


def GetImageData():
    """Read the upstream feed and return (fnameList, pubdateList).

    The two lists are parallel: entry i of each describes the same feed item.
    Items whose page yielded no image are left out of both lists.
    """
    fnameList = []
    pubdateList = []
    imgFilenames = {}
    LoadImageFilenames(imgFilenames)

    with closing(urlopen('http://feeds.feedburner.com/jotrepub')) as rssUrl:
        rss = minidom.parse(rssUrl)

    for item in rss.getElementsByTagName('item'):
        permalinkUrl = item.getElementsByTagName('link')[0].childNodes[0].toxml()
        pubDate = item.getElementsByTagName('pubDate')[0].childNodes[0].toxml()
        # The images can be either gifs or jpegs, it's impossible
        # to tell without actually loading the page.
        fname = GetImageFilename(permalinkUrl, imgFilenames)
        if fname == 'none':
            continue  # we couldn't find an image at this location
        fnameList.append(fname)
        pubdateList.append(pubDate)
    return fnameList, pubdateList


def RssHeader():
    """Print the opening of the RSS document (channel metadata)."""
    # NOTE(review): XML tags reconstructed; only the text content survived.
    # No leading newline: the XML declaration must be the first output byte.
    print("""<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>The Joy of Tech</title>
<link>http://www.geekculture.com/joyoftech/index.html</link>
<description>The Joy of Tech Comic Strip</description>""")


def RssBody(imageDataList):
    """Print one RSS <item> per comic.

    *imageDataList* is the (fnameList, pubdateList) pair from GetImageData.
    The issue id is the image filename up to its first dot.
    """
    fnameList, pubdateList = imageDataList
    for fname, pubdate in zip(fnameList, pubdateList):
        # NOTE(review): XML tags reconstructed; the surviving "]]>" and the
        # 'filename' key indicate a CDATA-wrapped <img> in the description.
        print("""<item>
<title>Issue %(issue)s</title>
<description><![CDATA[<img src="http://www.geekculture.com/joyoftech/joyimages/%(filename)s" />]]></description>
<link>http://www.geekculture.com/joyoftech/joyarchives/%(issue)s.html</link>
<pubDate>%(pubdate)s</pubDate>
</item>""" % {
            'issue': fname.split('.')[0],
            'filename': fname,
            'pubdate': pubdate,
        })


def RssFooter():
    """Print the closing tags of the RSS document."""
    # NOTE(review): reconstructed; the surviving text was an empty string.
    print("</channel>\n</rss>")


def ExpiryTime():
    """Return an HTTP-style timestamp 12 hours from now."""
    # content expires 12 hours from now
    return strftime("%a, %d %b %Y %H:%M:%S UTC", gmtime(time() + 43200))


def Main():
    """Emit the CGI response: HTTP headers, a blank line, then the feed."""
    print("Content-Type: application/rss+xml")
    # The trailing "\n" produces the blank line that ends the header section.
    print("Expires: " + ExpiryTime() + "\n")
    RssHeader()
    RssBody(GetImageData())
    RssFooter()


if __name__ == '__main__':
    Main()