#!/usr/bin/python
# permalink: http://www.geekculture.com/joyoftech/joyarchives/711b.html
# image url: http://www.geekculture.com/joyoftech/joyimages/711b.???
import urllib, re
from xml.dom import minidom
from time import time, gmtime, strftime
# Text file used as a persistent cache: AppendImageFilename appends one
# "url,fname" line to it for every permalink that has been resolved.
IMGDB = 'jotrss-db.txt'
def GetImageFilenameFromPage (url):
opener = urllib.urlopen(url)
content = opener.read()
imgurlPattern = re.compile(r'
'':
imgFilenames[url] = fname.rstrip('\r\n')
imgdb.close()
except IOError, ValueError:
imgFilenames = tmpHash
def AppendImageFilename(url, fname):
    """Append one permalink-to-image record to the IMGDB cache file.

    url   -- permalink URL of the comic page.
    fname -- image filename that was resolved for that page.

    The record is written as a single "url,fname" line. Errors raised while
    opening or writing the file propagate to the caller.
    """
    # NOTE(review): a url or fname containing a comma would make the record
    # ambiguous; confirm against the code that reads IMGDB back in.
    # `with` guarantees the handle is closed even if write() raises.
    with open(IMGDB, 'a') as imgdb:
        imgdb.write('%s,%s\n' % (url, fname))
def GetImageFilename(url, imgFilenames):
    """Return the image filename for a comic permalink, consulting the cache.

    url          -- permalink URL of the comic page.
    imgFilenames -- dict mapping permalink URL to image filename; it is
                    updated in place when a new URL has to be resolved.

    On a cache miss the filename is obtained from GetImageFilenameFromPage,
    persisted with AppendImageFilename and stored in imgFilenames before
    being returned.
    """
    # `in` replaces dict.has_key(), which is deprecated on Python 2 and no
    # longer exists on Python 3.
    if url in imgFilenames:
        return imgFilenames[url]

    fname = GetImageFilenameFromPage(url)
    AppendImageFilename(url, fname)
    imgFilenames[url] = fname
    return fname
def GetImageData():
    """Download the source feed and collect image filenames and dates.

    Returns a (fnameList, pubdateList) tuple of two equal-length lists: for
    every feed item whose image could be resolved, the image filename and
    the text of the item's pubDate element, in feed order. Items for which
    GetImageFilename yields 'none' are left out of both lists.
    """
    # Function-scope import so this block runs on Python 2 and Python 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    imgFilenames = {}
    LoadImageFilenames(imgFilenames)

    feed = urlopen('http://feeds.feedburner.com/jotrepub')
    try:
        rss = minidom.parse(feed)
    finally:
        # Release the connection even when the feed is not well-formed XML.
        feed.close()

    fnameList = []
    pubdateList = []
    for item in rss.getElementsByTagName('item'):
        # toxml() on the first child node returns that node serialized as
        # XML text, so special characters remain entity-escaped.
        permalinkUrl = item.getElementsByTagName('link')[0].childNodes[0].toxml()
        pubDate = item.getElementsByTagName('pubDate')[0].childNodes[0].toxml()
        # The images can be either gifs or jpegs, it's impossible
        # to tell without actually loading the page.
        fname = GetImageFilename(permalinkUrl, imgFilenames)
        # 'none' means we couldn't find an image at this location.
        if fname != 'none':
            fnameList.append(fname)
            pubdateList.append(pubDate)
    return fnameList, pubdateList
def RssHeader():
    """Print the opening part of the feed (title, site URL, description)."""
    # A single parenthesized argument prints the same text on Python 2 and
    # is also valid on Python 3, where the print statement is a syntax error.
    print("""
The Joy of Tech
http://www.geekculture.com/joyoftech/index.html
The Joy of Tech Comic Strip
""")
def RssBody(imageDataList):
    """Print one feed entry per image.

    imageDataList -- a (fnameList, pubdateList) pair of parallel lists, as
                     returned by GetImageData.

    The issue identifier of an entry is the image filename up to its first
    '.', e.g. '711b.gif' -> '711b'.
    """
    fnameList, pubdateList = imageDataList
    # The template never changes, so build it once outside the loop.
    template = """
-
Issue %(issue)s
]]>
http://www.geekculture.com/joyoftech/joyarchives/%(issue)s.html
%(pubdate)s
"""
    for fname, pubdate in zip(fnameList, pubdateList):
        # Parenthesized single-argument print: same output on Python 2,
        # valid syntax on Python 3.
        print(template % {
            'issue': fname.split('.')[0],
            'filename': fname,
            'pubdate': pubdate,
        })
def RssFooter():
    """Print the closing part of the feed."""
    # Parenthesized single-argument print: same output on Python 2, valid
    # syntax on Python 3.
    print("")
def ExpiryTime():
    """Return the HTTP date 12 hours from now, for use in an Expires header.

    The value has the form 'Sun, 06 Nov 1994 08:49:37 GMT'. HTTP dates must
    end in the literal zone 'GMT'; any other suffix (such as 'UTC') makes
    the header an invalid date, which caches treat as already expired.
    """
    # content expires 12 hours from now
    lifetime = 12 * 60 * 60
    return strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime(time() + lifetime))
def Main():
    """Write the response headers followed by the complete feed to stdout."""
    # Parenthesized single-argument prints: same output on Python 2, valid
    # syntax on Python 3.
    print("Content-Type: application/rss+xml")
    # The explicit "\n" plus print's own newline produces the blank line
    # that separates the headers from the body.
    print("Expires: " + ExpiryTime() + "\n")
    RssHeader()
    RssBody(GetImageData())
    RssFooter()
# Produce the feed only when executed as a script, not when imported.
if __name__ == '__main__':
    Main()