"""Resolve archive.org item ids listed in a file to their Ogg video URLs.

``parseList`` reads item ids out of a listing file, ``getOgvPath`` extracts
the video location from an item's details page, and ``do`` ties the two
together by downloading each details page.
"""

import contextlib
import json  # not used by the definitions below; kept from the original file
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen into urllib.request; alias it so the call site
    # in do() is the same on both interpreters.
    import urllib.request as urllib2

# NOTE(review): the original line prefix and id regex were lost from the
# source (the text between ``startswith("`` and ``').findall(l)`` is missing,
# apparently removed by an HTML-tag stripper).  The two defaults below are a
# best-effort reconstruction that assumes the listing contains anchor tags
# linking to "/details/<id>" -- confirm against a real listing file, or pass
# linePrefix / idPattern explicitly.
_DEFAULT_LINE_PREFIX = '<a '
_DEFAULT_ID_PATTERN = r'<a [^>]*href="/details/(.*?)"'

_DETAILS_URL = "http://archive.org/details/"

# Compiled once at import time instead of on every getOgvPath() call.
_OGV_FILE_RE = re.compile(r'IAD\.ogv = "(.*?)"')
_OGV_SERVER_RE = re.compile(r'IAD\.meta = \{"server":"(.*?)"')
# One or more digits: the original single-digit class could not match a
# multi-digit directory component.
_OGV_ITEMNO_RE = re.compile(r'IAD\.thumbs = \["/([0-9]+)/')


def parseList(filePath, linePrefix=_DEFAULT_LINE_PREFIX,
              idPattern=_DEFAULT_ID_PATTERN):
    """Collect item ids and their details-page URLs from a listing file.

    Args:
        filePath: path of the text file to scan, one candidate per line.
        linePrefix: only lines starting with this string are considered.
        idPattern: regular expression (string or compiled) with exactly one
            capture group that yields the item id.

    Returns:
        A list of dicts ``{'id': <list of all matches on the line>,
        'url': <details URL built from the first match>}``, in file order.
        Lines with the prefix but no match are skipped.
    """
    matcher = re.compile(idPattern)
    urlList = []
    # "with" guarantees the file is closed even if a line fails to parse.
    with open(filePath) as listing:
        for line in listing:
            if not line.startswith(linePrefix):
                continue
            ids = matcher.findall(line)
            if not ids:
                # Previously this raised IndexError on ids[0].
                continue
            # NOTE(review): 'id' holds the whole findall() list, as in the
            # original, while the URL uses only the first match.  Kept as-is
            # so the returned shape does not change for callers.
            urlList.append({'id': ids, 'url': _DETAILS_URL + ids[0]})
    return urlList


def _firstMatch(pattern, page, what):
    """Return the first capture of *pattern* in *page* or raise IndexError."""
    found = pattern.findall(page)
    if not found:
        # IndexError is what the original indexing raised; keep the type and
        # add a message saying which field was missing.
        raise IndexError("could not find %s in page" % what)
    return found[0]


def getOgvPath(page):
    """Build the .ogv URL from the text of an item details page.

    The page is expected to contain ``IAD.ogv = "<file>"``,
    ``IAD.meta = {"server":"<host>"`` and ``IAD.thumbs = ["/<number>/``.

    Args:
        page: the details page as a text string.

    Returns:
        ``"http://<host>/<number>/items/<file>"``.

    Raises:
        IndexError: if any of the three values is not present in *page*.
    """
    filename = _firstMatch(_OGV_FILE_RE, page, "IAD.ogv")
    server = _firstMatch(_OGV_SERVER_RE, page, "IAD.meta server")
    itemno = _firstMatch(_OGV_ITEMNO_RE, page, "IAD.thumbs directory")
    return "http://%s/%s/items/%s" % (server, itemno, filename)


def do(filePath):
    """Resolve every item listed in *filePath* to its .ogv URL.

    Each details page is downloaded and parsed on a best-effort basis: when
    the download or the parsing fails, the details URL is printed and the
    item is left out of the result.

    Args:
        filePath: path of the listing file, passed to ``parseList``.

    Returns:
        A list of dicts ``{'id': <id as returned by parseList>,
        'path': <ogv URL>}`` for the items that could be resolved.
    """
    ogvs = []
    for entry in parseList(filePath):
        url = entry['url']
        try:
            # closing() releases the connection on both Python 2 and 3.
            with contextlib.closing(urllib2.urlopen(url)) as response:
                page = response.read()
            if not isinstance(page, str):
                # Python 3 returns bytes; the patterns are text patterns.
                page = page.decode('utf-8', 'replace')
            ogvs.append({'id': entry['id'], 'path': getOgvPath(page)})
        except Exception:
            # Best-effort, as before, but no longer swallows
            # KeyboardInterrupt / SystemExit the way a bare except did.
            print(url)
    return ogvs