"""Rewrite old pad.ma video links stored in PadmaText.html.

Every ``http://pad.ma/V...`` URL found in a text is requested on the NEXT
host and replaced by the URL that request ends up at.

This is a Python 2 module (it depends on ``urllib2``).
"""
import urllib2
import re

from texts.models import *

NEXT = "http://next.pad.ma/"
OLD = "http://pad.ma/"

# Cache used by getNewId: old id -> new id.
IdMapping = {}

# An old-site video URL: runs until whitespace, a double quote, '<' or the
# end of the text. The delimiter itself is not part of the match.
_OLD_VIDEO_URL_RE = re.compile(r'http://pad\.ma/V[^\s"<]*')


def getNewUrl(oldUrl):
    """Return the URL that the NEXT-host version of ``oldUrl`` resolves to.

    The OLD prefix in ``oldUrl`` is replaced by NEXT, the result is fetched
    and the URL reported by the response is returned.

    Returns None (after printing an ``ERROR: <oldUrl>`` line) when the
    request fails, so callers must be prepared for a None result.
    """
    new = oldUrl.replace(OLD, NEXT)
    try:
        response = urllib2.urlopen(new)
    except Exception:
        # Best effort: report and carry on. ``Exception`` rather than a bare
        # ``except`` so Ctrl-C / SystemExit still stop a long-running batch.
        print("ERROR: " + oldUrl)
        return None
    try:
        return response.url
    finally:
        response.close()


def getNewId(oldId):
    """Return the NEXT-site id for an old id, caching results in IdMapping.

    ``oldId`` is a regex match object (not a string): the id is taken from
    the full match text with OLD removed and the last character dropped.
    NOTE(review): the dropped character is presumably a trailing delimiter
    consumed by the pattern that produced the match -- confirm before wiring
    this up; nothing in this module calls it.

    Errors raised by the request propagate to the caller.
    """
    key = oldId.group().replace(OLD, "")[:-1]
    print(key)
    if key in IdMapping:
        return IdMapping[key]
    response = urllib2.urlopen(NEXT + key)
    try:
        newId = response.url.replace(NEXT, "")
    finally:
        response.close()
    IdMapping[key] = newId
    return newId


def replaceUrls(text):
    """Return ``text`` with every old pad.ma video URL replaced.

    Each match is replaced in place, so a URL that is a prefix of another
    matched URL cannot corrupt the longer one. URLs whose lookup fails
    (getNewUrl returned None) are left untouched. Each distinct URL is
    requested at most once per call.
    """
    resolved = {}

    def swap(match):
        oldUrl = match.group(0)
        if oldUrl not in resolved:
            resolved[oldUrl] = getNewUrl(oldUrl)
        newUrl = resolved[oldUrl]
        print(newUrl)
        if newUrl is None:
            # Lookup failed: keep the original link rather than crash.
            return oldUrl
        return newUrl

    return _OLD_VIDEO_URL_RE.sub(swap, text)


def doAll():
    """Run replaceUrls over the html of every PadmaText and save each one."""
    for p in PadmaText.objects.all():
        print(p.title)
        p.html = replaceUrls(p.html)
        p.save()