padmatexts/padmaTexts/migrations/migrate.py

50 lines
1.2 KiB
Python

import urllib2
import re
from texts.models import *
# Base URL that old links are rewritten to before being fetched.
NEXT = "http://next.pad.ma/"
# Base URL of the old site; stripped from matched URLs by getNewId.
OLD = "http://pad.ma/"
# Cache used by getNewId: old id string -> new id string.
IdMapping = {}
# Matches a pad.ma URL whose path starts with "V", embedded in text and
# followed by one delimiter (whitespace, '"' or '<').
#   group 1: the URL itself (without the trailing delimiter)
#   group 2: the path part starting at "V"
# The host part is restricted to non-delimiter characters so that a match can
# never begin at an earlier, unrelated "http://" link and run across quotes or
# spaces up to a later pad.ma link. The dot in "pad.ma" is escaped so it only
# matches a literal dot.
regex = re.compile(r'(http://[^\s"<]*?pad\.ma/(V.*?))[\s"<]')
# Same URL shape, but anchored at the end of the string; used on a single,
# already-extracted URL.
regex2 = re.compile(r'(http://[^\s"<]*?pad\.ma/(V.*?))$')
def getNewUrl(oldUrl):
    """Resolve an old pad.ma URL to the URL served by the NEXT site.

    The "V..." path of ``oldUrl`` (group 2 of ``regex2``) is appended to
    ``NEXT`` and the result is opened with urllib2. If ``oldUrl`` does not
    match ``regex2`` it is opened unchanged.

    Returns the ``url`` attribute of the response (the address urllib2 ended
    up at), or ``None`` if the request failed. Failures are reported on
    stdout as ``ERROR: <oldUrl>``.
    """
    new = regex2.sub(lambda pat: NEXT + pat.group(2), oldUrl)
    try:
        response = urllib2.urlopen(new)
        try:
            return response.url
        finally:
            # Release the connection; only the final URL is needed.
            response.close()
    except Exception:
        # Best-effort lookup: any request error means "no new URL".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt / SystemExit abort a long migration run.
        print("ERROR: " + oldUrl)
        return None
def getNewId(oldId):
    """Map an old id to the id used on the NEXT site, caching the result.

    ``oldId`` is a regex match object (``.group()`` is called on it). Its
    matched text has every occurrence of ``OLD`` removed and its last
    character dropped -- presumably the trailing delimiter captured by
    ``regex``; confirm against the caller.

    The remaining id is looked up in ``IdMapping``. On a miss, ``NEXT + id``
    is opened with urllib2 and the response URL with ``NEXT`` removed is
    stored in ``IdMapping`` and returned.

    Request errors from urllib2 are not caught here and propagate.
    """
    key = oldId.group().replace(OLD, "")[:-1]
    print(key)
    if key in IdMapping:
        return IdMapping[key]
    u = urllib2.urlopen(NEXT + key)
    try:
        newId = u.url.replace(NEXT, "")
    finally:
        # Release the connection; only the final URL is needed.
        u.close()
    IdMapping[key] = newId
    return newId
def replaceUrls(text):
    """Return ``text`` with every URL matched by ``regex`` rewritten.

    Each match's URL (group 1 of ``regex``) is passed to ``getNewUrl``. If
    that returns a URL, the match's URL is replaced by it; if it returns
    ``None`` the match is left unchanged. The delimiter character that
    follows the URL in the match is always preserved.

    The substitution is done in a single pass over the matches rather than
    with repeated ``str.replace`` calls, so a URL that happens to be a prefix
    of another URL in the text cannot corrupt the longer one. Each distinct
    URL is resolved at most once per call.
    """
    resolved = {}  # old URL -> new URL (or None when the lookup failed)

    def swap(match):
        oldUrl = match.group(1)
        if oldUrl not in resolved:
            resolved[oldUrl] = getNewUrl(oldUrl)
        newUrl = resolved[oldUrl]
        if newUrl is None:
            return match.group(0)
        # group(0) is the URL plus its trailing delimiter; keep the delimiter.
        return newUrl + match.group(0)[len(oldUrl):]

    return regex.sub(swap, text)
# for m in re.findall(regex, text):
# print getNewId(m)
def doAll():
    """Rewrite the links in every PadmaText and save it.

    For each object returned by ``PadmaText.objects.all()``: print its
    title, replace its ``html`` with the result of ``replaceUrls`` on that
    html, and call ``save()``.
    """
    for padmaText in PadmaText.objects.all():
        print(padmaText.title)
        padmaText.html = replaceUrls(padmaText.html)
        padmaText.save()