50 lines
1.2 KiB
Python
50 lines
1.2 KiB
Python
import urllib2
|
|
import re
|
|
from texts.models import *
|
|
|
|
# Base URL of the new site that old links are redirected through.
NEXT = "http://next.pad.ma/"
# Base URL of the old site whose links are being rewritten.
OLD = "http://pad.ma/"
# Cache used by getNewId: old id -> id reported by the new site.
IdMapping = {}

# A pad.ma video URL embedded in text/HTML, terminated by whitespace, a double
# quote or '<'.  Group 1 is the URL without the terminator, group 2 the path
# starting at "V".
# The host part may carry a prefix before "pad.ma" (e.g. "www.") but must not
# contain '/', whitespace, quotes or angle brackets: otherwise a match could
# start at an unrelated "http://" link earlier on the same line and swallow
# everything up to the pad.ma link.  The dot in "pad.ma" is escaped so that it
# only matches a literal dot.
regex = re.compile(r'(http://[^\s"<>/]*?pad\.ma/(V.*?))[\s"<]')
# Same URL shape, but anchored at the end of the string; used on a URL that
# has already been extracted from the surrounding text.
regex2 = re.compile(r'(http://[^\s"<>/]*?pad\.ma/(V.*?))$')
|
|
def getNewUrl(oldUrl):
    """Resolve an old pad.ma URL to its address on the new site.

    The part of ``oldUrl`` matched by ``regex2`` is rewritten to ``NEXT``
    followed by the path that starts with "V".  That URL is then requested and
    the URL reported by the response is returned.

    Returns None, after printing an "ERROR: <oldUrl>" line, when the request
    fails.
    """
    new = regex2.sub(lambda pat: NEXT + pat.group(2), oldUrl)
    try:
        response = urllib2.urlopen(new)
    except Exception:
        # Best effort: a dead link must not abort the whole run.  Catching
        # Exception (rather than a bare except) still lets KeyboardInterrupt
        # and SystemExit stop a long migration.
        print("ERROR: " + oldUrl)
        return None
    try:
        return response.url
    finally:
        # Release the connection instead of waiting for garbage collection.
        response.close()
|
|
|
|
def getNewId(oldId):
    """Return the new-site id for an old pad.ma link, caching the answer.

    ``oldId`` must be a regex match object: ``.group()`` is called on it.  The
    matched text has the ``OLD`` prefix removed and its last character dropped
    (presumably the delimiter that ends a ``regex`` match -- confirm against
    the caller).  The remaining id is printed, looked up in ``IdMapping`` and,
    if unknown, requested under ``NEXT``; the response URL minus ``NEXT`` is
    stored in ``IdMapping`` and returned.

    NOTE(review): the only call visible in this file is commented out and
    passes ``re.findall`` results, which are tuples rather than match objects.
    """
    oldId = oldId.group().replace(OLD, "")[:-1]
    print(oldId)
    if oldId in IdMapping:
        return IdMapping[oldId]
    response = urllib2.urlopen(NEXT + oldId)
    try:
        newId = response.url.replace(NEXT, "")
    finally:
        # Release the connection instead of waiting for garbage collection.
        response.close()
    IdMapping[oldId] = newId
    return newId
|
|
|
|
def replaceUrls(text):
    """Return ``text`` with every old pad.ma URL swapped for its new URL.

    Each match of ``regex`` is replaced where it occurs, so a URL that is a
    prefix of a longer URL can no longer corrupt the longer one (a global
    ``str.replace`` would).  Every distinct URL is resolved through
    ``getNewUrl`` only once per call; URLs that cannot be resolved
    (``getNewUrl`` returned None) are left unchanged.
    """
    resolved = {}  # old URL -> new URL, or None if the lookup failed

    def swap(match):
        oldUrl = match.group(1)
        if oldUrl not in resolved:
            resolved[oldUrl] = getNewUrl(oldUrl)
        newUrl = resolved[oldUrl]
        if newUrl is None:
            return match.group(0)
        # group(0) is the URL plus the delimiter that ended it; keep the
        # delimiter so the surrounding text/markup is untouched.
        return newUrl + match.group(0)[len(oldUrl):]

    return regex.sub(swap, text)
|
|
# for m in re.findall(regex, text):
|
|
# print getNewId(m)
|
|
|
|
def doAll():
    """Rewrite the links in the ``html`` of every PadmaText and save it.

    The title of each object is printed before it is processed.
    """
    for padmaText in PadmaText.objects.all():
        print(padmaText.title)
        padmaText.html = replaceUrls(padmaText.html)
        padmaText.save()
|
|
|