# manifests/manifests/ships/convert.py
# 2012-05-26 02:01:04 +05:30
#
# 284 lines
# 7.4 KiB
# Python
# Executable file

import codecs
from glob import glob
import sys
import os
import csv
import json
from models import Ship, Good, Manifest, Translation
import re
def toCsv():
    """Re-encode every ``*.txt`` file in the current directory as UTF-8.

    Each file is read as windows-1256 text and written, content unchanged,
    to a sibling file whose ``.txt`` extension is replaced by ``.csv``.
    The name of every output file is printed, then "done" once all files
    have been converted.
    """
    for txt_path in glob('*.txt'):
        with codecs.open(txt_path, "r", encoding="windows-1256") as src:
            # codecs already returns decoded text; no extra conversion needed.
            text = src.read()
        # Swap only the extension: str.replace("txt", "csv") would also
        # rewrite a "txt" that appears elsewhere in the file name.
        out_path = os.path.splitext(txt_path)[0] + ".csv"
        print(out_path)
        with codecs.open(out_path, "w", "utf-8") as dst:
            dst.write(text)
    print("done")
def toRaw():
    """Render every ``*.csv`` file in the current directory as an HTML table.

    For each CSV file a sibling ``.html`` file is written (UTF-8) holding a
    single ``<table>`` with one ``<tr>`` per CSV row and one ``<td>`` per
    cell.  The name of every output file is printed, then "done" once all
    files have been processed.
    """
    for csv_path in glob("*.csv"):
        # Swap only the extension: str.replace("csv", "html") would also
        # rewrite a "csv" that appears elsewhere in the file name.
        out_path = os.path.splitext(csv_path)[0] + ".html"
        print(out_path)
        with open(csv_path) as src:
            with codecs.open(out_path, "w", "utf-8") as dst:
                dst.write("<table>")
                for row in csv.reader(src):
                    dst.write("<tr>")
                    for col in row:
                        # Python 2's csv module yields byte strings; decode
                        # them so the UTF-8 writer receives text.
                        if isinstance(col, bytes):
                            col = col.decode("utf-8")
                        # NOTE(review): cell text is written without HTML
                        # escaping, exactly as before.
                        dst.write("<td>" + col + "</td>")
                    dst.write("</tr>")
                dst.write("</table>")
    print("done")
def _readUtf8(path):
    """Return the whole content of *path* decoded as UTF-8."""
    with codecs.open(path, "r", encoding="utf-8") as src:
        return src.read()


def toHtml():
    """Wrap every page in ``raw/`` with the shared header and footer.

    For each ``raw/*.html`` file, ``header.html`` + page + ``footer.html``
    is written (UTF-8) to a file of the same name under ``html/``.  The name
    of every output file is printed, then "done" once all pages are written.
    """
    raw_paths = glob("raw/*.html")
    if raw_paths:
        # Header and footer are identical for every page: read them once,
        # and decode them so they concatenate cleanly with the decoded page
        # (byte strings would be implicitly ASCII-decoded on Python 2).
        header = _readUtf8("header.html")
        footer = _readUtf8("footer.html")
    for raw_path in raw_paths:
        # Build the target from the base name so the result does not depend
        # on which path separator glob() returned.
        out_path = os.path.join("html", os.path.basename(raw_path))
        print(out_path)
        page = _readUtf8(raw_path)
        with codecs.open(out_path, "w", "utf-8") as dst:
            dst.write(header + page + footer)
    print("done")
def isShip(row):
    """Return True when *row* is a ship row.

    A ship row is one whose first cell, ignoring surrounding whitespace and
    letter case, is ``import`` or ``rexport``.  Anything else -- including
    an empty row or a first cell that is not a string -- returns False.
    """
    try:
        marker = row[0].strip().lower()
    except (LookupError, AttributeError, TypeError):
        # Empty row, missing first cell or a non-string value: not a ship.
        return False
    return marker in ('import', 'rexport')
def csvToJSON(prefix):
    """Group the rows of every ``<prefix>*.csv`` file into ships and goods.

    Each CSV file becomes one entry ``{'filename', 'date', 'ships'}`` where
    ``date`` is the file name with *prefix* removed.  A row accepted by
    ``isShip`` starts a new ship ``{'row': <ship row>, 'goods': [...]}``;
    every following row up to the next ship row is appended to that ship's
    ``goods``.  The first cell of every ship row is printed.

    Rows that appear before the first ship row of a file are skipped, and a
    ship row that is the last row of a file is kept with an empty goods
    list.

    The collected entries are written as indented JSON to
    ``<prefix>Data.json``.
    """
    manifests = []
    for csv_path in glob(prefix + "*.csv"):
        with open(csv_path) as src:
            rows = list(csv.reader(src))

        ships = []
        current = None  # ship currently collecting goods rows, if any
        for row in rows:
            if isShip(row):
                print(row[0])
                current = {
                    'row': row,
                    'goods': [],
                }
                ships.append(current)
            elif current is not None:
                current['goods'].append(row)

        manifests.append({
            'filename': csv_path,
            'date': csv_path.replace(prefix, ""),
            'ships': ships,
        })

    with codecs.open(prefix + "Data.json", "w", "utf-8") as out:
        out.write(json.dumps(manifests, indent=2))
def addType(inFile, outFile):
    """Prefix every ship row in a JSON dump with the bill type "Export".

    Reads the JSON list at *inFile*, inserts the string ``"Export"`` at the
    front of each ship's ``'row'`` (printing the cell that follows it), and
    writes the result as JSON to *outFile* (UTF-8).

    inFile  -- path of the JSON file to read.
    outFile -- path of the JSON file to write.
    """
    with open(inFile) as src:
        manifests = json.load(src)

    for manifest in manifests:
        for ship in manifest['ships']:
            ship['row'].insert(0, "Export")
            print(ship['row'][1])

    # Open the output only once the data is ready, so a failure above cannot
    # leave a truncated file behind.
    with codecs.open(outFile, "w", "utf-8") as dst:
        dst.write(json.dumps(manifests))
def cleanDates(filename):
    """Strip the ``csv/02042012/Outgoing/`` prefix from manifest file names.

    Loads the JSON list at *filename* and removes that hard-coded directory
    prefix from each entry's ``'filename'`` value.  The file on disk is not
    modified; the cleaned list is returned so the result is not lost.
    """
    with open(filename) as src:
        data = json.load(src)
    for entry in data:
        entry['filename'] = entry['filename'].replace("csv/02042012/Outgoing/", "")
    return data
def _dumpErrors(path, errors):
    """Write *errors* to *path* as indented JSON."""
    with open(path, "w") as out:
        out.write(json.dumps(errors, indent=2))


def _shipFromRow(row, manifest):
    """Build an unsaved Ship attached to *manifest* from one ship row."""
    s = Ship()
    s.manifest_file = manifest
    s.bill_type = 'Export'
    s.number = row[0]
    shipDate = row[1]
    # Reorder the slices [6:], [3:5], [0:2] with dashes -- presumably turning
    # a dd?mm?yyyy value into yyyy-mm-dd (confirm against the data).
    s.date = "%s-%s-%s" % (shipDate[6:], shipDate[3:5], shipDate[0:2])
    s.ship_name = row[2]
    s.captain = row[3]
    s.flag = row[4]
    s.owner = row[5]
    # Port and country are optional trailing columns.
    s.port = row[6] if len(row) > 6 else ''
    s.country = row[7] if len(row) > 7 else ''
    return s


def _saveGood(good, ship, errors_goods):
    """Create and save one Good for *ship*; bad rows go to *errors_goods*."""
    g = Good()
    g.ship = ship
    g.description = good[0]
    g.package_type = good[1]
    for attr, index in (('no_of_packages', 2), ('weight', 3), ('value', 4)):
        raw = good[index]
        if raw.strip() == '':
            continue  # blank cell: leave the field unset
        try:
            setattr(g, attr, int(raw))
        except (TypeError, ValueError):
            errors_goods.append(good)
    try:
        g.save()
    except Exception:
        print("goods error")
        errors_goods.append(good)


def importJSON(filename, prefix=""):
    """Load manifests, ships and goods from a JSON dump into the database.

    filename -- path of a JSON list of ``{'filename', 'date', 'ships'}``
                entries, each ship being ``{'row': [...], 'goods': [...]}``.
    prefix   -- quick fix for dates such as "creekmanifest1201": when not
                empty it is removed from each manifest's ``'date'`` before
                the date is parsed.

    For every manifest the ``.csv`` suffix is dropped from the date; the
    last two characters are the year (stored as ``20YY``) and the text
    before them is the month (stored as its first three characters,
    lower-cased).  Existing Manifest records with the same month and year
    are deleted before the new one is saved.  If saving a manifest fails the
    process exits (SystemExit).

    Ship rows are read as number, date, ship name, captain, flag, owner and
    optionally port and country.  Goods rows are read as description,
    package type, number of packages, weight and value; the last three are
    converted with ``int`` when not blank.  Goods are only imported for
    ships that were saved successfully.

    Rows that could not be saved or converted are written to
    ``shipErrors.json``, ``goodErrors.json`` and ``manifestsErrors.json``
    in the current directory -- also when the import stops early.
    """
    with open(filename) as src:
        data = json.load(src)
    errors_ships = []
    errors_goods = []
    manifests_errors = []
    try:
        for manifest in data:
            m = Manifest()
            m.filename = manifest['filename']
            if prefix != '':
                manifest['date'] = manifest['date'].replace(prefix, "")
            dateStr = manifest['date'].replace(".csv", "")
            yr = dateStr[-2:]
            # Slice the year off the end; str.replace(yr, "") would also
            # remove the same two characters inside the month ("1212").
            month = dateStr[:-2]
            print(yr)
            print(month)
            m.month = month[0:3].lower()
            m.year = int("20" + yr)
            # Look up duplicates by the normalised month that is actually
            # stored, not by the raw text from the file name.
            matchedManifests = Manifest.objects.filter(month=m.month).filter(year=m.year)
            if matchedManifests.count() > 0:
                print("Manifest file exists with id %d" % matchedManifests[0].id)
                for match in matchedManifests:
                    match.delete()
            try:
                m.save()
            except Exception:
                manifests_errors.append(manifest['filename'])
                sys.exit()
            for ship in manifest['ships']:
                row = ship['row']
                s = _shipFromRow(row, m)
                try:
                    s.save()
                except Exception:
                    print("ship error")
                    errors_ships.append(row)
                else:
                    print("ship success")
                if s.id:
                    for good in ship['goods']:
                        _saveGood(good, s, errors_goods)
    finally:
        # Always persist what was collected, including on the early exit.
        _dumpErrors("shipErrors.json", errors_ships)
        _dumpErrors("goodErrors.json", errors_goods)
        _dumpErrors("manifestsErrors.json", manifests_errors)
def generateGoodStrings():
    """Fill in ``description_string`` for every Good that lacks one.

    Goods whose ``description_string`` is already set are left untouched.
    For the others the value is derived from ``description`` with
    ``getGoodString``, printed, stored on the record and saved.
    """
    for good in Good.objects.all():
        if not good.description_string:
            cleaned = getGoodString(good.description)
            print(cleaned)
            good.description_string = cleaned
            good.save()
# Matches a fragment made only of ASCII digits (the empty string included).
_DIGITS_ONLY = re.compile(r'^[0-9]*?$')


def getGoodString(s):
    """Return the goods name from a description, dropping a numeric half.

    When *s* has exactly two ``/``-separated parts and one of them (after
    stripping whitespace) consists only of digits or is empty, the other
    part is returned; the first part is tested first.  In every other case
    the whole description is returned.

    The result is always stripped of surrounding whitespace so that it can
    be compared with the stripped fragments that ``tokenizeGoods`` stores.
    """
    parts = s.split("/")
    if len(parts) == 2:
        first, second = parts[0].strip(), parts[1].strip()
        if _DIGITS_ONLY.match(first) is not None:
            return second
        if _DIGITS_ONLY.match(second) is not None:
            return first
    return s.strip()
def tokenizeGoods():
    """Record every non-numerical description fragment as a Translation.

    Each Good's ``description`` is split on ``/``; every fragment is
    stripped and, unless ``isNumerical`` accepts it or a Translation with
    that ``string`` already exists, saved as a new Translation and printed.
    """
    for good in Good.objects.all():
        for fragment in good.description.split("/"):
            token = fragment.strip()
            if isNumerical(token):
                continue
            already_known = Translation.objects.filter(string=token).count() >= 1
            if already_known:
                continue
            Translation(string=token).save()
            print(token)
# Matches from the start of a string when a digit 0-9 occurs before the
# first newline ("." does not cross line breaks).
regex = re.compile(r'.*?[0-9].*')


def isNumerical(s):
    """Return True when ``regex`` matches *s*, i.e. *s* contains a digit."""
    return regex.match(s) is not None
def applyGoodTranslations():
    """Copy each non-blank translation onto the goods it applies to.

    For every Translation whose ``string_trans`` is not blank, the
    translation is printed and stored as ``description_string_trans`` on
    every Good whose ``description_string`` equals the Translation's
    ``string``; each such Good is saved.
    """
    for translation in Translation.objects.all():
        if translation.string_trans.strip() == '':
            continue
        source_text = translation.string
        translated = translation.string_trans
        print(translated)
        matching_goods = Good.objects.filter(description_string=source_text)
        for good in matching_goods:
            good.description_string_trans = translated
            good.save()