284 lines
7.4 KiB
Python
Executable file
284 lines
7.4 KiB
Python
Executable file
import codecs
import csv
import json
import os
import re
import sys
from glob import glob
from xml.sax.saxutils import escape

from models import Ship, Good, Manifest, Translation
|
|
|
|
def toCsv():
    """Re-encode every ``*.txt`` file in the current directory as UTF-8.

    Each file is read as windows-1256 and written next to the original
    with its extension swapped for ``.csv``; the content itself is
    copied unchanged.
    """
    for m in glob('*.txt'):
        # codecs.open already yields decoded text, so no extra
        # conversion is needed before re-encoding.
        with codecs.open(m, "r", encoding="windows-1256") as f:
            txt = f.read()
        # Swap only the extension: a plain str.replace would also rewrite
        # any "txt" that happens to occur inside the base name.
        outFileName = os.path.splitext(m)[0] + ".csv"
        print(outFileName)
        with codecs.open(outFileName, "w", "utf-8") as w:
            w.write(txt)
    print("done")
|
|
|
|
def toRaw():
    """Render every ``*.csv`` file in the current directory as an HTML table.

    For each CSV a sibling ``.html`` file is written (UTF-8) containing a
    bare ``<table>`` with one ``<tr>`` per CSV row and one ``<td>`` per
    cell. No surrounding document markup is produced.
    """
    for c in glob("*.csv"):
        # Swap only the extension so a "csv" inside the base name survives.
        outFile = os.path.splitext(c)[0] + ".html"
        print(outFile)
        with open(c) as src:
            with codecs.open(outFile, "w", "utf-8") as f:
                f.write("<table>")
                for row in csv.reader(src):
                    f.write("<tr>")
                    for col in row:
                        # The csv module yields byte strings on Python 2
                        # and text on Python 3; decode only the former.
                        if isinstance(col, bytes):
                            col = col.decode("utf-8")
                        # Escape &, < and > so cell text cannot break
                        # the generated markup.
                        f.write("<td>" + escape(col) + "</td>")
                    f.write("</tr>")
                f.write("</table>")
    print("done")
|
|
|
|
def toHtml():
    """Wrap each ``raw/*.html`` fragment with the shared header and footer.

    The result for ``raw/NAME.html`` is written to ``html/NAME.html`` as
    ``header.html`` + fragment + ``footer.html``, encoded as UTF-8.
    """
    htmls = glob("raw/*.html")
    if htmls:
        # Header and footer are the same for every page, so read them once.
        # They are decoded as UTF-8 so they can be concatenated with the
        # (decoded) fragment even when they contain non-ASCII characters.
        with codecs.open("header.html", "r", encoding="utf-8") as src:
            header = src.read()
        with codecs.open("footer.html", "r", encoding="utf-8") as src:
            footer = src.read()
    for h in htmls:
        outFile = os.path.join("html", os.path.basename(h))
        print(outFile)
        with codecs.open(h, "r", encoding="utf-8") as src:
            f = src.read()
        with codecs.open(outFile, "w", "utf-8") as w:
            w.write(header + f + footer)
    print("done")
|
|
|
|
|
|
def isShip(row):
    """Return True when *row* is a ship header row.

    A row counts as a ship row when its first cell, ignoring case and
    surrounding whitespace, is ``import`` or ``rexport``. Rows that are
    empty, missing, or whose first cell is not a string yield False.
    """
    try:
        kind = row[0].strip().lower()
    except (IndexError, KeyError, TypeError, AttributeError):
        # Empty row, None, or a non-string first cell: not a ship row.
        return False
    return kind in ('import', 'rexport')
|
|
|
|
|
|
def _groupShipRows(rows):
    """Group CSV rows into ship records.

    Every row accepted by ``isShip`` starts a new record
    ``{'row': <ship row>, 'goods': [...]}``; the rows that follow it, up
    to the next ship row, are collected as its goods. Rows that appear
    before the first ship row belong to no ship and are skipped.
    """
    ships = []
    current = None
    for row in rows:
        if isShip(row):
            print(row[0])
            current = {'row': row, 'goods': []}
            # Register the ship immediately so one sitting on the very
            # last row (with no goods after it) is not lost.
            ships.append(current)
        elif current is not None:
            current['goods'].append(row)
    return ships


def csvToJSON(prefix):
    """Collect all ``<prefix>*.csv`` files into ``<prefix>Data.json``.

    The output is a JSON list with one entry per CSV file holding its
    ``filename``, a ``date`` (the filename with *prefix* removed) and the
    ``ships`` found in it, each with its raw row and its goods rows.
    """
    d = []
    for c in glob(prefix + "*.csv"):
        with open(c) as src:
            rows = list(csv.reader(src))
        d.append({
            'filename': c,
            'date': c.replace(prefix, ""),
            'ships': _groupShipRows(rows),
        })
    outFile = prefix + "Data.json"
    with codecs.open(outFile, "w", "utf-8") as f:
        f.write(json.dumps(d, indent=2))
    return
|
|
|
|
def addType(inFile, outFile):
    """Prefix every ship row in a manifest JSON file with ``"Export"``.

    Reads the list of manifests from *inFile*, inserts the string
    ``"Export"`` as the first cell of each ship's ``row`` and writes the
    result to *outFile* as UTF-8 JSON.
    """
    with open(inFile) as src:
        inData = json.load(src)
    for fil in inData:
        for ship in fil['ships']:
            row = ship['row']
            row.insert(0, "Export")
            # Echo the cell that used to be first; an originally empty
            # row has nothing to show.
            if len(row) > 1:
                print(row[1])
    # Open the output only once the data is ready, so a failure above
    # does not leave a truncated file behind.
    with codecs.open(outFile, "w", "utf-8") as dst:
        dst.write(json.dumps(inData))
|
|
|
|
def cleanDates(filename):
    """Load a manifest JSON file and strip the directory from filenames.

    The hard-coded ``csv/02042012/Outgoing/`` path is removed from each
    entry's ``filename``. The file on disk is not modified; the cleaned
    list is returned so the caller can use or store it.
    """
    with open(filename) as src:
        data = json.load(src)
    for d in data:
        d['filename'] = d['filename'].replace("csv/02042012/Outgoing/", "")
    return data
|
|
|
|
|
|
def _writeErrorReport(path, errors):
    """Dump *errors* to *path* as indented JSON."""
    with open(path, "w") as report:
        report.write(json.dumps(errors, indent=2))


def _importGood(s, good, errors_goods):
    """Create and save one Good for ship *s* from the raw row *good*.

    Cells are read by position: description, package type, number of
    packages, weight, value. Blank numeric cells are left unset; a cell
    that is not an integer, or a failed save, appends the row to
    *errors_goods* (a row may therefore be listed more than once).
    """
    g = Good()
    g.ship = s
    g.description = good[0]
    g.package_type = good[1]
    for attr, idx in (('no_of_packages', 2), ('weight', 3), ('value', 4)):
        text = good[idx]
        if text.strip() != '':
            try:
                setattr(g, attr, int(text))
            except ValueError:
                errors_goods.append(good)
    try:
        g.save()
    except Exception:
        print("goods error")
        errors_goods.append(good)


def _importShip(m, ship, errors_ships, errors_goods):
    """Create and save one Ship of manifest *m*, then its goods.

    The ship row is read by position: number, date, ship name, captain,
    flag, owner and, when present, port and country. Goods are imported
    only when the ship was saved (i.e. it received an id).
    """
    s = Ship()
    row = ship['row']
    s.manifest_file = m
    s.bill_type = 'Export'
    s.number = row[0]
    shipDate = row[1]
    # Reorders the slices [0:2], [3:5], [6:] into "<[6:]>-<[3:5]>-<[0:2]>";
    # presumably DD?MM?YYYY -> YYYY-MM-DD -- TODO confirm against the data.
    s.date = "%s-%s-%s" % (shipDate[6:], shipDate[3:5], shipDate[0:2])
    s.ship_name = row[2]
    s.captain = row[3]
    s.flag = row[4]
    s.owner = row[5]
    # Port and country are optional trailing columns.
    s.port = row[6] if len(row) > 6 else ''
    s.country = row[7] if len(row) > 7 else ''
    try:
        s.save()
    except Exception:
        print("ship error")
        errors_ships.append(row)
    else:
        print("ship success")
    if s.id:
        for good in ship['goods']:
            _importGood(s, good, errors_goods)


def importJSON(filename, prefix=""):
    """Import manifests, ships and goods from a JSON file into the database.

    *filename* is a JSON list of manifests, each with ``filename``,
    ``date`` and ``ships`` (every ship having a ``row`` and ``goods``).
    *prefix* is a quick fix for dates such as ``creekmanifest1201``: when
    given, it is removed from the date before parsing.

    The last two characters of the date (after dropping ``.csv``) are the
    two-digit year; the first three characters of what precedes them,
    lower-cased, are stored as the month. Existing manifests for the same
    month and year are deleted before the new one is saved. If a manifest
    cannot be saved the import stops via ``sys.exit()``.

    Failures are collected and always written -- even when the import
    aborts -- to ``shipErrors.json``, ``goodErrors.json`` and
    ``manifestsErrors.json`` in the current directory.
    """
    with open(filename) as src:
        data = json.load(src)
    errors_ships = []
    errors_goods = []
    manifests_errors = []
    try:
        for manifest in data:
            m = Manifest()
            m.filename = manifest['filename']
            if prefix != '':
                manifest['date'] = manifest['date'].replace(prefix, "")
            dateStr = manifest['date'].replace(".csv", "")
            yr = dateStr[-2:]
            # Slice the year off the end; str.replace would also remove
            # the same two characters anywhere else in the string.
            month = dateStr[:-2]
            print(yr)
            print(month)
            m.month = month[0:3].lower()
            m.year = int("20" + yr)
            # Look up duplicates with the normalised month that is
            # actually stored, not the raw text from the filename.
            matchedManifests = Manifest.objects.filter(month=m.month).filter(year=m.year)
            if matchedManifests.count() > 0:
                print("Manifest file exists with id %d" % matchedManifests[0].id)
                for match in matchedManifests:
                    match.delete()
            try:
                m.save()
            except Exception:
                manifests_errors.append(manifest['filename'])
                sys.exit()
            for ship in manifest['ships']:
                _importShip(m, ship, errors_ships, errors_goods)
    finally:
        _writeErrorReport("shipErrors.json", errors_ships)
        _writeErrorReport("goodErrors.json", errors_goods)
        _writeErrorReport("manifestsErrors.json", manifests_errors)
|
|
|
|
|
|
def generateGoodStrings():
    """Fill in ``description_string`` for every Good that lacks one.

    The value comes from ``getGoodString`` applied to the good's
    ``description``; each computed string is printed and the good saved.
    Goods that already have a non-empty ``description_string`` are left
    untouched.
    """
    for good in Good.objects.all():
        if not good.description_string:
            cleaned = getGoodString(good.description)
            print(cleaned)
            good.description_string = cleaned
            good.save()
    return
|
|
|
|
# Matches a string made only of digits (or an empty string).
_DIGITS_ONLY_RE = re.compile(r'^[0-9]*?$')


def getGoodString(s):
    """Return the textual part of a goods description.

    Descriptions are split on ``/``:

    * no slash: the description itself, stripped;
    * exactly two parts where one is all digits (or empty): the other
      part, stripped;
    * anything else: *s* unchanged.

    Single-token results are stripped so they compare equal to the
    stripped tokens that ``tokenizeGoods`` stores as translation keys.
    """
    parts = s.split("/")
    if len(parts) == 1:
        return s.strip()
    if len(parts) == 2:
        first = parts[0].strip()
        second = parts[1].strip()
        if _DIGITS_ONLY_RE.match(first) is not None:
            return second
        if _DIGITS_ONLY_RE.match(second) is not None:
            return first
    return s
|
|
|
|
def tokenizeGoods():
    """Register every new textual token of the goods descriptions.

    Each description is split on ``/``; every stripped part that
    ``isNumerical`` rejects and that has no Translation yet is saved as
    a new ``Translation(string=...)`` and printed. Empty parts (from
    descriptions such as ``"Rice/"``) are ignored so that no blank
    translation entry is created.
    """
    for g in Good.objects.all():
        for p in g.description.split("/"):
            w = p.strip()
            if not w or isNumerical(w):
                continue
            if Translation.objects.filter(string=w).count() < 1:
                t = Translation(string=w)
                t.save()
                print(w)
|
|
|
|
|
|
# Matches from the start of a string up to and past its first digit;
# '.' does not cross newlines, so the digit must be on the first line.
regex = re.compile(r'.*?[0-9].*')


def isNumerical(s):
    """Return True when *s* contains a digit, False otherwise.

    Despite the name, the string does not have to be purely numeric:
    a single digit anywhere on its first line is enough.
    """
    return regex.match(s) is not None
|
|
|
|
|
|
def applyGoodTranslations():
    """Copy each available translation onto the goods it applies to.

    For every Translation with a non-blank ``string_trans``, the
    translation is printed and stored as ``description_string_trans`` on
    all goods whose ``description_string`` equals the translation's
    ``string``. Translations whose ``string_trans`` is blank or unset
    are skipped.
    """
    for t in Translation.objects.all():
        trans = t.string_trans
        # Guard against an unset value as well as a blank one.
        if not trans or trans.strip() == '':
            continue
        print(trans)
        for g in Good.objects.filter(description_string=t.string):
            g.description_string_trans = trans
            g.save()
|
|
|
|
|
|
|