# manifests/manifests/ships/convert.py

import codecs
from glob import glob
import sys
import os
import csv
import json
from models import Ship, Good, Manifest, Translation
import re

def toCsv():
  """Re-encode every ``*.txt`` file in the current directory as UTF-8 ``.csv``.

  Each input is read as windows-1256 text and written unchanged (apart from
  the encoding) to a sibling file whose extension is ``.csv``. Only the
  extension is swapped, so a name such as ``txtlog.txt`` becomes
  ``txtlog.csv``. The output name is printed for each file, then "done".
  """
  for m in glob('*.txt'):
    # codecs.open already yields unicode text; no extra conversion is needed.
    with codecs.open(m, "r", encoding="windows-1256") as f:
      txt = f.read()
    # Swap only the extension rather than every "txt" substring in the name.
    outFileName = os.path.splitext(m)[0] + ".csv"
    print(outFileName)
    with codecs.open(outFileName, "w", "utf-8") as w:
      w.write(txt)
  print("done")

def toRaw():
  """Render every ``*.csv`` file in the current directory as a bare HTML table.

  For each CSV a sibling ``.html`` file is written (UTF-8) containing a single
  ``<table>`` with one ``<tr>`` per CSV row and one ``<td>`` per cell. Cells
  are decoded from UTF-8 bytes before being written. The output name is
  printed for each file, then "done".
  """
  for c in glob("*.csv"):
    # Swap only the extension rather than every "csv" substring in the name.
    outFile = os.path.splitext(c)[0] + ".html"
    print(outFile)
    with open(c) as src:
      with codecs.open(outFile, "w", "utf-8") as f:
        f.write("<table>")
        for row in csv.reader(src):
          f.write("<tr>")
          for col in row:
            # NOTE(review): cell text is written without HTML escaping, so
            # cells containing "<" or "&" end up as raw markup.
            f.write("<td>" + col.decode("utf-8") + "</td>")
          f.write("</tr>")
        f.write("</table>")
  print("done")

def toHtml():
  """Wrap every ``raw/*.html`` fragment with the shared header and footer.

  ``header.html`` and ``footer.html`` are read once (as UTF-8, so they can be
  concatenated safely with the UTF-8 fragments) and each fragment is written
  to ``html/<same file name>`` as header + fragment + footer. The output path
  is printed for each file, then "done". When there are no fragments the
  header and footer are not opened at all.
  """
  htmls = glob("raw/*.html")
  if htmls:
    # Loop-invariant: load the wrapper once instead of once per fragment.
    with codecs.open("header.html", "r", encoding="utf-8") as headerFile:
      header = headerFile.read()
    with codecs.open("footer.html", "r", encoding="utf-8") as footerFile:
      footer = footerFile.read()
  for h in htmls:
    # Build the target from the file name so only the directory changes.
    outFile = os.path.join("html", os.path.basename(h))
    print(outFile)
    with codecs.open(h, "r", encoding="utf-8") as rawFile:
      body = rawFile.read()
    with codecs.open(outFile, "w", "utf-8") as w:
      w.write(header + body + footer)
  print("done")


def isShip(row):
  """Return True when *row* is a ship header row.

  A row counts as a ship row when its first cell, stripped and lower-cased,
  is exactly 'import' or 'rexport'. Anything that cannot be inspected that
  way (empty row, non-string first cell, non-indexable value) is reported as
  not a ship row.
  """
  try:
    return row[0].strip().lower() in ('import', 'rexport')
  except Exception:
    # Malformed rows are simply "not a ship"; unlike a bare except this lets
    # KeyboardInterrupt / SystemExit propagate.
    return False


def csvToJSON(prefix):
  """Group the rows of every ``<prefix>*.csv`` file by ship and dump them as JSON.

  For each matching CSV an entry ``{'filename', 'date', 'ships'}`` is built,
  where 'date' is the file name with *prefix* removed and 'ships' is a list
  of ``{'row': <ship row>, 'goods': [<rows up to the next ship row>]}``.
  A row is a ship row when ``isShip`` says so; its first cell is printed.
  Rows that appear before the first ship row of a file are skipped. The
  collected list is written, indented, to ``<prefix>Data.json``.
  """
  d = []
  for c in glob(prefix + "*.csv"):
    with open(c) as csvFile:
      rows = list(csv.reader(csvFile))
    ships = []
    thisShip = None
    # Single pass over the rows: every row is consumed exactly once, so
    # leading non-ship rows cannot stall the scan and a ship on the final
    # row is still recorded.
    for row in rows:
      if isShip(row):
        print(row[0])
        thisShip = {
          'row': row,
          'goods': []
        }
        ships.append(thisShip)
      elif thisShip is not None:
        thisShip['goods'].append(row)
    d.append({
      'filename': c,
      'date': c.replace(prefix, ""),
      'ships': ships
    })
  outFile = prefix + "Data.json"
  with codecs.open(outFile, "w", "utf-8") as f:
    f.write(json.dumps(d, indent=2))
  return

def addType(inFile, outFile):
  """Prefix every ship row in a manifests JSON file with the type "Export".

  *inFile* is a JSON list of entries that each have a 'ships' list whose
  items carry a 'row' list. "Export" is inserted at index 0 of every such
  row, the element that follows it is printed, and the whole structure is
  written as JSON (UTF-8) to the path *outFile*.

  The output is opened only after the input has been fully read and
  processed, so a failure while processing does not leave a truncated
  output file, and *inFile* and *outFile* may be the same path.
  """
  with open(inFile) as src:
    inData = json.load(src)
  for fil in inData:
    for ship in fil['ships']:
      ship['row'].insert(0, "Export")
      print(ship['row'][1])
  with codecs.open(outFile, "w", "utf-8") as dst:
    dst.write(json.dumps(inData))

def cleanDates(filename, prefix="csv/02042012/Outgoing/"):
    """Load a manifests JSON file and strip *prefix* from every 'filename'.

    *filename* must contain a JSON list of dicts that each have a 'filename'
    key. Every occurrence of *prefix* is removed from that value. The file on
    disk is not modified; the cleaned list is returned to the caller.
    """
    with open(filename) as f:
        data = json.load(f)
    for d in data:
        d['filename'] = d['filename'].replace(prefix, "")
    return data


# prefix is a quick fix for dates like creekmanifest1201
def importJSON(filename, prefix=""):
  """Import manifests, ships and goods from a JSON file into the database.

  *filename* holds a JSON list of entries with 'filename', 'date' and
  'ships' keys (the shape written by ``csvToJSON``). For every entry:

  * *prefix* (if given) and ".csv" are removed from 'date'; the last two
    characters are taken as the year ("20" is prepended) and the rest as the
    month, of which the first three characters, lower-cased, are stored.
  * existing ``Manifest`` rows with the same stored month and year are
    deleted, then the new manifest is saved.
  * every ship row is saved as a ``Ship`` and, when that succeeds, each of
    its goods rows is saved as a ``Good``.

  Nothing aborts the import: manifests, ship rows and goods rows that cannot
  be parsed or saved are collected and written to ``manifestsErrors.json``,
  ``shipErrors.json`` and ``goodErrors.json`` in the current directory. A
  manifest that fails is skipped together with its ships.
  """
  with open(filename) as inFile:
    data = json.load(inFile)
  errors_ships = []
  errors_goods = []
  manifests_errors = []
  for manifest in data:
    m = Manifest()
    m.filename = manifest['filename']
    dateStr = manifest['date']
    if prefix != '':
      dateStr = dateStr.replace(prefix, "")
    dateStr = dateStr.replace(".csv", "")
    # Slice instead of replace(): the year digits may also occur inside the
    # month part (e.g. "1212") and must not be stripped from it.
    yr = dateStr[-2:]
    month = dateStr[:-2]
    print(yr)
    print(month)
    m.month = month[0:3].lower()
    try:
      m.year = int("20" + yr)
    except ValueError:
      manifests_errors.append(manifest['filename'])
      continue
    # Look up duplicates by the value that is actually stored (m.month),
    # not the untruncated text taken from the file name.
    matchedManifests = Manifest.objects.filter(month=m.month).filter(year=m.year)
    if matchedManifests.count() > 0:
      print("Manifest file exists with id %d" % matchedManifests[0].id)
      for match in matchedManifests:
        match.delete()
    try:
      m.save()
    except Exception:
      # Record and move on so the error reports below still get written.
      manifests_errors.append(manifest['filename'])
      continue
    for ship in manifest['ships']:
      s = _saveShip(m, ship['row'], errors_ships)
      if s is not None and s.id:
        for good in ship['goods']:
          _saveGood(s, good, errors_goods)
  _dumpErrors("shipErrors.json", errors_ships)
  _dumpErrors("goodErrors.json", errors_goods)
  _dumpErrors("manifestsErrors.json", manifests_errors)


def _saveShip(manifest, row, errors_ships):
  """Build and save a Ship from *row*; return it, or None if the row is too short.

  Columns used: 0 number, 1 date, 2 ship name, 3 captain, 4 flag, 5 owner,
  and optionally 6 port and 7 country. Failures are appended to
  *errors_ships*.
  """
  if len(row) < 6:
    print("ship error")
    errors_ships.append(row)
    return None
  s = Ship()
  s.manifest_file = manifest
  s.bill_type = 'Export'
  s.number = row[0]
  shipDate = row[1]
  # Reorders fixed character positions 0-1, 3-4 and 6+ into "6+ - 3-4 - 0-1"
  # (presumably DD?MM?YYYY -> YYYY-MM-DD; confirm against the source data).
  s.date = "%s-%s-%s" % (shipDate[6:], shipDate[3:5], shipDate[0:2])
  s.ship_name = row[2]
  s.captain = row[3]
  s.flag = row[4]
  s.owner = row[5]
  s.port = row[6] if len(row) > 6 else ''
  s.country = row[7] if len(row) > 7 else ''
  try:
    s.save()
    print("ship success")
  except Exception:
    print("ship error")
    errors_ships.append(row)
  return s


def _saveGood(ship, good, errors_goods):
  """Build and save a Good for *ship* from *good*, logging failures to *errors_goods*.

  Columns used: 0 description, 1 package type, 2 number of packages,
  3 weight, 4 value. Blank numeric cells are left unset; a numeric cell that
  is not an integer is recorded as an error but the good is still saved.
  """
  if len(good) < 5:
    print("goods error")
    errors_goods.append(good)
    return
  g = Good()
  g.ship = ship
  g.description = good[0]
  g.package_type = good[1]
  for index, field in ((2, 'no_of_packages'), (3, 'weight'), (4, 'value')):
    if good[index].strip() != '':
      try:
        setattr(g, field, int(good[index]))
      except ValueError:
        errors_goods.append(good)
  try:
    g.save()
  except Exception:
    print("goods error")
    errors_goods.append(good)


def _dumpErrors(path, errors):
  """Write *errors* to *path* as indented JSON."""
  with open(path, "w") as errFile:
    errFile.write(json.dumps(errors, indent=2))


def generateGoodStrings():
    """Fill in ``description_string`` for every Good that does not have one.

    Goods whose ``description_string`` is already truthy are left untouched.
    For the others the value is derived from ``description`` with
    ``getGoodString``, printed, stored and saved.
    """
    for good in Good.objects.all():
        if not good.description_string:
            cleaned = getGoodString(good.description)
            print(cleaned)
            good.description_string = cleaned
            good.save()

def getGoodString(s):
    """Drop a purely numeric half from a two-part "a/b" description.

    *s* is split on "/". When there are exactly two parts and one of them,
    after stripping, is empty or made up only of the digits 0-9, the other
    part is returned as-is (its surrounding whitespace is kept). The first
    part is checked before the second. In every other case *s* is returned
    unchanged.
    """
    digitsOnly = re.compile(r'^[0-9]*?$')
    pieces = s.split("/")
    if len(pieces) != 2:
        return s
    left, right = pieces
    if digitsOnly.match(left.strip()) is not None:
        return right
    if digitsOnly.match(right.strip()) is not None:
        return left
    return s

def tokenizeGoods():
    """Create a Translation row for every new non-numeric description token.

    Each Good's description is split on "/" and every piece is stripped.
    Pieces that ``isNumerical`` accepts are ignored, as are pieces for which
    a Translation with the same ``string`` already exists. Each remaining
    piece is saved as a new Translation and printed.
    """
    for good in Good.objects.all():
        for piece in good.description.split("/"):
            token = piece.strip()
            if isNumerical(token):
                continue
            if Translation.objects.filter(string=token).count() >= 1:
                continue
            fresh = Translation(string=token)
            fresh.save()
            print(token)


# Matches from the start of the string when a digit 0-9 occurs before any newline.
regex = re.compile(r'.*?[0-9].*')
def isNumerical(s):
    """Return True when *s* contains a digit 0-9 before its first newline."""
    return regex.match(s) is not None


def applyGoodTranslations():
    """Copy every non-blank translation onto the Goods it applies to.

    Translations whose ``string_trans`` is blank after stripping are skipped.
    For the others the translated text is printed and stored in
    ``description_string_trans`` of every Good whose ``description_string``
    equals the translation's ``string``; each such Good is saved.
    """
    for entry in Translation.objects.all():
        if entry.string_trans.strip() == '':
            continue
        source = entry.string
        translated = entry.string_trans
        print(translated)
        for good in Good.objects.filter(description_string=source):
            good.description_string_trans = translated
            good.save()