Get Atlas JSON grouped by unique routes with rows for schedule days; final state to be split into 3 tables and imported into the DB
parent 3575a65912
commit d57cdf0720
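The commit message describes a three-step pipeline. Nothing in the diff wires the steps together, so the driver below is only a sketch of the assumed invocation order, inferred from the files each function reads and writes; the driver function itself is hypothetical:

def buildUniqueRoutes():
    # Hypothetical driver, not part of this commit: it only shows the assumed order.
    csvToJSON()    # Atlas.csv (tab-separated) -> Atlas.json
    processJSON()  # Atlas.json -> atlasCopied.json, forward-filling blank AM/PM and Schedule cells
    groupUnique()  # atlasCopied.json -> uniqueRoutes.json, one entry per unique (from, to, span)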
@@ -2,6 +2,7 @@ from settings import PROJECT_ROOT
 from os.path import join
 import json
 import csv
+import pdb
 
 def csvToJSON():
     atlasCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.csv"), "r"), delimiter="\t")
@@ -20,6 +21,109 @@ def csvToJSON():
     jsonFile.write(json.dumps(atlasDict, indent=2))
     jsonFile.close()
 
+
+'''
+function to copy over values of AM N PM + Schedule from previous row, reading from Atlas.json, writing to atlasCopied.json
+'''
+def processJSON():
+    routeErrors = {'routes': [], 'others': []}
+    routeMapping = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/routeMapping.json")).read())
+    routes = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.json")).read())
+    previousRow = []
+    outDict = {}
+    for key in routes.keys():
+        print key
+        if key not in routeMapping: #make note of routeNames we dont have routeAlias for yet.
+            routeErrors['routes'].append(key)
+        else: #else, go ahead ..
+            routeAlias = routeMapping[key]
+            thisRoute = routes[key]
+            #handle copying over empty values from previous rows
+            outDict[key] = []
+            for row in thisRoute:
+                # pdb.set_trace()
+                if len(row) < 7:
+                    routeErrors['others'].append({key: row})
+                    break
+                for i in range(2,4):
+                    if row[i].strip() == '':
+                        row[i] = previousRow[i]
+                try:
+                    if row[-5].strip() == '':
+                        row[-5] = previousRow[-5]
+                except:
+                    pdb.set_trace()
+                previousRow = row
+                outDict[key].append(row)
+
+    atlasRouteErrors = open("atlasRouteErrors.json", "w")
+    atlasRouteErrors.write(json.dumps(routeErrors, indent=2))
+    atlasRouteErrors.close()
+    atlasCopied = open(join(PROJECT_ROOT, "../db_csv_files/atlasCopied.json"), "w")
+    atlasCopied.write(json.dumps(outDict, indent=2))
+    atlasCopied.close()
+
+'''
+function to group atlasCopied.json to uniqueRoutes (uniqueRoutes.json)
+'''
+def groupUnique():
+    routes = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/atlasCopied.json")).read())
+    errors = {}
+    outDict = {}
+    for key in routes.keys():
+        outDict[key] = []
+        for row in routes[key]:
+            print key
+            d = {
+                'from': row[7],
+                'to': row[10],
+                'span': row[13],
+                'is_full': False,
+                # 'schedule': row[28],
+                'rows': {
+                    row[-5]: row
+                }
+            }
+            matchedRow = isNotUnique(d, outDict[key])
+            if matchedRow:
+                schedule = row[-5]
+                outDict[key][matchedRow]['rows'][schedule] = row
+            else:
+                if isLargestSpan(d, routes[key]):
+                    d['is_full'] = True
+                outDict[key].append(d)
+    outFile = open(join(PROJECT_ROOT, "../db_csv_files/uniqueRoutes.json"), "w")
+    outFile.write(json.dumps(outDict, indent=2))
+    outFile.close()
+
+
+def isLargestSpan(data, arr):
+    span = data['span']
+    for a in arr:
+        try:
+            arrSpan = float(a[13])
+        except:
+            arrSpan = 0
+        try:
+            dataSpan = float(data['span'])
+        except:
+            dataSpan = 0
+        if arrSpan > dataSpan:
+            return False
+    return True
+
+'''
+returns index of row if not unique, else False
+'''
+def isNotUnique(data, arr):
+    i = 0
+    for a in arr:
+        if a['from'] == data['from'] and a['to'] == data['to'] and a['span'] == data['span']:
+            return i
+        i += 1
+    return False
+
+
 def getRouteCodes():
     atlasRawCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/AtlasRaw.csv"), "r"), delimiter="\t")
     atlasDict = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.json")).read())
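To make the copy-forward step in processJSON easier to follow in isolation, here is a minimal sketch of the same forward-fill logic, lifted out of the route loop. The column positions mirror the indices used above (2-3 for the AM/PM columns, -5 for the schedule code); the function name and everything else about it are made up for illustration:

def forwardFill(rows):
    # Blank cells in columns 2-3 and in the schedule column (index -5)
    # inherit the value from the previous row, as processJSON does per route.
    previousRow = []
    filled = []
    for row in rows:
        if previousRow:
            for i in range(2, 4):
                if row[i].strip() == '':
                    row[i] = previousRow[i]
            if row[-5].strip() == '':
                row[-5] = previousRow[-5]
        previousRow = row
        filled.append(row)
    return filled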
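For the grouping step, each route in uniqueRoutes.json ends up as a list of objects, one per unique (from, to, span) combination, with every schedule variant of that combination stored under its schedule code in 'rows'. A hypothetical entry (route name and all field values invented) looks roughly like this:

uniqueRoutesEntry = {
    "SOME ROUTE NAME": [
        {
            "from": "First stop",      # row[7]
            "to": "Last stop",         # row[10]
            "span": "17.5",            # row[13]; isLargestSpan compares these as floats
            "is_full": True,           # True only for the longest span seen on this route
            "rows": {
                "MON-FRI": ["<full Atlas row for the weekday schedule>"],
                "SUN": ["<full Atlas row for the Sunday schedule>"]
            }
        }
    ]
}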
db_csv_files/atlasCopied.json: 51582 lines, normal file (diff suppressed because it is too large)
db_csv_files/uniqueRoutes.json: 61198 lines, normal file (diff suppressed because it is too large)