added comments to import_atlas, but it is still wrong :(
This commit is contained in:
parent eae4d52bec
commit 895ab5de04

Binary file not shown.
@@ -2,29 +2,15 @@ from settings import PROJECT_ROOT
from os.path import join
import json
import csv
-import pdb
+import pdb #debugger
from mumbai.models import *
from fuzzywuzzy import process as fuzzprocess
import datetime

#Get levenshtein distance between two strings, from http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
def levenshtein(s1, s2):
    if len(s1) < len(s2):
        return levenshtein(s2, s1)
    if not s1:
        return len(s2)

    previous_row = xrange(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1  # j+1 instead of j since previous_row and current_row are one character longer
            deletions = current_row[j] + 1        # than s2
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]
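
(Aside, not part of the diff: a quick sanity check of levenshtein above, assuming this file still targets Python 2, since it uses xrange and print statements.)

# levenshtein("kitten", "sitting")  -> 3   (k->s, e->i, insert g)
# levenshtein("", "abc")            -> 3
# levenshtein("same", "same")       -> 0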

'''
Convert Atlas.csv file (obtained from BEST) into first stage Atlas.json
(step 1)
'''
def csvToJSON():
    atlasCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.csv"), "r"), delimiter="\t")
    atlasDict = {}
@@ -44,6 +30,7 @@ def csvToJSON():

'''
Function to copy over values of AM / N / PM + Schedule from the previous row, reading from Atlas.json and writing to atlasCopied.json
(fill in blank rows where 'copy from previous' is assumed, and create a new json file - step 2)
'''
def processJSON():
    routeErrors = {'routes': [], 'others': []}
@@ -85,14 +72,18 @@ def processJSON():

'''
function to group atlasCopied.json into unique routes (uniqueRoutes.json)
(step 3)
'''
def groupUnique():
    routes = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/atlasCopied.json")).read())
    errors = {}
    outDict = {}
    for key in routes.keys():

        outDict[key] = []
        for row in routes[key]:
            i = 0
            print key
            d = {
                'from': row[7],
@@ -105,47 +96,31 @@ def groupUnique():
                }
            }
            matchedRow = isNotUnique(d, outDict[key])
+           schedule = row[-5]
            if matchedRow:
-               schedule = row[-5]
-               outDict[key][i-1]['rows'][schedule] = row
+               outDict[key][matchedRow]['rows'][schedule] = row
            else:
                if isLargestSpan(d, routes[key]):
                    d['is_full'] = True
                outDict[key].append(d)
                outDict[key][i]['rows'][schedule] = row
                i += 1

    outFile = open(join(PROJECT_ROOT, "../db_csv_files/uniqueRoutes.json"), "w")
    outFile.write(json.dumps(outDict, indent=2))
    outFile.close()
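
(Aside, not part of the diff: the rough shape groupUnique writes to uniqueRoutes.json, inferred from the code above; the route key, stop names, and schedule code below are made up.)

# {
#   "A-74": [
#     {"from": "Stop A", "to": "Stop B", "span": "12.3", "is_full": true,
#      "rows": {"MS": ["...the raw Atlas row for this schedule..."]}}
#   ]
# }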


'''
Import RouteMaster
Go through uniqueRoutes.json and actually import atlas data into the db
(step 4)
'''
def importRouteMaster():
    CsvFile = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/RouteMaster.csv"), "r"), delimiter=',')
    test = CsvFile.next()
    stop_errors = []
    print test
    for row in CsvFile:
        if len(row) < 1:
            continue
        from_to = getFromToStopsForRoute(row[0])
        if from_to is None:
            stop_errors.append(row[0])
            continue
        print row[0]
        obj = Route(code=row[0], alias=row[1], from_stop_txt=row[2], to_stop_txt=row[3], from_stop=from_to[0], to_stop=from_to[1], distance=row[4], stages=int(row[5]))
        obj.save()
    errors = open(join(PROJECT_ROOT, "../errors/routeStopErrors.json"), "w")
    errors.write(json.dumps(stop_errors, indent=2))
    errors.close()


def importUniqueRoutes():
    data = json.load(open(join(PROJECT_ROOT, "../db_csv_files/uniqueRoutes.json")))
    routeMapping = json.load(open(join(PROJECT_ROOT, "../db_csv_files/routeMapping.json")))
-   routeDoesNotExistErrors = []
-   stopMapping = {}
-   stopErrors = []
+   routeDoesNotExistErrors = []  # route codes for which there are entries in routeMapping.json and in the Atlas, but which do not exist in RouteMaster
+   stopMapping = {}  # FIXME
+   stopErrors = []  # This should ideally never happen; any errors here are bad and would most likely indicate problems with the fuzzy matching logic.
    for route in data.keys():
        routeCode = routeMapping[route]
        try:
@@ -159,8 +134,8 @@ def importUniqueRoutes():
            distance = float(thisRoute['span'])
        except:
            distance = 0
-       obj = UniqueRoute(route=routeObj, is_full=thisRoute['is_full'], distance=distance, from_stop_txt=thisRoute['from'], to_stop_txt=thisRoute['to'])
-       if obj.is_full:
+       obj = UniqueRoute(route=routeObj, is_full=thisRoute['is_full'], distance=distance, from_stop_txt=thisRoute['from'], to_stop_txt=thisRoute['to'])
+       if obj.is_full:  # If the route is the primary route, we can get stop codes easily from RouteDetails' first / last stop
            from_to = getFromToStopsForRoute(routeObj.code)
            obj.from_stop = from_to[0]
            if not stopMapping.has_key(obj.from_stop_txt):
@@ -168,7 +143,7 @@ def importUniqueRoutes():
            obj.to_stop = from_to[1]
            if not stopMapping.has_key(obj.to_stop_txt):
                stopMapping[obj.to_stop_txt] = from_to[1].stopcd
-       else:
+       else:  # Else we do fuzzy string matching against all possible values for stopname got from RouteDetails
            stopnames = []
            stopcodes = []
            for r in RouteDetails.objects.filter(rno=routeObj.code):
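
(Aside, not part of the diff: the fuzzy matching presumably relies on fuzzywuzzy's process.extractOne, imported above as fuzzprocess, which returns the best (choice, score) pair for a query against a list of choices; the stop name and score below are made up.)

# best = fuzzprocess.extractOne("ANDHERI STN", stopnames)
# best  -> ("Andheri Station (W)", 90)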
@@ -183,10 +158,11 @@ def importUniqueRoutes():
        except:
            stopErrors.append([thisRoute['from'], thisRoute['to']])
            continue

        obj.save()
        #pdb.set_trace()
-       for schedule in thisRoute['rows'].keys():
+       # print thisRoute['rows'].keys()
+       for schedule in thisRoute['rows'].keys():  # loop through each schedule per UniqueRoute and save it
            row = thisRoute['rows'][schedule]
            try:
                depot = Depot.objects.get(depot_code=row[6])
@@ -195,6 +171,8 @@ def importUniqueRoutes():
+           #pdb.set_trace()
            routeScheduleObj = RouteSchedule(unique_route=obj, schedule_type=schedule, busesAM=noneInt(row[2]), busesN=noneInt(row[3]), busesPM=noneInt(row[4]), bus_type=row[5], depot_txt=row[6], depot=depot, first_from=formatTime(row[8]), last_from=formatTime(row[9]), first_to=formatTime(row[11]), last_to=formatTime(row[12]), runtime1=noneInt(row[14]), runtime2=noneInt(row[15]), runtime3=noneInt(row[16]), runtime4=noneInt(row[17]), headway1=noneInt(row[18]), headway2=noneInt(row[19]), headway3=noneInt(row[20]), headway4=noneInt(row[21]), headway5=noneInt(row[22]))
            routeScheduleObj.save()

+   #done saving things - write out error files:
    errors = open(join(PROJECT_ROOT, "../errors/routeMasterMissingRoutes.json"), "w")
    errors.write(json.dumps(routeDoesNotExistErrors, indent=2))
    errors.close()

@@ -227,12 +205,20 @@ def formatTime(s):
    except:
        return datetime.time(0,0)
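
(Aside, not part of the diff: whatever parsing formatTime does in the lines this hunk omits, the except branch shown here means any unparseable value falls back to midnight.)

# formatTime("NA")  -> datetime.time(0, 0)   # hypothetical bad input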

'''
Silly function to deal with invalid strings in the data that need to go into the db as integers.
Passed a string, it will either return int(string) or None if that fails for any reason.
FIXME: find a more elegant way to do this
'''
def noneInt(val):
    try:
        return int(val)
    except:
        return None
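
(Aside, not part of the diff: behaviour of noneInt; the bare except swallows TypeError as well as ValueError, so None and junk strings both come back as None.)

# noneInt("12")  -> 12
# noneInt("--")  -> None   # ValueError
# noneInt(None)  -> None   # TypeError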

'''
Passed a route code, it gets stop codes for the first and last stop
'''
def getFromToStopsForRoute(routeCode):
    # fromStr = row[2]
    routeDetails = RouteDetails.objects.filter(rno=routeCode).order_by('stopsr')

@@ -243,7 +229,12 @@ def getFromToStopsForRoute(routeCode):
    return (fromStop, toStop,)
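
(Aside, not part of the diff: callers treat the return value as a (fromStop, toStop) pair of stop objects, which expose a stopcd attribute, and check for None, e.g. `from_to = getFromToStopsForRoute(row[0])` in importRouteMaster above.)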


'''
Checks whether the row in a set of rows for a route has the largest 'span' value; useful to tell if a row belongs to a primary route.
params:
    data - dict with a span attribute
    arr - array of rows to check data['span'] against. span is at row[13]
'''
def isLargestSpan(data, arr):
    span = data['span']
    for a in arr:

@@ -265,12 +256,15 @@ def isLargestSpan(data, arr):
def isNotUnique(data, arr):
    i = 0
    for a in arr:
-       if a['from'] == data['from'] and a['to'] == data['to'] and a['span'] == data['span']:
+       if a['from'] == data['from'] and a['to'] == data['to']:
            return i
        i += 1
    return False
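
(Aside, not part of the diff: isNotUnique returns the index of the first matching row, or False. A match at index 0 returns 0, which is falsy, so `if matchedRow:` in groupUnique treats it like no match; that is plausibly part of why the commit message says this is still wrong.)

# isNotUnique({'from': 'A', 'to': 'B'}, [{'from': 'A', 'to': 'B'}])  -> 0   (falsy!)
# isNotUnique({'from': 'A', 'to': 'B'}, [])                          -> False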


'''
Create routeMapping.json file to map route aliases to route codes
TODO: add mappings from hard coded routes
'''
def getRouteCodes():
    atlasRawCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/AtlasRaw.csv"), "r"), delimiter="\t")
    atlasDict = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.json")).read())

@@ -290,6 +284,28 @@ def getRouteCodes():
    mappingFile.close()
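
(Aside, not part of the diff: routeMapping.json presumably ends up as a flat alias-to-code lookup, since importUniqueRoutes does `routeCode = routeMapping[route]`; the entries below are made up.)

# {"21 LTD": "21LTD", "A-74 Exp": "A74EXP"}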

+'''
+Import RouteMaster into db
+'''
+def importRouteMaster():
+    CsvFile = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/RouteMaster.csv"), "r"), delimiter=',')
+    test = CsvFile.next()
+    stop_errors = []
+    print test
+    for row in CsvFile:
+        if len(row) < 1:
+            continue
+        from_to = getFromToStopsForRoute(row[0])
+        if from_to is None:
+            stop_errors.append(row[0])
+            continue
+        print row[0]
+        obj = Route(code=row[0], alias=row[1], from_stop_txt=row[2], to_stop_txt=row[3], from_stop=from_to[0], to_stop=from_to[1], distance=row[4], stages=int(row[5]))
+        obj.save()
+    errors = open(join(PROJECT_ROOT, "../errors/routeStopErrors.json"), "w")
+    errors.write(json.dumps(stop_errors, indent=2))
+    errors.close()


def csvClean1():
    atlasCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.csv"), "r"), delimiter="\t")
@@ -2,6 +2,9 @@ from django.contrib import admin
from django import forms
from mumbai.models import *

+class RouteScheduleInline(admin.StackedInline):
+    model = RouteSchedule

class AreaAdmin(admin.ModelAdmin):
    list_display = ("a_code", "areanm")
    list_editable = ("areanm",)

@@ -26,6 +29,9 @@ class FareAdmin(admin.ModelAdmin):
        models.TextField: {'widget': forms.TextInput},
    }

+class UniqueRouteAdmin(admin.ModelAdmin):
+    inlines = [RouteScheduleInline]


class StopForm(forms.ModelForm):

@@ -129,3 +135,5 @@ admin.site.register(Landmark, LandmarkAdmin )
admin.site.register(Depot,DepotAdmin)
admin.site.register(Holiday,HolidayAdmin)
admin.site.register(StopLocation,StopLocationAdmin)

+admin.site.register(UniqueRoute, UniqueRouteAdmin)
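
(Aside, not part of the diff: registering UniqueRoute with UniqueRouteAdmin and its RouteScheduleInline makes each unique route's RouteSchedule rows editable inline on the UniqueRoute admin page, standard Django admin behaviour for a StackedInline, assuming RouteSchedule has a ForeignKey to UniqueRoute.)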
File diff suppressed because it is too large