added comments to import_atlas, but it is still wrong :(
parent eae4d52bec
commit 895ab5de04

(a binary file in this commit is not shown)
import_atlas:

@@ -2,29 +2,15 @@ from settings import PROJECT_ROOT
 from os.path import join
 import json
 import csv
-import pdb
+import pdb #debugger
 from mumbai.models import *
 from fuzzywuzzy import process as fuzzprocess
 import datetime
 
-#Get levenshtein distance between two strings, from http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
-def levenshtein(s1, s2):
-    if len(s1) < len(s2):
-        return levenshtein(s2, s1)
-    if not s1:
-        return len(s2)
-
-    previous_row = xrange(len(s2) + 1)
-    for i, c1 in enumerate(s1):
-        current_row = [i + 1]
-        for j, c2 in enumerate(s2):
-            insertions = previous_row[j + 1] + 1 # j+1 instead of j since previous_row and current_row are one character longer
-            deletions = current_row[j] + 1       # than s2
-            substitutions = previous_row[j] + (c1 != c2)
-            current_row.append(min(insertions, deletions, substitutions))
-        previous_row = current_row
-    return previous_row[-1]
-
+'''
+Convert Atlas.csv file (obtained from BEST) into first stage Atlas.json
+(step 1)
+'''
 def csvToJSON():
     atlasCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.csv"), "r"), delimiter="\t")
     atlasDict = {}
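The docstrings added in this commit number the import as steps 1 through 4, with routeMapping.json and RouteMaster loaded along the way. A minimal driver sketch, assuming the functions are meant to run in that order; the chaining itself is not part of the commit:

    # Hypothetical driver; the function names come from this file, the
    # ordering is inferred from the step numbers in the docstrings.
    def run_import():
        csvToJSON()           # step 1: Atlas.csv -> Atlas.json
        processJSON()         # step 2: fill 'copy from previous' blanks -> atlasCopied.json
        groupUnique()         # step 3: atlasCopied.json -> uniqueRoutes.json
        getRouteCodes()       # build routeMapping.json (route alias -> route code)
        importRouteMaster()   # RouteMaster.csv -> Route rows
        importUniqueRoutes()  # step 4: uniqueRoutes.json -> UniqueRoute/RouteSchedule rows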
@@ -44,6 +30,7 @@ def csvToJSON():
 
 '''
 function to copy over values of AM N PM + Schedule from previous row, reading from Atlas.json, writing to atlasCopied.json
+(fill in blank rows where 'copy from previous' is assumed, and create new json file - step 2)
 '''
 def processJSON():
     routeErrors = {'routes': [], 'others': []}
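A minimal sketch of the 'copy from previous row' rule that step 2 describes, assuming rows are lists of strings and a blank cell means 'same as the row above'; the column indices are illustrative, not taken from the commit:

    # Hypothetical forward-fill; COPIED_COLS is illustrative, not from the commit.
    COPIED_COLS = [2, 3, 4]  # e.g. the AM / N / PM bus-count columns

    def fill_from_previous(rows):
        prev = None
        for row in rows:
            for col in COPIED_COLS:
                if prev is not None and row[col].strip() == '':
                    row[col] = prev[col]  # blank cell: copy value from the row above
            prev = row
        return rows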
@@ -85,14 +72,18 @@ def processJSON():
 
 '''
 function to group atlasCopied.json to uniqueRoutes (uniqueRoutes.json)
+(step 3)
 '''
 def groupUnique():
     routes = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/atlasCopied.json")).read())
     errors = {}
     outDict = {}
     for key in routes.keys():
+
+
         outDict[key] = []
         for row in routes[key]:
+            i = 0
             print key
             d = {
                 'from': row[7],
@@ -105,47 +96,31 @@ def groupUnique():
                 }
             }
             matchedRow = isNotUnique(d, outDict[key])
+            schedule = row[-5]
             if matchedRow:
-                schedule = row[-5]
-                outDict[key][matchedRow]['rows'][schedule] = row
+                outDict[key][i-1]['rows'][schedule] = row
             else:
                 if isLargestSpan(d, routes[key]):
                     d['is_full'] = True
                 outDict[key].append(d)
+                outDict[key][i]['rows'][schedule] = row
+            i += 1
 
     outFile = open(join(PROJECT_ROOT, "../db_csv_files/uniqueRoutes.json"), "w")
     outFile.write(json.dumps(outDict, indent=2))
     outFile.close()
 
 
 '''
-Import RouteMaster
+Go through uniqueRoutes.json and actually import atlas data into the db
+(step 4)
 '''
-def importRouteMaster():
-    CsvFile = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/RouteMaster.csv"), "r"), delimiter=',')
-    test = CsvFile.next()
-    stop_errors = []
-    print test
-    for row in CsvFile:
-        if len(row) < 1:
-            continue
-        from_to = getFromToStopsForRoute(row[0])
-        if from_to is None:
-            stop_errors.append(row[0])
-            continue
-        print row[0]
-        obj = Route(code=row[0], alias=row[1], from_stop_txt=row[2], to_stop_txt=row[3], from_stop=from_to[0], to_stop=from_to[1], distance=row[4], stages=int(row[5]))
-        obj.save()
-    errors = open(join(PROJECT_ROOT, "../errors/routeStopErrors.json"), "w")
-    errors.write(json.dumps(stop_errors, indent=2))
-    errors.close()
-
 
 def importUniqueRoutes():
     data = json.load(open(join(PROJECT_ROOT, "../db_csv_files/uniqueRoutes.json")))
     routeMapping = json.load(open(join(PROJECT_ROOT, "../db_csv_files/routeMapping.json")))
-    routeDoesNotExistErrors = []
-    stopMapping = {}
-    stopErrors = []
+    routeDoesNotExistErrors = [] #route codes for which there are entries in routeMapping.json and in Atlas, but which do not exist in RouteMaster
+    stopMapping = {} #FIXME
+    stopErrors = [] #This should ideally never happen, and any errors here are bad and would indicate problems with the fuzzy matching logic, most likely.
     for route in data.keys():
         routeCode = routeMapping[route]
         try:
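The commit message admits the grouping is still wrong, and two problems are visible in the new hunk: isNotUnique() returns an index or False, yet the lookup indexes with the hand-maintained counter i (which is reset inside the row loop), and a legitimate match at index 0 is falsy, so `if matchedRow:` misfires. A sketch of the loop using the returned index directly; this is an assumed intent, not the author's fix, and buildRowDict is a hypothetical stand-in for the d = {...} literal:

    # Hypothetical corrected grouping loop: use the index returned by
    # isNotUnique() instead of the counter i, and compare against False
    # explicitly because a match at index 0 is falsy.
    for row in routes[key]:
        d = buildRowDict(row)            # hypothetical helper for the d = {...} literal
        schedule = row[-5]
        matchedRow = isNotUnique(d, outDict[key])
        if matchedRow is not False:
            outDict[key][matchedRow]['rows'][schedule] = row
        else:
            if isLargestSpan(d, routes[key]):
                d['is_full'] = True
            d['rows'] = {schedule: row}  # a new unique route starts its own schedule map
            outDict[key].append(d)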
@@ -159,8 +134,8 @@ def importUniqueRoutes():
             distance = float(thisRoute['span'])
         except:
             distance = 0
         obj = UniqueRoute(route=routeObj, is_full=thisRoute['is_full'], distance=distance, from_stop_txt=thisRoute['from'], to_stop_txt=thisRoute['to'])
-        if obj.is_full:
+        if obj.is_full: #If the route is the primary route, we can get stop codes easily from RouteDetails first / last stop
             from_to = getFromToStopsForRoute(routeObj.code)
             obj.from_stop = from_to[0]
             if not stopMapping.has_key(obj.from_stop_txt):
@@ -168,7 +143,7 @@ def importUniqueRoutes():
             obj.to_stop = from_to[1]
             if not stopMapping.has_key(obj.to_stop_txt):
                 stopMapping[obj.to_stop_txt] = from_to[1].stopcd
-        else:
+        else: #Else we do fuzzy string matching against all possible values for stopname got from RouteDetails
             stopnames = []
             stopcodes = []
             for r in RouteDetails.objects.filter(rno=routeObj.code):
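This is where fuzzywuzzy, imported at the top as fuzzprocess, comes in: stopnames and stopcodes collect the candidates from RouteDetails, and the best candidate is presumably picked with process.extractOne, which is fuzzywuzzy's real API. The wiring below, the Stop model name, and the score cutoff are assumptions, not shown in this hunk:

    # Hypothetical matching step; fuzzprocess.extractOne(query, choices)
    # returns a (best_match, score) tuple, or None if choices is empty.
    match = fuzzprocess.extractOne(obj.from_stop_txt, stopnames)
    if match is not None:
        name, score = match
        if score >= 80:  # illustrative cutoff; the commit shows no threshold
            # Stop model name assumed from the .stopcd attribute used above
            obj.from_stop = Stop.objects.get(stopcd=stopcodes[stopnames.index(name)])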
@@ -183,10 +158,11 @@ def importUniqueRoutes():
         except:
             stopErrors.append([thisRoute['from'], thisRoute['to']])
             continue
+
         obj.save()
         #pdb.set_trace()
         # print thisRoute['rows'].keys()
-        for schedule in thisRoute['rows'].keys():
+        for schedule in thisRoute['rows'].keys(): #loop through each schedule per UniqueRoute and save it
             row = thisRoute['rows'][schedule]
             try:
                 depot = Depot.objects.get(depot_code=row[6])
@@ -195,6 +171,8 @@ def importUniqueRoutes():
                 #pdb.set_trace()
             routeScheduleObj = RouteSchedule(unique_route=obj, schedule_type=schedule, busesAM=noneInt(row[2]), busesN=noneInt(row[3]), busesPM=noneInt(row[4]), bus_type=row[5], depot_txt=row[6], depot=depot, first_from=formatTime(row[8]), last_from=formatTime(row[9]), first_to=formatTime(row[11]), last_to=formatTime(row[12]), runtime1=noneInt(row[14]), runtime2=noneInt(row[15]), runtime3=noneInt(row[16]), runtime4=noneInt(row[17]), headway1=noneInt(row[18]), headway2=noneInt(row[19]), headway3=noneInt(row[20]), headway4=noneInt(row[21]), headway5=noneInt(row[22]))
             routeScheduleObj.save()
 
+    #done saving things - write out error files:
     errors = open(join(PROJECT_ROOT, "../errors/routeMasterMissingRoutes.json"), "w")
     errors.write(json.dumps(routeDoesNotExistErrors, indent=2))
     errors.close()
@@ -227,12 +205,20 @@ def formatTime(s):
     except:
         return datetime.time(0,0)
 
+'''
+Silly function to deal with invalid strings in the data that need to go in as Integers into the db
+passed a string, it will either return int(string) or None if that fails for any reason
+FIXME: find a more elegant way to do this
+'''
 def noneInt(val):
     try:
         return int(val)
     except:
         return None
 
+'''
+Passed a route code, it gets stop codes for the first and last stop
+'''
 def getFromToStopsForRoute(routeCode):
     # fromStr = row[2]
     routeDetails = RouteDetails.objects.filter(rno=routeCode).order_by('stopsr')
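The FIXME above asks for something more elegant; one option is to catch only the exceptions int() can actually raise rather than using a bare except, which would also stop the function from silently swallowing unrelated errors. A suggestion, not the author's fix:

    def noneInt(val):
        # Hypothetical tightened version: only ValueError (unparseable string)
        # and TypeError (None or other non-castable input) mean 'no usable number'.
        try:
            return int(val)
        except (ValueError, TypeError):
            return None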
@@ -243,7 +229,12 @@ def getFromToStopsForRoute(routeCode):
     return (fromStop, toStop,)
 
 
+'''
+checks whether the row in a set of rows for a route has the largest 'span' value, useful to tell if a row belongs to a primary route
+params:
+data - dict with a span attribute
+arr - array of rows to check if data['span'] is greater than. span is at row[13]
+'''
 def isLargestSpan(data, arr):
     span = data['span']
     for a in arr:
@@ -265,12 +256,15 @@ def isLargestSpan(data, arr):
 def isNotUnique(data, arr):
     i = 0
     for a in arr:
-        if a['from'] == data['from'] and a['to'] == data['to'] and a['span'] == data['span']:
+        if a['from'] == data['from'] and a['to'] == data['to']:
             return i
         i += 1
     return False
-
+'''
+Create routeMapping.json file to map route aliases to route codes
+TODO: add mappings from hard coded routes
+'''
 def getRouteCodes():
     atlasRawCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/AtlasRaw.csv"), "r"), delimiter="\t")
     atlasDict = json.loads(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.json")).read())
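isNotUnique() does a linear scan, and as noted earlier a return value of 0 is indistinguishable from False under a truthiness test. A dict keyed on the fields that define uniqueness sidesteps both issues; a sketch assuming (from, to) is the identity, matching the relaxed comparison this commit switches to, with buildRowDict again a hypothetical helper:

    # Hypothetical keyed lookup: index[(from, to)] -> position in outDict[key].
    # Avoids the O(n) scan and the ambiguity of returning 0 vs False.
    index = {}
    for row in routes[key]:
        d = buildRowDict(row)
        k = (d['from'], d['to'])
        pos = index.get(k)
        if pos is not None:
            outDict[key][pos]['rows'][row[-5]] = row
        else:
            index[k] = len(outDict[key])
            d['rows'] = {row[-5]: row}
            outDict[key].append(d)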
@@ -290,6 +284,28 @@ def getRouteCodes():
     mappingFile.close()
 
 
+'''
+Import RouteMaster into db
+'''
+def importRouteMaster():
+    CsvFile = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/RouteMaster.csv"), "r"), delimiter=',')
+    test = CsvFile.next()
+    stop_errors = []
+    print test
+    for row in CsvFile:
+        if len(row) < 1:
+            continue
+        from_to = getFromToStopsForRoute(row[0])
+        if from_to is None:
+            stop_errors.append(row[0])
+            continue
+        print row[0]
+        obj = Route(code=row[0], alias=row[1], from_stop_txt=row[2], to_stop_txt=row[3], from_stop=from_to[0], to_stop=from_to[1], distance=row[4], stages=int(row[5]))
+        obj.save()
+    errors = open(join(PROJECT_ROOT, "../errors/routeStopErrors.json"), "w")
+    errors.write(json.dumps(stop_errors, indent=2))
+    errors.close()
+
 
 def csvClean1():
     atlasCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.csv"), "r"), delimiter="\t")
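Note the file is Python 2 throughout (print statements, CsvFile.next(), has_key). If it ever moves to Python 3 the reader idiom changes; a hypothetical portable form of the header-skip pattern above, not part of the commit:

    # next(reader) replaces reader.next(), and the file should be opened
    # in text mode with newline='' per the csv module's documentation.
    with open(join(PROJECT_ROOT, "../db_csv_files/RouteMaster.csv"), newline='') as f:
        reader = csv.reader(f, delimiter=',')
        header = next(reader)  # consume the header row
        for row in reader:
            pass  # process rows as above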
mumbai admin module:

@@ -2,6 +2,9 @@ from django.contrib import admin
 from django import forms
 from mumbai.models import *
 
+class RouteScheduleInline(admin.StackedInline):
+    model = RouteSchedule
+
 class AreaAdmin(admin.ModelAdmin):
     list_display = ("a_code", "areanm")
     list_editable = ("areanm",)
@@ -26,6 +29,9 @@ class FareAdmin(admin.ModelAdmin):
         models.TextField: {'widget': forms.TextInput},
     }
 
+class UniqueRouteAdmin(admin.ModelAdmin):
+    inlines = [RouteScheduleInline]
+
 
 class StopForm(forms.ModelForm):
 
@@ -129,3 +135,5 @@ admin.site.register(Landmark, LandmarkAdmin )
 admin.site.register(Depot,DepotAdmin)
 admin.site.register(Holiday,HolidayAdmin)
 admin.site.register(StopLocation,StopLocationAdmin)
+
+admin.site.register(UniqueRoute, UniqueRouteAdmin)
(diff for one additional file suppressed because it is too large)