From 93aa6eab811e7f61b18e7aa3c7d4f24566891ba4 Mon Sep 17 00:00:00 2001 From: Johnson Chetty Date: Fri, 30 Mar 2012 09:33:51 +0200 Subject: [PATCH] fix_missing changed - algo tweak 3 --- chaloBEST/imports/data_mapper.py | 10 +- chaloBEST/imports/fix_missing_atlas_data.py | 102 ++++++++++++++++++-- chaloBEST/imports/postload_cleanup.py | 11 +-- 3 files changed, 102 insertions(+), 21 deletions(-) diff --git a/chaloBEST/imports/data_mapper.py b/chaloBEST/imports/data_mapper.py index dfbaeaf..9bebad2 100644 --- a/chaloBEST/imports/data_mapper.py +++ b/chaloBEST/imports/data_mapper.py @@ -237,9 +237,9 @@ def loadFKinRouteDetail(): good_saves+=1 except: rd.route=None - err.append({"data":rd.route_code, "error":["Route Not Found in Route"]}) + err.append({"data":{"route_code":rd.route_code, "id":rd.id,"serial":rd.serial } , "error":"route Not Found in Route"}) - #errors = open(join(PROJECT_ROOT, "../errors/RouteNotFoundErrors.json"), "w") + errors = open(join(PROJECT_ROOT, "../errors/RouteNotFoundErrors.json"), "w") size = len(err) print "No. of Routes in RouteDetail mapped to Route: " , str(good_saves) print "No. of Routes in RouteDetail not mapped to Route: " , str(size) @@ -247,8 +247,8 @@ def loadFKinRouteDetail(): if (size != 0) : print "See /errors/RouteNotFoundErrors.json for details" - #errors.write(json.dumps(err, indent=2)) - #errors.close() + errors.write(json.dumps(str(err), indent=2)) + errors.close() return err @@ -281,7 +281,7 @@ def CsvLoader(thismodel): #print "Error:", str(entry) + '\t' + str(sys.exc_info()[0]) + '\n' errors = open(join(PROJECT_ROOT, "../errors/"+ thismodel + "Errors.json"), 'w') - errors.write(json.dumps(globalerr, indent=2)) + errors.write(json.dumps(str(globalerr), indent=2)) errors.close() diff --git a/chaloBEST/imports/fix_missing_atlas_data.py b/chaloBEST/imports/fix_missing_atlas_data.py index ae2db12..524c9df 100644 --- a/chaloBEST/imports/fix_missing_atlas_data.py +++ b/chaloBEST/imports/fix_missing_atlas_data.py @@ -14,21 +14,27 @@ def fix_distances(): if detail.stop.id == to_stop: details.reverse() break + # setup vars distance = 0.0 record = False last_stop_passed = False - for detail in details: - # basic idea, run thru each detail, if it has km info, then add it, if to_stop reached, and if it did not have km info, then go to the next detail having km info add it and done. - # For route 240RING, some detail.km is null??? - - # distance > 0 because of 100RING returning 1 stop shy of its start + + for detail in details: + # basic idea, run thru each detail, + # if it has km info, then add it, + # if to_stop reached, and if it did not have km info, + # then go to the next detail having km info add it and done. + + # For route 240RING, some detail.km is null??? + # distance > 0 because of 100RING returning 1 stop shy of its start if distance > 0 and detail.stop.id == to_stop: last_stop_passed = True # is a stage if record and detail.km: if not last_stop_passed: + # add it distance += float(detail.km) else: # if stage having km info reached after last stop, then add and exit loop @@ -37,10 +43,11 @@ def fix_distances(): last_stop_passed = True break - #if record and distance > 0 and detail.stop.id == to_stop and last_stop_reached: + #if record and distance > 0 and detail.stop.id == to_stop: # record = False # break + # Start recording *after* we check for the break, because, # if from_stop == to_stop, we don't want to break on the first stop if detail.stop.id == from_stop: record = True @@ -130,3 +137,86 @@ def fix_missing_runtimes(): if column != "runtime4": print Exception("ERR fix_missing_runtimes: %s STILL missing %s!" % (schedule, column)) + + + + + +hcolumns = ["headway%d" % n for n in range(1,6)] +def fix_missing_headways(): + for schedule in RouteSchedule.objects.all(): + # other schedules for the same unique route (but at different times) + sibling_schedules = schedule.unique_route.routeschedule_set.all() + # the "full" schedules for this route are used to attempt to + # guestimate the schedules of partial subroutes + related_routes = related_schedules = [] + related_subroutes = list(schedule.unique_route.route.uniqueroute_set.all()) + + # the main inner loop: for each headway column --- + for col_idx, column in enumerate(hcolumns): + # if the headway is set, AWESOME, bail + if getattr(schedule, column): continue + + # try to use the previous column....if available + if getattr(schedule, column): continue + if col_idx > 0: + prev_headway = getattr(schedule, columns[col_idx-1]) + if prev_headway: + setattr(schedule, column, prev_headway) + continue + + # ... or the next column, if it comes to that. + if col_idx < len(columns)-1: + next_headway = getattr(schedule, columns[col_idx+1]) + if next_headway: + setattr(schedule, column, next_headway) + continue + + # otherwise, go through the other schedules for this subroute and + # see if we get a matching headway -- if so, use it + for sibling in sibling_schedules: + sibling_headway = getattr(sibling, column) + if sibling_headway: + setattr(schedule, column, sibling_headway) + # print "OK fix_missing_headways: %s %s fixed to %s" % (schedule, column, sibling) + break + + if getattr(schedule, column): continue + # otherwise, go through the matching schedules for the full-length versions of this + # route and extrapolate the headway. + if related_subroutes: + # first, try to get the schedules for the full routes, given the same schedule type + related_schedules = [] + for related_route in related_subroutes: + related_schedules += list(related_route.routeschedule_set.filter(schedule_type=schedule.schedule_type)) + + # iterate over them and see if we got one with the right headway + for related_schedule in related_schedules: + related_headway = getattr(related_schedule, column) + if related_headway: + # if so, compute the partial headway of this schedule as the (possibly > 1.0) fraction of headway of the other schedule by distance + partial_headway = related_headway*float(schedule.unique_route.distance)/float(related_schedule.unique_route.distance) + # print "OK fix_missing_headways: %s %s adjusted to parent %s" % (schedule, column, related_schedule) + setattr(schedule, column, partial_headway) + break + + # did we find a headway? great, use it + if getattr(schedule, column): continue + + # failing that, try to get the schedules for the full routes, with ANY schedule type + for related_route in related_subroutes: + related_schedules += list(related_route.routeschedule_set.all()) + + # iterate over them and see if we got one with the right headway + for related_schedule in related_schedules: + related_headway = getattr(related_schedule, column) + if related_headway: + # if so, compute the partial headway of this schedule as the (possibly > 1.0) fraction of headway of the other schedule by distance + partial_headway = related_headway*float(schedule.unique_route.distance)/float(related_schedule.unique_route.distance) + # print "OK fix_missing_headways: %s %s adjusted to parent %s" % (schedule, column, related_schedule) + setattr(schedule, column, partial_headway) + break + + if column != "headway5": + print Exception("ERR fix_missing_headways: %s STILL missing %s!" % (schedule, column)) + diff --git a/chaloBEST/imports/postload_cleanup.py b/chaloBEST/imports/postload_cleanup.py index 5cd1d33..ddf7cca 100644 --- a/chaloBEST/imports/postload_cleanup.py +++ b/chaloBEST/imports/postload_cleanup.py @@ -1,6 +1,6 @@ from mumbai.models import * -#"Road, Area, Landmark, Stop" +# for Road, Area, Landmark, Stop def copynames2display_name(): print "Copying names to display_name field..." for obj in Stop.objects.all(): @@ -18,12 +18,3 @@ def copynames2display_name(): -""" -def addStopstoRoutes(): - print "Getting stops linked to Routes..." - for r in Route.objects.all(): - try: - r.stop = Stop.objects.get(name=) -""" - -