From dfb01db6e367e31c76f8c6530ac5d882712b00a5 Mon Sep 17 00:00:00 2001 From: Johnson Chetty Date: Wed, 28 Mar 2012 13:40:42 +0200 Subject: [PATCH] fix_missing changed - algo tweak , data_mapper restored --- chaloBEST/imports/data_mapper.py | 217 +++++++++++++++++++- chaloBEST/imports/fix_missing_atlas_data.py | 63 ++++-- chaloBEST/mumbai/models.py | 2 +- 3 files changed, 254 insertions(+), 28 deletions(-) diff --git a/chaloBEST/imports/data_mapper.py b/chaloBEST/imports/data_mapper.py index 5d3f54a..dfbaeaf 100644 --- a/chaloBEST/imports/data_mapper.py +++ b/chaloBEST/imports/data_mapper.py @@ -1,6 +1,212 @@ +from mumbai.models import * +import csv +from settings import PROJECT_ROOT +from os.path import join +import json +import datetime +import sys +from django.contrib.gis.geos import Point +from imports.import_atlas import getFromToStopsForRoute, importUniqueRoutes +from imports import postload_cleanup as postclean +from decimal import Decimal + +globalerr = [] + +def RouteType_save(entry): + obj = RouteType(code=entry[0], rtype=entry[1], faretype=entry[2]) + obj.save() + #print obj.__dict__ + +def getFromToStopsFromRouteDetails(code): + routeDetails = RouteDetail.objects.filter(route_code=code).order_by('serial') + if routeDetails.count() == 0: + return None + fromStop = routeDetails[0].stop + toStop = routeDetails[routeDetails.count() -1].stop + return (fromStop, toStop,) + +def Route_save(entry): + """ + try: + f_stop = Stop.objects.filter(name=str(entry[2]))[0] + except IndexError: + f_stop = None + + try: + t_stop = Stop.objects.filter(name=str(entry[3]))[0] + except IndexError: + t_stop = None + """ + from_to = getFromToStopsFromRouteDetails(entry[0]) + if from_to is None: + globalerr.append({"data" :entry[0], error:["Route not found"]}) + + #obj = Route(code=entry[0], alias=entry[1], from_stop_txt=entry[2], to_stop_txt=entry[3], from_stop=from_to[0], to_stop=from_to[1], distance=Decimal(entry[4]), stages=int(entry[5])) + + + obj = Route( + code=str(entry[0]), + alias=str(entry[1]), + from_stop=from_to[0], + from_stop_txt=str(entry[2]), + to_stop_txt=str(entry[3]), + to_stop=from_to[1], + distance=Decimal(entry[4]), + stages=int(entry[5])) + obj.save() + + # throw an error if the stops mapped do not exist. + # but load nulls into db anyway + # IndexError implies that nothing was mapped. + # MultipleRows found indicates ambiguity when there should'nt be. + + + + #f_stop = Stop.objects.get(name=str(entry[2]))[0] + #t_stop = Stop.objects.get(name=str(entry[3]))[0] + + #print obj.__dict__ + +def HardCodedRoute_save(entry): + obj = HardCodedRoute(code=str(entry[0]), alias=entry[1], faretype=entry[2]) + obj.save() + #print obj.__dict__ + +def Depot_save(entry): + obj = Depot( + code=str(entry[0]), + name=str(entry[1]), + stop=int(entry[2]) + ) + obj.save() + #print obj.__dict__ + +def Holiday_save(entry): + date_format = entry[0].rsplit('.') + theday = int(date_format[0]) + themonth = int(date_format[1]) + theyear = int('20'+ date_format[2]) + obj = Holiday(date=datetime.date(day=theday, month=themonth, year=theyear), name=str(entry[1])) + obj.save() + #print obj.__dict__ + +def RouteDetail_save(entry): + + temp_stop=Stop.objects.get(code=int(entry[2])) + """try: + temp_route=Route.objects.get(code=str(entry[0])) + except: + temp_route=None + try: + temp_stop=Stop.objects.get(code=int(entry[2])) + except: + temp_stop=None + """ + obj = RouteDetail( + route_code = entry[0], + route = None, + serial=int(entry[1]), + stop= temp_stop, + stage=(lambda:entry[3].startswith('1'), lambda:None)[ entry[3] == '' ](), + km=(lambda:None,lambda:Decimal(entry[4]))[ entry[4] != '' ]()) + obj.save() + #print obj.__dict__ + +def Road_save(entry): + obj = Road(code=int(entry[0]), name=str(entry[1])) + obj.save() + #print obj.__dict__ + +def Fare_save(entry): + obj = Fare( + slab=Decimal(entry[0]), + ordinary=int(entry[1]), + limited=int(entry[2]), + express=int(entry[3]), + ac=int(entry[4]), + ac_express=int(entry[5]) + ) + obj.save() + #print obj.__dict__ + +def Area_save(entry): + obj = Area(code=int(entry[0]), name= str(entry[1])) + obj.save() + #print obj.__dict__ + +def Stop_save(entry): + + _road = Road.objects.get(code=int(entry[4])) + _area = Area.objects.get(code=int(entry[5])) + try: + _depot = Depot.objects.filter(code=str(entry[6]))[0] + except IndexError: + _depot = None + + obj = Stop( + code=int(entry[0]), + name=str(entry[1]), + dbdirection=str(entry[2]), + chowki=(entry[3]).startswith('TRUE'), + road=_road, + area=_area, + depot=_depot + ) + + obj.save() + #print obj.__dict__ + +# There is no model as StopMarathi/AreaMarathi, but this is done to separate errors arising from different files, and also that the Marathi names should be done after the Stop and Area entities have been fully loaded cuz thats how we get them from BEST. + +def StopMarathi_save(entry): + obj = Stop.objects.get(code=int(entry[0])) + obj.name_mr = unicode(entry[1], 'utf-8') + obj.save() + #print obj.__dict__ + +def AreaMarathi_save(entry): + obj = Area.objects.get(code=int(entry[0])) + obj.name_mr = unicode(entry[1], 'utf-8') + obj.save() + #print obj.__dict__ + +loc1s = 0 +loc2s = 0 + +class NoPointsFoundError(Exception): + pass + +def StopLocation_save(entry): + this_stop = Stop.objects.get(code=int(entry[4])) + + #hits = {'one':[],'two':[],'three':[],'four':[]} + + flagerr = 0 + + if entry[0] and entry[1]: + loc1 = StopLocation(stop=this_stop, point=Point(float(entry[1]), float(entry[0])),direction='U' ) + loc1.save() + #loc1s+=1 + else: + flagerr=1 + + if entry[2] and entry[3]: + loc2 = StopLocation(stop=this_stop, point=Point(float(entry[3]), float(entry[2])),direction='D' ) + loc2.save() + #loc2s+=1 + else: + flagerr+=1 + + if flagerr == 2: + flagerr = 0 + raise NoPointsFoundError + + #print "Loc1s found : ", loc1s + #print "Loc2s found : ", loc2s -saveorder = ["Fare","Holiday","Area","Road","Depot","Stop", "StopMarathi","AreaMarathi","RouteDetail", "Route","RouteType","HardCodedRoute"] + +saveorder = ["Fare","Holiday","Area","Road","Depot","Stop", "StopMarathi","AreaMarathi","RouteDetail", "Route","RouteType","HardCodedRoute","StopLocation" ] mappingtosave = { "Fare":Fare_save, @@ -14,10 +220,11 @@ mappingtosave = { "RouteType":RouteType_save, "HardCodedRoute":HardCodedRoute_save, "StopMarathi":StopMarathi_save, - "AreaMarathi":AreaMarathi_save -} - + "AreaMarathi":AreaMarathi_save, + "StopLocation":StopLocation_save # There is no model as StopMarathi/AreaMarathi, but this is done to separate errors arising from different input files. + +} def loadFKinRouteDetail(): err=[] @@ -94,7 +301,7 @@ def fire_up(): # also importUniqueRoutes() print "loading UniqueRoute..." - postclean.copydefaultStopLocations() + #postclean.copydefaultStopLocations() postclean.copynames2display_name() #---------------------------------------------------------- diff --git a/chaloBEST/imports/fix_missing_atlas_data.py b/chaloBEST/imports/fix_missing_atlas_data.py index ed35466..f2cab9e 100644 --- a/chaloBEST/imports/fix_missing_atlas_data.py +++ b/chaloBEST/imports/fix_missing_atlas_data.py @@ -4,27 +4,45 @@ from mumbai.models import * def fix_distances(): for unique_route in UniqueRoute.objects.all(): - # RouteDetail sometimes isn't order from from_stop to to_stop + # RouteDetail sometimes isn't in order from from_stop to to_stop from_stop, to_stop = unique_route.from_stop.id, unique_route.to_stop.id details = list(unique_route.route.routedetail_set.all()) # Sometimes to_stop comes before from_stop in RouteDetail. What is there to say. for detail in details: if detail.stop.id == from_stop: break + if detail.stop.id == to_stop: details.reverse() break distance = 0.0 record = False - for detail in details: + last_stop_passed = False + for detail in details: + # basic idea, run thru each detail, if it has km info, then add it, if to_stop reached, and if it did not have km info, then go to the next detail having km info add it and done. # For route 240RING, some detail.km is null??? - if record and detail.km: distance += float(detail.km) - # distance > 0 because of 100RING returning 1 stop shy of its start - if record and distance > 0 and detail.stop.id == to_stop: - record = False - break + + # is a stage + if record and detail.km: + if not last_stop_passed: + distance += float(detail.km) + else: + # if stage having km info reached after last stop, then add and exit loop + distance += float(detail.km) + record=False + last_stop_passed = False + break + # distance > 0 because of 100RING returning 1 stop shy of its start + if distance > 0 and detail.stop.id == to_stop: + last_stop_passed = True + + #if record and distance > 0 and detail.stop.id == to_stop and last_stop_reached: + # record = False + # break + # Start recording *after* we check for the break, because, # if from_stop == to_stop, we don't want to break on the first stop if detail.stop.id == from_stop: record = True + if record: print Exception("UniqueRoute %s from %s to %s ran off the end while measuring distance!" %(unique_route, unique_route.from_stop.code, unique_route.to_stop.code)) if not distance: @@ -47,6 +65,22 @@ def fix_missing_runtimes(): for col_idx, column in enumerate(columns): # if the runtime is set, AWESOME, bail if getattr(schedule, column): continue + + # try to use the previous column....if available + if getattr(schedule, column): continue + if col_idx > 0: + prev_runtime = getattr(schedule, columns[col_idx-1]) + if prev_runtime: + setattr(schedule, column, prev_runtime) + continue + + # ... or the next column, if it comes to that. + if col_idx < len(columns)-1: + next_runtime = getattr(schedule, columns[col_idx+1]) + if next_runtime: + setattr(schedule, column, next_runtime) + continue + # otherwise, go through the other schedules for this subroute and # see if we get a matching runtime -- if so, use it for sibling in sibling_schedules: @@ -92,20 +126,5 @@ def fix_missing_runtimes(): setattr(schedule, column, partial_runtime) break - # OTHER-otherwise, use the previous column.... - if getattr(schedule, column): continue - if col_idx > 0: - prev_runtime = getattr(schedule, columns[col_idx-1]) - if prev_runtime: - setattr(schedule, column, prev_runtime) - continue - - # ... or the next column, if it comes to that. - if col_idx < len(columns)-1: - next_runtime = getattr(schedule, columns[col_idx+1]) - if next_runtime: - setattr(schedule, column, next_runtime) - continue - if column != "runtime4": print Exception("ERR fix_missing_runtimes: %s STILL missing %s!" % (schedule, column)) diff --git a/chaloBEST/mumbai/models.py b/chaloBEST/mumbai/models.py index 9c1a2c6..9f9f4cd 100644 --- a/chaloBEST/mumbai/models.py +++ b/chaloBEST/mumbai/models.py @@ -353,7 +353,7 @@ class StopLocation(models.Model): class Depot(models.Model): - code = models.CharField(max_length=5, unique=True) + code = models.CharField(max_length=5) # should have unique=True ? name = models.TextField(max_length=50) stop = models.IntegerField()