various tweaks to gtfs, data import, trivial change to models

2012-07-25 14:01:42 +02:00 · 2012-07-25 14:01:42 +02:00 · fe4aa2ba90
commit fe4aa2ba90
parent 75586554e0 427fdb99a3
3 changed files with 103 additions and 23 deletions
--- a/chaloBEST/gtfs/gtfs_export.py
+++ b/chaloBEST/gtfs/gtfs_export.py
@ -7,6 +7,7 @@ import sys
 import datetime
 from itertools import dropwhile
 import copy    
+from fuzzywuzzy import process as fuzzprocess


 def routeWithLocationData(route):
@ -396,7 +397,16 @@ badroutes=set()
 def get_routedetail_subset(unr, direction):
    """
    Given a uniqueroute, gets the list of stops in it.    
+    Algorithm:
+    1. get the routedetail (rd) list
+    2. check whether the rds are reversed
+    3. select stops if they are in the UP or DOWN route
+    4. get the indexes of the from and to stops in the list
+    5. slice the list according to those positions
+    6. if it is a full route, skip the index calculations and send the entire list (only filtered by UP/DOWN stops)
+    
    """
+
    from_stop = unr.from_stop
    to_stop = unr.to_stop    
    code=str(unr.route.code)[3]
@ -430,7 +440,7 @@ def get_routedetail_subset(unr, direction):
            
    rdlist = lst            
    
-    # get indexes
+    # The next ~30 lines only find the index positions of the from and to stops in the list.
    from_index = -1
    to_index= -1

@ -465,19 +475,26 @@ def get_routedetail_subset(unr, direction):
        print "To-Stop not found in Route Details for unr.id", unr.id , " unr.to_stop_txt=", unr.to_stop_txt
        mismatched_unrs['to'].append({"unr":unr,"unr_to_stop_txt":unr.to_stop_txt,"unr_to_stop":unr.to_stop, "route":unr.route})
       
+    # indexes found, slice the list

    rd_subset = rdlist[from_index:to_index+1]

+    # if ring route
                    
    if code == 'R' or code == '4':
        # ring specific code here. 
        # converts the given ring route subset to double size.
        # if ring route subset 
-        #if False:# not (unr.from_stop.id !=rdlist[0].stop.id and unr.to_stop.id !=rdlist[len(rdlist)-1]).stop.id:
-        #if not unr.is_full:        
-        rd_temp = copy.deepcopy(rd_subset)
-        rd_temp.reverse()
-        rd_subset.extend(rd_temp[1:])        
+        #if not (unr.from_stop.id !=rdlist[0].stop.id and unr.to_stop.id !=rdlist[len(rdlist)-1]).stop.id:
+        if not unr.is_full:       
+            # if it is a subset of the full ring route, append the reversed routedetails (skipping the repeated end stop) to double the subset
+            rd_temp = copy.deepcopy(rd_subset)
+            rd_temp.reverse()
+            rd_subset.extend(rd_temp[1:])        
+        else:
+            # if full ring route, ignore slicing calculations and send route based only on "UP/DOWN" filtering
+            return rdlist
+

    if not direction in ["UP", "up", "U"]:
        rd_subset.reverse()
@ -489,10 +506,25 @@ def get_routedetail_subset(unr, direction):

    return rd_subset

+def check_route_and_rds():
+    """
+    Sanity check: since the full routedetail list is given for each route, make sure
+    each route's from/to stops match the first/last stops in its routedetails.
+    Returns the set of routes that do not match.
+    """
+
+    lst = set()
+    for r in Route.objects.all():
+        rds = r.routedetail_set.all()
+        if r.from_stop != rds[0].stop:
+            lst.add(r)
+        if  r.to_stop != rds[len(rds)-1].stop:
+            lst.add(r)
+    return lst

 def get_bad_routes():
    """ 
-    Gets a list of routes wich have less than five routedetails or stops inany of their uniqueroutes.
+    Gets a list of routes which have fewer than five routedetails or stops in any of their uniqueroutes.
    """
    bad_routes=set()
    for unr in UniqueRoute.objects.all():        
@ -660,14 +692,6 @@ def getRoutesWBadRDs(cnt):
            routes.append(r)
    return routes

-def export_atlas():    
-    f = csv.writer(open(join(PROJECT_ROOT, "gtfs", "gtfs_mumbai_bus", "recomputed_atlas.csv"), "w"), delimiter="\t", quotechar='"', quoting=csv.QUOTE_MINIMAL)
-    f.writerow(["RouteCode","RouteAlias","BusesAM","BusesNoon","BusesPM","BusType","Depot","FromStopID","FromStopName","FirstFrom","LastFrom","ToStopID","ToStopName","FirstTo","LastTo","RouteSpan","rt1","rt2","rt3","rt4","headway1","headway2","headway3","headway4","headway5","ScheduleType"])
-    for unr in UniqueRoute.objects.all().order_by("route__code"):
-        for rs in unr.routeschedule_set.all().order_by("schedule_type"):
-            f.writerow([unr.route.code, unr.route.alias, rs.busesAM,rs.busesN,rs.busesPM,rs.bus_type,rs.depot_txt,unr.from_stop.id,unr.from_stop.name,rs.first_from,rs.last_from, unr.to_stop.id, unr.to_stop.name, rs.first_to, rs.last_to, unr.distance,rs.runtime1,rs.runtime2,rs.runtime3,rs.runtime4,rs.headway1,rs.headway2,rs.headway3,rs.headway4,rs.headway5, rs.schedule_type])
-
-
 def export_stop_times2(routelist):
    f = make_csv_writer("stop_times.txt")
    f.writerow(["trip_id","arrival_time","departure_time","stop_id","stop_sequence"])
@ -1412,7 +1436,66 @@ def makeStopList():
        f.write(line+ "\n")

    f.close()
+
+def get_rd_distance(unr, direction):
+    details= get_routedetail_subset(unr,direction)
+    dist =0.0
+    for seq, detail in enumerate(details):
+        blankstops=0
+        if detail.km:
+            dist+=float(detail.km)
+            blankstops=0
+        else:
+            blankstops+=1
+            if seq == len(details) - 1:
+                dist+=float(0.3*blankstops)
+                
+    return dist
+
+def export_atlas():    
+    import codecs
+    f =codecs.open(join(PROJECT_ROOT, "gtfs", "recomputed_atlas.csv"), "w", "utf-8")
+
+    f = csv.writer(open(join(PROJECT_ROOT, "gtfs", "gtfs_mumbai_bus", "recomputed_atlas.csv"), "w"), delimiter="\t", quotechar='"', quoting=csv.QUOTE_MINIMAL)
+    f.writerow(["RouteCode","RouteAlias","BusesAM","BusesNoon","BusesPM","BusType","Depot","FromStopCode","FromStopName","FromStopOriginal","FirstFrom","LastFrom","ToStopCode","ToStopName","ToStopOriginal","FirstTo","LastTo","rt1","rt2","rt3","rt4","headway1","headway2","headway3","headway4","headway5","ScheduleType","RouteSpan/AtlasDistance", "DistanceMasterRoute", "DistanceRouteDetailUP","DistanceRouteDetailDOWN", "mismatchedfromstop","mismatchedtostop","DaysOfRun" ])
+    for unr in UniqueRoute.objects.all().order_by("route__code"):
+        for rs in unr.routeschedule_set.all().order_by("schedule_type"):
+            bus_type= RouteType.objects.get(code=str(unr.route.code)[3]).faretype 
+            dist_up=get_rd_distance(unr,"UP")
+            dist_down=get_rd_distance(unr,"DOWN")
            
+            f.writerow([
+                    unr.route.code, 
+                    unr.route.alias, 
+                    rs.busesAM,
+                    rs.busesN,
+                    rs.busesPM,
+                    bus_type,
+                    rs.depot_txt,
+                    unr.from_stop.code,
+                    unr.from_stop.name,
+                    unr.from_stop_txt,
+                    #unr.from_stop.name_mr.encode('utf-8'),
+                    rs.first_from,
+                    rs.last_from, 
+                    unr.to_stop.code, 
+                    unr.to_stop.name,
+                    unr.to_stop_txt,
+                    #unr.to_stop.name_mr.encode('utf-8'), 
+                    rs.first_to, 
+                    rs.last_to, 
+                    rs.runtime1,rs.runtime2,rs.runtime3,rs.runtime4,
+                    rs.headway1,rs.headway2,rs.headway3,rs.headway4,rs.headway5, 
+                    rs.schedule_type,
+                    unr.distance,
+                    unr.route.distance,
+                    dist_up,
+                    dist_down,
+                    1 if 70 > fuzzprocess.ratio(unr.from_stop.name.lower(),unr.from_stop_txt.lower()) else 0,
+                    1 if 70 > fuzzprocess.ratio(unr.to_stop.name.lower(),unr.to_stop_txt.lower()) else 0,
+                    SCHED[rs.schedule_type].__str__().strip('[]')
+                    ])
+ 


 def fire_up(routelist):
--- a/chaloBEST/imports/data_mapper.py
+++ b/chaloBEST/imports/data_mapper.py
@ -59,9 +59,6 @@ def Route_save(entry):
    # but load nulls into db anyway
    # IndexError implies that nothing was mapped.
    # MultipleRows found indicates ambiguity when there should'nt be.
-
-   
-
    #f_stop = Stop.objects.get(name=str(entry[2]))[0]
    #t_stop = Stop.objects.get(name=str(entry[3]))[0]
    
@ -160,8 +157,8 @@ def Stop_save(entry):
 # There is no model as StopMarathi/AreaMarathi, but this is done to separate errors arising from different files, and also that the Marathi names should be done after the Stop and Area entities have been fully loaded cuz thats how we get them from BEST.

 def StopMarathi_save(entry):
-    obj = Stop.objects.get(code=int(entry[0])) 
-    obj.name_mr = unicode(entry[1], 'utf-8')
+    obj = Stop.objects.get(id=int(entry[0])) 
+    obj.name_mr = unicode(entry[3], 'utf-8')
    obj.save()
    #print obj.__dict__  

--- a/chaloBEST/mumbai/models.py
+++ b/chaloBEST/mumbai/models.py
@ -37,7 +37,7 @@ SCHED = {
    'MS&HOL':[1,2,3,4,5,6,8], 
    'FW':[1,2,3,4,5,6,7], 
    'SAT/SH':[6,7,8], 
-    'FH':['???'], 
+    'FH':[1,2,3,4,5,6,8], 
    'SAT&HOL':[6,8], 
    'SAT&SH':[6,7,8], 
    'SAT/SUND&HOL':[6,7,8], 
@ -321,9 +321,9 @@ class RouteSchedule(models.Model):
    busesAM = models.IntegerField(blank=True, null=True)
    busesN = models.IntegerField(blank=True, null=True)
    busesPM = models.IntegerField(blank=True, null=True)
-    bus_type = models.CharField(max_length=3, default="SD")
+    bus_type = models.CharField(max_length=3, default="SD", blank=True)
    depot_txt = models.CharField(max_length=16, blank=True)
-    depot = models.ForeignKey("Depot", null=True)
+    depot = models.ForeignKey("Depot", null=True, blank=True)
    first_from = models.TimeField(blank=True, null=True)
    last_from = models.TimeField(blank=True, null=True)
    first_to = models.TimeField(blank=True, null=True)