# indianrails/trains/imports.py
#
# Import scripts for stations and trains: load stations.json and trains.json
# from DATA_DIR and store each record as a Station / Train model row.
from models import *
from settings import DATA_DIR
import json
from os.path import join
import re
import datetime


def _load_json(filename):
    """Parse the JSON file DATA_DIR/<filename>, closing it afterwards."""
    with open(join(DATA_DIR, filename)) as fh:
        return json.load(fh)


def _find_station(code):
    """Return the Station whose ``code`` matches, or None if the lookup fails.

    Only the two lookup failures raised by ``objects.get`` are swallowed;
    anything else (e.g. a database error) propagates to the caller.
    """
    try:
        return Station.objects.get(code=code)
    except (Station.DoesNotExist, Station.MultipleObjectsReturned):
        return None


def import_stations():
    """Create and save one Station per record in DATA_DIR/stations.json.

    Each record must provide the keys ``id``, ``code``, ``name``, ``zone``,
    ``state`` and ``address``.  A ``None`` state or address is stored as the
    empty string.  The name of every station is printed as it is saved.
    """
    # NOTE: the original also carried a disabled pass (kept in a string
    # literal) that created State rows from the distinct ``state`` values;
    # the state is stored directly on the Station instead.
    for s in _load_json("stations.json"):
        station = Station()
        station.data_id = s['id']
        station.code = s['code']
        station.name = s['name']
        station.zone = s['zone']
        station.state = '' if s['state'] is None else s['state']
        station.address = '' if s['address'] is None else s['address']
        print(station.name)
        station.save()


def import_trains():
    """Create and save one Train per record in DATA_DIR/trains.json.

    Values that cannot be parsed are replaced by a neutral default (0 hours,
    0 minutes, 00:00, distance 0) and, except for the distance, described in
    an entry of the error list.  Trains whose origin or destination station
    cannot be looked up are logged and NOT saved.  The error list is written
    to DATA_DIR/trainErrors.json once all records have been processed.

    Relies on the module-level parsers getHours, getMinutes, getTime,
    getDate and getBool.
    """
    trains = _load_json("trains.json")
    errors = []
    # Loop-invariant: JSON keys that map 1:1 onto boolean model fields.
    boolFields = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                  'saturday', 'sunday', 'chair_car', 'sleeper', 'first_class',
                  'third_ac', 'second_ac', 'first_ac']
    print(len(trains))
    for t in trains:
        train = Train()
        train.data_id = t['id']
        train.name = t['name']
        train.number = t['number']
        train.return_train = '' if t['return_train'] is None else t['return_train']

        duration_h = getHours(t['duration'])
        if duration_h is None:
            errors.append({
                'typ': 'Invalid Duration (H)',
                'train_data_id': t['id'],
                'duration': t['duration']
            })
            duration_h = 0
        train.duration_h = duration_h

        duration_m = getMinutes(t['duration'])
        if duration_m is None:
            errors.append({
                'typ': 'Invalid Duration (M)',
                'train_data_id': t['id'],
                'duration': t['duration']
            })
            duration_m = 0
        train.duration_m = duration_m

        train.zone = t['zone']
        train.date_from = getDate(t['date_from'])
        train.date_to = getDate(t['date_to'])

        from_station = _find_station(t['from_station_code'])
        if from_station is None:
            errors.append({
                'typ': 'From Station Not Found',
                'train_data_id': t['id'],
                'train_number': t['number'],
                'from_station_code': t['from_station_code'],
                'from_station_name': t['from_station_name']
            })
        else:
            train.from_station = from_station

        to_station = _find_station(t['to_station_code'])
        if to_station is None:
            errors.append({
                'typ': 'To Station Not Found',
                'train_data_id': t['id'],
                'train_number': t['number'],
                'to_station_code': t['to_station_code'],
                'to_station_name': t['to_station_name']
            })
        else:
            train.to_station = to_station

        train.number_of_halts = int(t['number_of_halts'])
        train.typ = t['type']

        departure = getTime(t['departure'])
        if departure is None:
            errors.append({
                'typ': 'Invalid Departure Time',
                'train_data_id': t['id'],
                'departure': t['departure']
            })
            departure = datetime.time(0, 0)
        train.departure = departure

        arrival = getTime(t['arrival'])
        # Fixed: the original tested ``departure`` here, so a bad arrival was
        # never logged and ``train.arrival`` could end up as None.
        if arrival is None:
            errors.append({
                'typ': 'Invalid Arrival Time',
                'train_data_id': t['id'],
                'arrival': t['arrival']
            })
            arrival = datetime.time(0, 0)
        train.arrival = arrival

        try:
            train.distance = int(t['distance'].replace("km", "").strip())
        except (AttributeError, TypeError, ValueError):
            # Missing or non-numeric distance: store 0 (not logged).
            train.distance = 0
        train.departure_days = t['departure_days']

        for b in boolFields:
            setattr(train, b, getBool(t[b]))

        if from_station is None or to_station is None:
            # Both station foreign keys are required by the Train model, so
            # saving would fail and abort the whole import; the problem is
            # already recorded in ``errors``.
            continue

        print(train.name)
        train.save()

    with open(join(DATA_DIR, "trainErrors.json"), "w") as errors_file:
        json.dump(errors, errors_file, indent=2)
# Duration strings such as "12h 30m": group 1 = hours, group 2 = minutes.
timeRe = re.compile(r'([0-9]{1,2})h\s([0-9]{1,2})m')


def _search(pattern, s):
    """Return the first match of ``pattern`` in ``s``, or None.

    A non-string ``s`` (e.g. None) counts as "no match" instead of raising.
    """
    try:
        return pattern.search(s)
    except TypeError:
        return None


def getHours(s):
    """Return the hours part of a duration like "12h 30m" as an int.

    Returns None when ``s`` is not a string or contains no such duration.
    """
    match = _search(timeRe, s)
    if match is None:
        return None
    return int(match.group(1))


def getMinutes(s):
    """Return the minutes part of a duration like "12h 30m" as an int.

    Returns None when ``s`` is not a string or contains no such duration.
    """
    match = _search(timeRe, s)
    if match is None:
        return None
    return int(match.group(2))


# Clock times such as "9:05" or "21:40": group 1 = hours, group 2 = minutes.
timeRe2 = re.compile(r'([0-9]{1,2})\:([0-9]{1,2})')


def getTime(s):
    """Parse the first "H:MM"-style time in ``s`` into a ``datetime.time``.

    Returns None when ``s`` is not a string, contains no time, or the
    numbers are out of range (e.g. "25:00").
    """
    match = _search(timeRe2, s)
    if match is None:
        return None
    try:
        return datetime.time(int(match.group(1)), int(match.group(2)))
    except ValueError:
        # datetime.time rejects hours > 23 and minutes > 59.
        return None


# Dates such as "Jan 5": group 1 = three-letter month, group 2 = day.
dateRe = re.compile(r'([a-zA-Z]{3})\s([0-9]{1,2})')
# Index in this list == month number; the empty first entry makes it 1-based.
months = ['', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']


def getDate(s):
    """Convert a date like "Jan 5" into the string "01-05" (month-day).

    Returns '' for None, for empty or whitespace-only input, and for text
    that holds no recognisable "<Mon> <day>" date.  Previously the last two
    cases raised IndexError / ValueError.
    """
    # The result is a 5-character string because the same patch turns
    # Train.date_from / date_to into CharField(max_length=5, blank=True).
    if s is None:
        return ''
    match = dateRe.search(s.strip())
    if match is None:
        return ''
    month_name = match.group(1).lower()
    if month_name not in months:
        return ''
    return "%02d-%02d" % (months.index(month_name), int(match.group(2)),)


def getBool(s):
    """Interpret a 0/1-style value as a bool.

    None and the empty string are False; otherwise the value is converted
    with ``int`` and is True unless it equals 0.  Other non-numeric text
    still raises ValueError.
    """
    if s is None or s == '':
        return False
    return int(s) != 0
number_of_halts = models.IntegerField() - typ = models.CharField(max_length=12) + typ = models.CharField(max_length=32) departure = models.TimeField() arrival = models.TimeField() distance = models.IntegerField() - departure_days = models.CharField(max_length=12) #this is just a string for display, we use the booleans in our code + departure_days = models.CharField(max_length=64) #this is just a string for display, we use the booleans in our code monday = models.BooleanField() tuesday = models.BooleanField() wednesday = models.BooleanField() @@ -47,7 +47,7 @@ class Train(models.Model): friday = models.BooleanField() saturday = models.BooleanField() sunday = models.BooleanField() - classes = models.CharField(max_length=32) + classes = models.CharField(max_length=64) chair_car = models.BooleanField() sleeper = models.BooleanField() first_class = models.BooleanField()