From 032f350ac2fc193cb06e13e73f4f7d386d3dee84 Mon Sep 17 00:00:00 2001
From: Sanj
Date: Mon, 2 Jan 2012 18:16:38 +0530
Subject: [PATCH] added fuzzy wuzzy to requirements

---
 chaloBEST/imports/import_atlas.py | 30 ++++++++++++++++++++++++++++++
 requirements.txt                  |  1 +
 2 files changed, 31 insertions(+)

diff --git a/chaloBEST/imports/import_atlas.py b/chaloBEST/imports/import_atlas.py
index ac837e8..8df4600 100644
--- a/chaloBEST/imports/import_atlas.py
+++ b/chaloBEST/imports/import_atlas.py
@@ -3,6 +3,34 @@ from os.path import join
 import json
 import csv
 
+#Get levenshtein distance between two strings, from http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance
+def levenshtein(s1, s2):
+    """Return the Levenshtein edit distance between s1 and s2.
+
+    Counts the fewest single-character insertions, deletions and
+    substitutions that turn one argument into the other. Only two rows of
+    the dynamic-programming table are held at any time.
+    """
+    # Recurse with the arguments swapped so that s2 is never the longer one.
+    if len(s1) < len(s2):
+        return levenshtein(s2, s1)
+    # s1 is at least as long as s2 here, so an empty s1 means both are empty.
+    if not s1:
+        return len(s2)
+
+    # range() rather than xrange() so this also runs on Python 3.
+    previous_row = range(len(s2) + 1)
+    for i, c1 in enumerate(s1):
+        current_row = [i + 1]
+        for j, c2 in enumerate(s2):
+            # Rows are one cell longer than s2, hence j + 1 rather than j.
+            insertions = previous_row[j + 1] + 1
+            deletions = current_row[j] + 1
+            substitutions = previous_row[j] + (c1 != c2)
+            current_row.append(min(insertions, deletions, substitutions))
+        previous_row = current_row
+    return previous_row[-1]
+
 def csvToJSON():
     atlasCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.csv"), "r"), delimiter="\t")
     atlasDict = {}
@@ -21,5 +49,7 @@ def csvToJSON():
     jsonFile.close()
 
 
+
+
 def csvClean1():
     atlasCSV = csv.reader(open(join(PROJECT_ROOT, "../db_csv_files/Atlas.csv"), "r"), delimiter="\t")
diff --git a/requirements.txt b/requirements.txt
index baa4289..cfe1372 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 -e svn+http://code.djangoproject.com/svn/django/branches/releases/1.3.X/#egg=django
 -e bzr+http://code.0x2620.org/python-ox/#egg=python-ox
+-e git+git://github.com/seatgeek/fuzzywuzzy.git#egg=fuzzywuzzy
 django_extensions
 django-grappelli