author:    Simon Chabot
date:      Mon, 29 Oct 2012 10:21:14 +0100
changeset: 58 (27b66c6cee3a)
parent:    57 (2741988383ae)
child:     59 (0dad7c581201)
Order imports
Files changed:
- distances.py (file | annotate | diff | comparison | revisions)
- matrix.py (file | annotate | diff | comparison | revisions)
- minhashing.py (file | annotate | diff | comparison | revisions)
- normalize.py (file | annotate | diff | comparison | revisions)
```--- a/distances.py	Mon Oct 29 10:17:24 2012 +0100
+++ b/distances.py	Mon Oct 29 10:21:14 2012 +0100
@@ -16,8 +16,9 @@
# with this program. If not, see <http://www.gnu.org/licenses/>.

from dateutil import parser as dateparser
+from math import cos, sqrt, pi #Needed for geographical distance
+
from scipy import matrix
-from math import cos, sqrt, pi #Needed for geographical distance

def levenshtein(stra, strb):
""" Compute the Levenshtein distance between stra and strb.```
```--- a/matrix.py	Mon Oct 29 10:17:24 2012 +0100
+++ b/matrix.py	Mon Oct 29 10:21:14 2012 +0100
@@ -15,12 +15,14 @@
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

+from collections import defaultdict
+from copy import deepcopy
+
+from scipy import matrix, empty
+from scipy import where
+
from cubes.alignment.distances import (levenshtein, soundex, \
jaccard, temporal, euclidean)
-from collections import defaultdict
-from scipy import matrix, empty
-from scipy import where
-from copy import deepcopy

class Distancematrix(object):
""" Construct and compute a matrix of distance given a distance function.```
```--- a/minhashing.py	Mon Oct 29 10:17:24 2012 +0100
+++ b/minhashing.py	Mon Oct 29 10:21:14 2012 +0100
@@ -17,11 +17,12 @@

import cPickle

-from scipy.sparse import lil_matrix
-from numpy import ones
+from time import time
from random import randint
from collections import defaultdict
-from time import time
+
+from numpy import ones
+from scipy.sparse import lil_matrix

from cubes.alignment.normalize import wordgrams
```
```--- a/normalize.py	Mon Oct 29 10:17:24 2012 +0100
+++ b/normalize.py	Mon Oct 29 10:21:14 2012 +0100
@@ -16,9 +16,11 @@
# with this program. If not, see <http://www.gnu.org/licenses/>.

import re
-from logilab.common.textutils import unormalize
+
+from string import punctuation
+
from nltk.tokenize import WordPunctTokenizer
-from string import punctuation
+from logilab.common.textutils import unormalize

STOPWORDS = set([u'alors', u'au', u'aucuns', u'aussi', u'autre', u'avant',```