[Matrix] Enables normalization
author: Simon Chabot <simon.chabot@logilab.fr>
Thu, 18 Oct 2012 17:58:53 +0200
changeset 23 d3b07dc25785
parent 22 0c3ef4909658
child 24 85a396a8f66d
[Matrix] Enables normalization
matrix.py
--- a/matrix.py	Thu Oct 18 17:58:28 2012 +0200
+++ b/matrix.py	Thu Oct 18 17:58:53 2012 +0200
@@ -27,7 +27,7 @@
         Given :
             input1 = ['Victor Hugo', 'Albert Camus']
             input2 = ['Victor Wugo', 'Albert Camus']
-            distance = 'levensthein'
+            distance = levensthein
 
             constructs the following matrix :
                  +----+----+
@@ -42,6 +42,7 @@
         self.input2 = input2
         self.distance = distance
         self._matrix = lil_matrix((len(input1), len(input2)), dtype='float32')
+        self._maxdist = 0
         self._compute()
 
     def _compute(self):
@@ -50,6 +51,8 @@
            # bottom
            for j in xrange(i, len(self.input2)):
                self._matrix[i,j] = self.distance(self.input1[i], self.input2[j])
+               if self._matrix[i,j] > self._maxdist:
+                   self._maxdist = self._matrix[i,j]
 
     def __getitem__(self, index):
         (i, j) = index
@@ -57,8 +60,10 @@
             return self._matrix[index]
         return self._matrix[j, i]
 
-    def matched(self, cutoff = 0):
+    def matched(self, cutoff = 0, normalized = False):
         match = defaultdict(list)
+        if normalized:
+            cutoff *= self._maxdist
         for i in xrange(len(self.input1)):
             for j in xrange(i, len(self.input2)):
                 if self._matrix[i, j] <= cutoff: