author Vincent Michel Tue, 13 Nov 2012 15:38:52 +0100 changeset 121 dfb4389d7e84 parent 120 0c0679c3c537 child 122 a7f83003766f
[matrix] Make API closer to scipy.spatial and add metrics handling
 matrix.py file | annotate | diff | comparison | revisions
```--- a/matrix.py	Tue Nov 13 15:38:19 2012 +0100
+++ b/matrix.py	Tue Nov 13 15:38:52 2012 +0100
@@ -21,6 +21,8 @@
from scipy import array, empty
from scipy import where

+import alignment.distances as ds
+
""" Construct and compute a matrix of distance given a distance function.

Given :
@@ -36,20 +38,47 @@
+----+----+
"""

-def cdist(input1, input2, distance, normalized = True, kargs = {}):
-    distmatrix = empty((len(input1), len(input2)), dtype='float32')
+METRICS = {'euclidean': ds.euclidean, 'levenshtein': ds.levenshtein,
+           'soundex': ds.soundex, 'jaccard': ds.jaccard,
+           'temporal': ds.temporal, 'geographical': ds.geographical}
+
+
+def pdist(X, metric='euclidean', matrix_normalized=True, metric_params=None):
+    """ Compute the upper triangular part of the distance matrix, as a flat
+    list, in a way similar to scipy.spatial.distance.pdist"""
+    metric = metric if not isinstance(metric, basestring) else METRICS.get(metric, ds.euclidean)
+    values = []
+    for i in xrange(len(X)):
+        for j in xrange(i+1, len(X)):
+            d = 1
+            if X[i] and X[j]:
+                d = metric(X[i], X[j], **(metric_params or {}))
+                if matrix_normalized:
+                    d = 1 - (1.0 / (1.0 + d))
+            values.append(d)
+    return values
+
+def cdist(X, Y, metric='euclidean', matrix_normalized=True, metric_params=None):
+    """ Compute the distance matrix between the elements of X and Y, given a metric
+    """
+    metric = metric if not isinstance(metric, basestring) else METRICS.get(metric, ds.euclidean)
+    distmatrix = empty((len(X), len(Y)), dtype='float32')
size = distmatrix.shape
for i in xrange(size[0]):
for j in xrange(size[1]):
d = 1
-            if input1[i] and input2[j]:
-                d = distance(input1[i], input2[j], **kargs)
-                if normalized:
+            if X[i] and Y[j]:
+                d = metric(X[i], Y[j], **(metric_params or {}))
+                if matrix_normalized:
d = 1 - (1.0 / (1.0 + d))
distmatrix[i, j] = d
return distmatrix

def matched(distmatrix, cutoff = 0, normalized = False):
+    """ Return the matched elements within a dictionary,
+    each key being the index from X, and the corresponding
+    values being a list of pairs (index from Y, distance)
+    """
match = defaultdict(list)
if normalized:
distmatrix /= distmatrix.max()```