[old api] Update old api and corresponding tests, see #183461
author Vincent Michel <vincent.michel@logilab.fr>
Tue, 15 Oct 2013 09:32:21 +0000
changeset 316 eecaebe54657
parent 315 c66d1517d56f
child 317 172be468a7aa
[old api] Update old api and corresponding tests, see #183461
old_api.py
test/test_old_api.py
--- a/old_api.py	Tue Oct 15 09:43:26 2013 +0000
+++ b/old_api.py	Tue Oct 15 09:32:21 2013 +0000
@@ -144,7 +144,8 @@
     # Build aligner
     processings = [extract_treatment_from_treatment(t, ind) for ind, t in processings.iteritems()]
     aligner = BaseAligner(threshold, processings)
-    aligner.register_normalizers(normalizers)
+    aligner.register_ref_normalizer(normalizers)
+    aligner.register_target_normalizer(normalizers)
     # Align
     return aligner.align(alignset, targetset)
 
@@ -156,8 +157,8 @@
                                      'release.'
                                      ' You should rather use the BaseAligner '
                                      'object of the aligner module'))
-    mat, matched = align([alignset[i] for i in alignind],
-                         [targetset[i] for i in targetind], threshold,
+    mat, matched = align([alignset[i[0]] for i in alignind],
+                         [targetset[i[0]] for i in targetind], threshold,
                          processings, _applyNormalization=_applyNormalization)
     new_matched = {}
     for k, values in matched.iteritems():
@@ -194,7 +195,7 @@
                 subdict.add((v, d))
                 # XXX avoid issue in sparse matrix
                 if get_global_mat:
-                    global_mat[k, v] = d or 10**(-10)
+                    global_mat[k[0], v[0]] = d or 10**(-10)
     if get_global_mat:
         return global_mat, global_matched
     return global_matched
@@ -219,11 +220,11 @@
     if not uniq:
         for alignid in matched:
             for targetid, _ in matched[alignid]:
-                yield alignset[alignid][0], targetset[targetid][0]
+                yield alignset[alignid[0]][0], targetset[targetid[0]][0]
     else:
         for alignid in matched:
             bestid, _ = sorted(matched[alignid], key=lambda x:x[1])[0]
-            yield alignset[alignid][0], targetset[bestid][0]
+            yield alignset[alignid[0]][0], targetset[bestid[0]][0]
 
 def alignall_iterative(alignfile, targetfile, alignformat, targetformat,
                        threshold, size=10000, equality_threshold=0.01,
@@ -285,10 +286,10 @@
                 for alignid in matched:
                     bestid, dist = sorted(matched[alignid], key=lambda x:x[1])[0]
                     #Get the better known distance
-                    _, current_dist = cache.get(alignset[alignid][0], (None, None))
+                    _, current_dist = cache.get(alignset[alignid[0]][0], (None, None))
                     if current_dist is None or current_dist > dist:
                         #If it's better, update the cache
-                        cache[alignset[alignid][0]] = (targetset[bestid][0], dist)
+                        cache[alignset[alignid[0]][0]] = (targetset[bestid[0]][0], dist)
                         if dist <= equality_threshold:
                             #If perfect, stop trying to align this one
                             doneids.add(alignset[alignid][0])
--- a/test/test_old_api.py	Tue Oct 15 09:43:26 2013 +0000
+++ b/test/test_old_api.py	Tue Oct 15 09:32:21 2013 +0000
@@ -99,7 +99,10 @@
                      ['T2', 'labelt2', (5.3, 48.2)],
                      ['T3', 'labelt3', (6.25, 48.91)],
                      ]
-        true_matched = set([(0,0), (0, 2), (1,2), (3,1)])
+        true_matched = set([((0, 'V1'), (0, 'T1')),
+                           ((1, 'V2'), (2, 'T3')),
+                           ((0, 'V1'), (2, 'T3')),
+                           ((3, 'V4'), (1, 'T2'))])
         neighbours = findneighbours_kdtree(alignset, targetset, indexes=(2, 2), threshold=0.3)
         processings = {2: {'metric': 'geographical', 'matrix_normalized':False,
                           'metric_params': {'units': 'km', 'in_radians': False}}}
@@ -112,7 +115,10 @@
         self.assertEqual(true_matched, predict_matched)
 
     def test_divide_and_conquer_align(self):
-        true_matched = set([(0,0), (0, 2), (1,2), (3,1)])
+        true_matched = set([((0, 'V1'), (0, 'T1')),
+                            ((1, 'V2'), (2, 'T3')),
+                            ((0, 'V1'), (2, 'T3')),
+                            ((3, 'V4'), (1, 'T2'))])
         alignset = [['V1', 'label1', (6.14194444444, 48.67)],
                     ['V2', 'label2', (6.2, 49)],
                     ['V3', 'label3', (5.1, 48)],
@@ -203,7 +209,10 @@
                      ['T3', 'labelt3', (6.25, 48.91)],
                      ]
         neighbours = findneighbours_kdtree(alignset, targetset, indexes=(2, 2), threshold=0.3)
-        self.assertEqual([([0], [0, 2]), ([1], [0, 2]), ([2], [1]), ([3], [1])], neighbours)
+        self.assertEqual([([(0, 'V1')], [(0, 'T1'), (2, 'T3')]),
+                          ([(1, 'V2')], [(0, 'T1'), (2, 'T3')]),
+                          ([(2, 'V3')], [(1, 'T2')]),
+                          ([(3, 'V4')], [(1, 'T2')])], neighbours)
 
     def test_findneighbours_minhashing(self):
         lemmas = loadlemmas(path.join(TESTDIR, 'data', 'french_lemmas.txt'))
@@ -221,7 +230,9 @@
         alignset = normalize_set(alignset, processings)
         targetset = normalize_set(targetset, processings)
         neighbours = findneighbours_minhashing(alignset, targetset, indexes=(2, 2), threshold=0.4)
-        for align in (([2, 4], [1]), ([0], [0]), ([3], [2])):
+        true_set = [([(0, 'V1')], [(0, 'T1')]), ([(3, 'V4')], [(2, 'T3')]),
+                    ([(2, 'V3'), (4, 'V5')], [(1, 'T2')])]
+        for align in true_set:
             self.assertIn(align, neighbours)
 
     def test_findneighbours_clustering(self):