[aligner] Use the processings' weight to compute the distance matrix draft obsolete
author Simon Chabot <simon.chabot@logilab.fr>
Thu, 21 Mar 2019 16:45:25 +0100
changeset 546 cfa85506a071
obsolete rewritten using changetopics as 4d286f4b8027 by Simon Chabot <simon.chabot@logilab.fr> Thu, 09 May 2019 12:12:28 +0200
parent 545 f46589fd7c13
child 547 e3ca6034a3b4
[aligner] Use the processings' weight to compute the distance matrix

All the processing objects have a `weight` attribute so that they can be mixed with each other, each with its own weighting. Let's use it! This attribute was unused until now.

Differential Revision: https://phab.logilab.fr/D2066
nazca/rl/aligner.py
test/test_alignment.py
--- a/nazca/rl/aligner.py	Mon Sep 03 11:45:30 2018 +0200
+++ b/nazca/rl/aligner.py	Thu Mar 21 16:45:25 2019 +0100
@@ -104,8 +104,9 @@
              len(target_indexes)),
             dtype='float32')
         for processing in self.processings:
-            distmatrix += processing.cdist(refset, targetset,
-                                           ref_indexes, target_indexes)
+            w = processing.weight
+            distmatrix += w*processing.cdist(refset, targetset,
+                                             ref_indexes, target_indexes)
         return distmatrix
 
     def threshold_matched(self, distmatrix):
--- a/test/test_alignment.py	Mon Sep 03 11:45:30 2018 +0200
+++ b/test/test_alignment.py	Thu Mar 21 16:45:25 2019 +0100
@@ -27,6 +27,8 @@
 random.seed(6)  # Make sure tests are repeatable
 from os import path
 
+import numpy as np
+
 from nazca.utils.normalize import simplify
 import nazca.rl.aligner as alig
 import nazca.rl.blocking as blo
@@ -57,6 +59,85 @@
             for v, distance in values:
                 self.assertIn((k, v), true_matched)
 
+    def test_align_with_weight(self):
+        refset = [
+            ['V1', 'l1', (6.14194444444, 48.67)],
+            ['V2', 'l2', (6.2, 49)],
+            ['V3', 'l3', (5.1, 48)],
+            ['V4', 'l4', (5.2, 48.1)],
+        ]
+        targetset = [
+            ['T1', 'l1', (6.17, 48.7)],
+            ['T2', 'l1', (6.14, 48.73)],
+            ['T3', 'l4', (6.25, 48.91)],
+        ]
+
+        ref_indexes = range(len(refset))
+        target_indexes = range(len(targetset))
+
+        # Compute the distance matrix on the label only (weight=1)
+        aligner = alig.BaseAligner(
+            threshold=0.2,
+            processings=(LevenshteinProcessing(1, 1), ),
+        )
+        mat_difflib = aligner.compute_distance_matrix(
+            refset,
+            targetset,
+            ref_indexes,
+            target_indexes,
+        )
+
+        # Compute the distance matrix on the location only (weight=1)
+        aligner = alig.BaseAligner(
+            threshold=0.2,
+            processings=(GeographicalProcessing(2, 2), )
+        )
+        mat_geo = aligner.compute_distance_matrix(
+            refset,
+            targetset,
+            ref_indexes,
+            target_indexes,
+        )
+
+        # Compute the distance matrix with label and location (same weight)
+        aligner = alig.BaseAligner(
+            threshold=0.2,
+            processings=(
+                LevenshteinProcessing(1, 1),
+                GeographicalProcessing(2, 2),
+            )
+        )
+        mat_difflib_geo = aligner.compute_distance_matrix(
+            refset,
+            targetset,
+            ref_indexes,
+            target_indexes,
+        )
+
+        np.testing.assert_almost_equal(mat_difflib_geo, mat_geo + mat_difflib)
+
+        # Same aligner, but the processings have ≠ weights, to show that the
+        # weight is taken into account in the final result
+        aligner = alig.BaseAligner(
+            threshold=0.2,
+            processings=(
+                LevenshteinProcessing(1, 1, weight=0.8),
+                GeographicalProcessing(2, 2, weight=0.2),
+            )
+        )
+        mat_difflib_geo = aligner.compute_distance_matrix(
+            refset,
+            targetset,
+            ref_indexes,
+            target_indexes,
+        )
+
+        np.testing.assert_almost_equal(
+            mat_difflib_geo,
+            0.2*mat_geo + 0.8*mat_difflib
+        )
+
+
     def test_blocking_align(self):
         refset = [['V1', 'label1', (6.14194444444, 48.67)],
                   ['V2', 'label2', (6.2, 49)],