[blocking] Protect key blocking against none values, see #183446
author Vincent Michel <vincent.michel@logilab.fr>
Tue, 15 Oct 2013 09:04:34 +0000
changeset 310 6e2933017fca
parent 309 abb55b01503f
child 311 6bb993dec00b
[blocking] Protect key blocking against none values, see #183446
blocking.py
--- a/blocking.py	Tue Oct 15 09:03:13 2013 +0000
+++ b/blocking.py	Tue Oct 15 09:04:34 2013 +0000
@@ -202,9 +202,10 @@
         the identifiers of the records of the both sets for this value.
     """
 
-    def __init__(self, ref_attr_index, target_attr_index, callback):
+    def __init__(self, ref_attr_index, target_attr_index, callback, ignore_none=False):
         super(KeyBlocking, self).__init__(ref_attr_index, target_attr_index)
         self.callback = callback
+        self.ignore_none = ignore_none
         self.reference_index = {}
         self.target_index = {}
 
@@ -213,9 +214,13 @@
         """
         for ind, rec in enumerate(refset):
             key = self.callback(rec[self.ref_attr_index])
+            if not key and self.ignore_none:
+                continue
             self.reference_index.setdefault(key, []).append((ind, rec[0]))
         for ind, rec in enumerate(targetset):
             key = self.callback(rec[self.target_attr_index])
+            if not key and self.ignore_none:
+                continue
             self.target_index.setdefault(key, []).append((ind, rec[0]))
 
     def _iter_blocks(self):