[distances] Add the soundex distance (related #128982)
author Simon Chabot <simon.chabot@logilab.fr>
Wed, 17 Oct 2012 12:05:02 +0200
changeset 11 c4d63951cc16
parent 10 9d29f8f854e9
child 12 9ef85db30fb5
[distances] Add the soundex distance (related #128982)
distances.py
test/test_alignment.py
--- a/distances.py	Wed Oct 17 12:04:41 2012 +0200
+++ b/distances.py	Wed Oct 17 12:05:02 2012 +0200
@@ -77,3 +77,9 @@
     code = code[0] + ''.join([consonnantscode[c] for c in code[1:]])
     ###First four letters, completed by zeros
     return code[:4] + '0' * (4 - len(code))
+
+def soundex(stra, strb, language = 'french'):
+    """ Return 1 if stra and strb have the same soundex code (as computed by
+        soundexcode for `language`), 0 otherwise -- equal codes score 1, not 0.
+    """
+    return 1 if soundexcode(stra, language) == soundexcode(strb, language) else 0
--- a/test/test_alignment.py	Wed Oct 17 12:04:41 2012 +0200
+++ b/test/test_alignment.py	Wed Oct 17 12:05:02 2012 +0200
@@ -70,6 +70,9 @@
         self.assertEqual(soundexcode('Tymczak', 'english'), 'T522')
         self.assertEqual(soundexcode('Pfister', 'english'), 'P236')
 
+        self.assertEqual(soundex('Rubert', 'Robert', 'english'), 1)
+        self.assertEqual(soundex('Rubin', 'Robert', 'english'), 0)
+
 
 if __name__ == '__main__':
     from logilab.common.testlib import unittest_main