[distance] Add a new distance between dates (related #128982)
author: Simon Chabot <simon.chabot@logilab.fr>
Wed, 17 Oct 2012 15:31:11 +0200
changeset 13 2f19c16c3610
parent 12 9ef85db30fb5
child 14 e02ce920aff6
[distance] Add a new distance between dates (related #128982)
distances.py
test/test_alignment.py
--- a/distances.py	Wed Oct 17 12:34:28 2012 +0200
+++ b/distances.py	Wed Oct 17 15:31:11 2012 +0200
@@ -1,4 +1,6 @@
-# -*- coding : utf-8 -*-
+# -*- coding:utf-8 -*-
+
+from dateutil import parser as dateparser
 
 def levenshtein(stra, strb):
     """ Compute the Levenshtein distance between stra and strb.
@@ -97,3 +99,39 @@
 
     jab = 1.0 * len(seta.intersection(setb)) / len(seta.union(setb))
     return 1.0 - jab
+
+def temporal(stra, strb, granularity = u'days', language = u'french'):
+    """ Return the absolute distance between two strings read as dates.
+
+        ``granularity`` is ``days`` (whole days, also the fallback for any
+        unknown value), ``months`` (days / 30.5) or ``years`` (days / 365.25).
+        ``language``: ``french``; any other value keeps dateutil's own names.
+    """
+    class customparserinfo(dateparser.parserinfo):
+        if language.lower() == u'french':
+            HMS      = [(u'h', u'heure', u'heures'),
+                        (u'm', u'minute', u'minutes'),
+                        (u's', u'seconde', u'secondes'),]
+            JUMP     = [u' ', u'.', u',', u';', u'-', u'/', u"'",
+                        u'a', u'le', u'et',]
+            MONTHS   = [(u'Jan', u'Janvier'), (u'Fev', u'Fevrier'), (u'Mar', u'Mars'),
+                       (u'Avr', u'Avril'), (u'Mai', u'Mai'), (u'Jun', u'Juin'),
+                       (u'Jui', u'Juillet'), (u'Aou', u'Aout'),
+                       (u'Sep', u'Septembre'), (u'Oct', u'Octobre'),
+                       (u'Nov', u'Novembre'), (u'Dec', u'Decembre'),]
+            PERTAIN  = [u'de']
+            # NOTE(review): u'Mar' is also a month abbreviation above -- confirm.
+            WEEKDAYS = [(u'Lun', u'Lundi'), (u'Mar', u'Mardi'), (u'Mer', u'Mercredi'),
+                        (u'Jeu', u'Jeudi'), (u'Ven', u'Vendredi'),
+                        (u'Sam', u'Samedi'), (u'Dim', u'Dimanche'),]
+    info = customparserinfo() # one instance serves both parse() calls
+    # abs() is applied to the timedelta itself: a negative timedelta stores
+    # ``days < 0`` plus a positive remainder, so abs(diff.days) is asymmetric.
+    days = abs(dateparser.parse(stra, parserinfo = info)
+               - dateparser.parse(strb, parserinfo = info)).days
+    unit = granularity.lower()
+    if unit == 'years':
+        return days / 365.25
+    if unit == 'months':
+        return days / 30.5
+    return days
--- a/test/test_alignment.py	Wed Oct 17 12:34:28 2012 +0200
+++ b/test/test_alignment.py	Wed Oct 17 15:31:11 2012 +0200
@@ -39,7 +39,7 @@
 
 from cubicweb.devtools import testlib
 from cubes.alignment.distances import (levenshtein, soundex, soundexcode, \
-                                       jaccard)
+                                       jaccard, temporal)
 
 class DistancesTest(testlib.CubicWebTC):
     def test_levenshtein(self):
@@ -84,6 +84,19 @@
         self.assertAlmostEqual(jaccard('boujour', 'bonjour'), 0.166, 2)
         self.assertAlmostEqual(jaccard('rubert', 'robert'), 0.333, 2)
 
+    def test_temporal(self):
+        # temporal() reads both strings as dates; the distance comes back in
+        # ``days`` (the default), ``months`` or ``years``.
+        ref, later = '14 aout 1991', '08/15/1992'
+        nearly = self.assertAlmostEqual
+        self.assertEqual(temporal(ref, '14/08/1991'), 0)
+        self.assertEqual(temporal(ref, '08/14/1991'), 0)
+        self.assertEqual(temporal(ref, later), 367)
+        nearly(temporal(ref, later, 'years'), 1.0, 1)
+        nearly(temporal('1991', '1992', 'years'), 1.0, 1)
+        nearly(temporal('13 mars', '13 mai', 'months'), 2.0, 1)
+        nearly(temporal('13 march', '13 may', 'months', 'english'), 2.0, 1)
+
 if __name__ == '__main__':
     from logilab.common.testlib import unittest_main
     unittest_main()