[distance] Move out the custom parser info.
author Simon Chabot <simon.chabot@logilab.fr>
Tue, 20 Nov 2012 10:59:05 +0100
changeset 153 b0d53d47cb08
parent 152 d4556b2354be
child 154 f30b1c1bd109
[distance] Move the custom parser info out of temporal(). This lets users supply their own implementation of the parser info, so other languages can be supported…
distances.py
test/test_alignment.py
--- a/distances.py	Tue Nov 20 10:33:39 2012 +0100
+++ b/distances.py	Tue Nov 20 10:59:05 2012 +0100
@@ -187,7 +187,32 @@
 
 
 ### TEMPORAL DISTANCES ########################################################
-def temporal(stra, strb, granularity=u'days', language=u'french',
+class FrenchParserInfo(dateparser.parserinfo):
+    """ Subclass of dateutil.parser.parserinfo whose English keyword tables
+        are replaced by French ones (all words are spelled without accents).
+    """
+
+    HMS = [(u'h', u'heure', u'heures'),
+           (u'm', u'minute', u'minutes'),
+           (u's', u'seconde', u'secondes')]  # plural form was missing
+    JUMP = [u' ', u'.', u',', u';', u'-', u'/', u"'",
+            u'a', u'le', u'et', u'er']
+    MONTHS = [(u'Jan', u'Janvier'), (u'Fev', u'Fevrier'),
+              (u'Mar', u'Mars'), (u'Avr', u'Avril'), (u'Mai', u'Mai'),
+              (u'Jun', u'Juin'), (u'Jui', u'Juillet'),
+              (u'Aou', u'Aout'), (u'Sep', u'Septembre'),
+              (u'Oct', u'Octobre'), (u'Nov', u'Novembre'),
+              (u'Dec', u'Decembre')]
+    PERTAIN = [u'de']
+    WEEKDAYS = [(u'Lun', u'Lundi'),
+                (u'Mar', u'Mardi'),
+                (u'Mer', u'Mercredi'),
+                (u'Jeu', u'Jeudi'),
+                (u'Ven', u'Vendredi'),
+                (u'Sam', u'Samedi'),
+                (u'Dim', u'Dimanche')]
+
+def temporal(stra, strb, granularity=u'days', parserinfo=FrenchParserInfo,
              dayfirst=True, yearfirst=False):
     """ Return the distance between two strings (read as dates).
 
@@ -201,30 +226,9 @@
         Neither stra nor strb can have accent. Clean it before.
     """
 
-    class customparserinfo(dateparser.parserinfo):
-        if language.lower() == u'french':
-            HMS = [(u'h', u'heure', u'heures'),
-                   (u'm', u'minute', u'minutes'),
-                        (u's', u'seconde', u'seconde'),]
-            JUMP = [u' ', u'.', u',', u';', u'-', u'/', u"'",
-                   u'a', u'le', u'et', u'er']
-            MONTHS = [(u'Jan', u'Janvier'), (u'Fev', u'Fevrier'),
-                      (u'Mar', u'Mars'), (u'Avr', u'Avril'), (u'Mai', u'Mai'),
-                      (u'Jun', u'Juin'), (u'Jui', u'Juillet'),
-                      (u'Aou', u'Aout'), (u'Sep', u'Septembre'),
-                      (u'Oct', u'Octobre'), (u'Nov', u'Novembre'),
-                      (u'Dec', u'Decembre')]
-            PERTAIN = [u'de']
-            WEEKDAYS = [(u'Lun', u'Lundi'),
-                        (u'Mar', u'Mardi'),
-                        (u'Mer', u'Mercredi'),
-                        (u'Jeu', u'Jeudi'),
-                        (u'Ven', u'Vendredi'),
-                        (u'Sam', u'Samedi'),
-                        (u'Dim', u'Dimanche')]
-    datea = dateparser.parse(stra, parserinfo=customparserinfo(dayfirst,
+    datea = dateparser.parse(stra, parserinfo=parserinfo(dayfirst,
                              yearfirst), fuzzy=True)
-    dateb = dateparser.parse(strb, parserinfo=customparserinfo(dayfirst,
+    dateb = dateparser.parse(strb, parserinfo=parserinfo(dayfirst,
                              yearfirst), fuzzy=True)
     diff = datea - dateb
     if granularity.lower() == 'years':
--- a/test/test_alignment.py	Tue Nov 20 10:33:39 2012 +0100
+++ b/test/test_alignment.py	Tue Nov 20 10:59:05 2012 +0100
@@ -41,10 +41,11 @@
 
 import unittest2
 import random
+random.seed(6) ### Make sure tests are repeatable
 import numpy as np
 
 from os import path
-random.seed(6) ### Make sure tests are repeatable
+from dateutil import parser as dateparser
 
 from alignment.distances import (levenshtein, soundex, soundexcode,   \
                                  jaccard, temporal, euclidean,        \
@@ -120,13 +121,13 @@
         self.assertEqual(temporal('14 aout 1991', '08/15/1992'), 367)
         #Test a case of ambiguity
         self.assertEqual(temporal('1er mai 2012', '01/05/2012'), 0)
-        self.assertEqual(temporal('1er mai 2012', '05/01/2012', dayfirst = False), 0)
+        self.assertEqual(temporal('1er mai 2012', '05/01/2012', dayfirst=False), 0)
         #Test the different granularities available
         self.assertAlmostEqual(temporal('14 aout 1991', '08/15/1992', 'years'), 1.0, 1)
         self.assertAlmostEqual(temporal('1991', '1992', 'years'), 1.0, 1)
         self.assertAlmostEqual(temporal('13 mars', '13 mai', 'months'), 2.0, 1)
         self.assertAlmostEqual(temporal('13 march', '13 may', 'months',
-                                        'english'), 2.0, 1)
+                                        parserinfo=dateparser.parserinfo), 2.0, 1)
 
         #Test fuzzyness
         self.assertEqual(temporal('Jean est né le 1er octobre 1958',
@@ -148,7 +149,7 @@
     def test_geographical(self):
         paris = (48.856578, 2.351828)
         london = (51.504872, -0.07857)
-        dist_parislondon = geographical(paris, london, in_radians = False)
+        dist_parislondon = geographical(paris, london, in_radians=False)
 
         self.assertAlmostEqual(dist_parislondon, 341564, 0)
 
@@ -224,7 +225,7 @@
 
         #Victor Hugo --> Victor Wugo
         #Albert Camus --> Albert Camus, Albert Camu
-        self.assertEqual(am.matched(m, cutoff = 2),
+        self.assertEqual(am.matched(m, cutoff=2),
                         {0: [(0, d(i1[0], i2[0]))], 1: [(1, d(i1[1], i2[1])),
                                                         (2, d(i1[1], i2[2]))]})