[Normalizer] Add the format method (related to #128998)
author Simon Chabot <simon.chabot@logilab.fr>
Thu, 18 Oct 2012 12:22:30 +0200
changeset 17 8cd8b96f9333
parent 16 7db50b138900
child 18 64f8789a2951
[Normalizer] Add the format method (related to #128998)
normalize.py
test/test_alignment.py
--- a/normalize.py	Thu Oct 18 11:41:59 2012 +0200
+++ b/normalize.py	Thu Oct 18 12:22:30 2012 +0200
@@ -15,6 +15,7 @@
 # You should have received a copy of the GNU Lesser General Public License along
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 
+import re
 from logilab.common.textutils import unormalize
 from nltk.tokenize import WordPunctTokenizer 
 
@@ -75,3 +76,20 @@
         """
 
         return format(round(float(number), ndigits), '0.%df' % ndigits)
+
+    def format(self, string, regexp, output):
+        """ Apply the regexp to the ``string`` and return a formatted string
+        according to ``output``
+
+        eg :
+         normalizer.format(u'[Victor Hugo - 26 fev 1802 / 22 mai 1885]',
+                           r'\[(?P<firstname>\w+) (?P<lastname>\w+) - '
+                           r'(?P<birthdate>.*?) / (?P<deathdate>.*?)\]',
+                           u'%(lastname)s, %(firstname)s (%(birthdate)s - '
+                           u'%(deathdate)s)')
+
+         would return u'Hugo, Victor (26 fev 1802 - 22 mai 1885)'
+         """
+
+        match = re.match(regexp, string)
+        return output % match.groupdict()
--- a/test/test_alignment.py	Thu Oct 18 11:41:59 2012 +0200
+++ b/test/test_alignment.py	Thu Oct 18 12:22:30 2012 +0200
@@ -125,6 +125,20 @@
         self.assertEqual(self.normalizer.round(3.14159), '3')
         self.assertEqual(self.normalizer.round('3.14159', 3), '3.142')
 
+    def test_format(self):
+        string = u'[Victor Hugo - 26 fev 1802 / 22 mai 1885]'
+        regex  = r'\[(?P<firstname>\w+) (?P<lastname>\w+) - ' \
+                 r'(?P<birthdate>.*) \/ (?P<deathdate>.*?)\]'
+        output = u'%(lastname)s, %(firstname)s (%(birthdate)s - %(deathdate)s)'
+        self.assertEqual(self.normalizer.format(string, regex, output),
+                         u'Hugo, Victor (26 fev 1802 - 22 mai 1885)')
+
+        string = u'http://perdu.com/42/supertop/cool'
+        regex  = r'http://perdu.com/(?P<id>\d+).*'
+        output = u'%(id)s'
+        self.assertEqual(self.normalizer.format(string, regex, output),
+                         u'42')
+
 
 if __name__ == '__main__':
     from logilab.common.testlib import unittest_main