[Aligner] `normalize_set` handles tuples. (closes #117136)
author Simon Chabot <simon.chabot@logilab.fr>
Fri, 25 Jan 2013 12:41:53 +0100
changeset 246 6d80b4e863f3
parent 245 e5f1e678e654
child 247 db9a8b3f6f16
[Aligner] `normalize_set` handles tuples. (closes #117136)
aligner.py
test/test_alignment.py
--- a/aligner.py	Wed Jan 30 15:14:23 2013 +0100
+++ b/aligner.py	Fri Jan 25 12:41:53 2013 +0100
@@ -31,7 +31,10 @@
 
 def normalize_set(rset, treatments):
     """ Apply all the normalization functions to the given rset """
+    normalized_set = []
     for row in rset:
+        row = list(row)
+        normalized_set.append(row)
         for ind, attribut in enumerate(row):
             treat = treatments.get(ind)
             if not attribut or not treat:
@@ -44,7 +47,7 @@
                                  for arg in farg if arg in treat.get('norm_params', []))
                 attribut = f(attribut, **givenargs)
             row[ind] = attribut
-    return rset
+    return normalized_set
 
 def findneighbours_kdtree(alignset, targetset, indexes=(1, 1), threshold=0.1):
     """ Find the neigbhours using kdree
--- a/test/test_alignment.py	Wed Jan 30 15:14:23 2013 +0100
+++ b/test/test_alignment.py	Fri Jan 25 12:41:53 2013 +0100
@@ -309,6 +309,29 @@
         neighbours = alig.findneighbours_kdtree(alignset, targetset, indexes=(2, 2), threshold=0.3)
         self.assertEqual(neighbours, [[[0], [0, 2]], [[1], [0, 2]], [[2], [1]], [[3], [1]]])
 
+    def test_normalize_set(self):
+        treatments = {1: {'normalization': [simplify,]}}
+
+        alignlist = [['Label1', u"Un nuage flotta dans le grand ciel bleu."],
+                     ['Label2', u"Pour quelle occasion vous êtes-vous apprêtée ?"],
+                     ['Label3', u"Je les vis ensemble à plusieurs occasions."],
+                     ['Label4', u"Je n'aime pas ce genre de bandes dessinées tristes."],
+                     ['Label5', u"Ensemble et à plusieurs occasions, je les vis."],
+                    ]
+        aligntuple = [tuple(l) for l in alignlist]
+
+        normalizedlist = alig.normalize_set(alignlist, treatments)
+        normalizedtuple = alig.normalize_set(aligntuple, treatments)
+
+        self.assertListEqual(normalizedlist, normalizedtuple)
+        self.assertListEqual(normalizedlist,
+                        [['Label1', u"nuage flotta grand ciel bleu"],
+                         ['Label2', u"occasion êtes apprêtée"],
+                         ['Label3', u"vis ensemble à plusieurs occasions"],
+                         ['Label4', u"n aime genre bandes dessinées tristes"],
+                         ['Label5', u"ensemble à plusieurs occasions vis"],
+                        ])
+
     def test_findneighbours_minhashing(self):
         lemmas = loadlemmas(path.join(TESTDIR, 'data', 'french_lemmas.txt'))
         treatments = {2: {'normalization': [simplify,], 'norm_params': {'lemmas': lemmas}}}