[normalize] Make the loadlemmas() function more readable
author Simon Chabot <simon.chabot@logilab.fr>
Tue, 20 Nov 2012 15:17:38 +0100
changeset 158 a6449ca99bbf
parent 157 30b9271eb762
child 159 ec8c2443c2b8
[normalize] Make the loadlemmas() function more readable
aligner.py
normalize.py
--- a/aligner.py	Tue Nov 20 15:13:57 2012 +0100
+++ b/aligner.py	Tue Nov 20 15:17:38 2012 +0100
@@ -231,7 +231,6 @@
                                  get_global_mat=True):
     """ Full conquer and divide method for alignment.
     Compute neighbours and merge the different subalignments.
-    XXX
     """
     global_matched = {}
     if get_global_mat:
--- a/normalize.py	Tue Nov 20 15:13:57 2012 +0100
+++ b/normalize.py	Tue Nov 20 15:17:38 2012 +0100
@@ -144,12 +144,16 @@
     for r in xrange(len(words)):
         yield ' '.join(words[r:r + k])
 
-def loadlemmas(filename):
+def loadlemmas(filename, encoding='utf-8'):
     """ Return the default lemmas dictionnary
     """
-    #XXX Make a loop
-    return dict([line.decode('utf-8').strip().split('\t') for line in open(filename)
-                 if len(line.strip().split('\t'))==2])
+    lemmas = {}
+    with open(filename) as fobj:
+        for line in fobj:
+            line = line.decode(encoding).strip().split('\t')
+            if len(line) == 2:
+                lemmas[line[0]] = line[1]
+    return lemmas
 
 def lemmatized(sentence, lemmas, tokenizer=None):
     """ Return the lemmatized sentence