flake8
author    Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
date      Mon, 03 Sep 2018 10:50:50 +0200
changeset 541:78ef292acda7
parent    540:03c8777469f5
child     542:aabf5c567bb2
flake8
nazca/__pkginfo__.py
nazca/data/countries.py
nazca/ner/__init__.py
nazca/ner/filters.py
nazca/ner/preprocessors.py
nazca/ner/sources.py
nazca/rl/aligner.py
nazca/rl/blocking.py
nazca/utils/dataio.py
nazca/utils/distances.py
nazca/utils/minhashing.py
nazca/utils/normalize.py
nazca/utils/tokenizer.py
test/test_alignment.py
test/test_blocking.py
test/test_dataio.py
test/test_distances.py
test/test_filters.py
test/test_minhashing.py
test/test_ner.py
test/test_normalize.py
test/test_preprocessors.py
test/test_tokenizer.py
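
The cleanup below leans on flake8's two standard suppression comments: a file-level "# flake8: noqa" (added to nazca/data/countries.py) makes the checker skip the whole module, while a trailing "# noqa" (added to the XHTML template string in nazca/utils/dataio.py) silences warnings for a single line only. A minimal, hypothetical Python illustration of both idioms, not taken from the nazca sources:

    # flake8: noqa
    # On a line of its own, this comment tells flake8 to skip every check in
    # the file -- handy for generated data modules such as countries.py.

    TEMPLATE = '<html><head><title>ner</title></head><body><div>%s</div></body></html>'  # noqa
    # A trailing "# noqa" suppresses warnings (here the line-length check,
    # E501) for this single line only.
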
--- a/nazca/__pkginfo__.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/__pkginfo__.py	Mon Sep 03 10:50:50 2018 +0200
@@ -18,6 +18,7 @@
 # with nazca. If not, see <http://www.gnu.org/licenses/>.
 """Nazca packaging information."""
 __docformat__ = "restructuredtext en"
+from os.path import join
 import sys
 
 distname = 'nazca'
@@ -53,7 +54,6 @@
     'nltk': None,
 }
 
-from os.path import join
 include_dirs = [join('test', 'data'), 'data', 'examples', 'ner', 'rl', 'utils']
 
 if sys.version_info < (2, 7):
--- a/nazca/data/countries.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/data/countries.py	Mon Sep 03 10:50:50 2018 +0200
@@ -1,4 +1,4 @@
-
+# flake8: noqa
 # Countries list (ISO-3166)
 COUNTRIES = {'##': 'non renseign\xc3\xa9',
              '..': 'non renseign\xc3\xa9',
--- a/nazca/ner/__init__.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/ner/__init__.py	Mon Sep 03 10:50:50 2018 +0200
@@ -5,7 +5,7 @@
 
 
 ###############################################################################
-### NER PROCESS ###############################################################
+# NER PROCESS #################################################################
 ###############################################################################
 class NerProcess(object):
     """ High-level process for Named Entities Recognition
@@ -52,7 +52,6 @@
         for token in tokens:
             if token.start < last_stop:
                 continue  # this token overlaps with a previous match
-            word = token.word
             # Applies preprocessors
             # XXX Preprocessors may be sources dependant
             for preprocessor in self.preprocessors:
--- a/nazca/ner/filters.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/ner/filters.py	Mon Sep 03 10:50:50 2018 +0200
@@ -5,7 +5,7 @@
 
 
 ###############################################################################
-### NER FILTERS ###############################################################
+# NER FILTERS #################################################################
 ###############################################################################
 class AbstractNerFilter(object):
     """ A filter used for cleaning named entities results
@@ -27,8 +27,11 @@
     def __call__(self, named_entities):
         uris = [u for u, p, t in named_entities]
         counts = dict([(u, uris.count(u)) for u in set(uris)])
-        return [n for n in named_entities if not ((self.min_occ and counts[n[0]] < self.min_occ)
-                                                  or (self.max_occ and counts[n[0]] > self.max_occ))]
+        return [
+            n for n in named_entities
+            if not ((self.min_occ and counts[n[0]] < self.min_occ) or
+                    (self.max_occ and counts[n[0]] > self.max_occ))
+        ]
 
 
 class NerRDFTypeFilter(object):
--- a/nazca/ner/preprocessors.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/ner/preprocessors.py	Mon Sep 03 10:50:50 2018 +0200
@@ -9,7 +9,7 @@
 
 
 ###############################################################################
-### NER PREPROCESSORS #########################################################
+# NER PREPROCESSORS ###########################################################
 ###############################################################################
 class AbstractNerPreprocessor(object):
     """ Preprocessor
--- a/nazca/ner/sources.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/ner/sources.py	Mon Sep 03 10:50:50 2018 +0200
@@ -1,12 +1,11 @@
 # -*- coding: utf-8 -*-
 """ Sources for Named Entities Recognition.
 """
-from nazca.utils.tokenizer import Token
 from nazca.utils.dataio import sparqlquery, rqlquery
 
 
 ###############################################################################
-### NER SOURCE ################################################################
+# NER SOURCE ##################################################################
 ###############################################################################
 class AbstractNerSource(object):
     """ High-level source for Named Entities Recognition
--- a/nazca/rl/aligner.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/rl/aligner.py	Mon Sep 03 10:50:50 2018 +0200
@@ -28,7 +28,7 @@
 
 
 ###############################################################################
-### UTILITY FUNCTIONS #########################################################
+# UTILITY FUNCTIONS ###########################################################
 ###############################################################################
 def iter_aligned_pairs(refset, targetset, global_mat,
                        global_matched, unique=True):
@@ -53,7 +53,7 @@
 
 
 ###############################################################################
-### BASE ALIGNER OBJECT #######################################################
+# BASE ALIGNER OBJECT #########################################################
 ###############################################################################
 class BaseAligner(object):
 
@@ -126,7 +126,6 @@
     def _get_match(self, refset, targetset, ref_indexes=None,
                    target_indexes=None):
         # Build items
-        items = []
         ref_indexes = ref_indexes or xrange(len(refset))
         target_indexes = target_indexes or xrange(len(targetset))
         # Apply alignments
@@ -266,7 +265,7 @@
 
 
 ###############################################################################
-### PIPELINE ALIGNER OBJECT ##################################################
+# PIPELINE ALIGNER OBJECT ####################################################
 ###############################################################################
 class PipelineAligner(object):
     """ This pipeline will perform iterative alignments, removing each time
@@ -295,8 +294,6 @@
         target_index = range(len(targetset))
         self.refset_size = len(refset)
         self.targetset_size = len(targetset)
-        global_matched = {}
-        global_mat = lil_matrix((len(refset), len(targetset)))
         seen_refset = set()
         # Iteration over aligners
         for ind_aligner, aligner in enumerate(self.aligners):
--- a/nazca/rl/blocking.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/rl/blocking.py	Mon Sep 03 10:50:50 2018 +0200
@@ -39,7 +39,7 @@
 
 
 ###############################################################################
-### GENERAL BLOCKING ##########################################################
+# GENERAL BLOCKING ############################################################
 ###############################################################################
 class BaseBlocking(object):
     """ An abstract general blocking object that exposes
@@ -189,7 +189,7 @@
 
 
 ###############################################################################
-### KEY BLOCKING ##############################################################
+# KEY BLOCKING ################################################################
 ###############################################################################
 class KeyBlocking(BaseBlocking):
     """ This blocking technique is based on a a blocking criteria
@@ -257,7 +257,7 @@
 
 
 ###############################################################################
-### BIGRAM BLOCKING ###########################################################
+# BIGRAM BLOCKING #############################################################
 ###############################################################################
 class NGramBlocking(BaseBlocking):
     """ This blocking technique is based on a a n-gram key.
@@ -326,7 +326,7 @@
 
 
 ###############################################################################
-### SORTKEY BLOCKING ##########################################################
+# SORTKEY BLOCKING ############################################################
 ###############################################################################
 class SortedNeighborhoodBlocking(BaseBlocking):
     """ This blocking technique is based on a a sorting blocking criteria
@@ -376,7 +376,7 @@
 
 
 ###############################################################################
-### MERGE BLOCKING ############################################################
+# MERGE BLOCKING ##############################################################
 ###############################################################################
 class MergeBlocking(BaseBlocking):
     """ This blocking technique keep only one appearance of one given values,
@@ -451,7 +451,7 @@
 
 
 ###############################################################################
-### CLUSTERING-BASED BLOCKINGS ################################################
+# CLUSTERING-BASED BLOCKINGS ##################################################
 ###############################################################################
 class KmeansBlocking(BaseBlocking):
     """ A blocking technique based on Kmeans
@@ -514,7 +514,7 @@
 
 
 ###############################################################################
-### KDTREE BLOCKINGS ##########################################################
+# KDTREE BLOCKINGS ############################################################
 ###############################################################################
 class KdTreeBlocking(BaseBlocking):
     """ A blocking technique based on KdTree
@@ -579,7 +579,7 @@
 
 
 ###############################################################################
-### MINHASHING BLOCKINGS ######################################################
+# MINHASHING BLOCKINGS ########################################################
 ###############################################################################
 class MinHashingBlocking(BaseBlocking):
     """ A blocking technique based on MinHashing
@@ -643,7 +643,7 @@
 
 
 ###############################################################################
-### BLOCKING PIPELINE #########################################################
+# BLOCKING PIPELINE ###########################################################
 ###############################################################################
 class PipelineBlocking(BaseBlocking):
     """ Pipeline multiple blocking techniques
--- a/nazca/utils/dataio.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/utils/dataio.py	Mon Sep 03 10:50:50 2018 +0200
@@ -35,7 +35,7 @@
 
 
 ###############################################################################
-### UTILITY FUNCTIONS #########################################################
+# UTILITY FUNCTIONS ###########################################################
 ###############################################################################
 def autocast(data, encoding=None):
     """ Try to convert data into a specific type
@@ -54,7 +54,7 @@
 
 
 ###############################################################################
-### RQL FUNCTIONS #############################################################
+# RQL FUNCTIONS ###############################################################
 ###############################################################################
 def get_cw_cnx(endpoint):
     """ Get a cnx on a CubicWeb database
@@ -62,7 +62,6 @@
     from cubicweb import dbapi
     from cubicweb.__pkginfo__ import numversion
     from cubicweb.cwconfig import CubicWebConfiguration
-    from cubicweb.entities import AnyEntity
     CubicWebConfiguration.load_cwctl_plugins()
     config = CubicWebConfiguration.config_for(endpoint)
     if numversion < (3, 19):
@@ -104,7 +103,7 @@
 
 
 ###############################################################################
-### SPARQL FUNCTIONS ##########################################################
+# SPARQL FUNCTIONS ############################################################
 ###############################################################################
 def _sparqlexecute(endpoint, query, raise_on_error=False):
     """ Execute a sparql query and return the raw results
@@ -192,7 +191,7 @@
 
 
 ###############################################################################
-### FILE FUNCTIONS ############################################################
+# FILE FUNCTIONS ##############################################################
 ###############################################################################
 def parsefile(filename, indexes=None, nbmax=None, delimiter='\t',
               encoding='utf-8', field_size_limit=None,
@@ -320,7 +319,7 @@
 
 
 ###############################################################################
-### OUTPUT UTILITIES ##########################################################
+# OUTPUT UTILITIES ############################################################
 ###############################################################################
 class AbstractPrettyPrint(object):
     """ Pretty print the output of a named entities process
@@ -378,7 +377,7 @@
 <title>ner</title>
 </head>
 <body><div>%s</div></body>
-</html>'''
+</html>'''  # noqa
 
     def is_valid(self, html):
         try:
--- a/nazca/utils/distances.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/utils/distances.py	Mon Sep 03 10:50:50 2018 +0200
@@ -32,7 +32,7 @@
 
 
 ###############################################################################
-### UTILITY FUNCTIONS #########################################################
+# UTILITY FUNCTIONS ###########################################################
 ###############################################################################
 def cdist(distance_callback, refset, targetset, matrix_normalized=False,
           ref_indexes=None, target_indexes=None):
@@ -56,7 +56,6 @@
         (len(ref_indexes),
          len(target_indexes)),
         dtype='float32')
-    size = distmatrix.shape
     for i, iref in enumerate(ref_indexes):
         for j, jref in enumerate(target_indexes):
             d = 1
@@ -111,7 +110,7 @@
 
 
 ###############################################################################
-### NUMERICAL DISTANCES #######################################################
+# NUMERICAL DISTANCES #########################################################
 ###############################################################################
 def euclidean(a, b):
     """ Simple euclidian distance
@@ -124,7 +123,7 @@
 
 
 ###############################################################################
-### STRING DISTANCES ##########################################################
+# STRING DISTANCES ############################################################
 ###############################################################################
 def exact_match(a, b):
     """ The simplest distance, defined as 0 if both values are equal, 1 elsewise.
@@ -270,7 +269,7 @@
 
 
 ###############################################################################
-### TEMPORAL DISTANCES ########################################################
+# TEMPORAL DISTANCES ##########################################################
 ###############################################################################
 if DATEUTIL_ENABLED:
     class FrenchParserInfo(dateparser.parserinfo):
@@ -325,7 +324,7 @@
 
 
 ###############################################################################
-### GEOGRAPHICAL DISTANCES ####################################################
+# GEOGRAPHICAL DISTANCES ######################################################
 ###############################################################################
 def geographical(pointa, pointb, in_radians=False, planet_radius=6371009,
                  units='m'):
@@ -360,7 +359,7 @@
 
 
 ###############################################################################
-### BASE PROCESSING ######################################################
+# BASE PROCESSING ########################################################
 ###############################################################################
 class BaseProcessing(object):
     """ A processing object used to provide an abstraction over the different
@@ -467,7 +466,7 @@
 
 
 ###############################################################################
-### CONCRETE PROCESSINGS #################################################
+# CONCRETE PROCESSINGS ###################################################
 ###############################################################################
 class ExactMatchProcessing(BaseProcessing):
     """ A processing based on the exact match (1 if a==b, 0 elsewise)
@@ -500,7 +499,8 @@
     """
 
     def __init__(self, ref_attr_index=None, target_attr_index=None,
-                 in_radians=False, planet_radius=6371009, units='m', weight=1, matrix_normalized=False):
+                 in_radians=False, planet_radius=6371009, units='m', weight=1,
+                 matrix_normalized=False):
         distance_callback = partial(geographical, in_radians=in_radians,
                                     planet_radius=planet_radius, units=units)
         super(GeographicalProcessing, self).__init__(ref_attr_index,
--- a/nazca/utils/minhashing.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/utils/minhashing.py	Mon Sep 03 10:50:50 2018 +0200
@@ -27,7 +27,7 @@
 
 
 ###############################################################################
-### UTILITY FUNCTIONS #########################################################
+# UTILITY FUNCTIONS ###########################################################
 ###############################################################################
 def randomhashfunction(zr):
     """ Return a random hash function, mapping x in Z to ZR
@@ -63,7 +63,7 @@
 
 
 ###############################################################################
-### MINHASHING ################################################################
+# MINHASHING ##################################################################
 ###############################################################################
 class Minlsh(object):
     """ Operate minhashing + locally-sensitive-hashing to find similar sentences
--- a/nazca/utils/normalize.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/utils/normalize.py	Mon Sep 03 10:50:50 2018 +0200
@@ -17,30 +17,31 @@
 
 import re
 from string import punctuation
-from warnings import warn
 from unicodedata import normalize as _uninormalize
 from functools import partial
 
 from six import text_type, string_types
 
-
-FRENCH_STOPWORDS = set([u'alors', u'au', u'aux', u'aucuns', u'aussi', u'autre', u'avant',
-                        u'avec', u'avoir', u'bon', u'car', u'ce', u'cela', u'ces', u'ceux', u'chaque',
-                        u'ci', u'comme', u'comment', u'dans', u'de', u'des', u'du', u'dedans', u'dehors',
-                        u'depuis', u'deux', u'devrait', u'doit', u'donc', u'dos', u'droite', u'début',
-                        u'elle', u'elles', u'en', u'encore', u'essai', u'est', u'et', u'eu', u'eux', u'fait',
-                        u'faites', u'fois', u'font', u'force', u'haut', u'hors', u'ici', u'il', u'ils',
-                        u'je', u'juste', u'la', u'le', u'les', u'leur', u'lui', u'là', u'ma', u'maintenant',
-                        u'mais', u'me', u'mes', u'moi', u'moins', u'mon', u'mot', u'même', u'ne',
-                        u'ni', u'nommés', u'nos',
-                        u'notre', u'nous', u'nouveaux', u'on', u'ou', u'où', u'par', u'parce', u'parole',
-                        u'pas', u'personnes', u'peut', u'peu', u'pièce', u'plupart', u'pour',
-                        u'pourquoi', u'quand', u'que', u'quel', u'quelle', u'quelles', u'quels', u'qui',
-                        u'sa', u'sans', u'se', u'ses', u'seulement', u'si', u'sien', u'son', u'sont', u'sous',
-                        u'soyez', u'sujet', u'sur', u'ta', u'tandis', u'tellement', u'te', u'tels', u'tes', u'toi',
-                        u'ton', u'tous', u'tout', u'trop', u'très', u'tu', u'un', u'une', u'valeur', u'voie',
-                        u'voient', u'vont', u'vos', u'votre', u'vous', u'vu', u'ça', u'étaient', u'état',
-                        u'étions', u'été', u'être'])
+FRENCH_STOPWORDS = set([
+    u'alors', u'au', u'aux', u'aucuns', u'aussi', u'autre', u'avant', u'avec',
+    u'avoir', u'bon', u'car', u'ce', u'cela', u'ces', u'ceux', u'chaque',
+    u'ci', u'comme', u'comment', u'dans', u'de', u'des', u'du', u'dedans',
+    u'dehors', u'depuis', u'deux', u'devrait', u'doit', u'donc', u'dos',
+    u'droite', u'début', u'elle', u'elles', u'en', u'encore', u'essai', u'est',
+    u'et', u'eu', u'eux', u'fait', u'faites', u'fois', u'font', u'force',
+    u'haut', u'hors', u'ici', u'il', u'ils', u'je', u'juste', u'la', u'le',
+    u'les', u'leur', u'lui', u'là', u'ma', u'maintenant', u'mais', u'me',
+    u'mes', u'moi', u'moins', u'mon', u'mot', u'même', u'ne', u'ni', u'nommés',
+    u'nos', u'notre', u'nous', u'nouveaux', u'on', u'ou', u'où', u'par',
+    u'parce', u'parole', u'pas', u'personnes', u'peut', u'peu', u'pièce',
+    u'plupart', u'pour', u'pourquoi', u'quand', u'que', u'quel', u'quelle',
+    u'quelles', u'quels', u'qui', u'sa', u'sans', u'se', u'ses', u'seulement',
+    u'si', u'sien', u'son', u'sont', u'sous', u'soyez', u'sujet', u'sur',
+    u'ta', u'tandis', u'tellement', u'te', u'tels', u'tes', u'toi', u'ton',
+    u'tous', u'tout', u'trop', u'très', u'tu', u'un', u'une', u'valeur',
+    u'voie', u'voient', u'vont', u'vos', u'votre', u'vous', u'vu', u'ça',
+    u'étaient', u'état', u'étions', u'été', u'être'
+])
 
 MANUAL_UNICODE_MAP = {
     u'\xa1': u'!',    # INVERTED EXCLAMATION MARK
@@ -62,7 +63,7 @@
 
 
 ###############################################################################
-### NORMALIZE FUNCTIONS #######################################################
+# NORMALIZE FUNCTIONS #########################################################
 ###############################################################################
 def unormalize(ustring, substitute=None):
     """replace diacritical characters with their corresponding ascii characters
@@ -205,7 +206,7 @@
 
 
 ###############################################################################
-### NORMALIZER OBJECTS ########################################################
+# NORMALIZER OBJECTS ##########################################################
 ###############################################################################
 class BaseNormalizer(object):
     """ A normalizer object used to provide an abstraction over the different
@@ -376,7 +377,7 @@
 
 
 ###############################################################################
-### JOIN NORMALIZER ###########################################################
+# JOIN NORMALIZER #############################################################
 ###############################################################################
 class JoinNormalizer(BaseNormalizer):
     """Normalizer that join multiple fields in only one.
@@ -407,7 +408,7 @@
 
 
 ###############################################################################
-### NORMALIZER PIPELINE #######################################################
+# NORMALIZER PIPELINE #########################################################
 ###############################################################################
 class NormalizerPipeline(BaseNormalizer):
     """ Pipeline of Normalizers
--- a/nazca/utils/tokenizer.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/nazca/utils/tokenizer.py	Mon Sep 03 10:50:50 2018 +0200
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 """ Tokenizer for sentences/words segmentation.
 """
-import itertools
 import collections
 import re
 
--- a/test/test_alignment.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_alignment.py	Mon Sep 03 10:50:50 2018 +0200
@@ -24,7 +24,7 @@
 else:
     import unittest2 as unittest
 import random
-random.seed(6) ### Make sure tests are repeatable
+random.seed(6)  # Make sure tests are repeatable
 from os import path
 
 from nazca.utils.normalize import simplify
@@ -52,10 +52,10 @@
         processings = (GeographicalProcessing(2, 2, units='km'),)
         aligner = alig.BaseAligner(threshold=30, processings=processings)
         mat, matched = aligner.align(refset, targetset)
-        true_matched = [(0,0), (0, 2), (1,2), (3,1)]
+        true_matched = [(0, 0), (0, 2), (1, 2), (3, 1)]
         for k, values in six.iteritems(matched):
             for v, distance in values:
-                self.assertIn((k,v), true_matched)
+                self.assertIn((k, v), true_matched)
 
     def test_blocking_align(self):
         refset = [['V1', 'label1', (6.14194444444, 48.67)],
@@ -68,7 +68,7 @@
                      ['T3', 'labelt3', (6.25, 48.91)],
                      ]
         # Creation of the aligner object
-        true_matched = set([(0,0), (0, 2), (1,2), (3,1)])
+        true_matched = set([(0, 0), (0, 2), (1, 2), (3, 1)])
         processings = (GeographicalProcessing(2, 2, units='km'),)
         aligner = alig.BaseAligner(threshold=30, processings=processings)
         blocking = blo.KdTreeBlocking(ref_attr_index=2,
@@ -94,7 +94,7 @@
                      ['T3', 'labelt3', (6.25, 48.91)],
                      ]
         # Creation of the aligner object
-        true_matched = set([(0,0), (0, 2), (1,2), (3,1)])
+        true_matched = set([(0, 0), (0, 2), (1, 2), (3, 1)])
         processings = (GeographicalProcessing(2, 2, units='km'),)
         aligner = alig.BaseAligner(threshold=30, processings=processings)
         aligner.register_blocking(blo.KdTreeBlocking(ref_attr_index=2,
@@ -109,10 +109,10 @@
 
     def test_unique_align(self):
         refset = [['V1', 'label1', (6.14194444444, 48.67)],
-                    ['V2', 'label2', (6.2, 49)],
-                    ['V3', 'label3', (5.1, 48)],
-                    ['V4', 'label4', (5.2, 48.1)],
-                    ]
+                  ['V2', 'label2', (6.2, 49)],
+                  ['V3', 'label3', (5.1, 48)],
+                  ['V4', 'label4', (5.2, 48.1)],
+                  ]
         targetset = [['T1', 'labelt1', (6.17, 48.7)],
                      ['T2', 'labelt2', (5.3, 48.2)],
                      ['T3', 'labelt3', (6.25, 48.91)],
@@ -183,8 +183,5 @@
             self.assertIn(m, matched_wo_distance)
 
 
-
-
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_blocking.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_blocking.py	Mon Sep 03 10:50:50 2018 +0200
@@ -25,10 +25,10 @@
 from os import path
 from functools import partial
 import random
-random.seed(6) ### Make sure tests are repeatable / Minhashing
+random.seed(6)  # Make sure tests are repeatable / Minhashing
 
-from nazca.utils.distances import (levenshtein, soundex, soundexcode,   \
-                                       jaccard, euclidean, geographical)
+from nazca.utils.distances import (levenshtein, soundex, soundexcode,
+                                   jaccard, euclidean, geographical)
 from nazca.rl.blocking import (KeyBlocking, SortedNeighborhoodBlocking,
                                MergeBlocking,
                                NGramBlocking, PipelineBlocking,
@@ -201,7 +201,7 @@
                      ['d', 'ccdd', 'bbaa']]
         true_pairs = [('3', 'c'), ('4', 'd')]
         blocking = NGramBlocking(ref_attr_index=2, target_attr_index=2,
-                                   ngram_size=2, depth=1)
+                                 ngram_size=2, depth=1)
         blocking.fit(refset, targetset)
         pairs = list(blocking.iter_id_pairs())
         self.assertEqual(len(pairs), len(true_pairs))
@@ -225,7 +225,7 @@
         """ Test sort reversing values
         """
         blocking = SortedNeighborhoodBlocking(ref_attr_index=1, target_attr_index=1,
-                                              key_func=lambda x:x[::-1], window_width=1)
+                                              key_func=lambda x: x[::-1], window_width=1)
         blocking.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
         blocks = list(blocking.iter_id_blocks())
         true_blocks = [(['a1'], ['b3']), (['a2'], ['b6']), (['a5'], ['b4']), (['a3'], ['b7', 'b1']),
@@ -237,10 +237,9 @@
 
 class MergeBlockingTest(unittest.TestCase):
 
-
     def test_merge_blocks(self):
         blocking = MergeBlocking(ref_attr_index=1, target_attr_index=None,
-                                 score_func=lambda x:x[2])
+                                 score_func=lambda x: x[2])
         refset = [('http://fr.wikipedia.org/wiki/Paris_%28Texas%29', 'Paris', 25898),
                   ('http://fr.wikipedia.org/wiki/Paris', 'Paris', 12223100),
                   ('http://fr.wikipedia.org/wiki/Saint-Malo', 'Saint-Malo', 46342)]
@@ -258,7 +257,7 @@
 
     def test_merge_blocks_targetset(self):
         blocking = MergeBlocking(ref_attr_index=None, target_attr_index=2,
-                                 score_func=lambda x:x[1])
+                                 score_func=lambda x: x[1])
         refset = [('Paris (Texas)', 25000),
                   ('Paris (France)', 12000000)]
         targetset = [('http://fr.wikipedia.org/wiki/Paris_%28Texas%29', 25898, 'Paris'),
@@ -279,10 +278,10 @@
 
     def test_clustering_blocking_kmeans(self):
         refset = [['V1', 'label1', (6.14194444444, 48.67)],
-                    ['V2', 'label2', (6.2, 49)],
-                    ['V3', 'label3', (5.1, 48)],
-                    ['V4', 'label4', (5.2, 48.1)],
-                    ]
+                  ['V2', 'label2', (6.2, 49)],
+                  ['V3', 'label3', (5.1, 48)],
+                  ['V4', 'label4', (5.2, 48.1)],
+                  ]
         targetset = [['T1', 'labelt1', (6.2, 48.9)],
                      ['T2', 'labelt2', (5.3, 48.2)],
                      ['T3', 'labelt3', (6.25, 48.91)],
@@ -291,7 +290,7 @@
             import sklearn as skl
         except ImportError:
             self.skipTest('Scikit learn does not seem to be installed')
-        if int(skl.__version__.split('-')[0].split('.')[1])<=11:
+        if int(skl.__version__.split('-')[0].split('.')[1]) <= 11:
             self.skipTest('Scikit learn version is too old - Skipping test')
         blocking = KmeansBlocking(ref_attr_index=2, target_attr_index=2)
         blocking.fit(refset, targetset)
@@ -362,7 +361,8 @@
                      ['b', 'aabb', 'ddcc'],
                      ['c', 'ccdd', 'aabb'],
                      ['d', 'ccdd', 'bbaa']]
-        true_pairs = [((0, '1'), (0, 'a')), ((1, '2'), (1, 'b')), ((2, '3'), (2, 'c')), ((3, '4'), (3, 'd'))]
+        true_pairs = [((0, '1'), (0, 'a')), ((1, '2'), (1, 'b')),
+                      ((2, '3'), (2, 'c')), ((3, '4'), (3, 'd'))]
         blocking_1 = NGramBlocking(ref_attr_index=1, target_attr_index=1,
                                    ngram_size=2, depth=1)
         blocking_2 = NGramBlocking(ref_attr_index=2, target_attr_index=2,
@@ -396,9 +396,5 @@
             self.assertIn(pair, pairs)
 
 
-
-
-
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_dataio.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_dataio.py	Mon Sep 03 10:50:50 2018 +0200
@@ -37,6 +37,7 @@
 
 TESTDIR = path.dirname(__file__)
 
+
 @contextmanager
 def tempdir():
     try:
@@ -45,7 +46,7 @@
     finally:
         try:
             shutil.rmtree(temp)
-        except:
+        except BaseException:
             pass
 
 
@@ -53,21 +54,21 @@
 
     def test_valid(self):
         from lxml import etree
-        if int(etree.__version__< '3.2.0'):
+        if int(etree.__version__ < '3.2.0'):
             # https://bugs.launchpad.net/lxml/+bug/673205
             self.skipTest('Lxml version to old for ValidXHTMLPrettyPrint')
         self.assertTrue(ValidXHTMLPrettyPrint().is_valid(u'<p>coucou</p>'))
 
     def test_valid_unicode(self):
         from lxml import etree
-        if int(etree.__version__< '3.2.0'):
+        if int(etree.__version__ < '3.2.0'):
             # https://bugs.launchpad.net/lxml/+bug/673205
             self.skipTest('Lxml version to old for ValidXHTMLPrettyPrint')
         self.assertTrue(ValidXHTMLPrettyPrint().is_valid(u'<p>hé</p>'))
 
     def test_invalid(self):
         from lxml import etree
-        if int(etree.__version__< '3.2.0'):
+        if int(etree.__version__ < '3.2.0'):
             # https://bugs.launchpad.net/lxml/+bug/673205
             self.skipTest('Lxml version to old for ValidXHTMLPrettyPrint')
         self.assertFalse(ValidXHTMLPrettyPrint().is_valid(u'<p><div>coucou</div></p>'))
@@ -107,7 +108,7 @@
                           [3, (23, 2.17), u'cherry', u'flower']], data)
 
         data = parsefile(path.join(TESTDIR, 'data', 'file2parse'),
-                         [0, (2, 3), 4, 1], delimiter=',', formatopt={2:str})
+                         [0, (2, 3), 4, 1], delimiter=',', formatopt={2: str})
         self.assertEqual([[1, ('12', 19), u'apple', u'house'],
                           [2, ('21.9', 19), u'stramberry', u'horse'],
                           [3, ('23', 2.17), u'cherry', u'flower']], data)
@@ -118,7 +119,6 @@
                           ['2', ('21.9', '19'), 'stramberry', 'horse'],
                           ['3', ('23', '2.17'), 'cherry', 'flower']], data)
 
-
     def test_autocast(self):
         self.assertEqual(autocast('1'), 1)
         self.assertEqual(autocast('1.'), 1.)
@@ -223,34 +223,34 @@
     @unittest.skipUnless(SPARQL_ENABLED, 'python-sparqlwrapper is not installed')
     def test_sparql_autocast(self):
         alignset = sparqlquery('http://dbpedia.inria.fr/sparql',
-                                 'prefix db-owl: <http://dbpedia.org/ontology/>'
-                                 'prefix db-prop: <http://fr.dbpedia.org/property/>'
-                                 'select ?ville, ?name, ?long, ?lat where {'
-                                 ' ?ville db-owl:country <http://fr.dbpedia.org/resource/France> .'
-                                 ' ?ville rdf:type db-owl:PopulatedPlace .'
-                                 ' ?ville db-owl:populationTotal ?population .'
-                                 ' ?ville foaf:name ?name .'
-                                 ' ?ville db-prop:longitude ?long .'
-                                 ' ?ville db-prop:latitude ?lat .'
-                                 ' FILTER (?population > 1000)'
-                                 '} LIMIT 100', indexes=[0, 1, (2, 3)])
+                               'prefix db-owl: <http://dbpedia.org/ontology/>'
+                               'prefix db-prop: <http://fr.dbpedia.org/property/>'
+                               'select ?ville, ?name, ?long, ?lat where {'
+                               ' ?ville db-owl:country <http://fr.dbpedia.org/resource/France> .'
+                               ' ?ville rdf:type db-owl:PopulatedPlace .'
+                               ' ?ville db-owl:populationTotal ?population .'
+                               ' ?ville foaf:name ?name .'
+                               ' ?ville db-prop:longitude ?long .'
+                               ' ?ville db-prop:latitude ?lat .'
+                               ' FILTER (?population > 1000)'
+                               '} LIMIT 100', indexes=[0, 1, (2, 3)])
         self.assertEqual(len(alignset), 100)
         self.assertTrue(isinstance(alignset[0][2][0], float))
 
     @unittest.skipUnless(SPARQL_ENABLED, 'python-sparqlwrapper is not installed')
     def test_sparql_no_autocast(self):
         alignset = sparqlquery('http://dbpedia.inria.fr/sparql',
-                                 'prefix db-owl: <http://dbpedia.org/ontology/>'
-                                 'prefix db-prop: <http://fr.dbpedia.org/property/>'
-                                 'select ?ville, ?name, ?long, ?lat where {'
-                                 ' ?ville db-owl:country <http://fr.dbpedia.org/resource/France> .'
-                                 ' ?ville rdf:type db-owl:PopulatedPlace .'
-                                 ' ?ville db-owl:populationTotal ?population .'
-                                 ' ?ville foaf:name ?name .'
-                                 ' ?ville db-prop:longitude ?long .'
-                                 ' ?ville db-prop:latitude ?lat .'
-                                 ' FILTER (?population > 1000)'
-                                 '} LIMIT 100', indexes=[0, 1, (2, 3)], autocast_data=False)
+                               'prefix db-owl: <http://dbpedia.org/ontology/>'
+                               'prefix db-prop: <http://fr.dbpedia.org/property/>'
+                               'select ?ville, ?name, ?long, ?lat where {'
+                               ' ?ville db-owl:country <http://fr.dbpedia.org/resource/France> .'
+                               ' ?ville rdf:type db-owl:PopulatedPlace .'
+                               ' ?ville db-owl:populationTotal ?population .'
+                               ' ?ville foaf:name ?name .'
+                               ' ?ville db-prop:longitude ?long .'
+                               ' ?ville db-prop:latitude ?lat .'
+                               ' FILTER (?population > 1000)'
+                               '} LIMIT 100', indexes=[0, 1, (2, 3)], autocast_data=False)
         self.assertEqual(len(alignset), 100)
         self.assertFalse(isinstance(alignset[0][2][0], float))
 
@@ -262,4 +262,3 @@
 
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_distances.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_distances.py	Mon Sep 03 10:50:50 2018 +0200
@@ -24,7 +24,7 @@
 else:
     import unittest2 as unittest
 import random
-random.seed(6) ### Make sure tests are repeatable
+random.seed(6)  # Make sure tests are repeatable
 from dateutil import parser as dateparser
 
 from nazca.utils.distances import (levenshtein, soundex, soundexcode,
@@ -50,26 +50,26 @@
         self.assertEqual(levenshtein('bon', 'bonjour'), 4)
         self.assertEqual(levenshtein('Victor Hugo', 'Hugo Victor'), 0)
 
-        #Test symetry
+        # Test symetry
         self.assertEqual(levenshtein('Victor Hugo', 'Vitor Wugo'),
                          levenshtein('Vitor Wugo', 'Victor Hugo'))
 
     def test_soundex(self):
-        ##     Test extracted from Wikipedia en :
-        #Using this algorithm :
-        #both "Robert" and "Rupert" return the same string "R163"
-        #while "Rubin" yields "R150".
+        # Test extracted from Wikipedia en :
+        # Using this algorithm :
+        # both "Robert" and "Rupert" return the same string "R163"
+        # while "Rubin" yields "R150".
         #
         # "Ashcraft" and "Ashcroft" both yield "A261" and not "A226"
-        #(the chars 's' and 'c' in the name would receive a single number
-        #of 2 and not 22 since an 'h' lies in between them).
+        # (the chars 's' and 'c' in the name would receive a single number
+        # of 2 and not 22 since an 'h' lies in between them).
         #
         # "Tymczak" yields "T522" not "T520"
-        #(the chars 'z' and 'k' in the name are coded as 2 twice since a vowel
-        #lies in between them).
+        # (the chars 'z' and 'k' in the name are coded as 2 twice since a vowel
+        # lies in between them).
         #
-        #"Pfister" yields "P236" not "P123" (the first two letters have the same
-        #number and are coded once as 'P').
+        # "Pfister" yields "P236" not "P123" (the first two letters have the same
+        # number and are coded once as 'P').
 
         self.assertEqual(soundexcode('Robert', 'english'), 'R163')
         self.assertEqual(soundexcode('Rubert', 'english'), 'R163')
@@ -82,21 +82,21 @@
         self.assertEqual(soundex('Rubin', 'Robert', 'english'), 1)
 
     def test_jaccard(self):
-        #The jaccard indice between two words is the ratio of the number of
-        #identical letters and the total number of letters
-        #Each letter is counted once only
-        #The distance is 1 - jaccard_indice
+        # The jaccard indice between two words is the ratio of the number of
+        # identical letters and the total number of letters
+        # Each letter is counted once only
+        # The distance is 1 - jaccard_indice
 
         self.assertEqual(jaccard('bonjour', 'bonjour'), 0.0)
         self.assertAlmostEqual(jaccard('boujour', 'bonjour'), 1, 2)
         self.assertAlmostEqual(jaccard(u'sacré rubert', u'sacré hubert'), 0.667, 2)
 
-        #Test symetry
+        # Test symetry
         self.assertEqual(jaccard('orange', 'morange'),
                          jaccard('morange', 'orange'))
 
     def test_temporal(self):
-        #Test the distance between two dates. The distance can be given in
+        # Test the distance between two dates. The distance can be given in
         #``days``, ``months`` or ``years``
         try:
             from nazca.distances import temporal
@@ -105,21 +105,21 @@
         self.assertEqual(temporal('14 aout 1991', '14/08/1991'), 0)
         self.assertEqual(temporal('14 aout 1991', '08/14/1991'), 0)
         self.assertEqual(temporal('14 aout 1991', '08/15/1992'), 367)
-        #Test a case of ambiguity
+        # Test a case of ambiguity
         self.assertEqual(temporal('1er mai 2012', '01/05/2012'), 0)
         self.assertEqual(temporal('1er mai 2012', '05/01/2012', dayfirst=False), 0)
-        #Test the different granularities available
+        # Test the different granularities available
         self.assertAlmostEqual(temporal('14 aout 1991', '08/15/1992', 'years'), 1.0, 1)
         self.assertAlmostEqual(temporal('1991', '1992', 'years'), 1.0, 1)
         self.assertAlmostEqual(temporal('13 mars', '13 mai', 'months'), 2.0, 1)
         self.assertAlmostEqual(temporal('13 march', '13 may', 'months',
                                         parserinfo=dateparser.parserinfo), 2.0, 1)
 
-        #Test fuzzyness
+        # Test fuzzyness
         self.assertEqual(temporal('Jean est né le 1er octobre 1958',
                                   'Le 01-10-1958, Jean est né'), 0)
 
-        #Test symetry
+        # Test symetry
         self.assertEqual(temporal('14-08-1991', '15/08/1992'),
                          temporal('15/08/1992', '14/08/1991'))
 
@@ -128,7 +128,7 @@
         self.assertEqual(euclidean(-10, 11), 21)
         self.assertEqual(euclidean('-10', '11'), 21)
 
-        #Test symetry
+        # Test symetry
         self.assertEqual(euclidean(10, 11),
                          euclidean(11, 10))
 
@@ -160,8 +160,8 @@
 
     def test_geographical_3(self):
         # Use two columns of the record
-        processing = GeographicalProcessing(ref_attr_index=(1,2),
-                                            target_attr_index=(1,2),
+        processing = GeographicalProcessing(ref_attr_index=(1, 2),
+                                            target_attr_index=(1, 2),
                                             units='km')
         _input = (('paris', 48.856578, 2.351828),
                   ('london', 51.504872, -0.07857))
@@ -213,8 +213,8 @@
     def test_operation(self):
         m = self.matrix
         self.assertTrue((3 * m == m * 3).all())
-        self.assertTrue(((m - 0.5*m) == (0.5 * m)).all())
-        self.assertTrue(((m + 10*m - m * 3) == (8 * m)).all())
+        self.assertTrue(((m - 0.5 * m) == (0.5 * m)).all())
+        self.assertTrue(((m + 10 * m - m * 3) == (8 * m)).all())
 
     def test_pdist(self):
         _input = [u'Victor Wugo', u'Albert Camus', 'Albert Camu']
@@ -268,4 +268,3 @@
 
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_filters.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_filters.py	Mon Sep 03 10:50:50 2018 +0200
@@ -47,16 +47,16 @@
         self.assertEqual(named_entities,
                          [('http://example.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46))),
+                                 sentence=Sentence(indice=1, start=39, end=46))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46)))])
+                                 sentence=Sentence(indice=1, start=39, end=46)))])
 
     def test_occurence_filter_max_occ(self):
         """ Test occurence filter """
@@ -70,7 +70,7 @@
         self.assertEqual(named_entities,
                          [('http://example.com/everyone', None,
                            Token(word='everyone', start=6, end=14,
-                                           sentence=Sentence(indice=0, start=0, end=38))),])
+                                 sentence=Sentence(indice=0, start=0, end=38))), ])
 
     def test_disambiguation_word_length(self):
         """ Test occurence filter """
@@ -121,6 +121,6 @@
                            Token(word='toto', start=21, end=25,
                                  sentence=Sentence(indice=1, start=17, end=26)))])
 
+
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_minhashing.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_minhashing.py	Mon Sep 03 10:50:50 2018 +0200
@@ -24,7 +24,7 @@
 from functools import partial
 from os import path
 import random
-random.seed(6) ### Make sure tests are repeatable
+random.seed(6)  # Make sure tests are repeatable
 
 from nazca.utils.normalize import simplify
 from nazca.utils.minhashing import Minlsh, count_vectorizer_func
@@ -33,7 +33,6 @@
 TESTDIR = path.dirname(__file__)
 
 
-
 class MinLSHTest(unittest.TestCase):
 
     def test_iter_wordgrams(self):
@@ -61,13 +60,12 @@
                      u"Pour quelle occasion vous êtes-vous apprêtée ?",
                      u"Je les vis ensemble à plusieurs occasions.",
                      u"Je les ai vus ensemble à plusieurs occasions.",
-                    ]
+                     ]
         minlsh = Minlsh()
         # XXX Should works independantly of the seed. Unstability due to the bands number ?
         minlsh.train((simplify(s, FRENCH_LEMMAS, remove_stopwords=True) for s in sentences), 1, 200)
-        self.assertEqual(set([(0, 1), (2, 3), (5,6)]), minlsh.predict(0.4))
+        self.assertEqual(set([(0, 1), (2, 3), (5, 6)]), minlsh.predict(0.4))
 
 
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_ner.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_ner.py	Mon Sep 03 10:50:50 2018 +0200
@@ -40,13 +40,13 @@
         lexicon = {'everyone': 'http://example.com/everyone',
                    'me': 'http://example.com/me'}
         source = NerSourceLexicon(lexicon)
-        self.assertEqual(source.query_word('me'), ['http://example.com/me',])
-        self.assertEqual(source.query_word('everyone'), ['http://example.com/everyone',])
+        self.assertEqual(source.query_word('me'), ['http://example.com/me', ])
+        self.assertEqual(source.query_word('everyone'), ['http://example.com/everyone', ])
         self.assertEqual(source.query_word('me everyone'), [])
         self.assertEqual(source.query_word('toto'), [])
         # Token
         token = Token('me', 0, 2, None)
-        self.assertEqual(source.recognize_token(token), ['http://example.com/me',])
+        self.assertEqual(source.recognize_token(token), ['http://example.com/me', ])
         token = Token('ma', 0, 2, None)
         self.assertEqual(source.recognize_token(token), [])
 
@@ -54,13 +54,13 @@
         """ Test rql source """
         source = NerSourceRql('http://www.cubicweb.org',
                               'Any U LIMIT 1 WHERE X cwuri U, X name "%(word)s"')
-        self.assertEqual(source.query_word('apycot'), [u'http://www.cubicweb.org/1310453',])
+        self.assertEqual(source.query_word('apycot'), [u'http://www.cubicweb.org/1310453', ])
 
     @unittest.skipUnless(SPARQL_ENABLED, 'python-sparqlwrapper is not installed')
     def test_sparql_source(self):
         """ Test sparql source """
         source = NerSourceSparql(u'http://dbpedia.org/sparql',
-                             u'''SELECT DISTINCT ?uri
+                                 u'''SELECT DISTINCT ?uri
                                  WHERE {
                                  ?uri rdf:type <http://dbpedia.org/ontology/ProgrammingLanguage> ;
                                       dbpedia-owl:designer <http://dbpedia.org/resource/Guido_van_Rossum> ;
@@ -82,13 +82,13 @@
         self.assertEqual(named_entities,
                          [('http://example.com/everyone', None,
                            Token(word='everyone', start=6, end=14,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46)))])
+                                 sentence=Sentence(indice=1, start=39, end=46)))])
 
     @unittest.skipUnless(NLTK_AVAILABLE, 'nltk is not available')
     def test_ner_process_multisources(self):
@@ -103,45 +103,45 @@
         self.assertEqual(named_entities,
                          [('http://example.com/everyone', None,
                            Token(word='everyone', start=6, end=14,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46))),
+                                 sentence=Sentence(indice=1, start=39, end=46))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46)))])
+                                 sentence=Sentence(indice=1, start=39, end=46)))])
         # Two sources, unique
         ner = NerProcess((source1, source2), unique=True)
         named_entities = ner.process_text(text)
         self.assertEqual(named_entities,
                          [('http://example.com/everyone', None,
                            Token(word='everyone', start=6, end=14,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46)))])
+                                 sentence=Sentence(indice=1, start=39, end=46)))])
         # Two sources inversed, unique
         ner = NerProcess((source2, source1), unique=True)
         named_entities = ner.process_text(text)
         self.assertEqual(named_entities,
                          [('http://example.com/everyone', None,
                            Token(word='everyone', start=6, end=14,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46)))])
+                                 sentence=Sentence(indice=1, start=39, end=46)))])
 
     @unittest.skipUnless(NLTK_AVAILABLE, 'nltk is not available')
     def test_ner_process_add_sources(self):
@@ -155,32 +155,32 @@
         self.assertEqual(named_entities,
                          [('http://example.com/everyone', None,
                            Token(word='everyone', start=6, end=14,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46))),])
+                                 sentence=Sentence(indice=1, start=39, end=46))), ])
         # Two sources, not unique
         ner.add_ner_source(source2)
         named_entities = ner.process_text(text)
         self.assertEqual(named_entities,
                          [('http://example.com/everyone', None,
                            Token(word='everyone', start=6, end=14,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=26, end=28,
-                                           sentence=Sentence(indice=0, start=0, end=38))),
+                                 sentence=Sentence(indice=0, start=0, end=38))),
                           ('http://example.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46))),
+                                 sentence=Sentence(indice=1, start=39, end=46))),
                           ('http://example2.com/me', None,
                            Token(word='me', start=43, end=45,
-                                           sentence=Sentence(indice=1, start=39, end=46)))])
+                                 sentence=Sentence(indice=1, start=39, end=46)))])
 
     @unittest.skipUnless(NLTK_AVAILABLE, 'nltk is not available')
     def test_ner_process_preprocess(self):
@@ -190,7 +190,7 @@
                                    'me': 'http://example.com/me'})
         preprocessor = NerStopwordsFilterPreprocessor()
         ner = NerProcess((source,),
-                                  preprocessors=(preprocessor,))
+                         preprocessors=(preprocessor,))
         named_entities = ner.process_text(text)
         self.assertEqual(named_entities, [('http://example.com/toto', None,
                                            Token(word='Toto', start=6, end=10,
@@ -244,4 +244,3 @@
 
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_normalize.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_normalize.py	Mon Sep 03 10:50:50 2018 +0200
@@ -40,16 +40,16 @@
 
     def test_unormalize(self):
         self.assertEqual(lunormalize(u'bépoèàÀêùï'),
-                                     u'bepoeaaeui')
+                         u'bepoeaaeui')
 
     def test_simplify(self):
-        self.assertEqual(simplify(u"J'aime les frites, les pommes et les" \
+        self.assertEqual(simplify(u"J'aime les frites, les pommes et les"
                                   u" scoubidous !", FRENCH_LEMMAS),
                          u"aimer frites pomme scoubidou")
 
     def test_tokenize(self):
         self.assertEqual(tokenize(u"J'aime les frites !"),
-                         [u"J'", u'aime', u'les', u'frites', u'!',])
+                         [u"J'", u'aime', u'les', u'frites', u'!', ])
 
     def test_lemmatizer(self):
         self.assertEqual(lemmatized(u'sacré rubert', FRENCH_LEMMAS), u'sacré rubert')
@@ -65,14 +65,14 @@
 
     def test_format(self):
         string = u'[Victor Hugo - 26 fev 1802 / 22 mai 1885]'
-        regex  = r'\[(?P<firstname>\w+) (?P<lastname>\w+) - ' \
-                 r'(?P<birthdate>.*) \/ (?P<deathdate>.*?)\]'
+        regex = r'\[(?P<firstname>\w+) (?P<lastname>\w+) - ' \
+            r'(?P<birthdate>.*) \/ (?P<deathdate>.*?)\]'
         output = u'%(lastname)s, %(firstname)s (%(birthdate)s - %(deathdate)s)'
         self.assertEqual(rgxformat(string, regex, output),
                          u'Hugo, Victor (26 fev 1802 - 22 mai 1885)')
 
         string = u'http://perdu.com/42/supertop/cool'
-        regex  = r'http://perdu.com/(?P<id>\d+).*'
+        regex = r'http://perdu.com/(?P<id>\d+).*'
         output = u'%(id)s'
         self.assertEqual(rgxformat(string, regex, output),
                          u'42')
@@ -87,7 +87,7 @@
     def test_normalizer_record(self):
         normalizer = BaseNormalizer(lunormalize, attr_index=1)
         record = ('a1', u'bépoèàÀêùï')
-        self.assertEqual(normalizer.normalize(record), ['a1',u'bepoeaaeui'])
+        self.assertEqual(normalizer.normalize(record), ['a1', u'bepoeaaeui'])
 
     def test_normalizer_dataset(self):
         normalizer = BaseNormalizer(lunormalize, attr_index=1)
@@ -109,12 +109,13 @@
     def test_unormalize_record(self):
         normalizer = UnicodeNormalizer(attr_index=1)
         record = ('a1', u'bépoèàÀêùï')
-        self.assertEqual(['a1',u'bepoeaaeui'], normalizer.normalize(record))
+        self.assertEqual(['a1', u'bepoeaaeui'], normalizer.normalize(record))
 
     def test_simplify(self):
         normalizer = SimplifyNormalizer(lemmas=FRENCH_LEMMAS)
-        self.assertEqual(normalizer.normalize(u"J'aime les frites, les pommes et les scoubidous !")
-                         , u"aimer frites pomme scoubidou")
+        self.assertEqual(
+            normalizer.normalize(u"J'aime les frites, les pommes et les scoubidous !"),
+            u"aimer frites pomme scoubidou")
 
     def test_simplify_record(self):
         normalizer = SimplifyNormalizer(attr_index=1, lemmas=FRENCH_LEMMAS)
@@ -124,12 +125,12 @@
 
     def test_tokenize(self):
         normalizer = TokenizerNormalizer()
-        self.assertEqual([u"J'", u'aime', u'les', u'frites', u'!',],
+        self.assertEqual([u"J'", u'aime', u'les', u'frites', u'!', ],
                          normalizer.normalize(u"J'aime les frites !"))
 
     def test_tokenize_record(self):
         normalizer = TokenizerNormalizer(attr_index=1)
-        self.assertEqual(['a1', [u"J'", u'aime', u'les', u'frites', u'!',]],
+        self.assertEqual(['a1', [u"J'", u'aime', u'les', u'frites', u'!', ]],
                          normalizer.normalize(['a1', u"J'aime les frites !"]))
 
     def test_lemmatizer(self):
@@ -173,18 +174,17 @@
                          normalizer.normalize(['a1', u'http://perdu.com/42/supertop/cool']))
 
     def test_join(self):
-        normalizer = JoinNormalizer((1,2))
+        normalizer = JoinNormalizer((1, 2))
         self.assertEqual(normalizer.normalize((1, 'ab', 'cd', 'e', 5)), [1, 'e', 5, 'ab, cd'])
 
 
-
 class NormalizerPipelineTestCase(unittest.TestCase):
 
     def test_normalizer(self):
         regexp = r'(?P<id>\d+);{["]?(?P<firstname>.+[^"])["]?};{(?P<surname>.*)};{};{};(?P<date>.*)'
         output = u'%(id)s\t%(firstname)s\t%(surname)s\t%(date)s'
         n1 = RegexpNormalizer(regexp, u'%(id)s\t%(firstname)s\t%(surname)s\t%(date)s')
-        n2 = BaseNormalizer(callback= lambda x: x.split('\t'))
+        n2 = BaseNormalizer(callback=lambda x: x.split('\t'))
         n3 = UnicodeNormalizer(attr_index=(1, 2, 3))
         pipeline = NormalizerPipeline((n1, n2, n3))
         r1 = u'1111;{"Toto tàtà"};{Titi};{};{};'
@@ -193,4 +193,3 @@
 
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_preprocessors.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_preprocessors.py	Mon Sep 03 10:50:50 2018 +0200
@@ -101,4 +101,3 @@
 
 if __name__ == '__main__':
     unittest.main()
-
--- a/test/test_tokenizer.py	Mon Sep 03 10:26:24 2018 +0200
+++ b/test/test_tokenizer.py	Mon Sep 03 10:50:50 2018 +0200
@@ -44,7 +44,14 @@
                                         token_max_size=3)
         tokens = list(tokenizer)
         self.assertEqual(len(tokens), 18)
-        t1 = Token(word='Hello everyone this', start=0, end=20, sentence=Sentence(indice=0, start=0, end=38))
+        t1 = Token(
+            word='Hello everyone this',
+            start=0,
+            end=20,
+            sentence=Sentence(
+                indice=0,
+                start=0,
+                end=38))
         self.assertEqual(tokens[0], t1)
         t2 = Token(word='And', start=39, end=42, sentence=Sentence(indice=1, start=39, end=46))
         self.assertEqual(tokens[16], t2)
@@ -67,7 +74,14 @@
                                         token_max_size=3)
         tokens = list(tokenizer)
         self.assertEqual(len(tokens), 10)
-        t1 =  Token(word='me speaking', start=26, end=37, sentence=Sentence(indice=0, start=0, end=38))
+        t1 = Token(
+            word='me speaking',
+            start=26,
+            end=37,
+            sentence=Sentence(
+                indice=0,
+                start=0,
+                end=38))
         self.assertEqual(tokens[8], t1)
 
     def test_richstringtokenizer_maxsize(self):
@@ -99,4 +113,3 @@
 
 if __name__ == '__main__':
     unittest.main()
-