[core] Fix issue in sliding window for the Nerdy Process.
author Vincent Michel <vincent.michel@logilab.fr>
Thu, 27 Jun 2013 18:26:28 +0200
changeset 332 5f12e8b7c094
parent 331 f91e69ca8fca
child 333 47a62334bfaa
[core] Fix issue in sliding window for the Nerdy Process.
core.py
test/test_core.py
--- a/core.py	Thu Jun 27 18:25:48 2013 +0200
+++ b/core.py	Thu Jun 27 18:26:28 2013 +0200
@@ -356,6 +356,7 @@
                 for uri in process.recognize_token(token):
                     named_entities.append((uri, process.name, token))
                     recognized = True
+                    last_stop = token.end
                     if self.unique:
                         break
                 if recognized and self.unique:
--- a/test/test_core.py	Thu Jun 27 18:25:48 2013 +0200
+++ b/test/test_core.py	Thu Jun 27 18:26:28 2013 +0200
@@ -201,6 +201,24 @@
                                            Token(word='Toto', start=6, end=10,
                                                  sentence=Sentence(indice=0, start=0, end=34)))])
 
+    def test_nerdy_process_chained_word(self):
+        """ Test nerdy process """
+        text = 'Hello everyone me, this is   me speaking. And me.'
+        source = core.NerdySourceLexical({'everyone': 'http://example.com/everyone',
+                                          'everyone me': 'http://example.com/everyone_me',
+                                          'me': 'http://example.com/me'})
+        nerdy = core.NerdyProcess((source,))
+        named_entities = nerdy.process_text(text)
+        self.assertEqual(named_entities,
+                         [('http://example.com/everyone_me', None,
+                           Token(word='everyone me', start=6, end=17,
+                                 sentence=Sentence(indice=0, start=0, end=41))),
+                          ('http://example.com/me', None,
+                           Token(word='me', start=29, end=31,
+                                 sentence=Sentence(indice=0, start=0, end=41))),
+                          ('http://example.com/me', None,
+                           Token(word='me', start=46, end=48, sentence=Sentence(indice=1, start=41, end=49)))])
+
 
 if __name__ == '__main__':
     unittest2.main()