[aligner] Enable the user to give the signature matrix for minhashing
author Simon Chabot <simon.chabot@logilab.fr>
Fri, 09 Nov 2012 10:02:25 +0100
changeset 103 d9be0462db20
parent 102 87300f4d6530
child 104 8322d728be8a
[aligner] Enable the user to give the signature matrix for minhashing
aligner.py
--- a/aligner.py	Thu Nov 08 16:36:50 2012 +0100
+++ b/aligner.py	Fri Nov 09 10:02:25 2012 +0100
@@ -36,8 +36,8 @@
 
 
 
-def findneighbours(alignset, targetset, indexes = (1, 1), mode = 'kdtree',
-                   threshold = 0.1, k = 1, n_clusters = None):
+def findneighbours(alignset, targetset, indexes=(1, 1), mode='kdtree',
+                   threshold=0.1, n_clusters=None, kwordsgram=1, siglen=200):
 
     SEARCHERS = set(['kdtree', 'minhashing', 'kmeans', 'minibatch'])
     mode = mode.lower()
@@ -69,7 +69,7 @@
         idelement = ''
         minhasher.train([elt[indexes[0]] or idelement for elt in alignset] +
                         [elt[indexes[1]] or idelement for elt in targetset],
-                        k)
+                        kwordsgram, siglen)
         rawneighbours = minhasher.findsimilarsentences(threshold)
         neighbours = []
         for data in rawneighbours: #XXX: Return an iterator