[minhashing] No need for a temporary 3D array draft
author Jérôme Roy <jerome.roy@logilab.fr>
Thu, 10 Dec 2015 16:10:38 +0100
changeset 510 6aeb7c0d40c7
parent 509 a3bc7cecb80f
[minhashing] No need for a temporary 3D array
utils/minhashing.py
--- a/utils/minhashing.py	Tue Dec 08 16:38:32 2015 +0100
+++ b/utils/minhashing.py	Thu Dec 10 16:10:38 2015 +0100
@@ -153,9 +153,9 @@
         while rows:
             doc = rows.pop(0)
             #Concatenate the needed rows.
-            tmp = np.dstack([hashvalues[:, r] for r in doc])
+            tmp = hashvalues[:,doc]
             #Take the mininum of hashes
-            sig[:, docind] = np.min(tmp[0], 1)
+            sig[:, docind] = np.min(tmp, 1)
             docind += 1
             if self._verbose and docind % 50000 == 0:
                 print (docind * 100) / nrows