[minhashing] Remove deprecated __main__
author Vincent Michel <vincent.michel@logilab.fr>
Mon, 26 Nov 2012 10:37:43 +0100
changeset 163 06359f2b0af7
parent 162 369ab3cfac44
child 164 0d75690e81b6
[minhashing] Remove deprecated __main__
minhashing.py
--- a/minhashing.py	Wed Nov 21 10:17:48 2012 +0100
+++ b/minhashing.py	Mon Nov 26 10:37:43 2012 +0100
@@ -181,52 +181,3 @@
             similars.update(set(tuple(v) for v in buckets.itervalues()
                                          if len(v) > 1))
         return similars
-
-
-if __name__ == '__main__':
-    from alignment.normalize import (loadlemmas, simplify)
-    from alignment.dataio import parsefile
-    from time import time
-    import matplotlib.pyplot as plt
-    from scipy import polyfit
-
-    sentences = [s[0] for s in parsefile('data/US.txt', indexes=[1],
-                               field_size_limit=1000000000, nbmax=None) if s[0]]
-
-
-    lemmas = loadlemmas('data/french_lemmas.txt')
-    minlsh = Minlsh()
-
-    def compute_complexite(size):
-        print "%d%%" % size
-        t0 = time()
-        length = int(size*len(sentences)/100)
-        minlsh.train((simplify(s, lemmas) for s in sentences[:length]), 1, 100)
-        t1 = time()
-        r = minlsh.predict(0.3)
-        t2 = time()
-        print 'Nb sentences : %d' % length
-        print 'Training + signaturing time : %.3fs' % (t1 - t0)
-        print 'Similarity %.3fs' % (t2 - t1)
-        print 'Total : %.3fs' % (t2 - t0)
-        return len(sentences[:length]), (t1 - t0), (t2 - t1)
-
-    x = []
-    ytrain = []
-    ysimil = []
-    ycumul = []
-    print "Start the computation"
-    for size in xrange(1, 100, 5):
-        p = compute_complexite(size)
-        x.append(p[0])
-        ytrain.append(p[1])
-        ysimil.append(p[2])
-        ycumul.append(p[1] + p[2])
-
-    plt.plot(x, ytrain, label='trainning')
-    plt.plot(x, ysimil, label='buckets')
-    plt.plot(x, ycumul, label='cumul')
-    plt.legend()
-    print polyfit(x, ytrain, 1)
-    print polyfit(x, ysimil, 1)
-    plt.show()