[Minhashing] Trained data can be saved and loaded
author Simon Chabot <simon.chabot@logilab.fr>
Mon, 29 Oct 2012 10:17:24 +0100
changeset 57 2741988383ae
parent 56 afa74864fe6c
child 58 27b66c6cee3a
[Minhashing] Trained data can be saved and loaded
minhashing.py
normalize.py
--- a/minhashing.py	Mon Oct 29 09:52:16 2012 +0100
+++ b/minhashing.py	Mon Oct 29 10:17:24 2012 +0100
@@ -15,6 +15,7 @@
 # You should have received a copy of the GNU Lesser General Public License along
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 
+import cPickle
 
 from scipy.sparse import lil_matrix
 from numpy import ones
@@ -113,6 +114,30 @@
                         sig[i, c] = hashr
         return sig
 
+    def save(self, savefile):
+        """ Pickle `self.sigmatrix` into `savefile`; only prints a notice if untrained """
+
+        if not self._trained:  # guard: when the flag is falsy, no file is written
+            print "Not trained, nothing to save"
+            return
+
+        with open(savefile, 'wb') as fobj:  # binary mode for the pickle stream
+            pickler = cPickle.Pickler(fobj)
+            pickler.dump(self.sigmatrix)  # only the signature matrix is persisted
+
+    def load(self, savefile):
+        """ Unpickle the signature matrix from `savefile` and update the trained flag """
+
+        with open(savefile, 'rb') as fobj:
+            pickler = cPickle.Unpickler(fobj)  # NOTE(review): unpickling can run arbitrary code; load trusted files only
+            self.sigmatrix = pickler.load()
+
+        if self.sigmatrix is not None:  # a pickled None leaves the object marked untrained
+            self._trained = True
+        else:
+            self._trained = False
+
+
     def findsimilarsentences(self, bandsize, sentenceid = -1, dispThreshold = False):
         """ Return a set of tuples of *possible* similar sentences
 
--- a/normalize.py	Mon Oct 29 09:52:16 2012 +0100
+++ b/normalize.py	Mon Oct 29 10:17:24 2012 +0100
@@ -118,7 +118,7 @@
         in decimal digits (default 0 digits)
 
         If ``number`` is not a float, this method casts it to a float. (An
-        exception can be raised if it's not possible)
+        exception may be raised if it's not possible)
     """
 
     return format(round(float(number), ndigits), '0.%df' % ndigits)