[demo] Add a new demo (with a kdtree)
author Simon Chabot <simon.chabot@logilab.fr>
Wed, 07 Nov 2012 15:58:46 +0100
changeset 83 da44e0b956bb
parent 82 8f5a9de54d3e
child 84 3cd1edfd28d0
[demo] Add a new demo (with a kdtree)
demo.py
--- a/demo.py	Wed Nov 07 12:18:00 2012 +0100
+++ b/demo.py	Wed Nov 07 15:58:46 2012 +0100
@@ -1,6 +1,8 @@
 #!/usr/bin/python
 #-*- coding:utf-8 -*-
 
+from scipy.spatial import KDTree
+
 import alignment.distances as d
 import alignment.normalize as n
 from alignment.aligner import align, parsefile
@@ -78,7 +80,43 @@
     #    otherwise
     print dmatrix
 
+def demo_2():
+    targetset = parsefile('demo/FR.txt', indexes = [0, 1, (4, 5)])
+    alignset = parsefile('demo/frenchbnf', indexes = [0, 2, (14, 12)])
+
+    aligntree = KDTree([elt[2] or (0, 0) for elt in alignset])
+    targettree = KDTree([elt[2] or (0, 0) for elt in targetset])
+
+    neighbors = aligntree.query_ball_tree(targettree, 0.1)
+
+    # Let's define the treatments to apply to the location names
+    tr_name = { 'normalization': [lambda x: str(x),#Some names are cast to
+                                                   #int/float, convert them back to str
+                                  n.simplify], # Simplify all the names (remove
+                                               #   punctuation, lower case, etc.)
+                'distance': d.levenshtein,     # Use the Levenshtein distance
+                'weighting': 1                 # Use 1 as the name-distance matrix
+                                               #   weighting coefficient
+              }
+
+    print "Start computation"
+    for ind, nei in enumerate(neighbors):
+#        print alignset[ind][1]
+#        print [targetset[i][1] for i in nei]
+        m, b = align([alignset[ind][:2]],      # The dataset to align
+              [targetset[i][:2] for i in nei], # The target dataset
+              [tr_name],
+              0.3,
+              'demo/demo2_results')  # Filename of the output
+                                     #   result file
+
 if __name__ == '__main__':
+    print "Running demo_0"
     demo_0()
+
+    print "Running demo_1"
     demo_1()
 
+    print "Running demo_2"
+    ## Same as demo_1, but in a more efficient way, using a KDTree
+    demo_2()