[demo] Add a new demo of alignment usage
author Simon Chabot <simon.chabot@logilab.fr>
Wed, 07 Nov 2012 11:43:26 +0100
changeset 81 615bc75f0c29
parent 80 7b68f83c8c73
child 82 8f5a9de54d3e
[demo] Add a new demo of alignment usage
demo.py
--- a/demo.py	Wed Nov 07 10:47:41 2012 +0100
+++ b/demo.py	Wed Nov 07 11:43:26 2012 +0100
@@ -5,8 +5,33 @@
 import alignment.normalize as n
 from alignment.aligner import align, parsefile
 
-if __name__ == '__main__':
+def demo_0():
+    # prixgoncourt is the list of Goncourt Prize winners for short stories,
+    # extracted from Wikipedia
+
+    # dbfrenchauthors is an extract of DBpedia for the query:
+    #   SELECT ?writer, ?name WHERE {
+    #      ?writer  <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:French_novelists>.
+    #      ?writer rdfs:label ?name.
+    #      FILTER(lang(?name) = 'fr')
+    #   }
+
+    # We try to align Goncourt winners onto the DBpedia results
 
+    alignset = parsefile('demo/prixgoncourt', indexes = [1, 1])
+    targetset = parsefile('demo/dbfrenchauthors_out', indexes = [0, 1], delimiter='#')
+
+    tr_name = { 'normalization' : [n.simplify],
+                'distance': d.levenshtein
+              }
+
+    dmatrix, hasmatched = align(alignset, targetset, [tr_name],
+                                0.4, 'demo0_results')
+
+    print dmatrix
+
+
+def demo_1():
     # FR.txt is an extract of geonames, where locations have been sorted by name
     # frenchbnf is an extract of french BNF's locations, sorted by name too
 
@@ -14,8 +39,8 @@
     # position (longitude, latitude)
     # ``nbmax`` is the number of locations to load
 
-    targetset = parsefile('data/FR.txt', indexes = [0, 1, (4, 5)], nbmax = 2000)
-    alignset = parsefile('data/frenchbnf', indexes = [0, 2, (14, 12)], nbmax = 1000)
+    targetset = parsefile('demo/FR.txt', indexes = [0, 1, (4, 5)], nbmax = 2000)
+    alignset = parsefile('demo/frenchbnf', indexes = [0, 2, (14, 12)], nbmax = 1000)
 
 
     # Let's define the treatements to apply on the location's name
@@ -40,10 +65,14 @@
                                                     #   given by order.
                                 0.4,                # The maximal distance
                                                     #   threshold
-                                'alignment_results')# Filename of the output
+                                'demo1_results')    # Filename of the output
                                                     #   result file
     # the ``align()`` function return two items
     # 0. the computed distance matrix
     # 1. a boolean, True if at least one alignment has been done, False
     #    otherwise
     print dmatrix
+
+if __name__ == '__main__':
+    demo_0()
+