[demo] add example on custom normalization function usage
author Simon Chabot <simon.chabot@logilab.fr>
Wed, 07 Nov 2012 12:18:00 +0100
changeset 82 8f5a9de54d3e
parent 81 615bc75f0c29
child 83 da44e0b956bb
[demo] add example on custom normalization function usage
demo.py
--- a/demo.py	Wed Nov 07 11:43:26 2012 +0100
+++ b/demo.py	Wed Nov 07 12:18:00 2012 +0100
@@ -6,7 +6,7 @@
 from alignment.aligner import align, parsefile
 
 def demo_0():
-    # prixgoncourt is the list of Goncourt Prize for short stories, extracted
+    # prixgoncourt is the list of Goncourt Prize, extracted
     # from wikipedia
 
     # dbfrenchauthors is an extract dbpedia for the query :
@@ -19,14 +19,19 @@
     #We try to align Goncourt winers onto dbpedia results
 
     alignset = parsefile('demo/prixgoncourt', indexes = [1, 1])
-    targetset = parsefile('demo/dbfrenchauthors_out', indexes = [0, 1], delimiter='#')
+    targetset = parsefile('demo/dbfrenchauthors', indexes = [0, 1], delimiter='#')
 
-    tr_name = { 'normalization' : [n.simplify],
+    def removeparenthesis(string):
+        if '(' in string:
+            return string[:string.index('(')]
+        return string
+
+    tr_name = { 'normalization' : [removeparenthesis, n.simplify],
                 'distance': d.levenshtein
               }
 
     dmatrix, hasmatched = align(alignset, targetset, [tr_name],
-                                0.4, 'demo0_results')
+                                0.4, 'demo/demo0_results')
 
     print dmatrix
 
@@ -65,7 +70,7 @@
                                                     #   given by order.
                                 0.4,                # The maximal distance
                                                     #   threshold
-                                'demo1_results')    # Filename of the output
+                                'demo/demo1_results')    # Filename of the output
                                                     #   result file
     # the ``align()`` function return two items
     # 0. the computed distance matrix
@@ -75,4 +80,5 @@
 
 if __name__ == '__main__':
     demo_0()
+    demo_1()