[dataio] Make autocast optional for sparql io, see #183448
author Vincent Michel <vincent.michel@logilab.fr>
Tue, 15 Oct 2013 12:21:38 +0000
changeset 312 74d0a106fe23
parent 311 6bb993dec00b
child 313 4b4bd8713f9f
[dataio] Make autocast optional for sparql io, see #183448
dataio.py
test/test_dataio.py
--- a/dataio.py	Tue Oct 08 13:25:07 2013 +0000
+++ b/dataio.py	Tue Oct 15 12:21:38 2013 +0000
@@ -31,7 +31,7 @@
 ###############################################################################
 ### UTILITY FUNCTIONS #########################################################
 ###############################################################################
-def autocasted(data, encoding=None):
+def autocast(data, encoding=None):
     """ Try to convert data into a specific type
     in (int, float, str)
     """
@@ -69,7 +69,7 @@
 ###############################################################################
 ### SPARQL FUNCTIONS ##########################################################
 ###############################################################################
-def sparqlquery(endpoint, query, indexes=None):
+def sparqlquery(endpoint, query, indexes=None, autocaste_data=True):
     """ Run the sparql query on the given endpoint, and wrap the items in the
     indexes form. If indexes is empty, keep raw output"""
 
@@ -84,17 +84,20 @@
     labels = rawresults['head']['vars']
     results = []
     indexes = indexes or []
-
+    if autocaste_data:
+        transform = autocast
+    else:
+        def transform(*args): return args
     for raw in rawresults["results"]["bindings"]:
         data = []
         if not indexes:
-            data = [autocasted(raw[label]['value']) for label in labels]
+            data = [transform(raw[label]['value']) for label in labels]
         else:
             for il, ind in enumerate(indexes):
                 if isinstance(ind, tuple):
-                    data.append(tuple([autocasted(raw[labels[i]]['value']) for i in ind]))
+                    data.append(tuple([transform(raw[labels[i]]['value']) for i in ind]))
                 else:
-                    data.append(autocasted(raw[labels[il]]['value']))
+                    data.append(transform(raw[labels[il]]['value']))
         results.append(data)
     return results
 
@@ -117,8 +120,8 @@
                     [2, (21.9, 19), u'stramberry', u'horse'],
                     [3, (23, 2.17), u'cherry', u'flower']]
 
-            By default, all cells are "autocasted" (thanks to the
-            ``autocasted()`` function), but you can overpass it thanks to the
+            By default, all cells are "autocast" (thanks to the
+            ``autocast()`` function), but you can overpass it thanks to the
             ``formatopt`` dictionnary. Each key is the index to work on, and the
             value is the function to call. See the following example:
 
@@ -148,7 +151,7 @@
     indexes = indexes or []
     formatopt = formatopt or {}
     for ind, row in enumerate(formatedoutput(filename)):
-        row = [formatopt.get(i, lambda x: autocasted(x, encoding))(cell)
+        row = [formatopt.get(i, lambda x: autocast(x, encoding))(cell)
                for i, cell in enumerate(row)]
         data = []
         if nbmax and ind > nbmax:
--- a/test/test_dataio.py	Tue Oct 08 13:25:07 2013 +0000
+++ b/test/test_dataio.py	Tue Oct 15 12:21:38 2013 +0000
@@ -22,7 +22,7 @@
 from os import path
 from tempfile import mkdtemp
 
-from nazca.dataio import parsefile, autocasted, split_file
+from nazca.dataio import sparqlquery, parsefile, autocast, split_file
 
 
 TESTDIR = path.dirname(__file__)
@@ -53,14 +53,14 @@
                           [2, ('21.9', 19), u'stramberry', u'horse'],
                           [3, ('23', 2.17), u'cherry', u'flower']], data)
 
-    def test_autocasted(self):
-        self.assertEqual(autocasted('1'), 1)
-        self.assertEqual(autocasted('1.'), 1.)
-        self.assertEqual(autocasted('1,'), 1.)
-        self.assertEqual(autocasted('1,2'), 1.2)
-        self.assertEqual(autocasted('1,2X'), '1,2X')
-        self.assertEqual(autocasted(u'tété'), u'tété')
-        self.assertEqual(autocasted('tété', encoding='utf-8'), u'tété')
+    def test_autocast(self):
+        self.assertEqual(autocast('1'), 1)
+        self.assertEqual(autocast('1.'), 1.)
+        self.assertEqual(autocast('1,'), 1.)
+        self.assertEqual(autocast('1,2'), 1.2)
+        self.assertEqual(autocast('1,2X'), '1,2X')
+        self.assertEqual(autocast(u'tété'), u'tété')
+        self.assertEqual(autocast('tété', encoding='utf-8'), u'tété')
 
     def test_split_file(self):
         NBLINES = 190
@@ -82,6 +82,38 @@
             with open(file2split) as fobj:
                 self.assertEqual(alllines, fobj.readlines())
 
+    def test_sparql_autocast(self):
+        alignset = sparqlquery('http://dbpedia.inria.fr/sparql',
+                                 'prefix db-owl: <http://dbpedia.org/ontology/>'
+                                 'prefix db-prop: <http://fr.dbpedia.org/property/>'
+                                 'select ?ville, ?name, ?long, ?lat where {'
+                                 ' ?ville db-owl:country <http://fr.dbpedia.org/resource/France> .'
+                                 ' ?ville rdf:type db-owl:PopulatedPlace .'
+                                 ' ?ville db-owl:populationTotal ?population .'
+                                 ' ?ville foaf:name ?name .'
+                                 ' ?ville db-prop:longitude ?long .'
+                                 ' ?ville db-prop:latitude ?lat .'
+                                 ' FILTER (?population > 1000)'
+                                 '} LIMIT 100', indexes=[0, 1, (2, 3)])
+        self.assertEqual(len(alignset), 100)
+        self.assertTrue(isinstance(alignset[0][2][0], float))
+
+    def test_sparql_no_autocast(self):
+        alignset = sparqlquery('http://dbpedia.inria.fr/sparql',
+                                 'prefix db-owl: <http://dbpedia.org/ontology/>'
+                                 'prefix db-prop: <http://fr.dbpedia.org/property/>'
+                                 'select ?ville, ?name, ?long, ?lat where {'
+                                 ' ?ville db-owl:country <http://fr.dbpedia.org/resource/France> .'
+                                 ' ?ville rdf:type db-owl:PopulatedPlace .'
+                                 ' ?ville db-owl:populationTotal ?population .'
+                                 ' ?ville foaf:name ?name .'
+                                 ' ?ville db-prop:longitude ?long .'
+                                 ' ?ville db-prop:latitude ?lat .'
+                                 ' FILTER (?population > 1000)'
+                                 '} LIMIT 100', indexes=[0, 1, (2, 3)], autocaste_data=False)
+        self.assertEqual(len(alignset), 100)
+        self.assertFalse(isinstance(alignset[0][2][0], float))
+
 
 if __name__ == '__main__':
     unittest2.main()