[dataio] Export some function to the dataio.py file
authorSimon Chabot <simon.chabot@logilab.fr>
Tue, 13 Nov 2012 16:04:17 +0100
changeset 124 9e21516d69e1
parent 123 82d1323eb534
child 125 07fe131b44d0
[dataio] Export some function to the dataio.py file
dataio.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dataio.py	Tue Nov 13 16:04:17 2012 +0100
@@ -0,0 +1,109 @@
+# -*- coding:utf-8 -*-
+# copyright 2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr -- mailto:contact@logilab.fr
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import csv
+
+
+def autocasted(data, encoding=None):
+    """ Try to convert data into a specific type
+    in (int, float, str)
+    """
+    data = data.strip()
+    try:
+        return int(data)
+    except ValueError:
+        try:
+            return float(data.replace(',', '.'))
+        except ValueError:
+            if encoding:
+                return data.decode(encoding)
+            return data
+
+def sparqlquery(endpoint, query, indexes=[]):
+    """ Run the sparql query on the given endpoint, and wrap the items in the
+    indexes form. If indexes is empty, keep raw output"""
+
+    from SPARQLWrapper import SPARQLWrapper, JSON
+
+    sparql = SPARQLWrapper(endpoint)
+    sparql.setQuery(query)
+    sparql.setReturnFormat(JSON)
+    rawresults = sparql.query().convert()
+    labels = rawresults['head']['vars']
+    results = []
+
+    for raw in rawresults["results"]["bindings"]:
+        data = []
+        if not indexes:
+            data = [autocasted(raw[label]['value']) for label in labels]
+        else:
+            for ind in indexes:
+                if isinstance(ind, tuple):
+                    data.append(tuple([autocasted(raw[labels[i]]['value']) for i in ind]))
+                else:
+                    data.append(autocasted(raw[labels[ind]]['value']))
+        results.append(data)
+    return results
+
+def parsefile(filename, indexes=[], nbmax=None, delimiter='\t',
+              encoding='utf-8', field_size_limit=None):
+    """ Parse the file (read ``nbmax`` line at maximum if given). Each
+        line is splitted according ``delimiter`` and only ``indexes`` are kept
+
+        eg : The file is :
+                1, house, 12, 19, apple
+                2, horse, 21.9, 19, stramberry
+                3, flower, 23, 2.17, cherry
+
+            data = parsefile('myfile', [0, (2, 3), 4, 1], delimiter=',')
+
+            The result will be :
+            data = [[1, (12,   19), 'apple', 'house'],
+                    [2, (21.9, 19), 'stramberry', 'horse'],
+                    [3, (23,   2.17), 'cherry', 'flower']]
+
+    """
+    def formatedoutput(filename):
+        if field_size_limit:
+            csv.field_size_limit(field_size_limit)
+
+        with open(filename, 'r') as csvfile:
+            reader = csv.reader(csvfile, delimiter=delimiter)
+            for row in reader:
+                yield [autocasted(cell) for cell in row]
+
+
+    result = []
+    for ind, row in enumerate(formatedoutput(filename)):
+        data = []
+        if nbmax and ind > nbmax:
+            break
+        if not indexes:
+            data = row
+        else:
+            for ind in indexes:
+                if isinstance(ind, tuple):
+                    data.append(tuple([row[i] for i in ind]))
+                    if '' in data[-1]:
+                        data[-1] = None
+                elif row[ind]:
+                    data.append(row[ind])
+                else:
+                    data.append(None)
+
+        result.append(data)
+    return result