[dataio] Add a variable to avoid autocast in parsefile nazca-centos-version-0.5.0-1 nazca-debian-version-0.5.0-1 nazca-version-0.5.0
author Vincent Michel <vincent.michel@logilab.fr>
Tue, 08 Apr 2014 16:15:43 +0000
changeset 420 a3588d56fafe
parent 419 c8d11a09128a
child 421 1d6a374704ce
[dataio] Add a variable to avoid autocast in parsefile
test/test_dataio.py
utils/dataio.py
--- a/test/test_dataio.py	Tue Apr 08 17:44:16 2014 +0200
+++ b/test/test_dataio.py	Tue Apr 08 16:15:43 2014 +0000
@@ -108,6 +108,13 @@
                           [2, ('21.9', 19), u'stramberry', u'horse'],
                           [3, ('23', 2.17), u'cherry', u'flower']], data)
 
+        data = parsefile(path.join(TESTDIR, 'data', 'file2parse'),
+                         [0, (2, 3), 4, 1], delimiter=',', use_autocast=False)
+        self.assertEqual([['1', ('12', '19'), 'apple', 'house'],
+                          ['2', ('21.9', '19'), 'stramberry', 'horse'],
+                          ['3', ('23', '2.17'), 'cherry', 'flower']], data)
+
+
     def test_autocast(self):
         self.assertEqual(autocast('1'), 1)
         self.assertEqual(autocast('1.'), 1.)
--- a/utils/dataio.py	Tue Apr 08 17:44:16 2014 +0200
+++ b/utils/dataio.py	Tue Apr 08 16:15:43 2014 +0000
@@ -172,7 +172,8 @@
 ### FILE FUNCTIONS ############################################################
 ###############################################################################
 def parsefile(filename, indexes=None, nbmax=None, delimiter='\t',
-              encoding='utf-8', field_size_limit=None, formatopt=None):
+              encoding='utf-8', field_size_limit=None,
+              use_autocast=True, formatopt=None):
     """ Parse the file (read ``nbmax`` line at maximum if given). Each
         line is splitted according ``delimiter`` and only ``indexes`` are kept
 
@@ -212,12 +213,16 @@
         csvfile.close()
 
 
-
+    # Autocast if asked
+    if use_autocast:
+        deffunc = lambda x: autocast(x, encoding)
+    else:
+        deffunc = lambda x: x
     result = []
     indexes = indexes or []
     formatopt = formatopt or {}
     for ind, row in enumerate(formatedoutput(filename)):
-        row = [formatopt.get(i, lambda x: autocast(x, encoding))(cell)
+        row = [formatopt.get(i, deffunc)(cell)
                for i, cell in enumerate(row)]
         data = []
         if nbmax and ind > nbmax: