Add read capability: raw flatten array data of DataSets can be read draft
authorAlain Leufroy <alain.leufroy@logilab.fr>
Fri, 13 Apr 2012 21:06:08 +0200
changeset 15 981b5667158c
parent 14 f4791bdd245a
child 16 a3f3c61347c1
Add read capability: raw flatten array data of DataSets can be read

::

    $ mkdir /tmp/image
    $ python h5fs.py test/data/ex_image1.h5 /tmp/image
    $ echo 'Let see the FS '
    $ tree /tmp/image
    /tmp/image/
    |-- image1
    `-- pallete

    0 directories, 2 files
    $ ls -lah /tmp/image/image1
    -r-------- 1 alain users 79K Jan 1 1970 /tmp/image/image
    $ echo 'Here what we can do'
    $ python
    >>> f = open('/tmp/image/image1')
    >>> f.seek(0, 2)
    >>> print f.tell()/1024.
    78.125
    >>> import struct
    >>> f.seek(0)
    >>> data = struct.unpack('80000B', f.read())
    >>> len(data)
    80000
    >>> import numpy
    >>> f.seek(0)
    >>> arr = numpy.empty((50, 16, 20, 5), dtype='|u1')
    >>> f.readinto(arr.data)
    80000
    >>> arr[::8,2,5,2]
    array([0, 1, 2, 4, 5, 7, 8], dtype=uint8)
    >>> exit()
    $ echo 'It was funny, now terminate all.'
    $ fusermount -u /tmp/image
    $ rm -r /tmp/image
    $ echo 'it is all folk!'
h5fs.py
test/data/ex_image1.h5
test/test_H5FS.py
--- a/h5fs.py	Tue Apr 02 12:21:13 2013 +0200
+++ b/h5fs.py	Fri Apr 13 21:06:08 2012 +0200
@@ -70,12 +70,17 @@
         return h5entry
 
     @staticmethod
-    def get_stat(h5entry):
+    def get_size(h5entry):
+        '''Return the size in bytes of the entry data (item size * item count)'''
+        arr = h5entry.value  # NOTE(review): presumably loads the whole dataset -- confirm
+        return arr.itemsize * arr.size
+
+    def get_stat(self, h5entry):
         '''Return the entry stat information (an instance of Stat)'''
         if isinstance(h5entry, h5py.highlevel.Group):
             return GroupStat()
         if isinstance(h5entry, h5py.highlevel.Dataset):
-            return DatasetStat()
+            return DatasetStat(st_size=self.get_size(h5entry))
         raise H5IOError(errno.ENOENT, 'Unknown entry type', h5entry.name)
 
     @staticmethod
@@ -84,11 +89,23 @@
         # XXX unicode seems not to be allowed
         return str(osp.basename(h5entry.name))
 
+    @staticmethod
+    def get_data(h5entry, start, end):  # slice [start:end) of the entry raw data buffer
+        return h5entry.value.data[start:end]
+
     def list_group(self, group, specs=('.', '..')):
         '''A fonction generator that yields name of the entries in the group.
         The list is in arbritary order with specials entries ``specs``.'''
         return chain(iter(specs), imap(self.get_name, group.itervalues()))
 
+    @staticmethod
+    def openable(h5entry):
+        '''Raise H5IOError (EISDIR) if ``h5entry`` is not a Dataset'''
+        if not isinstance(h5entry, h5py.highlevel.Dataset):
+            raise H5IOError(errno.EISDIR, 'Is a Group', h5entry.name)
+
+# ===
+
     def readdir(self, path, offset):
         '''Return a generator that yields entries from the given Group path'''
         convert = partial(fuse.Direntry, offset=offset)
@@ -98,6 +115,21 @@
         '''Return the stat attributes of the given ``path``'''
         return self.get_stat(self.get_entry(path))
 
+    def open(self, path, flags):
+        '''Raise H5IOError if opening ``path`` with ``flags`` is not allowed.
+
+        XXX: readonly for now (O_RDONLY is 0, so test the write access bits)
+        '''
+        if flags & (os.O_WRONLY | os.O_RDWR):
+            raise H5IOError(errno.EACCES, 'Could not open', path)
+        return self.openable(self.get_entry(path))
+
+    def read(self, path, size, offset):  # EISDIR for a Group, else data[offset:offset + size]
+        h5entry = self.get_entry(path)
+        if isinstance(h5entry, h5py.highlevel.Group):
+            raise H5IOError(errno.EISDIR, 'Is a Group', path)
+        return self.get_data(h5entry, offset, offset + size)
+
 
 def __main__():
     '''Main entry point that start the HDF5 filesystem server'''
Binary file test/data/ex_image1.h5 has changed
--- a/test/test_H5FS.py	Tue Apr 02 12:21:13 2013 +0200
+++ b/test/test_H5FS.py	Fri Apr 13 21:06:08 2012 +0200
@@ -1,7 +1,9 @@
+import os
 import os.path as osp
 import tempfile
 import errno
 import h5py
+import numpy as N
 
 from unittest import TestCase as TC, main
 from logilab.common.testlib import within_tempdir
@@ -27,7 +29,10 @@
     @with_h5file
     def wrapped(self, h5file):
         h5file.create_group(u'sam')
-        h5file.create_dataset(u'melu', data=xrange(12), shape=(4, 3), dtype='u8')
+        data = N.array([(-1, 4.0, 'Hello'), (2, 6.0, 'World')],
+                       dtype=[('f0', '>u8'), ('f1', '>f4'), ('f2', '|S7')])
+        h5file.create_dataset(u'melu', data=data)
+            # data=xrange(12), shape=(4, 3), dtype='u4')
         fsmx = h5fs.FsMixin()
         fsmx.h5file = h5file
         return func(self, fsmx)
@@ -35,6 +40,11 @@
 
 
 class FsMixin_TC(TC):
+
+    @with_fsmixin
+    def test_get_dataset_size(self, fsmx):
+        self.assertEqual(fsmx.get_size(fsmx.h5file['melu']), 38)
+
     @with_fsmixin
     def test_get_stat_with_group(self, fsmx):
         group = fsmx.h5file['sam']
@@ -43,12 +53,34 @@
     @with_fsmixin
     def test_get_stat_with_dataset(self, fsmx):
         dataset = fsmx.h5file['melu']
-        self.assertTrue(isinstance(fsmx.get_stat(dataset), h5fs.DatasetStat))
+        stat = fsmx.get_stat(dataset)
+        self.assertTrue(isinstance(stat, h5fs.DatasetStat))
+        self.assertEqual(stat.st_size, 38)
 
     @with_fsmixin
     def test_get_name(self, fsmx):
         dataset = fsmx.h5file['melu']
-        self.assertEqual(FSM.get_name(dataset), 'melu')
+        self.assertEqual(fsmx.get_name(dataset), 'melu')
+
+    @with_fsmixin
+    def test_get_data(self, fsmx):
+        self.assertSequenceEqual(fsmx.get_data(fsmx.h5file['melu'], 7, 19),
+                                 '\xff@\x80\x00\x00Hello\x00\x00')
+
+    @with_fsmixin
+    def test_list_group(self, fsmx):
+        result = set(fsmx.list_group(fsmx.h5file['/'], ('alain',)))
+        self.assertSetEqual(result, set(['sam', 'melu', 'alain']))
+
+    @with_fsmixin
+    def test_openable_wrong(self, fsmx):
+        self.assertRaises(h5fs.H5IOError, fsmx.openable, fsmx.h5file['sam'])
+
+    @with_fsmixin
+    def test_openable_ok(self, fsmx):
+        fsmx.openable(fsmx.h5file['melu']) # no H5IOError raised
+
+# =======
 
     @with_fsmixin
     def test_readdir(self, fsmx):
@@ -70,6 +102,33 @@
     def test_getattr_wrong(self, fsmx):
         self.assertRaises(h5fs.H5IOError, fsmx.getattr, u'/wrong data path')
 
+    @with_fsmixin
+    def test_open_wrong_path(self, fsmx):
+        self.assertRaises(h5fs.H5IOError, fsmx.open, u'/wrong data path',
+                          os.O_RDONLY)
+
+    @with_fsmixin
+    def test_open_wrong_write(self, fsmx): # XXX read-only for now
+        # NOTE(review): path does not exist, so the error may not come from O_WRONLY
+        self.assertRaises(h5fs.H5IOError, fsmx.open, u'/wrong data path',
+                          os.O_WRONLY)
+
+    @with_fsmixin
+    def test_open_ok(self, fsmx):
+        fsmx.open(u'melu', os.O_RDONLY) # no H5IOError raised
+
+    @with_fsmixin
+    def test_read_wrong_noent(self, fsmx):
+        self.assertRaises(h5fs.H5IOError, fsmx.read, u'/wrong data path', 0, 10)
+
+    @with_fsmixin
+    def test_read_wrong_isdir(self, fsmx):
+        self.assertRaises(h5fs.H5IOError, fsmx.read, u'/sam', 0, 10)
+
+    @with_fsmixin
+    def test_read(self, fsmx):
+        self.assertSequenceEqual(fsmx.read(u'melu', 12, 7),
+                                 '\xff@\x80\x00\x00Hello\x00\x00')
+
 
 if __name__ == '__main__':
     main()