[entities,hooks] Add a complete support for resources
authorDenis Laxalde <denis.laxalde@logilab.fr>
Thu, 18 Dec 2014 11:41:18 +0100
changeset 40 d7f97a999d12
parent 20 e7ff8cdf0adf
child 41 c7556306b72d
[entities,hooks] Add a complete support for resources In particular, rely on a specific adapter and a `ckan_resource_id` attribute similarly to dataset-like entity types. This allows to completely support file upload and update on CKAN instance (though we always upload data upon metadata modifications). Closes #4753964.
entities.py
hooks.py
test/data/entities.py
test/data/schema.py
test/unittest_hooks.py
utils.py
--- a/entities.py	Tue Dec 09 16:28:26 2014 +0100
+++ b/entities.py	Thu Dec 18 11:41:18 2014 +0100
@@ -18,8 +18,9 @@
 
 import re
 import unicodedata
+from urllib2 import urlopen
 
-from cubicweb.predicates import relation_possible
+from cubicweb.predicates import relation_possible, adaptable
 from cubicweb.view import EntityAdapter
 
 from cubes.ckanpublish.utils import ckan_post
@@ -39,9 +40,9 @@
     return re.sub('[-\s]+', '-', value)
 
 
-class CKANPublishableAdapter(EntityAdapter):
+class CKANDatasetAdapter(EntityAdapter):
     """Adapter for entity that can be mapped to a CKAN dataset"""
-    __regid__ = 'ICKANPublishable'
+    __regid__ = 'ICKANDataset'
     __abstract__ = True
     __select__ = (EntityAdapter.__select__ &
                   relation_possible('ckan_dataset_id', role='subject'))
@@ -128,30 +129,53 @@
         """
         return None
 
-    def dataset_resources(self):
-        """May return a list of entities adaptable as IDownloadable to be set
-        as resources of the CKAN dataset.
+
+class CKANResourceAdapter(EntityAdapter):
+    """Adapter for entity that can be mapped to a CKAN resource"""
+    __regid__ = 'ICKANResource'
+    __abstract__ = True
+    __select__ = (EntityAdapter.__select__ &
+                  relation_possible('ckan_resource_id', role='subject'))
+
+    @property
+    def dataset(self):
+        """The dataset-like entity (adaptable as ICKANDataset) associated with
+        this resource.
         """
-        return []
+        raise NotImplementedError()
+
+    def ckan_metadata(self):
+        """Return a dict of metadata about the resource"""
+        metadata = {
+            'created': self.entity.creation_date.isoformat(),
+            'last_modified': self.entity.modification_date.isoformat(),
+        }
+        return metadata
 
-    def ckan_resources(self):
-        """Yield dicts of CKAN dataset resource info"""
-        resources_url = self.ckan_package_resources()
-        for entity in self.dataset_resources():
-            adapted = entity.cw_adapt_to('IDownloadable')
-            if adapted is None:
-                self.warning(
-                    'invalid resource %r, could not adapt to IDownloadable',
-                    entity)
-                continue
-            url = adapted.download_url()
-            if url in resources_url:
-                self.info('skipping resource %s and it seems to be already '
-                          'on CKAN side', entity)
-                continue
-            yield {'url': url,
-                   'name': adapted.download_file_name(),
-                   'mimetype': adapted.download_content_type(),
-                   'created': str(entity.creation_date),
-                   'last_modified': str(entity.modification_date),
-                  }
+    def read(self):
+        """Read resource content (file-like interface)."""
+        raise NotImplementedError()
+
+
+class DownloadableCKANResourceAdapter(CKANResourceAdapter):
+    """Adapter for downloadable entities that can be mapped to a CKAN
+    resource.
+    """
+    __abstract__ = True
+    __select__ = CKANResourceAdapter.__select__ & adaptable('IDownloadable')
+
+    def ckan_metadata(self):
+        """Basic metadata extended with IDownloadable"""
+        metadata = super(DownloadableCKANResourceAdapter, self).ckan_metadata()
+        idownload = self.entity.cw_adapt_to('IDownloadable')
+        metadata.update(
+            {'name': idownload.download_file_name(),
+             'mimetype': idownload.download_content_type(),
+            }
+        )
+        return metadata
+
+    def read(self):
+        """Read content using IDownloadable adapter from URL."""
+        idownload = self.entity.cw_adapt_to('IDownloadable')
+        return urlopen(idownload.download_url())
--- a/hooks.py	Tue Dec 09 16:28:26 2014 +0100
+++ b/hooks.py	Thu Dec 18 11:41:18 2014 +0100
@@ -26,10 +26,10 @@
                                      ckan_instance_configured)
 
 
-def _ckan_action(config, eid, action, data=None):
+def _ckan_action(config, eid, action, **kwargs):
     """Run `ckan_post` and eventually raise ValidationError."""
     try:
-        return ckan_post(config, action, data=data)
+        return ckan_post(config, action, **kwargs)
     except (CKANPostError, RequestException) as exc:
         raise ValidationError(eid, {'ckan_dataset_id': unicode(exc)})
 
@@ -38,7 +38,7 @@
     """Create a CKAN dataset and set `ckan_dataset_id` attribute or
     respective entity. Return the dataset id.
     """
-    res = _ckan_action(config, eid, 'package_create', data)
+    res = _ckan_action(config, eid, 'package_create', data=data)
     return res['id']
 
 
@@ -54,28 +54,31 @@
     _ckan_action(config, eid, 'package_delete', data={'id': datasetid})
 
 
-def add_dataset_resource(config, eid, datasetid, resource_data):
+def create_dataset_resource(config, eid, datasetid, metadata, data):
     """Add a resource to an existing CKAN dataset"""
-    resource_data['package_id'] = datasetid
-    return _ckan_action(config, eid, 'resource_create', data=resource_data)
+    metadata['package_id'] = datasetid
+    res = _ckan_action(config, eid, 'resource_create', data=metadata,
+                       files=[('upload', data)])
+    return res['id']
 
 
-def delete_dataset_resources(config, eid, datasetid):
-    """Delete resources of a CKAN dataset"""
-    res = _ckan_action(config, eid, 'package_show', data={'id': datasetid})
-    resources = res['resources']
-    deleted = set([])
-    for resource in resources:
-        _ckan_action(config, eid, 'resource_delete', {'id': resource['id']})
-        deleted.add(resource['id'])
-    return deleted
+def update_dataset_resource(config, eid, resourceid, metadata, data):
+    """Update an existing CKAN resource."""
+    metadata['id'] = resourceid
+    _ckan_action(config, eid, 'resource_update', data=metadata,
+                 files=[('upload', data)])
+
+
+def delete_dataset_resource(config, eid, resourceid):
+    """Delete a CKAN resource"""
+    _ckan_action(config, eid, 'resource_delete', data={'id': resourceid})
 
 
 class DeleteCKANDataSetHook(hook.Hook):
     """Delete CKAN dataset upon deletion of the corresponding entity"""
     __regid__ = 'ckanpublish.delete-ckan-dataset'
     __select__ = (hook.Hook.__select__ & ckan_instance_configured &
-                  adaptable('ICKANPublishable') &
+                  adaptable('ICKANDataset') &
                   score_entity(lambda x: x.ckan_dataset_id))
     events = ('before_delete_entity', )
 
@@ -87,7 +90,7 @@
     """Add or update a CKAN dataset upon addition or update of an entity"""
     __regid__ = 'ckanpublish.add-update-ckan-dataset'
     __select__ = (hook.Hook.__select__ & ckan_instance_configured &
-                  adaptable('ICKANPublishable'))
+                  adaptable('ICKANDataset'))
     events = ('after_add_entity', 'after_update_entity', )
 
     def __call__(self):
@@ -103,12 +106,10 @@
             datasetid = entity.ckan_dataset_id
             config = self.cnx.vreg.config
             if self.cnx.deleted_in_transaction(eid):
-                deleted = delete_dataset_resources(config, eid, datasetid)
-                self.info('deleted CKAN resources %s', ', '.join(deleted))
                 delete_dataset(config, eid, datasetid)
                 self.info('deleted CKAN dataset %s', datasetid)
             else:
-                cpublish = entity.cw_adapt_to('ICKANPublishable')
+                cpublish = entity.cw_adapt_to('ICKANDataset')
                 data = cpublish.ckan_data()
                 if datasetid is not None:
                     update_dataset(config, eid, datasetid, data)
@@ -120,8 +121,60 @@
                         'SET X ckan_dataset_id %(dsid)s WHERE X eid %(eid)s',
                         {'eid': eid, 'dsid': datasetid})
                     self.info('created CKAN dataset %s', datasetid)
-                for resource_data in cpublish.ckan_resources():
-                    resource_id = add_dataset_resource(config, eid, datasetid,
-                                                       resource_data)
-                    self.info('add resource %s to CKAN dataset %s' %
-                              (resource_id, datasetid))
+
+
+class DeleteCKANResourceHook(hook.Hook):
+    """Delete CKAN resource upon deletion of the corresponding entity"""
+    __regid__ = 'ckanpublish.delete-ckan-resource'
+    __select__ = (hook.Hook.__select__ & ckan_instance_configured &
+                  adaptable('ICKANResource') &
+                  score_entity(lambda x: x.ckan_resource_id))
+    events = ('before_delete_entity', )
+
+    def __call__(self):
+        CKANResourceOp.get_instance(self._cw).add_data(self.entity.eid)
+
+
+class AddOrUpdateCKANResourceHook(hook.Hook):
+    """Add or update a CKAN resource upon addition or update of an entity"""
+    __regid__ = 'ckanpublish.add-update-ckan-resource'
+    __select__ = (hook.Hook.__select__ & ckan_instance_configured &
+                  adaptable('ICKANResource'))
+    events = ('after_add_entity', 'after_update_entity', )
+
+    def __call__(self):
+        CKANResourceOp.get_instance(self._cw).add_data(self.entity.eid)
+
+
+class CKANResourceOp(hook.DataOperationMixIn, hook.Operation):
+    """Operation to create, update or delete a CKAN resource"""
+
+    def precommit_event(self):
+        for eid in self.get_data():
+            entity = self.cnx.entity_from_eid(eid)
+            resourceid = entity.ckan_resource_id
+            iresource = entity.cw_adapt_to('ICKANResource')
+            config = self.cnx.vreg.config
+            if self.cnx.deleted_in_transaction(eid) and resourceid is not None:
+                delete_dataset_resource(config, eid, resourceid)
+                self.info('deleted resource %s', resourceid)
+            else:
+                metadata = iresource.ckan_metadata()
+                data = iresource.read()
+                if resourceid is None:
+                    dataset = iresource.dataset
+                    assert dataset, 'no dataset for resource #%d' % eid
+                    if not dataset.ckan_dataset_id:
+                        self.error('skipping resource #%d as its dataset %#d is '
+                                   'not in the CKAN instance', eid, dataset.eid)
+                        continue
+                    resourceid = create_dataset_resource(
+                        config, eid, dataset.ckan_dataset_id, metadata, data)
+                    self.cnx.execute(
+                        'SET X ckan_resource_id %(rid)s WHERE X eid %(eid)s',
+                        {'eid': eid, 'rid': resourceid})
+                    self.info('added resource %s', resourceid)
+                else:
+                    update_dataset_resource(
+                        config, eid, resourceid, metadata, data)
+                    self.info('updated resource %s', resourceid)
--- a/test/data/entities.py	Tue Dec 09 16:28:26 2014 +0100
+++ b/test/data/entities.py	Thu Dec 18 11:41:18 2014 +0100
@@ -1,14 +1,23 @@
-from cubicweb.predicates import is_instance
+from cubicweb.predicates import is_instance, relation_possible
 
-from cubes.ckanpublish.entities import CKANPublishableAdapter
+from cubes.ckanpublish.entities import (CKANDatasetAdapter,
+                                        DownloadableCKANResourceAdapter)
 
 
-class CWDataSetCKANPublish(CKANPublishableAdapter):
-    __select__ = CKANPublishableAdapter.__select__ & is_instance('CWDataSet')
+class CWDatasetCKANDatasetAdapter(CKANDatasetAdapter):
+    __select__ = CKANDatasetAdapter.__select__ & is_instance('CWDataSet')
 
     def dataset_maintainer(self):
         if self.entity.maintainer:
             return self.entity.maintainer[0]
 
-    def dataset_resources(self):
-        return self.entity.resources
+
+class FileCKANResourceAdapter(DownloadableCKANResourceAdapter):
+    __select__ = (DownloadableCKANResourceAdapter.__select__ &
+                  is_instance('File') &
+                  relation_possible('resources', role='object'))
+
+    @property
+    def dataset(self):
+        if self.entity.reverse_resources:
+            return self.entity.reverse_resources[0]
--- a/test/data/schema.py	Tue Dec 09 16:28:26 2014 +0100
+++ b/test/data/schema.py	Thu Dec 18 11:41:18 2014 +0100
@@ -11,3 +11,11 @@
         )
     maintainer = SubjectRelation('CWUser', cardinality='?*')
     resources = SubjectRelation('File', cardinality='*?', composite='subject')
+
+
+class ckan_resource_id(RelationDefinition):
+    __permissions__={'read': ('managers', 'users', 'guests'),
+                     'add': (),
+                     'update': ()}
+    subject = 'File'
+    object  = 'String'
--- a/test/unittest_hooks.py	Tue Dec 09 16:28:26 2014 +0100
+++ b/test/unittest_hooks.py	Thu Dec 18 11:41:18 2014 +0100
@@ -29,24 +29,24 @@
 
     def tearDown(self):
         with self.admin_access.repo_cnx() as cnx:
-            # Delete Table linked to a CKAN dataset, so that the latter gets
-            # deleted.
+            # Delete entities linked to a CKAN dataset as well as their
+            # resources, so that the CKAN dataset and resources get deleted.
             # However, datasets will still have to be purge from the web ui.
             cnx.execute('DELETE CWDataSet X WHERE EXISTS(X ckan_dataset_id I)')
+            cnx.execute('DELETE File X WHERE EXISTS(X ckan_resource_id I)')
             cnx.commit()
         super(CKANPublishHooksTC, self).tearDown()
 
-    def test_entity_creation(self):
+    def test_dataset(self):
         with self.admin_access.repo_cnx() as cnx:
             entity = cnx.create_entity('CWDataSet', name=u'buz buz ?!',
                                        description=u'opendata buzzzz')
             cnx.commit()
-            yield self._check_entity_create, cnx, entity
-            yield self._check_entity_update, cnx, entity
-            yield self._check_entity_resources, cnx, entity
-            yield self._check_entity_delete, cnx, entity
+            yield self._check_dataset_create, cnx, entity
+            yield self._check_dataset_update, cnx, entity
+            yield self._check_dataset_delete, cnx, entity
 
-    def _check_entity_create(self, cnx, entity):
+    def _check_dataset_create(self, cnx, entity):
         self.set_description('entity creation')
         self.assertIsNotNone(entity.ckan_dataset_id)
         result = ckan_post(self.ckan_config, 'package_show',
@@ -54,12 +54,12 @@
         self.assertEqual(result['name'], '%s-buz-buz' % entity.eid)
         self.assertEqual(result['title'], entity.name)
         self.assertEqual(result['notes'], entity.description)
-        cpublish = entity.cw_adapt_to('ICKANPublishable')
+        cpublish = entity.cw_adapt_to('ICKANDataset')
         organization_id = cpublish.ckan_get_organization_id(
             self.dataset_owner_org)
         self.assertEqual(result['owner_org'], organization_id)
 
-    def _check_entity_update(self, cnx, entity):
+    def _check_dataset_update(self, cnx, entity):
         self.set_description('entity update')
         entity.cw_set(description=u'no this is actually serious')
         cnx.commit()
@@ -75,40 +75,58 @@
         self.assertEqual(result['maintainer'], 'T. Oto')
         self.assertEqual(result['maintainer_email'], 'to@t.o')
 
-    def _check_entity_resources(self, cnx, entity):
-        self.set_description('entity resources')
-        resource = cnx.create_entity('File', data=Binary('yui'),
-                                     data_format=u'text/plain',
-                                     data_name=u'blurp',
-                                     reverse_resources=entity)
-        cnx.commit()
-        result = ckan_post(self.ckan_config, 'package_show',
-                           {'id': entity.ckan_dataset_id})
-        resources = result['resources']
-        self.assertEqual(len(resources), 1)
-        r0 = resources[0]
-        iresource = resource.cw_adapt_to('IDownloadable')
-        self.assertEqual(r0['url'], iresource.download_url())
-        # Update the entity (should trigger another push to CKAN, but no new
-        # resource).
-        entity.cw_set(description=u'blurp')
-        cnx.commit()
-        result = ckan_post(self.ckan_config, 'package_show',
-                           {'id': entity.ckan_dataset_id})
-        resources = result['resources']
-        self.assertEqual(len(resources), 1)
-
-    def _check_entity_delete(self, cnx, entity):
+    def _check_dataset_delete(self, cnx, entity):
         self.set_description('entity deletion')
         ckanid = entity.ckan_dataset_id
-        result = ckan_post(self.ckan_config, 'package_show',
-                           {'id': ckanid})
-        resource_id = result['resources'][0]['id']
         entity.cw_delete()
         cnx.commit()
         result = ckan_post(self.ckan_config, 'package_show',
                            {'id': ckanid})
         self.assertEqual(result['state'], 'deleted')
+
+    def test_resources(self):
+        with self.admin_access.repo_cnx() as cnx:
+            dataset = cnx.create_entity('CWDataSet', name=u'blurp',
+                                        description=u'flop')
+            resource = cnx.create_entity('File', data=Binary('yui'),
+                                         data_format=u'text/plain',
+                                         data_name=u'blurp',
+                                         reverse_resources=dataset)
+            cnx.commit()
+            yield self._check_resource_creation, cnx, dataset, resource
+            yield self._check_resource_update, cnx, resource
+            yield self._check_resource_delete, cnx, dataset, resource
+
+    def _check_resource_creation(self, cnx, dataset, resource):
+        self.set_description('resource creation')
+        self.assertIsNotNone(resource.ckan_resource_id)
+        result = ckan_post(self.ckan_config, 'package_show',
+                           {'id': dataset.ckan_dataset_id})
+        resources = result['resources']
+        self.assertEqual(len(resources), 1)
+        result = ckan_post(self.ckan_config, 'resource_show',
+                           {'id': resource.ckan_resource_id})
+        self.assertEqual(result['created'],
+                         resource.creation_date.isoformat())
+        self.assertEqual(result['last_modified'],
+                         resource.modification_date.isoformat())
+        self.assertEqual(result['name'], u'blurp')
+        self.assertEqual(result['mimetype'], 'text/plain')
+
+    def _check_resource_update(self, cnx, resource):
+        self.set_description('resource update')
+        resource.cw_set(data_name=u'gloups')
+        cnx.commit()
+        result = ckan_post(self.ckan_config, 'resource_show',
+                           {'id': resource.ckan_resource_id})
+        self.assertEqual(result['name'], u'gloups')
+
+    def _check_resource_delete(self, cnx, dataset, resource):
+        self.set_description('resource deletion')
+        ckanid = dataset.ckan_dataset_id
+        resource_id = resource.ckan_resource_id
+        resource.cw_delete()
+        cnx.commit()
         result = ckan_post(self.ckan_config, 'resource_show',
                            {'id': resource_id})
         self.assertEqual(result['state'], 'deleted')
--- a/utils.py	Tue Dec 09 16:28:26 2014 +0100
+++ b/utils.py	Thu Dec 18 11:41:18 2014 +0100
@@ -27,14 +27,16 @@
     """CKAN post action error"""
 
 
-def ckan_post(config, action, data=None):
+def ckan_post(config, action, data=None, files=None):
     base = config['ckan-baseurl']
     if not base.endswith('/'):
         base += '/'
     url = urljoin(base, 'api/3/action/' + action)
-    headers = {'Authorization': config['ckan-api-key'],
-               'Content-Type': 'application/json'}
-    resp = requests.post(url, headers=headers, data=json.dumps(data or {}))
+    headers = {'Authorization': config['ckan-api-key']}
+    if files is None:
+        data = json.dumps(data or {})
+        headers['Content-Type'] = 'application/json'
+    resp = requests.post(url, headers=headers, data=data, files=files)
     try:
         jresp = resp.json()
     except ValueError:
@@ -43,7 +45,11 @@
         if resp.ok:
             return jresp['result']
         else:
-            error = jresp['error']
+            try:
+                error = jresp['error']
+            except TypeError:
+                # Sometimes, jresp is not as dict.
+                error = jresp
     raise CKANPostError('action %s failed: %s' % (action, error))