Implement CKANPublishable adapter and entity creation and update hooks
author Denis Laxalde <denis.laxalde@logilab.fr>
Fri, 17 Oct 2014 10:39:50 +0200
changeset 7 6e1d41bacacf
parent 0 14cd4f8f6aa2
child 12 183145b6bee7
Implement CKANPublishable adapter and entity creation and update hooks. Essentially move code from an application cube and add tests for hooks. Closes #4502966.
README
__pkginfo__.py
cubicweb-ckanpublish.spec
debian/control
entities.py
hooks.py
site_cubicweb.py
test/data/entities.py
test/data/hooks.py
test/data/schema.py
test/unittest_hooks.py
utils.py
--- a/README	Wed Oct 15 16:49:01 2014 +0200
+++ b/README	Fri Oct 17 10:39:50 2014 +0200
@@ -1,3 +1,23 @@
 Summary
 -------
-Publish data to a CKAN instance
+This cube enables data publishing to a CKAN opendata portal.
+
+
+Configuration
+-------------
+
+The cube essentially provides an adapter ``ICKANPublishable`` which relies on
+a ``ckan_dataset_id`` attribute on the adapted entity. As its name suggests,
+the latter is used to relate a CubicWeb entity to a CKAN dataset (the
+fundamental entity type in the CKAN data model). Most of the synchronization
+with the CKAN instance is done in hooks, so one would usually set tight
+security rules for this attribute, e.g.:
+::
+
+    class MyEntity(EntityType):
+        ckan_dataset_id = String(
+            description=_('identifier of corresponding CKAN dataset'),
+            __permissions__={'read': ('managers', 'users', 'guests'),
+                             'add': (),
+                             'update': ()},
+            )
--- a/__pkginfo__.py	Wed Oct 15 16:49:01 2014 +0200
+++ b/__pkginfo__.py	Fri Oct 17 10:39:50 2014 +0200
@@ -13,7 +13,9 @@
 description = 'Publish data to a CKAN instance'
 web = 'http://www.cubicweb.org/project/%s' % distname
 
-__depends__ =  {'cubicweb': '>= 3.19.5'}
+__depends__ =  {'cubicweb': '>= 3.19.5',
+                'requests': None,
+               }
 __recommends__ = {}
 
 classifiers = [
--- a/cubicweb-ckanpublish.spec	Wed Oct 15 16:49:01 2014 +0200
+++ b/cubicweb-ckanpublish.spec	Fri Oct 17 10:39:50 2014 +0200
@@ -21,6 +21,7 @@
 
 BuildRequires:  %{python} %{python}-setuptools
 Requires:       cubicweb >= 3.19.5
+Requires:       %{python}-requests
 
 %description
 Publish data to a CKAN instance
--- a/debian/control	Wed Oct 15 16:49:01 2014 +0200
+++ b/debian/control	Fri Oct 17 10:39:50 2014 +0200
@@ -10,6 +10,7 @@
 Architecture: all
 Depends:
  cubicweb-common (>= 3.19.5),
+ python-requests,
  ${python:Depends},
  ${misc:Depends},
 Description: Publish data to a CKAN instance
--- a/entities.py	Wed Oct 15 16:49:01 2014 +0200
+++ b/entities.py	Fri Oct 17 10:39:50 2014 +0200
@@ -15,3 +15,101 @@
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 
 """cubicweb-ckanpublish entity's classes"""
+
+import re
+import unicodedata
+
+from cubicweb.predicates import relation_possible
+from cubicweb.view import EntityAdapter
+
+from cubes.ckanpublish.utils import ckan_post
+
+
+def slugify(value):
+    """Converts to lowercase, strips leading and trailing whitespace, removes
+    everything but word characters (alphanumerics and underscores), whitespace
+    and hyphens, then collapses runs of whitespace/hyphens into one hyphen.
+
+    Adapted from django.utils.text and novaclient.utils.
+    """
+    if not isinstance(value, unicode):
+        value = unicode(value)
+    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
+    value = unicode(re.sub(r'[^\w\s-]', '', value).strip().lower())
+    return re.sub(r'[-\s]+', '-', value)
+
+
+class CKANPublishableAdapter(EntityAdapter):
+    """Adapter for entities that can be mapped to a CKAN dataset"""
+    __regid__ = 'ICKANPublishable'
+    __select__ = (EntityAdapter.__select__ &
+                  relation_possible('ckan_dataset_id', role='subject'))
+
+    @property
+    def ckan_name(self):
+        """name field suitable for CKAN (must be unique, hence the eid prefix)
+        """
+        mainattr = self.entity.e_schema.main_attribute().type
+        name = getattr(self.entity, mainattr)
+        return str(self.entity.eid) + '-' + slugify(name).lower()
+
+    def ckan_notes(self):
+        """Build the CKAN dataset notes attribute
+
+        Return the plain-text entity description, or None when the entity
+        type has no ``description`` attribute.
+        """
+        if 'description' not in self.entity.e_schema.subject_relations():
+            return
+        # XXX CKAN uses Markdown syntax for notes field.
+        return self.entity.printable_value('description', format='text/plain')
+
+    def ckan_get_organization_id(self, orgname):
+        """Return the CKAN ID of organization `orgname`; raise if not found"""
+        data = {'organizations': [orgname],
+                'all_fields': True}
+        res = ckan_post(self._cw.vreg.config, 'organization_list', data)
+        if res:
+            return res[0]['id']
+        else:
+            raise Exception('no organization named %s in CKAN instance' %
+                            orgname)
+
+    def dataset_title(self):
+        """Title of the CKAN dataset"""
+        return self.entity.dc_title()
+
+    def ckan_data(self):
+        """Return a dict with all data to build a CKAN dataset from entity"""
+        data = {'name': self.ckan_name,
+                'title': self.dataset_title(),
+                'notes': self.ckan_notes(),
+                'maintainer': None,
+                'maintainer_email': None,
+               }
+        orgname = self._cw.vreg.config.get('ckan-organization')
+        data['owner_org'] = self.ckan_get_organization_id(orgname)
+        maintainer = self.dataset_maintainer()
+        if maintainer:
+            data['maintainer'] = maintainer.dc_long_title()
+            if maintainer.primary_email:
+                data['maintainer_email'] = maintainer.primary_email[0].address
+        data['tags'] = list(self.dataset_tags())
+        data['extras'] = list(self.dataset_extras())
+        return data
+
+    def dataset_extras(self):
+        """Extra fields for the dataset (none by default)"""
+        return []
+
+    def dataset_tags(self):
+        """Yield a ``{'name': slug}`` dict for each tag of the entity"""
+        if self.entity.e_schema.has_relation('tags', role='object'):
+            for tag in self.entity.reverse_tags:
+                yield {'name': slugify(tag.name)}
+
+    def dataset_maintainer(self):
+        """May return a CWUser entity corresponding to the maintainer of the
+        dataset-like entity (None by default).
+        """
+        return None
--- a/hooks.py	Wed Oct 15 16:49:01 2014 +0200
+++ b/hooks.py	Fri Oct 17 10:39:50 2014 +0200
@@ -15,3 +15,89 @@
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 
 """cubicweb-ckanpublish specific hooks and operations"""
+
+from requests.exceptions import RequestException
+
+from cubicweb import ValidationError
+from cubicweb.predicates import adaptable, score_entity
+from cubicweb.server import hook
+
+from cubes.ckanpublish.utils import (ckan_post, CKANPostError,
+                                     ckan_instance_configured)
+
+
+def create_dataset(config, eid, data):
+    """Create a CKAN dataset from `data` and return the dataset id. CKAN or
+    HTTP errors are re-raised as a ValidationError on entity `eid`.
+    """
+    try:
+        res = ckan_post(config, 'package_create', data)
+        return res['id']
+    except (CKANPostError, RequestException) as exc:
+        raise ValidationError(eid, {'ckan_dataset_id': unicode(exc)})
+
+
+def update_dataset(config, eid, datasetid, udata):
+    """Update a CKAN dataset, merging `udata` into its current data"""
+    try:
+        data = ckan_post(config, 'package_show', {'id': datasetid})
+        data.update(udata)
+        ckan_post(config, 'package_update', data)
+    except (CKANPostError, RequestException) as exc:
+        raise ValidationError(eid, {'ckan_dataset_id': unicode(exc)})
+
+
+def delete_dataset(config, eid, datasetid):
+    """Delete a CKAN dataset"""
+    try:
+        ckan_post(config, 'package_delete', {'id': datasetid})
+    except (CKANPostError, RequestException) as exc:
+        raise ValidationError(eid, {'ckan_dataset_id': unicode(exc)})
+
+
+class DeleteCKANDataSetHook(hook.Hook):
+    """Delete CKAN dataset upon deletion of the corresponding entity"""
+    __regid__ = 'ckanpublish.delete-ckan-dataset'
+    __select__ = (hook.Hook.__select__ & ckan_instance_configured &
+                  adaptable('ICKANPublishable') &
+                  score_entity(lambda x: x.ckan_dataset_id))
+    events = ('before_delete_entity', )
+
+    def __call__(self):
+        CKANDatasetOp.get_instance(self._cw).add_data(self.entity.eid)
+
+
+class AddOrUpdateCKANDataSetHook(hook.Hook):
+    """Add or update a CKAN dataset upon addition or update of an entity"""
+    __regid__ = 'ckanpublish.add-update-ckan-dataset'
+    __select__ = (hook.Hook.__select__ & ckan_instance_configured &
+                  adaptable('ICKANPublishable'))
+    events = ('after_add_entity', 'after_update_entity', )
+
+    def __call__(self):
+        CKANDatasetOp.get_instance(self._cw).add_data(self.entity.eid)
+
+
+class CKANDatasetOp(hook.DataOperationMixIn, hook.Operation):
+    """Operation to create, update or delete a CKAN dataset"""
+
+    def precommit_event(self):
+        for eid in self.get_data():
+            entity = self.cnx.entity_from_eid(eid)
+            datasetid = entity.ckan_dataset_id
+            config = self.cnx.vreg.config
+            if self.cnx.deleted_in_transaction(eid):
+                delete_dataset(config, eid, datasetid)
+                self.info('deleted CKAN dataset %s', datasetid)
+            else:
+                data = entity.cw_adapt_to('ICKANPublishable').ckan_data()
+                if datasetid is not None:
+                    update_dataset(config, eid, datasetid, data)
+                    self.info('updated %s fields in CKAN dataset %s',
+                              data.keys(), datasetid)
+                else:
+                    datasetid = create_dataset(config, eid, data)
+                    self.cnx.execute(
+                        'SET X ckan_dataset_id %(dsid)s WHERE X eid %(eid)s',
+                        {'eid': eid, 'dsid': datasetid})
+                    self.info('created CKAN dataset %s', datasetid)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/site_cubicweb.py	Fri Oct 17 10:39:50 2014 +0200
@@ -0,0 +1,23 @@
+from logilab.common.configuration import REQUIRED
+
+options = (
+    ('ckan-baseurl',
+     {'type' : 'string',
+      'default': REQUIRED,
+      'help': u'base url of the CKAN instance to push data to',
+      'group': 'ckan', 'level': 0,
+      }),
+    ('ckan-api-key',
+     {'type' : 'string',
+      'default': REQUIRED,
+      'help': u'an API key for the CKAN instance',
+      'group': 'ckan', 'level': 0,
+      }),
+    ('ckan-organization',
+     {'type' : 'string',
+      'default': REQUIRED,
+      'help': u'the organization under which dataset will be created',
+      'group': 'ckan', 'level': 0,
+      }),
+)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/data/entities.py	Fri Oct 17 10:39:50 2014 +0200
@@ -0,0 +1,11 @@
+from cubicweb.predicates import is_instance
+
+from cubes.ckanpublish.entities import CKANPublishableAdapter
+
+
+class CWDataSetCKANPublish(CKANPublishableAdapter):
+    __select__ = CKANPublishableAdapter.__select__ & is_instance('CWDataSet')
+
+    def dataset_maintainer(self):
+        if self.entity.maintainer:
+            return self.entity.maintainer[0]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/data/hooks.py	Fri Oct 17 10:39:50 2014 +0200
@@ -0,0 +1,13 @@
+from cubicweb.server import hook
+
+from cubes.ckanpublish.hooks import CKANDatasetOp
+
+
+class AddUpdateMaintainerHook(hook.Hook):
+    __regid__ = 'ckanpublish-tests.add-update-maintainer'
+    __select__ = (hook.Hook.__select__ &
+                  hook.match_rtype('maintainer', frometypes=('CWDataSet',)))
+    events = ('after_add_relation', 'after_delete_relation')
+
+    def __call__(self):  # schedule a CKAN sync of the relation's subject
+        CKANDatasetOp.get_instance(self._cw).add_data(self.eidfrom)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/data/schema.py	Fri Oct 17 10:39:50 2014 +0200
@@ -0,0 +1,12 @@
+from yams.buildobjs import EntityType, String, SubjectRelation
+
+
+class CWDataSet(EntityType):
+    name = String(required=True)
+    description = String()
+    ckan_dataset_id = String(
+        __permissions__={'read': ('managers', 'users', 'guests'),
+                         'add': (),
+                         'update': ()},
+        )
+    maintainer = SubjectRelation('CWUser', cardinality='?*')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/unittest_hooks.py	Fri Oct 17 10:39:50 2014 +0200
@@ -0,0 +1,89 @@
+"""cubicweb-ckanpublish unit tests for hooks"""
+
+from cubicweb.devtools.testlib import CubicWebTC
+
+from cubes.ckanpublish.utils import ckan_post, CKANPostError
+
+
+class CKANPublishHooksTC(CubicWebTC):
+
+    dataset_owner_org = None
+
+    @classmethod
+    def setUpClass(cls):
+        try:
+            from ckanconfig import baseurl, apikey, organization
+        except ImportError:
+            cls.__unittest_skip__ = True
+            cls.__unittest_skip_why__ = 'no CKAN instance configuration found'
+        else:
+            cls.ckan_config = {'ckan-baseurl': baseurl,
+                               'ckan-api-key': apikey,
+                               'ckan-organization': organization}
+            cls.dataset_owner_org = organization
+
+    def setup_database(self):
+        for k, v in self.ckan_config.items():
+            self.config.global_set_option(k, v)
+
+    def tearDown(self):
+        with self.admin_access.repo_cnx() as cnx:
+            # Delete CWDataSet entities linked to a CKAN dataset, so that the
+            # latter gets deleted.
+            # However, datasets will still have to be purged from the web ui.
+            cnx.execute('DELETE CWDataSet X WHERE EXISTS(X ckan_dataset_id I)')
+            cnx.commit()
+        super(CKANPublishHooksTC, self).tearDown()
+
+    def test_entity_creation(self):
+        with self.admin_access.repo_cnx() as cnx:
+            entity = cnx.create_entity('CWDataSet', name=u'buz buz ?!',
+                                       description=u'opendata buzzzz')
+            cnx.commit()
+            yield self._check_entity_create, cnx, entity
+            yield self._check_entity_update, cnx, entity
+            yield self._check_entity_delete, cnx, entity
+
+    def _check_entity_create(self, cnx, entity):
+        self.set_description('entity creation')
+        self.assertIsNotNone(entity.ckan_dataset_id)
+        result = ckan_post(self.ckan_config, 'package_show',
+                           {'id': entity.ckan_dataset_id})
+        self.assertEqual(result['name'], '%s-buz-buz' % entity.eid)
+        self.assertEqual(result['title'], entity.name)
+        self.assertEqual(result['notes'], entity.description)
+        cpublish = entity.cw_adapt_to('ICKANPublishable')
+        organization_id = cpublish.ckan_get_organization_id(
+            self.dataset_owner_org)
+        self.assertEqual(result['owner_org'], organization_id)
+
+    def _check_entity_update(self, cnx, entity):
+        self.set_description('entity update')
+        entity.cw_set(description=u'no this is actually serious')
+        cnx.commit()
+        result = ckan_post(self.ckan_config, 'package_show',
+                           {'id': entity.ckan_dataset_id})
+        self.assertEqual(result['notes'], entity.description)
+        user = self.create_user(cnx, 'toto', firstname=u'T.',
+                                surname=u'Oto', email=u'to@t.o')
+        entity.cw_set(maintainer=user)
+        cnx.commit()
+        result = ckan_post(self.ckan_config, 'package_show',
+                           {'id': entity.ckan_dataset_id})
+        self.assertEqual(result['maintainer'], 'T. Oto')
+        self.assertEqual(result['maintainer_email'], 'to@t.o')
+
+    def _check_entity_delete(self, cnx, entity):
+        self.set_description('entity deletion')
+        ckanid = entity.ckan_dataset_id
+        entity.cw_delete()
+        cnx.commit()
+        result = ckan_post(self.ckan_config, 'package_show',
+                           {'id': ckanid})
+        self.assertEqual(result['state'], 'deleted')
+
+
+if __name__ == '__main__':
+    from logilab.common.testlib import unittest_main
+    unittest_main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Fri Oct 17 10:39:50 2014 +0200
@@ -0,0 +1,53 @@
+# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr -- mailto:contact@logilab.fr
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+"""cubicweb-ckanpublish utilities"""
+
+import json
+from urllib import basejoin
+
+import requests
+
+from cubicweb.predicates import objectify_predicate
+
+
+class CKANPostError(Exception):
+    """CKAN post action error"""
+
+
+def ckan_post(config, action, data=None):
+    url = basejoin(config['ckan-baseurl'], 'api/3/action/' + action)
+    headers = {'Authorization': config['ckan-api-key'],
+               'Content-Type': 'application/json'}
+    r = requests.post(url, headers=headers, data=json.dumps(data or {}))
+    if not r.ok:
+        error = r.json()['error']
+        raise CKANPostError('action %s failed: %s' % (action, error))
+    return r.json()['result']
+
+
+@objectify_predicate
+def ckan_instance_configured(cls, req, **kwargs):
+    """Return 1 if CKAN instance configuration is defined.
+
+    (Mostly useful in tests to disable CKAN hooks.)
+    """
+    config = req.vreg.config
+    for option in ('ckan-baseurl', 'ckan-api-key', 'ckan-organization'):
+        if not config.get(option):
+            req.error('CKAN instance configuration incomplete, missing "%s" '
+                      'option' % option)
+            return 0
+    return 1