Move seda schemes initialization to a dataimport module
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 28 Sep 2016 16:19:56 +0200
changeset 1596 7868ca263be8
parent 1595 d975d2435d7d
child 1597 d39735336352
Move SEDA schemes initialization to a dataimport module, with some code cleanups, and refactor it so that the validity of the data files can be checked from tests.
dataimport.py
migration/postcreate.py
test/test_dataimport.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dataimport.py	Wed Sep 28 16:19:56 2016 +0200
@@ -0,0 +1,128 @@
+# coding: utf-8
+# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr -- mailto:contact@logilab.fr
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""cubicweb-seda data import tools"""
+
+from __future__ import print_function
+
+from itertools import count
+from os.path import join, dirname
+
+from six import text_type
+
+from cubicweb.dataimport.stores import NoHookRQLObjectStore
+from cubicweb.dataimport.importer import SimpleImportLog
+
+from cubes.skos import lcsv, sobjects as skos
+
+
+LCSV_FILES = (
+    # schemes extracted from SEDA 2 XSD
+    (u'SEDA 2 : Actions',
+     'seda_final_action', 'SEDAStorageRule',
+     'final_action_storage_code_type.csv'),
+    (u'SEDA 2 : Unités de mesure',
+     'seda_unit', ('SEDAWidth', 'SEDAHeight', 'SEDADepth',
+                   'SEDADiameter', 'SEDALength', 'SEDAThickness'),
+     'measurement_units_type.csv'),
+    (u'SEDA 2 : Unités de poids',
+     'seda_unit', 'SEDAWeight',
+     'measurement_weight_units_type.csv'),
+    (u'SEDA 2 : Types de mot-clé',
+     'seda_keyword_type_to', (),
+     'code_keyword_type.csv'),
+    (u'SEDA : Niveaux de description',
+     'seda_description_level', (),
+     'level_type.csv'),
+    # schemes extracted from SEDA 2 XSD, completed to support earlier SEDA versions
+    (u'SEDA : Sort final',
+     'seda_final_action', 'SEDAAppraisalRule',
+     'final_action_appraisal_code_type.csv'),
+    # schemes extracted from earlier SEDA versions
+    (u"SEDA : Durée d'utilité administrative",
+     'seda_rule', 'SEDASeqAppraisalRuleRule',
+     'dua.csv'),
+    (u"SEDA : Codes de restriction d'accès",
+     'seda_rule', 'SEDASeqAccessRuleRule',
+     'access_control.csv'),
+    # other schemes
+    (u'Types MIME',
+     'seda_mime_type_to', (),
+     'mime_types.csv'),
+    (u"Types d'évènement",
+     'seda_event_type_to', (),
+     'event_types.csv'),
+    (u'Encodages (extraits du schéma UN/CEFACT)',
+     'seda_encoding_to', (),
+     'encodings.csv'),
+    (u'Formats de fichier (PRONOM)',
+     'seda_format_id_to', (),
+     'file_formats.csv'),
+    (u'Niveau de classification (IGI 1300)',
+     'seda_classification_level', (),
+     'classification_levels.csv'),
+    (u'Langues (ISO-639-3)',
+     ('seda_language_to', 'seda_description_language_to'), (),
+     'languages.csv'),
+)
+
+
+def lcsv_import(cnx, store, fname, scheme_uri):
+    """Actually import LCSV data file."""
+    with open(join(dirname(__file__), 'migration', 'data', fname)) as stream:
+        extentities = skos.lcsv_extentities(stream, scheme_uri, ';', 'utf-8')
+        import_log = SimpleImportLog(fname)
+        skos.store_skos_extentities(cnx, store, extentities, import_log,
+                                    raise_on_error=True, extid_as_cwuri=False)
+
+
+def lcsv_check(cnx, store, fname, scheme_uri):
+    """Simply check data file consistency."""
+    counter = count()
+
+    def uri_generator(val):
+        return text_type(next(counter)) + val
+
+    with open(join(dirname(__file__), 'migration', 'data', fname)) as stream:
+        lcsv2rdf = lcsv.LCSV2RDF(stream, ';', 'utf-8',
+                                 # XXX drop once skos is released
+                                 uri_generator=uri_generator, uri_cls=text_type)
+        list(lcsv2rdf.triples())
+
+
+def import_seda_schemes(cnx, lcsv_import=lcsv_import):
+    """Import all LCSV data files defined in LCSV_FILES"""
+    store = NoHookRQLObjectStore(cnx)
+    for title, rtypes, etypes, fname in LCSV_FILES:
+        if not cnx.find('ConceptScheme', title=title):
+            print('importing', title.encode('utf-8'))
+            description = u'edition 2009' if title.startswith('SEDA :') else None
+            scheme = cnx.create_entity('ConceptScheme', title=title,
+                                       description=description)
+            lcsv_import(cnx, store, fname, scheme.cwuri)
+            if not isinstance(rtypes, tuple):
+                rtypes = (rtypes,)
+            for rtype in rtypes:
+                rtype_e = cnx.find('CWRType', name=rtype).one()
+                scheme.cw_set(scheme_relation_type=rtype_e)
+            if not isinstance(etypes, tuple):
+                etypes = (etypes,)
+            for etype in etypes:
+                etype_e = cnx.find('CWEType', name=etype).one()
+                scheme.cw_set(scheme_entity_type=etype_e)
+    store.flush()
+    store.commit()
+    store.finish()
--- a/migration/postcreate.py	Wed Sep 28 16:16:42 2016 +0200
+++ b/migration/postcreate.py	Wed Sep 28 16:19:56 2016 +0200
@@ -18,108 +18,8 @@
 
 from __future__ import print_function
 
-from os.path import join, dirname
-
-from cubicweb.dataimport.stores import NoHookRQLObjectStore
-from cubicweb.dataimport.importer import SimpleImportLog
-
-from cubes.skos.sobjects import lcsv_extentities, store_skos_extentities
-
-
-def lcsv_import(cnx, store, fname, scheme_uri):
-    import_log = SimpleImportLog(fname)
-    stream = open(join(dirname(__file__), 'data', fname))
-    entities = lcsv_extentities(stream, scheme_uri, ';', 'utf-8')
-    store_skos_extentities(cnx, store, entities, import_log,
-                           raise_on_error=True, extid_as_cwuri=False)
-
-
-def import_seda_schemes(cnx):
-    store = NoHookRQLObjectStore(cnx)
-    for title, rtypes, etypes, fname in (
-            # schemes extracted from SEDA 2 XSD
-            (u'SEDA 2 : Actions',
-             'seda_final_action',
-             'SEDAStorageRule',
-             'final_action_storage_code_type.csv'),
-            (u'SEDA 2 : Unités de mesure',
-             'seda_unit',
-             ('SEDAWidth', 'SEDAHeight', 'SEDADepth',
-              'SEDADiameter', 'SEDALength', 'SEDAThickness'),
-             'measurement_units_type.csv'),
-            (u'SEDA 2 : Unités de poids',
-             'seda_unit',
-             'SEDAWeight',
-             'measurement_weight_units_type.csv'),
-            (u'SEDA 2 : Types de mot-clé',
-             'seda_keyword_type_to',
-             (),
-             'code_keyword_type.csv'),
-            (u'SEDA : Niveaux de description',
-             'seda_description_level',
-             (),
-             'level_type.csv'),
-            # schemes extracted from SEDA 2 XSD, completed to support earlier SEDA versions
-            (u'SEDA : Sort final',
-             'seda_final_action',
-             'SEDAAppraisalRule',
-             'final_action_appraisal_code_type.csv'),
-            # schemes extracted from earlier SEDA versions
-            (u"SEDA : Durée d'utilité administrative",
-             'seda_rule',
-             'SEDASeqAppraisalRuleRule',
-             'dua.csv'),
-            (u"SEDA : Codes de restriction d'accès",
-             'seda_rule',
-             'SEDASeqAccessRuleRule',
-             'access_control.csv'),
-            # other schemes
-            (u'Types MIME',
-             'seda_mime_type_to',
-             (),
-             'mime_types.csv'),
-            (u"Types d'évènement",
-             'seda_event_type_to',
-             (),
-             'event_types.csv'),
-            (u'Encodages (extraits du schéma UN/CEFACT)',
-             'seda_encoding_to',
-             (),
-             'encodings.csv'),
-            (u'Formats de fichier (PRONOM)',
-             'seda_format_id_to',
-             (),
-             'file_formats.csv'),
-            (u'Niveau de classification (IGI 1300)',
-             'seda_classification_level',
-             (),
-             'classification_levels.csv'),
-            (u'Langues (ISO-639-3)',
-             ('seda_language_to', 'seda_description_language_to'),
-             (),
-             'languages.csv'),
-    ):
-        if not cnx.find('ConceptScheme', title=title):
-            print('importing', title.encode('utf-8'))
-            description = u'edition 2009' if title.startswith('SEDA') else None
-            scheme = cnx.create_entity('ConceptScheme', title=title,
-                                       description=description)
-            lcsv_import(cnx, store, fname, scheme.cwuri)
-            if not isinstance(rtypes, tuple):
-                rtypes = (rtypes,)
-            for rtype in rtypes:
-                rtype_e = cnx.find('CWRType', name=rtype).one()
-                scheme.cw_set(scheme_relation_type=rtype_e)
-            if not isinstance(etypes, tuple):
-                etypes = (etypes,)
-            for etype in etypes:
-                etype_e = cnx.find('CWEType', name=etype).one()
-                scheme.cw_set(scheme_entity_type=etype_e)
-    store.flush()
-    store.commit()
-    store.finish()
-
 
 if config.mode != 'test':
+    from cubes.seda.dataimport import import_seda_schemes
     print('-> creating SEDA concept schemes')
     import_seda_schemes(cnx)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_dataimport.py	Wed Sep 28 16:19:56 2016 +0200
@@ -0,0 +1,33 @@
+# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr -- mailto:contact@logilab.fr
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+"""cubicweb-seda unit tests for dataimport"""
+
+from cubicweb.devtools.testlib import CubicWebTC
+
+from cubes.seda import dataimport
+
+
+class ConcepSchemeImportTC(CubicWebTC):
+
+    def test_import_seda_schemes(self):
+        with self.admin_access.client_cnx() as cnx:
+            dataimport.import_seda_schemes(cnx, lcsv_import=dataimport.lcsv_check)
+            self.assertEqual(len(cnx.find('ConceptScheme')), 14)
+
+
+if __name__ == '__main__':
+    import unittest
+    unittest.main()