Rely on cubicweb-skos's extid2eid mapping during LCSV data import
author: Denis Laxalde <denis.laxalde@logilab.fr>
Tue, 20 Feb 2018 10:12:22 +0100
changeset 2950 960c8c66cfc2
parent 2947 aa9acf6aa53a
child 2952 f30a729c4a9d
Rely on cubicweb-skos's extid2eid mapping during LCSV data import. We drop the monkeypatches and hacks introduced in 49d118aa8254 in 2016, which were meant to be temporary pending a cubicweb-skos release that came out a long time ago. We now build the extid2eid dict before importing LCSV concept schemes and pass it to store_skos_extentities(), so that it is re-used for all files instead of being re-built for each one. Both the lcsv_import and lcsv_check functions are updated, as the latter is used in test_dataimport.py as a kind of "mock" and must therefore respect the former's interface. Also note that we now have to update the 'extid2eid' dict after creating a new concept scheme (with init_seda_scheme()): while this sounds perfectly reasonable, I don't completely understand how this worked without it before... We update the requirement on cubicweb-skos so that the latest release is used (some previous release would probably work as well, but I'm not sure which one, and it's not so easy to test).
cubicweb-seda.spec
cubicweb_seda/__pkginfo__.py
cubicweb_seda/dataimport.py
debian/control
--- a/cubicweb-seda.spec	Tue Feb 20 10:13:18 2018 +0100
+++ b/cubicweb-seda.spec	Tue Feb 20 10:12:22 2018 +0100
@@ -22,7 +22,7 @@
 BuildRequires:  %{python} %{python}-setuptools
 Requires:       cubicweb >= 3.25.3
 Requires:       cubicweb-eac
-Requires:       cubicweb-skos >= 0.12.1
+Requires:       cubicweb-skos >= 1.3.0
 Requires:       cubicweb-compound >= 0.7
 Requires:       cubicweb-relationwidget >= 0.4l
 Requires:       cubicweb-squareui
--- a/cubicweb_seda/__pkginfo__.py	Tue Feb 20 10:13:18 2018 +0100
+++ b/cubicweb_seda/__pkginfo__.py	Tue Feb 20 10:12:22 2018 +0100
@@ -17,7 +17,7 @@
     'cubicweb': '>= 3.25.4, < 3.27',
     'six': '>= 1.4.0',
     'cubicweb-eac': None,
-    'cubicweb-skos': '>= 0.12.1',
+    'cubicweb-skos': '>= 1.3.0',
     'cubicweb-compound': '>= 0.7',
     'cubicweb-relationwidget': '>= 0.4',
     'cubicweb-squareui': None,
--- a/cubicweb_seda/dataimport.py	Tue Feb 20 10:13:18 2018 +0100
+++ b/cubicweb_seda/dataimport.py	Tue Feb 20 10:12:22 2018 +0100
@@ -25,7 +25,10 @@
 
 from cubicweb.server.checkintegrity import reindex_entities
 from cubicweb.dataimport.stores import NoHookRQLObjectStore
-from cubicweb.dataimport.importer import SimpleImportLog
+from cubicweb.dataimport.importer import (
+    SimpleImportLog,
+    cwuri2eid,
+)
 
 from cubes.skos import lcsv, sobjects as skos
 
@@ -105,16 +108,16 @@
 )]
 
 
-def lcsv_import(cnx, store, fname, scheme_uri):
+def lcsv_import(cnx, store, fname, scheme_uri, **kwargs):
     """Actually import LCSV data file."""
     with open(fname) as stream:
         extentities = skos.lcsv_extentities(stream, scheme_uri, ';', 'utf-8')
         import_log = SimpleImportLog(fname)
         skos.store_skos_extentities(cnx, store, extentities, import_log,
-                                    raise_on_error=True, extid_as_cwuri=False)
+                                    raise_on_error=True, extid_as_cwuri=False, **kwargs)
 
 
-def lcsv_check(cnx, store, fname, scheme_uri, separator=';'):
+def lcsv_check(cnx, store, fname, scheme_uri, separator=';', **kwargs):
     """Simply check data file consistency."""
     counter = count()
 
@@ -161,22 +164,16 @@
 
 def import_seda_schemes(cnx, lcsv_import=lcsv_import, lcsv_files=LCSV_FILES):
     """Import all LCSV data files defined in LCSV_FILES."""
-    orig_cwuri2eid = post321_import.cwuri2eid
-    try:
-        _import_seda_schemes(cnx, lcsv_import, lcsv_files)
-    finally:
-        post321_import.cwuri2eid = orig_cwuri2eid
-
-
-def _import_seda_schemes(cnx, lcsv_import=lcsv_import, lcsv_files=LCSV_FILES):
-    """Import all LCSV data files defined in LCSV_FILES."""
-    feed_extid2eid_cache(cnx)
+    extid2eid = cwuri2eid(cnx, ('ConceptScheme', 'Label'))
+    # concepts and external URIs may come from any source
+    extid2eid.update(cwuri2eid(cnx, ('Concept', 'ExternalUri')))
     store = get_store(cnx)
     for title, rtypes, etypes, fname in lcsv_files:
         if not cnx.find('ConceptScheme', title=title):
             print('importing', title.encode('utf-8'))
             scheme = init_seda_scheme(cnx, title)
-            lcsv_import(cnx, store, fname, scheme.cwuri)
+            extid2eid[scheme.cwuri] = scheme.eid
+            lcsv_import(cnx, store, fname, scheme.cwuri, extid2eid=extid2eid)
             if not isinstance(rtypes, tuple):
                 rtypes = (rtypes,)
             for rtype in rtypes:
@@ -193,23 +190,3 @@
     if not isinstance(store, NoHookRQLObjectStore):
         # when using the massive store, we need explicit reindexation
         reindex_entities(cnx.repo.schema, cnx, etypes=['Concept', 'ConceptScheme'])
-
-
-# hack to avoid recomputing extid2eid mapping for each lcsv file, this is costly with massive store
-# since index may have been removed
-from logilab.common.decorators import monkeypatch  # noqa
-from cubicweb.dataimport.importer import cwuri2eid as orig_cwuri2eid  # noqa
-from cubes.skos import post321_import  # noqa
-
-EXTID2EID_CACHE = None
-
-
-def feed_extid2eid_cache(cnx):
-    global EXTID2EID_CACHE
-    EXTID2EID_CACHE = orig_cwuri2eid(cnx, ('ConceptScheme', 'Label'))
-    # though concepts and external URIs may come from any source
-    EXTID2EID_CACHE.update(patched_cwuri2eid(cnx, ('Concept', 'ExternalUri')))
-
-
-def patched_cwuri2eid(cnx, etypes, source_eid=None):
-    return EXTID2EID_CACHE
--- a/debian/control	Tue Feb 20 10:13:18 2018 +0100
+++ b/debian/control	Tue Feb 20 10:12:22 2018 +0100
@@ -14,7 +14,7 @@
 Depends:
  python-cubicweb (>= 3.25.3),
  cubicweb-eac,
- cubicweb-skos (>= 0.12.1),
+ cubicweb-skos (>= 1.3.0),
  cubicweb-compound (>= 0.7),
  cubicweb-relationwidget (>= 0.4),
  cubicweb-squareui,