Monkey-patch skos's cwuri2eid to build extid2eid dict only once
authorSylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 19 Oct 2016 15:55:33 +0200
changeset 1891 49d118aa8254
parent 1890 1019e4f3227c
child 1892 6a60d26286af
Monkey-patch skos's cwuri2eid to build the extid2eid dict only once. Otherwise it is rebuilt for each imported file, which is very costly because table indexes have been dropped while the massive store is in use. This will be done properly in the next skos release, which will accept an extid2eid map as an optional argument.
dataimport.py
--- a/dataimport.py	Wed Oct 19 15:53:28 2016 +0200
+++ b/dataimport.py	Wed Oct 19 15:55:33 2016 +0200
@@ -129,6 +129,7 @@
 
 def import_seda_schemes(cnx, lcsv_import=lcsv_import):
     """Import all LCSV data files defined in LCSV_FILES."""
+    feed_extid2eid_cache(cnx)
     store = get_store(cnx)
     for title, rtypes, etypes, fname in LCSV_FILES:
         if not cnx.find('ConceptScheme', title=title):
@@ -148,3 +149,24 @@
             store.flush()
     store.commit()
     store.finish()
+
+
+# HACK: avoid recomputing the extid2eid mapping for each LCSV file; this is costly with the
+# massive store, since table indexes may have been dropped
+from logilab.common.decorators import monkeypatch  # noqa
+from cubicweb.dataimport.importer import cwuri2eid as orig_cwuri2eid  # noqa
+from cubes.skos import post321_import  # noqa
+
+EXTID2EID_CACHE = None
+
+
+def feed_extid2eid_cache(cnx):
+    global EXTID2EID_CACHE
+    EXTID2EID_CACHE = orig_cwuri2eid(cnx, ('ConceptScheme', 'Label'))
+    # though concepts and external URIs may come from any source
+    EXTID2EID_CACHE.update(cwuri2eid(cnx, ('Concept', 'ExternalUri')))
+
+
+@monkeypatch(post321_import)
+def cwuri2eid(cnx, etypes, source_eid=None):
+    return EXTID2EID_CACHE