[ccplugin] Add a CWCTL command for identifier and file path anonymization, with unit tests, closes #2925973
author Vladimir Popescu <vladimir.popescu@logilab.fr>
Wed, 03 Jul 2013 16:20:01 +0200
changeset 157 69c1d0737721
parent 155 f2d3d2816f2d
child 158 ed68e70ae6d2
[ccplugin] Add a CWCTL command for identifier and file path anonymization, with unit tests, closes #2925973
ccplugin.py
test/unittest_remake-uuid.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ccplugin.py	Wed Jul 03 16:20:01 2013 +0200
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+
+import sys
+from os import path as osp
+import shutil
+import logging
+
+from cubicweb import AuthenticationError
+from cubicweb import cwconfig
+from cubicweb.utils import make_uid
+from cubicweb.server.utils import manager_userpasswd
+from cubicweb.dbapi import in_memory_repo_cnx
+from cubicweb.toolsutils import Command
+from cubicweb.cwctl import CWCTL
+
+
+def copy_file(old_fpath, new_fpath, logger=None):
+    """ Safe copy of a file from one path to another
+    """
+    if osp.isfile(old_fpath):
+        if logger:
+            logger.info('copy_file: Attempting to copy file %s to %s...\n'
+                        % (old_fpath, new_fpath))
+        try:
+            shutil.copyfile(old_fpath, new_fpath)
+        except IOError as ioer:
+            logger.error('copy_file: %s\n' % ioer)
+            raise
+
+def update_ident_fpath(session, subject_eid, uuid, logger=None):
+    """ Update the identifier and paths of a subject
+    """
+    if logger:
+        logger.info('update_ident_fpath: processing subject %d...\n' % subject_eid)
+    session.execute('SET X identifier %(i)s WHERE X eid %(e)s', {'i': uuid, 'e': subject_eid})
+    rset = session.execute('Any X WHERE X concerns S, S eid %(e)s, '
+                           'X is QuestionnaireRun', {'e': subject_eid}, build_descr=False)
+    for concerned_eid in rset:
+        concerned_eid = concerned_eid[0]
+        if logger:
+            logger.info('update_ident_fpath: processing related entity %d\n' % concerned_eid)
+        uuid = make_uid() + '_' + unicode(concerned_eid)
+        session.execute('SET X identifier %(i)s WHERE X eid %(e)s', {'i': uuid, 'e': concerned_eid})
+    rset = session.execute('Any X, F WHERE X concerns S, S eid %(e)s, X filepath F, '
+                           'X is IN (Scan, GenomicMeasure)', {'e': subject_eid}, build_descr=False)
+    for concerned_eid, concerned_fpath in rset:
+        if logger:
+            logger.info('update_ident_fpath: processing related entity %d\n' % concerned_eid)
+        uuid = make_uid() + '_' + unicode(concerned_eid)
+        session.execute('SET X identifier %(i)s WHERE X eid %(e)s', {'i': uuid, 'e': concerned_eid})
+        new_filepath = uuid + '_' + unicode(concerned_eid) + osp.basename(concerned_fpath)
+        copy_file(concerned_fpath, new_filepath, logger)
+        session.execute('SET X filepath %(i)s WHERE X eid %(e)s', {'i': new_filepath, 'e': concerned_eid})
+
+
+class BrainomicsRemakeUidCommand(Command):
+    """ Change subjects uids for anonymization.
+
+    <instance>
+      the identifier of the instance to connect.
+
+    """
+    name = 'remake-uid'
+    arguments = '<instance>'
+
+    def _init_cw_connection(self, appid):
+        logger = logging.getLogger('brainomics')
+        config = cwconfig.instance_configuration(appid)
+        sourcescfg = config.sources()
+        config.set_sources_mode(('system',))
+        cnx = repo = None
+        while cnx is None:
+            try:
+                login = sourcescfg['admin']['login']
+                pwd = sourcescfg['admin']['password']
+            except KeyError:
+                login, pwd = manager_userpasswd()
+            try:
+                repo, cnx = in_memory_repo_cnx(config, login=login, password=pwd)
+            except AuthenticationError:
+                logging.warning('remake-uuid: wrong user/password')
+            else:
+                break
+        session = repo._get_session(cnx.sessionid)
+        return cnx, session
+
+    def run(self, args):
+        logger = logging.getLogger('brainomics')
+        if len(args) > 0:
+            appid = args.pop(0)
+            cw_cnx, session = self._init_cw_connection(appid)
+            session.set_cnxset()
+            debug = True if len(args) > 0 and args.pop(0) == 'D' else False
+            for subject_eid in session.execute('Any X WHERE X is Subject', build_descr=False):
+                subject_eid = subject_eid[0]
+                uuid = unicode(make_uid())
+                update_ident_fpath(session, subject_eid, uuid, logger)
+            session.commit()
+        else:
+            logger.error("remake-uuid: Please specify Brainomics instance")
+
+
+CWCTL.register(BrainomicsRemakeUidCommand)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/unittest_remake-uuid.py	Wed Jul 03 16:20:01 2013 +0200
@@ -0,0 +1,58 @@
+import os
+import os.path as osp
+import tempfile
+from cubicweb.utils import make_uid
+from cubicweb.devtools.testlib import CubicWebTC
+from cubes.brainomics.ccplugin import copy_file, update_ident_fpath
+
+
+
+class TestBrainomicsRemakeUid(CubicWebTC):
+
+    def setup_database(self):
+        super(TestBrainomicsRemakeUid, self).setup_database()
+        ce = self.request().create_entity
+        self.rql = self.session.execute
+        _study = ce('Study', name=u'Test study', data_filepath=u'/tmp/test_data',
+                    description=u'Test study with dummy data', keywords=u'test;dummy')
+        self.subject_1 = ce('Subject', identifier=u'demo_subject_1', gender=u'female',
+                            handedness=u'right')
+        self.subject_2 = ce('Subject', identifier=u'demo_subject_2', gender=u'male',
+                            handedness=u'ambidextrous')
+        self.scan_11 = ce('Scan', identifier=u'anat_1709',
+                          label=u'anatomy', type=u'normalized T1', format=u'nii.hz',
+                          completed=True, valid=True, filepath=u'1709/anat.nii.gz',
+                          concerns=self.subject_1, related_study=_study)
+        self.scan_12 = ce('Scan', identifier=u'anat_1899',
+                          label=u'anatomy', type=u'normalized T1', format=u'nii.hz',
+                          completed=True, valid=True, filepath=u'1899/anat.nii.gz',
+                          concerns=self.subject_1, related_study=_study)
+        self.scan_21 = ce('Scan', identifier=u'anat_2325',
+                          label=u'anatomy', type=u'normalized T1', format=u'nii.hz',
+                          completed=True, valid=True, filepath=u'2325/anat.nii.gz',
+                          concerns=self.subject_2, related_study=_study)
+        self.scan_22 = ce('Scan', identifier=u'anat_3785',
+                          label=u'anatomy', type=u'normalized T1', format=u'nii.hz',
+                          completed=True, valid=True, filepath=u'3785/anat.nii.gz',
+                          concerns=self.subject_2, related_study=_study)
+
+    def test_copy_file(self):
+        for fpath in (scan.filepath for scan in (self.scan_11, self.scan_12, self.scan_21, self.scan_22)):
+            dest_path = unicode(make_uid()) + '_' + osp.basename(fpath)
+            with tempfile.NamedTemporaryFile(suffix=osp.basename(fpath)) as f:
+                copy_file(f.name, dest_path)
+            if osp.isfile(dest_path):
+                os.remove(dest_path)
+
+    def test_update_ident_fpath(self):
+        old_fpaths = self.rql('Any F WHERE X is IN (Scan, GenomicMeasure), X filepath F', build_descr=False)
+        for s in self.rql('Any X WHERE X is Subject'):
+            update_ident_fpath(self.session, s[0], unicode(make_uid()))
+        new_fpaths = self.rql('Any F WHERE X is IN (Scan, GenomicMeasure), X filepath F', build_descr=False)
+
+        self.assertNotEqual(old_fpaths, new_fpaths)
+
+
+if __name__ == '__main__':
+    from logilab.common.testlib import unittest_main
+    unittest_main()