[importers] Small fixes in importers after small schema changes
author Vincent Michel <vincent.michel@logilab.fr>
Thu, 17 Jan 2013 09:58:09 +0100
changeset 34 c70cf156ea24
parent 33 2896ae2c2e5e
child 35 1f80416802da
[importers] Small fixes in importers after small schema changes
importers/localizer.py
importers/mindboogle.py
importers/openfmri.py
--- a/importers/localizer.py	Thu Jan 17 14:02:33 2013 +0100
+++ b/importers/localizer.py	Thu Jan 17 09:58:09 2013 +0100
@@ -28,7 +28,7 @@
 HANDEDNESS_MAP = {
     'Right handed': u'right',
     'Left handed': u'left',
-    'Ambidextrous': u'both',
+    'Ambidextrous': u'ambidextrous',
     'Unknown': u'unknown',
 }
 
@@ -63,7 +63,7 @@
 def import_study(data_dir):
     """ Import a study from a data dir """
     data = {}
-    data['data_filepath'] = data_dir
+    data['data_filepath'] = osp.abspath(data_dir)
     data['name'] = u'localizer'
     data['description'] = u'localizer db'
     return data
@@ -103,13 +103,14 @@
         data['hosted_by'] = 'SHFJ'
     return data
 
-def import_assessment(data_dir, label):
+def import_assessment(data_dir, label, study_eid):
     """ Import an assessment """
     info = json.load(open('%s/subject.json' % data_dir))
     data = {}
     data['identifier'] = u'%s_%s' % (info['nip'], label)
     data['protocol'] = info['protocol']
     data['timepoint'] = info['date']
+    data['related_study'] = study_eid
     if info.get('date'):
         data['datetime'] = datetime.strptime(info['date'],
                                              '%Y-%m-%d %H:%M:%S')
@@ -413,7 +414,7 @@
                 score_defs[score_val['name']] = score_def.eid
                 def_eid = score_def.eid
             score_val = store.create_entity('ScoreValue', definition=def_eid,
-                                            value=value)
+                                            text=value)
             store.relate(subject.eid, 'related_infos', score_val.eid)
 
         # Design matrix #######################################################
@@ -425,7 +426,7 @@
                                          start=root_dir)))
 
         # Genetics ############################################################
-        gen_assessment = import_assessment(sid, 'genetics')
+        gen_assessment = import_assessment(sid, 'genetics', study.eid)
         gen_assessment = store.create_entity('Assessment', **gen_assessment)
         store.relate(subject.eid, 'concerned_by', gen_assessment.eid)
         measure = gen_measures[subject.identifier]
@@ -438,7 +439,7 @@
 
         # Anat & fMRI ############################################################
         # anat assessment
-        anat_assessment = import_assessment(sid, 'anat')
+        anat_assessment = import_assessment(sid, 'anat', study.eid)
         anat_assessment = store.create_entity('Assessment', **anat_assessment)
         store.relate(center_eid, 'holds', anat_assessment.eid)
         store.relate(subject.eid, 'concerned_by', anat_assessment.eid)
@@ -454,7 +455,7 @@
             store.relate(scan_anat.eid, 'uses_device', device_id)
             store.relate(anat_assessment.eid, 'generates', scan_anat.eid, 'Assessment')
         # fmri assessment
-        fmri_assessment = import_assessment(sid, 'fmri')
+        fmri_assessment = import_assessment(sid, 'fmri', study.eid)
         fmri_assessment = store.create_entity('Assessment', **fmri_assessment)
         store.relate(center_eid, 'holds', fmri_assessment.eid)
         store.relate(subject.eid, 'concerned_by', fmri_assessment.eid)
@@ -472,7 +473,7 @@
 
         # c-maps & t-maps #####################################################
         for dtype, label in (('c', 'c_maps'), ('t', 't_maps')):
-            assessment = import_assessment(sid, label)
+            assessment = import_assessment(sid, label, study.eid)
             assessment = store.create_entity('Assessment', **assessment)
             store.relate(center_eid, 'holds', assessment.eid)
             store.relate(subject.eid, 'concerned_by', assessment.eid)
@@ -494,7 +495,7 @@
                 store.relate(scan.eid, 'external_resources', dm_res.eid)
 
         # mask ################################################################
-        assessment = import_assessment(sid, 'mask')
+        assessment = import_assessment(sid, 'mask', study.eid)
         assessment = store.create_entity('Assessment', **assessment)
         store.relate(center_eid, 'holds', assessment.eid)
         store.relate(subject.eid, 'concerned_by', assessment.eid)
@@ -510,7 +511,7 @@
         store.relate(assessment.eid, 'generates', scan.eid, 'Assessment')
 
         # Questionnaire run ###################################################
-        assessment = import_assessment(sid, 'questionnaire')
+        assessment = import_assessment(sid, 'questionnaire', study.eid)
         assessment = store.create_entity('Assessment', **assessment)
         store.relate(center_eid, 'holds', assessment.eid)
         store.relate(subject.eid, 'concerned_by', assessment.eid)
--- a/importers/mindboogle.py	Thu Jan 17 14:02:33 2013 +0100
+++ b/importers/mindboogle.py	Thu Jan 17 09:58:09 2013 +0100
@@ -29,40 +29,52 @@
 if __name__ == '__main__':
     base_path = osp.abspath(sys.argv[4])
     store = Store(session)
-    study = store.create_entity('Study', data_filepath=unicode(base_path), name = u'mindboggle',
-                                description=u'''The Mindboggle-101 dataset includes manually labeled anatomical regions for 101 healthy subjects. The manually edited cortical labels follow sulcus landmarks according to the Desikan-Killiany-Tourville (DKT) protocol. See http://www.frontiersin.org/Brain_Imaging_Methods/10.3389/fnins.2012.00171/full “101 labeled brain images and a consistent human cortical labeling protocol”
+    global_study = store.create_entity('Study', data_filepath=unicode(base_path), name = u'mindboggle',
+                                       description=u'''The Mindboggle-101 dataset includes manually labeled anatomical regions for 101 healthy subjects. The manually edited cortical labels follow sulcus landmarks according to the Desikan-Killiany-Tourville (DKT) protocol. See http://www.frontiersin.org/Brain_Imaging_Methods/10.3389/fnins.2012.00171/full “101 labeled brain images and a consistent human cortical labeling protocol”
 Arno Klein, Jason Tourville. Frontiers in Brain Imaging Methods. 6:171. DOI: 10.3389/fnins.2012.00171
     and http://mindboggle.info/data.html''')
-    count_subject = 0
-    for subject_path in glob.iglob(osp.join(base_path,'*/*')):
-        subject_id = subject_path.split('/')[-1]
-        print '--->', subject_id
-        subject = store.create_entity('Subject', identifier=unicode(subject_id),
-                                      gender=u'unknown', handedness=u'unknown')
-        store.relate(subject.eid, 'related_studies', study.eid)
-        assessment = store.create_entity('Assessment',identifier=u'label',
-                                         protocol=u'mindboggle-label')
-        store.relate(subject.eid, 'concerned_by', assessment.eid)
-        for image in glob.iglob(osp.join(base_path, subject_path, '*.nii.gz')):
-            image_id = image.split('/')[-1].split('.')[0]
-            anat = {'identifier': u'%s_%s' % (subject_id, image_id),
-                    'label': u'labels' if 'labels' in image_id else u'anatomy',
-                    'format': u'nii.gz',
-                    'type': u'normalized T1',
-                    'related_study': study.eid,
-                    'filepath': unicode(osp.relpath(image, start=base_path)),
-                    'valid': True, 'completed': True, 'description': 'Image from Mindboggle'}
-            mri_data = store.create_entity('MRIData', sequence=u'T1')
-            # MRI data
-            mri_data = {'sequence': u'T1'}
-            mri_data.update(get_image_info(image))
-            mri_data = store.create_entity('MRIData', **mri_data)
-            anat['has_data'] = mri_data.eid
-            anat = store.create_entity('Scan', **anat)
-            store.relate(anat.eid, 'concerns', subject.eid, 'Scan')
-            store.relate(assessment.eid, 'generates', anat.eid, 'Assessment')
-        count_subject += 1
-    # Flush
-    store.flush()
-    store.commit()
-    print '-------> %s subjects imported' % count_subject
+    for study_path in ('Extra-18_volumes', 'MMRR-21_volumes', 'NKI-RS-22_volume',
+                       'NKI-TRT-20_volumes', 'OASIS-TRT-20_volumes'):
+        full_study_path = osp.join(base_path, study_path)
+        if not osp.exists(full_study_path):
+            print '%s does not exist, skip it' % full_study_path
+            continue
+        # Create study
+        count_subject = 0
+        study = store.create_entity('Study', data_filepath=unicode(full_study_path),
+                                    name = unicode(study_path))
+        for subject_path in glob.iglob(osp.join(full_study_path,'*')):
+            subject_id = subject_path.split('/')[-1]
+            print '--->', subject_id
+            subject = store.create_entity('Subject', identifier=unicode(subject_id),
+                                          gender=u'unknown', handedness=u'unknown')
+            store.relate(subject.eid, 'related_studies', study.eid)
+            store.relate(subject.eid, 'related_studies', global_study.eid)
+            assessment = store.create_entity('Assessment',identifier=u'label',
+                                             related_study=study.eid,
+                                             protocol=u'mindboggle-label')
+            store.relate(subject.eid, 'concerned_by', assessment.eid)
+            for image in glob.iglob(osp.join(full_study_path, subject_path, '*.nii.gz')):
+                image_id = image.split('/')[-1].split('.')[0]
+                anat = {'identifier': u'%s_%s' % (subject_id, image_id),
+                        'label': u'labels' if 'labels' in image_id else u'anatomy',
+                        'format': u'nii.gz',
+                        'type': u'normalized T1',
+                        'related_study': study.eid,
+                        'filepath': unicode(osp.relpath(image, start=full_study_path)),
+                        'valid': True, 'completed': True, 'description': 'Image from Mindboggle'}
+                mri_data = store.create_entity('MRIData', sequence=u'T1')
+                # MRI data
+                mri_data = {'sequence': u'T1'}
+                mri_data.update(get_image_info(image))
+                mri_data = store.create_entity('MRIData', **mri_data)
+                anat['has_data'] = mri_data.eid
+                anat['related_study'] = study.eid
+                anat = store.create_entity('Scan', **anat)
+                store.relate(anat.eid, 'concerns', subject.eid, 'Scan')
+                store.relate(assessment.eid, 'generates', anat.eid, 'Assessment')
+            count_subject += 1
+        # Flush
+        store.flush()
+        store.commit()
+        print '-------> %s subjects imported for %s' % (count_subject, study_path)
--- a/importers/openfmri.py	Thu Jan 17 14:02:33 2013 +0100
+++ b/importers/openfmri.py	Thu Jan 17 09:58:09 2013 +0100
@@ -30,11 +30,14 @@
     base_path = osp.abspath(sys.argv[4])
     store = Store(session)
     count_all_subject = 0
+    global_study = store.create_entity('Study', data_filepath=unicode(base_path), name = u'openfmri',
+                                       description=u'''OpenfMRI.org is a project dedicated to the free and open sharing of functional magnetic resonance imaging (fMRI) datasets, including raw data. See http://openfmri.org/''')
+    # All studies
     for study_path in glob.iglob(osp.join(base_path,'*/')):
         study_id = study_path[:-1].split('/')[-1]
         print '--->', study_id
         # XXX use True metadata for studies, P.I. ...
-        study = store.create_entity('Study', data_filepath=unicode(osp.abspath(base_path)),
+        study = store.create_entity('Study', data_filepath=unicode(osp.abspath(study_path)),
                                     name = u'openfmri-%s' % study_id, keywords='openfmri')
         # Device
         device_path = osp.join(study_path, 'scan_key.txt')
@@ -51,7 +54,7 @@
         if osp.exists(task_path):
             ext_resource = store.create_entity('ExternalResource', name=u'task_key',
                                                related_study=study.eid,
-                                               filepath=unicode(osp.relpath(task_path, start=base_path)))
+                                               filepath=unicode(osp.relpath(task_path, start=study_path)))
             task_eid = ext_resource.eid
         # Model contrasts and condition_key
         model_path = osp.join(study_path, 'models')
@@ -63,7 +66,7 @@
                 ext_resource = store.create_entity('ExternalResource',
                                                    name=unicode(osp.splitext(file_id)[0]),
                                                    related_study=study.eid,
-                                                   filepath=unicode(osp.relpath(model, start=base_path)))
+                                                   filepath=unicode(osp.relpath(model, start=study_path)))
                 model_files.setdefault(model_id, []).append(ext_resource.eid)
         # Demographics
         demographics_path = osp.join(study_path, 'demographics.txt')
@@ -86,17 +89,19 @@
             subject = store.create_entity('Subject', identifier=unicode(subject_id),
                                           age = age, gender=gender or u'unknown', handedness=u'unknown')
             store.relate(subject.eid, 'related_studies', study.eid)
+            store.relate(subject.eid, 'related_studies', global_study.eid)
             # anatomy
-            assessment = store.create_entity('Assessment',identifier=u'anat', protocol=u'anat')
+            assessment = store.create_entity('Assessment',identifier=u'anat', protocol=u'anat',
+                                             related_study=study.eid)
             store.relate(subject.eid, 'concerned_by', assessment.eid)
-            for image in glob.iglob(osp.join(base_path, subject_path, 'anatomy/*.nii.gz')):
+            for image in glob.iglob(osp.join(study_path, subject_path, 'anatomy/*.nii.gz')):
                 image_id = image.split('/')[-1].split('.')[0]
                 anat = {'identifier': u'%s_%s' % (subject_id, image_id),
                         'label': u'anatomy',
                         'format': u'nii.gz',
                         'type': u'normalized T1',
                         'related_study': study.eid,
-                        'filepath': unicode(osp.relpath(image, start=base_path)),
+                        'filepath': unicode(osp.relpath(image, start=study_path)),
                         'valid': True, 'completed': True, 'description': 'Image from OpenFmri'}
                 mri_data = store.create_entity('MRIData', sequence=u'T1')
                 # MRI data
@@ -111,7 +116,7 @@
                 store.relate(assessment.eid, 'generates', anat.eid, 'Assessment')
             # Model
             models = {}
-            for model in glob.iglob(osp.join(base_path, subject_path, 'model/*')):
+            for model in glob.iglob(osp.join(study_path, subject_path, 'model/*')):
                 for model_path in glob.iglob(osp.join(model, '*/*/*.txt')):
                     model_id = unicode(osp.splitext(model_path.split('/')[-1])[0])
                     task_id = unicode(model_path.split('/')[-2])
@@ -120,11 +125,11 @@
                     ext_resource = store.create_entity('ExternalResource',
                                                        name=name,
                                                        related_study=study.eid,
-                                                       filepath=unicode(osp.relpath(model_path, start=base_path)))
+                                                       filepath=unicode(osp.relpath(model_path, start=study_path)))
                     models.setdefault(task_id, []).append(ext_resource.eid)
             # Behav
             behavs = {}
-            for behav_path in glob.iglob(osp.join(base_path, subject_path, 'behav/*')):
+            for behav_path in glob.iglob(osp.join(study_path, subject_path, 'behav/*')):
                     behav_id = unicode(osp.splitext(behav_path.split('/')[-1])[0])
                     task_id = unicode(behav_path.split('/')[-2])
                     type_id = unicode(behav_path.split('/')[-3])
@@ -132,22 +137,23 @@
                     ext_resource = store.create_entity('ExternalResource',
                                                        name=name,
                                                        related_study=study.eid,
-                                                       filepath=unicode(osp.relpath(behav_path, start=base_path)))
+                                                       filepath=unicode(osp.relpath(behav_path, start=study_path)))
                     behavs.setdefault(task_id, []).append(ext_resource.eid)
             # Bold
-            for run in glob.iglob(osp.join(base_path, subject_path, 'BOLD/*')):
+            for run in glob.iglob(osp.join(study_path, subject_path, 'BOLD/*')):
                 run_id = run.split('/')[-1].split('.')[0]
-                assessment = store.create_entity('Assessment',identifier=unicode(run), protocol=u'fmri')
+                assessment = store.create_entity('Assessment',identifier=unicode(run), protocol=u'fmri',
+                                             related_study=study.eid)
                 store.relate(subject.eid, 'concerned_by', assessment.eid)
                 if task_eid:
                     store.relate(assessment.eid, 'external_resources', task_eid)
                 image_id = unicode('bold')
                 anat = {'identifier': u'%s_%s' % (subject_id, image_id),
-                        'label': u'anatomy',
+                        'label': u'bold',
                         'format': u'nii.gz',
                         'type': u'raw fMRI',
                         'related_study': study.eid,
-                        'filepath': unicode(osp.relpath(run+'/bold.nii.gz', start=base_path)),
+                        'filepath': unicode(osp.relpath(run+'/bold.nii.gz', start=study_path)),
                         'valid': True, 'completed': True, 'description': 'Image from OpenFmri'}
                 mri_data = store.create_entity('MRIData', sequence=u'T1')
                 # MRI data
@@ -160,9 +166,9 @@
                 store.relate(assessment.eid, 'generates', anat.eid, 'Assessment')
                 if device_eid:
                     store.relate(anat.eid, 'uses_device', device_eid)
-                for eid in models.get(task_id, []):
+                for eid in models.get(run_id, []):
                     store.relate(anat.eid, 'external_resources', eid)
-                for eid in behavs.get(task_id, []):
+                for eid in behavs.get(run_id, []):
                     store.relate(anat.eid, 'external_resources', eid)
             count_subject += 1
         count_all_subject += count_subject