[importers] Update and cleanup importers
authorVincent Michel <vincent.michel@logilab.fr>
Thu, 20 Dec 2012 10:06:20 +0100
changeset 23 b309136b814a
parent 22 4fd887fe2c9f
child 24 51ef281aac62
[importers] Update and cleanup importers
importers/helpers.py
importers/localizer.py
importers/mindboogle.py
importers/openfmri.py
--- a/importers/helpers.py	Wed Jan 09 16:40:59 2013 +0100
+++ b/importers/helpers.py	Thu Dec 20 10:06:20 2012 +0100
@@ -23,16 +23,16 @@
     """ Return the image info readed from the image using Nibabel """
     img = nb.load(image_path)
     data = {}
-    data['voxel_res_x'] = int(img.get_header()['pixdim'][1])
-    data['voxel_res_y'] = int(img.get_header()['pixdim'][2])
-    data['voxel_res_z'] = int(img.get_header()['pixdim'][3])
+    data['voxel_res_x'] = float(img.get_header()['pixdim'][1])
+    data['voxel_res_y'] = float(img.get_header()['pixdim'][2])
+    data['voxel_res_z'] = float(img.get_header()['pixdim'][3])
     data['shape_x'] = int(img.get_shape()[0])
     data['shape_y'] = int(img.get_shape()[1])
     data['shape_z'] = int(img.get_shape()[2])
     data['shape_t'] = int(img.get_shape()[3]) if len(img.get_shape()) == 4 else None
     data['affine'] = pickle.dumps(img.get_affine().tolist())
     desc = str(img.get_header()['descrip'])
-
+    # Use desc ?
     try:
         if get_tr:
             tr, te = re.findall(
@@ -44,55 +44,3 @@
         data['te'] = None
 
     return data
-
-
-def import_genes(genetics_dir):
-    ref_chr_path = os.path.join(genetics_dir, 'chromosomes.json')
-    ref_gene_path = os.path.join(
-        genetics_dir, 'sprintBack', 'hg18.refGene.meta')
-    chromosomes = json.load(open(ref_chr_path))
-    ref_gene = []
-    for row in csv.reader(open(ref_gene_path), delimiter='\t'):
-        gene = {}
-        gene['name'] = unicode(row[0])
-        gene['gene_id'] = unicode(row[0])
-        gene['uri'] = None
-        gene['start_position'] = int(row[1])
-        gene['stop_position'] = int(row[2])
-        gene['chromosome'] = row[3].split('_')[0]
-        ref_gene.append(gene)
-    return ref_gene
-
-
-def import_chromosomes(genetics_dir):
-    ref_chr_path = os.path.join(genetics_dir, 'chromosomes.json')
-    chromosomes = json.load(open(ref_chr_path))
-    chrs = []
-    for chr_id in chromosomes:
-        chr = {}
-        chr['name'] = u'chr%s' % chromosomes[chr_id].upper()
-        chr['identifier'] = unicode(chr['name'])
-
-        chrs.append(chr)
-    return chrs
-
-
-def import_snps(genetics_dir):
-    ref_chr_path = os.path.join(genetics_dir, 'chromosomes.json')
-    ref_snp_path = os.path.join(
-        genetics_dir, 'sprintBack', 'bysubj', 'bru3506.bim')
-    chromosomes = json.load(open(ref_chr_path))
-    snps = []
-    for row in csv.reader(open(ref_snp_path), delimiter='\t'):
-        snp = {}
-        if row[0] == '0':
-            continue
-
-        chr_id = chromosomes[row[0]]
-
-        snp['rs_id'] = unicode(row[1])
-        snp['position'] = int(row[3])
-        snp['chromosome'] = u'chr%s' % chr_id.upper()
-
-        snps.append(snp)
-    return snps
--- a/importers/localizer.py	Wed Jan 09 16:40:59 2013 +0100
+++ b/importers/localizer.py	Thu Dec 20 10:06:20 2012 +0100
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 
+import sys
 import os
+import os.path as osp
 import re
 import glob
 import csv
@@ -10,235 +12,189 @@
 
 import nibabel as nb
 
-gender_map = {
+from cubes.brainomics.importers.helpers import get_image_info
+
+
+###############################################################################
+### Global Mappings ###########################################################
+###############################################################################
+GENDER_MAP = {
     '1': u'male',
     '2': u'female',
     '3': u'unknown'
 }
 
-handedness_map = {
+HANDEDNESS_MAP = {
     'Right handed': u'right',
     'Left handed': u'left',
     'Ambidextrous': u'both',
     'Unknown': u'unknown',
 }
 
-def rel_path(path):
-    return os.path.join(
-        '/localizer',
-        path.split('localizer')[-1].strip('/'))
+SCORE_TYPES = [u'language', u'family',
+               u'schizophrenic', u'dyslexic',
+               u'dyscalculic', u'synaesthete']
+
+SEQ_TYPES = [u'localizer_long_complex',
+             u'localizer_long_easy',
+             u'localizer_short_complex',
+             u'localizer_short_easy']
 
 
-def get_image_info(image_path, get_tr=True):
-    img = nb.load(image_path)
-    data = {}
-    data['voxel_res_x'] = int(img.get_header()['pixdim'][1])
-    data['voxel_res_y'] = int(img.get_header()['pixdim'][2])
-    data['voxel_res_z'] = int(img.get_header()['pixdim'][3])
-    data['shape_x'] = int(img.get_shape()[0])
-    data['shape_y'] = int(img.get_shape()[1])
-    data['shape_z'] = int(img.get_shape()[2])
-    data['shape_t'] = int(img.get_shape()[3]) if len(img.get_shape()) == 4 else None
-    data['affine'] = pickle.dumps(img.get_affine().tolist())
-    desc = str(img.get_header()['descrip'])
-
-    try:
-        if get_tr:
-            tr, te = re.findall(
-                'TR=(.*)ms.*TE=(.*)ms', desc)[0]
-            data['tr'] = float(tr)
-            data['te'] = float(te)
-    except Exception, e:
-        data['tr'] = None
-        data['te'] = None
-
-    return data
-
-
+###############################################################################
+### MedicalExp entities #######################################################
+###############################################################################
 def import_subject(data_dir):
-    data = {}
-    score_values = []
-
-    info = json.load(
-        open('%s/subject.json' % data_dir))
+    """ Import a subject from a data dir """
+    data, score_values = {}, []
+    info = json.load(open('%s/subject.json' % data_dir))
     data['identifier'] = info['nip']
     data['age'] = info['age']
-    data['gender'] = gender_map.get(
-        info['sex'], gender_map['3'])
+    data['gender'] = GENDER_MAP.get(info['sex'], GENDER_MAP['3'])
     data['age'] = info['age']
-    data['handedness'] = handedness_map.get(
-        info['laterality'],
-        handedness_map['Unknown'])
-
-    score_types = [u'language', u'family',
-                   u'schizophrenic', u'dyslexic',
-                   u'dyscalculic', u'synaesthete']
-
-    for score in score_types:
-        score_values.append(
-            {'name': score,
-             'value': info.get(score)})
-
+    data['handedness'] = HANDEDNESS_MAP.get(info['laterality'],
+                                            HANDEDNESS_MAP['Unknown'])
+    for score in SCORE_TYPES:
+        score_values.append({'name': score,
+                             'value': info.get(score)})
     return data, score_values
 
-
-def import_study(data_dir=None):
+def import_study(data_dir):
+    """ Import a study from a data dir """
     data = {}
     data['data_filepath'] = data_dir
     data['name'] = u'localizer'
     data['description'] = u'localizer db'
-
     return data
 
-
-def import_anat(data_dir, normalized=False):
-    scan_data = {}
-    mri_data = {}
-
-    info = json.load(
-        open('%s/subject.json' % data_dir))
-
-    scan_data['identifier'] = u'%s_anat' % info['exam']
-    scan_data['label'] = u'anatomy' if normalized else u'raw anatomy'
-    scan_data['format'] = u'nii.gz'
-    if normalized:
-        scan_data['type'] = u'normalized T1'
-    else:
-        scan_data['type'] = u'raw T1'
-    if normalized:
-        scan_data['filepath'] = os.path.join(
-            data_dir, 'anat', 'anat.nii.gz')
-    else:
-        scan_data['filepath'] = os.path.join(
-            data_dir, 'anat', 'raw_anat.nii.gz')
-    scan_data['timepoint'] = info['date']
-    scan_data['completed'] = True
-    scan_data['valid'] = True
-    scan_data['description'] = info['anatomy']
-
-    mri_data['sequence'] = u'T1'
-    mri_data.update(
-        get_image_info(scan_data['filepath']))
-
-    scan_data['filepath'] = rel_path(
-        scan_data['filepath'])
-
-    return scan_data, mri_data
-
-
-def import_fmri(data_dir, preprocessed=False):
-    scan_data = {}
-    mri_data = {}
-
-    info = json.load(
-        open('%s/subject.json' % data_dir))
+def import_center(data_dir):
+    """ Import a center """
+    data = {}
+    info = json.load(open('%s/subject.json' % data_dir))
+    data['identifier'] = info['site']
+    if info['site'] == u'SHFJ':
+        data['name'] = u'SHFJ'
+        data['department'] = u'Essonne'
+        data['city'] = u'Orsay'
+        data['postal_code'] = 91401
+        data['country'] = u'France'
+    elif info['site'] == u'Neurospin':
+        data['name'] = u'Neurospin'
+        data['department'] = u'Essonne'
+        data['city'] = u'Gif-sur-Yvette'
+        data['postal_code'] = 91191
+        data['country'] = u'France'
+    return data
 
-    scan_data['identifier'] = u'%s_fmri' % info['exam'] if preprocessed \
-        else u'%s_raw_fmri' % info['exam']
-    scan_data['label'] = u'bold' if preprocessed else u'raw bold'
-    scan_data['format'] = u'nii.gz'
-    if preprocessed:
-        scan_data['type'] = u'preprocessed fMRI'
-    else:
-        scan_data['type'] = u'raw fMRI'
-    if preprocessed:
-        scan_data['filepath'] = os.path.join(
-            data_dir, 'fmri', 'bold.nii.gz')
-    else:
-        scan_data['filepath'] = os.path.join(
-            data_dir, 'fmri', 'raw_bold.nii.gz')
-    scan_data['timepoint'] = info['date']
-    scan_data['completed'] = True
-    scan_data['valid'] = True
-    scan_data['description'] = (
-        u'epi_problem=%(epi_problem)s '
-        'sound_problem=%(sound_problem)s '
-        'video_problem=%(video_problem)s '
-        'motor_error=%(motor_error)s' % info)
-
-    seq_types = [u'localizer_long_complex',
-                 u'localizer_long_easy',
-                 u'localizer_short_complex',
-                 u'localizer_short_easy']
-
-    for seq_type in seq_types:
-        if info[seq_type]:
-            scan_data['description'] += u' %s' % seq_type
-
-    mri_data['sequence'] = u'EPI'
-
-    mri_data.update(
-        get_image_info(scan_data['filepath']))
-
-    scan_data['filepath'] = rel_path(
-        scan_data['filepath'])
-
-    return scan_data, mri_data
-
+def import_device(data_dir):
+    """ Import a device """
+    data = {}
+    info = json.load(open('%s/subject.json' % data_dir))
+    if info['site'] == u'Neurospin':
+        data['name'] = '3T SIEMENS Trio'
+        data['manufacturer'] = 'SIEMENS'
+        data['model'] = 'Trio'
+        data['hosted_by'] = 'Neurospin'
+    if info['site'] == u'SHFJ':
+        data['name'] = '3T Brucker'
+        data['manufacturer'] = 'Brucker'
+        data['model'] = '3T Brucker'
+        data['hosted_by'] = 'SHFJ'
+    return data
 
 def import_assessment(data_dir, label):
-    info = json.load(
-        open('%s/subject.json' % data_dir))
-
+    """ Import an assessment """
+    info = json.load(open('%s/subject.json' % data_dir))
     data = {}
     data['identifier'] = u'%s_%s' % (info['nip'], label)
     data['protocol'] = info['protocol']
     data['timepoint'] = info['date']
-
     if info.get('date'):
-        data['datetime'] = datetime.strptime(
-            info['date'],
-            '%Y-%m-%d %H:%M:%S')
+        data['datetime'] = datetime.strptime(info['date'],
+                                             '%Y-%m-%d %H:%M:%S')
     else:
         data['datetime'] = None
     return data
 
 
-def import_maps(data_dir, dtype='c'):
-
-    info = json.load(
-        open('%s/subject.json' % data_dir))
-
-    base_path = os.path.join(data_dir, '%s_maps' % dtype)
+###############################################################################
+### Neuroimaging entities #####################################################
+###############################################################################
+def import_neuroimaging(data_dir, dtype='anat', norm_prep=False):
+    """ Import a neuroimaging scan """
+    scan_data, mri_data = {}, {}
+    info = json.load(open('%s/subject.json' % data_dir))
+    # Label and id
+    if dtype == 'anat':
+        mri_data['sequence'] = u'T1'
+        scan_data['identifier'] = u'%s_anat' % info['exam']
+        scan_data['label'] = u'anatomy' if norm_prep else u'raw anatomy'
+        scan_data['type'] = u'normalized T1' if norm_prep else u'raw T1'
+        if norm_prep:
+            scan_data['filepath'] = os.path.join(data_dir, 'anat', 'anat.nii.gz')
+        else:
+            scan_data['filepath'] = os.path.join(data_dir, 'anat', 'raw_anat.nii.gz')
+    else:
+        mri_data['sequence'] = u'EPI'
+        scan_data['identifier'] = u'%s_fmri' % info['exam'] if norm_prep  \
+                                  else u'%s_raw_fmri' % info['exam']
+        scan_data['label'] = u'bold' if norm_prep else u'raw bold'
+        scan_data['type'] = u'preprocessed fMRI' if norm_prep else u'raw fMRI'
+        if norm_prep:
+            scan_data['filepath'] = os.path.join(data_dir, 'fmri', 'bold.nii.gz')
+        else:
+            scan_data['filepath'] = os.path.join(data_dir, 'fmri', 'raw_bold.nii.gz')
+    # Data properties
+    scan_data['format'] = u'nii.gz'
+    scan_data['timepoint'] = info['date']
+    scan_data['completed'] = True
+    scan_data['valid'] = True
+    # Description
+    if dtype == 'anat':
+        scan_data['description'] = info['anatomy']
+    else:
+        scan_data['description'] = (
+            u'epi_problem=%(epi_problem)s '
+            'sound_problem=%(sound_problem)s '
+            'video_problem=%(video_problem)s '
+            'motor_error=%(motor_error)s' % info)
+        for seq_type in SEQ_TYPES:
+            if info[seq_type]:
+                scan_data['description'] += u' %s' % seq_type
+    # Update mri data
+    mri_data.update(get_image_info(scan_data['filepath']))
+    return scan_data, mri_data
 
+def import_maps(data_dir, dtype='c'):
+    """ Import c/t maps """
+    info = json.load(open('%s/subject.json' % data_dir))
+    base_path = os.path.join(data_dir, '%s_maps' % dtype)
     for img_path in glob.iglob(os.path.join(base_path, '*.nii.gz')):
-        scan_data = {}
-        mri_data = {}
-
+        scan_data, mri_data = {}, {}
         scan_data['identifier'] = u'%s_%s_map' % (info['exam'], dtype)
-        scan_data['label'] = unicode(os.path.split(
-            img_path)[1].split('.nii.gz')[0].replace('_', ' '))
+        scan_data['label'] = unicode(os.path.split(img_path)[1].split(
+            '.nii.gz')[0].replace('_', ' '))
         scan_data['format'] = u'nii.gz'
         scan_data['type'] = u'%s map' % dtype
         scan_data['filepath'] = img_path
         scan_data['timepoint'] = info['date']
         scan_data['completed'] = True
         scan_data['valid'] = True
-
+        # Mri data
         mri_data['sequence'] = None
-        mri_data.update(
-            get_image_info(scan_data['filepath'], get_tr=False))
-
-        scan_data['filepath'] = rel_path(
-            scan_data['filepath'])
-
+        mri_data.update(get_image_info(scan_data['filepath'], get_tr=False))
         name = os.path.split(img_path)[1].split('.nii.gz')[0]
-
         ext_resource = {}
         ext_resource['name'] = u'contrast definition'
-        ext_resource['filepath'] = unicode(rel_path(os.path.join(
-            data_dir, 'contrasts', '%s.json' % name)))
-
+        ext_resource['filepath'] = unicode(os.path.join(data_dir,
+                                                        'contrasts',
+                                                        '%s.json' % name))
         yield scan_data, mri_data, ext_resource
 
-
 def import_mask(data_dir):
-    scan_data = {}
-    mri_data = {}
-
-    info = json.load(
-        open('%s/subject.json' % data_dir))
-
+    """ Import a mask """
+    scan_data, mri_data = {}, {}
+    info = json.load(open('%s/subject.json' % data_dir))
     scan_data['identifier'] = u'%s_mask' % info['exam']
     scan_data['label'] = u'mask'
     scan_data['format'] = u'nii.gz'
@@ -248,27 +204,24 @@
     scan_data['completed'] = True
     scan_data['valid'] = True
     mri_data['sequence'] = None
-    mri_data.update(
-        get_image_info(scan_data['filepath']))
-
-    scan_data['filepath'] = rel_path(
-        scan_data['filepath'])
-
+    mri_data.update(get_image_info(scan_data['filepath']))
     return scan_data, mri_data
 
 
+###############################################################################
+### Questionnaire entities ####################################################
+###############################################################################
 def import_questionnaire(data_dir):
+    """ Import a questionnaire and its questions """
     questionnaire = {}
     questionnaire['name'] = u'localizer questionnaire'
     questionnaire['identifier'] = u'localizer_questionnaire'
     questionnaire['type'] = u'behavioural'
     questionnaire['version'] = u'1.0'
     questionnaire['language'] = u'French'
-
-    behave = json.load(
-        open('%s/behavioural.json' % data_dir))
+    behave = json.load(open('%s/behavioural.json' % data_dir))
     del behave['date']
-
+    # Questions
     questions = []
     values = [behave[k] for k in sorted(behave.keys())]
     for i, (val, item) in enumerate(zip(values, sorted(behave.keys()))):
@@ -278,17 +231,13 @@
         question['text'] = unicode(item)
         question['type'] = u'boolean' if isinstance(val, bool) else u'float'
         question['possible_answers'] = None
-
         questions.append(question)
-
     return questionnaire, questions
 
-
 def import_questionnaire_run(data_dir, questionnaire_id, questions_id):
+    """ Import a questionnaire run """
     run = {}
-
-    behave = json.load(
-        open('%s/behavioural.json' % data_dir))
+    behave = json.load(open('%s/behavioural.json' % data_dir))
     sid = os.path.split(data_dir)[1]
     run['identifier'] = u'localizer_questionnaire_%s' % (sid)
     run['user_ident'] = u'subject'
@@ -304,10 +253,9 @@
     run['completed'] = True
     run['valid'] = True
     run['instance_of'] = questionnaire_id
+    # Answers
     answers = []
-
     values = [behave[k] for k in sorted(behave.keys())]
-
     for i, (val, item) in enumerate(zip(values, sorted(behave.keys()))):
         answer = {}
         # XXX: handle str answers
@@ -316,53 +264,17 @@
             answer['datetime'] = run['datetime']
             answer['question'] = questions_id[item]
             answers.append(answer)
-
     return run, answers
 
 
-def import_center(data_dir):
-    data = {}
-    info = json.load(open('%s/subject.json' % data_dir))
-    data['identifier'] = info['site']
-
-    if info['site'] == u'SHFJ':
-        data['name'] = u'SHFJ'
-        data['department'] = u'Essonne'
-        data['city'] = u'Orsay'
-        data['postal_code'] = 91401
-        data['country'] = u'France'
-    elif info['site'] == u'Neurospin':
-        data['name'] = u'Neurospin'
-        data['department'] = u'Essonne'
-        data['city'] = u'Gif-sur-Yvette'
-        data['postal_code'] = 91191
-        data['country'] = u'France'
-
-    return data
-
-
-def import_device(data_dir):
-    data = {}
-    info = json.load(open('%s/subject.json' % data_dir))
-
-    if info['site'] == u'Neurospin':
-        data['name'] = '3T SIEMENS Trio'
-        data['manufacturer'] = 'SIEMENS'
-        data['model'] = 'Trio'
-        data['hosted_by'] = 'Neurospin'
-    if info['site'] == u'SHFJ':
-        data['name'] = '3T Brucker'
-        data['manufacturer'] = 'Brucker'
-        data['model'] = '3T Brucker'
-        data['hosted_by'] = 'SHFJ'
-
-    return data
-
-
+###############################################################################
+### Genomics entities #########################################################
+###############################################################################
+# XXX These functions may be pushed in helpers, as they may be more general
 def import_genes(genetics_dir):
+    """ Import genes """
     ref_chr_path = os.path.join(genetics_dir, 'chromosomes.json')
-    ref_gene_path = os.path.join(
-        genetics_dir, 'sprintBack', 'hg18.refGene.meta')
+    ref_gene_path = os.path.join(genetics_dir, 'sprintBack', 'hg18.refGene.meta')
     chromosomes = json.load(open(ref_chr_path))
     ref_gene = []
     for row in csv.reader(open(ref_gene_path), delimiter='\t'):
@@ -376,8 +288,8 @@
         ref_gene.append(gene)
     return ref_gene
 
-
 def import_chromosomes(genetics_dir):
+    """ Import chromosomes """
     ref_chr_path = os.path.join(genetics_dir, 'chromosomes.json')
     chromosomes = json.load(open(ref_chr_path))
     chrs = []
@@ -385,43 +297,37 @@
         chr = {}
         chr['name'] = u'chr%s' % chromosomes[chr_id].upper()
         chr['identifier'] = unicode(chr['name'])
-
         chrs.append(chr)
     return chrs
 
-
 def import_snps(genetics_dir):
+    """ Import snps """
     ref_chr_path = os.path.join(genetics_dir, 'chromosomes.json')
-    ref_snp_path = os.path.join(
-        genetics_dir, 'sprintBack', 'bysubj', 'bru3506.bim')
+    ref_snp_path = os.path.join(genetics_dir, 'sprintBack', 'bysubj', 'bru3506.bim')
     chromosomes = json.load(open(ref_chr_path))
     snps = []
     for row in csv.reader(open(ref_snp_path), delimiter='\t'):
         snp = {}
         if row[0] == '0':
             continue
-
         chr_id = chromosomes[row[0]]
-
         snp['rs_id'] = unicode(row[1])
         snp['position'] = int(row[3])
         snp['chromosome'] = u'chr%s' % chr_id.upper()
-
         snps.append(snp)
     return snps
 
-
 def import_genomic_measures(genetics_dir):
+    """ Import genomic measures """
     measure_path = os.path.join(genetics_dir, 'sprintBack', 'bysubj')
     g_measures = {}
-
     for path in glob.glob(os.path.join(measure_path, '*.bim')):
         subject_id = os.path.split(path)[1].split('.bim')[0]
         genomic_measure = {}
         genomic_measure['identifier'] = u'genomic_measure_%s' % subject_id
         genomic_measure['type'] = u'SNP'
         genomic_measure['format'] = u'plink'
-        genomic_measure['filepath'] = unicode(rel_path(path.split('.bim')[0]))
+        genomic_measure['filepath'] = unicode(path)
         genomic_measure['chip_serialnum'] = None
         genomic_measure['completed'] = True
         genomic_measure['valid'] = True
@@ -430,24 +336,29 @@
     return g_measures
 
 
+###############################################################################
+### MAIN ######################################################################
+###############################################################################
 if __name__ == '__main__':
-    from cubicweb.dataimport import SQLGenObjectStore as Store
-
-    root_dir = '/opt/localizer/localizer/subjects'
-    genetics_dir = '/opt/localizer/localizer/genetics'
-
-    # for sid in glob.glob('%s/*' % root_dir):
-    #     print os.path.split(sid)[1]
-    #     print import_devices(sid)
-    #     print import_center(sid)
-
+    # Create store
+    try:
+        from cubicweb.dataimport import SQLGenObjectStore as Store
+        sqlgen_store = True
+    except ImportError:
+        from cubicweb.dataimport import NoHookRQLObjectStore as Store
+        sqlgen_store = False
     store = Store(session)
 
-    # ---------------------------------------------------------------------
-    # Questionnaire
-    # ---------------------------------------------------------------------
+    root_dir = osp.abspath(sys.argv[4])
+    subjects_dir = osp.join(root_dir, 'subjects')
+    genetics_dir = osp.join(root_dir, 'genetics')
 
-    one_subject = glob.glob('%s/*' % root_dir)[0]
+    ### Study #################################################################
+    study = import_study(data_dir=root_dir)
+    study = store.create_entity('Study', **study)
+
+    ### Initialize questionnaire ##############################################
+    one_subject = glob.glob('%s/*' % subjects_dir)[0]
     questionnaire, questions = import_questionnaire(one_subject)
     questionnaire = store.create_entity('Questionnaire', **questionnaire)
     questions_id = {}
@@ -456,72 +367,58 @@
         question = store.create_entity('Question', **question)
         questions_id[question.text] = question.eid
 
-    score_defs = {}
-    centers = {}
-    devices = {}
-
-    # ---------------------------------------------------------------------
-    # Study
-    # ---------------------------------------------------------------------
-
-    study = import_study(data_dir=root_dir)
-    study = store.create_entity('Study', **study)
-
-    # ---------------------------------------------------------------------
-    # Genetics
-    # ---------------------------------------------------------------------
-
-    print
-    print 'Import genetic data...'
-    print '-' * 80
-
+    ### Initialize genetics ####################################################
+    # Chromosomes
     chrs = import_chromosomes(genetics_dir)
     chr_map = {}
-    for chr in chrs:
-        print 'chr', chr['name']
-        chr = store.create_entity('Chromosome', **chr)
-        chr_map.setdefault(chr['name'], chr.eid)
-
-    print chr_map
-
+    for _chr in chrs:
+        print 'chr', _chr['name']
+        _chr = store.create_entity('Chromosome', **_chr)
+        chr_map.setdefault(_chr['name'], _chr.eid)
+    # Genes
     genes = import_genes(genetics_dir)
     for gene in genes:
         print 'gene', gene['name'], gene['chromosome']
         gene['chromosome'] = chr_map[gene['chromosome']]
         gene = store.create_entity('Gene', **gene)
-
+    # Flush/Commit
+    if sqlgen_store:
+        store.flush()
+    # Snps
     snps = import_snps(genetics_dir)
     snp_eids = []
-    for snp in snps:
+    for ind, snp in enumerate(snps):
         print 'snp', snp['rs_id']
         snp['chromosome'] = chr_map[snp['chromosome']]
         snp = store.create_entity('Snp', **snp)
         snp_eids.append(snp.eid)
-
-    platform = {
-        'identifier': 'Affymetrix_6.0',
-        }
-
+        if ind and ind % 100000 == 0:
+            store.flush()
+    # Flush/Commit
+    if sqlgen_store:
+        store.flush()
+    # Platform
+    platform = {'identifier': 'Affymetrix_6.0'}
     platform = store.create_entity('GenomicPlatform', **platform)
     for snp_eid in snp_eids:
         store.relate(platform.eid, 'related_snps', snp_eid)
 
-
+    ### Genetics measures #####################################################
     gen_measures = import_genomic_measures(genetics_dir)
 
-    print
-    print 'Import subject data...'
-    print '-' * 80
-
-    store.flush()
+    # Flush/Commit
+    if sqlgen_store:
+        store.flush()
 
-    for sid in glob.glob('%s/*' % root_dir):
-        print os.path.split(sid)[1]
+    ###########################################################################
+    ### Subjects ##############################################################
+    ###########################################################################
+    centers, devices, score_defs = {}, {}, {}
+    for sid in glob.glob('%s/*' % subjects_dir):
 
-        # ---------------------------------------------------------------------
-        # Center & devices
-        # ---------------------------------------------------------------------
+        print '-------->', sid, os.path.split(sid)[1]
 
+        # Centers #############################################################
         center = import_center(sid)
         if center['name'] not in centers:
             center = store.create_entity('Center', **center)
@@ -529,9 +426,10 @@
             center_eid = center.eid
         else:
             center_eid = centers[center['name']]
+
+        # Devices #############################################################
         device = import_device(sid)
         device['hosted_by'] = centers[device['hosted_by']]
-
         if device['name'] not in devices:
             device = store.create_entity('Device', **device)
             devices.setdefault(device.name, device.eid)
@@ -539,24 +437,14 @@
         else:
             device_id = devices[device['name']]
 
-        # ---------------------------------------------------------------------
-        # Subject
-        # ---------------------------------------------------------------------
-
+        # Subject #############################################################
         subject, score_values = import_subject(sid)
         subject = store.create_entity('Subject', **subject)
         store.relate(subject.eid, 'related_studies', study.eid)
-
-        dm_res = store.create_entity(
-            'ExternalResource',
-            name=u'design_matrix',
-            filepath=unicode(rel_path(os.path.join(sid, 'design_matrix.json'))))
-
         for score_val in score_values:
             value = score_val['value']
             if not value:
                 continue
-
             if score_val['name'] in score_defs:
                 def_eid = score_defs.get(score_val['name'])
             else:
@@ -567,118 +455,88 @@
                 score_def = store.create_entity('ScoreDef', **score_def)
                 score_defs[score_val['name']] = score_def.eid
                 def_eid = score_def.eid
-
-            score_val = store.create_entity('ScoreValue', definition=def_eid, value=value)
+            score_val = store.create_entity('ScoreValue', definition=def_eid,
+                                            value=value)
             store.relate(subject.eid, 'related_infos', score_val.eid)
 
-        # ---------------------------------------------------------------------
-        # Genetics
-        # ---------------------------------------------------------------------
+        # Design matrix #######################################################
+        dm_res = store.create_entity('ExternalResource',
+                                     name=u'design_matrix',
+                                     related_study=study.eid,
+                                     filepath=unicode(osp.relpath(
+                                         os.path.join(sid, 'design_matrix.json'),
+                                         start=root_dir)))
+
+        # Genetics ############################################################
         gen_assessment = import_assessment(sid, 'genetics')
         gen_assessment = store.create_entity('Assessment', **gen_assessment)
         store.relate(subject.eid, 'concerned_by', gen_assessment.eid)
         measure = gen_measures[subject.identifier]
         measure['platform'] = platform.eid
+        measure['related_study'] = study.eid
+        measure['filepath'] = osp.relpath(measure['filepath'], start=root_dir)
         measure = store.create_entity('GenomicMeasure', **measure)
         store.relate(measure.eid, 'concerns', subject.eid, 'GenomicMeasure')
-        store.relate(gen_assessment.eid, 'generates', measure.eid)
-
+        store.relate(gen_assessment.eid, 'generates', measure.eid, 'Assessment')
 
-        # ---------------------------------------------------------------------
-        # Anat & fMRI
-        # ---------------------------------------------------------------------
-
+        # Anat & fMRI ############################################################
         # anat assessment
         anat_assessment = import_assessment(sid, 'anat')
         anat_assessment = store.create_entity('Assessment', **anat_assessment)
         store.relate(center_eid, 'holds', anat_assessment.eid)
         store.relate(subject.eid, 'concerned_by', anat_assessment.eid)
-
-
-        # raw anat
-        scan_anat, mri_anat = import_anat(sid)
-        mri_anat = store.create_entity('MRIData', **mri_anat)
-        scan_anat['has_data'] = mri_anat.eid
-        scan_anat = store.create_entity('Scan', **scan_anat)
-        store.relate(scan_anat.eid, 'concerns', subject.eid, 'Scan')
-        store.relate(scan_anat.eid, 'uses_device', device_id)
-        store.relate(anat_assessment.eid, 'generates', scan_anat.eid)
-
-        # normalized anat
-        scan_anat, mri_anat = import_anat(sid, True)
-        mri_anat = store.create_entity('MRIData', **mri_anat)
-        scan_anat['has_data'] = mri_anat.eid
-        scan_anat = store.create_entity('Scan', **scan_anat)
-        store.relate(scan_anat.eid, 'concerns', subject.eid, 'Scan')
-        store.relate(scan_anat.eid, 'uses_device', device_id)
-        store.relate(anat_assessment.eid, 'generates', scan_anat.eid)
-
+        for normalized in (False, True):
+            scan_anat, mri_anat = import_neuroimaging(sid, 'anat', normalized)
+            mri_anat = store.create_entity('MRIData', **mri_anat)
+            scan_anat['has_data'] = mri_anat.eid
+            scan_anat['related_study'] = study.eid
+            # Get the relative filepath
+            scan_anat['filepath'] = osp.relpath(scan_anat['filepath'], start=root_dir)
+            scan_anat = store.create_entity('Scan', **scan_anat)
+            store.relate(scan_anat.eid, 'concerns', subject.eid, 'Scan')
+            store.relate(scan_anat.eid, 'uses_device', device_id)
+            store.relate(anat_assessment.eid, 'generates', scan_anat.eid, 'Assessment')
         # fmri assessment
         fmri_assessment = import_assessment(sid, 'fmri')
         fmri_assessment = store.create_entity('Assessment', **fmri_assessment)
         store.relate(center_eid, 'holds', fmri_assessment.eid)
         store.relate(subject.eid, 'concerned_by', fmri_assessment.eid)
-
-        # raw bold
-        scan_fmri, mri_fmri = import_fmri(sid)
-        mri_fmri = store.create_entity('MRIData', **mri_fmri)
-        scan_fmri['has_data'] = mri_fmri.eid
-        scan_fmri = store.create_entity('Scan', **scan_fmri)
-        store.relate(scan_fmri.eid, 'concerns', subject.eid, 'Scan')
-        store.relate(scan_fmri.eid, 'uses_device', device_id)
-        store.relate(fmri_assessment.eid, 'generates', scan_fmri.eid)
-
-        # preproc bold
-        scan_fmri, mri_fmri = import_fmri(sid, True)
-        mri_fmri = store.create_entity('MRIData', **mri_fmri)
-        scan_fmri['has_data'] = mri_fmri.eid
-        scan_fmri = store.create_entity('Scan', **scan_fmri)
-        store.relate(scan_fmri.eid, 'concerns', subject.eid, 'Scan')
-        store.relate(scan_fmri.eid, 'uses_device', device_id)
-        store.relate(fmri_assessment.eid, 'generates', scan_fmri.eid)
-
-        # ---------------------------------------------------------------------
-        # Maps
-        # ---------------------------------------------------------------------
-
-        # c_maps assessment
-        assessment = import_assessment(sid, 'c_maps')
-        assessment = store.create_entity('Assessment', **assessment)
-        store.relate(center_eid, 'holds', assessment.eid)
-        store.relate(subject.eid, 'concerned_by', assessment.eid)
+        for preprocessed in (False, True):
+            scan_fmri, mri_fmri = import_neuroimaging(sid, 'fmri', preprocessed)
+            mri_fmri = store.create_entity('MRIData', **mri_fmri)
+            scan_fmri['has_data'] = mri_fmri.eid
+            scan_fmri['related_study'] = study.eid
+            # Get the relative filepath
+            scan_fmri['filepath'] = osp.relpath(scan_fmri['filepath'], start=root_dir)
+            scan_fmri = store.create_entity('Scan', **scan_fmri)
+            store.relate(scan_fmri.eid, 'concerns', subject.eid, 'Scan')
+            store.relate(scan_fmri.eid, 'uses_device', device_id)
+            store.relate(fmri_assessment.eid, 'generates', scan_fmri.eid, 'Assessment')
 
-        for scan, mri, con_res in import_maps(sid, 'c'):
-            mri = store.create_entity('MRIData', **mri)
-            scan['has_data'] = mri.eid
-            scan = store.create_entity('Scan', **scan)
-            con_res = store.create_entity('ExternalResource', **con_res)
-            store.relate(scan.eid, 'concerns', subject.eid, 'Scan')
-            store.relate(scan.eid, 'uses_device', device_id)
-            store.relate(assessment.eid, 'generates', scan.eid)
-            store.relate(scan.eid, 'external_resources', con_res.eid)
-            store.relate(scan.eid, 'external_resources', dm_res.eid)
+        # c-maps & t-maps #####################################################
+        for dtype, label in (('c', 'c_maps'), ('t', 't_maps')):
+            assessment = import_assessment(sid, label)
+            assessment = store.create_entity('Assessment', **assessment)
+            store.relate(center_eid, 'holds', assessment.eid)
+            store.relate(subject.eid, 'concerned_by', assessment.eid)
+            for scan, mri, con_res in import_maps(sid, dtype):
+                mri = store.create_entity('MRIData', **mri)
+                if con_res:
+                    con_res['related_study'] = study.eid
+                    con_res['filepath'] = osp.relpath(con_res['filepath'], start=root_dir)
+                    con_res = store.create_entity('ExternalResource', **con_res)
+                scan['has_data'] = mri.eid
+                scan['related_study'] = study.eid
+                # Get the relative filepath
+                scan['filepath'] = osp.relpath(scan['filepath'], start=root_dir)
+                scan = store.create_entity('Scan', **scan)
+                store.relate(scan.eid, 'concerns', subject.eid, 'Scan')
+                store.relate(scan.eid, 'uses_device', device_id)
+                store.relate(assessment.eid, 'generates', scan.eid, 'Assessment')
+                store.relate(scan.eid, 'external_resources', con_res.eid)
+                store.relate(scan.eid, 'external_resources', dm_res.eid)
 
-        # t_maps assessment
-        assessment = import_assessment(sid, 't_maps')
-        assessment = store.create_entity('Assessment', **assessment)
-        store.relate(center_eid, 'holds', assessment.eid)
-        store.relate(subject.eid, 'concerned_by', assessment.eid)
-
-        for scan, mri, con_res in import_maps(sid, 't'):
-            mri = store.create_entity('MRIData', **mri)
-            con_res = store.create_entity('ExternalResource', **con_res)
-            scan['has_data'] = mri.eid
-            scan = store.create_entity('Scan', **scan)
-            store.relate(scan.eid, 'concerns', subject.eid, 'Scan')
-            store.relate(scan.eid, 'uses_device', device_id)
-            store.relate(assessment.eid, 'generates', scan.eid)
-            store.relate(scan.eid, 'external_resources', con_res.eid)
-            store.relate(scan.eid, 'external_resources', dm_res.eid)
-
-        # ---------------------------------------------------------------------
-        # Mask
-        # ---------------------------------------------------------------------
-
+        # mask ################################################################
         assessment = import_assessment(sid, 'mask')
         assessment = store.create_entity('Assessment', **assessment)
         store.relate(center_eid, 'holds', assessment.eid)
@@ -686,29 +544,30 @@
         scan, mri = import_mask(sid)
         mri = store.create_entity('MRIData', **mri)
         scan['has_data'] = mri.eid
+        scan['related_study'] = study.eid
+        # Get the relative filepath
+        scan['filepath'] = osp.relpath(scan['filepath'], start=root_dir)
         scan = store.create_entity('Scan', **scan)
         store.relate(scan.eid, 'concerns', subject.eid, 'Scan')
         store.relate(scan.eid, 'uses_device', device_id)
-        store.relate(assessment.eid, 'generates', scan.eid)
+        store.relate(assessment.eid, 'generates', scan.eid, 'Assessment')
 
-        # ---------------------------------------------------------------------
-        # Questionnaire run
-        # ---------------------------------------------------------------------
-
+        # Questionnaire run ###################################################
         assessment = import_assessment(sid, 'questionnaire')
         assessment = store.create_entity('Assessment', **assessment)
         store.relate(center_eid, 'holds', assessment.eid)
         store.relate(subject.eid, 'concerned_by', assessment.eid)
-
         run, answers = import_questionnaire_run(sid, questionnaire.eid, questions_id)
+        run['related_study'] = study.eid
         run = store.create_entity('QuestionnaireRun', **run)
-
+        # Answers
         for answer in answers:
             answer['questionnaire_run'] = run.eid
             answer = store.create_entity('Answer', **answer)
+        store.relate(run.eid, 'concerns', subject.eid, 'QuestionnaireRun')
+        store.relate(assessment.eid, 'generates', run.eid, 'Assessment')
 
-        store.relate(run.eid, 'concerns', subject.eid, 'QuestionnaireRun')
-        store.relate(assessment.eid, 'generates', run.eid)
-
-    store.flush()
+    # Flush/Commit
+    if sqlgen_store:
+        store.flush()
     store.commit()
--- a/importers/mindboogle.py	Wed Jan 09 16:40:59 2013 +0100
+++ b/importers/mindboogle.py	Thu Dec 20 10:06:20 2012 +0100
@@ -49,7 +49,8 @@
                     'label': u'labels' if 'labels' in image_id else u'anatomy',
                     'format': u'nii.gz',
                     'type': u'normalized T1',
-                    'filepath': unicode(osp.relpath(image)),
+                    'related_study': study.eid,
+                    'filepath': unicode(osp.relpath(image, start=base_path)),
                     'valid': True, 'completed': True, 'description': 'Image from Mindboggle'}
             mri_data = store.create_entity('MRIData', sequence=u'T1')
             # MRI data
@@ -59,7 +60,7 @@
             anat['has_data'] = mri_data.eid
             anat = store.create_entity('Scan', **anat)
             store.relate(anat.eid, 'concerns', subject.eid, 'Scan')
-            store.relate(assessment.eid, 'generates', anat.eid)
+            store.relate(assessment.eid, 'generates', anat.eid, 'Assessment')
         count_subject += 1
     # Flush
     store.flush()
--- a/importers/openfmri.py	Wed Jan 09 16:40:59 2013 +0100
+++ b/importers/openfmri.py	Thu Dec 20 10:06:20 2012 +0100
@@ -34,7 +34,7 @@
         study_id = study_path[:-1].split('/')[-1]
         print '--->', study_id
         # XXX use True metadata for studies, P.I. ...
-        study = store.create_entity('Study', data_filepath=unicode(osp.abspath(study_path)),
+        study = store.create_entity('Study', data_filepath=unicode(osp.abspath(base_path)),
                                     name = u'openfmri-%s' % study_id, keywords='openfmri')
         # Device
         device_path = osp.join(study_path, 'scan_key.txt')
@@ -50,7 +50,8 @@
         task_eid = None
         if osp.exists(task_path):
             ext_resource = store.create_entity('ExternalResource', name=u'task_key',
-                                               filepath=unicode(osp.abspath(task_path)))
+                                               related_study=study.eid,
+                                               filepath=unicode(osp.relpath(task_path, start=base_path)))
             task_eid = ext_resource.eid
         # Model contrasts and condition_key
         model_path = osp.join(study_path, 'models')
@@ -61,7 +62,8 @@
                 file_id = model.split('/')[-1]
                 ext_resource = store.create_entity('ExternalResource',
                                                    name=unicode(osp.splitext(file_id)[0]),
-                                                   filepath=unicode(osp.abspath(file_id)))
+                                                   related_study=study.eid,
+                                                   filepath=unicode(osp.relpath(model, start=base_path)))
                 model_files.setdefault(model_id, []).append(ext_resource.eid)
         # Demographics
         demographics_path = osp.join(study_path, 'demographics.txt')
@@ -93,7 +95,8 @@
                         'label': u'anatomy',
                         'format': u'nii.gz',
                         'type': u'normalized T1',
-                        'filepath': unicode(osp.relpath(image)),
+                        'related_study': study.eid,
+                        'filepath': unicode(osp.relpath(image, start=base_path)),
                         'valid': True, 'completed': True, 'description': 'Image from OpenFmri'}
                 mri_data = store.create_entity('MRIData', sequence=u'T1')
                 # MRI data
@@ -105,7 +108,7 @@
                 if device_eid:
                     store.relate(anat.eid, 'uses_device', device_eid)
                 store.relate(anat.eid, 'concerns', subject.eid, 'Scan')
-                store.relate(assessment.eid, 'generates', anat.eid)
+                store.relate(assessment.eid, 'generates', anat.eid, 'Assessment')
             # Model
             models = {}
             for model in glob.iglob(osp.join(base_path, subject_path, 'model/*')):
@@ -116,7 +119,8 @@
                     name = u'_'.join((subject_id, type_id, task_id))
                     ext_resource = store.create_entity('ExternalResource',
                                                        name=name,
-                                                       filepath=unicode(osp.abspath(model_path)))
+                                                       related_study=study.eid,
+                                                       filepath=unicode(osp.relpath(model_path, start=base_path)))
                     models.setdefault(task_id, []).append(ext_resource.eid)
             # Behav
             behavs = {}
@@ -127,7 +131,8 @@
                     name = u'_'.join((subject_id, type_id, task_id))
                     ext_resource = store.create_entity('ExternalResource',
                                                        name=name,
-                                                       filepath=unicode(osp.abspath(behav_path)))
+                                                       related_study=study.eid,
+                                                       filepath=unicode(osp.relpath(behav_path, start=base_path)))
                     behavs.setdefault(task_id, []).append(ext_resource.eid)
             # Bold
             for run in glob.iglob(osp.join(base_path, subject_path, 'BOLD/*')):
@@ -141,7 +146,8 @@
                         'label': u'anatomy',
                         'format': u'nii.gz',
                         'type': u'raw fMRI',
-                        'filepath': unicode(osp.relpath(run+'/bold.nii.gz')),
+                        'related_study': study.eid,
+                        'filepath': unicode(osp.relpath(run+'/bold.nii.gz', start=base_path)),
                         'valid': True, 'completed': True, 'description': 'Image from OpenFmri'}
                 mri_data = store.create_entity('MRIData', sequence=u'T1')
                 # MRI data
@@ -151,7 +157,7 @@
                 anat['has_data'] = mri_data.eid
                 anat = store.create_entity('Scan', **anat)
                 store.relate(anat.eid, 'concerns', subject.eid, 'Scan')
-                store.relate(assessment.eid, 'generates', anat.eid)
+                store.relate(assessment.eid, 'generates', anat.eid, 'Assessment')
                 if device_eid:
                     store.relate(anat.eid, 'uses_device', device_eid)
                 for eid in models.get(task_id, []):