From ccd5d1bc1a978ff803aa713ee4d33215adebb07b Mon Sep 17 00:00:00 2001 From: Marcel Zwiers Date: Mon, 16 Sep 2024 09:31:13 +0200 Subject: [PATCH] First (non-working) version of the new BidsMap, Format and Datatype classes --- bidscoin/bcoin.py | 12 +- bidscoin/bids.py | 2410 ++++++++++++------------ bidscoin/bidsapps/fixmeta.py | 8 +- bidscoin/bidscoiner.py | 32 +- bidscoin/bidseditor.py | 592 +++--- bidscoin/bidsmapper.py | 108 +- bidscoin/heuristics/schema.json | 2 +- bidscoin/plugins/README | 6 +- bidscoin/plugins/dcm2niix2bids.py | 122 +- bidscoin/plugins/nibabel2bids.py | 70 +- bidscoin/plugins/spec2nii2bids.py | 56 +- bidscoin/utilities/bidsparticipants.py | 10 +- docs/CHANGELOG.md | 2 +- docs/plugins.rst | 6 +- tests/test_bids.py | 4 +- tests/test_bidsmapper.py | 4 +- 16 files changed, 1812 insertions(+), 1632 deletions(-) diff --git a/bidscoin/bcoin.py b/bidscoin/bcoin.py index 1aa694d5..34614e27 100755 --- a/bidscoin/bcoin.py +++ b/bidscoin/bcoin.py @@ -443,7 +443,7 @@ def test_plugin(plugin: Union[Path,str], options: dict) -> int: Performs runtime tests of the plug-in :param plugin: The name of the plugin that is being tested - :param options: A dictionary with the plugin options, e.g. taken from the bidsmap['Options']['plugins'][plugin.stem] + :param options: A dictionary with the plugin options, e.g. taken from the bidsmap.plugins[plugin.stem] :return: The result of the plugin test routine (e.g. 
0 if it passed or 1 if there was a general plug-in error) """ @@ -493,12 +493,12 @@ def test_bidsmap(bidsmapfile: str): bidsmapfile = Path(bidsmapfile) if bidsmapfile.is_dir(): bidsmapfile = bidsmapfile/'code'/'bidscoin'/'bidsmap.yaml' - bidsmap, _ = bids.load_bidsmap(bidsmapfile, checks=(True, True, True)) + bidsmap = bids.BidsMap(bidsmapfile, checks=(True, True, True)) - return bids.validate_bidsmap(bidsmap, 1) + return bidsmap.validate(1) -def test_bidscoin(bidsmapfile: Union[Path,dict], options: dict=None, testplugins: bool=True, testgui: bool=True, testtemplate: bool=True) -> int: +def test_bidscoin(bidsmapfile: Path, options: dict=None, testplugins: bool=True, testgui: bool=True, testtemplate: bool=True) -> int: """ Performs a bidscoin installation test @@ -521,7 +521,7 @@ def test_bidscoin(bidsmapfile: Union[Path,dict], options: dict=None, testplugins LOGGER.info(f"Running bidsmap checks:") try: # Moving the import to the top of this module will cause circular import issues from bidscoin import bids - bidsmap, _ = bids.load_bidsmap(bidsmapfile, checks=(True, True, False)) + bidsmap = bids.BidsMap(bidsmapfile, checks=(True, True, False)) except Exception as bidsmaperror: LOGGER.error(f"An error occurred when loading {bidsmapfile}:\n{bidsmaperror}\nThis may be due to invalid YAML syntax. You can check this using a YAML validator (e.g. 
https://www.yamllint.com)") bidsmap = {'Options': {}} @@ -533,7 +533,7 @@ def test_bidscoin(bidsmapfile: Union[Path,dict], options: dict=None, testplugins if testtemplate: try: # Moving the import to the top of this module will cause circular import issues from bidscoin import bids - success = bids.check_template(bidsmap) and success + success = bidsmap.check_template() and success except ImportError: LOGGER.info(f"Could not fully test: {bidsmap}") diff --git a/bidscoin/bids.py b/bidscoin/bids.py index 7f203cb1..3147e0d7 100644 --- a/bidscoin/bids.py +++ b/bidscoin/bids.py @@ -21,7 +21,9 @@ from functools import lru_cache from pathlib import Path from typing import List, Set, Tuple, Union, Dict, Any, Iterable, NewType -from pydicom import dcmread, fileset, datadict + +from numpy.f2py.f90mod_rules import options +from pydicom import dcmread, fileset from importlib.util import find_spec if find_spec('bidscoin') is None: import sys @@ -34,14 +36,12 @@ # Define custom data types (replace with proper classes or TypeAlias of Python >= 3.10) Plugin = NewType('Plugin', Dict[str, Any]) -Options = NewType('Options', Dict[str, Dict[str, Any]]) +Plugins = NewType('Plugin', Dict[str, Plugin]) +Options = NewType('Options', Dict[str, Any]) Properties = NewType('Properties', Dict[str, Any]) Attributes = NewType('Attributes', Dict[str, Any]) Bids = NewType('Bids', Dict[str, Any]) Meta = NewType('Meta', Dict[str, Any]) -Run = NewType('Run', Dict[str, Any]) # Any = Union[Provenance, Properties, Attributes, Bids, Meta, DataSource]]) but we cannot yet refer to DataSource -Dataformat = NewType('Dataformat', Dict[str, Union[str, List[Run]]]) -Bidsmap = NewType('Bidsmap', Dict[str, Union[Options, Dataformat]]) LOGGER = logging.getLogger(__name__) @@ -61,7 +61,7 @@ entities = yaml.load(_stream) "The descriptions of the entities present in BIDS filenames" with (schemafolder/'objects'/'extensions.yaml').open('r') as _stream: - extensions = [val['value'] for key,val in 
yaml.load(_stream).items() if val['value'] not in ('.json','.tsv','.bval','.bvec')] + extensions = [val['value'] for key,val in yaml.load(_stream).items() if val['value'] not in ('.json','.tsv','.bval','.bvec') and '/' not in val['value']] "The possible extensions of BIDS data files" with (schemafolder/'rules'/'entities.yaml').open('r') as _stream: entitiesorder = yaml.load(_stream) @@ -72,73 +72,87 @@ class DataSource: - """ - A source data type (e.g. DICOM or PAR) that can be converted to BIDS by the plugins - - :param provenance: The full path of a representative file for this data source - :param plugins: The plugins that are used to interact with the source data type. Uses bidsmap['Options']['plugins'] format - :param dataformat: The dataformat name in the bidsmap, e.g. DICOM or PAR - :param datatype: The intended BIDS data type of the data source TODO: move to a separate BidsTarget/Mapping class - :param subprefix: The subprefix used in the sourcefolder - :param sesprefix: The sesprefix used in the sourcefolder - """ + """Reads properties, attributes and BIDS-related features to sourcefiles of a supported dataformat (e.g. 
DICOM or PAR)""" - def __init__(self, provenance: Union[str, Path]='', plugins: Dict[str, Plugin]=None, dataformat: str='', datatype: str='', subprefix: str='', sesprefix: str=''): - self.path: Path = Path(provenance) - self.datatype: str = datatype - self.dataformat: str = dataformat - self.plugins = plugins - if not plugins: - self.plugins = {} - if not dataformat: - self.is_datasource - self.subprefix: str = subprefix - self.sesprefix: str = sesprefix - self._cache: dict = {} + def __init__(self, sourcefile: Union[str, Path]='', plugins: Plugins=None, dataformat: str='', options: Options=None): + """ + Reads the properties and attributes from a source data file + + :param sourcefile: The full filepath of the data source + :param plugins: The plugin dictionaries with their options + :param dataformat: The dataformat name in the bidsmap, e.g. DICOM or PAR + :param options: A (bidsmap) dictionary with 'subprefix' and 'sesprefix' fields + """ + + self.path = Path(sourcefile) + """The full path of a representative file for this data source""" + self.dataformat = dataformat + """The dataformat name of the plugin that interacts with the data source, e.g. 
DICOM or PAR""" + self.plugins = plugins or {} + """The plugins that are used to interact with the source data type""" + self.subprefix = options['subprefix'] or '' + """The subprefix used in the sourcefolder""" + self.sesprefix = options['sesprefix'] or '' + """The sesprefix used in the sourcefolder""" + self._cache = {} + + def __eq__(self, other): + """Equality test for all DataSource attributes""" + + if isinstance(other, DataSource): + return (( self.path, self.dataformat, self.plugins, self.subprefix, self.sesprefix) == + (other.path, other.dataformat, other.plugins, other.subprefix, other.sesprefix)) + else: + return NotImplemented def __repr__(self): - return (f"Path:\t\t{self.path}\n" - f"Datatype:\t{self.datatype}\n" + + return (f"{self.__class__}\n" + f"Path:\t\t{self.path}\n" f"Dataformat:\t{self.dataformat}\n" f"Plugins:\t{self.plugins}\n" f"Subprefix:\t{self.subprefix}\n" f"Sesprefix:\t{self.sesprefix}") + def __str__(self): + + return f"[{self.dataformat}] {self.path}" + @property def resubprefix(self) -> str: """Returns the subprefix with escaped regular expression characters (except '-'). A single '*' wildcard is returned as ''""" + return '' if self.subprefix=='*' else re.escape(self.subprefix).replace(r'\-','-') @property def resesprefix(self) -> str: """Returns the sesprefix with escaped regular expression characters (except '-'). A single '*' wildcard is returned as ''""" + return '' if self.sesprefix=='*' else re.escape(self.sesprefix).replace(r'\-','-') - @property - def is_datasource(self) -> bool: - """Returns True is the datasource has a valid dataformat""" + def has_plugin(self) -> bool: + """Test whether the datasource has a plugin that supports the sourcefile. 
If so, then update self.dataformat accordingly""" if not self.path.is_file() or self.path.is_dir(): return False for plugin, options in self.plugins.items(): - module = bcoin.import_plugin(plugin, ('is_sourcefile',)) + module = bcoin.import_plugin(plugin, ('has_support',)) if module: try: - dataformat = module.is_sourcefile(self.path) + supported = module.has_support(self.path, self.dataformat) except Exception as moderror: - dataformat = '' + supported = '' LOGGER.exception(f"The {plugin} plugin crashed while reading {self.path}\n{moderror}") - if dataformat: - self.dataformat: str = dataformat + if supported: + if self.dataformat and self.dataformat != supported: + LOGGER.debug(f"Inconsistent dataformat found, updating: {self.dataformat} -> {supported}") + self.dataformat: str = supported return True - if self.datatype: - LOGGER.verbose(f"No plugins found that can read {self.datatype}: {self.path}") - return False - def properties(self, tagname: str, run: Run=None) -> Union[str, int]: + def properties(self, tagname: str, run=None) -> Union[str, int]: """ Gets the 'filepath[:regex]', 'filename[:regex]', 'filesize' or 'nrfiles' filesystem property. The filepath (with trailing "/") and filename can be parsed using an optional regular expression re.findall(regex, filepath/filename). The last match is returned @@ -341,6 +355,1124 @@ def dynamicvalue(self, value, cleanup: bool=True, runtime: bool=False): return value +class RunItem: + """ + Reads and writes to/from a YAML runitem dictionary, i.e. the provenance string, the properties and attributes input + dictionaries and the bids and meta output dictionaries (bidsmap > dataformat > datatype > run-item) + """ + + def __init__(self, dataformat: str='', datatype: str='', data: dict=None, datasource: DataSource=None, options: Options=None, plugins: Plugins=None): + """ + Create a run-item with the proper structure, provenance info and a data source. 
NB: Updates to the attributes traverse to the + datasource, but not vice versa + + :param dataformat: The name of the dataformat + :param datatype: The name of the datatype + :param data: The YAML run-item dictionary with the following keys: provenance, properties, attributes, bids, meta + :param datasource: A data source that is deepcopied and added to the object, otherwise a datasource is created from data['provenance'] + :param options: The dictionary with the BIDScoin options + :param plugins: The plugin dictionaries with their options + """ + + super().__setattr__('_data', data or {'provenance': '', 'properties': {}, 'attributes': {}, 'meta': {}}) + + # Set the regular attributes + self.datasource = datasource = copy.deepcopy(datasource) if datasource else DataSource(self.provenance, plugins, dataformat, options) + """The DataSource object that is deepcopied or created from the run-item provenance""" + datasource.subprefix = options['subprefix'] if options else datasource.subprefix + datasource.sesprefix = options['sesprefix'] if options else datasource.sesprefix + self.dataformat = dataformat or datasource.dataformat + """The name of the dataformat""" + self.datatype = datatype + """The name of the datatype""" + self.options = options + """The dictionary with the BIDScoin options""" + self.plugins = plugins or datasource.plugins + """The plugin dictionaries with their options""" + + # Set the default data attributes + self.provenance = data['provenance'] if data else str(datasource.path) + """The file path of the data source""" + self.properties = Properties({'filepath': '', 'filename': '', 'filesize': '', 'nrfiles': None}) + """The file system properties from the data source that can be matched against other data sources""" + self.attributes = Attributes({}) + """The (header) attributes from the data source that can be matched against other data sources""" + self.bids = Bids({'suffix': ''}) + """The BIDS output dictionary (used for constructing the BIDS 
filename)""" + self.meta = Meta({}) + """The meta output dictionary (will be appended to the json sidecar file)""" + + # Populate the data attributes with the given data (except provenance, which was already stored above) + for key in data or []: + if key != 'provenance': + setattr(self, key, copy.copy(data[key])) # This should remove the YAML comments and bounds + + def __getattr__(self, name: str): + + _name = f"_{name}" + _getattr = super().__getattribute__ + + return _getattr('_data')[name] if name in _getattr('_data') else _getattr(_name) + + def __setattr__(self, name, value): + + _name = f"_{name}" + _getattr = super().__getattribute__ + _setattr = super().__setattr__ + + if name in _getattr('_data'): + _getattr('_data')[name] = value + else: + _setattr(_name, value) + + # Also update the twin attributes of the datasource (should never happen anyway) + if name == 'dataformat': + self.datasource.dataformat = value or '' + if name == 'plugins': + self.datasource.plugins = value or {} + + def __str__(self): + + return f"[{self.dataformat}/{self.datatype}] {self.provenance}" + + def __repr__(self): + + return (f"{self.__class__}\n" + f"Datasource:\t{self.datasource}\n" + f"Dataformat:\t{self.dataformat}\n" + f"Datatype:\t{self.datatype}\n" + f"Provenance:\t{self.provenance}\n" + f"Properties:\t{self.properties}\n" + f"Attributes:\t{self.attributes}\n" + f"Bids:\t\t{self.bids}\n" + f"Meta:\t\t{self.meta}") + + def __eq__(self, other): + """A deep test for the RunItem attributes and YAML data""" + + if isinstance(other, RunItem): + return (self.dataformat, self.datatype, self._data) == (other.dataformat, other.datatype, other._data) + else: + return NotImplemented + + def check(self, datatype: str, checks: Tuple[bool, bool, bool]=(False, False, False)) -> Tuple[Union[bool, None], Union[bool, None], Union[bool, None]]: + """ + Check run for required and optional entities using the BIDS schema files + + :param datatype: The datatype that is checked, e.g. 
'anat' + :param checks: Booleans to report if all (bidskeys, bids-suffixes, bids-values) in the run are present according to the BIDS schema specifications + :return: True/False if the keys, suffixes or values are bids-valid or None if they cannot be checked + """ + + run_keysok = None + run_suffixok = None + run_valsok = None + datatype = str(datatype) # To support DataType objects + + # Check if we have provenance info + if all(checks) and not self.provenance: + LOGGER.info(f"No provenance info found for {datatype}/*_{self.bids['suffix']}") + + # Check if we have a suffix and datatype rules + if 'suffix' not in self.bids: + if checks[1]: LOGGER.warning(f'Invalid bidsmap: The {datatype} "suffix" key is missing ({datatype} -> {self.provenance})') + return run_keysok, False, run_valsok # The suffix is not BIDS-valid, we cannot check the keys and values + if datatype not in datatyperules: + return run_keysok, run_suffixok, run_valsok # We cannot check anything + + # Use the suffix to find the right typegroup + suffix = self.bids.get('suffix') + if self.datasource.path.is_file(): + suffix = self.datasource.dynamicvalue(suffix, True, True) + for typegroup in datatyperules[datatype]: + + if '<' not in suffix or '>' not in suffix: + run_suffixok = False # We can now check the suffix + + if suffix in datatyperules[datatype][typegroup]['suffixes']: + + run_keysok = True # We can now check the key + run_suffixok = True # The suffix is valid + run_valsok = True # We can now check the value + + # Check if all expected entity-keys are present in the run and if they are properly filled + for entity in datatyperules[datatype][typegroup]['entities']: + entitykey = entities[entity]['name'] + entityformat = entities[entity]['format'] # E.g. 
'label' or 'index' (the entity type always seems to be 'string') + bidsvalue = self.bids.get(entitykey) + dynamicvalue = True if isinstance(bidsvalue, str) and ('<' in bidsvalue and '>' in bidsvalue) else False + if entitykey in ('sub', 'ses'): continue + if isinstance(bidsvalue, list): + bidsvalue = bidsvalue[bidsvalue[-1]] # Get the selected item + if entitykey not in self.bids: + if checks[0]: LOGGER.warning(f'Invalid bidsmap: The "{entitykey}" key is missing ({datatype}/*_{self.bids["suffix"]} -> {self.provenance})') + run_keysok = False + if bidsvalue and not dynamicvalue and bidsvalue!=sanitize(bidsvalue): + if checks[2]: LOGGER.warning(f'Invalid {entitykey} value: "{bidsvalue}" ({datatype}/*_{self.bids["suffix"]} -> {self.provenance})') + run_valsok = False + elif not bidsvalue and datatyperules[datatype][typegroup]['entities'][entity]=='required': + if checks[2]: LOGGER.warning(f'Required "{entitykey}" value is missing ({datatype}/*_{self.bids["suffix"]} -> {self.provenance})') + run_valsok = False + if bidsvalue and not dynamicvalue and entityformat=='index' and not str(bidsvalue).isdecimal(): + if checks[2]: LOGGER.warning(f'Invalid {entitykey}-index: "{bidsvalue}" is not a number ({datatype}/*_{self.bids["suffix"]} -> {self.provenance})') + run_valsok = False + + # Check if all the bids-keys are present in the schema file + entitykeys = [entities[entity]['name'] for entity in datatyperules[datatype][typegroup]['entities']] + for bidskey in self.bids: + if bidskey not in entitykeys + ['suffix']: + if checks[0]: LOGGER.warning(f'Invalid bidsmap: The "{bidskey}" key is not allowed according to the BIDS standard ({datatype}/*_{self.bids["suffix"]} -> {self.provenance})') + run_keysok = False + if run_valsok: run_valsok = None + + break + + # Hack: There are physio, stim and events entities in the 'task'-rules, which can be added to any datatype + if suffix in datatyperules['task']['events']['suffixes'] + datatyperules['task']['timeseries']['suffixes']: + 
bidsname = self.bidsname(validkeys=False, runtime=self.datasource.path.is_file()) + run_suffixok = bids_validator.BIDSValidator().is_bids(f"/sub-foo/{datatype}/{bidsname}.json") # NB: Using the BIDSValidator sounds nice but doesn't give any control over the BIDS-version + run_valsok = run_suffixok + LOGGER.bcdebug(f"bidsname={run_suffixok}: /sub-foo/{datatype}/{bidsname}.json") + + if checks[0] and run_keysok in (None, False): + LOGGER.bcdebug(f'Invalid "{run_keysok}" key-checks in run-item: "{self.bids["suffix"]}" ({datatype} -> {self.provenance})\nRun["bids"]:\n{self.bids}') + + if checks[1] and run_suffixok is False: + LOGGER.warning(f'Invalid run-item with suffix: "{self.bids["suffix"]}" ({datatype} -> {self.provenance})') + LOGGER.bcdebug(f"Run['bids']:\n{self.bids}") + + if checks[2] and run_valsok in (None, False): + LOGGER.bcdebug(f'Invalid "{run_valsok}" val-checks in run-item: "{self.bids["suffix"]}" ({datatype} -> {self.provenance})\nRun["bids"]:\n{self.bids}') + + return run_keysok, run_suffixok, run_valsok + + def strip_suffix(self): + """ + Certain attributes such as SeriesDescriptions (but not ProtocolName!?) may get a suffix like '_SBRef' from the vendor, + try to strip it off from the BIDS entities + """ + + # See if we have a suffix for this datatype + bids = self.bids + if 'suffix' in bids and bids['suffix']: + suffix = bids['suffix'].lower() + else: + return + + # See if any of the BIDS labels ends with the same suffix. If so, then remove it + for key in bids: + if key == 'suffix': + continue + if isinstance(bids[key], str) and bids[key].lower().endswith(suffix): + bids[key] = bids[key][0:-len(suffix)] # NB: This will leave the added '_' and '.' 
characters, but they will be taken out later (as they are not BIDS-valid) + + def bidsname(self, subid: str='unknown', sesid: str='', validkeys: bool=False, runtime: bool=False, cleanup: bool=True) -> str: + """ + Composes a filename as it should be according to the BIDS standard using the BIDS keys in run. The bids values are + dynamically updated and cleaned, and invalid bids keys and empty bids values are ignored + + :param subid: The subject identifier, i.e. name of the subject folder (e.g. 'sub-001' or just '001') + :param sesid: The optional session identifier, i.e. name of the session folder (e.g. 'ses-01' or just '01'). Can be left empty + :param validkeys: Removes non-BIDS-compliant bids-keys if True + :param runtime: Replaces dynamic bidsvalues if True + :param cleanup: Sanitizes non-BIDS-compliant characters from the filename if True + :return: The composed BIDS file-name (without file-extension) + """ + + # Try to update the sub/ses-ids + subid = re.sub(f'^sub-', '', subid) + sesid = re.sub(f'^ses-', '', sesid) if sesid else '' # Catch sesid = None + if cleanup: + subid = sanitize(subid) + sesid = sanitize(sesid) + + # Compose the bidsname + bidsname = f"sub-{subid}{'_ses-'+sesid if sesid else ''}" # Start with the subject/session identifier + entitiekeys = [entities[entity]['name'] for entity in entitiesorder] # Use the valid keys from the BIDS schema + if not validkeys: # Use the (ordered valid + invalid) keys from the run item + entitiekeys = [key for key in entitiekeys if key in self.bids] + \ + [key for key in self.bids if key not in entitiekeys and key!='suffix'] + for entitykey in entitiekeys: + bidsvalue = self.bids.get(entitykey) # Get the entity data from the run item + if not bidsvalue: + bidsvalue = '' + if isinstance(bidsvalue, list): + bidsvalue = bidsvalue[bidsvalue[-1]] # Get the selected item + elif runtime and not (entitykey=='run' and (bidsvalue.replace('<','').replace('>','').isdecimal() or bidsvalue == '<<>>')): + bidsvalue = 
self.datasource.dynamicvalue(bidsvalue, cleanup=True, runtime=runtime) + if cleanup: + bidsvalue = sanitize(bidsvalue) + if bidsvalue: + bidsname = f"{bidsname}_{entitykey}-{bidsvalue}" # Append the key-value data to the bidsname + suffix = self.bids.get('suffix') + if runtime: + suffix = self.datasource.dynamicvalue(suffix, runtime=runtime) + if cleanup: + suffix = sanitize(suffix) + bidsname = f"{bidsname}{'_'+suffix if suffix else ''}" # And end with the suffix + + return bidsname + + def increment_runindex(self, outfolder: Path, bidsname: str, scans_table: pd.DataFrame=None, targets: Set[Path]=()) -> str: + """ + Checks if a file with the same bidsname already exists in the folder and then increments the dynamic runindex + (if any) until no such file is found. + + NB: For <<>> runs, if the run-less file already exists, then add 'run-2' to bidsname and rename run-less files + to 'run-1', and, optionally, do the same for entries in scans_table and targets (i.e. keep them in sync) + + :param outfolder: The full pathname of the bids output folder + :param bidsname: The bidsname with a provisional runindex, e.g. 
from get_bidsname() + :param scans_table: The scans.tsv table that needs to remain in sync when renaming a run-less file + :param targets: The set of output targets that need to remain in sync when renaming a run-less file + :return: The bidsname with the original or incremented runindex + """ + + # Check input + runval = str(self.bids.get('run') or '') + if not (runval.startswith('<<') and runval.endswith('>>') and (runval.replace('<','').replace('>','').isdecimal() or runval == '<<>>')): + return bidsname + bidsext = ''.join(Path(bidsname).suffixes) + bidsname = bidsname.split('.')[0] + + # Make an inventory of the runs + runless_name = insert_bidskeyval(bidsname, 'run', '', False) + run1_name = insert_bidskeyval(bidsname, 'run', '1', False) + runless_files = list(outfolder.glob(f"{runless_name}.*")) + run1_files = list(outfolder.glob(f"{run1_name}.*")) + + # Start incrementing from run-1 if we have already renamed runless to run-1 + if run1_files and runval == '<<>>': + bidsname = run1_name + + # Increment the run-index if the bidsfile already exists until that's no longer the case + while list(outfolder.glob(f"{bidsname}.*")): # The run already exists -> increment the run-index + runindex = get_bidsvalue(bidsname, 'run') or '1' # If run-less -> identify as existing run-1 + bidsname = insert_bidskeyval(bidsname, 'run', str(int(runindex) + 1), False) + + # Rename run-less to run-1 when dealing with a new run-2 + if runless_files and get_bidsvalue(bidsname, 'run') == '2': + + # Check if everything is OK + if runless_files and run1_files: + LOGGER.error(f"File already exists, cannot rename {outfolder/runless_name}.* -> {run1_name}.*") + return bidsname + bidsext + + # Rename run-less to run-1 + for runless_file in runless_files: + LOGGER.verbose(f"Found run-2 files for <<>> index, renaming\n{runless_file} -> {run1_name}") + run1_file = (outfolder/run1_name).with_suffix(''.join(runless_file.suffixes)) + runless_file.replace(run1_file) + if runless_file in targets: + 
targets.remove(runless_file) + targets.add(run1_file) + run1_scan = f"{run1_file.parent.name}/{run1_file.name}" # NB: as POSIX + runless_scan = f"{runless_file.parent.name}/{runless_file.name}" # NB: as POSIX + if scans_table is not None and runless_scan in scans_table.index: + scans_table.rename(index={runless_scan: run1_scan}, inplace=True) + + return bidsname + bidsext + + +class DataType: + """Reads and writes to/from a YAML datatype dictionary (bidsmap > dataformat > datatype)""" + + def __init__(self, dataformat: str, datatype: str, data: list, options: Options, plugins: Plugins): + """ + Reads from a YAML datatype dictionary + + :param dataformat: The name of the dataformat + :param datatype: The name of the datatype + :param data: The YAML datatype dictionary, i.e. a list of runitems + :param options: The dictionary with the BIDScoin options + :param plugins: The plugin dictionaries with their options + """ + + self.dataformat = dataformat + """The name of the dataformat""" + self.datatype = datatype + """The name of the datatype""" + self.options = options + """The dictionary with the BIDScoin options""" + self.plugins = plugins + """The plugin dictionaries with their options""" + self._data = data or [] + """The YAML datatype dictionary, i.e. 
a list of runitems""" + + def __str__(self): + + return f"{self.datatype}" # NB: Changing this likely breaks DataType support + + def __repr__(self): + + return f"{self.__class__} {self.datatype} ({len(self.runitems)})" + + def __eq__(self, other): + """A shallow test if the DataType name is equal (so irrespective whether their runitems differ)""" + + if isinstance(other, Union[DataType, str]): + return str(self) == str(other) + else: + return NotImplemented + + @property + def runitems(self) -> List[RunItem]: + """Returns a list of the RunItem objects for this datatype""" + + return [RunItem(self.dataformat, self.datatype, rundata, None, self.options, self.plugins) for rundata in self._data] + + def delete_run(self, provenance: str): + """ + Delete a run-item from the datatype section + + :param provenance: The provenance identifier of/or the run-item that is deleted + """ + + for index, runitem in enumerate(self.runitems): + if Path(runitem.provenance) == Path(provenance): + del self._data[index] + return + + LOGGER.error(f"Could not find (and delete) this [{self.dataformat}][{self.datatype}] run: '{provenance}") + + def delete_runs(self): + """Delete all run-items from the datatype section""" + + self._data = [] + + def insert_run(self, runitem: RunItem, position: int=None): + """ + Inserts a run-item to the DataType + + :param runitem: The run item that is appended to the list of run items of its datatype + :param position: The position at which the run is inserted. 
The run is appended at the end if position is None + """ + + self._data.insert(len(self._data) if position is None else position, runitem) + + def replace_run(self, runitem: RunItem): + """ + Replaces the existing run-item with the same provenance with a new run-item + + :param runitem: The new run-item + """ + + for index, run in enumerate(self._data): + if Path(run.provenance) == Path(runitem.provenance): + self._data[index] = runitem + return + + LOGGER.error(f"Could not replace {runitem} because it could not be found") + + +class DataFormat: + """Reads and writes to/from a YAML dataformat dictionary (bidsmap > dataformat)""" + + def __init__(self, dataformat: str, data: dict, options: Options, plugins: Plugins): + """ + Reads from a YAML dataformat dictionary + + :param dataformat: The name of the dataformat + :param data: The YAML dataformat dictionary, i.e. subject and session items + a set of datatypes + :param options: The dictionary with the BIDScoin options + :param plugins: The plugin dictionaries with their options + """ + + # Initialize the getter/setter data dictionary + self.__dict__['_data'] = {} + + self.dataformat = dataformat + """The name of the dataformat""" + self.options = options + """The dictionary with the BIDScoin options""" + self.plugins = plugins + """The plugin dictionaries with their options""" + self._data = data + """The YAML dataformat dictionary, i.e. 
subject and session items + a set of datatypes""" + + def __str__(self): + + return f"{self.dataformat}" # NB: Changing this likely breaks DataFormat support + + def __repr__(self): + + datatypes = '\n'.join([f"\t{repr(dtype)}" for dtype in self.datatypes]) + return f"{self.__class__} {self.dataformat}\n{datatypes}" + + def __eq__(self, other): + """A shallow test if the DataFormat name is equal (so irrespective whether their datatypes differ)""" + + if isinstance(other, Union[DataFormat, str]): + return str(self) == str(other) + else: + return NotImplemented + + @property + def subject(self) -> str: + """The regular expression for extracting the subject identifier""" + + return self._data['subject'] + + @subject.setter + def subject(self, value: str): + + self._data['subject'] = value + + @property + def session(self) -> str: + """The regular expression for extracting the session identifier""" + + return self._data['session'] + + @session.setter + def session(self, value: str): + self._data['session'] = value + + @property + def datatypes(self) -> List[DataType]: + """Gets a list of DataType objects for the dataformat""" + + return [DataType(self.dataformat, datatype, self._data[datatype], self.options, self.plugins) for datatype in self._data if datatype not in ('subject', 'session')] + + def datatype(self, datatype: Union[str, DataType]) -> DataType: + """Gets the DataType object for the dataformat""" + + return DataType(self.dataformat, str(datatype), self._data.get(str(datatype),[]), self.options, self.plugins) + + def add_datatype(self, datatype: Union[str, DataType]): + """Adds a new empty datatype item to the dataformat""" + + datatype = str(datatype) + if datatype not in self._data: + self._data[datatype] = [] + else: + LOGGER.debug(f"The {self.dataformat} dataformat already contains the datatype '{datatype}'") + + +class BidsMap: + """Reads and writes mapping heuristics from the bidsmap YAML-file""" + + def __init__(self, yamlfile: Path, folder: 
Path=templatefolder, plugins: Iterable[Union[Path,str]]=(), checks: Tuple[bool,bool,bool]=(True,True,True)): + """ + Read and standardize the bidsmap (i.e. add missing information and perform checks). If yamlfile is not fullpath, then 'folder' is first searched before + the default 'heuristics'. If yamlfile is empty, then first 'bidsmap.yaml' is searched for, then 'bidsmap_template'. So fullpath + has precedence over folder and bidsmap.yaml has precedence over the bidsmap_template. + + :param yamlfile: The full pathname or basename of the bidsmap yaml-file + :param folder: Used when yamlfile=basename and not in the pwd: yamlfile is then assumed to be in the (bidscoin)folder. A bidsignore file in folder will be added to the bidsmap bidsignore items + :param plugins: List of plugins to be used (with default options, overrules the plugin list in the study/template bidsmaps). Leave empty to use all plugins in the bidsmap + :param checks: Booleans to check if all (bidskeys, bids-suffixes, bids-values) in the run are present according to the BIDS schema specifications + """ + + # Initialize the getter/setter data dictionary + self.__dict__['_data'] = {} + + # Input checking + self.plugins = plugins = plugins or {} + """The plugins that are used to interact with the source data type""" + self.store = {} + """The in- and output folders for storing samples in the provenance store (NB: this is set by bidsmapper)""" + if not yamlfile.suffix: + yamlfile = yamlfile.with_suffix('.yaml') # Add a standard file-extension if needed + if len(yamlfile.parents) == 1 and not yamlfile.is_file(): + yamlfile = folder/yamlfile # Get the full path to the bidsmap yaml-file + yamlfile = yamlfile.resolve() + self.filepath = yamlfile if yamlfile.is_file() else Path() # Can be used to signal that the bidsmap is empty + """The full path to the bidsmap yaml-file""" + if not yamlfile.is_file(): + if yamlfile.name: LOGGER.info(f"No existing bidsmap file found: {yamlfile}") + return + + # Read the 
heuristics from the bidsmap file + if any(checks): + LOGGER.info(f"Reading: {yamlfile}") + with yamlfile.open('r') as stream: + bidsmap = yaml.load(stream) + self._data = bidsmap + """The raw YAML data""" + + # Issue a warning if the version in the bidsmap YAML-file is not the same as the bidscoin version + bidsmapversion = self.options.get('version', 'Unknown') + if bidsmapversion.rsplit('.', 1)[0] != __version__.rsplit('.', 1)[0] and any(checks): + LOGGER.warning(f'BIDScoiner version conflict: {yamlfile} was created with version {bidsmapversion}, but this is version {__version__}') + elif bidsmapversion != __version__ and any(checks): + LOGGER.info(f'BIDScoiner version difference: {yamlfile} was created with version {bidsmapversion}, but this is version {__version__}. This is normally OK but check the https://bidscoin.readthedocs.io/en/latest/CHANGELOG.html') + + # Make sure subprefix and sesprefix are strings + subprefix = self.options['subprefix'] = self.options['subprefix'] or '' + sesprefix = self.options['sesprefix'] = self.options['sesprefix'] or '' + + # Append the existing .bidsignore data from the bidsfolder and make sure bidsignore, unknowntypes, ignoretypes and notderivative are lists + if isinstance(self.options.get('bidsignore'), str): + self.options['bidsignore'] = self.options['bidsignore'].split(';') + bidsignorefile = folder.parents[1]/'.bidsignore' + if bidsignorefile.is_file(): + self.options['bidsignore'] = list(set(list(self.options['bidsignore']) + bidsignorefile.read_text().splitlines())) + self.options['bidsignore'] = list(set(self.options.get('bidsignore') or [])) + self.options['unknowntypes'] = list(set(self.options.get('unknowntypes') or [])) + self.options['ignoretypes'] = list(set(self.options.get('ignoretypes') or [])) + self.options['notderivative'] = list(set(self.options.get('notderivative') or [])) + + # Make sure we get a proper plugin options and dataformat sections (use plugin default bidsmappings when a template bidsmap is 
loaded) + if plugins: + for plugin in [plugin for plugin in self.plugins if plugin not in plugins]: + del self.plugins[plugin] + for plugin in plugins if plugins else self.plugins: + module = bcoin.import_plugin(plugin) + if not self.plugins.get(plugin): + LOGGER.info(f"Adding default bidsmap options from the {plugin} plugin") + self.plugins[plugin] = module.OPTIONS if 'OPTIONS' in dir(module) else {} + if 'BIDSMAP' in dir(module) and yamlfile.parent == templatefolder: + for dataformat, datasection in module.BIDSMAP.items(): + if dataformat not in bidsmap: + LOGGER.info(f"Adding default bidsmappings from the {plugin} plugin") + bidsmap[dataformat] = datasection + + self.dataformats = [DataFormat(dataformat, bidsmap[dataformat], self.options, self.plugins) for dataformat in bidsmap if dataformat not in ('$schema', 'Options')] + """Gets a list of the DataFormat objects in the bidsmap (e.g. DICOM)""" + + # Add missing provenance info, run dictionaries and bids entities + runitem_ = RunItem() + for dataformat in self.dataformats: + for datatype in dataformat.datatypes: + for index, runitem in enumerate(datatype.runitems or []): + + # Add missing provenance info + if not runitem.provenance: + runitem.provenance = str(Path(f"{subprefix.replace('*', '')}unknown/{sesprefix.replace('*', '')}unknown/{dataformat}_{datatype}_id{index + 1:03}")) + + # Update the provenance store paths if needed (e.g. when the bids-folder was moved) + provenance = Path(runitem.provenance) + if not provenance.is_file(): + for n, part in enumerate(provenance.parts): + if part == 'bidscoin' and provenance.parts[n + 1] == 'provenance': + store = folder/provenance.relative_to(*provenance.parts[0:n + 1]) + if store.is_file(): + LOGGER.bcdebug(f"Updating provenance: {provenance} -> {store}") + runitem.provenance = str(store) + + # Add default data dictionaries if they are missing (e.g. 
"meta" or "properties") + for attr in ('properties', 'attributes', 'bids', 'meta'): + datadict = getattr(runitem, attr) + for key, val in getattr(runitem_, attr).items(): + if not datadict.get(key): + datadict[key] = val + + # Add missing bids entities + suffix = runitem.bids.get('suffix') + if runitem.datasource.has_plugin(): + suffix = runitem.datasource.dynamicvalue(suffix, True, True) + for typegroup in datatyperules.get(datatype.datatype, {}): # E.g. typegroup = 'nonparametric' + if suffix in datatyperules[datatype.datatype][typegroup]['suffixes']: # run_found = True + for entity in datatyperules[datatype.datatype][typegroup]['entities']: + entitykey = entities[entity]['name'] + if entitykey not in runitem.bids and entitykey not in ('sub', 'ses'): + LOGGER.info(f"Adding missing {dataformat}>{datatype}>{suffix} bidsmap entity key: {entitykey}") + runitem.bids[entitykey] = '' + if entitykey == 'part' and not isinstance(runitem.bids['part'], list): + if runitem.bids['part'] in ('', 'mag', 'phase', 'real', 'imag', None): + runitem.bids['part'] = ['', 'mag', 'phase', 'real', 'imag', ('', 'mag', 'phase', 'real', 'imag').index(runitem.bids['part'] or '')] + else: + runitem.bids['part'] = ['', 'mag', 'phase', 'real', 'imag', runitem.bids['part'], 5] + + # Validate the bidsmap entries + self.check(checks) + + def __str__(self): + + return f"{self.filepath}" + + def __repr__(self): + + dataformats = '\n'.join([f"{repr(dformat)}" for dformat in self.dataformats]) + return (f"{self.__class__}\n" + f"Filepath:\t{self.filepath}\n" + f"Plugins:\t{[plugin for plugin in self.plugins]}\n" + f"Dataformats:\n{dataformats}") + + def __iter__(self): + + return iter(self.dataformats) + + @property + def options(self) -> Options: + """The dictionary with the BIDScoin options""" + + return self._data['Options']['bidscoin'] + + @options.setter + def options(self, options: dict): + self._data['Options']['bidscoin'] = options + + @property + def plugins(self) -> Plugins: + """The plugin 
dictionaries with their options""" + + return self._data['Options']['plugins'] + + @plugins.setter + def plugins(self, plugins: Plugins): + if 'Options' not in self._data: + self._data['Options'] = {} + self._data['Options']['plugins'] = plugins + + def dataformat(self, dataformat: str) -> DataFormat: + """Gets the DataFormat object from the bidsmap""" + + return DataFormat(dataformat, self._data.get(dataformat,{}), self.options, self.plugins) + + def save(self, filename: Path=None): + """ + Save the BIDSmap as a YAML text file + + :param filename: Full pathname of the bidsmap file (otherwise the existing filename will be used) + """ + + # Validate the bidsmap entries + self.check((False, True, True)) + self.validate(0) + + filename = filename or self.filepath + filename.parent.mkdir(parents=True, exist_ok=True) + LOGGER.info(f"Writing bidsmap to: {filename}") + with filename.open('w') as stream: + yaml.dump(self._data, stream) + + def validate(self, level: int=1) -> bool: + """ + Test the bidsname of runs in the bidsmap using the bids-validator + + :param level: (-2) as 2 but no logging reports, + (-1) as 1 but no logging reports, + (0) as 1 but only report invalid runs, + (1) test only BIDS datatypes, i.e. datatypes not in `.bidsignore` or `ignoretypes`, + (2) test all converted datatypes, i.e. 
datatypes not in `ignoretypes`, + (3) test all datatypes + :return: True if all tested runs in bidsmap were bids-valid, otherwise False + """ + + valid = True + ignoretypes = self.options.get('ignoretypes', []) + bidsignore = self.options.get('bidsignore', []) + + # Test all the runs in the bidsmap + LOGGER.info(f"bids-validator {bids_validator.__version__} test results (* = in .bidsignore):") + for dataformat in self.dataformats: + for datatype in dataformat.datatypes: + for runitem in datatype.runitems: + bidsname = runitem.bidsname(f"sub-{sanitize(dataformat)}", '', False) + ignore = check_ignore(datatype, bidsignore) or check_ignore(bidsname+'.json', bidsignore, 'file') + ignore_1 = datatype in ignoretypes or ignore + ignore_2 = datatype in ignoretypes + for ext in extensions: # NB: `ext` used to be '.json', which is more generic (but see https://github.com/bids-standard/bids-validator/issues/2113) + if bidstest := bids_validator.BIDSValidator().is_bids(f"/sub-{sanitize(dataformat)}/{datatype}/{bidsname}{ext}"): break + if level==3 or (abs(level)==2 and not ignore_2) or (-2 Tuple[Union[bool, None], Union[bool, None], Union[bool, None]]: + """ + Check all non-ignored runs in the bidsmap for required and optional entities using the BIDS schema files + + :param checks: Booleans to check if all (bids-keys, bids-suffixes, bids-values) in the run are present according to the BIDS schema specifications + :return: False if the keys, suffixes or values are proven to be invalid, otherwise None or True + """ + + results = (None, None, None) + + if not any(checks): + return results + + # Check all the runs in the bidsmap + LOGGER.info('Checking the bidsmap run-items:') + for dataformat in self.dataformats: + for datatype in dataformat.datatypes: + if datatype in self.options['ignoretypes']: continue # E.g. 
'exclude' + if check_ignore(datatype, self.options['bidsignore']): continue + if datatype.runitems and results == (None, None, None): + results = (True, True, True) # We can now check the bidsmap + for runitem in datatype.runitems: + bidsname = runitem.bidsname(validkeys=False) + if check_ignore(bidsname+'.json', self.options['bidsignore'], 'file'): continue + isvalid = runitem.check(datatype, checks) + results = [result and valid for result, valid in zip(results, isvalid)] + + if all([result is True for result, check in zip(results, checks) if check is True]): + LOGGER.success('All run-items in the bidsmap are valid') + elif any([result is False for result, check in zip(results, checks) if check is True]): + LOGGER.warning('Not all run-items in the bidsmap are valid') + else: + LOGGER.verbose('Could not validate every run-item in the bidsmap') + + return results + + def check_template(self) -> bool: + """ + Check all the datatypes in the template bidsmap for required and optional entities using the BIDS schema files + + :return: True if the template bidsmap is valid, otherwise False + """ + + valid = True + ignoretypes = self.options.get('ignoretypes', []) + bidsignore = self.options.get('bidsignore', []) + + # Check all the datatypes in the bidsmap + LOGGER.verbose('Checking the template bidsmap datatypes:') + for dataformat in self.dataformats: + for datatype in dataformat.datatypes: + if not (datatype in bidsdatatypesdef or datatype in ignoretypes or check_ignore(datatype, bidsignore)): + LOGGER.warning(f"Invalid {dataformat} datatype: '{datatype}' (you may want to add it to the 'bidsignore' list)") + valid = False + if datatype in ignoretypes: continue + datatypesuffixes = [] + for runitem in datatype.runitems: + datatypesuffixes.append(runitem.bids['suffix']) + for key, val in runitem.attributes.items(): + try: + re.compile(str(val)) + except re.error: + LOGGER.warning(f"Invalid regex pattern in the {key} value '{val}' in: {runitem}\nThis may cause 
run-matching errors unless '{val}' is a literal attribute value") + for typegroup in datatyperules.get(datatype.datatype, {}): + for suffix in datatyperules[datatype.datatype][typegroup]['suffixes']: + if not (suffix in datatypesuffixes or suffix in str(bidsignore) or + '[DEPRECATED]' in suffixes[suffix]['description'] or + '**Change:** Removed from' in suffixes[suffix]['description'] or + '**Change:** Replaced by' in suffixes[suffix]['description']): + LOGGER.warning(f"Missing '{suffix}' run-item in: bidsmap[{dataformat}][{datatype}] (NB: this may be fine / a deprecated item)") + valid = False + + # Validate against the json schema + with (templatefolder/'schema.json').open('r') as stream: + schema = json.load(stream) + try: + jsonschema.validate(self._data, schema) + except jsonschema.ValidationError as bidsmaperror: + LOGGER.warning(f"Invalid template bidsmap:\n{bidsmaperror}") + valid = False + + if valid: + LOGGER.success('All datatypes and options in the template bidsmap are valid') + else: + LOGGER.warning('Not all datatypes and options in the template bidsmap are valid') + + return valid + + def dir(self, dataformat: Union[str, DataFormat]) -> List[Path]: + """ + Make a provenance list of all the runs in the bidsmap[dataformat] + + :param dataformat: The information source in the bidsmap that is used, e.g. 
'DICOM' + :return: List of all provenances + """ + + provenance = [] + for datatype in self.dataformat(str(dataformat)).datatypes: + for runitem in datatype.runitems: + if not runitem.provenance: + LOGGER.warning(f'The bidsmap run {datatype} run does not contain provenance data') + else: + provenance.append(Path(runitem.provenance)) + + provenance.sort() + + return provenance + + def exist_run(self, runitem: RunItem, datatype: Union[str, DataType]='') -> bool: + """ + Checks the bidsmap to see if there is already an entry in runlist with the same properties and attributes as in the input run + + :param runitem: The run-item that is searched for in the bidsmap + :param datatype: The datatype that is searched in, e.g. 'anat'. Empty values will search through all datatypes + :return: True if the run exists in runlist, otherwise False + """ + + datatype = str(datatype) + + # Search recursively + if not datatype: + for dtype in self.dataformat(runitem.dataformat).datatypes: + if self.exist_run(runitem, dtype): + return True + + if datatype not in self.dataformat(runitem.dataformat).datatypes: + return False + + for run in self.dataformat(runitem.dataformat).datatype(datatype).runitems: + + # Begin with match = True unless all properties and attributes are empty + match = any([getattr(run, attr)[key] not in (None,'') for attr in ('properties','attributes') for key in getattr(run, attr)]) + + # TODO: Test if the run has more attributes than the runitem + + # Test if all properties and attributes of the runitem match with the run + for attr in ('properties', 'attributes'): + for itemkey, itemvalue in getattr(runitem, attr).items(): + value = getattr(run, attr).get(itemkey) # Matching labels which exist in one datatype but not in the other -> None + match = match and match_runvalue(itemvalue, value) + if not match: + break # There is no point in searching further within the run now that we've found a mismatch + + # Stop searching if we found a matching runitem (i.e. 
which is the case if match is still True after all run tests) + if match: + return True + + return False + + def get_matching_run(self, datasource: DataSource, runtime=False) -> Tuple[RunItem, str]: + """ + Find the first run in the bidsmap with properties and attributes that match with the data source. Only non-empty + properties and attributes are matched, except when runtime is True, then the empty attributes are also matched. + The datatypes are searched for in this order: + + ignoredatatypes (e.g. 'exclude') -> normal bidsdatatypes (e.g. 'anat') -> unknowndatatypes (e.g. 'extra_data') + + :param datasource: The data source from which the attributes are read. NB: The datasource.datatype attribute is updated + :param runtime: Dynamic <> are expanded if True + :return: (run, provenance) The returned run has all its attributes populated with the source file attributes. + If there is a match, the provenance of the bidsmap entry is returned, otherwise it will be ''. + NB: The run._data dictionary is NOT bounded to the bidsmap._data dictionary! 
+ """ + + unknowndatatypes = self.options.get('unknowntypes') or [] + ignoredatatypes = self.options.get('ignoretypes') or [] + bidsdatatypes = [dtype.datatype for dtype in self.dataformat(datasource.dataformat).datatypes if dtype not in unknowndatatypes + ignoredatatypes] + run_ = RunItem('', '', {}, datasource, self.options, self.plugins) + """The cleanly populated output run item""" + + # Loop through all datatypes and runs; all info goes cleanly into run_ (to avoid formatting problem of the CommentedMap) + if 'fmap' in bidsdatatypes: + bidsdatatypes.insert(0, bidsdatatypes.pop(bidsdatatypes.index('fmap'))) # Put fmap at the front (to catch inverted polarity scans first + for datatype in ignoredatatypes + bidsdatatypes + unknowndatatypes: # The ordered datatypes in which a matching run is searched for + + for run in self.dataformat(datasource.dataformat).datatype(datatype).runitems: + + # Begin with match = True unless all properties and attributes are empty + match = any([getattr(run, attr)[key] not in (None,'') for attr in ('properties','attributes') for key in getattr(run, attr)]) + run_ = RunItem('', datatype, {}, datasource, self.options, self.plugins) # (Re)populate the run item + + # Test if the data source matches all the non-empty run-item properties, but do not populate them + for propkey, propvalue in run.properties.items(): + + if propvalue: + sourcevalue = datasource.properties(propkey) + match = match and match_runvalue(sourcevalue, propvalue) + + # Do not populate the empty attribute with the info from the sourcefile but keep the matching expression + run_.properties[propkey] = propvalue + + # Test if the data source matches all the run-item attributes and populate all of them + for attrkey, attrvalue in run.attributes.items(): + + # Check if the attribute value matches with the info from the sourcefile + sourcevalue = datasource.attributes(attrkey, validregexp=True) + if attrvalue or runtime: + match = match and match_runvalue(sourcevalue, 
attrvalue) + + # Populate the empty attribute with the info from the sourcefile + run_.attributes[attrkey] = sourcevalue + + # Try to fill the bids-labels + for bidskey, bidsvalue in run.bids.items(): + + # NB: bidsvalue can be a (mutable) list + bidsvalue = copy.copy(bidsvalue) + + # Replace the dynamic bids values, except the dynamic run-index (e.g. <<>>) + if bidskey == 'run' and bidsvalue and (bidsvalue.replace('<','').replace('>','').isdecimal() or bidsvalue == '<<>>'): + run_.bids[bidskey] = bidsvalue + else: + run_.bids[bidskey] = datasource.dynamicvalue(bidsvalue, runtime=runtime) + + # SeriesDescriptions (and ProtocolName?) may get a suffix like '_SBRef' from the vendor, try to strip it off + run_ = run_.strip_suffix() + + # Try to fill the meta-data + for metakey, metavalue in run.meta.items(): + + # NB: metavalue can be a (mutable) list + metavalue = copy.copy(metavalue) + + # Replace the dynamic meta values, except the IntendedFor value (e.g. <>) + if metakey == 'IntendedFor': + run_.meta[metakey] = metavalue + elif metakey in ('B0FieldSource', 'B0FieldIdentifier') and fnmatch(str(metavalue), '*<>*'): + run_.meta[metakey] = metavalue + else: + run_.meta[metakey] = datasource.dynamicvalue(metavalue, cleanup=False, runtime=runtime) + + # Stop searching the bidsmap if we have a match + if match: + LOGGER.bcdebug(f"Found bidsmap match: {run} -> {run_}") + return run_, run.provenance + + # We don't have a match (all tests failed, so datatype should be the *last* one, e.g. 
unknowndatatype) + LOGGER.bcdebug(f"Found no bidsmap match for: {run_.provenance}") + if run_.datatype not in unknowndatatypes: + LOGGER.warning(f"Datatype was expected to be in {unknowndatatypes}, instead it is {run_.datatype} -> {run_.provenance}") + return run_, '' + + def get_run(self, datatype: Union[str, DataType], suffix_idx: Union[int, str], datasource: DataSource) -> RunItem: + """ + Find the (first) run in bidsmap[dataformat][bidsdatatype] with run.bids['suffix_idx'] == suffix_idx + + :param datatype: The datatype in which a matching run is searched for (e.g. 'anat') + :param suffix_idx: The name of the suffix that is searched for (e.g. 'bold') or the datatype index number + :param datasource: The datasource with the provenance file from which the properties, attributes and dynamic values are read + :return: The clean (filled) run item in the bidsmap[dataformat][bidsdatatype] with the matching suffix_idx, + otherwise an empty dict + """ + + datatype = str(datatype) + + for index, runitem in enumerate(self.dataformat(datasource.dataformat).datatype(datatype).runitems): + if index == suffix_idx or runitem.bids['suffix'] == suffix_idx: + + for propkey, propvalue in runitem.properties.items(): + runitem.properties[propkey] = propvalue + + for attrkey, attrvalue in runitem.attributes.items(): + if datasource.path.name: + runitem.attributes[attrkey] = datasource.attributes(attrkey, validregexp=True) + else: + runitem.attributes[attrkey] = attrvalue + + # Replace the dynamic bids values, except the dynamic run-index (e.g. <<>>) + for bidskey, bidsvalue in runitem.bids.items(): + + # NB: bidsvalue can be a (mutable) list + bidsvalue = copy.copy(bidsvalue) + if bidskey == 'run' and bidsvalue and (bidsvalue.replace('<','').replace('>','').isdecimal() or bidsvalue == '<<>>'): + runitem.bids[bidskey] = bidsvalue + else: + runitem.bids[bidskey] = datasource.dynamicvalue(bidsvalue) + + # Replace the dynamic meta values, except the IntendedFor value (e.g. 
<>) + for metakey, metavalue in runitem.meta.items(): + + # NB: metavalue can be a (mutable) list + metavalue = copy.copy(metavalue) + if metakey == 'IntendedFor': + runitem.meta[metakey] = metavalue + elif metakey in ('B0FieldSource', 'B0FieldIdentifier') and fnmatch(str(metavalue), '*<>*'): + runitem.meta[metakey] = metavalue + else: + runitem.meta[metakey] = datasource.dynamicvalue(metavalue, cleanup=False) + + return runitem + + LOGGER.error(f"A '{datatype}' run with suffix_idx '{suffix_idx}' cannot be found in bidsmap['{datasource.dataformat}']") + + def find_run(self, provenance: str, dataformat: Union[str, DataFormat]='', datatype: Union[str, DataType]='') -> RunItem: + """ + Find the (first) run in bidsmap[dataformat][bidsdatatype] with run.provenance == provenance + + :param provenance: The unique provenance that is used to identify the run + :param dataformat: The dataformat section in the bidsmap in which a matching run is searched for, e.g. 'DICOM'. Otherwise, all dataformats are searched + :param datatype: The datatype in which a matching run is searched for (e.g. 'anat'). 
Otherwise, all datatypes are searched + :return: The (unfilled) run item from the bidsmap[dataformat][bidsdatatype] + """ + + dataformat = str(dataformat) + datatype = str(datatype) + + for dataformat in [self.dataformat(dataformat)] if dataformat else self.dataformats: + datatypes = [dataformat.datatype(datatype)] if datatype else dataformat.datatypes + for dtype in datatypes: + for runitem in dtype.runitems: + if Path(runitem.provenance) == Path(provenance): + return runitem + + LOGGER.bcdebug(f"Could not find this [{dataformat}][{datatype}] run: '{provenance}") + + def delete_run(self, provenance: Union[RunItem, str], datatype: Union[str, DataType]='', dataformat: Union[str, DataFormat]=''): + """ + Delete the first matching run from the BIDS map + + :param provenance: The provenance identifier of/or the run-item that is deleted + :param datatype: The datatype that of the deleted runitem, e.g. 'anat' + :param dataformat: The dataformat section in the bidsmap in which the run is deleted, e.g. 'DICOM'. Otherwise, + all dataformat sections searched for + """ + + if isinstance(provenance, str): + runitem = self.find_run(provenance, dataformat, datatype) + else: + runitem = provenance + provenance = runitem.provenance + if not runitem.provenance: + return + + dformat = self.dataformat(str(dataformat) or runitem.dataformat) + dtype = dformat.datatype(str(datatype) or runitem.datatype) + dtype.delete_run(provenance) + + def insert_run(self, runitem: RunItem, position: int=None): + """ + Inserts a run-item to the BIDS map (e.g. allowing you to insert a run-item from another bidsmap). + Optionally, a copy of the datasource is stored in the provenance store + + :param runitem: The (cleaned orphan) run item that is appended to the list of run items of its datatype + :param position: The position at which the run is inserted. 
The run is appended at the end if position is None + """ + + # Work from the provenance store if given (the store source and target are set during bidsmapper runtime) + if self.store and runitem.datasource.path.is_file(): + targetfile = self.store['target']/runitem.datasource.path.relative_to(self.store['source']) + if not targetfile.is_file(): + targetfile.parent.mkdir(parents=True, exist_ok=True) + LOGGER.verbose(f"Storing a copy of the discovered sample: {targetfile}") + runitem.provenance = str(shutil.copyfile(runitem.datasource.path, targetfile)) + + # Insert the run item + self.dataformat(runitem.dataformat).add_datatype(runitem.datatype) # Add the datatype if it doesn't exist + self.dataformat(runitem.dataformat).datatype(runitem.datatype).insert_run(runitem, position) + + def update(self, source_datatype: Union[str, DataType], runitem: RunItem): + """ + Update the BIDS map if the runitem datatype has changed: + 1. Remove the runitem from the old datatype section + 2. Append the (cleaned and deepcopied) runitem to its new datatype section + + Else: + 1. Use the provenance to look up its index number + 2. Replace the runitem + + :param source_datatype: The old datatype, e.g. 
'anat' + :param runitem: The run item that is being moved to its new runitem.datatype + """ + + dataformat = runitem.dataformat + new_datatype = runitem.datatype + num_runs_in = len(self.dir(dataformat)) + + # Warn the user if the target run already exists when the run is moved to another datatype + if source_datatype != new_datatype: + if self.exist_run(runitem, new_datatype): + LOGGER.error(f'The "{source_datatype}" run already exists in {new_datatype}...') + + # Delete the source run + self.delete_run(runitem, source_datatype) + + # Append the (cleaned-up) target run + self.insert_run(runitem) + + else: + self.dataformat(dataformat).datatype(new_datatype).replace_run(runitem) + + num_runs_out = len(self.dir(dataformat)) + if num_runs_out != num_runs_in: + LOGGER.error(f"Number of runs in bidsmap['{dataformat}'] changed unexpectedly: {num_runs_in} -> {num_runs_out}") + + def unpack(sesfolder: Path, wildcard: str='', workfolder: Path='', _subprefix: Union[str,None]='') -> Tuple[Set[Path], bool]: """ Unpacks and sorts DICOM files in sourcefolder to a temporary folder if sourcefolder contains a DICOMDIR file or .tar.gz, .gz or .zip files @@ -508,7 +1640,7 @@ def get_parfiles(folder: Path) -> List[Path]: return parfiles -def get_datasource(sourcedir: Path, plugins: Dict[str, Plugin], recurse: int=8) -> DataSource: +def get_datasource(sourcedir: Path, plugins: Plugins, recurse: int=8) -> DataSource: """Gets a data source from the sourcedir inputfolder and its recursive subfolders""" datasource = DataSource() @@ -840,487 +1972,79 @@ def get_sparfield(tagname: str, sparfile: Path) -> Union[str, int]: except (IOError, OSError): LOGGER.warning(f"Cannot read {tagname} from {sparfile}") - except Exception as sparerror: - LOGGER.warning(f"Could not parse {tagname} from {sparfile}\n{sparerror}") - - # Cast the dicom data type to int or str (i.e. 
to something that yaml.dump can handle) - if isinstance(value, int): - return int(value) - elif value is None: - return '' - else: - return str(value) # If it's a MultiValue type then flatten it - - -# Profiling shows this is currently the most expensive function, therefore the (primitive but effective) cache optimization -_P7HDR_CACHE = _P7FILE_CACHE = None -@lru_cache(maxsize=65536) -def get_p7field(tagname: str, p7file: Path) -> Union[str, int]: - """ - Extracts the field value from the P-file header - - :param tagname: Name of the SPAR field - :param p7file: The full pathname of the P7 file - :return: Extracted tag-values from the P7 file - """ - - global _P7HDR_CACHE, _P7FILE_CACHE - - value = '' - if not p7file.is_file(): - LOGGER.error(f"{p7file} not found") - - else: - try: - if p7file != _P7FILE_CACHE: - - from spec2nii.GE.ge_read_pfile import Pfile - - hdr = Pfile(p7file).hdr - _P7HDR_CACHE = hdr - _P7FILE_CACHE = p7file - else: - hdr = _P7HDR_CACHE - - value = getattr(hdr, tagname, '') - if type(value) == bytes: - try: value = value.decode('UTF-8') - except UnicodeDecodeError: pass - - except ImportError: - LOGGER.warning(f"The extra `spec2nii` library could not be found or was not installed (see the BIDScoin install instructions)") - - except (IOError, OSError): - LOGGER.warning(f'Cannot read {tagname} from {p7file}') - - except Exception as p7error: - LOGGER.warning(f'Could not parse {tagname} from {p7file}\n{p7error}') - - # Cast the dicom data type to int or str (i.e. to something that yaml.dump can handle) - if isinstance(value, int): - return int(value) - elif value is None: - return '' - else: - return str(value) # If it's a MultiValue type then flatten it - - -# TODO: A number of functions below this point are bidsmap related. 
Make a class out of them - - -def load_bidsmap(yamlfile: Path=Path(), folder: Path=templatefolder, plugins:Iterable[Union[Path,str]]=(), checks: Tuple[bool, bool, bool]=(True, True, True)) -> Tuple[Bidsmap, Path]: - """ - Read the mapping heuristics from the bidsmap yaml-file. If yamlfile is not fullpath, then 'folder' is first searched before - the default 'heuristics'. If yamfile is empty, then first 'bidsmap.yaml' is searched for, then 'bidsmap_template'. So fullpath - has precedence over folder and bidsmap.yaml has precedence over the bidsmap_template. - - NB: A run['datasource'] = DataSource object is added to every run-item - - :param yamlfile: The full pathname or basename of the bidsmap yaml-file - :param folder: Used when yamlfile=basename and not in the pwd: yamlfile is then assumed to be in the (bidscoin)folder. A bidsignore file in folder will be added to the bidsmap bidsignore items - :param plugins: List of plugins to be used (with default options, overrules the plugin list in the study/template bidsmaps). Leave empty to use all plugins in the bidsmap - :param checks: Booleans to check if all (bidskeys, bids-suffixes, bids-values) in the run are present according to the BIDS schema specifications - :return: Tuple with (1) ruamel.yaml dict structure, with all options, BIDS mapping heuristics, labels and attributes, etc. 
and (2) the fullpath yaml-file - """ - - # Input checking - if not yamlfile.name: - yamlfile = Path('bidsmap.yaml') - if not yamlfile.suffix: - yamlfile = yamlfile.with_suffix('.yaml') # Add a standard file-extension if needed - if len(yamlfile.parents) == 1 and not yamlfile.is_file(): - yamlfile = folder/yamlfile # Get the full path to the bidsmap yaml-file - yamlfile = yamlfile.resolve() - if not yamlfile.is_file(): - LOGGER.info(f"No existing bidsmap file found: {yamlfile}") - return Bidsmap({}), yamlfile - bidsignorefile = folder.parents[1]/'.bidsignore' - - # Read the heuristics from the bidsmap file - if any(checks): - LOGGER.info(f"Reading: {yamlfile}") - with yamlfile.open('r') as stream: - bidsmap = Bidsmap(yaml.load(stream)) - - # Issue a warning if the version in the bidsmap YAML-file is not the same as the bidscoin version - if 'bidscoin' in bidsmap['Options'] and 'version' in bidsmap['Options']['bidscoin']: - bidsmapversion = bidsmap['Options']['bidscoin']['version'] - elif 'version' in bidsmap['Options']: # Handle legacy bidsmaps - bidsmapversion = bidsmap['Options']['version'] - else: - bidsmapversion = 'Unknown' - if bidsmapversion.rsplit('.', 1)[0] != __version__.rsplit('.', 1)[0] and any(checks): - LOGGER.warning(f'BIDScoiner version conflict: {yamlfile} was created with version {bidsmapversion}, but this is version {__version__}') - elif bidsmapversion != __version__ and any(checks): - LOGGER.info(f'BIDScoiner version difference: {yamlfile} was created with version {bidsmapversion}, but this is version {__version__}. 
This is normally OK but check the https://bidscoin.readthedocs.io/en/latest/CHANGELOG.html') - - # Make sure subprefix and sesprefix are strings - subprefix = bidsmap['Options']['bidscoin']['subprefix'] = bidsmap['Options']['bidscoin']['subprefix'] or '' - sesprefix = bidsmap['Options']['bidscoin']['sesprefix'] = bidsmap['Options']['bidscoin']['sesprefix'] or '' - - # Append the existing .bidsignore data from the bidsfolder and make sure bidsignore, unknowntypes, ignoretypes and notderivative are lists - if isinstance(bidsmap['Options']['bidscoin'].get('bidsignore'), str): - bidsmap['Options']['bidscoin']['bidsignore'] = bidsmap['Options']['bidscoin']['bidsignore'].split(';') - if bidsignorefile.is_file(): - bidsmap['Options']['bidscoin']['bidsignore'] = list(set(list(bidsmap['Options']['bidscoin']['bidsignore']) + bidsignorefile.read_text().splitlines())) - bidsmap['Options']['bidscoin']['bidsignore'] = list(set(bidsmap['Options']['bidscoin'].get('bidsignore') or [])) - bidsmap['Options']['bidscoin']['unknowntypes'] = list(set(bidsmap['Options']['bidscoin'].get('unknowntypes') or [])) - bidsmap['Options']['bidscoin']['ignoretypes'] = list(set(bidsmap['Options']['bidscoin'].get('ignoretypes') or [])) - bidsmap['Options']['bidscoin']['notderivative'] = list(set(bidsmap['Options']['bidscoin'].get('notderivative') or [])) - - # Make sure we get a proper plugin options and dataformat sections (use plugin default bidsmappings when a template bidsmap is loaded) - if not bidsmap['Options'].get('plugins'): - bidsmap['Options']['plugins'] = {} - if plugins: - for plugin in [plugin for plugin in bidsmap['Options']['plugins'] if plugin not in plugins]: - del bidsmap['Options']['plugins'][plugin] - for plugin in plugins if plugins else bidsmap['Options']['plugins']: - module = bcoin.import_plugin(plugin) - if not bidsmap['Options']['plugins'].get(plugin): - LOGGER.info(f"Adding default bidsmap options from the {plugin} plugin") - bidsmap['Options']['plugins'][plugin] = 
module.OPTIONS if 'OPTIONS' in dir(module) else {} - if 'BIDSMAP' in dir(module) and yamlfile.parent == templatefolder: - for dataformat, bidsmappings in module.BIDSMAP.items(): - if dataformat not in bidsmap: - LOGGER.info(f"Adding default bidsmappings from the {plugin} plugin") - bidsmap[dataformat] = bidsmappings - - # Add missing provenance info, run dictionaries and bids entities - run_ = create_run() - for dataformat in bidsmap: - if dataformat in ('$schema', 'Options'): continue - bidsmap[dataformat]['session'] = bidsmap[dataformat]['session'] or '' # Session-less data repositories - for datatype in bidsmap[dataformat] or []: - if datatype in ('subject', 'session'): continue - for index, run in enumerate(bidsmap[dataformat][datatype] or []): - - # Add missing provenance info - if not run.get('provenance'): - run['provenance'] = str(Path(f"{subprefix.replace('*','')}-unknown/{sesprefix.replace('*','')}-unknown/{dataformat}_{datatype}_id{index+1:03}")) - - # Update the provenance store paths if needed (e.g. when the bids-folder was moved) - provenance = Path(run['provenance']) - if not provenance.is_file(): - for n, part in enumerate(provenance.parts): - if part == 'bidscoin' and provenance.parts[n+1] == 'provenance': - store = folder/provenance.relative_to(*provenance.parts[0:n+1]) - if store.is_file(): - LOGGER.bcdebug(f"Updating provenance: {provenance} -> {store}") - run['provenance'] = str(store) - - # Add missing run dictionaries (e.g. "meta" or "properties") - for key, val in run_.items(): - if key not in run or not run[key]: - run[key] = val - - # Add a DataSource object - run['datasource'] = DataSource(run['provenance'], bidsmap['Options']['plugins'], dataformat, datatype, subprefix, sesprefix) - - # Add missing bids entities - suffix = run['bids'].get('suffix') - if run['datasource'].is_datasource: - suffix = run['datasource'].dynamicvalue(suffix, True, True) - for typegroup in datatyperules.get(datatype, {}): # E.g. 
typegroup = 'nonparametric' - if suffix in datatyperules[datatype][typegroup]['suffixes']: # run_found = True - for entity in datatyperules[datatype][typegroup]['entities']: - entitykey = entities[entity]['name'] - if entitykey not in run['bids'] and entitykey not in ('sub','ses'): - LOGGER.info(f"Adding missing {dataformat}>{datatype}>{suffix} bidsmap entity key: {entitykey}") - run['bids'][entitykey] = '' - if entitykey == 'part' and not isinstance(run['bids']['part'], list): - if run['bids']['part'] in ('', 'mag', 'phase', 'real', 'imag', None): - run['bids']['part'] = ['', 'mag', 'phase', 'real', 'imag', ('','mag','phase','real','imag').index(run['bids']['part'] or '')] - else: - run['bids']['part'] = ['', 'mag', 'phase', 'real', 'imag', run['bids']['part'], 5] - - # Validate the bidsmap entries - check_bidsmap(bidsmap, checks) - - return bidsmap, yamlfile - - -def save_bidsmap(filename: Path, bidsmap: Bidsmap) -> None: - """ - Save the BIDSmap as a YAML text file - - NB: The run['datasource'] = DataSource objects are not saved - - :param filename: Full pathname of the bidsmap file - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. - :return: - """ - - # Remove the added DataSource objects - bidsmap = copy.deepcopy(bidsmap) - for dataformat in bidsmap: - if dataformat in ('$schema', 'Options'): continue - for datatype in bidsmap[dataformat] or []: - if not isinstance(bidsmap[dataformat][datatype], list): continue # E.g. 
'subject' and 'session' - for run in bidsmap[dataformat][datatype]: - run.pop('datasource', None) - - # Validate the bidsmap entries - check_bidsmap(bidsmap, (False,True,True)) - validate_bidsmap(bidsmap, 0) - - LOGGER.info(f"Writing bidsmap to: {filename}") - filename.parent.mkdir(parents=True, exist_ok=True) - with filename.open('w') as stream: - yaml.dump(bidsmap, stream) - - -def validate_bidsmap(bidsmap: Bidsmap, level: int=1) -> bool: - """ - Test the bidsname of runs in the bidsmap using the bids-validator - - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. - :param level: (-2) as 2 but no logging reports, - (-1) as 1 but no logging reports, - (0) as 1 but only report invalid runs, - (1) test only BIDS datatypes, i.e. datatypes not in `.bidsignore` or `ignoretypes`, - (2) test all converted datatypes, i.e. datatypes not in `ignoretypes`, - (3) test all datatypes - :return: True if all tested runs in bidsmap were bids-valid, otherwise False - """ - - if not bidsmap: - LOGGER.info('No bidsmap to validate') - return False - - valid = True - ignoretypes = bidsmap['Options']['bidscoin'].get('ignoretypes', []) - bidsignore = bidsmap['Options']['bidscoin'].get('bidsignore', []) - - # Test all the runs in the bidsmap - LOGGER.info(f"bids-validator {bids_validator.__version__} test results (* = in .bidsignore):") - for dataformat in bidsmap: - if dataformat in ('$schema', 'Options'): continue - for datatype in bidsmap[dataformat] or []: - if not isinstance(bidsmap[dataformat][datatype], list): continue # E.g. 
'subject' and 'session' - for run in bidsmap[dataformat][datatype]: - bidsname = get_bidsname(f"sub-{sanitize(dataformat)}", '', run, False) - ignore = check_ignore(datatype, bidsignore) or check_ignore(bidsname+'.json', bidsignore, 'file') - ignore_1 = datatype in ignoretypes or ignore - ignore_2 = datatype in ignoretypes - for ext in extensions: # NB: `ext` used to be '.json', which is more generic (but see https://github.com/bids-standard/bids-validator/issues/2113) - if bidstest := bids_validator.BIDSValidator().is_bids(f"/sub-{sanitize(dataformat)}/{datatype}/{bidsname}{ext}"): break - if level==3 or (abs(level)==2 and not ignore_2) or (-2 Tuple[Union[bool, None], Union[bool, None], Union[bool, None]]: - """ - Check all non-ignored runs in the bidsmap for required and optional entities using the BIDS schema files - - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. - :param checks: Booleans to check if all (bids-keys, bids-suffixes, bids-values) in the run are present according to the BIDS schema specifications - :return: False if the keys, suffixes or values are proven to be invalid, otherwise None or True - """ - - results = (None, None, None) - - if not any(checks): - return results - - if not bidsmap: - LOGGER.info('No bidsmap run-items to check') - return results + except Exception as sparerror: + LOGGER.warning(f"Could not parse {tagname} from {sparfile}\n{sparerror}") - # Check all the runs in the bidsmap - LOGGER.info('Checking the bidsmap run-items:') - for dataformat in bidsmap: - if dataformat in ('$schema', 'Options'): continue # TODO: Check Options - for datatype in bidsmap[dataformat] or []: - if not isinstance(bidsmap[dataformat][datatype], list): continue # E.g. 'subject' and 'session' - if datatype in bidsmap['Options']['bidscoin']['ignoretypes']: continue # E.g. 
'exclude' - if check_ignore(datatype, bidsmap['Options']['bidscoin']['bidsignore']): continue - if bidsmap[dataformat][datatype] and results == (None, None, None): - results = (True, True, True) # We can now check the bidsmap - for run in bidsmap[dataformat][datatype]: - bidsname = get_bidsname('sub-foo', '', run, False) - if check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file'): continue - isvalid = check_run(datatype, run, checks) - results = [result and valid for result, valid in zip(results, isvalid)] - - if all([result==True for result, check in zip(results, checks) if check is True]): - LOGGER.success('All run-items in the bidsmap are valid') - elif any([result==False for result, check in zip(results, checks) if check is True]): - LOGGER.warning('Not all run-items in the bidsmap are valid') + # Cast the dicom data type to int or str (i.e. to something that yaml.dump can handle) + if isinstance(value, int): + return int(value) + elif value is None: + return '' else: - LOGGER.verbose('Could not validate every run-item in the bidsmap') - - return results + return str(value) # If it's a MultiValue type then flatten it -def check_template(bidsmap: Bidsmap) -> bool: +# Profiling shows this is currently the most expensive function, therefore the (primitive but effective) cache optimization +_P7HDR_CACHE = _P7FILE_CACHE = None +@lru_cache(maxsize=65536) +def get_p7field(tagname: str, p7file: Path) -> Union[str, int]: """ - Check all the datatypes in the template bidsmap for required and optional entities using the BIDS schema files + Extracts the field value from the P-file header - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. 
- :return: True if the template bidsmap is valid, otherwise False + :param tagname: Name of the SPAR field + :param p7file: The full pathname of the P7 file + :return: Extracted tag-values from the P7 file """ - if not bidsmap: - LOGGER.info('No bidsmap datatypes to check') - return False + global _P7HDR_CACHE, _P7FILE_CACHE - valid = True - ignoretypes = bidsmap['Options']['bidscoin'].get('ignoretypes', []) - bidsignore = bidsmap['Options']['bidscoin'].get('bidsignore', []) - - # Check all the datatypes in the bidsmap - LOGGER.verbose('Checking the template bidsmap datatypes:') - for dataformat in bidsmap: - if dataformat in ('$schema', 'Options'): continue - for datatype in bidsmap[dataformat] or []: - if not isinstance(bidsmap[dataformat][datatype], list): continue # Skip datatype = 'subject'/'session' - if not (datatype in bidsdatatypesdef or datatype in ignoretypes or check_ignore(datatype, bidsignore)): - LOGGER.warning(f"Invalid {dataformat} datatype: '{datatype}' (you may want to add it to the 'bidsignore' list)") - valid = False - if datatype in ignoretypes: continue - datatypesuffixes = [] - for run in bidsmap[dataformat][datatype]: - datatypesuffixes.append(run['bids']['suffix']) - for key, val in run['attributes'].items(): - try: - re.compile(str(val)) - except re.error: - LOGGER.warning(f"Invalid regex pattern in the {key} value '{val}' in: bidsmap[{dataformat}][{datatype}] -> {run['provenance']}\nThis may cause run-matching errors unless '{val}' is a literal attribute value") - for typegroup in datatyperules.get(datatype, {}): - for suffix in datatyperules[datatype][typegroup]['suffixes']: - if not (suffix in datatypesuffixes or suffix in str(bidsignore) or - '[DEPRECATED]' in suffixes[suffix]['description'] or - '**Change:** Removed from' in suffixes[suffix]['description'] or - '**Change:** Replaced by' in suffixes[suffix]['description']): - LOGGER.warning(f"Missing '{suffix}' run-item in: bidsmap[{dataformat}][{datatype}] (NB: this may be fine / a 
deprecated item)") - valid = False - - # Validate against the json schema - with (templatefolder/'schema.json').open('r') as stream: - schema = json.load(stream) - try: - jsonschema.validate(bidsmap, schema) - except jsonschema.ValidationError as bidsmaperror: - LOGGER.warning(f"Invalid template bidsmap:\n{bidsmaperror}") - valid = False + value = '' + if not p7file.is_file(): + LOGGER.error(f"{p7file} not found") - if valid: - LOGGER.success('All datatypes and options in the template bidsmap are valid') else: - LOGGER.warning('Not all datatypes and options in the template bidsmap are valid') - - return valid - - -def check_run(datatype: str, run: Run, checks: Tuple[bool, bool, bool]=(False, False, False)) -> Tuple[Union[bool, None], Union[bool, None], Union[bool, None]]: - """ - Check run for required and optional entities using the BIDS schema files - - :param datatype: The datatype that is checked, e.g. 'anat' - :param run: The run (list-item) with bids entities that are checked against missing values & invalid keys - :param checks: Booleans to report if all (bidskeys, bids-suffixes, bids-values) in the run are present according to the BIDS schema specifications - :return: True/False if the keys, suffixes or values are bids-valid or None if they cannot be checked - """ + try: + if p7file != _P7FILE_CACHE: - run_keysok = None - run_suffixok = None - run_valsok = None - - # Check if we have provenance info - if all(checks) and not run['provenance']: - LOGGER.info(f'No provenance info found for {datatype}/*_{run["bids"]["suffix"]}') - - # Check if we have a suffix and datatype rules - if 'suffix' not in run['bids']: - if checks[1]: LOGGER.warning(f'Invalid bidsmap: The {datatype} "suffix" key is missing ({datatype} -> {run["provenance"]})') - return run_keysok, False, run_valsok # The suffix is not BIDS-valid, we cannot check the keys and values - if datatype not in datatyperules: - return run_keysok, run_suffixok, run_valsok # We cannot check anything - - # Use 
the suffix to find the right typegroup - suffix = run['bids'].get('suffix') - if 'datasource' in run and run['datasource'].path.is_file(): - suffix = run['datasource'].dynamicvalue(suffix, True, True) - for typegroup in datatyperules[datatype]: - - if '<' not in suffix or '>' not in suffix: - run_suffixok = False # We can now check the suffix - - if suffix in datatyperules[datatype][typegroup]['suffixes']: - - run_keysok = True # We can now check the key - run_suffixok = True # The suffix is valid - run_valsok = True # We can now check the value - - # Check if all expected entity-keys are present in the run and if they are properly filled - for entity in datatyperules[datatype][typegroup]['entities']: - entitykey = entities[entity]['name'] - entityformat = entities[entity]['format'] # E.g. 'label' or 'index' (the entity type always seems to be 'string') - bidsvalue = run['bids'].get(entitykey) - dynamicvalue = True if isinstance(bidsvalue, str) and ('<' in bidsvalue and '>' in bidsvalue) else False - if entitykey in ('sub', 'ses'): continue - if isinstance(bidsvalue, list): - bidsvalue = bidsvalue[bidsvalue[-1]] # Get the selected item - if entitykey not in run['bids']: - if checks[0]: LOGGER.warning(f'Invalid bidsmap: The "{entitykey}" key is missing ({datatype}/*_{run["bids"]["suffix"]} -> {run["provenance"]})') - run_keysok = False - if bidsvalue and not dynamicvalue and bidsvalue!=sanitize(bidsvalue): - if checks[2]: LOGGER.warning(f'Invalid {entitykey} value: "{bidsvalue}" ({datatype}/*_{run["bids"]["suffix"]} -> {run["provenance"]})') - run_valsok = False - elif not bidsvalue and datatyperules[datatype][typegroup]['entities'][entity]=='required': - if checks[2]: LOGGER.warning(f'Required "{entitykey}" value is missing ({datatype}/*_{run["bids"]["suffix"]} -> {run["provenance"]})') - run_valsok = False - if bidsvalue and not dynamicvalue and entityformat=='index' and not str(bidsvalue).isdecimal(): - if checks[2]: LOGGER.warning(f'Invalid {entitykey}-index: 
"{bidsvalue}" is not a number ({datatype}/*_{run["bids"]["suffix"]} -> {run["provenance"]})') - run_valsok = False - - # Check if all the bids-keys are present in the schema file - entitykeys = [entities[entity]['name'] for entity in datatyperules[datatype][typegroup]['entities']] - for bidskey in run['bids']: - if bidskey not in entitykeys + ['suffix']: - if checks[0]: LOGGER.warning(f'Invalid bidsmap: The "{bidskey}" key is not allowed according to the BIDS standard ({datatype}/*_{run["bids"]["suffix"]} -> {run["provenance"]})') - run_keysok = False - if run_valsok: run_valsok = None + from spec2nii.GE.ge_read_pfile import Pfile - break + hdr = Pfile(p7file).hdr + _P7HDR_CACHE = hdr + _P7FILE_CACHE = p7file + else: + hdr = _P7HDR_CACHE - # Hack: There are physio, stim and events entities in the 'task'-rules, which can be added to any datatype - if suffix in datatyperules['task']['events']['suffixes'] + datatyperules['task']['timeseries']['suffixes']: - bidsname = get_bidsname('sub-foo', '', run, False, 'datasource' in run and run['datasource'].path.is_file()) - run_suffixok = bids_validator.BIDSValidator().is_bids(f"/sub-foo/{datatype}/{bidsname}.json") # NB: Using the BIDSValidator sounds nice but doesn't give any control over the BIDS-version - run_valsok = run_suffixok - LOGGER.bcdebug(f"bidsname={run_suffixok}: /sub-foo/{datatype}/{bidsname}.json") + value = getattr(hdr, tagname, '') + if type(value) is bytes: + try: value = value.decode('UTF-8') + except UnicodeDecodeError: pass - if checks[0] and run_keysok in (None, False): - LOGGER.bcdebug(f'Invalid "{run_keysok}" key-checks in run-item: "{run["bids"]["suffix"]}" ({datatype} -> {run["provenance"]})\nRun["bids"]:\n{run["bids"]}') + except ImportError: + LOGGER.warning(f"The extra `spec2nii` library could not be found or was not installed (see the BIDScoin install instructions)") - if checks[1] and run_suffixok is False: - LOGGER.warning(f'Invalid run-item with suffix: "{run["bids"]["suffix"]}" ({datatype} 
-> {run["provenance"]})') - LOGGER.bcdebug(f"Run['bids']:\n{run['bids']}") + except (IOError, OSError): + LOGGER.warning(f'Cannot read {tagname} from {p7file}') - if checks[2] and run_valsok in (None, False): - LOGGER.bcdebug(f'Invalid "{run_valsok}" val-checks in run-item: "{run["bids"]["suffix"]}" ({datatype} -> {run["provenance"]})\nRun["bids"]:\n{run["bids"]}') + except Exception as p7error: + LOGGER.warning(f'Could not parse {tagname} from {p7file}\n{p7error}') - return run_keysok, run_suffixok, run_valsok + # Cast the dicom data type to int or str (i.e. to something that yaml.dump can handle) + if isinstance(value, int): + return int(value) + elif value is None: + return '' + else: + return str(value) # If it's a MultiValue type then flatten it -def check_ignore(entry: str, bidsignore: Union[str,list], datatype: str= 'dir') -> bool: +def check_ignore(entry, bidsignore: Union[str,list], filetype: str= 'dir') -> bool: """ A rudimentary check whether `entry` should be BIDS-ignored. This function should eventually be replaced by bids_validator functionality See also https://github.com/bids-standard/bids-specification/issues/131 :param entry: The entry that is checked against the bidsignore (e.g. a directory/datatype such as `anat` or a file such as `sub-001_ct.nii.gz`) :param bidsignore: The list or semicolon separated bidsignore pattern (e.g. from the bidscoin Options such as `mrs/;extra_data/;sub-*_ct.*`) - :param datatype: The entry datatype, i.e. 'dir' or 'file', that can be used to limit the check + :param filetype: The entry filetype, i.e. 
'dir' or 'file', that can be used to limit the check :return: True if the entry should be ignored, else False """ @@ -1330,56 +2054,32 @@ def check_ignore(entry: str, bidsignore: Union[str,list], datatype: str= 'dir') ignore = False for item in set(bidsignore + ['code/', 'sourcedata/', 'derivatives/']): - if datatype == 'dir' and not item.endswith('/'): continue - if datatype == 'file' and item.endswith('/'): continue + if filetype == 'dir' and not item.endswith('/'): continue + if filetype == 'file' and item.endswith('/'): continue if item.endswith('/'): item = item[0:-1] - if fnmatch(entry, item): + if fnmatch(str(entry), item): ignore = True break return ignore -def strip_suffix(run: Run) -> Run: - """ - Certain attributes such as SeriesDescriptions (but not ProtocolName!?) may get a suffix like '_SBRef' from the vendor, - try to strip it off from the BIDS labels - - :param run: The run with potentially added suffixes that are the same as the BIDS suffixes - :return: The run with these suffixes removed - """ - - # See if we have a suffix for this datatype - if 'suffix' in run['bids'] and run['bids']['suffix']: - suffix = run['bids']['suffix'].lower() - else: - return run - - # See if any of the BIDS labels ends with the same suffix. If so, then remove it - for key in run['bids']: - if key == 'suffix': - continue - if isinstance(run['bids'][key], str) and run['bids'][key].lower().endswith(suffix): - run['bids'][key] = run['bids'][key][0:-len(suffix)] # NB: This will leave the added '_' and '.' characters, but they will be taken out later (as they are not BIDS-valid) - - return run - - -def sanitize(label: str) -> str: +def sanitize(label: Union[str, DataFormat, DataType]): """ Converts a given label to a cleaned-up label that can be used as a BIDS label. Remove leading and trailing spaces; convert other spaces, special BIDS characters and anything that is not an alphanumeric to a ''. 
This will for example map "Joe's reward_task" to "Joesrewardtask" - :param label: The given label that potentially contains undesired characters - :return: The cleaned-up/BIDS-valid label + :param label: The label that potentially contains undesired characters + :return: The cleaned-up/BIDS-valid string label or the original (non-string) label """ if label is None or label == '': return '' - if not isinstance(label, str): + if not isinstance(label, (str, DataFormat, DataType)): return label + label = str(label) special_characters = (' ', '_', '-','.') @@ -1389,245 +2089,6 @@ def sanitize(label: str) -> str: return re.sub(r'(?u)[^-\w.]', '', label) -def dir_bidsmap(bidsmap: Bidsmap, dataformat: str) -> List[Path]: - """ - Make a provenance list of all the runs in the bidsmap[dataformat] - - :param bidsmap: The bidsmap, with all the runs in it - :param dataformat: The information source in the bidsmap that is used, e.g. 'DICOM' - :return: List of all provenances - """ - - provenance = [] - for datatype in bidsmap.get(dataformat, []): - if not isinstance(bidsmap[dataformat].get(datatype), list): continue # E.g. 
'subject' and 'session' - for run in bidsmap[dataformat][datatype]: - if not run['provenance']: - LOGGER.warning(f'The bidsmap run {datatype} run does not contain provenance data') - else: - provenance.append(Path(run['provenance'])) - - provenance.sort() - - return provenance - - -def create_run(datasource: DataSource=None, bidsmap: Bidsmap=None) -> Run: - """ - Create an empty run-item with the proper structure, provenance info and a data source - - :param datasource: The data source that is deepcopied and attached - :param bidsmap: The bidsmap, with all the bidscoin options in it (for prefix/plugin info) - :return: The created run - """ - - datasource = copy.deepcopy(datasource or DataSource()) - if bidsmap: - datasource.plugins = bidsmap['Options']['plugins'] - datasource.subprefix = bidsmap['Options']['bidscoin'].get('subprefix','') - datasource.sesprefix = bidsmap['Options']['bidscoin'].get('sesprefix','') - - return Run(dict(provenance = str(datasource.path), - properties = {'filepath':'', 'filename':'', 'filesize':'', 'nrfiles':None}, - attributes = {}, - bids = {'suffix':''}, - meta = {}, - datasource = datasource)) - - -def get_run(bidsmap: Bidsmap, datatype: str, suffix_idx: Union[int, str], datasource: DataSource) -> Run: - """ - Find the (first) run in bidsmap[dataformat][bidsdatatype] with run['bids']['suffix_idx'] == suffix_idx - - :param bidsmap: This could be a template bidsmap, with all options, BIDS labels and attributes, etc. - :param datatype: The datatype in which a matching run is searched for (e.g. 'anat') - :param suffix_idx: The name of the suffix that is searched for (e.g. 
'bold') or the datatype index number - :param datasource: The datasource with the provenance file from which the properties, attributes and dynamic values are read - :return: The clean (filled) run item in the bidsmap[dataformat][bidsdatatype] with the matching suffix_idx, - otherwise an empty dict - """ - - runs = bidsmap.get(datasource.dataformat, {}).get(datatype, []) - for index, run in enumerate(runs): - if index == suffix_idx or run['bids']['suffix'] == suffix_idx: - - # Get a clean run (remove comments to avoid overly complicated commentedMaps from ruamel.yaml) - run_ = create_run(datasource, bidsmap) - run_['datasource'].datatype = datatype - - for propkey, propvalue in run['properties'].items(): - run_['properties'][propkey] = propvalue - - for attrkey, attrvalue in run['attributes'].items(): - if datasource.path.name: - run_['attributes'][attrkey] = datasource.attributes(attrkey, validregexp=True) - else: - run_['attributes'][attrkey] = attrvalue - - # Replace the dynamic bids values, except the dynamic run-index (e.g. <<>>) - for bidskey, bidsvalue in run['bids'].items(): - - # NB: bidsvalue can be a (mutable) list - bidsvalue = copy.copy(bidsvalue) - if bidskey == 'run' and bidsvalue and (bidsvalue.replace('<','').replace('>','').isdecimal() or bidsvalue == '<<>>'): - run_['bids'][bidskey] = bidsvalue - else: - run_['bids'][bidskey] = datasource.dynamicvalue(bidsvalue) - - # Replace the dynamic meta values, except the IntendedFor value (e.g. 
<>) - for metakey, metavalue in run['meta'].items(): - - # NB: metavalue can be a (mutable) list - metavalue = copy.copy(metavalue) - if metakey == 'IntendedFor': - run_['meta'][metakey] = metavalue - elif metakey in ('B0FieldSource', 'B0FieldIdentifier') and fnmatch(str(metavalue), '*<>*'): - run_['meta'][metakey] = metavalue - else: - run_['meta'][metakey] = datasource.dynamicvalue(metavalue, cleanup=False) - - return run_ - - LOGGER.error(f"A '{datatype}' run with suffix_idx '{suffix_idx}' cannot be found in bidsmap['{datasource.dataformat}']") - return Run({}) - - -def find_run(bidsmap: Bidsmap, provenance: str, dataformat: str='', datatype: str='') -> Run: - """ - Find the (first) run in bidsmap[dataformat][bidsdatatype] with run['provenance'] == provenance - - :param bidsmap: This could be a template bidsmap, with all options, BIDS labels and attributes, etc. - :param provenance: The unique provenance that is used to identify the run - :param dataformat: The dataformat section in the bidsmap in which a matching run is searched for, e.g. 'DICOM'. Otherwise, all dataformats are searched - :param datatype: The datatype in which a matching run is searched for (e.g. 'anat'). 
Otherwise, all datatypes are searched - :return: The (unfilled) run item from the bidsmap[dataformat][bidsdatatype] - """ - - if dataformat: - dataformats = (dataformat,) - else: - dataformats = [item for item in bidsmap if item not in ('$schema','Options') and bidsmap[item]] - for dataformat in dataformats: - if datatype: - datatypes = (datatype,) - else: - datatypes = [item for item in bidsmap[dataformat] if item not in ('subject','session') and bidsmap[dataformat][item]] - for dtype in datatypes: - for run in bidsmap[dataformat].get(dtype,[]): - if Path(run['provenance']) == Path(provenance): - return run - - LOGGER.bcdebug(f"Could not find this [{dataformat}][{datatype}] run: '{provenance}") - return Run({}) - - -def delete_run(bidsmap: Bidsmap, provenance: Union[Run, str], datatype: str= '', dataformat: str='') -> bool: - """ - Delete the first matching run from the BIDS map - - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. - :param provenance: The provenance identifier of/or the run-item that is deleted - :param datatype: The datatype that of the deleted run_item (can be different from run_item['datasource']), e.g. 'anat' - :param dataformat: The dataformat section in the bidsmap in which the run is deleted, e.g. 
'DICOM' - :return: True if successful, False otherwise - """ - - if isinstance(provenance, str): - run_item = find_run(bidsmap, provenance, dataformat) - if not run_item: - return False - else: - run_item = provenance - provenance = run_item['provenance'] - - if not dataformat: - dataformat = run_item['datasource'].dataformat - if not datatype: - datatype = run_item['datasource'].datatype - if dataformat in bidsmap: - for index, run in enumerate(bidsmap[dataformat].get(datatype,[])): - if Path(run['provenance']) == Path(provenance): - del bidsmap[dataformat][datatype][index] - return True - - LOGGER.error(f"Could not find (and delete) this [{dataformat}][{datatype}] run: '{provenance}") - return False - - -def insert_run(bidsmap: Bidsmap, run: Run, position: int=None) -> None: - """ - Inserts a cleaned-up run to the BIDS map - - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. - :param run: The run (listitem) that is appended to the datatype - :param position: The position at which the run is inserted. The run is appended at the end if position is None - :return: - """ - - # Copy the values from the run to an empty dict - run_ = create_run(run['datasource'], bidsmap) - for item in run_: - if item in ('provenance', 'datasource'): - continue - run_[item].update(copy.deepcopy(run[item])) - run = run_ - - dataformat = run['datasource'].dataformat - datatype = run['datasource'].datatype - if not bidsmap.get(dataformat): - bidsmap[dataformat] = {datatype: []} - if not bidsmap.get(dataformat).get(datatype): - bidsmap[dataformat][datatype] = [run] - else: - bidsmap[dataformat][datatype].insert(len(bidsmap[dataformat][datatype]) if position is None else position, run) - - -def update_bidsmap(bidsmap: Bidsmap, source_datatype: str, run: Run) -> None: - """ - Update the BIDS map if the datatype changes: - 1. Remove the source run from the source datatype section - 2. 
Append the (cleaned and deepcopied) target run to the target datatype section - - Else: - 1. Use the provenance to look up the index number in that datatype - 2. Replace the run - - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. - :param source_datatype: The current datatype name, e.g. 'anat' - :param run: The run item that is being moved to run['datasource'].datatype - :return: - """ - - dataformat = run['datasource'].dataformat - run_datatype = run['datasource'].datatype - num_runs_in = len(dir_bidsmap(bidsmap, dataformat)) - - # Assert that the target datatype is known - if not run_datatype: - LOGGER.error(f'The datatype of the run cannot be determined...') - - # Warn the user if the target run already exists when the run is moved to another datatype - if source_datatype != run_datatype: - if exist_run(bidsmap, run_datatype, run): - LOGGER.error(f'The "{source_datatype}" run already exists in {run_datatype}...') - - # Delete the source run - delete_run(bidsmap, run, source_datatype) - - # Append the (cleaned-up) target run - insert_run(bidsmap, run) - - else: - for index, run_ in enumerate(bidsmap[dataformat][run_datatype]): - if Path(run_['provenance']) == Path(run['provenance']): - bidsmap[dataformat][run_datatype][index] = run - break - - num_runs_out = len(dir_bidsmap(bidsmap, dataformat)) - if num_runs_out != num_runs_in: - LOGGER.error(f"Number of runs in bidsmap['{dataformat}'] changed unexpectedly: {num_runs_in} -> {num_runs_out}") - - def match_runvalue(attribute, pattern) -> bool: """ Match the value items with the attribute string using regex. 
If both attribute @@ -1647,7 +2108,7 @@ def match_runvalue(attribute, pattern) -> bool: """ # Consider it a match if both attribute and value are identical or empty/None - if str(attribute)==str(pattern) or (not attribute and not pattern): + if str(attribute) == str(pattern) or (not attribute and not pattern): return True if not pattern: @@ -1667,152 +2128,12 @@ def match_runvalue(attribute, pattern) -> bool: return match is not None -def exist_run(bidsmap: Bidsmap, datatype: str, run_item: Run) -> bool: - """ - Checks the bidsmap to see if there is already an entry in runlist with the same properties and attributes as in the input run - - :param bidsmap: Full bidsmap data structure, with all options, BIDS labels and attributes, etc. - :param datatype: The datatype in the source that is used, e.g. 'anat'. Empty values will search through all datatypes - :param run_item: The run-item that is searched for in the datatype - :return: True if the run exists in runlist, otherwise False - """ - - dataformat = run_item['datasource'].dataformat - if not datatype: - for dtype in bidsmap.get(dataformat,{}): - if not isinstance(bidsmap[dataformat][dtype], list): continue # E.g. 
'subject' and 'session' - if exist_run(bidsmap, dtype, run_item): - return True - - if not bidsmap.get(dataformat, {}).get(datatype): - return False - - for run in bidsmap[dataformat][datatype]: - - # Begin with match = False only if all attributes are empty - match = any([run[matching][attrkey] not in [None,''] for matching in ('properties','attributes') for attrkey in run[matching]]) # Normally match==True, but make match==False if all attributes are empty - - # Search for a case where all run_item items match with the run_item items - for matching in ('properties', 'attributes'): - for itemkey, itemvalue in run_item[matching].items(): - value = run[matching].get(itemkey) # Matching bids-labels which exist in one datatype but not in the other -> None - match = match and match_runvalue(itemvalue, value) - if not match: - break # There is no point in searching further within the run_item now that we've found a mismatch - - # Stop searching if we found a matching run_item (i.e. which is the case if match is still True after all run tests) - if match: - return True - - return False - - -def get_matching_run(datasource: DataSource, bidsmap: Bidsmap, runtime=False) -> Tuple[Run, str]: - """ - Find the first run in the bidsmap with properties and file attributes that match with the data source, and then - through the attributes. Only non-empty properties and attributes are matched, except when runtime is True, then - the empty attributes are also matched. The datatypes are searched for in this order: - - ignoredatatypes (e.g. 'exclude') -> normal bidsdatatypes (e.g. 'anat') -> unknowndatatypes (e.g. 'extra_data') - - Then update/fill the provenance, and the (dynamic) bids and meta values (bids values are cleaned-up to be BIDS-valid) - - :param datasource: The data source from which the attributes are read. 
NB: The datasource.datatype attribute is updated - :param bidsmap: Full bidsmap data structure, with all options, BIDS keys and attributes, etc - :param runtime: Dynamic <> are expanded if True - :return: (run, provenance) The matching and filled-in/cleaned run item, and if there is a match, the - provenance (i.e. unique identifier) of the run-item in the bidsmap. If there is no match then the - run is still populated with info from the source-file, but the returned provenance will be '' - """ - - unknowndatatypes: list = bidsmap['Options']['bidscoin'].get('unknowntypes',[]) - ignoredatatypes: list = bidsmap['Options']['bidscoin'].get('ignoretypes',[]) - bidsdatatypes: list = [dtype for dtype in bidsmap.get(datasource.dataformat) if dtype not in unknowndatatypes + ignoredatatypes + ['subject', 'session']] - dataformat = Dataformat(bidsmap.get(datasource.dataformat, {})) - - # Loop through all datatypes and runs; all info goes cleanly into run_ (to avoid formatting problem of the CommentedMap) - if 'fmap' in bidsdatatypes: - bidsdatatypes.insert(0, bidsdatatypes.pop(bidsdatatypes.index('fmap'))) # Put fmap at the front (to catch inverted polarity scans first - run_ = create_run(datasource, bidsmap) - for datatype in ignoredatatypes + bidsdatatypes + unknowndatatypes: # The ordered datatypes in which a matching run is searched for - - if datatype not in dataformat: - continue - runs = dataformat.get(datatype) - datasource.datatype = datatype - for run in runs or []: - - match = any(run[matching][attrkey] not in [None,''] for matching in ('properties','attributes') for attrkey in run[matching]) # Normally match==True, but make match==False if all attributes are empty - run_ = create_run(datasource, bidsmap) - - # Try to see if the sourcefile matches all the filesystem properties - for propkey, propvalue in run['properties'].items(): - - # Check if the attribute value matches with the info from the sourcefile - if propvalue: - sourcevalue = 
datasource.properties(propkey) - match = match and match_runvalue(sourcevalue, propvalue) - - # Do not fill the empty attribute with the info from the sourcefile but keep the matching expression - run_['properties'][propkey] = propvalue - - # Try to see if the sourcefile matches all the attributes and fill all of them - for attrkey, attrvalue in run['attributes'].items(): - - # Check if the attribute value matches with the info from the sourcefile - sourcevalue = datasource.attributes(attrkey, validregexp=True) - if attrvalue or runtime: - match = match and match_runvalue(sourcevalue, attrvalue) - - # Fill the empty attribute with the info from the sourcefile - run_['attributes'][attrkey] = sourcevalue - - # Try to fill the bids-labels - for bidskey, bidsvalue in run['bids'].items(): - - # NB: bidsvalue can be a (mutable) list - bidsvalue = copy.copy(bidsvalue) - - # Replace the dynamic bids values, except the dynamic run-index (e.g. <<>>) - if bidskey == 'run' and bidsvalue and (bidsvalue.replace('<','').replace('>','').isdecimal() or bidsvalue == '<<>>'): - run_['bids'][bidskey] = bidsvalue - else: - run_['bids'][bidskey] = datasource.dynamicvalue(bidsvalue, runtime=runtime) - - # SeriesDescriptions (and ProtocolName?) may get a suffix like '_SBRef' from the vendor, try to strip it off - run_ = strip_suffix(run_) - - # Try to fill the meta-data - for metakey, metavalue in run['meta'].items(): - - # NB: metavalue can be a (mutable) list - metavalue = copy.copy(metavalue) - - # Replace the dynamic meta values, except the IntendedFor value (e.g. 
<>) - if metakey == 'IntendedFor': - run_['meta'][metakey] = metavalue - elif metakey in ('B0FieldSource', 'B0FieldIdentifier') and fnmatch(str(metavalue), '*<>*'): - run_['meta'][metakey] = metavalue - else: - run_['meta'][metakey] = datasource.dynamicvalue(metavalue, cleanup=False, runtime=runtime) - - # Stop searching the bidsmap if we have a match - if match: - LOGGER.bcdebug(f"Found bidsmap match: {run['provenance']} -> {run_['provenance']}") - return run_, run['provenance'] - - # We don't have a match (all tests failed, so datatype should be the *last* one, e.g. unknowndatatype) - LOGGER.bcdebug(f"Found no bidsmap match for: {run_['provenance']}") - if not datasource.datatype in unknowndatatypes: - LOGGER.warning(f"Datatype was expected to be in {unknowndatatypes}, instead it is {datasource.datatype} -> {run_['provenance']}") - return run_, '' - - -def get_derivatives(datatype: str, exceptions: Iterable=()) -> list: +def get_derivatives(datatype: Union[str, DataType], exceptions: Iterable=()) -> list: """ Retrieves a list of suffixes that are stored in the derivatives folder (e.g. the qMRI maps). TODO: Replace with a more systematic/documented method """ + datatype = str(datatype) if datatype == 'anat': return [suffix for suffix in datatyperules[datatype]['parametric']['suffixes'] if suffix not in tuple(exceptions) + ('UNIT1',)] # The qMRI data (maps) @@ -1823,55 +2144,6 @@ def get_derivatives(datatype: str, exceptions: Iterable=()) -> list: return [] -def get_bidsname(subid: str, sesid: str, run: Run, validkeys: bool, runtime: bool=False, cleanup: bool=True) -> str: - """ - Composes a filename as it should be according to the BIDS standard using the BIDS keys in run. The bids values are - dynamically updated and cleaned, and invalid bids keys and empty bids values are ignored - - :param subid: The subject identifier, i.e. name of the subject folder (e.g. 'sub-001' or just '001') - :param sesid: The optional session identifier, i.e. 
name of the session folder (e.g. 'ses-01' or just '01'). Can be left empty - :param run: The run mapping with the BIDS key-value pairs - :param validkeys: Removes non-BIDS-compliant bids-keys if True - :param runtime: Replaces dynamic bidsvalues if True - :param cleanup: Sanitizes non-BIDS-compliant characters from the filename if True - :return: The composed BIDS file-name (without file-extension) - """ - - # Try to update the sub/ses-ids - subid = re.sub(f'^sub-', '', subid) - sesid = re.sub(f'^ses-', '', sesid) if sesid else '' # Catch sesid = None - if cleanup: - subid = sanitize(subid) - sesid = sanitize(sesid) - - # Compose the bidsname - bidsname = f"sub-{subid}{'_ses-'+sesid if sesid else ''}" # Start with the subject/session identifier - entitiekeys = [entities[entity]['name'] for entity in entitiesorder] # Use the valid keys from the BIDS schema - if not validkeys: # Use the (ordered valid + invalid) keys from the run item - entitiekeys = [key for key in entitiekeys if key in run['bids']] + \ - [key for key in run['bids'] if key not in entitiekeys and key!='suffix'] - for entitykey in entitiekeys: - bidsvalue = run['bids'].get(entitykey) # Get the entity data from the run item - if not bidsvalue: - bidsvalue = '' - if isinstance(bidsvalue, list): - bidsvalue = bidsvalue[bidsvalue[-1]] # Get the selected item - elif runtime and not (entitykey=='run' and (bidsvalue.replace('<','').replace('>','').isdecimal() or bidsvalue == '<<>>')): - bidsvalue = run['datasource'].dynamicvalue(bidsvalue, cleanup=True, runtime=runtime) - if cleanup: - bidsvalue = sanitize(bidsvalue) - if bidsvalue: - bidsname = f"{bidsname}_{entitykey}-{bidsvalue}" # Append the key-value data to the bidsname - suffix = run['bids'].get('suffix') - if runtime: - suffix = run['datasource'].dynamicvalue(suffix, runtime=runtime) - if cleanup: - suffix = sanitize(suffix) - bidsname = f"{bidsname}{'_'+suffix if suffix else ''}" # And end with the suffix - - return bidsname - - def 
get_bidsvalue(bidsfile: Union[str, Path], bidskey: str, newvalue: str='') -> Union[Path, str]: """ Sets the bidslabel, i.e. '*_bidskey-*_' is replaced with '*_bidskey-bidsvalue_'. If the key exists but is not in the @@ -1947,7 +2219,7 @@ def insert_bidskeyval(bidsfile: Union[str, Path], bidskey: str, newvalue: str, v bidsext = ''.join(Path(bidsfile).suffixes) # Parse the key-value pairs and store all the run info - run = create_run() + run = RunItem() subid = '' sesid = '' for keyval in bidsname.split('_'): @@ -1958,11 +2230,11 @@ def insert_bidskeyval(bidsfile: Union[str, Path], bidskey: str, newvalue: str, v elif key == 'ses': sesid = keyval else: - run['bids'][key] = val + run.bids[key] = val else: - run['bids']['suffix'] = f"{run['bids'].get('suffix','')}_{keyval}" # account for multiple suffixes (e.g. _bold_e1_ph from dcm2niix) - if run['bids'].get('suffix','').startswith('_'): - run['bids']['suffix'] = run['bids']['suffix'][1:] + run.bids['suffix'] = f"{run.bids.get('suffix','')}_{keyval}" # account for multiple suffixes (e.g. 
_bold_e1_ph from dcm2niix) + if run.bids.get('suffix','').startswith('_'): + run.bids['suffix'] = run.bids['suffix'][1:] # Insert the key-value pair in the run if bidskey == 'sub': @@ -1970,78 +2242,16 @@ def insert_bidskeyval(bidsfile: Union[str, Path], bidskey: str, newvalue: str, v elif bidskey == 'ses': sesid = newvalue else: - run['bids'][bidskey] = newvalue + run.bids[bidskey] = newvalue # Compose the new filename - newbidsfile = (bidspath/get_bidsname(subid, sesid, run, validkeys, cleanup=False)).with_suffix(bidsext) + newbidsfile = (bidspath / run.bidsname(subid, sesid, validkeys, cleanup=False)).with_suffix(bidsext) if isinstance(bidsfile, str): newbidsfile = str(newbidsfile) return newbidsfile -def increment_runindex(outfolder: Path, bidsname: str, run: Run, scans_table: pd.DataFrame=None, targets: Set[Path]=()) -> str: - """ - Checks if a file with the same bidsname already exists in the folder and then increments the dynamic runindex - (if any) until no such file is found. - - NB: For <<>> runs, if the run-less file already exists, then add 'run-2' to bidsname and rename run-less files - to 'run-1', and, optionally, do the same for entries in scans_table and targets (i.e. keep them in sync) - - :param outfolder: The full pathname of the bids output folder - :param bidsname: The bidsname with a provisional runindex, e.g. 
from get_bidsname() - :param run: The run mapping with the BIDS key-value pairs - :param scans_table The scans.tsv table that need to remain in sync when renaming a run-less file - :param targets: The set of output targets that need to remain in sync when renaming a run-less file - :return: The bidsname with the original or incremented runindex - """ - - # Check input - runval = str(run['bids'].get('run') or '') - if not (runval.startswith('<<') and runval.endswith('>>') and (runval.replace('<','').replace('>','').isdecimal() or runval == '<<>>')): - return bidsname - bidsext = ''.join(Path(bidsname).suffixes) - bidsname = bidsname.split('.')[0] - - # Make an inventory of the runs - runless_name = insert_bidskeyval(bidsname, 'run', '', False) - run1_name = insert_bidskeyval(bidsname, 'run', '1', False) - runless_files = list(outfolder.glob(f"{runless_name}.*")) - run1_files = list(outfolder.glob(f"{run1_name}.*")) - - # Start incrementing from run-1 if we have already renamed runless to run-1 - if run1_files and runval == '<<>>': - bidsname = run1_name - - # Increment the run-index if the bidsfile already exists until that's no longer the case - while list(outfolder.glob(f"{bidsname}.*")): # The run already exists -> increment the run-index - runindex = get_bidsvalue(bidsname, 'run') or '1' # If run-less -> identify as existing run-1 - bidsname = insert_bidskeyval(bidsname, 'run', str(int(runindex) + 1), False) - - # Rename run-less to run-1 when dealing with a new run-2 - if runless_files and get_bidsvalue(bidsname, 'run') == '2': - - # Check if everything is OK - if runless_files and run1_files: - LOGGER.error(f"File already exists, cannot rename {outfolder/runless_name}.* -> {run1_name}.*") - return bidsname + bidsext - - # Rename run-less to run-1 - for runless_file in runless_files: - LOGGER.verbose(f"Found run-2 files for <<>> index, renaming\n{runless_file} -> {run1_name}") - run1_file = (outfolder/run1_name).with_suffix(''.join(runless_file.suffixes)) - 
runless_file.replace(run1_file) - if runless_file in targets: - targets.remove(runless_file) - targets.add(run1_file) - run1_scan = f"{run1_file.parent.name}/{run1_file.name}" # NB: as POSIX - runless_scan = f"{runless_file.parent.name}/{runless_file.name}" # NB: as POSIX - if scans_table is not None and runless_scan in scans_table.index: - scans_table.rename(index={runless_scan: run1_scan}, inplace=True) - - return bidsname + bidsext - - def check_runindices(session: Path) -> bool: """ Checks if the run-indices with the acquisition times stored in the scans.tsv file (NB: that means that scans in @@ -2140,7 +2350,7 @@ def limitmatches(fmap: str, matches: List[str], limits: str, niifiles: Set[str], niifiles.update(matches) -def addmetadata(bidsses: Path) -> None: +def addmetadata(bidsses: Path): """ Adds the special fieldmap metadata (IntendedFor, TE, etc.) @@ -2279,7 +2489,7 @@ def updatemetadata(datasource: DataSource, targetmeta: Path, usermeta: Meta, ext :param datasource: The data source from which dynamic values are read :param targetmeta: The filepath of the target data file with meta-data :param usermeta: A user metadata dict, e.g. the meta table from a run-item - :param extensions: A list of file extensions of the source metadata files, e.g. as specified in bidsmap['Options']['plugins']['plugin']['meta'] + :param extensions: A list of file extensions of the source metadata files, e.g. as specified in bidsmap.plugins['plugin']['meta'] :param sourcemeta: The filepath of the source data file with associated/equally named meta-data files (name may include wildcards). 
Leave empty to use datasource.path :return: The combined target + source + user metadata """ @@ -2431,7 +2641,7 @@ def addparticipant(participants_tsv: Path, subid: str='', sesid: str='', data: d return table, meta -def bidsprov(bidsfolder: Path, source: Path=Path(), runid: str='', datatype: str='unknown', targets: Iterable[Path]=()) -> pd.DataFrame: +def bidsprov(bidsfolder: Path, source: Path=Path(), runid: str='', datatype: Union[str, DataType]='unknown', targets: Iterable[Path]=()) -> pd.DataFrame: """ Save data transformation information in the bids/code/bidscoin folder (in the future this may be done in accordance with BEP028) @@ -2463,139 +2673,7 @@ def bidsprov(bidsfolder: Path, source: Path=Path(), runid: str='', datatype: str # Write the provenance data if source.name: LOGGER.bcdebug(f"Writing provenance data to: {provfile}") - provdata.loc[str(source)] = [runid, datatype, ', '.join([f"{target.parts[1]+':' if target.parts[0]=='derivatives' else ''}{target.name}" for target in targets])] + provdata.loc[str(source)] = [runid, str(datatype), ', '.join([f"{target.parts[1]+':' if target.parts[0]=='derivatives' else ''}{target.name}" for target in targets])] provdata.sort_index().to_csv(provfile, sep='\t') return provdata - - -def get_propertieshelp(propertieskey: str) -> str: - """ - Reads the description of a matching attributes key in the source dictionary - - :param propertieskey: The properties key for which the help text is obtained - :return: The obtained help text - """ - - # Return the description from the DICOM dictionary or a default text - if propertieskey == 'filepath': - return 'The path of the source file that is matched against the (regex) pattern' - if propertieskey == 'filename': - return 'The name of the source file that is matched against the (regex) pattern' - if propertieskey == 'filesize': - return 'The size of the source file that is matched against the (regex) pattern' - if propertieskey == 'nrfiles': - return 'The nr of similar files 
in the folder that matched against the properties (regex) patterns' - - return f"{propertieskey} is not a valid property-key" - - -def get_attributeshelp(attributeskey: str) -> str: - """ - Reads the description of a matching attributes key in the source dictionary - - TODO: implement PAR/REC support - - :param attributeskey: The attribute key for which the help text is obtained - :return: The obtained help text - """ - - if not attributeskey: - return 'Please provide a key-name' - - # Return the description from the DICOM dictionary or a default text - try: - return f"{attributeskey}\nThe DICOM '{datadict.dictionary_description(attributeskey)}' attribute" - - except ValueError: - return f"{attributeskey}\nAn unknown/private attribute" - - -def get_datatypehelp(datatype: str) -> str: - """ - Reads the description of the datatype in the schema/objects/datatypes.yaml file - - :param datatype: The datatype for which the help text is obtained - :return: The obtained help text - """ - - if not datatype: - return "Please provide a datatype" - - # Return the description for the datatype or a default text - if datatype in bidsdatatypesdef: - return f"{bidsdatatypesdef[datatype]['display_name']}\n{bidsdatatypesdef[datatype]['description']}" - - return f"{datatype}\nAn unknown/private datatype" - - -def get_suffixhelp(suffix: str, datatype: str) -> str: - """ - Reads the description of the suffix in the schema/objects/suffixes.yaml file - - :param suffix: The suffix for which the help text is obtained - :param datatype: The datatype of the suffix - :return: The obtained help text - """ - - if not suffix: - return "Please provide a suffix" - - isderivative = '' - if suffix in get_derivatives(datatype): - isderivative = '\nNB: This is a BIDS derivatives datatype' - - # Return the description for the suffix or a default text - if suffix in suffixes: - return f"{suffixes[suffix]['display_name']}\n{suffixes[suffix]['description']}{isderivative}" - - return f"{suffix}\nAn 
unknown/private suffix" - - -def get_entityhelp(entitykey: str) -> str: - """ - Reads the description of a matching entity=entitykey in the schema/entities.yaml file - - :param entitykey: The bids key for which the help text is obtained - :return: The obtained help text - """ - - if not entitykey: - return "Please provide a key-name" - - # Return the description from the entities or a default text - for entity in entities: - if entities[entity]['name'] == entitykey: - return f"{entities[entity]['display_name']}\n{entities[entity]['description']}" - - return f"{entitykey}\nAn unknown/private entity" - - -def get_metahelp(metakey: str) -> str: - """ - Reads the description of a matching schema/metadata/metakey.yaml file - - :param metakey: The meta key for which the help text is obtained - :return: The obtained help text - """ - - if not metakey: - return "Please provide a key-name" - - # Return the description from the metadata file or a default text - for field in metafields: - if metakey == metafields[field].get('name'): - description = metafields[field]['description'] - if metakey == 'IntendedFor': # IntendedFor is a special search-pattern field in BIDScoin - description += ('\nNB: These associated files can be dynamically searched for' - '\nduring bidscoiner runtime with glob-style matching patterns,' - '\n"such as <>" or <>' - '\n(see documentation)') - if metakey in ('B0FieldIdentifier', 'B0FieldSource'): # <> is a special dynamic value in BIDScoin - description += ('\nNB: The `<>` (sub)string will be replaced by the' - '\nsession label during bidscoiner runtime. 
In this way you can' - '\ncreate session-specific B0FieldIdentifier/Source tags (recommended)') - - return f"{metafields[field]['display_name']}\n{description}" - - return f"{metakey}\nAn unknown/private meta key" diff --git a/bidscoin/bidsapps/fixmeta.py b/bidscoin/bidsapps/fixmeta.py index b9759b18..a7c03c1d 100755 --- a/bidscoin/bidsapps/fixmeta.py +++ b/bidscoin/bidsapps/fixmeta.py @@ -53,10 +53,10 @@ def fixmeta(bidsfolder: str, pattern: str, metadata: dict, participant: list, bi LOGGER.info(f"Command: fixmeta {' '.join(sys.argv[1:])}") # Load the bidsmap data (-> plugins) - bidsmap, _ = bids.load_bidsmap(Path(bidsmap or 'bidsmap.yaml'), bidsdir/'code'/'bidscoin', checks=(False, False, False)) + bidsmap, _ = bids.BidsMap(Path(bidsmap or 'bidsmap.yaml'), bidsdir/'code'/'bidscoin', checks=(False, False, False)) if not bidsmap: - bidsmap, _ = bids.load_bidsmap(bidsmap_template, checks=(False, False, False)) - plugins = bidsmap['Options']['plugins'] + bidsmap, _ = bids.BidsMap(bidsmap_template, checks=(False, False, False)) + plugins = bidsmap.plugins provdata = bids.bidsprov(bidsdir) # Loop over the subject/session-directories @@ -83,7 +83,7 @@ def fixmeta(bidsfolder: str, pattern: str, metadata: dict, participant: list, bi if isinstance(row['targets'], str) and target.name in row['targets']: sourcedir = source datasource = bids.get_datasource(Path(sourcedir), plugins) - LOGGER.bcdebug(f"Datasource provenance: '{target.name}' -> '{datasource.path}'") + LOGGER.bcdebug(f"Datasource provenance: '{target.name}' -> '{datasource}'") # Load/copy over the source meta-data jsonfile = target.with_suffix('').with_suffix('.json') diff --git a/bidscoin/bidscoiner.py b/bidscoin/bidscoiner.py index 27abc59c..4bc47761 100755 --- a/bidscoin/bidscoiner.py +++ b/bidscoin/bidscoiner.py @@ -81,27 +81,25 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: # Create a README file if it does not exist readme_file = bidsfolder/'README' - if not 
readme_file.is_file(): + if not (readme_file.is_file() or next(bidsfolder.glob('README.*'))): LOGGER.info(f"Creating a template README file (adjust it to your needs): {readme_file}") try: urllib.request.urlretrieve('https://raw.githubusercontent.com/bids-standard/bids-starter-kit/main/templates/README.MD', readme_file) except urllib.error.URLError: - readme_file.write_text( - f"A free form text ( README ) describing the dataset in more details that SHOULD be provided. For an example, see e.g.:\n" - f"https://github.com/bids-standard/bids-starter-kit/blob/main/templates/README.MD\n\n" - f"The raw BIDS data was created using BIDScoin {__version__}\n" - f"All provenance information and settings can be found in ./code/bidscoin\n" - f"For more information see: https://github.com/Donders-Institute/bidscoin\n") + readme_file.write_text(f"A free form text ( README ) describing the dataset in more details that SHOULD be provided. For an example, see e.g.:\n" + f"https://github.com/bids-standard/bids-starter-kit/blob/main/templates/README.MD\n\n" + f"The raw BIDS data was created using BIDScoin {__version__}\n" + f"All provenance information and settings can be found in ./code/bidscoin\n" + f"For more information see: https://github.com/Donders-Institute/bidscoin\n") # Get the bidsmap heuristics from the bidsmap YAML-file - bidsmap, bidsmapfile = bids.load_bidsmap(bidsmapfile, bidscoinfolder) - dataformats = [dataformat for dataformat in bidsmap if dataformat and dataformat not in ('$schema','Options')] - if not bidsmap: + bidsmap = bids.BidsMap(bidsmapfile, bidscoinfolder) + if not bidsmap.filepath.is_file(): LOGGER.error(f"No bidsmap file found in {bidsfolder}. 
Please run the bidsmapper first and/or use the correct bidsfolder") return # Load the data conversion plugins - plugins = [bcoin.import_plugin(plugin, ('bidscoiner_plugin',)) for plugin,options in bidsmap['Options']['plugins'].items()] + plugins = [bcoin.import_plugin(plugin, ('bidscoiner_plugin',)) for plugin,options in bidsmap.plugins.items()] plugins = [plugin for plugin in plugins if plugin] # Filter the empty items from the list if not plugins: LOGGER.warning(f"The plugins listed in your bidsmap['Options'] did not have a usable `bidscoiner_plugin` function, nothing to do") @@ -110,7 +108,7 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: return # Append options to the .bidsignore file - bidsignore_items = bidsmap['Options']['bidscoin']['bidsignore'] + bidsignore_items = bidsmap.options['bidsignore'] bidsignore_file = bidsfolder/'.bidsignore' if bidsignore_items: LOGGER.verbose(f"Writing {bidsignore_items} entries to {bidsignore_file}") @@ -121,8 +119,8 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: bidsignore.write(item + '\n') # Get the list of subjects - subprefix = bidsmap['Options']['bidscoin']['subprefix'].replace('*','') - sesprefix = bidsmap['Options']['bidscoin']['sesprefix'].replace('*','') + subprefix = bidsmap.options['subprefix'].replace('*','') + sesprefix = bidsmap.options['sesprefix'].replace('*','') if not participant: subjects = lsdirs(rawfolder, (subprefix if subprefix!='*' else '') + '*') if not subjects: @@ -151,7 +149,7 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: for subject in subjects: # Check if we should skip the session-folder - datasource = bids.get_datasource(subject, bidsmap['Options']['plugins']) + datasource = bids.get_datasource(subject, bidsmap.plugins) subid,_ = datasource.subid_sesid(bidsmap[datasource.dataformat]['subject'], bidsmap[datasource.dataformat]['session']) if (bidsfolder/subid).is_dir() and not force: 
LOGGER.info(f">>> Skipping already processed subject: {bidsfolder/subid} (you can use the -f option to overrule)") @@ -257,11 +255,11 @@ def bidscoiner(sourcefolder: str, bidsfolder: str, participant: list=(), force: for session in sessions: # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file - sesfolders, unpacked = bids.unpack(session, bidsmap['Options']['bidscoin'].get('unzip','')) + sesfolders, unpacked = bids.unpack(session, bidsmap.options.get('unzip','')) for sesfolder in sesfolders: # Check if we should skip the session-folder - datasource = bids.get_datasource(sesfolder, bidsmap['Options']['plugins']) + datasource = bids.get_datasource(sesfolder, bidsmap.plugins) if not datasource.dataformat: LOGGER.info(f">>> No datasources found in '{sesfolder}'") continue diff --git a/bidscoin/bidseditor.py b/bidscoin/bidseditor.py index bf9617d9..8edb76ea 100755 --- a/bidscoin/bidseditor.py +++ b/bidscoin/bidseditor.py @@ -5,14 +5,13 @@ import logging import copy import webbrowser -import re import ast import json import csv import nibabel as nib from bids_validator import BIDSValidator from typing import Union, List, Dict -from pydicom import dcmread +from pydicom import dcmread, datadict from pathlib import Path from functools import partial from PyQt6 import QtCore, QtGui, QtWidgets @@ -24,8 +23,7 @@ if find_spec('bidscoin') is None: sys.path.append(str(Path(__file__).parents[1])) from bidscoin import bcoin, bids, bidsversion, check_version, trackusage, bidsmap_template, __version__ -from bidscoin.bids import Bidsmap, Plugin, Run, extensions - +from bidscoin.bids import extensions, BidsMap, RunItem, DataType ROW_HEIGHT = 22 BIDSCOIN_LOGO = Path(__file__).parent/'bidscoin_logo.png' @@ -78,7 +76,7 @@ class MainWindow(QMainWindow): - def __init__(self, bidsfolder: Path, input_bidsmap: Bidsmap, template_bidsmap: Bidsmap, datasaved: bool=False, reset: bool=False): + def __init__(self, bidsfolder: Path, input_bidsmap: BidsMap, 
template_bidsmap: BidsMap, datasaved: bool=False, reset: bool=False): # Set up the main window if not reset: @@ -86,28 +84,32 @@ def __init__(self, bidsfolder: Path, input_bidsmap: Bidsmap, template_bidsmap: B self.setWindowIcon(QtGui.QIcon(str(BIDSCOIN_ICON))) self.set_menu_statusbar() - if not input_bidsmap: + if not input_bidsmap.filepath.name: filename, _ = QFileDialog.getOpenFileName(self, 'Open a bidsmap file', str(bidsfolder), 'YAML Files (*.yaml *.yml);;All Files (*)') if filename: - input_bidsmap, _ = bids.load_bidsmap(Path(filename)) - if input_bidsmap.get('Options'): - template_bidsmap['Options'] = input_bidsmap['Options'] # Always use the options of the input bidsmap + input_bidsmap = BidsMap(Path(filename)) + if input_bidsmap.options: + template_bidsmap.options = input_bidsmap.options # Always use the options of the input bidsmap + template_bidsmap.plugins = input_bidsmap.plugins # Always use the plugins of the input bidsmap else: - input_bidsmap = {'Options': template_bidsmap['Options']} + input_bidsmap = copy.deepcopy(template_bidsmap) + for dataformat in input_bidsmap.dataformats: + for datatype in dataformat.datatypes: + datatype.delete_runs() # Keep track of the EditWindow status self.editwindow_opened: Union[str,None] = None # The provenance string of the run-item that is opened in the EditWindow # Set the input data self.bidsfolder: Path = Path(bidsfolder) # The folder where the bids data is / will be stored - self.input_bidsmap: Bidsmap = input_bidsmap # The original/unedited bidsmap - self.output_bidsmap: Bidsmap = copy.deepcopy(input_bidsmap) # The edited bidsmap - self.template_bidsmap: Bidsmap = template_bidsmap # The bidsmap from which new data type run-items are taken + self.input_bidsmap = input_bidsmap # The original/unedited bidsmap + self.output_bidsmap = copy.deepcopy(input_bidsmap) # The edited bidsmap + self.template_bidsmap = template_bidsmap # The bidsmap from which new data type run-items are taken self.datasaved: bool = 
datasaved # True if data has been saved on disk - self.dataformats: List[str] = [dataformat for dataformat in input_bidsmap if dataformat and dataformat not in ('$schema','Options') and bids.dir_bidsmap(input_bidsmap, dataformat)] - self.bidsignore: List[str] = input_bidsmap['Options']['bidscoin']['bidsignore'] - self.unknowndatatypes: List[str] = input_bidsmap['Options']['bidscoin']['unknowntypes'] - self.ignoredatatypes: List[str] = input_bidsmap['Options']['bidscoin']['ignoretypes'] + self.dataformats = [dataformat.dataformat for dataformat in input_bidsmap.dataformats if input_bidsmap.dir(dataformat)] + self.bidsignore: List[str] = input_bidsmap.options['bidsignore'] + self.unknowndatatypes: List[str] = input_bidsmap.options['unknowntypes'] + self.ignoredatatypes: List[str] = input_bidsmap.options['ignoretypes'] # Set up the tabs, add the tables and put the bidsmap data in them tabwidget = self.tabwidget = QtWidgets.QTabWidget() @@ -192,21 +194,20 @@ def show_contextmenu(self, pos): rowindex = [index.row() for index in table.selectedIndexes() if index.column() == colindex] if rowindex and colindex in (-1, 0, 4): # User clicked the index, the edit-button or elsewhere (i.e. 
not on an activated widget) return - runs = [] - subid = [] - sesid = [] + runs: List[RunItem] = [] + subids: List[str] = [] + sesids: List[str] = [] for index in rowindex: datatype = table.item(index, 2).text() provenance = table.item(index, 5).text() - runs.append(bids.find_run(self.output_bidsmap, provenance, dataformat, datatype)) - subid.append(bids.get_bidsvalue(table.item(index, 3).text(), 'sub')) - sesid.append(bids.get_bidsvalue(table.item(index, 3).text(), 'ses')) + runs.append(self.output_bidsmap.find_run(provenance, dataformat, datatype)) + subids.append(bids.get_bidsvalue(table.item(index, 3).text(), 'sub')) + sesids.append(bids.get_bidsvalue(table.item(index, 3).text(), 'ses')) # Get the datatypes for the dataformat(s) datatypes = set() - for dtype in self.template_bidsmap[dataformat]: - if dtype not in ('subject', 'session'): - datatypes.add(dtype) + for dtype in self.template_bidsmap.dataformat(dataformat).datatypes: + datatypes.add(dtype.datatype) datatypes = sorted(datatypes) # Pop-up the context-menu @@ -229,22 +230,22 @@ def show_contextmenu(self, pos): if filenames: datatype, ok = QInputDialog.getItem(self, 'Select the data type of the run-item(s)', 'datatype', datatypes, editable=False) if datatype and ok: - datasource = bids.DataSource() + datasource = bids.DataSource(dataformat=dataformat) for filename in filenames: - datasource = bids.DataSource(filename, self.output_bidsmap['Options']['plugins'], dataformat, datatype) - if datasource.is_datasource: - run = bids.get_run(self.template_bidsmap, datatype, 0, datasource) - run['properties']['filepath'] = datasource.properties('filepath') # Make the added run a strict match (i.e. an exception) - run['properties']['filename'] = datasource.properties('filename') # Make the added run a strict match (i.e. 
an exception) + datasource = bids.DataSource(filename, self.output_bidsmap.plugins, dataformat, self.output_bidsmap.options) + if datasource.has_plugin(): + run = self.template_bidsmap.get_run(datatype, 0, datasource) + run.properties['filepath'] = datasource.properties('filepath') # Make the added run a strict match (i.e. an exception) + run.properties['filename'] = datasource.properties('filename') # Make the added run a strict match (i.e. an exception) LOGGER.verbose(f"Expert usage: User adds run-item {dataformat}[{datatype}]: {filename}") - if Path(filename) in bids.dir_bidsmap(self.output_bidsmap, dataformat): + if Path(filename) in self.output_bidsmap.dir(dataformat): LOGGER.warning(f"Added run-item {dataformat}[{datatype}]: {filename} already exists") - bids.insert_run(self.output_bidsmap, run, 0) # Put the run at the front (so it gets matching priority) + self.output_bidsmap.insert_run(run, 0) # Put the run at the front (so it gets matching priority) if dataformat not in self.ordered_file_index: self.ordered_file_index[dataformat] = {datasource.path: 0} else: self.ordered_file_index[dataformat][datasource.path] = max(self.ordered_file_index[dataformat][fname] for fname in self.ordered_file_index[dataformat]) + 1 - if datasource.is_datasource: + if datasource.has_plugin(): self.update_subses_samples(self.output_bidsmap, dataformat) elif action == delete: @@ -258,12 +259,13 @@ def show_contextmenu(self, pos): QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.Cancel, QMessageBox.StandardButton.Cancel) if answer != QMessageBox.StandardButton.Yes: continue LOGGER.verbose(f"Expert usage: User removes run-item {dataformat}[{datatype}]: {provenance}") - deleted = bids.delete_run(self.output_bidsmap, bids.find_run(self.output_bidsmap, provenance, dataformat, datatype)) + self.output_bidsmap.delete_run(self.output_bidsmap.find_run(provenance, dataformat, datatype)) + deleted = True if deleted: self.update_subses_samples(self.output_bidsmap, dataformat) elif 
action == compare: - CompareWindow(runs, subid, sesid) + CompareWindow(runs, subids, sesids) elif action == edit: if len(rowindex) == 1: @@ -284,14 +286,14 @@ def show_contextmenu(self, pos): continue # Get the new run from the template - oldrun = bids.find_run(self.output_bidsmap, provenance, dataformat, datatype) - newrun = bids.get_run(self.template_bidsmap, newdatatype, 0, oldrun['datasource']) + oldrun = self.output_bidsmap.find_run(provenance, dataformat, datatype) + newrun = self.template_bidsmap.get_run(newdatatype, 0, oldrun.datasource) if not newrun: QMessageBox.warning(self, 'Edit BIDS mapping', f"Cannot find the '{newdatatype}' data type in your template") continue # Insert the new run in our output bidsmap - bids.update_bidsmap(self.output_bidsmap, datatype, newrun) + self.output_bidsmap.update(datatype, newrun) LOGGER.verbose(f"User sets run-item {dataformat}[{datatype} -> {newdatatype}]: {provenance}") self.update_subses_samples(self.output_bidsmap, dataformat) @@ -422,7 +424,7 @@ def set_tab_options(self): """Set the options tab""" # Create the bidscoin table - bidscoin_options = self.output_bidsmap['Options']['bidscoin'] + bidscoin_options = self.output_bidsmap.options self.options_label['bidscoin'] = bidscoin_label = QLabel('BIDScoin') bidscoin_label.setToolTip(TOOLTIP_BIDSCOIN) self.options_table['bidscoin'] = bidscoin_table = MyQTableWidget() @@ -449,7 +451,7 @@ def set_tab_options(self): layout.addWidget(bidscoin_table) # Add the plugin tables - for plugin, options in self.output_bidsmap['Options']['plugins'].items(): + for plugin, options in self.output_bidsmap.plugins.items(): plugin_label, plugin_table = self.plugin_table(plugin, options) layout.addWidget(plugin_label) layout.addWidget(plugin_table) @@ -502,7 +504,7 @@ def set_tab_filebrowser(self): self.tabwidget.addTab(tab, 'Data browser') - def update_subses_samples(self, output_bidsmap: Bidsmap, dataformat: str): + def update_subses_samples(self, output_bidsmap: BidsMap, dataformat: 
str): """(Re)populates the sample list with bidsnames according to the bidsmap""" self.datachanged = True @@ -510,14 +512,14 @@ def update_subses_samples(self, output_bidsmap: Bidsmap, dataformat: str): # Update the subject/session table subitem = MyWidgetItem('subject', iseditable=False) - subitem.setToolTip(bids.get_entityhelp('sub')) + subitem.setToolTip(get_entityhelp('sub')) sesitem = MyWidgetItem('session', iseditable=False) - sesitem.setToolTip(bids.get_entityhelp('ses')) + sesitem.setToolTip(get_entityhelp('ses')) subses_table = self.subses_table[dataformat] subses_table.setItem(0, 0, subitem) subses_table.setItem(1, 0, sesitem) - subses_table.setItem(0, 1, MyWidgetItem(output_bidsmap[dataformat]['subject'])) - subses_table.setItem(1, 1, MyWidgetItem(output_bidsmap[dataformat]['session'])) + subses_table.setItem(0, 1, MyWidgetItem(output_bidsmap.dataformat(dataformat).subject)) + subses_table.setItem(1, 1, MyWidgetItem(output_bidsmap.dataformat(dataformat).session)) # Update the run samples table idx = 0 @@ -527,20 +529,20 @@ def update_subses_samples(self, output_bidsmap: Bidsmap, dataformat: str): samples_table.setRowCount(num_files) samples_table.setSortingEnabled(False) samples_table.clearContents() - for datatype, runs in output_bidsmap[dataformat].items(): - if not isinstance(runs, list): continue # E.g. 
datatype = 'subject' or 'session' - for run in runs: + for datatype in output_bidsmap.dataformat(dataformat).datatypes: + for run in datatype.runitems: # Check the run and get some data - validrun = all(bids.check_run(datatype, run, checks=(False, False, False))[1:3]) - provenance = Path(run['provenance']) - subid = output_bidsmap[dataformat]['subject'] - sesid = output_bidsmap[dataformat]['session'] - subid, sesid = run['datasource'].subid_sesid(subid, sesid or '') - bidsname = bids.get_bidsname(subid, sesid, run, not bids.check_ignore(datatype,self.bidsignore) and datatype not in self.ignoredatatypes) + dtype = datatype.datatype + validrun = all(run.check(dtype, checks=(False, False, False))[1:3]) + provenance = Path(run.provenance) + subid = output_bidsmap.dataformat(dataformat).subject + sesid = output_bidsmap.dataformat(dataformat).session + subid, sesid = run.datasource.subid_sesid(subid, sesid or '') + bidsname = run.bidsname(subid, sesid, not bids.check_ignore(datatype,self.bidsignore) and dtype not in self.ignoredatatypes) ignore = bids.check_ignore(datatype, self.bidsignore) or bids.check_ignore(bidsname+'.json', self.bidsignore, 'file') - exceptions = self.output_bidsmap['Options']['bidscoin']['notderivative'] - if run['datasource'].dynamicvalue(run['bids']['suffix'], True, True) in bids.get_derivatives(datatype, exceptions): + exceptions = self.output_bidsmap.options['notderivative'] + if run.datasource.dynamicvalue(run.bids['suffix'], True, True) in bids.get_derivatives(dtype, exceptions): session = self.bidsfolder/'derivatives'/'[manufacturer]'/subid/sesid else: session = self.bidsfolder/subid/sesid @@ -548,8 +550,8 @@ def update_subses_samples(self, output_bidsmap: Bidsmap, dataformat: str): samples_table.setItem(idx, 0, MyWidgetItem(f"{row_index+1:03d}", iseditable=False)) samples_table.setItem(idx, 1, MyWidgetItem(provenance.name)) - samples_table.setItem(idx, 2, MyWidgetItem(datatype)) # Hidden column - samples_table.setItem(idx, 3, 
MyWidgetItem(Path(datatype)/(bidsname + '.*'))) + samples_table.setItem(idx, 2, MyWidgetItem(dtype)) # Hidden column + samples_table.setItem(idx, 3, MyWidgetItem(Path(dtype)/(bidsname + '.*'))) samples_table.setItem(idx, 5, MyWidgetItem(provenance)) # Hidden column samples_table.item(idx, 0).setFlags(QtCore.Qt.ItemFlag.NoItemFlags) @@ -557,28 +559,28 @@ def update_subses_samples(self, output_bidsmap: Bidsmap, dataformat: str): samples_table.item(idx, 2).setFlags(QtCore.Qt.ItemFlag.ItemIsEnabled) samples_table.item(idx, 1).setToolTip('Double-click to inspect the header information') samples_table.item(idx, 1).setStatusTip(str(provenance.parent) + str(Path('/'))) - if datatype not in self.ignoredatatypes: + if dtype not in self.ignoredatatypes: samples_table.item(idx, 3).setStatusTip(str(session) + str(Path('/'))) if samples_table.item(idx, 3): if ignore: samples_table.item(idx, 3).setForeground(QtGui.QColor('darkorange')) samples_table.item(idx, 3).setToolTip(f"Orange: This {datatype} item is ignored by BIDS-apps and BIDS-validators") - elif datatype in self.ignoredatatypes: + elif dtype in self.ignoredatatypes: samples_table.item(idx, 1).setForeground(QtGui.QColor('gray')) samples_table.item(idx, 3).setForeground(QtGui.QColor('gray')) f = samples_table.item(idx, 3).font() f.setStrikeOut(True) samples_table.item(idx, 3).setFont(f) samples_table.item(idx, 3).setToolTip('Gray/Strike-out: This imaging data type will be ignored and not converted BIDS') - elif not validrun or datatype in self.unknowndatatypes: + elif not validrun or dtype in self.unknowndatatypes: samples_table.item(idx, 3).setForeground(QtGui.QColor('red')) samples_table.item(idx, 3).setToolTip(f"Red: This {datatype} item is not BIDS-valid but will still be converted. 
You should edit this item or make sure it is in your bidsignore list ([Options] tab)") else: samples_table.item(idx, 3).setForeground(QtGui.QColor('green')) samples_table.item(idx, 3).setToolTip(f"Green: This '{datatype}' data type is part of BIDS") - if validrun or ignore or datatype in self.ignoredatatypes: + if validrun or ignore or dtype in self.ignoredatatypes: edit_button = QPushButton('Edit') edit_button.setToolTip('Click to see more details and edit the BIDS output name') else: @@ -601,7 +603,7 @@ def update_subses_samples(self, output_bidsmap: Bidsmap, dataformat: str): def set_ordered_file_index(self, dataformat: str) -> int: """Sets the mapping between the ordered provenance and an increasing file-index""" - provenances = bids.dir_bidsmap(self.output_bidsmap, dataformat) + provenances = self.output_bidsmap.dir(dataformat) if len(provenances) > len(self.ordered_file_index.get(dataformat,[])): ordered_index = {} for file_index, file_name in enumerate(provenances): @@ -618,14 +620,14 @@ def subsescell2bidsmap(self, rowindex: int, colindex: int): if colindex == 1 and dataformat in self.dataformats: key = self.subses_table[dataformat].item(rowindex, 0).text().strip() value = self.subses_table[dataformat].item(rowindex, 1).text().strip() - oldvalue = self.output_bidsmap[dataformat][key] + oldvalue = getattr(self.output_bidsmap.dataformat(dataformat), key) if oldvalue is None: oldvalue = '' # Only if cell content was changed, update if key and value != oldvalue: LOGGER.verbose(f"User sets {dataformat}['{key}'] from '{oldvalue}' to '{value}'") - self.output_bidsmap[dataformat][key] = value + setattr(self.output_bidsmap.dataformat(dataformat), key, value) self.update_subses_samples(self.output_bidsmap, dataformat) def open_editwindow(self, provenance: Path=Path(), datatype: str=''): @@ -645,8 +647,8 @@ def open_editwindow(self, provenance: Path=Path(), datatype: str=''): # Check for open edit window, find the right data type index and open the edit window if not 
self.editwindow_opened: # Find the source index of the run in the list of runs (using the provenance) and open the edit window - for run in self.output_bidsmap[dataformat][datatype]: - if Path(run['provenance']) == Path(provenance): + for run in self.output_bidsmap.dataformat(dataformat).datatype(datatype).runitems: + if Path(run.provenance) == Path(provenance): LOGGER.verbose(f'User is editing {provenance}') self.editwindow = EditWindow(run, self.output_bidsmap, self.template_bidsmap) self.editwindow_opened = str(provenance) @@ -667,7 +669,7 @@ def release_editwindow(self): """Allow a new edit window to be opened""" self.editwindow_opened = None - def plugin_table(self, name: str, plugin: Plugin) -> tuple: + def plugin_table(self, name: str, plugin: dict) -> tuple: """:return: a plugin-label and a filled plugin-table""" self.options_label[name] = plugin_label = QLabel(f"{name} - plugin") @@ -703,9 +705,9 @@ def options2bidsmap(self, rowindex: int, colindex: int): for plugin,table in self.options_table.items(): if plugin == 'bidscoin': - oldoptions = self.output_bidsmap['Options']['bidscoin'] + oldoptions = self.output_bidsmap.options else: - oldoptions = self.output_bidsmap['Options']['plugins'].get(plugin,{}) + oldoptions = self.output_bidsmap.plugins.get(plugin,{}) newoptions = {} for rownr in range(table.rowCount()): keyitem = table.item(rownr, 0) @@ -722,14 +724,14 @@ def options2bidsmap(self, rowindex: int, colindex: int): LOGGER.verbose(f"User sets the '{plugin}' option from '{key}: {oldoptions.get(key)}' to '{key}: {val}'") self.datachanged = True if plugin == 'bidscoin': - self.output_bidsmap['Options']['bidscoin'] = newoptions + self.output_bidsmap.options = newoptions self.unknowndatatypes = newoptions.get('unknowntypes', []) self.ignoredatatypes = newoptions.get('ignoretypes', []) self.bidsignore = newoptions.get('bidsignore', []) for dataformat in self.dataformats: self.update_subses_samples(self.output_bidsmap, dataformat) else: - 
self.output_bidsmap['Options']['plugins'][plugin] = newoptions + self.output_bidsmap.plugins[plugin] = newoptions # Add an extra row if the table is full if rowindex + 1 == table.rowCount() and table.currentItem() and table.currentItem().text().strip(): @@ -769,20 +771,18 @@ def add_plugin(self): # Check the selected plugin and get its options plugin = dropdown.currentText() - if plugin in self.output_bidsmap['Options']['plugins']: + if plugin in self.output_bidsmap.plugins: LOGGER.error(f"Cannot add the '{plugin}' plugin as it already exists in the bidsmap") return module = bcoin.import_plugin(plugin) - options = self.input_bidsmap[ 'Options']['plugins'].get(plugin, - self.template_bidsmap['Options']['plugins'].get(plugin, - module.OPTIONS if 'OPTIONS' in dir(module) else {})) + options = self.input_bidsmap.plugins.get(plugin, self.template_bidsmap.plugins.get(plugin, module.OPTIONS if 'OPTIONS' in dir(module) else {})) # Insert the selected plugin in the options_layout LOGGER.info(f"Adding the '{plugin}' plugin to bidsmap") plugin_label, plugin_table = self.plugin_table(plugin, options) self.options_layout.insertWidget(self.options_layout.count()-3, plugin_label) self.options_layout.insertWidget(self.options_layout.count()-3, plugin_table) - self.output_bidsmap['Options']['plugins'][plugin] = options + self.output_bidsmap.plugins[plugin] = options self.datachanged = True # Notify the user that the bidsmapper need to be re-run @@ -791,7 +791,7 @@ def add_plugin(self): def del_plugin(self, plugin: str): """Removes the plugin table from the Options-tab and the data from the bidsmap""" - LOGGER.info(f"Removing the '{plugin}' from bidsmap['Options']['plugins']") + LOGGER.info(f"Removing the '{plugin}' from bidsmap.plugins") plugin_label = self.options_label[plugin] plugin_table = self.options_table[plugin] self.options_layout.removeWidget(plugin_label) @@ -800,13 +800,13 @@ def del_plugin(self, plugin: str): plugin_table.deleteLater() self.options_label.pop(plugin, 
None) self.options_table.pop(plugin, None) - self.output_bidsmap['Options']['plugins'].pop(plugin, None) + self.output_bidsmap.plugins.pop(plugin, None) self.datachanged = True def test_plugin(self, plugin: str): """Test the plugin and show the result in a pop-up window""" - status = bcoin.test_plugin(Path(plugin), self.output_bidsmap['Options']['plugins'].get(plugin,{})) + status = bcoin.test_plugin(Path(plugin), self.output_bidsmap.plugins.get(plugin,{})) if not status or (status==3 and plugin=='dcm2niix2bids'): QMessageBox.information(self, 'Plugin test', f"Import of {plugin}: Passed\nSee terminal output for more info") else: @@ -815,7 +815,7 @@ def test_plugin(self, plugin: str): def test_bidscoin(self): """Test the bidsmap tool and show the result in a pop-up window""" - if not bcoin.test_bidscoin(self.input_bidsmap, options=self.output_bidsmap['Options'], testplugins=False, testgui=False, testtemplate=False): + if not bcoin.test_bidscoin(self.input_bidsmap, options=self.output_bidsmap.options, testplugins=False, testgui=False, testtemplate=False): QMessageBox.information(self, 'Tool test', f"BIDScoin test: Passed\nSee terminal output for more info") else: QMessageBox.warning(self, 'Tool test', f"BIDScoin test: Failed\nSee terminal output for more info") @@ -824,9 +824,9 @@ def validate_runs(self): """Test the runs in the study bidsmap""" LOGGER.info(' ') - bids.check_bidsmap(self.output_bidsmap) + self.output_bidsmap.check() LOGGER.info(' ') - bids.validate_bidsmap(self.output_bidsmap, 2) + self.output_bidsmap.validate(2) def reset(self): """Reset button: reset the window with the original input BIDS map""" @@ -862,21 +862,20 @@ def open_bidsmap(self): filename, _ = QFileDialog.getOpenFileName(self, 'Open File', str(self.bidsfolder/'code'/'bidscoin'/'bidsmap.yaml'), 'YAML Files (*.yaml *.yml);;All Files (*)') if filename: QtCore.QCoreApplication.setApplicationName(f"{filename} - BIDS editor {__version__}") - self.input_bidsmap, _ = 
bids.load_bidsmap(Path(filename)) + self.input_bidsmap = BidsMap(Path(filename)) self.reset() def save_bidsmap(self): """Check and save the bidsmap to file""" for dataformat in self.dataformats: - if self.output_bidsmap[dataformat].get('fmap'): - for run in self.output_bidsmap[dataformat]['fmap']: - if not (run['meta'].get('B0FieldSource') or run['meta'].get('B0FieldIdentifier') or run['meta'].get('IntendedFor')): - LOGGER.warning(f"B0FieldIdentifier/IntendedFor fieldmap value is empty for {dataformat} run-item: {run['provenance']}") + for run in self.output_bidsmap.dataformat(dataformat).datatype('fmap').runitems: + if not (run.meta.get('B0FieldSource') or run.meta.get('B0FieldIdentifier') or run.meta.get('IntendedFor')): + LOGGER.warning(f"B0FieldIdentifier/IntendedFor fieldmap value is empty for {dataformat} run-item: {run}") filename,_ = QFileDialog.getSaveFileName(self, 'Save File', str(self.bidsfolder/'code'/'bidscoin'/'bidsmap.yaml'), 'YAML Files (*.yaml *.yml);;All Files (*)') if filename: - bids.save_bidsmap(Path(filename), self.output_bidsmap) + self.output_bidsmap.save(Path(filename)) QtCore.QCoreApplication.setApplicationName(f"{filename} - BIDS editor {__version__}") self.datasaved = True self.datachanged = False @@ -887,10 +886,11 @@ def save_options(self): yamlfile, _ = QFileDialog.getOpenFileName(self, 'Select the (default) template bidsmap to save the options in', str(bidsmap_template), 'YAML Files (*.yaml *.yml);;All Files (*)') if yamlfile: - LOGGER.info(f"Saving bidsmap['Options'] in: {yamlfile}") + LOGGER.info(f"Saving bidsmap options in: {yamlfile}") with open(yamlfile, 'r') as stream: bidsmap = bids.yaml.load(stream) - bidsmap['Options'] = self.output_bidsmap['Options'] + bidsmap.options = self.output_bidsmap.options + bidsmap.plugins = self.output_bidsmap.plugins with open(yamlfile, 'w') as stream: bids.yaml.dump(bidsmap, stream) @@ -949,28 +949,39 @@ class EditWindow(QDialog): """ # Emit the new bidsmap when done (see docstring) - done_edit 
= QtCore.pyqtSignal(dict, str) + done_edit = QtCore.pyqtSignal(BidsMap, str) - def __init__(self, run: Run, bidsmap: Bidsmap, template_bidsmap: Bidsmap): + def __init__(self, run: RunItem, bidsmap: BidsMap, template_bidsmap: BidsMap): super().__init__() + # Set the data - datasource: bids.DataSource = run['datasource'] - self.datasource = datasource - self.dataformat: str = datasource.dataformat # The data format of the run-item being edited (bidsmap[dataformat][datatype][run-item]) - self.source_datatype: str = datasource.datatype # The BIDS data type of the original run-item - self.target_datatype: str = datasource.datatype # The BIDS data type that the edited run-item is being changed into - self.current_datatype: str = datasource.datatype # The BIDS datatype of the run-item just before it is being changed (again) - self.unknowndatatypes: List[str] = [datatype for datatype in bidsmap['Options']['bidscoin']['unknowntypes'] if datatype in template_bidsmap[self.dataformat]] - self.ignoredatatypes: List[str] = [datatype for datatype in bidsmap['Options']['bidscoin']['ignoretypes'] if datatype in template_bidsmap[self.dataformat]] - self.bidsdatatypes: List[str] = [datatype for datatype in template_bidsmap[self.dataformat] if datatype not in self.unknowndatatypes + self.ignoredatatypes + ['subject', 'session']] - self.bidsignore: List[str] = bidsmap['Options']['bidscoin']['bidsignore'] - self.source_bidsmap: Bidsmap = bidsmap # The bidsmap at the start of the edit = output_bidsmap in the MainWindow - self.target_bidsmap: Bidsmap = copy.deepcopy(bidsmap) # The edited bidsmap -> will be returned as output_bidsmap in the MainWindow - self.template_bidsmap: Bidsmap = template_bidsmap # The bidsmap from which new datatype run-items are taken - self.source_run: Run = run # The original run-item from the source bidsmap - self.target_run: Run = copy.deepcopy(run) # The edited run-item that is inserted in the target_bidsmap - self.get_allowed_suffixes() # Set the possible 
suffixes the user can select for a given datatype - self.subid, self.sesid = datasource.subid_sesid(bidsmap[self.dataformat]['subject'], bidsmap[self.dataformat]['session'] or '') + datasource = run.datasource + self.datasource = datasource + self.dataformat = run.dataformat + """The data format of the run-item being edited (bidsmap[dataformat][datatype][run-item])""" + self.source_datatype = run.datatype + """The BIDS data type of the original run-item""" + self.target_datatype = run.datatype + """The BIDS data type that the edited run-item is being changed into""" + self.current_datatype = run.datatype + """The BIDS datatype of the run-item just before it is being changed (again)""" + self.unknowndatatypes: List[str] = [datatype for datatype in bidsmap.options['unknowntypes'] if datatype in template_bidsmap.dataformat(self.dataformat).datatypes] + self.ignoredatatypes: List[str] = [datatype for datatype in bidsmap.options['ignoretypes'] if datatype in template_bidsmap.dataformat(self.dataformat).datatypes] + self.bidsdatatypes = [str(datatype) for datatype in template_bidsmap.dataformat(self.dataformat).datatypes if datatype not in self.unknowndatatypes + self.ignoredatatypes + ['subject', 'session']] + self.bidsignore = bidsmap.options['bidsignore'] + self.source_bidsmap = bidsmap + """The bidsmap at the start of the edit = output_bidsmap in the MainWindow""" + self.target_bidsmap = copy.deepcopy(bidsmap) + """The edited bidsmap -> will be returned as output_bidsmap in the MainWindow""" + self.template_bidsmap = template_bidsmap + """The bidsmap from which new datatype run-items are taken""" + self.source_run = run + """The original run-item from the source bidsmap""" + self.target_run = copy.deepcopy(run) + """The edited run-item that is inserted in the target_bidsmap""" + self.allowed_suffixes = self.get_allowed_suffixes() + """Set the possible suffixes the user can select for a given datatype""" + self.subid, self.sesid = 
datasource.subid_sesid(bidsmap.dataformat(run.dataformat).subject, bidsmap.dataformat(run.dataformat).session or '') # Set up the window self.setWindowIcon(QtGui.QIcon(str(BIDSCOIN_ICON))) @@ -1005,7 +1016,7 @@ def __init__(self, run: Run, bidsmap: Bidsmap, template_bidsmap: Bidsmap): self.datatype_dropdown.currentIndexChanged.connect(self.datatype_dropdown_change) self.datatype_dropdown.setToolTip('The BIDS data type. First make sure this one is correct, then choose the right suffix') for n, datatype in enumerate(self.bidsdatatypes + self.unknowndatatypes): - self.datatype_dropdown.setItemData(n, bids.get_datatypehelp(datatype), QtCore.Qt.ItemDataRole.ToolTipRole) + self.datatype_dropdown.setItemData(n, get_datatypehelp(datatype), QtCore.Qt.ItemDataRole.ToolTipRole) # Set up the BIDS table self.bids_label = QLabel('Entities') @@ -1093,7 +1104,7 @@ def __init__(self, run: Run, bidsmap: Bidsmap, template_bidsmap: Bidsmap): def reject(self, confirm=True): """Ask if the user really wants to close the window""" - if confirm and re.sub('<(?!.*<).*? object at .*?>','',str(self.target_run)) != re.sub('<(?!.*<).*? 
object at .*?>','',str(self.source_run)): # Ignore the memory address of the datasource object + if confirm and self.target_run != self.source_run: self.raise_() answer = QMessageBox.question(self, 'Edit BIDS mapping', 'Closing window, do you want to save the changes you made?', QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No | QMessageBox.StandardButton.Cancel, QMessageBox.StandardButton.Yes) @@ -1123,7 +1134,7 @@ def show_contextmenu(self, pos): if action == import_data: # Read all the meta-data from the table and store it in the target_run - metafile, _ = QFileDialog.getOpenFileName(self, 'Import meta data from file', str(self.source_run['provenance']), + metafile, _ = QFileDialog.getOpenFileName(self, 'Import meta data from file', str(self.source_run.provenance), 'JSON/YAML/CSV/TSV Files (*.json *.yaml *.yml *.txt *.csv *.tsv);;All Files (*)') if metafile: @@ -1151,28 +1162,28 @@ def show_contextmenu(self, pos): return # Write all the meta-data to the target_run - self.target_run['meta'].update(metadata) + self.target_run.meta.update(metadata) # Refresh the meta-table using the target_run _, _, _, data_meta = self.run2data() self.fill_table(self.meta_table, data_meta) elif action == clear_table: - self.target_run['meta'] = {} + self.target_run.meta = {} self.fill_table(self.meta_table, []) - def get_allowed_suffixes(self): + def get_allowed_suffixes(self) -> Dict[str, set]: """Derive the possible suffixes for each datatype from the template. 
""" allowed_suffixes = {} for datatype in self.bidsdatatypes + self.unknowndatatypes + self.ignoredatatypes: allowed_suffixes[datatype] = set() - for run in self.template_bidsmap[self.dataformat].get(datatype, []): - suffix = self.datasource.dynamicvalue(run['bids']['suffix'], True) + for run in self.template_bidsmap.dataformat(self.dataformat).datatype(datatype).runitems: + suffix = self.datasource.dynamicvalue(run.bids['suffix'], True) if suffix: - allowed_suffixes[datatype].add(suffix) + allowed_suffixes[str(datatype)].add(suffix) - self.allowed_suffixes: Dict[str, set] = allowed_suffixes + return allowed_suffixes def run2data(self) -> tuple: """Derive the tabular data from the target_run, needed to render the edit window @@ -1184,33 +1195,33 @@ def run2data(self) -> tuple: filename = self.datasource.properties('filename') filesize = self.datasource.properties('filesize') nrfiles = self.datasource.properties('nrfiles') - data_properties = [[{'value': 'filepath', 'iseditable': False}, - {'value': run['properties'].get('filepath'), 'iseditable': True}, - {'value': filepath, 'iseditable': False}], - [{'value': 'filename', 'iseditable': False}, - {'value': run['properties'].get('filename'), 'iseditable': True}, - {'value': filename, 'iseditable': False}], - [{'value': 'filesize', 'iseditable': False}, - {'value': run['properties'].get('filesize'), 'iseditable': True}, - {'value': filesize, 'iseditable': False}], - [{'value': 'nrfiles', 'iseditable': False}, - {'value': run['properties'].get('nrfiles'), 'iseditable': True}, - {'value': nrfiles, 'iseditable': False}]] + data_properties = [[{'value': 'filepath', 'iseditable': False}, + {'value': run.properties.get('filepath'), 'iseditable': True}, + {'value': filepath, 'iseditable': False}], + [{'value': 'filename', 'iseditable': False}, + {'value': run.properties.get('filename'), 'iseditable': True}, + {'value': filename, 'iseditable': False}], + [{'value': 'filesize', 'iseditable': False}, + {'value': 
run.properties.get('filesize'), 'iseditable': True}, + {'value': filesize, 'iseditable': False}], + [{'value': 'nrfiles', 'iseditable': False}, + {'value': run.properties.get('nrfiles'), 'iseditable': True}, + {'value': nrfiles, 'iseditable': False}]] data_attributes = [] - for key, value in run['attributes'].items(): + for key, value in run.attributes.items(): data_attributes.append([{'value': key, 'iseditable': False}, {'value': value, 'iseditable': True}]) data_bids = [] - bidsname = bids.get_bidsname(self.subid, self.sesid, run, False) + '.json' + bidsname = run.bidsname(self.subid, self.sesid, False) + '.json' if bids.check_ignore(self.target_datatype, self.bidsignore) or bids.check_ignore(bidsname, self.bidsignore, 'file') or self.target_datatype in self.ignoredatatypes: - bidskeys = run['bids'].keys() + bidskeys = run.bids.keys() else: bidskeys = [bids.entities[entity]['name'] for entity in bids.entitiesorder if entity not in ('subject','session')] + ['suffix'] # Impose the BIDS-specified order + suffix for key in bidskeys: - if key in run['bids']: - value = run['bids'].get(key) + if key in run.bids: + value = run.bids.get(key) if (self.target_datatype in self.bidsdatatypes and key=='suffix') or isinstance(value, list): iseditable = False else: @@ -1219,7 +1230,7 @@ def run2data(self) -> tuple: {'value': value, 'iseditable': iseditable}]) # NB: This can be a (menu) list data_meta = [] - for key, value in run['meta'].items(): + for key, value in run.meta.items(): data_meta.append([{'value': key, 'iseditable': True}, {'value': value, 'iseditable': True}]) @@ -1249,33 +1260,33 @@ def fill_table(self, table: QTableWidget, data: list): table.clearContents() addrow = [] if table.objectName() == 'meta': - addrow = [[{'value':'', 'iseditable': True}, {'value':'', 'iseditable': True}]] + addrow = [[{'value': '', 'iseditable': True}, {'value': '', 'iseditable': True}]] table.setRowCount(len(data + addrow)) for i, row in enumerate(data + addrow): key = row[0]['value'] 
- if table.objectName()=='bids' and key=='suffix' and self.target_datatype in self.bidsdatatypes: + if table.objectName() == 'bids' and key == 'suffix' and self.target_datatype in self.bidsdatatypes: table.setItem(i, 0, MyWidgetItem('suffix', iseditable=False)) - suffix = self.datasource.dynamicvalue(self.target_run['bids'].get('suffix','')) - suffixes = sorted(self.allowed_suffixes.get(self.target_datatype, ['']), key=str.casefold) + suffix = self.datasource.dynamicvalue(self.target_run.bids.get('suffix','')) + suffixes = sorted(self.allowed_suffixes.get(self.target_datatype, set()), key=str.casefold) suffix_dropdown = self.suffix_dropdown = QComboBox() suffix_dropdown.addItems(suffixes) suffix_dropdown.setCurrentIndex(suffix_dropdown.findText(suffix)) suffix_dropdown.currentIndexChanged.connect(self.suffix_dropdown_change) suffix_dropdown.setToolTip('The suffix that sets the different run types apart. First make sure the "Data type" dropdown-menu is set correctly before choosing the right suffix here') for n, suffix in enumerate(suffixes): - suffix_dropdown.setItemData(n, bids.get_suffixhelp(suffix, self.target_datatype), QtCore.Qt.ItemDataRole.ToolTipRole) + suffix_dropdown.setItemData(n, get_suffixhelp(suffix, self.target_datatype), QtCore.Qt.ItemDataRole.ToolTipRole) table.setCellWidget(i, 1, self.spacedwidget(suffix_dropdown)) continue for j, item in enumerate(row): value = item.get('value') - if table.objectName()=='bids' and isinstance(value, list): + if table.objectName() == 'bids' and isinstance(value, list): value_dropdown = QComboBox() value_dropdown.addItems(value[0:-1]) value_dropdown.setCurrentIndex(value[-1]) value_dropdown.currentIndexChanged.connect(partial(self.bidscell2run, i, j)) if j == 0: - value_dropdown.setToolTip(bids.get_entityhelp(key)) + value_dropdown.setToolTip(get_entityhelp(key)) table.setCellWidget(i, j, self.spacedwidget(value_dropdown)) else: value_item = MyWidgetItem(value, iseditable=item['iseditable']) @@ -1283,13 +1294,13 @@ 
def fill_table(self, table: QTableWidget, data: list): if j == 1: value_item.setToolTip('The (regex) matching pattern that for this property') if j == 2: - value_item.setToolTip(bids.get_propertieshelp(key)) - elif table.objectName()=='attributes' and j==0: - value_item.setToolTip(bids.get_attributeshelp(key)) - elif table.objectName()=='bids' and j==0: - value_item.setToolTip(bids.get_entityhelp(key)) - elif table.objectName()=='meta' and j==0: - value_item.setToolTip(bids.get_metahelp(key)) + value_item.setToolTip(get_propertieshelp(key)) + elif table.objectName() == 'attributes' and j == 0: + value_item.setToolTip(get_attributeshelp(key)) + elif table.objectName() == 'bids' and j == 0: + value_item.setToolTip(get_entityhelp(key)) + elif table.objectName() == 'meta' and j == 0: + value_item.setToolTip(get_metahelp(key)) table.setItem(i, j, value_item) table.blockSignals(False) @@ -1301,7 +1312,7 @@ def propertiescell2run(self, rowindex: int, colindex: int): if colindex == 1: key = self.properties_table.item(rowindex, 0).text().strip() value = self.properties_table.item(rowindex, 1).text().strip() - oldvalue = self.target_run['properties'].get(key) + oldvalue = self.target_run.properties.get(key) if oldvalue is None: oldvalue = '' @@ -1311,8 +1322,8 @@ def propertiescell2run(self, rowindex: int, colindex: int): f'It is discouraged to change {self.dataformat} property values unless you are an expert user. 
Do you really want to change "{oldvalue}" to "{value}"?', QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.No) if answer == QMessageBox.StandardButton.Yes: - LOGGER.verbose(f"Expert usage: User sets {self.dataformat}['{key}'] from '{oldvalue}' to '{value}' for {self.target_run['provenance']}") - self.target_run['properties'][key] = value + LOGGER.verbose(f"Expert usage: User sets {self.dataformat}['{key}'] from '{oldvalue}' to '{value}' for {self.target_run.provenance}") + self.target_run.properties[key] = value else: self.properties_table.blockSignals(True) self.properties_table.item(rowindex, 1).setText(oldvalue) @@ -1325,7 +1336,7 @@ def attributescell2run(self, rowindex: int, colindex: int): if colindex == 1: key = self.attributes_table.item(rowindex, 0).text().strip() value = self.attributes_table.item(rowindex, 1).text() - oldvalue = self.target_run['attributes'].get(key) + oldvalue = self.target_run.attributes.get(key) if oldvalue is None: oldvalue = '' @@ -1335,8 +1346,8 @@ def attributescell2run(self, rowindex: int, colindex: int): f'It is discouraged to change {self.dataformat} attribute values unless you are an expert user. 
Do you really want to change "{oldvalue}" to "{value}"?', QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.No) if answer == QMessageBox.StandardButton.Yes: - LOGGER.verbose(f"Expert usage: User sets {self.dataformat}['{key}'] from '{oldvalue}' to '{value}' for {self.target_run['provenance']}") - self.target_run['attributes'][key] = value + LOGGER.verbose(f"Expert usage: User sets {self.dataformat}['{key}'] from '{oldvalue}' to '{value}' for {self.target_run.provenance}") + self.target_run.attributes[key] = value else: self.attributes_table.blockSignals(True) self.attributes_table.item(rowindex, 1).setText(oldvalue) @@ -1351,10 +1362,10 @@ def bidscell2run(self, rowindex: int, colindex: int): if hasattr(self.bids_table.cellWidget(rowindex, 1), 'spacedwidget'): dropdown = self.bids_table.cellWidget(rowindex, 1).spacedwidget value = [dropdown.itemText(n) for n in range(len(dropdown))] + [dropdown.currentIndex()] - oldvalue = self.target_run['bids'].get(key) + oldvalue = self.target_run.bids.get(key) else: value = self.bids_table.item(rowindex, 1).text().strip() - oldvalue = self.target_run['bids'].get(key) + oldvalue = self.target_run.bids.get(key) if oldvalue is None: oldvalue = '' @@ -1370,15 +1381,15 @@ def bidscell2run(self, rowindex: int, colindex: int): f'It is discouraged to remove the <> run-index. 
Do you really want to change "{oldvalue}" to "{value}"?', QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No, QMessageBox.StandardButton.No) if answer == QMessageBox.StandardButton.Yes: - LOGGER.verbose(f"Expert usage: User sets bids['{key}'] from '{oldvalue}' to '{value}' for {self.target_run['provenance']}") + LOGGER.verbose(f"Expert usage: User sets bids['{key}'] from '{oldvalue}' to '{value}' for {self.target_run.provenance}") else: value = oldvalue self.bids_table.item(rowindex, 1).setText(oldvalue) - LOGGER.verbose(f"User sets bids['{key}'] from '{oldvalue}' to '{value}' for {self.target_run['provenance']}") + LOGGER.verbose(f"User sets bids['{key}'] from '{oldvalue}' to '{value}' for {self.target_run.provenance}") else: - LOGGER.verbose(f"User sets bids['{key}'] from '{oldvalue}' to '{value}' for {self.target_run['provenance']}") + LOGGER.verbose(f"User sets bids['{key}'] from '{oldvalue}' to '{value}' for {self.target_run.provenance}") self.bids_table.blockSignals(False) - self.target_run['bids'][key] = value + self.target_run.bids[key] = value self.refresh_bidsname() def metacell2run(self, rowindex: int, colindex: int): @@ -1386,7 +1397,7 @@ def metacell2run(self, rowindex: int, colindex: int): key = self.meta_table.item(rowindex, 0).text().strip() value = self.meta_table.item(rowindex, 1).text().strip() - oldvalue = self.target_run['meta'].get(key) + oldvalue = self.target_run.meta.get(key) if oldvalue is None: oldvalue = '' if value != oldvalue: @@ -1396,17 +1407,17 @@ def metacell2run(self, rowindex: int, colindex: int): self.meta_table.blockSignals(True) self.meta_table.item(rowindex, 1).setText(value) self.meta_table.blockSignals(False) - LOGGER.verbose(f"User sets meta['{key}'] from '{oldvalue}' to '{value}' for {self.target_run['provenance']}") + LOGGER.verbose(f"User sets meta['{key}'] from '{oldvalue}' to '{value}' for {self.target_run.provenance}") # Read all the meta-data from the table and store it in the target_run - 
self.target_run['meta'] = {} + self.target_run.meta = {} for n in range(self.meta_table.rowCount()): key_ = self.meta_table.item(n, 0).text().strip() value_ = self.meta_table.item(n, 1).text().strip() if key_: try: value_ = ast.literal_eval(value_) # E.g. convert stringified list or int back to list or int except (ValueError, SyntaxError): pass - self.target_run['meta'][key_] = value_ + self.target_run.meta[key_] = value_ elif value_: QMessageBox.warning(self, 'Input error', f"Please enter a key-name (left cell) for the '{value_}' value in row {n+1}") @@ -1422,28 +1433,28 @@ def change_run(self, suffix_idx): """ # Add a check to see if we can still read the source data - if not Path(self.target_run['provenance']).is_file(): - LOGGER.warning(f"Can no longer find the source file: {self.target_run['provenance']}") - QMessageBox.warning(self, 'Edit BIDS mapping', f"Cannot reliably change the datatype and/or suffix because the source file '{self.target_run['provenance']}' can no longer be found.\n\nPlease restore the source data or use the `bidsmapper -s` option to solve this issue. Resetting the run-item now...") + if not Path(self.target_run.provenance).is_file(): + LOGGER.warning(f"Can no longer find the source file: {self.target_run.provenance}") + QMessageBox.warning(self, 'Edit BIDS mapping', f"Cannot reliably change the datatype and/or suffix because the source file '{self.target_run.provenance}' can no longer be found.\n\nPlease restore the source data or use the `bidsmapper -s` option to solve this issue. Resetting the run-item now...") self.reset() return # Get the new target_run from the template - new_target_run = bids.get_run(self.template_bidsmap, self.target_datatype, suffix_idx, self.datasource) + new_target_run = self.template_bidsmap.get_run(self.target_datatype, suffix_idx, self.datasource) if not new_target_run: QMessageBox.warning(self, 'Edit BIDS mapping', f"Cannot find the {self.target_datatype}[{suffix_idx}] datatype in your template. 
Resetting the run-item now...") self.reset() return - old_entities = self.target_run['bids'] + old_entities = self.target_run.bids self.target_run = copy.deepcopy(new_target_run) # Transfer the old entity data to the new run-item if possible and if it's not there yet for key, val in old_entities.items(): - if val and key in self.target_run['bids'] and not self.target_run['bids'][key]: - self.target_run['bids'][key] = val + if val and key in self.target_run.bids and not self.target_run.bids[key]: + self.target_run.bids[key] = val # Insert the new target_run in our target_bidsmap - bids.update_bidsmap(self.target_bidsmap, self.current_datatype, self.target_run) + self.target_bidsmap.update(self.current_datatype, self.target_run) # Now that we have updated the bidsmap, we can also update the current_datatype self.current_datatype = self.target_datatype @@ -1456,7 +1467,7 @@ def datatype_dropdown_change(self): self.target_datatype = self.datatype_dropdown.currentText() - LOGGER.verbose(f"User changes the BIDS data type from '{self.current_datatype}' to '{self.target_datatype}' for {self.target_run['provenance']}") + LOGGER.verbose(f"User changes the BIDS data type from '{self.current_datatype}' to '{self.target_datatype}' for {self.target_run.provenance}") self.change_run(0) @@ -1465,7 +1476,7 @@ def suffix_dropdown_change(self): target_suffix = self.suffix_dropdown.currentText() - LOGGER.verbose(f"User changes the BIDS suffix from '{self.target_run['bids'].get('suffix')}' to '{target_suffix}' for {self.target_run['provenance']}") + LOGGER.verbose(f"User changes the BIDS suffix from '{self.target_run.bids.get('suffix')}' to '{target_suffix}' for {self.target_run.provenance}") self.change_run(target_suffix) @@ -1473,7 +1484,7 @@ def refresh_bidsname(self): """Updates the bidsname with the current (edited) bids values""" ignore = bids.check_ignore(self.target_datatype,self.bidsignore) or self.target_datatype in self.ignoredatatypes - bidsname = 
(Path(self.target_datatype)/bids.get_bidsname(self.subid, self.sesid, self.target_run, not ignore)).with_suffix('.*') + bidsname = (Path(self.target_datatype)/self.target_run.bidsname(self.subid, self.sesid, not ignore)).with_suffix('.*') font = self.bidsname_textbox.font() if bids.check_ignore(self.target_datatype, self.bidsignore) or bids.check_ignore(bidsname.name, self.bidsignore, 'file'): @@ -1488,7 +1499,7 @@ def refresh_bidsname(self): self.bidsname_textbox.setToolTip(f"Gray/Strike-out: This '{self.target_datatype}' data type will be ignored and not converted BIDS. Click 'OK' if you want your BIDS output data to look like this") self.bidsname_textbox.setTextColor(QtGui.QColor('gray')) font.setStrikeOut(True) - elif not all(bids.check_run(self.target_datatype, self.target_run, checks=(False, True, True))[1:3]): + elif not all(self.target_run.check(self.target_datatype, checks=(False, True, True))[1:3]): self.bidsname_textbox.setToolTip(f"Red: This name is not valid according to the BIDS standard -- see terminal output for more info") self.bidsname_textbox.setTextColor(QtGui.QColor('red')) font.setStrikeOut(False) @@ -1533,7 +1544,7 @@ def accept_run(self): # Check if the bidsname is valid bidsname = Path(self.bidsname_textbox.toPlainText()) - validrun = False not in bids.check_run(self.target_datatype, self.target_run, checks=(False, False, False))[1:3] + validrun = False not in self.target_run.check(self.target_datatype, checks=(False, False, False))[1:3] bidsvalid = validrun if not (bids.check_ignore(self.target_datatype,self.bidsignore) or bids.check_ignore(bidsname.name,self.bidsignore,'file') or self.target_datatype in self.ignoredatatypes): for ext in extensions: # NB: `ext` used to be '.json', which is more generic (but see https://github.com/bids-standard/bids-validator/issues/2113) @@ -1547,9 +1558,9 @@ def accept_run(self): message = f'The run-item does not seem to be valid but the "{bidsname}" name does pass the bids-validator test' elif not 
validrun: message = f'The "{bidsname}" name is not valid according to the BIDS standard' - elif self.target_datatype=='fmap' and not (self.target_run['meta'].get('B0FieldSource') or - self.target_run['meta'].get('B0FieldIdentifier') or - self.target_run['meta'].get('IntendedFor')): + elif self.target_datatype=='fmap' and not (self.target_run.meta.get('B0FieldSource') or + self.target_run.meta.get('B0FieldIdentifier') or + self.target_run.meta.get('IntendedFor')): message = f'The "B0FieldIdentifier/IntendedFor" meta-data is left empty for {bidsname} (not recommended)' if message: answer = QMessageBox.question(self, 'Edit BIDS mapping', f'WARNING:\n{message}\n\nDo you want to go back and edit the run?', @@ -1558,12 +1569,10 @@ def accept_run(self): LOGGER.warning(message) LOGGER.verbose(f'User approves the edit') - if re.sub('<(?!.*<).*? object at .*?>','',str(self.target_run)) != re.sub('<(?!.*<).*? object at .*?>','',str(self.source_run)): # Ignore the memory address of the datasource object - bids.update_bidsmap(self.target_bidsmap, self.current_datatype, self.target_run) - + if self.target_run != self.source_run: + self.target_bidsmap.update(self.current_datatype, self.target_run) self.done_edit.emit(self.target_bidsmap, self.dataformat) self.done(1) - else: self.done(2) @@ -1574,10 +1583,9 @@ def export_run(self): str(bidsmap_template), 'YAML Files (*.yaml *.yml);;All Files (*)') if yamlfile: LOGGER.info(f'Exporting run item: bidsmap[{self.dataformat}][{self.target_datatype}] -> {yamlfile}') - yamlfile = Path(yamlfile) - bidsmap, _ = bids.load_bidsmap(yamlfile, checks=(False, False, False)) - bids.insert_run(bidsmap, self.target_run) - bids.save_bidsmap(yamlfile, bidsmap) + bidsmap = BidsMap(Path(yamlfile), checks=(False, False, False)) + bidsmap.insert_run(self.target_run) + bidsmap.save() QMessageBox.information(self, 'Edit BIDS mapping', f"Successfully exported:\n\nbidsmap[{self.dataformat}][{self.target_datatype}] -> {yamlfile}") def inspect_sourcefile(self, 
rowindex: int=None, colindex: int=None): @@ -1585,9 +1593,9 @@ def inspect_sourcefile(self, rowindex: int=None, colindex: int=None): if colindex in (0,2): if rowindex == 0: - QtGui.QDesktopServices.openUrl(QtCore.QUrl.fromLocalFile(str(Path(self.target_run['provenance']).parent))) + QtGui.QDesktopServices.openUrl(QtCore.QUrl.fromLocalFile(str(Path(self.target_run.provenance).parent))) if rowindex == 1: - self.popup = InspectWindow(Path(self.target_run['provenance'])) + self.popup = InspectWindow(Path(self.target_run.provenance)) self.popup.show() self.popup.scrollbar.setValue(0) # This can only be done after self.popup.show() @@ -1616,7 +1624,7 @@ def get_help(self): class CompareWindow(QDialog): - def __init__(self, runs: List[Run], subid: List[str], sesid: List[str]): + def __init__(self, runs: List[RunItem], subid: List[str], sesid: List[str]): super().__init__() # Set up the window @@ -1637,7 +1645,7 @@ def __init__(self, runs: List[Run], subid: List[str], sesid: List[str]): properties_label.setToolTip('The filesystem properties that match with (identify) the source file') properties_table = self.fill_table(data_properties, 'properties') properties_table.setToolTip('The filesystem property that matches with the source file') - properties_table.cellDoubleClicked.connect(partial(self.inspect_sourcefile, run['provenance'])) + properties_table.cellDoubleClicked.connect(partial(self.inspect_sourcefile, run.provenance)) # Set up the attributes table attributes_label = QLabel('Attributes') @@ -1657,8 +1665,8 @@ def __init__(self, runs: List[Run], subid: List[str], sesid: List[str]): meta_table = self.fill_table(data_meta, 'meta', minimum=False) meta_table.setToolTip('The key-value pair that will be appended to the (e.g. 
dcm2niix-produced) json sidecar file') - bidsname = bids.get_bidsname(subid[index], sesid[index], run, False) + '.*' - groupbox = QGroupBox(f"{run['datasource'].datatype}/{bidsname}") + bidsname = run.bidsname(subid[index], sesid[index], False) + '.*' + groupbox = QGroupBox(f"{run.datatype}/{bidsname}") layout = QVBoxLayout() layout.addWidget(properties_label) layout.addWidget(properties_table) @@ -1675,38 +1683,40 @@ def __init__(self, runs: List[Run], subid: List[str], sesid: List[str]): self.show() - def run2data(self, run) -> tuple: + @staticmethod + def run2data(run) -> tuple: """Derive the tabular data from the target_run, needed to render the compare window :return: (data_properties, data_attributes, data_bids, data_meta) """ - data_properties = [['filepath', run['properties'].get('filepath'), run['datasource'].properties('filepath')], - ['filename', run['properties'].get('filename'), run['datasource'].properties('filename')], - ['filesize', run['properties'].get('filesize'), run['datasource'].properties('filesize')], - ['nrfiles', run['properties'].get('nrfiles'), run['datasource'].properties('nrfiles')]] + data_properties = [['filepath', run.properties.get('filepath'), run.datasource.properties('filepath')], + ['filename', run.properties.get('filename'), run.datasource.properties('filename')], + ['filesize', run.properties.get('filesize'), run.datasource.properties('filesize')], + ['nrfiles', run.properties.get('nrfiles'), run.datasource.properties('nrfiles')]] data_attributes = [] - for key in sorted(run['attributes'].keys()): - value = run['attributes'].get(key) + for key in sorted(run.attributes.keys()): + value = run.attributes.get(key) data_attributes.append([key, value]) data_bids = [] bidskeys = [bids.entities[entity]['name'] for entity in bids.entitiesorder if entity not in ('subject','session')] + ['suffix'] # Impose the BIDS-specified order + suffix for key in bidskeys: - if key in run['bids']: - value = run['bids'].get(key) + if key in run.bids: 
+ value = run.bids.get(key) if isinstance(value, list): value = value[value[-1]] data_bids.append([key, value]) data_meta = [] - for key in sorted(run['meta'].keys()): - value = run['meta'].get(key) + for key in sorted(run.meta.keys()): + value = run.meta.get(key) data_meta.append([key, value]) return data_properties, data_attributes, data_bids, data_meta - def fill_table(self, data: list, name: str, minimum: bool=True) -> QTableWidget: + @staticmethod + def fill_table(data: list, name: str, minimum: bool=True) -> QTableWidget: """Return a table widget filled with the data""" nrcolumns = len(data[0]) if data else 2 # Always at least two columns (i.e. key, value) @@ -1823,7 +1833,13 @@ def __init__(self, value: Union[str,Path]='', iseditable: bool=True): super().__init__() self.setText(value) - self.seteditable(iseditable) + + self.iseditable = iseditable + if iseditable: + self.setFlags(QtCore.Qt.ItemFlag.ItemIsEnabled | QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEditable) + else: + self.setFlags(QtCore.Qt.ItemFlag.ItemIsEnabled) + self.setForeground(QtGui.QColor('gray')) def setText(self, p_str): """Catch int and None""" @@ -1833,16 +1849,139 @@ def setText(self, p_str): super(MyWidgetItem, self).setText(str(p_str)) - def seteditable(self, iseditable: bool=True): - """Make the WidgetItem editable""" - self.iseditable = iseditable +def get_propertieshelp(propertieskey: str) -> str: + """ + Reads the description of a matching attributes key in the source dictionary - if iseditable: - self.setFlags(QtCore.Qt.ItemFlag.ItemIsEnabled | QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEditable) - else: - self.setFlags(QtCore.Qt.ItemFlag.ItemIsEnabled) - self.setForeground(QtGui.QColor('gray')) + :param propertieskey: The properties key for which the help text is obtained + :return: The obtained help text + """ + + # Return the description from the DICOM dictionary or a default text + if propertieskey == 'filepath': + return 'The path of 
the source file that is matched against the (regex) pattern' + if propertieskey == 'filename': + return 'The name of the source file that is matched against the (regex) pattern' + if propertieskey == 'filesize': + return 'The size of the source file that is matched against the (regex) pattern' + if propertieskey == 'nrfiles': + return 'The nr of similar files in the folder that matched against the properties (regex) patterns' + + return f"{propertieskey} is not a valid property-key" + + +def get_attributeshelp(attributeskey: str) -> str: + """ + Reads the description of a matching attributes key in the source dictionary + + TODO: implement PAR/REC support + + :param attributeskey: The attribute key for which the help text is obtained + :return: The obtained help text + """ + + if not attributeskey: + return 'Please provide a key-name' + + # Return the description from the DICOM dictionary or a default text + try: + return f"{attributeskey}\nThe DICOM '{datadict.dictionary_description(attributeskey)}' attribute" + + except ValueError: + return f"{attributeskey}\nAn unknown/private attribute" + + +def get_datatypehelp(datatype: Union[str, DataType]) -> str: + """ + Reads the description of the datatype in the schema/objects/datatypes.yaml file + + :param datatype: The datatype for which the help text is obtained + :return: The obtained help text + """ + + datatype = str(datatype) + + if not datatype: + return "Please provide a datatype" + + # Return the description for the datatype or a default text + if datatype in bids.bidsdatatypesdef: + return f"{bids.bidsdatatypesdef[datatype]['display_name']}\n{bids.bidsdatatypesdef[datatype]['description']}" + + return f"{datatype}\nAn unknown/private datatype" + + +def get_suffixhelp(suffix: str, datatype: Union[str, DataType]) -> str: + """ + Reads the description of the suffix in the schema/objects/suffixes.yaml file + + :param suffix: The suffix for which the help text is obtained + :param datatype: The datatype of the 
suffix + :return: The obtained help text + """ + + if not suffix: + return "Please provide a suffix" + + isderivative = '' + if suffix in bids.get_derivatives(datatype): + isderivative = '\nNB: This is a BIDS derivatives datatype' + + # Return the description for the suffix or a default text + if suffix in bids.suffixes: + return f"{bids.suffixes[suffix]['display_name']}\n{bids.suffixes[suffix]['description']}{isderivative}" + + return f"{suffix}\nAn unknown/private suffix" + + +def get_entityhelp(entitykey: str) -> str: + """ + Reads the description of a matching entity=entitykey in the schema/entities.yaml file + + :param entitykey: The bids key for which the help text is obtained + :return: The obtained help text + """ + + if not entitykey: + return "Please provide a key-name" + + # Return the description from the entities or a default text + for entity in bids.entities: + if bids.entities[entity]['name'] == entitykey: + return f"{bids.entities[entity]['display_name']}\n{bids.entities[entity]['description']}" + + return f"{entitykey}\nAn unknown/private entity" + + +def get_metahelp(metakey: str) -> str: + """ + Reads the description of a matching schema/metadata/metakey.yaml file + + :param metakey: The meta key for which the help text is obtained + :return: The obtained help text + """ + + if not metakey: + return "Please provide a key-name" + + # Return the description from the metadata file or a default text + for field in bids.metafields: + if metakey == bids.metafields[field].get('name'): + description = bids.metafields[field]['description'] + if metakey == 'IntendedFor': # IntendedFor is a special search-pattern field in BIDScoin + description += ('\nNB: These associated files can be dynamically searched for' + '\nduring bidscoiner runtime with glob-style matching patterns,' + '\n"such as <>" or <>' + '\n(see documentation)') + if metakey in ('B0FieldIdentifier', 'B0FieldSource'): # <> is a special dynamic value in BIDScoin + description += ('\nNB: The 
`<>` (sub)string will be replaced by the' + '\nsession label during bidscoiner runtime. In this way you can' + '\ncreate session-specific B0FieldIdentifier/Source tags (recommended)') + + return f"{bids.metafields[field]['display_name']}\n{description}" + + return f"{metakey}\nAn unknown/private meta key" def bidseditor(bidsfolder: str, bidsmap: str='', template: str=bidsmap_template) -> None: @@ -1865,11 +2004,12 @@ def bidseditor(bidsfolder: str, bidsmap: str='', template: str=bidsmap_template) LOGGER.info(f">>> bidseditor bidsfolder={bidsfolder} bidsmap={bidsmapfile} template={templatefile}") # Obtain the initial bidsmap info - template_bidsmap, templatefile = bids.load_bidsmap(templatefile, checks=(True, True, False)) - input_bidsmap, bidsmapfile = bids.load_bidsmap(bidsmapfile, bidsfolder/'code'/'bidscoin') - bids.check_template(template_bidsmap) - if input_bidsmap.get('Options'): - template_bidsmap['Options'] = input_bidsmap['Options'] # Always use the options of the input bidsmap + template_bidsmap = BidsMap(templatefile, checks=(True, True, False)) + input_bidsmap = BidsMap(bidsmapfile, bidsfolder/'code'/'bidscoin') + template_bidsmap.check_template() + if input_bidsmap.options: + template_bidsmap.options = input_bidsmap.options # Always use the options of the input bidsmap + template_bidsmap.plugins = input_bidsmap.plugins # Always use the plugins of the input bidsmap # Start the Qt-application app = QApplication(sys.argv) diff --git a/bidscoin/bidsmapper.py b/bidscoin/bidsmapper.py index 788522cf..87aeb228 100755 --- a/bidscoin/bidsmapper.py +++ b/bidscoin/bidsmapper.py @@ -19,12 +19,12 @@ if find_spec('bidscoin') is None: sys.path.append(str(Path(__file__).parents[1])) from bidscoin import bcoin, bids, lsdirs, trackusage, check_version, __version__ -from bidscoin.bids import Bidsmap +from bidscoin.bids import BidsMap _, uptodate, versionmessage = check_version() -def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, plugins: 
list, subprefix: str, sesprefix: str, unzip: str, store: bool=False, automated: bool=False, force: bool=False, no_update: bool=False) -> dict: +def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, plugins: list, subprefix: str, sesprefix: str, unzip: str, store: bool=False, automated: bool=False, force: bool=False, no_update: bool=False) -> BidsMap: """ Main function that processes all the subjects and session in the sourcefolder and that generates a fully filled-in bidsmap.yaml file in bidsfolder/code/bidscoin. Folders in sourcefolder are assumed to contain a single dataset. @@ -56,7 +56,7 @@ def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, LOGGER.bcdebug(f"Regular expression metacharacters found in {sesprefix}, this may cause errors later on...") if not rawfolder.is_dir(): print(f"Rawfolder '{rawfolder}' not found") - return {} + exit(1) # Start logging if force: @@ -71,45 +71,41 @@ def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, f"template={templatefile} plugins={plugins} subprefix={subprefix} sesprefix={sesprefix} store={store} force={force}") # Get the heuristics for filling the new bidsmap (NB: plugins are stored in the bidsmaps) - bidsmap_old, bidsmapfile = bids.load_bidsmap(bidsmapfile, bidscoinfolder, plugins) - template, _ = bids.load_bidsmap(templatefile, plugins=plugins, checks=(True, True, False)) - bids.check_template(template) + bidsmap_old = BidsMap(bidsmapfile, bidscoinfolder, plugins) + template = BidsMap(templatefile, plugins=plugins, checks=(True, True, False)) + template.check_template() # Create the new bidsmap as a copy / bidsmap skeleton with no data type entries (i.e. 
bidsmap with empty lists) - if force and bidsmapfile.is_file(): - LOGGER.info(f"Deleting previous bidsmap: {bidsmapfile}") - bidsmapfile.unlink() - bidsmap_old = {} - if bidsmap_old: - bidsmap_new = copy.deepcopy(bidsmap_old) - else: - bidsmap_new = copy.deepcopy(template) - template['Options'] = bidsmap_new['Options'] # Always use the options of the new bidsmap + if force and bidsmap_old.filepath.name: + LOGGER.info(f"Deleting previous bidsmap: {bidsmap_old.filepath}") + bidsmap_old.filepath.unlink() + bidsmap_old.filepath = Path() + bidsmap_new = copy.deepcopy(bidsmap_old if bidsmap_old.filepath.name else template) + template.options = bidsmap_new.options # Always use the options of the new bidsmap + template.plugins = bidsmap_new.plugins # Always use the plugins of the new bidsmap if unzip: - bidsmap_new['Options']['bidscoin']['unzip'] = unzip + bidsmap_new.options['unzip'] = unzip else: - unzip = bidsmap_new['Options']['bidscoin'].get('unzip','') - for dataformat in bidsmap_new: - if dataformat in ('$schema', 'Options'): continue - for datatype in bidsmap_new[dataformat] or []: - if datatype not in ('subject', 'session'): - bidsmap_new[dataformat][datatype] = [] - - # Store/retrieve the empty or user-defined sub-/ses-prefix + unzip = bidsmap_new.options.get('unzip','') + for dataformat in bidsmap_new.dataformats: + for datatype in dataformat.datatypes: + datatype.delete_runs() + + # Store/retrieve the empty or user-defined sub-/ses-prefix. 
The new bidsmap is now ready to be populated subprefix, sesprefix = setprefix(bidsmap_new, subprefix, sesprefix, rawfolder, update = not no_update) - # Start with an empty skeleton if we didn't have an old bidsmap - if not bidsmap_old: + # Start with an empty skeleton if we don't have an old bidsmap (due to loading failure or deletion by force) + if not bidsmap_old.filepath.name: bidsmap_old = copy.deepcopy(bidsmap_new) # Import the data scanning plugins - plugins = [bcoin.import_plugin(plugin, ('bidsmapper_plugin',)) for plugin in bidsmap_new['Options']['plugins']] + plugins = [bcoin.import_plugin(plugin, ('bidsmapper_plugin',)) for plugin in bidsmap_new.plugins] plugins = [plugin for plugin in plugins if plugin] # Filter the empty items from the list if not plugins: LOGGER.warning(f"The plugins listed in your bidsmap['Options'] did not have a usable `bidsmapper_plugin` function, nothing to do") LOGGER.info('-------------- FINISHED! ------------') LOGGER.info('') - return {} + return bidsmap_new # Loop over all subjects and sessions and built up the bidsmap entries subjects = lsdirs(rawfolder, ('' if subprefix=='*' else subprefix) + '*') @@ -129,16 +125,14 @@ def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, sesfolders, unpacked = bids.unpack(session, unzip) for sesfolder in sesfolders: if store: - store = {'source': sesfolder.parent.parent.parent.parent if unpacked else rawfolder.parent, - 'target': bidscoinfolder/'provenance'} - else: - store = {} + bidsmap_new.store = {'source': sesfolder.parent.parent.parent.parent if unpacked else rawfolder.parent, + 'target': bidscoinfolder/'provenance'} # Run the bidsmapper plugins for module in plugins: LOGGER.verbose(f"Executing plugin: {Path(module.__file__).stem} -> {sesfolder}") trackusage(Path(module.__file__).stem) - module.bidsmapper_plugin(sesfolder, bidsmap_new, bidsmap_old, template, store) + module.bidsmapper_plugin(sesfolder, bidsmap_new, bidsmap_old, template) # Clean-up the 
temporary unpacked data if unpacked: @@ -146,7 +140,7 @@ def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, # Save the new study bidsmap in the bidscoinfolder or launch the bidseditor UI_MainWindow if automated: - bids.save_bidsmap(bidsmapfile, bidsmap_new) + bidsmap_new.save() else: LOGGER.info('Opening the bidseditor') @@ -157,12 +151,12 @@ def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, except ImportError: import bidseditor # This should work if bidscoin was not pip-installed app = QApplication(sys.argv) - app.setApplicationName(f"{bidsmapfile} - BIDS editor {__version__}") + app.setApplicationName(f"{bidsmap_new.filepath} - BIDS editor {__version__}") mainwin = bidseditor.MainWindow(bidsfolder, bidsmap_new, template) mainwin.show() - if not bidsmapfile.is_file() or not uptodate: + if not bidsmap_new.filepath.name or not uptodate: messagebox = QMessageBox(mainwin) messagebox.setText(f"The bidsmapper has finished scanning {rawfolder}\n\n" f"Please carefully check all the different BIDS output names " @@ -185,9 +179,9 @@ def bidsmapper(sourcefolder: str, bidsfolder: str, bidsmap: str, template: str, return bidsmap_new -def setprefix(bidsmap: Bidsmap, subprefix: str, sesprefix: str, rawfolder: Path, update: bool=True) -> tuple: +def setprefix(bidsmap: BidsMap, subprefix: str, sesprefix: str, rawfolder: Path, update: bool=True) -> tuple: """ - Set the prefix in the Options, subject, session and in all the run['datasource'] objects + Set the prefix in the Options, subject, session :param bidsmap: The bidsmap with the data :param subprefix: The subprefix (take value from bidsmap if empty) @@ -198,42 +192,34 @@ def setprefix(bidsmap: Bidsmap, subprefix: str, sesprefix: str, rawfolder: Path, """ # Get/set the sub-/ses-prefixes in the 'Options' - oldsubprefix = bidsmap['Options']['bidscoin'].get('subprefix','') - oldsesprefix = bidsmap['Options']['bidscoin'].get('sesprefix','') + oldsubprefix = 
bidsmap.options.get('subprefix','') + oldsesprefix = bidsmap.options.get('sesprefix','') if not subprefix: subprefix = oldsubprefix # Use the default value from the bidsmap if not sesprefix: sesprefix = oldsesprefix # Use the default value from the bidsmap - bidsmap['Options']['bidscoin']['subprefix'] = subprefix - bidsmap['Options']['bidscoin']['sesprefix'] = sesprefix + bidsmap.options['subprefix'] = subprefix + bidsmap.options['sesprefix'] = sesprefix # Update the bidsmap dataformat sections reprefix = lambda prefix: '' if prefix=='*' else re.escape(prefix).replace(r'\-','-') - for dataformat in bidsmap: - if not bidsmap[dataformat] or dataformat in ('$schema','Options'): continue - - # Update the run-DataSources - for datatype in bidsmap[dataformat]: - if not isinstance(bidsmap[dataformat][datatype], list): continue # E.g. 'subject' and 'session' - for run in bidsmap[dataformat][datatype]: - run['datasource'].subprefix = subprefix - run['datasource'].sesprefix = sesprefix + for dataformat in bidsmap.dataformats: # Replace the sub-/ses-prefixes in the dynamic filepath values of bidsmap[dataformat]['subject'] and ['session'] - if update and bidsmap[dataformat]['subject'].startswith('< int: return 0 -def is_sourcefile(file: Path) -> str: +def has_support(file: Path) -> str: """ This plugin function assesses whether a sourcefile is of a supported dataformat @@ -112,7 +112,7 @@ def is_sourcefile(file: Path) -> str: if file.is_file(): - LOGGER.verbose(f'This is a demo-plugin is_sourcefile routine, assessing whether "{file}" has a valid dataformat') + LOGGER.verbose(f'This is a demo-plugin has_support routine, assessing whether "{file}" has a valid dataformat') return 'dataformat' if file == 'supportedformat' else '' return '' diff --git a/bidscoin/plugins/dcm2niix2bids.py b/bidscoin/plugins/dcm2niix2bids.py index d43b4792..8635d347 100644 --- a/bidscoin/plugins/dcm2niix2bids.py +++ b/bidscoin/plugins/dcm2niix2bids.py @@ -9,14 +9,13 @@ import dateutil.parser import 
pandas as pd import json -import shutil import ast from bids_validator import BIDSValidator from typing import Union, List from pathlib import Path from bidscoin import bcoin, bids, lsdirs, due, Doi from bidscoin.utilities import physio -from bidscoin.bids import Bidsmap, Plugin +from bidscoin.bids import BidsMap, Plugin, Plugins, DataFormat try: from nibabel.testing import data_path except ImportError: @@ -41,7 +40,7 @@ def test(options: Plugin=OPTIONS) -> int: """ Performs shell tests of dcm2niix - :param options: A dictionary with the plugin options, e.g. taken from the bidsmap['Options']['plugins']['dcm2niix2bids'] + :param options: A dictionary with the plugin options, e.g. taken from the bidsmap.plugins['dcm2niix2bids'] :return: The errorcode (e.g 0 if the tool generated the expected result, > 0 if there was a tool error) """ @@ -62,7 +61,7 @@ def test(options: Plugin=OPTIONS) -> int: # Test reading an attribute from a PAR-file parfile = Path(data_path)/'phantom_EPI_asc_CLEAR_2_1.PAR' try: - assert is_sourcefile(parfile) == 'PAR' + assert has_support(parfile) == 'PAR' assert get_attribute('PAR', parfile, 'exam_name', options) == 'Konvertertest' except Exception as pluginerror: LOGGER.error(f"Could not read attribute(s) from {parfile}:\n{pluginerror}") @@ -71,14 +70,18 @@ def test(options: Plugin=OPTIONS) -> int: return errorcode if errorcode != 3 else 0 -def is_sourcefile(file: Path) -> str: +def has_support(file: Path, dataformat: Union[DataFormat, str]='') -> str: """ - This plugin function supports assessing whether the file is a valid sourcefile + This plugin function assesses whether a sourcefile is of a supported dataformat - :param file: The file that is assessed - :return: The valid dataformat of the file for this plugin + :param file: The sourcefile that is assessed + :param dataformat: The requested dataformat (optional requirement) + :return: The valid/supported dataformat of the sourcefile """ + if dataformat and dataformat not in ('DICOM', 
'PAR'): + return '' + if bids.is_dicomfile(file): # To support pet2bids add: and bids.get_dicomfield('Modality', file) != 'PT' return 'DICOM' @@ -88,14 +91,14 @@ def is_sourcefile(file: Path) -> str: return '' -def get_attribute(dataformat: str, sourcefile: Path, attribute: str, options: Plugin) -> Union[str, int]: +def get_attribute(dataformat: str, sourcefile: Path, attribute: str, options) -> Union[str, int]: """ This plugin supports reading attributes from DICOM and PAR dataformats :param dataformat: The bidsmap-dataformat of the sourcefile, e.g. DICOM of PAR :param sourcefile: The sourcefile from which the attribute value should be read :param attribute: The attribute key for which the value should be read - :param options: A dictionary with the plugin options, e.g. taken from the bidsmap['Options']['plugins'] + :param options: A dictionary with the plugin options, e.g. taken from the bidsmap.plugins :return: The attribute value """ if dataformat == 'DICOM': @@ -105,7 +108,7 @@ def get_attribute(dataformat: str, sourcefile: Path, attribute: str, options: Pl return bids.get_parfield(attribute, sourcefile) -def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, template: Bidsmap, store: dict) -> None: +def bidsmapper_plugin(session: Path, bidsmap_new: BidsMap, bidsmap_old: BidsMap, template: BidsMap) -> None: """ All the logic to map the DICOM/PAR source fields onto bids labels go into this function @@ -113,12 +116,11 @@ def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, :param bidsmap_new: The new study bidsmap that we are building :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap :param template: The template bidsmap with the default heuristics - :param store: The paths of the source- and target-folder :return: """ # Get started - plugins = {'dcm2niix2bids': Plugin(bidsmap_new['Options']['plugins']['dcm2niix2bids'])} + plugins = Plugins({'dcm2niix2bids': 
bidsmap_new.plugins['dcm2niix2bids']}) datasource = bids.get_datasource(session, plugins) dataformat = datasource.dataformat if not dataformat: @@ -130,18 +132,13 @@ def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, for sourcedir in lsdirs(session, '**/*'): for n in range(1): # Option: Use range(2) to scan two files and catch e.g. magnitude1/2 fieldmap files that are stored in one Series folder (but bidscoiner sees only the first file anyhow and it makes bidsmapper 2x slower :-() sourcefile = bids.get_dicomfile(sourcedir, n) - if sourcefile.name and is_sourcefile(sourcefile): + if sourcefile.name and has_support(sourcefile): sourcefiles.append(sourcefile) elif dataformat == 'PAR': sourcefiles = bids.get_parfiles(session) else: LOGGER.error(f"Unsupported dataformat '{dataformat}'") - # Extra bidsmap check - if not template[dataformat] and not bidsmap_old[dataformat]: - LOGGER.error(f"No {dataformat} source information found in the study and template bidsmap") - return - # Update the bidsmap with the info from the source files for sourcefile in sourcefiles: @@ -153,57 +150,48 @@ def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, break # See if we can find a matching run in the old bidsmap - datasource = bids.DataSource(sourcefile, plugins, dataformat) - run, match = bids.get_matching_run(datasource, bidsmap_old) + datasource = bids.DataSource(sourcefile, plugins, dataformat, bidsmap_new.options) + run, match = bidsmap_old.get_matching_run(datasource) # If not, see if we can find a matching run in the template if not match: LOGGER.bcdebug('No match found in the study bidsmap, now trying the template bidsmap') - run, _ = bids.get_matching_run(datasource, template) + run, _ = template.get_matching_run(datasource) # See if we have collected the run somewhere in our new bidsmap - if not bids.exist_run(bidsmap_new, '', run): + if not bidsmap_new.exist_run(run): # Communicate with the user if the run was not 
present in bidsmap_old or in template, i.e. that we found a new sample if not match: - LOGGER.info(f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}") + LOGGER.info(f"Discovered sample: {datasource}") # Try to automagically set the {part: phase/imag/real} (should work for Siemens data) - if not datasource.datatype=='' and 'part' in run['bids'] and not run['bids']['part'][-1] and run['attributes'].get('ImageType'): # part[-1]==0 -> part is not specified - imagetype = ast.literal_eval(run['attributes']['ImageType']) # E.g. ImageType = "['ORIGINAL', 'PRIMARY', 'M', 'ND']" + if not run.datatype == '' and 'part' in run.bids and not run.bids['part'][-1] and run.attributes.get('ImageType'): # part[-1]==0 -> part is not specified + imagetype = ast.literal_eval(run['attributes']['ImageType']) # E.g. ImageType = "['ORIGINAL', 'PRIMARY', 'M', 'ND']" if 'P' in imagetype: - pass - run['bids']['part'][-1] = run['bids']['part'].index('phase') # E.g. part = ['', mag, phase, real, imag, 0] + run.bids['part'][-1] = run.bids['part'].index('phase') # E.g. 
part = ['', mag, phase, real, imag, 0] # elif 'M' in imagetype: - # run['bids']['part'][-1] = run['bids']['part'].index('mag') + # run.bids['part'][-1] = run.bids['part'].index('mag') elif 'I' in imagetype: - run['bids']['part'][-1] = run['bids']['part'].index('imag') + run.bids['part'][-1] = run.bids['part'].index('imag') elif 'R' in imagetype: - run['bids']['part'][-1] = run['bids']['part'].index('real') - if run['bids']['part'][-1]: - LOGGER.verbose(f"Updated {dataformat}/{datasource.datatype} entity: 'part' -> '{run['bids']['part'][run['bids']['part'][-1]]}' ({imagetype})") + run.bids['part'][-1] = run.bids['part'].index('real') + if run.bids['part'][-1]: + LOGGER.verbose(f"Updated {dataformat}/{datasource.datatype} entity: 'part' -> '{run.bids['part'][run.bids['part'][-1]]}' ({imagetype})") else: - LOGGER.bcdebug(f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}") - - # Now work from the provenance store - if store: - targetfile = store['target']/sourcefile.relative_to(store['source']) - targetfile.parent.mkdir(parents=True, exist_ok=True) - LOGGER.verbose(f"Storing the discovered {dataformat} sample as: {targetfile}") - run['provenance'] = str(shutil.copyfile(sourcefile, targetfile)) - run['datasource'].path = targetfile + LOGGER.bcdebug(f"Known sample: {datasource}") # Copy the filled-in run over to the new bidsmap - bids.insert_run(bidsmap_new, run) + bidsmap_new.insert_run(run) else: - LOGGER.bcdebug(f"Existing/duplicate '{datasource.datatype}' {dataformat} sample: {sourcefile}") + LOGGER.bcdebug(f"Existing/duplicate sample: {datasource}") @due.dcite(Doi('10.1016/j.jneumeth.2016.03.001'), description='dcm2niix: DICOM to NIfTI converter', tags=['reference-implementation']) -def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[None, dict]: +def bidscoiner_plugin(session: Path, bidsmap: BidsMap, bidsses: Path) -> Union[None, dict]: """ The bidscoiner plugin to convert the session DICOM and PAR/REC source-files into 
BIDS-valid NIfTI-files in the corresponding bids session-folder and extract personals (e.g. Age, Sex) from the source header @@ -220,11 +208,11 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N bidsfolder = bidsses.parent.parent if sesid else bidsses.parent # Get started and see what dataformat we have - options: Plugin = bidsmap['Options']['plugins']['dcm2niix2bids'] - exceptions: list = bidsmap['Options']['bidscoin'].get('notderivative', ()) - bidsignore: list = bidsmap['Options']['bidscoin']['bidsignore'] + options = Plugin(bidsmap.plugins['dcm2niix2bids']) + exceptions: list = bidsmap.options.get('notderivative', []) + bidsignore: list = bidsmap.options['bidsignore'] fallback = 'fallback' if options.get('fallback','y').lower() in ('y', 'yes', 'true') else '' - datasource = bids.get_datasource(session, {'dcm2niix2bids': options}) + datasource = bids.get_datasource(session, Plugins({'dcm2niix2bids': options})) dataformat = datasource.dataformat if not dataformat: LOGGER.info(f"--> No {__name__} sourcedata found in: {session}") @@ -259,45 +247,45 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N sourcefile = bids.get_dicomfile(source) elif dataformat == 'PAR': sourcefile = source - if not sourcefile.name or not is_sourcefile(sourcefile): + if not sourcefile.name or not has_support(sourcefile): continue # Get a matching run from the bidsmap - datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat) - run, runid = bids.get_matching_run(datasource, bidsmap, runtime=True) + datasource = bids.DataSource(sourcefile, {'dcm2niix2bids': options}, dataformat, bidsmap.options) + run, runid = bidsmap.get_matching_run(datasource, runtime=True) # Check if we should ignore this run - if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']: + if run.datatype in bidsmap.options['ignoretypes']: LOGGER.info(f"--> Leaving out: {source}") - bids.bidsprov(bidsses, source, runid, 
datasource.datatype) # Write out empty provenance data + bids.bidsprov(bidsses, source, runid, run.datatype) # Write out empty provenance data continue # Check if we already know this run if not runid: - LOGGER.error(f"--> Skipping unknown '{datasource.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning") + LOGGER.error(f"--> Skipping unknown '{run.datatype}' run: {sourcefile}\n-> Re-run the bidsmapper and delete {bidsses} to solve this warning") bids.bidsprov(bidsses, source) # Write out empty provenance data continue LOGGER.info(f"--> Coining: {source}") # Create the BIDS session/datatype output folder - suffix = datasource.dynamicvalue(run['bids']['suffix'], True, True) - if suffix in bids.get_derivatives(datasource.datatype, exceptions): - outfolder = bidsfolder/'derivatives'/manufacturer.replace(' ','')/subid/sesid/datasource.datatype + suffix = datasource.dynamicvalue(run.bids['suffix'], True, True) + if suffix in bids.get_derivatives(run.datatype, exceptions): + outfolder = bidsfolder/'derivatives'/manufacturer.replace(' ','')/subid/sesid/run.datatype else: - outfolder = bidsses/datasource.datatype + outfolder = bidsses/run.datatype outfolder.mkdir(parents=True, exist_ok=True) # Compose the BIDS filename using the matched run - ignore = bids.check_ignore(datasource.datatype, bidsignore) - bidsname = bids.get_bidsname(subid, sesid, run, not ignore, runtime=True) + ignore = bids.check_ignore(run.datatype, bidsignore) + bidsname = run.bidsname(subid, sesid, not ignore, runtime=True) ignore = ignore or bids.check_ignore(bidsname+'.json', bidsignore, 'file') runindex = bids.get_bidsvalue(bidsname, 'run') - bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table) + bidsname = run.increment_runindex(outfolder, bidsname, scans_table) targets = set() # -> A store for all fullpath output targets (.nii/.tsv) for this bidsname # Check if the bidsname is valid - bidstest = 
(Path('/')/subid/sesid/datasource.datatype/bidsname).with_suffix('.nii').as_posix() + bidstest = (Path('/')/subid/sesid/run.datatype/bidsname).with_suffix('.nii').as_posix() isbids = BIDSValidator().is_bids(bidstest) if not isbids and not ignore: LOGGER.warning(f"The '{bidstest}' output name did not pass the bids-validator test") @@ -384,7 +372,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N for postfix in postfixes: # Patch the echo entity in the newbidsname with the dcm2niix echo info # NB: We can't rely on the bids-entity info here because manufacturers can e.g. put multiple echos in one series/run-folder - if 'echo' in run['bids'] and postfix.startswith('e'): + if 'echo' in run.bids and postfix.startswith('e'): echonr = f"_{postfix}".replace('_e','') # E.g. postfix='e1' if not echonr: echonr = '1' @@ -399,7 +387,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N newbidsname = bids.get_bidsvalue(newbidsname, fallback, postfix) # Append the unknown postfix to the fallback-label # Patch the phase entity in the newbidsname with the dcm2niix mag/phase info - elif 'part' in run['bids'] and postfix in ('ph','real','imaginary'): # e.g. part: ['', 'mag', 'phase', 'real', 'imag', 0] + elif 'part' in run.bids and postfix in ('ph','real','imaginary'): # e.g. 
part: ['', 'mag', 'phase', 'real', 'imag', 0] if postfix == 'ph': newbidsname = bids.insert_bidskeyval(newbidsname, 'part', 'phase', ignore) if postfix == 'real': @@ -479,7 +467,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N jsonfile = target.with_suffix('').with_suffix('.json') if not jsonfile.is_file(): LOGGER.warning(f"Unexpected conversion result, could not find: {jsonfile}") - metadata = bids.updatemetadata(datasource, jsonfile, run['meta'], options.get('meta',[])) + metadata = bids.updatemetadata(datasource, jsonfile, run.meta, options.get('meta',[])) # Remove the bval/bvec files of sbref- and inv-images (produced by dcm2niix but not allowed by the BIDS specifications) if ((datasource.datatype == 'dwi' and suffix == 'sbref') or @@ -535,7 +523,7 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N if age and options.get('anon','y') in ('y','yes'): age = int(float(age)) except Exception as exc: - LOGGER.warning(f"Could not parse age from: {datasource.path}\n{exc}") + LOGGER.warning(f"Could not parse age from: {datasource}\n{exc}") personals = {} personals['age'] = str(age) personals['sex'] = datasource.attributes('PatientSex') diff --git a/bidscoin/plugins/nibabel2bids.py b/bidscoin/plugins/nibabel2bids.py index b5036cab..bcef792d 100644 --- a/bidscoin/plugins/nibabel2bids.py +++ b/bidscoin/plugins/nibabel2bids.py @@ -5,14 +5,13 @@ import logging import dateutil.parser import json -import shutil import pandas as pd import nibabel as nib from bids_validator import BIDSValidator from typing import Union from pathlib import Path from bidscoin import bids -from bidscoin.bids import Bidsmap, Plugin +from bidscoin.bids import BidsMap, Plugin, Plugins try: from nibabel.testing import data_path @@ -31,7 +30,7 @@ def test(options: Plugin=OPTIONS) -> int: """ Performs a nibabel test - :param options: A dictionary with the plugin options, e.g. 
taken from the bidsmap['Options']['plugins']['nibabel2bids'] + :param options: A dictionary with the plugin options, e.g. taken from the bidsmap.plugins['nibabel2bids'] :return: The errorcode: 0 for successful execution, 1 for general tool errors, 2 for `ext` option errors, 3 for `meta` option errors """ @@ -50,7 +49,7 @@ def test(options: Plugin=OPTIONS) -> int: return 3 niifile = Path(data_path)/'anatomical.nii' - assert is_sourcefile(niifile) == 'Nibabel' + assert has_support(niifile) == 'Nibabel' assert str(get_attribute('Nibabel', niifile, 'descrip', options)) == "b'spm - 3D normalized'" except Exception as nibabelerror: @@ -61,14 +60,18 @@ def test(options: Plugin=OPTIONS) -> int: return 0 -def is_sourcefile(file: Path) -> str: +def has_support(file: Path, dataformat: Union[DataFormat, str]='') -> str: """ - This plugin function supports assessing whether the file is a valid sourcefile + This plugin function assesses whether a sourcefile is of a supported dataformat - :param file: The file that is assessed - :return: The valid dataformat of the file for this plugin + :param file: The sourcefile that is assessed + :param dataformat: The requested dataformat (optional requirement) + :return: The valid/supported dataformat of the sourcefile """ + if dataformat and dataformat != 'Nibabel': + return '' + ext = ''.join(file.suffixes) if file.is_file() and ext.lower() in sum((klass.valid_exts for klass in nib.imageclasses.all_image_classes), ('.nii.gz',)): return 'Nibabel' @@ -83,7 +86,7 @@ def get_attribute(dataformat: str, sourcefile: Path, attribute: str, options: Pl :param dataformat: The bidsmap-dataformat of the sourcefile, e.g. DICOM of PAR :param sourcefile: The sourcefile from which the attribute value should be read :param attribute: The attribute key for which the value should be read - :param options: A dictionary with the plugin options, e.g. taken from the bidsmap['Options']['plugins'] + :param options: A dictionary with the plugin options, e.g. 
taken from the bidsmap.plugins :return: The attribute value """ @@ -102,7 +105,7 @@ def get_attribute(dataformat: str, sourcefile: Path, attribute: str, options: Pl return value -def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, template: Bidsmap, store: dict) -> None: +def bidsmapper_plugin(session: Path, bidsmap_new: BidsMap, bidsmap_old: BidsMap, template: BidsMap) -> None: """ All the logic to map the Nibabel header fields onto bids labels go into this function @@ -110,55 +113,46 @@ def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, :param bidsmap_new: The new study bidsmap that we are building :param bidsmap_old: The previous study bidsmap that has precedence over the template bidsmap :param template: The template bidsmap with the default heuristics - :param store: The paths of the source- and target-folder :return: """ # Get started - plugins = {'nibabel2bids': Plugin(bidsmap_new['Options']['plugins']['nibabel2bids'])} + plugins = Plugins({'nibabel2bids': bidsmap_new.plugins['nibabel2bids']}) datasource = bids.get_datasource(session, plugins, recurse=2) if not datasource.dataformat: return - if not (template[datasource.dataformat] or bidsmap_old[datasource.dataformat]): + if not (template.dataformat(datasource.dataformat) or bidsmap_old[datasource.dataformat]): LOGGER.error(f"No {datasource.dataformat} source information found in the bidsmap and template") return # Collect the different DICOM/PAR source files for all runs in the session - for sourcefile in [file for file in session.rglob('*') if is_sourcefile(file)]: + for sourcefile in [file for file in session.rglob('*') if has_support(file)]: # See if we can find a matching run in the old bidsmap datasource = bids.DataSource(sourcefile, plugins, datasource.dataformat) - run, match = bids.get_matching_run(datasource, bidsmap_old) + run, match = bidsmap_old.get_matching_run(datasource) # If not, see if we can find a matching run in the template if 
not match: - run, _ = bids.get_matching_run(datasource, template) + run, _ = template.get_matching_run(datasource) # See if we have collected the run somewhere in our new bidsmap - if not bids.exist_run(bidsmap_new, '', run): + if not bidsmap_new.exist_run(run): # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. that we found a new sample if not match: - LOGGER.info(f"Discovered '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}") + LOGGER.info(f"Discovered data sample: {datasource}") else: - LOGGER.bcdebug(f"Known '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}") - - # Now work from the provenance store - if store: - targetfile = store['target']/sourcefile.relative_to(store['source']) - targetfile.parent.mkdir(parents=True, exist_ok=True) - LOGGER.verbose(f"Storing the discovered {datasource.dataformat} sample as: {targetfile}") - run['provenance'] = str(shutil.copyfile(sourcefile, targetfile)) - run['datasource'].path = targetfile + LOGGER.bcdebug(f"Known data sample: {datasource}") # Copy the filled-in run over to the new bidsmap - bids.insert_run(bidsmap_new, run) + bidsmap_new.insert_run(run) else: - LOGGER.bcdebug(f"Existing/duplicate '{datasource.datatype}' {datasource.dataformat} sample: {sourcefile}") + LOGGER.bcdebug(f"Existing/duplicate sample: {datasource}") -def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: +def bidscoiner_plugin(session: Path, bidsmap: BidsMap, bidsses: Path) -> None: """ The bidscoiner plugin to convert the session Nibabel source-files into BIDS-valid NIfTI-files in the corresponding bids session-folder @@ -174,10 +168,10 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: sesid = bidsses.name if bidsses.name.startswith('ses-') else '' # Get started - options = bidsmap['Options']['plugins']['nibabel2bids'] + options = bidsmap._data['Options']['plugins']['nibabel2bids'] ext = options.get('ext', '') 
meta = options.get('meta', []) - sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)] + sourcefiles = [file for file in session.rglob('*') if has_support(file)] if not sourcefiles: LOGGER.info(f"--> No {__name__} sourcedata found in: {session}") return @@ -194,11 +188,11 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: for source in sourcefiles: datasource = bids.DataSource(source, {'nibabel2bids':options}) - run, runid = bids.get_matching_run(datasource, bidsmap, runtime=True) + run, runid = bidsmap.get_matching_run(datasource, runtime=True) # Check if we should ignore this run - if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']: - LOGGER.info(f"--> Leaving out: {source}") + if datasource.datatype in bidsmap.options['ignoretypes']: + LOGGER.info(f"--> Leaving out: {datasource}") bids.bidsprov(bidsses, source, runid, datasource.datatype) # Write out empty provenance data continue @@ -208,16 +202,16 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> None: bids.bidsprov(bidsses, source) # Write out empty provenance data continue - LOGGER.info(f"--> Coining: {source}") + LOGGER.info(f"--> Coining: {datasource}") # Create the BIDS session/datatype output folder outfolder = bidsses/datasource.datatype outfolder.mkdir(parents=True, exist_ok=True) # Compose the BIDS filename using the matched run - bidsignore = bids.check_ignore(datasource.datatype, bidsmap['Options']['bidscoin']['bidsignore']) + bidsignore = bids.check_ignore(datasource.datatype, bidsmap.options['bidsignore']) bidsname = bids.get_bidsname(subid, sesid, run, not bidsignore, runtime=True) - bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file') + bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap.options['bidsignore'], 'file') bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table) target = 
(outfolder/bidsname).with_suffix(ext) diff --git a/bidscoin/plugins/spec2nii2bids.py b/bidscoin/plugins/spec2nii2bids.py index bc63abbc..df022eb8 100644 --- a/bidscoin/plugins/spec2nii2bids.py +++ b/bidscoin/plugins/spec2nii2bids.py @@ -12,7 +12,7 @@ from bids_validator import BIDSValidator from pathlib import Path from bidscoin import bcoin, bids, due, Doi -from bidscoin.bids import Bidsmap, Plugin +from bidscoin.bids import BidsMap, Plugin LOGGER = logging.getLogger(__name__) @@ -28,7 +28,7 @@ def test(options: Plugin=OPTIONS) -> int: """ This plugin shell tests the working of the spec2nii2bids plugin + its bidsmap options - :param options: A dictionary with the plugin options, e.g. taken from the bidsmap['Options']['plugins']['spec2nii2bids'] + :param options: A dictionary with the plugin options, e.g. taken from the bidsmap.plugins['spec2nii2bids'] :return: The errorcode (e.g 0 if the tool generated the expected result, > 0 if there was a tool error) """ @@ -47,14 +47,18 @@ def test(options: Plugin=OPTIONS) -> int: return bcoin.run_command(f"{options.get('command',OPTIONS['command'])} -v") -def is_sourcefile(file: Path) -> str: +def has_support(file: Path, dataformat: Union[DataFormat, str]='') -> str: """ This plugin function assesses whether a sourcefile is of a supported dataformat - :param file: The sourcefile that is assessed - :return: The valid/supported dataformat of the sourcefile + :param file: The sourcefile that is assessed + :param dataformat: The requested dataformat (optional requirement) + :return: The valid/supported dataformat of the sourcefile """ + if dataformat and dataformat not in ('Twix', 'SPAR', 'Pfile'): + return '' + suffix = file.suffix.lower() if suffix == '.dat': return 'Twix' @@ -99,7 +103,7 @@ def get_attribute(dataformat: str, sourcefile: Path, attribute: str, options: Pl LOGGER.error(f"Unsupported MRS data-format: {dataformat}") -def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, template: Bidsmap, 
store: dict) -> None: +def bidsmapper_plugin(session: Path, bidsmap_new: BidsMap, bidsmap_old: BidsMap, template: BidsMap, store: dict) -> None: """ All the heuristics spec2nii2bids attributes and properties onto bids labels and meta-data go into this plugin function. The function is expected to update/append new runs to the bidsmap_new data structure. The bidsmap options for this plugin @@ -119,7 +123,7 @@ def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, plugins = {'spec2nii2bids': Plugin(bidsmap_new['Options']['plugins']['spec2nii2bids'])} # Update the bidsmap with the info from the source files - for sourcefile in [file for file in session.rglob('*') if is_sourcefile(file)]: + for sourcefile in [file for file in session.rglob('*') if has_support(file)]: datasource = bids.DataSource(sourcefile, plugins) dataformat = datasource.dataformat @@ -133,38 +137,30 @@ def bidsmapper_plugin(session: Path, bidsmap_new: Bidsmap, bidsmap_old: Bidsmap, return # See if we can find a matching run in the old bidsmap - run, match = bids.get_matching_run(datasource, bidsmap_old) + run, match = bidsmap_old.get_matching_run(datasource) # If not, see if we can find a matching run in the template if not match: - run, _ = bids.get_matching_run(datasource, template) + run, _ = template.get_matching_run(datasource) # See if we have collected the run somewhere in our new bidsmap - if not bids.exist_run(bidsmap_new, '', run): + if not bidsmap_new.exist_run(run): # Communicate with the user if the run was not present in bidsmap_old or in template, i.e. 
that we found a new sample if not match: - LOGGER.info(f"Discovered '{datasource.datatype}' {dataformat} sample: {sourcefile}") + LOGGER.info(f"Discovered sample: {datasource}") else: - LOGGER.bcdebug(f"Known '{datasource.datatype}' {dataformat} sample: {sourcefile}") - - # Now work from the provenance store - if store: - targetfile = store['target']/sourcefile.relative_to(store['source']) - targetfile.parent.mkdir(parents=True, exist_ok=True) - LOGGER.verbose(f"Storing the discovered {dataformat} sample as: {targetfile}") - run['provenance'] = str(shutil.copyfile(sourcefile, targetfile)) - run['datasource'].path = targetfile + LOGGER.bcdebug(f"Known sample: {datasource}") # Copy the filled-in run over to the new bidsmap - bids.insert_run(bidsmap_new, run) + bidsmap_new.insert_run(run) else: - LOGGER.bcdebug(f"Existing/duplicate '{datasource.datatype}' {dataformat} sample: {sourcefile}") + LOGGER.bcdebug(f"Existing/duplicate sample: {datasource}") @due.dcite(Doi('10.1002/mrm.29418'), description='Multi-format in vivo MR spectroscopy conversion to NIFTI', tags=['reference-implementation']) -def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[None, dict]: +def bidscoiner_plugin(session: Path, bidsmap: BidsMap, bidsses: Path) -> Union[None, dict]: """ This wrapper function around spec2nii converts the MRS data in the session folder and saves it in the bidsfolder. Each saved datafile should be accompanied by a json sidecar file. 
The bidsmap options for this plugin can be found in: @@ -182,10 +178,10 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N sesid = bidsses.name if bidsses.name.startswith('ses-') else '' # Get started and see what dataformat we have - options = bidsmap['Options']['plugins']['spec2nii2bids'] + options = bidsmap.plugins['spec2nii2bids'] datasource = bids.get_datasource(session, {'spec2nii2bids':options}) dataformat = datasource.dataformat - sourcefiles = [file for file in session.rglob('*') if is_sourcefile(file)] + sourcefiles = [file for file in session.rglob('*') if has_support(file)] if not sourcefiles: LOGGER.info(f"--> No {__name__} sourcedata found in: {session}") return @@ -203,10 +199,10 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N # Get a data source, a matching run from the bidsmap datasource = bids.DataSource(source, {'spec2nii2bids': options}) - run, runid = bids.get_matching_run(datasource, bidsmap, runtime=True) + run, runid = bidsmap.get_matching_run(datasource, runtime=True) # Check if we should ignore this run - if datasource.datatype in bidsmap['Options']['bidscoin']['ignoretypes']: + if datasource.datatype in bidsmap.options['ignoretypes']: LOGGER.info(f"--> Leaving out: {source}") bids.bidsprov(bidsses, source, runid, datasource.datatype) # Write out empty provenance data continue @@ -224,9 +220,9 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N outfolder.mkdir(parents=True, exist_ok=True) # Compose the BIDS filename using the matched run - bidsignore = bids.check_ignore(datasource.datatype, bidsmap['Options']['bidscoin']['bidsignore']) + bidsignore = bids.check_ignore(datasource.datatype, bidsmap.options['bidsignore']) bidsname = bids.get_bidsname(subid, sesid, run, not bidsignore, runtime=True) - bidsignore = bidsignore or bids.check_ignore(bidsname+'.json', bidsmap['Options']['bidscoin']['bidsignore'], 'file') + bidsignore = bidsignore or 
bids.check_ignore(bidsname+'.json', bidsmap.options['bidsignore'], 'file') bidsname = bids.increment_runindex(outfolder, bidsname, run, scans_table) target = (outfolder/bidsname).with_suffix('.nii.gz') @@ -325,6 +321,6 @@ def bidscoiner_plugin(session: Path, bidsmap: Bidsmap, bidsses: Path) -> Union[N age = int(float(age)) personals['age'] = str(age) except Exception as exc: - LOGGER.warning(f"Could not parse age from: {datasource.path}\n{exc}") + LOGGER.warning(f"Could not parse age from: {datasource}\n{exc}") return personals diff --git a/bidscoin/utilities/bidsparticipants.py b/bidscoin/utilities/bidsparticipants.py index aa41826d..70aede9b 100755 --- a/bidscoin/utilities/bidsparticipants.py +++ b/bidscoin/utilities/bidsparticipants.py @@ -27,7 +27,7 @@ def scanpersonals(bidsmap: Bidsmap, session: Path, personals: dict, keys: list) """ # Get valid BIDS subject/session identifiers from the (first) DICOM- or PAR/XML source file - datasource = bids.get_datasource(session, bidsmap['Options']['plugins']) + datasource = bids.get_datasource(session, bidsmap.plugins) dataformat = datasource.dataformat if not datasource.dataformat: LOGGER.info(f"No supported datasource found in '{session}'") @@ -46,7 +46,7 @@ def scanpersonals(bidsmap: Bidsmap, session: Path, personals: dict, keys: list) elif age.endswith('M'): age = float(age.rstrip('M')) / 12 elif age.endswith('Y'): age = float(age.rstrip('Y')) if age: - if bidsmap['Options']['plugins']['dcm2niix2bids'].get('anon', 'y') in ('y','yes'): + if bidsmap.plugins['dcm2niix2bids'].get('anon', 'y') in ('y','yes'): age = int(float(age)) personals['age'] = str(age) @@ -89,8 +89,8 @@ def bidsparticipants(sourcefolder: str, bidsfolder: str, keys: list, bidsmap: st if not bidsmap: LOGGER.info('Make sure to run "bidsmapper" first, exiting now') return - subprefix = bidsmap['Options']['bidscoin']['subprefix'] - sesprefix = bidsmap['Options']['bidscoin']['sesprefix'] + subprefix = bidsmap.options['subprefix'] + sesprefix = 
bidsmap.options['sesprefix'] # Get the table & dictionary of the subjects that have been processed participants_tsv = bidsfolder/'participants.tsv' @@ -125,7 +125,7 @@ def bidsparticipants(sourcefolder: str, bidsfolder: str, keys: list, bidsmap: st subid, sesid = bids.DataSource(session/'dum.my', subprefix=subprefix, sesprefix=sesprefix).subid_sesid() # Unpack the data in a temporary folder if it is tarballed/zipped and/or contains a DICOMDIR file - sesfolders, unpacked = bids.unpack(session, bidsmap['Options']['bidscoin'].get('unzip','')) + sesfolders, unpacked = bids.unpack(session, bidsmap.options.get('unzip','')) for sesfolder in sesfolders: # Update/append the personal source data diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c710081a..0f22645c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -199,7 +199,7 @@ - The option to remove decimals from age and discard acquisition dates from the metadata ### Changed -- Plugins should now have a `is_sourcefile` and a `get_attribute` function and have a simpler/changed API (-> DataSource class) +- Plugins should now have a `has_support` and a `get_attribute` function and have a simpler/changed API (-> DataSource class) - The intricate filtering of the `nrfiles` property by the other filesystem properties has been removed and is now a pure/unfiltered file-system property - The default `<>` keyword has been replaced by the more flexible <\> property to extract the subject/session label - The dcm2bidsmap and the dcm2niix2bids plugins have been merged diff --git a/docs/plugins.rst b/docs/plugins.rst index ca2b40c0..d3220585 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -49,7 +49,7 @@ As can be seen in the API code snippet below (but also see the default plugins f are expected to be present: - test: A test function for the plugin + its bidsmap options. 
Can be called by the user from the bidseditor and the bidscoin utility - - is_sourcefile: A function to assess whether a source file is supported by the plugin. The return value should correspond to a data format section in the bidsmap + - has_support: A function to assess whether a source file is supported by the plugin. The return value should correspond to a data format section in the bidsmap - get_attribute: A function to read an attribute value from a source file - bidsmapper_plugin: A function to discover BIDS-mappings in a source data session - bidscoiner_plugin: A function to convert a single source data session to bids according to the specified BIDS-mappings @@ -148,7 +148,7 @@ As can be seen in the API code snippet below (but also see the default plugins f return 0 - def is_sourcefile(file: Path) -> str: + def has_support(file: Path) -> str: """ This plugin function assesses whether a sourcefile is of a supported dataformat @@ -158,7 +158,7 @@ As can be seen in the API code snippet below (but also see the default plugins f if file.is_file(): - LOGGER.verbose(f'This is a demo-plugin is_sourcefile routine, assessing whether "{file}" has a valid dataformat') + LOGGER.verbose(f'This is a demo-plugin has_support routine, assessing whether "{file}" has a valid dataformat') return 'dataformat' if file == 'supportedformat' else '' return '' diff --git a/tests/test_bids.py b/tests/test_bids.py index 0e9bef5d..fd70f178 100644 --- a/tests/test_bids.py +++ b/tests/test_bids.py @@ -57,7 +57,7 @@ def extdatasource(self, dcm_file, tmp_path): return bids.DataSource(ext_dcm_file, {'dcm2niix2bids': Plugin({})}, 'DICOM') def test_is_datasource(self, datasource): - assert datasource.is_datasource + assert datasource.has_plugin() assert datasource.dataformat == 'DICOM' def test_properties(self, datasource): @@ -122,7 +122,7 @@ def test_get_dicomfile(dcm_file, dicomdir): def test_get_datasource(dicomdir): datasource = bids.get_datasource(dicomdir.parent, {'dcm2niix2bids': 
Plugin({})}) - assert datasource.is_datasource + assert datasource.has_plugin() assert datasource.dataformat == 'DICOM' diff --git a/tests/test_bidsmapper.py b/tests/test_bidsmapper.py index b9215f8c..1feef782 100644 --- a/tests/test_bidsmapper.py +++ b/tests/test_bidsmapper.py @@ -12,8 +12,8 @@ def test_bidsmapper(raw_dicomdir, bids_dicomdir, bidsmap_dicomdir, subprefix, se resubprefix = '' if subprefix=='*' else re.escape(subprefix).replace(r'\-','-') resesprefix = '' if sesprefix=='*' else re.escape(sesprefix).replace(r'\-','-') bidsmap = bidsmapper.bidsmapper(raw_dicomdir, bids_dicomdir, bidsmap_dicomdir, bidsmap_template, [], subprefix, sesprefix, unzip='', store=store, automated=True, force=True) - assert bidsmap['Options']['bidscoin']['subprefix'] == subprefix - assert bidsmap['Options']['bidscoin']['sesprefix'] == sesprefix + assert bidsmap.options['subprefix'] == subprefix + assert bidsmap.options['sesprefix'] == sesprefix assert bidsmap['DICOM']['subject'] == f"<>" assert bidsmap['DICOM']['session'] == f"<>" assert len(bidsmap['DICOM']['exclude']) > 1