From ff2ee5a3f246c06f2596c19dc8aaa39e30e0e543 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 24 Jul 2024 08:20:45 -0400 Subject: [PATCH] Allow bids-filter-file terms for mesh and morphometry files (#1210) --- docs/usage.rst | 6 +- xcp_d/data/io_spec.yaml | 152 +++++++++++++++++++++++++ xcp_d/tests/test_utils_bids.py | 79 ++++++++++++- xcp_d/utils/bids.py | 196 ++++++++++----------------------- xcp_d/workflows/base.py | 4 +- 5 files changed, 294 insertions(+), 143 deletions(-) create mode 100644 xcp_d/data/io_spec.yaml diff --git a/docs/usage.rst b/docs/usage.rst index 5bb3523a8..bc10718ec 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -152,6 +152,7 @@ This argument must point to a JSON file, containing filters that will be fed int The keys in this JSON file are unique to XCP-D. They are our internal terms for different inputs that will be selected from the preprocessed dataset. +The full list of keys can be found in the file ``xcp_d/data/io_spec.yaml``. ``"bold"`` determines which preprocessed BOLD files will be chosen. You can set a number of entities here, including "session", "task", "space", "resolution", and @@ -166,7 +167,7 @@ We recommend NOT setting the datatype, suffix, or file extension in the filter f ``"t1w"`` selects a native T1w-space, preprocessed T1w file. ``"t2w"`` selects a native T1w-space, preprocessed T2w file. -If not T1w file is available, this file will be in T2w space. +If a T1w file is not available, this file will be in T2w space. ``"anat_dseg"`` selects a segmentation file in the same space as the BOLD data. This file is not used for anything. @@ -182,6 +183,9 @@ The standard space that will be used depends on the ``"bold"`` files that are se ``"template_to_anat_xfm"`` selects a transform from standard space to T1w/T2w space. Again, the standard space is determined based on other files. +There are additional keys that control how mesh and morphometry files are selected. +Please refer to ``io_spec.yaml`` for more information on them. + Example bids-filter-file ======================== diff --git a/xcp_d/data/io_spec.yaml b/xcp_d/data/io_spec.yaml new file mode 100644 index 000000000..88ce671b2 --- /dev/null +++ b/xcp_d/data/io_spec.yaml @@ -0,0 +1,152 @@ +queries: + base: + # brain mask in same standard space as BOLD data + anat_brainmask: + datatype: anat + desc: brain + extension: .nii.gz + suffix: mask + # native anatomical-space dseg file + anat_dseg: + datatype: anat + desc: null + extension: .nii.gz + space: null + suffix: dseg + # transform from native anatomical (T1w or T2w) space to same standard space as BOLD + # "to" entity will be set later + anat_to_template_xfm: + datatype: anat + from: + - T1w + - T2w + suffix: xfm + # all preprocessed BOLD files in the right space/resolution/density + bold: + datatype: func + desc: + - preproc + - null + suffix: bold + # native T1w-space, preprocessed T1w file + t1w: + datatype: anat + desc: preproc + extension: .nii.gz + space: null + suffix: T1w + # native T2w-space or T1w-space, preprocessed T2w file + t2w: + datatype: anat + desc: preproc + extension: .nii.gz + space: + - null + - T1w + suffix: T2w + # transform from standard space to anatomical (T1w or T2w) space + # "from" entity will be set later + template_to_anat_xfm: + datatype: anat + suffix: xfm + to: + - T1w + - T2w + mesh: + # Pial surface mesh in fsnative space (fsLR-space mesh will be searched for separately) + lh_pial_surf: + datatype: anat + desc: null + extension: .surf.gii + hemi: L + suffix: pial + # Subject's surface sphere to be used to warp fsnative meshes to fsLR space + lh_subject_sphere: + datatype: anat + desc: reg + extension: .surf.gii + hemi: L + space: null + suffix: sphere + # White matter surface mesh in fsnative space (fsLR-space mesh will be searched for separately) + lh_wm_surf: + datatype: anat + desc: null + extension: .surf.gii + hemi: L + suffix: + - smoothwm + - white + # Pial surface mesh in fsnative space (fsLR-space mesh will be searched for separately) + rh_pial_surf: + datatype: anat + desc: null + extension: .surf.gii + hemi: R + suffix: pial + # Subject's surface sphere to be used to warp fsnative meshes to fsLR space + rh_subject_sphere: + datatype: anat + desc: reg + extension: .surf.gii + hemi: R + space: null + suffix: sphere + # White matter surface mesh in fsnative space (fsLR-space mesh will be searched for separately) + rh_wm_surf: + datatype: anat + desc: null + extension: .surf.gii + hemi: R + suffix: + - smoothwm + - white + morphometry: + # Cortical thickness in fsLR space CIFTI + cortical_thickness: + datatype: anat + den: 91k + desc: null + extension: .dscalar.nii + space: fsLR + suffix: thickness + # Corrected cortical thickness in fsLR space CIFTI + cortical_thickness_corr: + datatype: anat + den: 91k + desc: corrected + extension: .dscalar.nii + space: fsLR + suffix: thickness + # Myelin map in fsLR space CIFTI + myelin: + datatype: anat + den: 91k + desc: null + extension: .dscalar.nii + space: fsLR + suffix: myelinw + # Smoothed myelin map in fsLR space CIFTI + myelin_smoothed: + datatype: anat + den: 91k + desc: smoothed + extension: .dscalar.nii + space: fsLR + suffix: myelinw + # Sulcal curvature in fsLR space CIFTI + sulcal_curv: + datatype: anat + den: 91k + desc: null + extension: .dscalar.nii + space: fsLR + suffix: curv + # Sulcal depth in fsLR space CIFTI + sulcal_depth: + datatype: anat + den: 91k + desc: null + extension: .dscalar.nii + space: fsLR + suffix: sulc diff --git a/xcp_d/tests/test_utils_bids.py b/xcp_d/tests/test_utils_bids.py index cc66431ea..7b731a7cd 100644 --- a/xcp_d/tests/test_utils_bids.py +++ b/xcp_d/tests/test_utils_bids.py @@ -126,13 +126,17 @@ def test_collect_mesh_data(datasets, tmp_path_factory): """Test collect_mesh_data.""" # Dataset without mesh files layout = BIDSLayout(datasets["fmriprep_without_freesurfer"], validate=False) - mesh_available, standard_space_mesh, _, _ = xbids.collect_mesh_data(layout, "1648798153") + mesh_available, standard_space_mesh, _, _ = xbids.collect_mesh_data( + layout, "1648798153", bids_filters={} + ) assert mesh_available is False assert standard_space_mesh is False # Dataset with native-space mesh files (one file matching each query) layout = BIDSLayout(datasets["pnc"], validate=False) - mesh_available, standard_space_mesh, _, _ = xbids.collect_mesh_data(layout, "1648798153") + mesh_available, standard_space_mesh, _, _ = xbids.collect_mesh_data( + layout, "1648798153", bids_filters={} + ) assert mesh_available is True assert standard_space_mesh is False @@ -153,7 +157,9 @@ def test_collect_mesh_data(datasets, tmp_path_factory): (std_mesh_dir / "sub-1648798153/ses-PNC1/anat").joinpath(f).touch() layout = BIDSLayout(std_mesh_dir, validate=False) - mesh_available, standard_space_mesh, _, _ = xbids.collect_mesh_data(layout, "1648798153") + mesh_available, standard_space_mesh, _, _ = xbids.collect_mesh_data( + layout, "1648798153", bids_filters={} + ) assert mesh_available is True assert standard_space_mesh is True @@ -179,7 +185,72 @@ def test_collect_mesh_data(datasets, tmp_path_factory): layout = BIDSLayout(std_mesh_dir, validate=False) with pytest.raises(ValueError, match="More than one surface found"): - xbids.collect_mesh_data(layout, "1648798153") + xbids.collect_mesh_data(layout, "1648798153", bids_filters={}) + + # If we include BIDS filters, we should be able to ignore the existing files + layout = BIDSLayout(datasets["pnc"], validate=False) + mesh_available, standard_space_mesh, _, _ = xbids.collect_mesh_data( + layout, + "1648798153", + bids_filters={ + "lh_pial_surf": {"acquisition": "test"}, + "rh_pial_surf": {"acquisition": "test"}, + "lh_wm_surf": {"acquisition": "test"}, + "rh_wm_surf": {"acquisition": "test"}, + "lh_subject_sphere": {"acquisition": "test"}, + "rh_subject_sphere": {"acquisition": "test"}, + }, + ) + assert mesh_available is False + assert standard_space_mesh is False + + +def test_collect_morphometry_data(datasets, tmp_path_factory): + """Test collect_morphometry_data.""" + # Dataset without morphometry files + layout = BIDSLayout(datasets["fmriprep_without_freesurfer"], validate=False) + morph_file_types, _ = xbids.collect_morphometry_data(layout, "1648798153", bids_filters={}) + assert morph_file_types == [] + + # Dataset with morphometry files (one file matching each query) + layout = BIDSLayout(datasets["pnc"], validate=False) + morph_file_types, _ = xbids.collect_morphometry_data(layout, "1648798153", bids_filters={}) + assert morph_file_types == ["cortical_thickness", "sulcal_curv", "sulcal_depth"] + + # Dataset with multiple files matching each query (raises an error) + bad_morph_dir = tmp_path_factory.mktemp("bad_morph") + shutil.copyfile( + os.path.join(datasets["pnc"], "dataset_description.json"), + bad_morph_dir / "dataset_description.json", + ) + os.makedirs(bad_morph_dir / "sub-1648798153/ses-PNC1/anat", exist_ok=True) + files = [ + "sub-1648798153_ses-PNC1_acq-refaced_space-fsLR_den-91k_thickness.dscalar.nii", + "sub-1648798153_ses-PNC1_acq-refaced2_space-fsLR_den-91k_thickness.dscalar.nii", + "sub-1648798153_ses-PNC1_acq-refaced_space-fsLR_den-91k_sulc.dscalar.nii", + "sub-1648798153_ses-PNC1_acq-refaced2_space-fsLR_den-91k_sulc.dscalar.nii", + "sub-1648798153_ses-PNC1_acq-refaced_space-fsLR_den-91k_curv.dscalar.nii", + "sub-1648798153_ses-PNC1_acq-refaced2_space-fsLR_den-91k_curv.dscalar.nii", + ] + for f in files: + (bad_morph_dir / "sub-1648798153/ses-PNC1/anat").joinpath(f).touch() + + layout = BIDSLayout(bad_morph_dir, validate=False) + with pytest.raises(ValueError, match="More than one .* found"): + xbids.collect_morphometry_data(layout, "1648798153", bids_filters={}) + + # If we include BIDS filters, we should be able to ignore the existing files + layout = BIDSLayout(datasets["pnc"], validate=False) + morph_file_types, _ = xbids.collect_morphometry_data( + layout, + "1648798153", + bids_filters={ + "cortical_thickness": {"acquisition": "test"}, + "sulcal_curv": {"acquisition": "test"}, + "sulcal_depth": {"acquisition": "test"}, + }, + ) + assert morph_file_types == [] def test_write_dataset_description(datasets, tmp_path_factory, caplog): diff --git a/xcp_d/utils/bids.py b/xcp_d/utils/bids.py index 904a06044..f89136af8 100644 --- a/xcp_d/utils/bids.py +++ b/xcp_d/utils/bids.py @@ -5,6 +5,7 @@ Most of the code is copied from niworkflows. A PR will be submitted to niworkflows at some point. """ + import os import warnings from pathlib import Path @@ -14,6 +15,7 @@ from nipype import logging from packaging.version import Version +from xcp_d.data import load as load_data from xcp_d.utils.doc import fill_doc from xcp_d.utils.filemanip import ensure_list @@ -165,55 +167,8 @@ def collect_data( %(layout)s subj_data : dict """ - queries = { - # all preprocessed BOLD files in the right space/resolution/density - "bold": {"datatype": "func", "suffix": "bold", "desc": ["preproc", None]}, - # native T1w-space, preprocessed T1w file - "t1w": { - "datatype": "anat", - "space": None, - "desc": "preproc", - "suffix": "T1w", - "extension": ".nii.gz", - }, - # native T2w-space, preprocessed T1w file - "t2w": { - "datatype": "anat", - "space": [None, "T1w"], - "desc": "preproc", - "suffix": "T2w", - "extension": ".nii.gz", - }, - # native T1w-space dseg file - "anat_dseg": { - "datatype": "anat", - "space": None, - "desc": None, - "suffix": "dseg", - "extension": ".nii.gz", - }, - # transform from standard space to T1w or T2w space - # "from" entity will be set later - "template_to_anat_xfm": { - "datatype": "anat", - "to": ["T1w", "T2w"], - "suffix": "xfm", - }, - # brain mask in same standard space as BOLD data - "anat_brainmask": { - "datatype": "anat", - "desc": "brain", - "suffix": "mask", - "extension": ".nii.gz", - }, - # transform from T1w or T2w space to standard space - # "to" entity will be set later - "anat_to_template_xfm": { - "datatype": "anat", - "from": ["T1w", "T2w"], - "suffix": "xfm", - }, - } + _spec = yaml.safe_load(load_data.readable("io_spec.yaml").read_text()) + queries = _spec["queries"]["base"] if input_type in ("hcp", "dcan", "ukb"): # HCP/DCAN data have anats only in standard space queries["t1w"]["space"] = "MNI152NLin6Asym" @@ -226,8 +181,9 @@ def collect_data( # Apply filters. These may override anything. bids_filters = bids_filters or {} - for acq, entities in bids_filters.items(): - queries[acq].update(entities) + for acq in queries.keys(): + if acq in bids_filters: + queries[acq].update(bids_filters[acq]) # Select the best available space. if "space" in queries["bold"]: @@ -347,13 +303,16 @@ def collect_data( else: subj_data[field] = None - LOGGER.log(25, f"Collected data:\n{yaml.dump(subj_data, default_flow_style=False, indent=4)}") + LOGGER.log( + 25, + f"Collected data:\n{yaml.dump(subj_data, default_flow_style=False, indent=4)}", + ) return subj_data @fill_doc -def collect_mesh_data(layout, participant_label): +def collect_mesh_data(layout, participant_label, bids_filters): """Collect surface files from preprocessed derivatives. This function will try to collect fsLR-space, 32k-resolution surface files first. @@ -381,38 +340,38 @@ def collect_mesh_data(layout, participant_label): # Surfaces to use for brainsprite and anatomical workflow # The base surfaces can be used to generate the derived surfaces. # The base surfaces may be in native or standard space. - base_queries = { - "pial_surf": { - "desc": None, - "suffix": "pial", - }, - "wm_surf": { - "desc": None, - "suffix": ["smoothwm", "white"], - }, - } + _spec = yaml.safe_load(load_data.readable("io_spec.yaml").read_text()) + queries = _spec["queries"]["mesh"] + # Apply filters. These may override anything. + bids_filters = bids_filters or {} + for acq in queries.keys(): + if acq in bids_filters: + queries[acq].update(bids_filters[acq]) + + # First, try to grab the first base surface file in standard (fsLR) space. + # If it's not available, switch to native fsnative-space data. standard_space_mesh = True - for name, query in base_queries.items(): - # First, try to grab the first base surface file in standard space. - # If it's not available, switch to native T1w-space data. - for hemisphere in ["L", "R"]: - temp_files = layout.get( - return_type="file", - subject=participant_label, - datatype="anat", - hemi=hemisphere, - space="fsLR", - den="32k", - extension=".surf.gii", - **query, - ) + for name, query in queries.items(): + # Don't look for fsLR-space versions of the subject spheres. + if "subject_sphere" in name: + continue - if len(temp_files) == 0: - LOGGER.info("No standard-space surfaces found.") - standard_space_mesh = False - elif len(temp_files) > 1: - LOGGER.warning(f"{name}: More than one standard-space surface found.") + temp_files = layout.get( + return_type="file", + subject=participant_label, + space="fsLR", + den="32k", + **query, + ) + + if len(temp_files) == 0: + standard_space_mesh = False + elif len(temp_files) > 1: + LOGGER.warning(f"{name}: More than one standard-space surface found.") + + if not standard_space_mesh: + LOGGER.info("No standard-space surfaces found.") # Now that we know if there are standard-space surfaces available, we can grab the files. query_extras = {} @@ -420,27 +379,15 @@ def collect_mesh_data(layout, participant_label): query_extras = { "space": None, } - # We need the subject spheres if we're using native-space surfaces. - base_queries["subject_sphere"] = { - "space": None, - "desc": "reg", - "suffix": "sphere", - } initial_mesh_files = {} - queries = {} - for name, query in base_queries.items(): - for hemisphere in ["L", "R"]: - key = f"{hemisphere.lower()}h_{name}" - queries[key] = { - "subject": participant_label, - "datatype": "anat", - "hemi": hemisphere, - "extension": ".surf.gii", - **query, - **query_extras, - } - initial_mesh_files[key] = layout.get(return_type="file", **queries[key]) + for name, query in queries.items(): + queries[name] = { + "subject": participant_label, + **query, + **query_extras, + } + initial_mesh_files[name] = layout.get(return_type="file", **queries[name]) mesh_files = {} mesh_available = True @@ -449,8 +396,10 @@ def collect_mesh_data(layout, participant_label): mesh_files[dtype] = surface_files_[0] elif len(surface_files_) == 0: - mesh_available = False mesh_files[dtype] = None + # We don't need subject spheres if we have standard-space meshes already + if not ("subject_sphere" in dtype and standard_space_mesh): + mesh_available = False else: mesh_available = False @@ -461,9 +410,6 @@ def collect_mesh_data(layout, participant_label): f"Query: {queries[dtype]}" ) - mesh_files["lh_subject_sphere"] = mesh_files.get("lh_subject_sphere", None) - mesh_files["rh_subject_sphere"] = mesh_files.get("rh_subject_sphere", None) - # Check for *_space-dhcpAsym_desc-reg_sphere.surf.gii # If we find it, we assume segmentation was done with MCRIBS. Otherwise, assume FreeSurfer. dhcp_file = layout.get( @@ -489,7 +435,7 @@ def collect_mesh_data(layout, participant_label): @fill_doc -def collect_morphometry_data(layout, participant_label): +def collect_morphometry_data(layout, participant_label, bids_filters): """Collect morphometry surface files from preprocessed derivatives. This function will look for fsLR-space, 91k-resolution morphometry CIFTI files. @@ -510,42 +456,20 @@ def collect_morphometry_data(layout, participant_label): Dictionary of surface file identifiers and their paths. If the surface files weren't found, then the paths will be Nones. """ - queries = { - "sulcal_depth": { - "desc": None, - "suffix": "sulc", - }, - "sulcal_curv": { - "desc": None, - "suffix": "curv", - }, - "cortical_thickness": { - "desc": None, - "suffix": "thickness", - }, - "cortical_thickness_corr": { - "desc": "corrected", - "suffix": "thickness", - }, - "myelin": { - "desc": None, - "suffix": "myelinw", - }, - "myelin_smoothed": { - "desc": "smoothed", - "suffix": "myelinw", - }, - } + _spec = yaml.safe_load(load_data.readable("io_spec.yaml").read_text()) + queries = _spec["queries"]["morphometry"] + + # Apply filters. These may override anything. + bids_filters = bids_filters or {} + for acq in queries.keys(): + if acq in bids_filters: + queries[acq].update(bids_filters[acq]) morphometry_files = {} for name, query in queries.items(): files = layout.get( return_type="file", subject=participant_label, - datatype="anat", - space="fsLR", - den="91k", - extension=".dscalar.nii", **query, ) if len(files) == 1: diff --git a/xcp_d/workflows/base.py b/xcp_d/workflows/base.py index e55055f0a..4df526b6b 100644 --- a/xcp_d/workflows/base.py +++ b/xcp_d/workflows/base.py @@ -138,10 +138,12 @@ def init_single_subject_wf(subject_id: str): mesh_available, standard_space_mesh, software, mesh_files = collect_mesh_data( layout=config.execution.layout, participant_label=subject_id, + bids_filters=config.execution.bids_filters, ) morph_file_types, morphometry_files = collect_morphometry_data( layout=config.execution.layout, participant_label=subject_id, + bids_filters=config.execution.bids_filters, ) # determine the appropriate post-processing workflow @@ -155,7 +157,6 @@ def init_single_subject_wf(subject_id: str): inputnode = pe.Node( niu.IdentityInterface( fields=[ - "subj_data", # not currently used, but will be in future "t1w", "t2w", # optional "anat_brainmask", # used to estimate head radius and for QC metrics @@ -180,7 +181,6 @@ def init_single_subject_wf(subject_id: str): ), name="inputnode", ) - inputnode.inputs.subj_data = subj_data inputnode.inputs.t1w = subj_data["t1w"] inputnode.inputs.t2w = subj_data["t2w"] inputnode.inputs.anat_brainmask = subj_data["anat_brainmask"]