Track lost degrees of freedom #1307

Open
wants to merge 15 commits into main
36 changes: 29 additions & 7 deletions docs/outputs.rst
@@ -239,11 +239,13 @@ Functional timeseries and connectivity matrices
This includes the atlases used to extract the timeseries.

.. important::

If ``abcd`` or ``hbcd`` mode is used, the time series will be interpolated.
If ``linc`` mode is used, the time series will be censored.
In both cases, the correlation matrices will be calculated using the censored time series.

.. important::

Correlation matrices with the ``desc-<INT>volumes`` entity are produced if the
``--create-matrices`` parameter is used with integer values.

@@ -331,17 +333,37 @@ The "framewise_displacement" column contains zeros for low-motion volumes, and ones for
high-motion outliers.
This file includes the high-motion volumes that are removed in most other derivatives.

``design.tsv`` is a tab-delimited file with one column for each nuisance regressor,
including one-hot encoded regressors indicating each of the high-motion outlier volumes.
This file includes the high-motion volumes that are removed in most other derivatives.
``design.tsv`` is a tab-delimited file with one column for each nuisance regressor.
This file includes rows for the high-motion volumes that are removed in most other derivatives.
It does not indicate which volumes are removed (see the ``outliers.tsv`` file for that),
and it does not include the intercept or linear trend terms that are separately applied before
the GLM regression.
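
As a rough illustration, the following sketch loads both files with pandas
(the file names are hypothetical; the exact BIDS entities in your output will differ):

.. code-block:: python

   import pandas as pd

   # Hypothetical file names; substitute the matching files from your XCP-D output.
   design = pd.read_table('sub-01_task-rest_design.tsv')
   outliers = pd.read_table('sub-01_task-rest_outliers.tsv')

   # One column per nuisance regressor; intercept and linear trend are not included.
   n_regressors = design.shape[1]

   # The "framewise_displacement" column contains ones for the censored, high-motion volumes.
   n_censored = int(outliers['framewise_displacement'].sum())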

``_desc-linc_qc.tsv`` is a tab-delimited file with a single row and one column per QC metric.
The QC metrics are described in detail in the ``desc-linc_qc.json`` file.
This file includes metrics indicating the number of volumes removed due to high motion,
the number of confounds used in the denoising step
(including two for the linear trend and intercept),
and the number of degrees of freedom lost to temporal filtering.

To estimate the degrees of freedom for the resulting correlations,
use the following calculation:
``num_retained_volumes - (num_dof_used_by_denoising + num_dof_used_by_filter + 2)``.

.. important::

Please note that the outlier columns are somewhat misleading,
as volumes are removed by censoring, rather than regression.

This formula ignores autocorrelation.
In order to calculate the effective degrees of freedom for your correlation coefficients,
consider using a tool like xDF.
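
Setting aside autocorrelation, a minimal sketch of the calculation above,
using the QC columns described in this section (the file name below is hypothetical):

.. code-block:: python

   import pandas as pd

   # Hypothetical file name; substitute the desc-linc_qc.tsv file from your XCP-D output.
   qc = pd.read_table('sub-01_task-rest_desc-linc_qc.tsv')

   approx_dof = (
       qc['num_retained_volumes'][0]
       - (qc['num_dof_used_by_denoising'][0] + qc['num_dof_used_by_filter'][0] + 2)
   )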

.. warning::

This formula must be adjusted for correlations computed from random subsets of the time series
(i.e., by using the ``--create-matrices`` parameter with integer values).


DCAN style scrubbing file (if ``--skip-dcan-qc`` is not used)
=============================================================
DCAN style scrubbing file (if ``--abcc-qc`` is not used)
========================================================

This file is in HDF5 format (readable by h5py) and contains binary scrubbing masks for FD thresholds
from 0.0 to 1 mm, in steps of 0.01 mm.
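
A minimal sketch of inspecting this file with h5py (the file name is hypothetical,
and no particular internal layout is assumed; the keys are simply listed):

.. code-block:: python

   import h5py

   # Hypothetical file name; substitute the DCAN-style scrubbing file from your output.
   with h5py.File('sub-01_task-rest_desc-dcan_qc.hdf5', 'r') as f:
       # Print every group and dataset name to see the available scrubbing masks.
       f.visit(print)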
5 changes: 2 additions & 3 deletions docs/workflows.rst
@@ -773,9 +773,6 @@ to satisfy DCAN-specific tools.
regressors, this could lead to the imperfect removal of noise from your BOLD data.

The residuals from the second step are referred to as the ``denoised, interpolated BOLD``.
The ``denoised, interpolated BOLD`` will only be written out to the output
directory if the ``--skip-dcan-qc`` flag is not used,
as users **should not** use interpolated data directly.


Re-censoring
@@ -791,6 +788,8 @@ After bandpass filtering, high motion volumes are removed from the
In the ``abcd`` and ``hbcd`` modes,
the denoised, **interpolated** BOLD data are the primary output.

This behavior is specifically controlled with the ``--output-type`` flag.


Resting-state derivative generation
===================================
1 change: 1 addition & 0 deletions xcp_d/cli/parser.py
@@ -433,6 +433,7 @@ def _build_parser():
)
g_censor.add_argument(
'--output-type',
'--output_type',
dest='output_type',
default='auto',
action='store',
7 changes: 6 additions & 1 deletion xcp_d/data/reports-spec-func.yml
@@ -16,7 +16,12 @@ sections:
and outlier volumes are identified.
subtitle: Framewise Displacement and Censored Volumes
- bids: {datatype: figures, suffix: design}
caption: The "design matrix" represents the confounds that are used to denoise the BOLD data.
caption: |
The "design matrix" represents the confounds that are used to denoise the BOLD data.

Please note that the outlier columns are somewhat misleading,
as volumes are removed by censoring, rather than regression.
This figure does not include the intercept or linear trend that are removed in the denoising step.
subtitle: Design Matrix for Confound Regression
style:
height: 100px
7 changes: 6 additions & 1 deletion xcp_d/data/reports-spec.yml
@@ -24,7 +24,12 @@ sections:
and outlier volumes are identified.
subtitle: Framewise Displacement and Censored Volumes
- bids: {datatype: figures, suffix: design}
caption: The "design matrix" represents the confounds that are used to denoise the BOLD data.
caption: |
The "design matrix" represents the confounds that are used to denoise the BOLD data.

Please note that the outlier columns are somewhat misleading,
as volumes are removed by censoring, rather than regression.
This figure does not include the intercept or linear trend that are removed in the denoising step.
subtitle: Design Matrix for Confound Regression
style:
height: 500px
11 changes: 8 additions & 3 deletions xcp_d/interfaces/report.py
@@ -36,7 +36,10 @@
\t\t\t<li>DVARS Before and After Processing : {dvars_before_after}</li>
\t\t\t<li>Correlation between DVARS and FD Before and After Processing :
{fd_dvars_correlation}</li>
\t\t\t<li>Number of Volumes Censored : {num_vols_censored}</li>
\t\t\t<li>Number of Volumes Retained : {num_retained_volumes}</li>
\t\t\t<li>Number of Volumes Censored : {num_censored_volumes}</li>
\t\t\t<li>Lost Degrees of Freedom from Denoising : {num_dof_used_by_denoising}</li>
\t\t\t<li>Lost Degrees of Freedom from Filtering : {num_dof_used_by_filter}</li>
\t\t</ul>
"""

@@ -157,7 +160,6 @@ def _generate_segment(self):
f"{round(qcfile['fd_dvars_correlation_initial'][0], 4)}, "
f"{round(qcfile['fd_dvars_correlation_final'][0], 4)}"
)
num_vols_censored = str(round(qcfile['num_censored_volumes'][0], 4))

return QC_TEMPLATE.format(
space=space,
@@ -167,7 +169,10 @@
max_relative_rms=max_relative_rms,
dvars_before_after=dvars,
fd_dvars_correlation=fd_dvars_correlation,
num_vols_censored=num_vols_censored,
num_retained_volumes=qcfile['num_retained_volumes'][0],
num_censored_volumes=qcfile['num_censored_volumes'][0],
num_dof_used_by_denoising=qcfile['num_dof_used_by_denoising'][0],
num_dof_used_by_filter=qcfile['num_dof_used_by_filter'][0],
)


77 changes: 64 additions & 13 deletions xcp_d/interfaces/utils.py
@@ -20,7 +20,7 @@
)

from xcp_d.utils.filemanip import fname_presuffix
from xcp_d.utils.modified_data import downcast_to_32
from xcp_d.utils.modified_data import calculate_dof, downcast_to_32
from xcp_d.utils.qcmetrics import compute_dvars, compute_registration_qc
from xcp_d.utils.write_save import read_ndata

@@ -176,23 +176,25 @@
mandatory=True,
desc='fMRIPrep confounds file, after dummy scans removal',
)
design_matrix = File(
exists=True,
mandatory=False,
desc='Design matrix used in the denoising step.',
)
cleaned_file = File(
exists=True,
mandatory=True,
desc='Processed file, after denoising and censoring.',
)
TR = traits.Float(mandatory=True, desc='Repetition time, in seconds.')
head_radius = traits.Float(mandatory=True, desc='Head radius for FD calculation, in mm.')
bold_mask_inputspace = traits.Either(
None,
File(exists=True),
high_pass = traits.Float(
mandatory=True,
desc=(
'Mask file from NIfTI. May be None, for CIFTI processing. '
'The mask is in the same space as the BOLD data, which may not be the same as the '
'bold_mask_stdspace file. '
'Used to load the masked BOLD data. Not used for QC metrics.'
),
desc='High-pass filter cutoff, in Hz.',
)
low_pass = traits.Float(
mandatory=True,
desc='Low-pass filter cutoff, in Hz.',
)

# Inputs used only for nifti data
@@ -219,6 +221,17 @@
mandatory=False,
desc='BOLD mask in anatomical space. Used to calculate coregistration QC metrics.',
)
bold_mask_inputspace = traits.Either(
None,
File(exists=True),
mandatory=True,
desc=(
'Mask file from NIfTI. May be None, for CIFTI processing. '
'The mask is in the same space as the BOLD data, which may not be the same as the '
'bold_mask_stdspace file. '
'Used to load the masked BOLD data. Not used for QC metrics.'
),
)
bold_mask_stdspace = File(
exists=True,
mandatory=False,
@@ -247,9 +260,14 @@
motion_df = pd.read_table(self.inputs.motion_file)
preproc_fd = motion_df['framewise_displacement'].to_numpy()
rmsd = motion_df['rmsd'].to_numpy()
if isdefined(self.inputs.design_matrix):
design_matrix = pd.read_table(self.inputs.design_matrix)
num_dof_used_by_denoising = design_matrix.shape[1] + 2 # intercept + linear detrend
else:
num_dof_used_by_denoising = np.nan

# Determine number of dummy volumes and load temporal mask
dummy_scans = self.inputs.dummy_scans
num_dummy_scans = self.inputs.dummy_scans # not included in temporal mask

if isdefined(self.inputs.temporal_mask):
censoring_df = pd.read_table(self.inputs.temporal_mask)
tmask_arr = censoring_df['framewise_displacement'].values
@@ -258,6 +276,12 @@

num_censored_volumes = int(tmask_arr.sum())
num_retained_volumes = int((tmask_arr == 0).sum())
num_dof_lost_by_filter = calculate_dof(
n_volumes=motion_df.shape[0],
t_r=self.inputs.TR,
high_pass=self.inputs.high_pass or 0,
low_pass=self.inputs.low_pass or np.inf,
)

# Apply temporal mask to interpolated/full data
rmsd_censored = rmsd[tmask_arr == 0]
@@ -307,9 +331,11 @@
'max_relative_rms': [rmsd_max_value],
'mean_dvars_initial': [mean_dvars_before_processing],
'mean_dvars_final': [mean_dvars_after_processing],
'num_dummy_volumes': [dummy_scans],
'num_dummy_volumes': [num_dummy_scans],
'num_censored_volumes': [num_censored_volumes],
'num_retained_volumes': [num_retained_volumes],
'num_dof_used_by_denoising': [num_dof_used_by_denoising],
'num_dof_used_by_filter': [num_dof_lost_by_filter],
'fd_dvars_correlation_initial': [fd_dvars_correlation_initial],
'fd_dvars_correlation_final': [fd_dvars_correlation_final],
}
@@ -376,6 +402,27 @@
'The number of non-steady state volumes removed from the time series by XCP-D.'
),
},
'num_dof_used_by_denoising': {
'LongName': 'Number of Degrees of Freedom Lost by Denoising',
'Description': (
'The number of confounds used in the denoising step, plus intercept and '
'linear trend. '
'To estimate the degrees of freedom for bivariate correlations between '
'censored, denoised time series (ignoring the effect of autocorrelation), do '
'num_retained_volumes - (num_dof_used_by_denoising + num_dof_used_by_filter '
'+ 2).'
),
},
'num_dof_used_by_filter': {
'LongName': 'Number of Degrees of Freedom Lost by Temporal Filter',
'Description': (
'The degrees of freedom used up by the temporal filter. '
'To estimate the degrees of freedom for bivariate correlations between '
'censored, denoised time series (ignoring the effect of autocorrelation), do '
'num_retained_volumes - (num_dof_used_by_denoising + num_dof_used_by_filter '
'+ 2).'
),
},
'num_censored_volumes': {
'LongName': 'Number of Censored Volumes',
'Description': (
@@ -387,7 +434,11 @@
'LongName': 'Number of Retained Volumes',
'Description': (
'The number of volumes retained in the denoised dataset. '
'This does not include dummy volumes or high-motion outliers.'
'This does not include dummy volumes or high-motion outliers. '
'To estimate the degrees of freedom for bivariate correlations between '
'censored, denoised time series (ignoring the effect of autocorrelation), do '
'num_retained_volumes - (num_dof_used_by_denoising + num_dof_used_by_filter '
'+ 2).'
),
},
'fd_dvars_correlation_initial': {
92 changes: 67 additions & 25 deletions xcp_d/utils/modified_data.py
@@ -173,10 +173,6 @@
post_scrubbing_duration : :obj:`float`
Amount of time remaining in the run after dummy scan removal, in seconds.
"""
if fd_thresh <= 0:
# No scrubbing will be performed, so there's no point is calculating amount of "good time".
return np.inf

dummy_scans = _infer_dummy_scans(
dummy_scans=dummy_scans,
confounds_file=motion_file,
@@ -188,27 +184,73 @@
# Remove dummy volumes
fmriprep_confounds_df = fmriprep_confounds_df.drop(np.arange(dummy_scans))

# Calculate filtered FD
band_stop_min_adjusted, band_stop_max_adjusted, _ = _modify_motion_filter(
motion_filter_type=motion_filter_type,
band_stop_min=band_stop_min,
band_stop_max=band_stop_max,
TR=TR,
)
motion_df = load_motion(
fmriprep_confounds_df,
TR=TR,
motion_filter_type=motion_filter_type,
motion_filter_order=motion_filter_order,
band_stop_min=band_stop_min_adjusted,
band_stop_max=band_stop_max_adjusted,
)
fd_arr = compute_fd(
confound=motion_df,
head_radius=head_radius,
filtered=bool(motion_filter_type),
)
return np.sum(fd_arr <= fd_thresh) * TR
retained_sec = np.inf
if fd_thresh > 0:
# Calculate filtered FD
band_stop_min_adjusted, band_stop_max_adjusted, _ = _modify_motion_filter(
motion_filter_type=motion_filter_type,
band_stop_min=band_stop_min,
band_stop_max=band_stop_max,
TR=TR,
)
motion_df = load_motion(
fmriprep_confounds_df,
TR=TR,
motion_filter_type=motion_filter_type,
motion_filter_order=motion_filter_order,
band_stop_min=band_stop_min_adjusted,
band_stop_max=band_stop_max_adjusted,
)
fd_arr = compute_fd(
confound=motion_df,
head_radius=head_radius,
filtered=bool(motion_filter_type),
)
retained_sec = np.sum(fd_arr <= fd_thresh) * TR

return retained_sec


def calculate_dof(n_volumes, t_r, high_pass=0, low_pass=np.inf):
"""Calculate the number of degrees of freedom lost by a temporal filter.

Parameters
----------
n_volumes : int
Number of data points in the time series.
t_r : float
Repetition time of the time series, in seconds.
high_pass : float
High-pass frequency in Hertz. Default is 0 (no high-pass filter).
low_pass : float or numpy.inf
Low-pass frequency in Hertz. Default is np.inf (no low-pass filter).

Returns
-------
dof_lost : int
Number of degrees of freedom lost by applying the filter.

Notes
-----
Both Caballero-Gaudes & Reynolds (2017) and Reynolds et al. (preprint)
say that each frequency removed drops two degrees of freedom.
"""
"""
import numpy as np

duration = t_r * n_volumes
fs = 1 / t_r
nyq = 0.5 * fs
spacing = 1 / duration
n_freqs = int(np.ceil(nyq / spacing))
frequencies_hz = np.linspace(0, nyq, n_freqs)

# Figure out what the change in DOF is from the bandpass filter
dropped_freqs_idx = np.where((frequencies_hz < high_pass) | (frequencies_hz > low_pass))[0]
n_dropped_freqs = dropped_freqs_idx.size

# Calculate the lost degrees of freedom
dof_lost = n_dropped_freqs * 2
return dof_lost
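
# Hypothetical illustration (not part of this module): for a 400-volume run with
# TR = 0.8 s and only a high-pass cutoff of 0.01 Hz, the frequency grid spacing is
# roughly 1 / (0.8 * 400) = 0.003125 Hz, so four bins (0, ~0.0031, ~0.0063, ~0.0094 Hz)
# fall below the cutoff and calculate_dof(n_volumes=400, t_r=0.8, high_pass=0.01)
# returns 8.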



def calculate_exact_scans(exact_times, scan_length, t_r, bold_file):