Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added graph showing intra-subject variability (COV) as a function of atrophy estimation error #87

Merged
merged 2 commits into from
Jan 16, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 86 additions & 3 deletions csa_rescale_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,77 @@ def sample_size(df, config_param):
sample_size.append('inf')
return sample_size

def error_function_of_intra_cov(df, path_output):
"""Scatter plot of intra-subject COV in function of error %
:param df: dataframe for computing stats per subject: df_sub
:param path_output: directory in which plot is saved
"""
fig, ax = plt.subplots(figsize=(7, 7))
# remove rescale=1 because error=0
df = df[df['rescale'] != 1]
# compute linear regression
z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'cov'], deg=1)
p = np.poly1d(z)
# plot
df.plot.scatter(x='perc_error', y='cov', c='rescale', colormap='viridis')
min_err = min(df['perc_error'].values)
max_err = max(df['perc_error'].values)
plt.plot([min_err, max_err], [min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3")
ax.set_xlabel('perc_error')
ax.set_ylabel('cov')
plt.title('COV in function of % error,\n linear regression: {}'.format(p))
plt.title("COV in function of % error")
ax.legend(loc='upper right')
plt.grid()
output_file = os.path.join(path_output, "fig_err_in_function_of_cov.png")
plt.savefig(output_file, bbox_inches='tight')
print("--> Created figure: {}".format(output_file))


def error_function_of_intra_cov_outlier(df, path_output):
"""Scatter plot of intra-subject COV in function of error % to identify outliers with either high error %
or high intra-subject COV
:param df: dataframe for computing stats per subject: df_sub
:param path_output: directory in which plot is saved
"""
fig, ax = plt.subplots(figsize=(7, 7))
# identified outliers either high error % or high intra-subject COV of t1 images
outliers_t1_all = ['sub-brnoUhb01', 'sub-brnoUhb02', 'sub-brnoUhb03', 'sub-brnoUhb06', 'sub-brnoUhb07', 'sub-brnoUhb08',
'sub-barcelona04', 'sub-barcelona06', 'sub-beijingPrisma03', 'sub-milan03', 'sub-oxfordFmrib01',
'sub-tokyo750w03', 'sub-pavia04']
# identified t1 outliers remaining after subjects removed due to missing vertebrae levels missing CSA
outliers_t1 = ['sub-brnoUhb01', 'sub-brnoUhb08', 'sub-milan03', 'sub-oxfordFmrib01', 'sub-cmrrb05',
'sub-tokyo750w03', 'sub-pavia04']
# identified outliers either high error % or high intra-subject COV of t2 images
outliers_t2_all = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02',
'sub-beijingVerio03', 'sub-ubc02', 'sub-vuiisIngenia05']
# identified t2 outliers remaining after subjects removed due to missing vertebrae levels missing CSA
outliers_t2 = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02', 'sub-beijingVerio03',
'sub-ubc02', 'sub-vuiisIngenia05']
# remove rescale=1 because error=0
df = df[df['rescale'] != 1]
ax.scatter(df['perc_error'], df['cov'], color='tab:blue', label='others')
# plot scatter outliers of t1w images
for outlier_t1 in outliers_t1:
df_t1 = df.groupby(['subject']).get_group(outlier_t1)
ax.scatter(df_t1['perc_error'], df_t1['cov'], color='tab:red', label=outlier_t1)
df_t1 = []
# plot scatter outliers of t2w images
for outlier_t2 in outliers_t2:
df_t2 = df.groupby(['subject']).get_group(outlier_t2)
ax.scatter(df_t2['perc_error'], df_t2['cov'], color='tab:olive', label=outlier_t2)
df_t2 = []
# plot
ax.set_xlabel('perc_error')
ax.set_ylabel('cov')
plt.title("COV in function of error %")
ax.legend(loc='upper right')
plt.grid()
# save image
output_file = os.path.join(path_output, "fig_err_in_function_of_cov_outlier.png")
plt.savefig(output_file, bbox_inches='tight')
print("--> Created figure: {}".format(output_file))


def add_columns_df_sub(df):
""" Add columns theoretic CSA values (rX^2 * MEAN(area)) and CSA without rescaling to dataframe
Expand All @@ -257,6 +328,7 @@ def add_columns_df_sub(df):
group.loc[subject, 'theoretic_csa'] = csa_without_rescale.loc[subject, 'mean'] * (rescale ** 2)
df.loc[rescale, 'theoretic_csa'] = group['theoretic_csa'].values
df.loc[rescale, 'csa_without_rescale'] = csa_without_rescale['mean'].values
df.loc[rescale, 'csa_without_rescale'] = csa_without_rescale['mean'].values
df = df.reset_index()
return df

Expand Down Expand Up @@ -285,6 +357,9 @@ def main():
# identify rows with missing values
print("Remove rows with missing values...")
lines_to_drop = df_vert[df_vert['MEAN(area)'] == 'None'].index
df_vert['subject'] = list(sub.split('data_processed/')[1].split('/anat')[0] for sub in df_vert['Filename'])

# remove rows with missing values
df_vert = df_vert.drop(df_vert.index[lines_to_drop])
df_vert['MEAN(area)'] = pd.to_numeric(df_vert['MEAN(area)'])
print(" Rows removed: {}".format(lines_to_drop))
Expand All @@ -294,7 +369,7 @@ def main():

# add useful columns to dataframe
df_vert['basename'] = list(os.path.basename(path).split('.nii.gz')[0] for path in df_vert['Filename'])
df_vert['rescale'] = list(float(b.split('crop_r')[1].split('_')[0]) for b in df_vert['basename'])
df_vert['rescale'] = list(float(b.split('RPI_r_r')[1].split('_')[0]) for b in df_vert['basename'])
df_vert['slices'] = list(int(slices.split(':')[1]) - int(slices.split(':')[0]) + 1 for slices in df_vert['Slice (I->S)'])

# verify if vertlevels of interest were given in input by user
Expand Down Expand Up @@ -338,8 +413,10 @@ def main():
df_sub = add_columns_df_sub(df_sub)
df_sub['rescale_estimated'] = df_sub['mean'].div(df_sub['csa_without_rescale'])
df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa']).abs()
df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).abs().div(df_sub['mean'])
df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).abs().div(df_sub['theoretic_csa'])
print(df_sub)
# save dataframe in a csv file
df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv'))

# Create dataframe for computing stats across subject: df_rescale
print("\n==================== rescaling_dataframe ==========================\n")
Expand All @@ -356,9 +433,12 @@ def main():
df_rescale['mean_rescale_estimated'] = df_sub.groupby('rescale').mean()['rescale_estimated'].values
df_rescale['std_rescale_estimated'] = df_sub.groupby('rescale').std()['rescale_estimated'].values
df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values
df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values
df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values
df_rescale['sample_size'] = sample_size(df_rescale, config_param)
print(df_rescale)
# save dataframe in a csv file
df_sub.to_csv(os.path.join(path_output, r'csa_transfo.csv'))

# plot graph if verbose is present
if arguments.fig:
Expand All @@ -382,7 +462,10 @@ def main():
# std = STD of subjects without rescaling CSA values
# mean_csa = mean CSA value of subjects without rescaling
plot_sample_size(z_conf=z_score_confidence, z_power=z_score_power, std=df_rescale.loc[1, 'std_inter'], mean_csa=df_rescale.loc[1, 'mean_inter'], path_output=path_output)

# scatter plot of COV in function of error %
error_function_of_intra_cov(df_sub, path_output=path_output)
# scatter plot of COV in function of error % to identify outliers
error_function_of_intra_cov_outlier(df_sub, path_output=path_output)

if __name__ == "__main__":
main()