From fdd81a1947cb0531a6fc00e6bf799e720aebce38 Mon Sep 17 00:00:00 2001 From: paul Date: Mon, 25 Jan 2021 11:52:54 -0500 Subject: [PATCH 01/40] update plots to match manuscript - update plots: sample_size, error_function_of_csa - update legends: boxplot_atrophy, boxplot_csa --- csa_rescale_stat.py | 90 +++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 1894186..9f43588 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -128,8 +128,8 @@ def boxplot_csa(df, path_output): df.boxplot(column=['mean'], by='rescale_area', showmeans=True, meanline=True) plt.title('Boxplot of CSA in function of area rescaling') plt.suptitle("") - plt.ylabel('csa in mm^2') - plt.xlabel('area rescaling in %') + plt.ylabel('CSA in mm^2') + plt.xlabel('CSA scaling') output_file = os.path.join(path_output, "fig_boxplot_csa.png") plt.savefig(output_file) print("--> Created figure: {}".format(output_file)) @@ -149,55 +149,76 @@ def boxplot_atrophy(df, path_output): min_rescale = min(df['rescale_area'].values) max_rescale = max(df['rescale_area'].values) plt.plot([min_rescale, max_rescale], [min_rescale, max_rescale], ls="--", c=".3") - plt.title('boxplot of measured atrophy in function of area rescaling') - plt.ylabel('measured atrophy in %') - plt.xlabel('area rescaling in %') - plt.suptitle("") - # TODO: scale x and y similarly - # TODO: add diagonal (remove horizontal lines) + plt.title('Boxplot of measured atrophy in function of CSA scaling') + plt.ylabel('Atrophied CSA in % of the un-rescaled CSA') + plt.xlabel('CSA scaling') + plt.axis('scaled') output_file = os.path.join(path_output, "fig_boxplot_atrophy.png") plt.savefig(output_file) print("--> Created figure: {}".format(output_file)) -def plot_sample_size(z_conf, z_power, std, mean_csa, path_output): +def plot_sample_size(z_conf, z_power, std_arr, mean_csa, path_output): """plot minimum number of patients required to detect 
an atrophy of a given value :param z_conf: z score for X % uncertainty. Example: z_conf=1.96 :param z_power: z score for X % Power. Example: z_power=(0.84, 1.282) - :param std: STD around mean CSA of control subjects (without rescaling), + :param std_arr: STD around mean CSA of control subjects (without rescaling), CSA STD for atrophied subjects and control subjects are considered equal :param mean_csa: mean value of CSA to compute the atrophy percentage. Example: 80 :param path_output: directory in which plot is saved """ fig_sample, ax = plt.subplots() # data for plotting - n = [] + n_t1 = [] + n_t2 =[] for z_p in z_power: - atrophy = np.arange(1.5, 8.0, 0.05) # x_axis values ranging from 1.5 to 8.0 mm^2 - num_n = 2 * ((z_conf + z_p) ** 2) * (std ** 2) # numerator of sample size equation - n.append(num_n / ((atrophy) ** 2)) + # x_axis values ranging from 1.5 to 8.0 mm^2 + atrophy = np.arange(1.5, 8.0, 0.05) + # numerator of sample size equation T1w + num_n_t1 = 2 * ((z_conf + z_p) ** 2) * (std_arr[0] ** 2) + n_t1.append(num_n_t1 / (atrophy ** 2)) + # numerator of sample size equation T2w + num_n_t2 = 2 * ((z_conf + z_p) ** 2) * (std_arr[1] ** 2) + n_t2.append(num_n_t2 / (atrophy ** 2)) # plot - ax.plot(atrophy, n[0], label='80% power') - ax.plot(atrophy, n[1], label='90% power') + ax.plot(atrophy / mean_csa[0] * 100, n_t1[0], 'b.', markevery=3, markersize=10, label='t1 80% power') + ax.plot(atrophy / mean_csa[0] * 100, n_t1[1], 'r.', markevery=3, markersize=10, label='t1 90% power') + ax.plot(atrophy / mean_csa[1] * 100, n_t2[0], 'c', label='t2 80% power') + ax.plot(atrophy / mean_csa[1] * 100, n_t2[1], 'm', label='t2 90% power') ax.set_ylabel('number of participants per group of study \n(patients or controls) with ratio 1:1') - ax.set_xlabel('atrophy in mm^2') - # create global variable for secax (second axis) functions - global mean_csa_sample - mean_csa_sample = mean_csa - - ax.set_title('minimum number of participants to detect an atrophy with 5% 
uncertainty\n std = ' + str( - round(std, 2)) + 'mm², mean_csa = ' + str(mean_csa_sample) + 'mm²') + ax.set_xlabel('atrophy in %') + plt.suptitle('minimum number of participants to detect an atrophy with 5% uncertainty', fontsize=16, y=1.05) + ax.set_title('T1w (1.0 mm iso): SD = {} mm², mean CSA= {} mm² \nT2w (0.8 mm iso): SD = {} mm², mean CSA= {} mm²'.format( + str( + round(std_arr[0], 2)), str(mean_csa[0]), str(round(std_arr[1], 2)) , str(mean_csa[1]))) ax.legend() ax.grid() - def forward(atrophy): - return atrophy / mean_csa_sample * 100 + output_file = os.path.join(path_output, "fig_min_subj.png") + plt.savefig(output_file, bbox_inches='tight') + print("--> Created figure: {}".format(output_file)) - def inverse(atrophy): - return atrophy / 100 * mean_csa_sample - secax = ax.secondary_xaxis('top', functions=(forward, inverse)) - secax.set_xlabel('atrophy in %') - output_file = os.path.join(path_output, "fig_min_subj.png") +def error_function_of_csa(df, path_output): + """Scatter plot of CSA in function of error % + :param df: dataframe for computing stats per subject: df_sub + :param path_output: directory in which plot is saved + """ + fig, ax = plt.subplots(figsize=(7, 7)) + df['Normalized CSA in mm2'] = df['mean'].div(df['rescale']) + df['error %'] = df['perc_error'] + # compute linear regression + z = np.polyfit(x=df.loc[:, 'error %'], y=df.loc[:, 'Normalized CSA in mm2'], deg=1) + p = np.poly1d(z) + # plot + df.plot.scatter(x='error %', y='Normalized CSA in mm2', c='rescale', colormap='viridis') + min_err = min(df['error %'].values) + max_err = max(df['error %'].values) + plt.plot([min_err, max_err], [min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3") + plt.title('Normalized CSA in function of error %,\n linear regression: {}'.format(p)) + # plt.title("CSA in function of % error") + ax.legend(loc='upper right') + plt.grid() + output_file = os.path.join(path_output, "fig_err_in_function_of_csa.png") plt.savefig(output_file, bbox_inches='tight') 
print("--> Created figure: {}".format(output_file)) @@ -430,6 +451,8 @@ def main(): df_rescale['std_intra'] = df_sub.groupby('rescale').mean()['std'].values df_rescale['cov_intra'] = df_sub.groupby('rescale').mean()['cov'].values df_rescale['std_inter'] = df_sub.groupby('rescale').std()['mean'].values + df_rescale['cov_inter'] = df_sub.groupby('rescale').std()['mean'].div( + df_sub.groupby('rescale').mean()['mean']).values df_rescale['mean_rescale_estimated'] = df_sub.groupby('rescale').mean()['rescale_estimated'].values df_rescale['std_rescale_estimated'] = df_sub.groupby('rescale').std()['rescale_estimated'].values df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values @@ -438,7 +461,7 @@ def main(): df_rescale['sample_size'] = sample_size(df_rescale, config_param) print(df_rescale) # save dataframe in a csv file - df_sub.to_csv(os.path.join(path_output, r'csa_transfo.csv')) + df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv')) # plot graph if verbose is present if arguments.fig: @@ -461,11 +484,14 @@ def main(): z_score_power = config_param['fig']['sample_size']['power'] # std = STD of subjects without rescaling CSA values # mean_csa = mean CSA value of subjects without rescaling - plot_sample_size(z_conf=z_score_confidence, z_power=z_score_power, std=df_rescale.loc[1, 'std_inter'], mean_csa=df_rescale.loc[1, 'mean_inter'], path_output=path_output) + plot_sample_size(z_conf=z_score_confidence , z_power=z_score_power , std_arr=[7.56 , 8.29] , + mean_csa=[69.70 , 76.12] , path_output=path_output) # scatter plot of COV in function of error % error_function_of_intra_cov(df_sub, path_output=path_output) # scatter plot of COV in function of error % to identify outliers error_function_of_intra_cov_outlier(df_sub, path_output=path_output) + # scatter plot of CSA in function of error % + error_function_of_csa(df_sub, path_output=path_output) if __name__ == "__main__": main() From a1a3d3c246ffcae002158ab444f56b678f802725 Mon Sep 
17 00:00:00 2001 From: paul Date: Thu, 11 Mar 2021 13:09:06 -0500 Subject: [PATCH 02/40] add: - expected trend and mean values on CSA boxplot - mean values on atrophy boxplot - add function for adding pearson's r and p-value stats - add diff and std diff column in dataframe for sample size computation - add longitudinal study sample size change: - correct difference between means (atrophy_% * CSA) on sample size plot - correct x_label on error_function_of_CSA - correct x_label on error_function_of_intra_cov - automatize the detection of outliers on error_function_of_intra_cov_outlier --- csa_rescale_stat.py | 186 ++++++++++++++++++++++++-------------------- 1 file changed, 103 insertions(+), 83 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 9f43588..d84e19f 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -22,6 +22,7 @@ import matplotlib.pyplot as plt from math import ceil from ruamel.yaml import YAML +from scipy import stats # Parser @@ -120,12 +121,16 @@ def boxplot_csa(df, path_output): :param df: dataframe with csv files data: df_sub :param path_output: directory in which plot is saved """ - # TODO: round xlabel - # TODO: find a way to display ylabel title with superscript fig2 = plt.figure() + meanpointprops = dict(marker='x' , markeredgecolor='red' , + markerfacecolor='red') # round rescale area df['rescale_area'] = round(df['rescale_area'], ndigits=0).astype(int) - df.boxplot(column=['mean'], by='rescale_area', showmeans=True, meanline=True) + df.boxplot(column=['mean'], by='rescale_area', positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanprops=meanpointprops) + min_rescale = min(df['rescale_area'].values) + max_rescale = max(df['rescale_area'].values) + max_y = df.groupby('rescale').get_group(1).mean()['mean'] + plt.plot([min_rescale, max_rescale], [max_y * (0.93 ** 2) , max_y] , ls="--" , c=".3") plt.title('Boxplot of CSA in function of area rescaling') plt.suptitle("") plt.ylabel('CSA in mm^2') 
@@ -145,11 +150,14 @@ def boxplot_atrophy(df, path_output): df['rescale_estimated'] = df['rescale_estimated'] * 100 # round rescale area df['rescale_area'] = round(df['rescale_area'], ndigits=0).astype(int) - df.boxplot(column='rescale_estimated', by='rescale_area', positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanline=True, figsize=(10,8)) + meanpointprops = dict(marker='x' , markeredgecolor='red' , + markerfacecolor='red') + df.boxplot(column='rescale_estimated', by='rescale_area', positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanline=False, figsize=(10,8), meanprops=meanpointprops) min_rescale = min(df['rescale_area'].values) max_rescale = max(df['rescale_area'].values) plt.plot([min_rescale, max_rescale], [min_rescale, max_rescale], ls="--", c=".3") plt.title('Boxplot of measured atrophy in function of CSA scaling') + plt.suptitle("") plt.ylabel('Atrophied CSA in % of the un-rescaled CSA') plt.xlabel('CSA scaling') plt.axis('scaled') @@ -164,7 +172,7 @@ def plot_sample_size(z_conf, z_power, std_arr, mean_csa, path_output): :param z_power: z score for X % Power. Example: z_power=(0.84, 1.282) :param std_arr: STD around mean CSA of control subjects (without rescaling), CSA STD for atrophied subjects and control subjects are considered equal - :param mean_csa: mean value of CSA to compute the atrophy percentage. Example: 80 + :param mean_csa: mean value of CSA to compute the atrophy percentage. 
Example: 80 mm^2 :param path_output: directory in which plot is saved """ fig_sample, ax = plt.subplots() @@ -176,10 +184,10 @@ def plot_sample_size(z_conf, z_power, std_arr, mean_csa, path_output): atrophy = np.arange(1.5, 8.0, 0.05) # numerator of sample size equation T1w num_n_t1 = 2 * ((z_conf + z_p) ** 2) * (std_arr[0] ** 2) - n_t1.append(num_n_t1 / (atrophy ** 2)) + n_t1.append(num_n_t1 / ((0.01*atrophy*mean_csa[0]) ** 2)) # numerator of sample size equation T2w num_n_t2 = 2 * ((z_conf + z_p) ** 2) * (std_arr[1] ** 2) - n_t2.append(num_n_t2 / (atrophy ** 2)) + n_t2.append(num_n_t2 / ((0.01*atrophy*mean_csa[1]) ** 2)) # plot ax.plot(atrophy / mean_csa[0] * 100, n_t1[0], 'b.', markevery=3, markersize=10, label='t1 80% power') ax.plot(atrophy / mean_csa[0] * 100, n_t1[1], 'r.', markevery=3, markersize=10, label='t1 90% power') @@ -199,23 +207,22 @@ def plot_sample_size(z_conf, z_power, std_arr, mean_csa, path_output): def error_function_of_csa(df, path_output): - """Scatter plot of CSA in function of error % + """Scatter plot of CSA as a function of Mean error % :param df: dataframe for computing stats per subject: df_sub :param path_output: directory in which plot is saved """ fig, ax = plt.subplots(figsize=(7, 7)) - df['Normalized CSA in mm2'] = df['mean'].div(df['rescale']) - df['error %'] = df['perc_error'] + df['Normalized CSA in mm²'] = df['mean'].div(df['rescale']) # compute linear regression - z = np.polyfit(x=df.loc[:, 'error %'], y=df.loc[:, 'Normalized CSA in mm2'], deg=1) + z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'Normalized CSA in mm²'], deg=1) p = np.poly1d(z) # plot - df.plot.scatter(x='error %', y='Normalized CSA in mm2', c='rescale', colormap='viridis') - min_err = min(df['error %'].values) - max_err = max(df['error %'].values) + df.plot.scatter(x='perc_error', y='Normalized CSA in mm²', c='rescale', colormap='viridis') + min_err = min(df['perc_error'].values) + max_err = max(df['perc_error'].values) plt.plot([min_err, max_err], 
[min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3") plt.title('Normalized CSA in function of error %,\n linear regression: {}'.format(p)) - # plt.title("CSA in function of % error") + plt.xlabel('Mean error %') ax.legend(loc='upper right') plt.grid() output_file = os.path.join(path_output, "fig_err_in_function_of_csa.png") @@ -223,40 +230,6 @@ def error_function_of_csa(df, path_output): print("--> Created figure: {}".format(output_file)) -def sample_size(df, config_param): - """ - Calculate the minimum number of patients required to detect an atrophy of a given value (i.e. power analysis), - ratio patients/control 1:1 and with the assumption that both samples have the same STD. - ref: Suresh and Chandrashekara 2012. “Sample size estimation and power analysis for clinical research studies” - doi: 10.4103/0974-1208.97779 - :param df: dataframe for computing stats across subject: df_rescale - :param config_param: configuration parameters can be modified in config.yaml file. Example conf = 0.95 - :return sample_size: sample size for each rescaling - """ - sample_size = [] - # configuration parameters can be modified in config.yaml file - # conf = confidence level - conf = config_param['stats']['sample_size']['conf'] - # power = power level - power = config_param['stats']['sample_size']['power'] - z_score_dict = {'confidence_Level': [0.60, 0.70, 0.8, 0.85, 0.90, 0.95], - 'z_value': [0.842, 1.04, 1.28, 1.44, 1.64, 1.96], } - - df_z = pd.DataFrame(z_score_dict) - df_z = df_z.set_index('confidence_Level') - for name, group in df.groupby('rescale'): - std = group['std_inter'].values[0] - mean_patient = group['mean_inter'].values[0] - mean_control = df.groupby('rescale').get_group(1)['mean_inter'].values[0] - atrophy = mean_control - mean_patient - if atrophy != 0: - num_n = 2 * ((df_z.at[conf, 'z_value'] + df_z.at[power, 'z_value']) ** 2) * (std ** 2) - deno_n = (abs(atrophy)) ** 2 - sample_size.append(ceil(num_n / deno_n)) - else: - sample_size.append('inf') - return 
sample_size - def error_function_of_intra_cov(df, path_output): """Scatter plot of intra-subject COV in function of error % :param df: dataframe for computing stats per subject: df_sub @@ -273,10 +246,9 @@ def error_function_of_intra_cov(df, path_output): min_err = min(df['perc_error'].values) max_err = max(df['perc_error'].values) plt.plot([min_err, max_err], [min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3") - ax.set_xlabel('perc_error') - ax.set_ylabel('cov') + plt.xlabel('Mean error %') + plt.ylabel('COV') plt.title('COV in function of % error,\n linear regression: {}'.format(p)) - plt.title("COV in function of % error") ax.legend(loc='upper right') plt.grid() output_file = os.path.join(path_output, "fig_err_in_function_of_cov.png") @@ -285,42 +257,30 @@ def error_function_of_intra_cov(df, path_output): def error_function_of_intra_cov_outlier(df, path_output): - """Scatter plot of intra-subject COV in function of error % to identify outliers with either high error % - or high intra-subject COV + """Scatter plot of intra-subject COV in function of error % to identify the worst outliers (outside the interval + [Q1-10IQR, Q3+10IQR] of percentage error) :param df: dataframe for computing stats per subject: df_sub :param path_output: directory in which plot is saved """ fig, ax = plt.subplots(figsize=(7, 7)) - # identified outliers either high error % or high intra-subject COV of t1 images - outliers_t1_all = ['sub-brnoUhb01', 'sub-brnoUhb02', 'sub-brnoUhb03', 'sub-brnoUhb06', 'sub-brnoUhb07', 'sub-brnoUhb08', - 'sub-barcelona04', 'sub-barcelona06', 'sub-beijingPrisma03', 'sub-milan03', 'sub-oxfordFmrib01', - 'sub-tokyo750w03', 'sub-pavia04'] - # identified t1 outliers remaining after subjects removed due to missing vertebrae levels missing CSA - outliers_t1 = ['sub-brnoUhb01', 'sub-brnoUhb08', 'sub-milan03', 'sub-oxfordFmrib01', 'sub-cmrrb05', - 'sub-tokyo750w03', 'sub-pavia04'] - # identified outliers either high error % or high intra-subject COV of t2 
images - outliers_t2_all = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02', - 'sub-beijingVerio03', 'sub-ubc02', 'sub-vuiisIngenia05'] - # identified t2 outliers remaining after subjects removed due to missing vertebrae levels missing CSA - outliers_t2 = ['sub-tokyo750w02', 'sub-tokyo750w04', 'sub-tokyo750w06', 'sub-beijingVerio02', 'sub-beijingVerio03', - 'sub-ubc02', 'sub-vuiisIngenia05'] + Q1 = df['perc_error'].quantile(0.25) + Q3 = df['perc_error'].quantile(0.75) + # IQR = inter-quartile range. + IQR = Q3 - Q1 + # identified outliers either high error % or high intra-subject COV + outliers = set(df[(df['perc_error'] <= Q1 - 10 * IQR) | (df['perc_error'] >= Q3 + 10 * IQR)]['subject']) # remove rescale=1 because error=0 df = df[df['rescale'] != 1] ax.scatter(df['perc_error'], df['cov'], color='tab:blue', label='others') - # plot scatter outliers of t1w images - for outlier_t1 in outliers_t1: - df_t1 = df.groupby(['subject']).get_group(outlier_t1) - ax.scatter(df_t1['perc_error'], df_t1['cov'], color='tab:red', label=outlier_t1) + # scatter outliers + for outlier in outliers: + df_t1 = df.groupby(['subject']).get_group(outlier) + ax.scatter(df_t1['perc_error'], df_t1['cov'], color='tab:red', label=outlier) df_t1 = [] - # plot scatter outliers of t2w images - for outlier_t2 in outliers_t2: - df_t2 = df.groupby(['subject']).get_group(outlier_t2) - ax.scatter(df_t2['perc_error'], df_t2['cov'], color='tab:olive', label=outlier_t2) - df_t2 = [] # plot - ax.set_xlabel('perc_error') - ax.set_ylabel('cov') - plt.title("COV in function of error %") + ax.set_xlabel('Mean error %') + ax.set_ylabel('COV') + plt.title("Intra subject COV as a function of mean absolute error %") ax.legend(loc='upper right') plt.grid() # save image @@ -353,6 +313,64 @@ def add_columns_df_sub(df): df = df.reset_index() return df +def pearson(df, df_rescale): + """ The associated Pearson’s coefficients and p-value between subject’s CSA and the associated Pearson’s + 
coefficients and p-value between COV across Monte Carlo transformations (i.e. intra-subject variability) and mean + CSA error :param df: dataframe for computing stats per subject: df_sub :param df_rescale: dataframe for computing + stats per rescale: df_rescale :return df_rescale: modified dataframe with added theoretic_csa and + csa_without_rescale + """ + pearson_cov = [] + p_value_cov = [] + pearson_csa = [] + p_value_csa = [] + for rescale_area, group in df.groupby('rescale_area'): + pearson_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[0]) + p_value_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[1]) + pearson_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[0]) + p_value_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[1]) + df_rescale['pearson_cov'] = pearson_cov + df_rescale['p_value_cov'] = p_value_cov + df_rescale['pearson_csa'] = pearson_csa + df_rescale['p_value_csa'] = p_value_csa + return df_rescale + +def sample_size(df, df_rescale): + """ Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a + two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a + within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test). 
+ :param df: dataframe for computing stats per subject: df_sub :param df_rescale: dataframe for computing stats per + subject: df_rescale :return df_rescale: modified dataframe with added sample_size_80 (between subjects at 80% + power), sample_size_90 (between subjects at 90% power), sample_size_long_80 (within subjects at 80% power) and + sample_size_long_90 (within subjects at 90% power) + """ + sample_size_80 = [] + sample_size_90 = [] + sample_size_long_80 = [] + sample_size_long_90 = [] + CSA = df.groupby('rescale').mean()['mean'].values[-1] + for rescale_area , group in df.groupby('rescale_area'): + if rescale_area != 100: + # sample size between-subject + std_2 = df_rescale.groupby('rescale_area').get_group(100)['std_inter'].values[0] ** 2 + sample_size_80.append(np.ceil((2 * ((1.96 + 0.84) ** 2) * (std_2)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + sample_size_90.append(np.ceil((2 * ((1.96 + 1.28) ** 2) * (std_2)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + # sample size within-subject + std_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values[0] ** 2 + sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (std_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (std_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + else: + sample_size_80.append('inf') + sample_size_90.append('inf') + sample_size_long_80.append('inf') + sample_size_long_90.append('inf') + df_rescale['sample_size_80'] = sample_size_80 + df_rescale['sample_size_90'] = sample_size_90 + df_rescale['sample_size_long_80'] = sample_size_long_80 + df_rescale['sample_size_long_90'] = sample_size_long_90 + return df_rescale + + def main(): """ @@ -433,8 +451,9 @@ def main(): df_sub['cov'] = df_sub['std'].div(df_sub['mean']) df_sub = add_columns_df_sub(df_sub) df_sub['rescale_estimated'] = df_sub['mean'].div(df_sub['csa_without_rescale']) - df_sub['error'] = (df_sub['mean'] - 
df_sub['theoretic_csa']).abs() - df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).abs().div(df_sub['theoretic_csa']) + df_sub['diff'] = df_sub['csa_without_rescale'] - df_sub['mean'] + df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa']) + df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).div(df_sub['theoretic_csa']) print(df_sub) # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) @@ -458,8 +477,9 @@ def main(): df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values - df_rescale['sample_size'] = sample_size(df_rescale, config_param) - print(df_rescale) + df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values + df_rescale = pearson(df_sub, df_rescale) + df_rescale = sample_size(df_sub, df_rescale) # save dataframe in a csv file df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv')) From d48a7b87afe3294a2d8873d441e02658bfe1b13f Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 11 Mar 2021 13:19:23 -0500 Subject: [PATCH 03/40] - update README for longitudinal study sample size --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3e18418..b587580 100755 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ Per-subject stat: Panda dataframe `df_sub`: - rescale_estimated_subject MEAN: MEAN[CSA(sI, rX, :) / CSA(sI, 1, :)] --> MEAN_rescale_estimated_subject(sI, rX): `df_sub['rescale_estimated']` - intra-subject error MEAN: MEAN[CSA(sI, rX, :)] - (rX^2 * MEAN[CSA(sI, 1, :)]) --> MEAN_error_intra(sI, rX): `df_sub['error']` - intra-subject error in percentage MEAN: [MEAN[CSA(sI, rX, :)] - (rX^2 * MEAN[CSA(sI, 1, :)])] / MEAN[CSA(sI, rX, :)] --> MEAN_error_intra_perc(sI, rX): `df_sub['perc_error']` +- intra-subject 
difference MEAN: [MEAN[CSA(sI, 1, :)] - (rX^2 * MEAN[CSA(sI, rX, :)])] --> MEAN_diff(sI, rX): `df_sub['diff']` Across-subject stats: Panda dataframe `df_rescale` - intra-subject STD: MEAN[STD_intra(:, rX)] --> STD_intra(rX): `df_rescale['std_intra']` @@ -81,9 +82,11 @@ Across-subject stats: Panda dataframe `df_rescale` - rescale_estimated (across subjects) STD: STD[MEAN_rescale_estimated_subject(:, rX)] --> STD_rescale_estimated(rX): `df_rescale['std_rescale_estimated']` - error in percentage (across subjects) MEAN: MEAN[MEAN_error_intra(:, rX)] - error in percentage (across subjects) STD: STD[MEAN_error_intra(:, rX)] +- intra-subject difference STD: STD[MEAN_diff(:, rX)] Power analysis: -- sample size: [(z(uncertainty) + z(power))^2 * (2 * STD[MEAN(:, rX)]^2)] / [MEAN[CSA(sI, 1, :)] - MEAN[CSA(sI, rX, :)]] +- sample size for a between subject study: [(z(uncertainty) + z(power))^2 * (2 * STD[MEAN(:, rX)]^2)] / [MEAN[CSA(sI, 1, :)] - MEAN[CSA(sI, rX, :)]] +- sample size for a within subject study: [(z(uncertainty) + z(power))^2 * (STD[MEAN_diff(:, rX)]^2)] / [MEAN[CSA(sI, 1, :)] - MEAN[CSA(sI, rX, :)]] Plot results: - STD_intersub From 645dcc13a00baa7c200c052095a05de9935ed6e0 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 11 Mar 2021 13:21:39 -0500 Subject: [PATCH 04/40] update ref in csa_rescale_stat for sample size computation --- csa_rescale_stat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index d84e19f..f944459 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -339,6 +339,7 @@ def sample_size(df, df_rescale): """ Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test). + ref. 
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3148614/ :param df: dataframe for computing stats per subject: df_sub :param df_rescale: dataframe for computing stats per subject: df_rescale :return df_rescale: modified dataframe with added sample_size_80 (between subjects at 80% power), sample_size_90 (between subjects at 90% power), sample_size_long_80 (within subjects at 80% power) and From f4f5c4b1301bcff942771a0ae445c67118f9e5a6 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 11 Mar 2021 13:24:11 -0500 Subject: [PATCH 05/40] correct rescale_estimated_subject in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b587580..83dfd43 100755 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ Per-subject stat: Panda dataframe `df_sub`: - intra-subject MEAN: MEAN[CSA(sI, rX, :)] --> MEAN_intra(sI, rX): `df_sub['mean']` - intra-subject STD: STD[CSA(sI, rX, :)] --> STD_intra(sI, rX): `df_sub['std']` - intra-subject COV: STD[CSA(sI, rX, :)] / MEAN[CSA(sI, rX, :)] --> COV_intra(sI, rX): `df_sub['cov']` -- rescale_estimated_subject MEAN: MEAN[CSA(sI, rX, :) / CSA(sI, 1, :)] --> MEAN_rescale_estimated_subject(sI, rX): `df_sub['rescale_estimated']` +- rescale_estimated_subject MEAN: MEAN[ CSA(sI, rX, :) ] / MEAN[ CSA(sI, 1, :) ] --> MEAN_rescale_estimated_subject(sI, rX): `df_sub['rescale_estimated']` - intra-subject error MEAN: MEAN[CSA(sI, rX, :)] - (rX^2 * MEAN[CSA(sI, 1, :)]) --> MEAN_error_intra(sI, rX): `df_sub['error']` - intra-subject error in percentage MEAN: [MEAN[CSA(sI, rX, :)] - (rX^2 * MEAN[CSA(sI, 1, :)])] / MEAN[CSA(sI, rX, :)] --> MEAN_error_intra_perc(sI, rX): `df_sub['perc_error']` - intra-subject difference MEAN: [MEAN[CSA(sI, 1, :)] - (rX^2 * MEAN[CSA(sI, rX, :)])] --> MEAN_diff(sI, rX): `df_sub['diff']` From 79e1269a4d62800fc93125ccc81679455220ea53 Mon Sep 17 00:00:00 2001 From: Julien Cohen-Adad Date: Wed, 17 Mar 2021 17:01:00 -0400 Subject: [PATCH 06/40] Create output folder if does not exist 
--- csa_rescale_stat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index f944459..8bd72d5 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -384,8 +384,11 @@ def main(): vertlevels_input = arguments.l path_output = os.path.abspath(arguments.o) + # Create output folder + os.makedirs(path_output, exist_ok=True) + # aggregate all csv results files - concatenate_csv_files(path_results) + # concatenate_csv_files(path_results) # read data data = pd.read_csv(os.path.join(path_results, r'csa_all.csv'), decimal=".") From 08b1d01ec6b45315e4f1455bd91d03cb994230da Mon Sep 17 00:00:00 2001 From: paul Date: Wed, 17 Mar 2021 17:05:35 -0400 Subject: [PATCH 07/40] change std to var in sample size --- csa_rescale_stat.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index f944459..d2c9cbb 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -353,13 +353,14 @@ def sample_size(df, df_rescale): for rescale_area , group in df.groupby('rescale_area'): if rescale_area != 100: # sample size between-subject - std_2 = df_rescale.groupby('rescale_area').get_group(100)['std_inter'].values[0] ** 2 - sample_size_80.append(np.ceil((2 * ((1.96 + 0.84) ** 2) * (std_2)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) - sample_size_90.append(np.ceil((2 * ((1.96 + 1.28) ** 2) * (std_2)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + var = df_rescale.groupby('rescale_area').get_group(100)['std_inter'].values[0] ** 2 + df_rescale.groupby('rescale_area').get_group(rescale_area)['std_inter'].values[0] ** 2 + sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) # sample size within-subject - std_diff = 
df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values[0] ** 2 - sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (std_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) - sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (std_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + var_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values ** 2 + # mean_diff_2 = df_rescale.groupby('rescale_area').get_group(rescale_area)['mean_diff'].values[0] ** 2 + sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / ((0.1*((100 - rescale_area) / 100) * CSA) ** 2))) + sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / ((0.1* ((100 - rescale_area) / 100) * CSA) ** 2))) else: sample_size_80.append('inf') sample_size_90.append('inf') @@ -385,7 +386,7 @@ def main(): path_output = os.path.abspath(arguments.o) # aggregate all csv results files - concatenate_csv_files(path_results) + # concatenate_csv_files(path_results) # read data data = pd.read_csv(os.path.join(path_results, r'csa_all.csv'), decimal=".") @@ -478,6 +479,7 @@ def main(): df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values + df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values df_rescale = pearson(df_sub, df_rescale) df_rescale = sample_size(df_sub, df_rescale) From c0f0596fc42fa67582543c3fc490333a14ee48a0 Mon Sep 17 00:00:00 2001 From: Julien Cohen-Adad Date: Wed, 17 Mar 2021 17:29:54 -0400 Subject: [PATCH 08/40] Added TODOs --- csa_rescale_stat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 8bd72d5..0d3ce20 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py 
@@ -353,6 +353,8 @@ def sample_size(df, df_rescale): for rescale_area , group in df.groupby('rescale_area'): if rescale_area != 100: # sample size between-subject + # TODO include non-100% aread in computation of STD + # TODO: rename std as var std_2 = df_rescale.groupby('rescale_area').get_group(100)['std_inter'].values[0] ** 2 sample_size_80.append(np.ceil((2 * ((1.96 + 0.84) ** 2) * (std_2)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) sample_size_90.append(np.ceil((2 * ((1.96 + 1.28) ** 2) * (std_2)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) From de28fc3c2c9a8d4284adf690f14c08f3933dd01d Mon Sep 17 00:00:00 2001 From: paul Date: Wed, 17 Mar 2021 21:43:54 -0400 Subject: [PATCH 09/40] - correct sample size formula --- csa_rescale_stat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index d2c9cbb..fc7717b 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -358,9 +358,9 @@ def sample_size(df, df_rescale): sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) # sample size within-subject var_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values ** 2 - # mean_diff_2 = df_rescale.groupby('rescale_area').get_group(rescale_area)['mean_diff'].values[0] ** 2 - sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / ((0.1*((100 - rescale_area) / 100) * CSA) ** 2))) - sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / ((0.1* ((100 - rescale_area) / 100) * CSA) ** 2))) + # mean_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['mean_diff'].values[0] + sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) else: sample_size_80.append('inf') sample_size_90.append('inf') 
From f72e10768d2341321f3959ce790e26f799a25a42 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 13 May 2021 11:37:34 -0400 Subject: [PATCH 10/40] - correct diff formula by integrating transformation variability (before this diff was computed from mean CSA across transformations) --- csa_rescale_stat.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index fc7717b..2a51121 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -349,18 +349,17 @@ def sample_size(df, df_rescale): sample_size_90 = [] sample_size_long_80 = [] sample_size_long_90 = [] - CSA = df.groupby('rescale').mean()['mean'].values[-1] for rescale_area , group in df.groupby('rescale_area'): if rescale_area != 100: # sample size between-subject + CSA_mean_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['mean_diff'].values[0] var = df_rescale.groupby('rescale_area').get_group(100)['std_inter'].values[0] ** 2 + df_rescale.groupby('rescale_area').get_group(rescale_area)['std_inter'].values[0] ** 2 - sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) - sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2))) + sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2))) # sample size within-subject - var_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values ** 2 - # mean_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['mean_diff'].values[0] - sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) - sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / ((((100 - rescale_area) / 100) * CSA) ** 2))) + var_diff = 
df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values[0] ** 2 + sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) + sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) else: sample_size_80.append('inf') sample_size_90.append('inf') @@ -453,10 +452,13 @@ def main(): df_sub['cov'] = df_sub['std'].div(df_sub['mean']) df_sub = add_columns_df_sub(df_sub) df_sub['rescale_estimated'] = df_sub['mean'].div(df_sub['csa_without_rescale']) - df_sub['diff'] = df_sub['csa_without_rescale'] - df_sub['mean'] df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa']) df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).div(df_sub['theoretic_csa']) - print(df_sub) + diff = [] + for rescale, group in df.groupby('rescale'): + for sub, subgroup in group.groupby('subject'): + diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=len(subgroup))['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=len(subgroup))['MEAN(area)'].values).mean()) + df_sub['diff'] = diff # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) @@ -485,7 +487,6 @@ def main(): df_rescale = sample_size(df_sub, df_rescale) # save dataframe in a csv file df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv')) - # plot graph if verbose is present if arguments.fig: if path_output: From d39cc250c359e8d36e444e977164fdef939278fb Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 13 May 2021 12:06:20 -0400 Subject: [PATCH 11/40] - correct sample size formula --- csa_rescale_stat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 2a51121..c9a9767 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -457,7 +457,7 @@ def main(): diff = [] for rescale, group in df.groupby('rescale'): for sub, subgroup in group.groupby('subject'): 
- diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=len(subgroup))['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=len(subgroup))['MEAN(area)'].values).mean()) + diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=100, replace=True)['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=100, replace=True)['MEAN(area)'].values).mean()) df_sub['diff'] = diff # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) From ab543987ccd5828a88c4e5d484e641a4e74f9285 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 13 May 2021 15:54:37 -0400 Subject: [PATCH 12/40] - remove replacement in diff formula for df_sub --- csa_rescale_stat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index c9a9767..d030290 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -456,9 +456,8 @@ def main(): df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).div(df_sub['theoretic_csa']) diff = [] for rescale, group in df.groupby('rescale'): - for sub, subgroup in group.groupby('subject'): - diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=100, replace=True)['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=100, replace=True)['MEAN(area)'].values).mean()) - df_sub['diff'] = diff + diff.append(df.groupby('rescale').get_group(1).groupby('subject').sample(n=1)['MEAN(area)'].values - group.groupby('subject').sample(n=1)['MEAN(area)'].values) + df_sub['diff'] = np.concatenate(diff, axis=0) # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) From a67f7735878bc56735597ced1079cff6be7fb940 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 13 May 2021 17:31:55 -0400 Subject: [PATCH 13/40] - use absolute for mean diff and make difference subject dependant --- 
csa_rescale_stat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index d030290..df0ca26 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -456,8 +456,10 @@ def main(): df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).div(df_sub['theoretic_csa']) diff = [] for rescale, group in df.groupby('rescale'): - diff.append(df.groupby('rescale').get_group(1).groupby('subject').sample(n=1)['MEAN(area)'].values - group.groupby('subject').sample(n=1)['MEAN(area)'].values) + for sub, subgroup in group.groupby('subject'): + diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=1)['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=1)['MEAN(area)']).values) df_sub['diff'] = np.concatenate(diff, axis=0) + df_sub['diff_abs'] = np.abs(np.concatenate(diff, axis=0)) # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) @@ -480,7 +482,7 @@ def main(): df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values - df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values + df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff_abs'].values df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values df_rescale = pearson(df_sub, df_rescale) df_rescale = sample_size(df_sub, df_rescale) From ab1eae1dfaf2692069e91757de99ed38c86dd525 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 13 May 2021 17:35:20 -0400 Subject: [PATCH 14/40] - remove diff abs because useless (SD uses squared difference anyway) --- csa_rescale_stat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index df0ca26..ef2cf36 100755 --- 
a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -458,8 +458,7 @@ def main(): for rescale, group in df.groupby('rescale'): for sub, subgroup in group.groupby('subject'): diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=1)['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=1)['MEAN(area)']).values) - df_sub['diff'] = np.concatenate(diff, axis=0) - df_sub['diff_abs'] = np.abs(np.concatenate(diff, axis=0)) + df_sub['diff'] = np.abs(np.concatenate(diff, axis=0)) # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) @@ -482,7 +481,7 @@ def main(): df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values - df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff_abs'].values + df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values df_rescale = pearson(df_sub, df_rescale) df_rescale = sample_size(df_sub, df_rescale) From 787443937c182d9fc98b3f8172a1fdd378ae2ded Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 13 May 2021 17:38:11 -0400 Subject: [PATCH 15/40] - remove diff abs because useless --- csa_rescale_stat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index ef2cf36..7d2db40 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -458,7 +458,7 @@ def main(): for rescale, group in df.groupby('rescale'): for sub, subgroup in group.groupby('subject'): diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=1)['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=1)['MEAN(area)']).values) - df_sub['diff'] = np.abs(np.concatenate(diff, axis=0)) + df_sub['diff'] = 
np.concatenate(diff, axis=0) # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) From e599ab7cfa1c848ab7469d58c6c394a3005e04e2 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 20 May 2021 10:43:50 -0400 Subject: [PATCH 16/40] - change formula for sample size - add comments --- csa_rescale_stat.py | 65 ++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 7d2db40..9add12d 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -335,7 +335,7 @@ def pearson(df, df_rescale): df_rescale['p_value_csa'] = p_value_csa return df_rescale -def sample_size(df, df_rescale): +def sample_size(df, df_sub, df_rescale, itt = 300): """ Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test). 
@@ -349,26 +349,40 @@ def sample_size(df, df_rescale): sample_size_90 = [] sample_size_long_80 = [] sample_size_long_90 = [] - for rescale_area , group in df.groupby('rescale_area'): - if rescale_area != 100: - # sample size between-subject - CSA_mean_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['mean_diff'].values[0] - var = df_rescale.groupby('rescale_area').get_group(100)['std_inter'].values[0] ** 2 + df_rescale.groupby('rescale_area').get_group(rescale_area)['std_inter'].values[0] ** 2 - sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2))) - sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2))) - # sample size within-subject - var_diff = df_rescale.groupby('rescale_area').get_group(rescale_area)['std_diff'].values[0] ** 2 - sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) - sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) - else: - sample_size_80.append('inf') - sample_size_90.append('inf') - sample_size_long_80.append('inf') - sample_size_long_90.append('inf') - df_rescale['sample_size_80'] = sample_size_80 - df_rescale['sample_size_90'] = sample_size_90 - df_rescale['sample_size_long_80'] = sample_size_long_80 - df_rescale['sample_size_long_90'] = sample_size_long_90 + # Compute mean sample size using a Monte Carlo simulation to evaluate variability of measures + for n in range(itt): + for rescale_r, group_r in df.groupby('rescale'): + for sub, subgroup in group_r.groupby('subject'): + # for each scaling and each subject pick one transformation (Monte-Carlo sample) + df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'diff'] = df.loc[(df['rescale'] == 1) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] - df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] + df_rescale['mean_diff'] = 
df_sub.groupby('rescale').mean()['diff'].values + df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values + + for rescale, group in df_sub.groupby('rescale'): + if rescale != 1: + CSA_mean_diff = df_sub.groupby('rescale').get_group(1).mean()['mean']*(1-(rescale**2)) + # sample size between-subject + var = df_rescale.groupby('rescale').get_group(1)['std_inter'].values[0] ** 2 + df_rescale.groupby('rescale').get_group(rescale)['std_inter'].values[0] ** 2 + sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2))) + sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2))) + # sample size within-subject + var_diff = df_rescale.groupby('rescale').get_group(rescale)['std_diff'].values[0] ** 2 + sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) + sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) + else: + sample_size_80.append(np.inf) + sample_size_90.append(np.inf) + sample_size_long_80.append(np.inf) + sample_size_long_90.append(np.inf) + # Compute mean and SD of computed sample sizes + df_rescale['sample_size_80'] = np.mean(np.reshape(sample_size_80, (itt, -1)), axis=0) + df_rescale['std_sample_size_80'] = np.std(np.reshape(sample_size_80, (itt, -1)), axis=0) + df_rescale['sample_size_90'] = np.mean(np.reshape(sample_size_90, (itt, -1)), axis=0) + df_rescale['std_sample_size_90'] = np.std(np.reshape(sample_size_90, (itt, -1)), axis=0) + df_rescale['sample_size_long_80'] = np.mean(np.reshape(sample_size_long_80, (itt, -1)), axis=0) + df_rescale['std_sample_size_long_80'] = np.std(np.reshape(sample_size_long_80, (itt, -1)), axis=0) + df_rescale['sample_size_long_90'] = np.mean(np.reshape(sample_size_long_90, (itt, -1)), axis=0) + df_rescale['std_sample_size_long_90'] = np.std(np.reshape(sample_size_long_90, (itt, -1)), axis=0) return df_rescale @@ -454,11 +468,10 @@ def main(): df_sub['rescale_estimated'] = 
df_sub['mean'].div(df_sub['csa_without_rescale']) df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa']) df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).div(df_sub['theoretic_csa']) - diff = [] + sample = [] for rescale, group in df.groupby('rescale'): for sub, subgroup in group.groupby('subject'): - diff.append((df.groupby('rescale').get_group(1).groupby('subject').get_group(sub).sample(n=1)['MEAN(area)'].values - group.groupby('subject').get_group(sub).sample(n=1)['MEAN(area)']).values) - df_sub['diff'] = np.concatenate(diff, axis=0) + df_sub.loc[(df_sub['rescale'] == rescale) & (df_sub['subject'] == sub), 'sample'] = subgroup.sample(n=1)['MEAN(area)'].values # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) @@ -481,10 +494,8 @@ def main(): df_rescale['mean_perc_error'] = df_sub.groupby('rescale').mean()['perc_error'].values df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values - df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values - df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values df_rescale = pearson(df_sub, df_rescale) - df_rescale = sample_size(df_sub, df_rescale) + df_rescale = sample_size(df, df_sub, df_rescale) # save dataframe in a csv file df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv')) # plot graph if verbose is present From 05d7fcda1e8764f601315b1f923f2db0fcb51d8c Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 20 May 2021 11:32:28 -0400 Subject: [PATCH 17/40] - Monte Carlo simulation for between group sample size computation --- csa_rescale_stat.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 9add12d..1ce9234 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -335,7 +335,7 @@ def pearson(df, df_rescale): df_rescale['p_value_csa'] = 
p_value_csa return df_rescale -def sample_size(df, df_sub, df_rescale, itt = 300): +def sample_size(df, df_sub, df_rescale, itt = 30): """ Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test). @@ -354,15 +354,18 @@ def sample_size(df, df_sub, df_rescale, itt = 300): for rescale_r, group_r in df.groupby('rescale'): for sub, subgroup in group_r.groupby('subject'): # for each scaling and each subject pick one transformation (Monte-Carlo sample) + df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'sample'] = \ + df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'diff'] = df.loc[(df['rescale'] == 1) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] - df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values df_rescale['std_diff'] = df_sub.groupby('rescale').std()['diff'].values + df_rescale['std_sample'] = df_sub.groupby('rescale').std()['sample'].values for rescale, group in df_sub.groupby('rescale'): if rescale != 1: CSA_mean_diff = df_sub.groupby('rescale').get_group(1).mean()['mean']*(1-(rescale**2)) # sample size between-subject - var = df_rescale.groupby('rescale').get_group(1)['std_inter'].values[0] ** 2 + df_rescale.groupby('rescale').get_group(rescale)['std_inter'].values[0] ** 2 + var = df_rescale.groupby('rescale').get_group(1)['std_sample'].values[0] ** 2 + df_rescale.groupby('rescale').get_group(rescale)['std_sample'].values[0] ** 2 sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2))) 
sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2))) # sample size within-subject From f50f18ad41f03d165b99d273ab1467651378aaf1 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 20 May 2021 15:06:34 -0400 Subject: [PATCH 18/40] - add comments for sample size function --- csa_rescale_stat.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 1ce9234..43f4fa7 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -353,16 +353,21 @@ def sample_size(df, df_sub, df_rescale, itt = 30): for n in range(itt): for rescale_r, group_r in df.groupby('rescale'): for sub, subgroup in group_r.groupby('subject'): - # for each scaling and each subject pick one transformation (Monte-Carlo sample) + # for each subject in each scaling pick one transformation (Monte-Carlo sample) and add value in new column "sample" of df_sub df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'sample'] = \ df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] - df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'diff'] = df.loc[(df['rescale'] == 1) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] - df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] + # for each subject in each scaling compute difference between one scaled and one un-scaled transformation (Monte-Carlo sample) and add value in new column "diff" of df_sub + df_sub.loc[(df_sub['rescale'] == rescale_r) & (df_sub['subject'] == sub), 'diff'] = \ + df.loc[(df['rescale'] == 1) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] - \ + df.loc[(df['rescale'] == rescale_r) & (df['subject'] == sub)].sample(n=1)['MEAN(area)'].values[0] + # regroup results per scaling df_rescale['mean_diff'] = df_sub.groupby('rescale').mean()['diff'].values df_rescale['std_diff'] = 
df_sub.groupby('rescale').std()['diff'].values df_rescale['std_sample'] = df_sub.groupby('rescale').std()['sample'].values for rescale, group in df_sub.groupby('rescale'): if rescale != 1: + # compute difference between groups using theoretic scaling and average un-scaled CSA CSA_mean_diff = df_sub.groupby('rescale').get_group(1).mean()['mean']*(1-(rescale**2)) # sample size between-subject var = df_rescale.groupby('rescale').get_group(1)['std_sample'].values[0] ** 2 + df_rescale.groupby('rescale').get_group(rescale)['std_sample'].values[0] ** 2 @@ -373,6 +378,7 @@ def sample_size(df, df_sub, df_rescale, itt = 30): sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) else: + # to avoid dividing by zero sample_size_80.append(np.inf) sample_size_90.append(np.inf) sample_size_long_80.append(np.inf) From be0ebb440d739d545fe99c3df9d9c79339f346a6 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 21 May 2021 12:13:49 -0400 Subject: [PATCH 19/40] - improve comments for formula var and var_diff --- csa_rescale_stat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 43f4fa7..540ccf5 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -367,13 +367,15 @@ def sample_size(df, df_sub, df_rescale, itt = 30): for rescale, group in df_sub.groupby('rescale'): if rescale != 1: - # compute difference between groups using theoretic scaling and average un-scaled CSA + # compute mean difference between groups using theoretic scaling and average un-scaled CSA CSA_mean_diff = df_sub.groupby('rescale').get_group(1).mean()['mean']*(1-(rescale**2)) # sample size between-subject + # var is the sum of the variances of un-scaled and scaled CSA across subjects (simulating two unpaired study arms) var = df_rescale.groupby('rescale').get_group(1)['std_sample'].values[0] ** 2 + 
df_rescale.groupby('rescale').get_group(rescale)['std_sample'].values[0] ** 2 sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2))) sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2))) # sample size within-subject + # var_diff is the variance of the difference between un-scaled and scaled CSA across subjects (simulating longitudinal CSA measures) var_diff = df_rescale.groupby('rescale').get_group(rescale)['std_diff'].values[0] ** 2 sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) From 31d2ab16dc466afb9afa3440843884034c93f161 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 21 May 2021 12:50:25 -0400 Subject: [PATCH 20/40] - limit usage of rescale_area to plots --- csa_rescale_stat.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 540ccf5..ad081f1 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -79,8 +79,6 @@ def concatenate_csv_files(path_results): files.append(path) if not files: raise FileExistsError("Folder {} does not contain any results csv file.".format(path_results)) - #metrics = pd.concat( - #[pd.read_csv(f).assign(rescale=os.path.basename(f).split('_')[4].split('.csv')[0]) for f in files]) print("Concatenate csv files. 
This will take a few seconds...") metrics = pd.concat(pd.read_csv(f) for f in files) # output csv file in PATH_RESULTS @@ -125,7 +123,7 @@ def boxplot_csa(df, path_output): meanpointprops = dict(marker='x' , markeredgecolor='red' , markerfacecolor='red') # round rescale area - df['rescale_area'] = round(df['rescale_area'], ndigits=0).astype(int) + df['rescale_area'] = round(100*(df['rescale']**2), ndigits=0).astype(int) df.boxplot(column=['mean'], by='rescale_area', positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanprops=meanpointprops) min_rescale = min(df['rescale_area'].values) max_rescale = max(df['rescale_area'].values) @@ -324,7 +322,7 @@ def pearson(df, df_rescale): p_value_cov = [] pearson_csa = [] p_value_csa = [] - for rescale_area, group in df.groupby('rescale_area'): + for rescale, group in df.groupby('rescale'): pearson_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[0]) p_value_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[1]) pearson_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[0]) @@ -335,7 +333,7 @@ def pearson(df, df_rescale): df_rescale['p_value_csa'] = p_value_csa return df_rescale -def sample_size(df, df_sub, df_rescale, itt = 30): +def sample_size(df, df_sub, df_rescale, itt = 2): """ Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test). 
From 02c47e772603cc58f33ed19c9fed3d160a16a3b7 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 21 May 2021 13:30:03 -0400 Subject: [PATCH 21/40] =?UTF-8?q?-=20remove=20rounding=20before=20boxplots?= =?UTF-8?q?=20for=20csa=20and=20atrophy=20-=20normalize=20by=20the=20squar?= =?UTF-8?q?e=20of=20rescale=20for=20df['Normalized=20CSA=20in=20mm=C2=B2']?= =?UTF-8?q?=20-=20use=20poly1d=20when=20plotting=20trends=20in=20plots?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- csa_rescale_stat.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index ad081f1..bf27646 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -122,13 +122,12 @@ def boxplot_csa(df, path_output): fig2 = plt.figure() meanpointprops = dict(marker='x' , markeredgecolor='red' , markerfacecolor='red') - # round rescale area - df['rescale_area'] = round(100*(df['rescale']**2), ndigits=0).astype(int) - df.boxplot(column=['mean'], by='rescale_area', positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanprops=meanpointprops) + df.boxplot(column=['mean'], by=df['rescale_area'].round(1), positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanprops=meanpointprops) min_rescale = min(df['rescale_area'].values) + min_rescale_r = min(df['rescale'].values) max_rescale = max(df['rescale_area'].values) max_y = df.groupby('rescale').get_group(1).mean()['mean'] - plt.plot([min_rescale, max_rescale], [max_y * (0.93 ** 2) , max_y] , ls="--" , c=".3") + plt.plot([min_rescale, max_rescale], [max_y * (min_rescale_r ** 2), max_y], ls="--", c=".3") plt.title('Boxplot of CSA in function of area rescaling') plt.suptitle("") plt.ylabel('CSA in mm^2') @@ -146,11 +145,9 @@ def boxplot_atrophy(df, path_output): fig = plt.figure() # convert to percentage df['rescale_estimated'] = df['rescale_estimated'] * 100 - # round rescale area - df['rescale_area'] = 
round(df['rescale_area'], ndigits=0).astype(int) meanpointprops = dict(marker='x' , markeredgecolor='red' , markerfacecolor='red') - df.boxplot(column='rescale_estimated', by='rescale_area', positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanline=False, figsize=(10,8), meanprops=meanpointprops) + df.boxplot(column='rescale_estimated', by=df['rescale_area'].round(1), positions=sorted(set(df['rescale_area'].values)), showmeans=True, meanline=False, figsize=(10, 8), meanprops=meanpointprops) min_rescale = min(df['rescale_area'].values) max_rescale = max(df['rescale_area'].values) plt.plot([min_rescale, max_rescale], [min_rescale, max_rescale], ls="--", c=".3") @@ -210,7 +207,7 @@ def error_function_of_csa(df, path_output): :param path_output: directory in which plot is saved """ fig, ax = plt.subplots(figsize=(7, 7)) - df['Normalized CSA in mm²'] = df['mean'].div(df['rescale']) + df['Normalized CSA in mm²'] = df['mean'].div(df['rescale']**2) # compute linear regression z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'Normalized CSA in mm²'], deg=1) p = np.poly1d(z) @@ -218,7 +215,7 @@ def error_function_of_csa(df, path_output): df.plot.scatter(x='perc_error', y='Normalized CSA in mm²', c='rescale', colormap='viridis') min_err = min(df['perc_error'].values) max_err = max(df['perc_error'].values) - plt.plot([min_err, max_err], [min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3") + plt.plot([min_err, max_err], [p(min_err), p(max_err)], ls="--", c=".3") plt.title('Normalized CSA in function of error %,\n linear regression: {}'.format(p)) plt.xlabel('Mean error %') ax.legend(loc='upper right') @@ -243,7 +240,7 @@ def error_function_of_intra_cov(df, path_output): df.plot.scatter(x='perc_error', y='cov', c='rescale', colormap='viridis') min_err = min(df['perc_error'].values) max_err = max(df['perc_error'].values) - plt.plot([min_err, max_err], [min_err*z[0]+z[1], max_err*z[0]+z[1]], ls="--", c=".3") + plt.plot([min_err, max_err], [p(min_err), 
p(max_err)], ls="--", c=".3") plt.xlabel('Mean error %') plt.ylabel('COV') plt.title('COV in function of % error,\n linear regression: {}'.format(p)) From bb990c89ed36577d5e6dc4fd562f86459f2893d4 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 21 May 2021 15:37:02 -0400 Subject: [PATCH 22/40] - scatter colorbar takes discrete values - append fake values for pearson and and p_value for rescale =1 --- csa_rescale_stat.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index bf27646..6092cbd 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -23,6 +23,7 @@ from math import ceil from ruamel.yaml import YAML from scipy import stats +from matplotlib import cm # Parser @@ -212,13 +213,13 @@ def error_function_of_csa(df, path_output): z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'Normalized CSA in mm²'], deg=1) p = np.poly1d(z) # plot - df.plot.scatter(x='perc_error', y='Normalized CSA in mm²', c='rescale', colormap='viridis') + viridis = cm.get_cmap('viridis', 6) + df.plot.scatter(x='perc_error', y='Normalized CSA in mm²', c='rescale', colormap=viridis) min_err = min(df['perc_error'].values) max_err = max(df['perc_error'].values) plt.plot([min_err, max_err], [p(min_err), p(max_err)], ls="--", c=".3") plt.title('Normalized CSA in function of error %,\n linear regression: {}'.format(p)) plt.xlabel('Mean error %') - ax.legend(loc='upper right') plt.grid() output_file = os.path.join(path_output, "fig_err_in_function_of_csa.png") plt.savefig(output_file, bbox_inches='tight') @@ -237,14 +238,14 @@ def error_function_of_intra_cov(df, path_output): z = np.polyfit(x=df.loc[:, 'perc_error'], y=df.loc[:, 'cov'], deg=1) p = np.poly1d(z) # plot - df.plot.scatter(x='perc_error', y='cov', c='rescale', colormap='viridis') + viridis = cm.get_cmap('viridis', 6) + df.plot.scatter(x='perc_error', y='cov', c='rescale', colormap=viridis) min_err = min(df['perc_error'].values) max_err = 
max(df['perc_error'].values) plt.plot([min_err, max_err], [p(min_err), p(max_err)], ls="--", c=".3") plt.xlabel('Mean error %') plt.ylabel('COV') plt.title('COV in function of % error,\n linear regression: {}'.format(p)) - ax.legend(loc='upper right') plt.grid() output_file = os.path.join(path_output, "fig_err_in_function_of_cov.png") plt.savefig(output_file, bbox_inches='tight') @@ -320,10 +321,16 @@ def pearson(df, df_rescale): pearson_csa = [] p_value_csa = [] for rescale, group in df.groupby('rescale'): - pearson_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[0]) - p_value_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[1]) - pearson_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[0]) - p_value_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[1]) + if rescale != 1: + pearson_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[0]) + p_value_cov.append(stats.pearsonr(group['cov'], group['perc_error'])[1]) + pearson_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[0]) + p_value_csa.append(stats.pearsonr(group['mean'], group['perc_error'])[1]) + else: + pearson_cov.append(1) + p_value_cov.append(0) + pearson_csa.append(1) + p_value_csa.append(0) df_rescale['pearson_cov'] = pearson_cov df_rescale['p_value_cov'] = p_value_cov df_rescale['pearson_csa'] = pearson_csa @@ -504,6 +511,7 @@ def main(): df_rescale = sample_size(df, df_sub, df_rescale) # save dataframe in a csv file df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv')) + # plot graph if verbose is present if arguments.fig: if path_output: From 7676fcb045d5f02c88ffe5916834e2be8b273d37 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 27 May 2021 08:55:26 -0400 Subject: [PATCH 23/40] - compute sample size using 500 itterations - un-comment concatenating csv files --- csa_rescale_stat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 6092cbd..b649e96 100755 
--- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -337,7 +337,7 @@ def pearson(df, df_rescale): df_rescale['p_value_csa'] = p_value_csa return df_rescale -def sample_size(df, df_sub, df_rescale, itt = 2): +def sample_size(df, df_sub, df_rescale, itt = 500): """ Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test). @@ -412,7 +412,7 @@ def main(): path_output = os.path.abspath(arguments.o) # aggregate all csv results files - # concatenate_csv_files(path_results) + concatenate_csv_files(path_results) # read data data = pd.read_csv(os.path.join(path_results, r'csa_all.csv'), decimal=".") @@ -439,6 +439,7 @@ def main(): df_vert['rescale'] = list(float(b.split('RPI_r_r')[1].split('_')[0]) for b in df_vert['basename']) df_vert['slices'] = list(int(slices.split(':')[1]) - int(slices.split(':')[0]) + 1 for slices in df_vert['Slice (I->S)']) + # verify if vertlevels of interest were given in input by user if vertlevels_input is None: vertlevels = list(set(df_vert['VertLevel'].values)) From 08ff3a5c90664512b44a9f5c5b60076ca851427f Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 27 May 2021 09:09:35 -0400 Subject: [PATCH 24/40] - update sample size plot to match article - change iteration number for computing sample size and print message --- csa_rescale_stat.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index b649e96..f52e749 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -185,10 +185,10 @@ def plot_sample_size(z_conf, z_power, std_arr, mean_csa, path_output): num_n_t2 = 2 * ((z_conf + z_p) ** 2) * (std_arr[1] ** 2) n_t2.append(num_n_t2 / ((0.01*atrophy*mean_csa[1]) ** 2)) # plot - ax.plot(atrophy / mean_csa[0] * 100, n_t1[0], 'b.', 
markevery=3, markersize=10, label='t1 80% power') - ax.plot(atrophy / mean_csa[0] * 100, n_t1[1], 'r.', markevery=3, markersize=10, label='t1 90% power') - ax.plot(atrophy / mean_csa[1] * 100, n_t2[0], 'c', label='t2 80% power') - ax.plot(atrophy / mean_csa[1] * 100, n_t2[1], 'm', label='t2 90% power') + ax.plot(atrophy / mean_csa[0] * 100, n_t1[0], 'tab:blue', label='T1w 80% power') + ax.plot(atrophy / mean_csa[0] * 100, n_t1[1], 'tab:blue', linestyle='--', label='T1w 90% power') + ax.plot(atrophy / mean_csa[1] * 100, n_t2[0], 'tab:red', label='T2w 80% power') + ax.plot(atrophy / mean_csa[1] * 100, n_t2[1], 'tab:red', linestyle='--', label='T2w 90% power') ax.set_ylabel('number of participants per group of study \n(patients or controls) with ratio 1:1') ax.set_xlabel('atrophy in %') plt.suptitle('minimum number of participants to detect an atrophy with 5% uncertainty', fontsize=16, y=1.05) @@ -337,7 +337,7 @@ def pearson(df, df_rescale): df_rescale['p_value_csa'] = p_value_csa return df_rescale -def sample_size(df, df_sub, df_rescale, itt = 500): +def sample_size(df, df_sub, df_rescale, itt = 50): """ Minimum sample size ( number of subjects) necessary to detect an atrophy in a between-subject (based on a two-sample bilateral t-test) and minimum sample size necessary to detect an atrophy in a within-subject ( repeated-measures in longitudinal study: based on a two-sample bilateral paired t-test). @@ -351,6 +351,7 @@ def sample_size(df, df_sub, df_rescale, itt = 500): sample_size_90 = [] sample_size_long_80 = [] sample_size_long_90 = [] + print("Computing sample size using Monte Carlo simulation with {} iterations. 
This might take a while...".format(itt)) # Compute mean sample size using a Monte Carlo simulation to evaluate variability of measures for n in range(itt): for rescale_r, group_r in df.groupby('rescale'): From d35b6f29d0b6542270307f29d55cabe05de0e6a0 Mon Sep 17 00:00:00 2001 From: paul Date: Mon, 31 May 2021 19:00:03 -0400 Subject: [PATCH 25/40] - remove ceil sur chaque calcul de sample size --- csa_rescale_stat.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index f52e749..51cf8cf 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -375,13 +375,13 @@ def sample_size(df, df_sub, df_rescale, itt = 50): # sample size between-subject # var is the sum of the variances of un-scaled and scaled CSA across subjects (simulating two unpaired study arms) var = df_rescale.groupby('rescale').get_group(1)['std_sample'].values[0] ** 2 + df_rescale.groupby('rescale').get_group(rescale)['std_sample'].values[0] ** 2 - sample_size_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2))) - sample_size_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2))) + sample_size_80.append((((1.96 + 0.84) ** 2) * (var)) / (CSA_mean_diff ** 2)) + sample_size_90.append((((1.96 + 1.28) ** 2) * (var)) / (CSA_mean_diff ** 2)) # sample size within-subject # var_diff is the variance of the difference between un-scaled and scaled CSA across subjects (simulating longitudinal CSA measures) var_diff = df_rescale.groupby('rescale').get_group(rescale)['std_diff'].values[0] ** 2 - sample_size_long_80.append(np.ceil((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) - sample_size_long_90.append(np.ceil((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2))) + sample_size_long_80.append((((1.96 + 0.84) ** 2) * (var_diff)) / (CSA_mean_diff ** 2)) + sample_size_long_90.append((((1.96 + 1.28) ** 2) * (var_diff)) / (CSA_mean_diff ** 2)) else: # to avoid dividing by zero 
sample_size_80.append(np.inf) From 120fa41cb87541abe269b08f4c3dd9c809646be3 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Thu, 25 Jul 2024 17:06:17 -0400 Subject: [PATCH 26/40] add egg info --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 9c35720..9c20808 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ qc/ *.jpg *.png .gitignore +*.egg-info/ +dist/ From 773c191017c8a05642f68babb7fe9a5888e9a6d7 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Thu, 25 Jul 2024 17:06:39 -0400 Subject: [PATCH 27/40] change sct_deepseg for sct_deepseg_sc --- process_data.sh | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/process_data.sh b/process_data.sh index 15d9f91..ebc47e2 100755 --- a/process_data.sh +++ b/process_data.sh @@ -49,7 +49,10 @@ segment_and_label_if_does_not_exist(){ local file="$1" local contrast="$2" local contrast_str="$3" - segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}" + FILESEG="${file}_seg" + sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic $qc + sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc $qc + #segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}" local file_seg=${FILESEG} # Update global variable with segmentation file name FILELABEL="${file}_labels-disc" @@ -74,23 +77,23 @@ segment_and_label_if_does_not_exist(){ # Check if manual segmentation already exists. If it does, copy it locally. If # it does not, perform seg. -segment_if_does_not_exist(){ - local file="$1" - local contrast="$2" - local qc=$3 - # Update global variable with segmentation file name - FILESEG="${file}_seg" - FILESEGMANUAL="${path_derivatives}/${FILESEG}-manual" - if [ -e "${FILESEGMANUAL}.nii.gz" ]; then - echo "Found! Using manual segmentation." 
- sct_resample -i ${FILESEGMANUAL}.nii.gz -mm $interp -x nn -o ${FILESEGMANUAL}_r.nii.gz - rsync -avzh ${FILESEGMANUAL}_r.nii.gz ${FILESEG}.nii.gz - sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc $qc - else - # Segment spinal cord - sct_deepseg_sc -i ${file}.nii.gz -c ${contrast} $qc - fi -} +# segment_if_does_not_exist(){ +# local file="$1" +# local contrast="$2" +# local qc=$3 +# # Update global variable with segmentation file name +# FILESEG="${file}_seg" +# FILESEGMANUAL="${path_derivatives}/${FILESEG}-manual" +# if [ -e "${FILESEGMANUAL}.nii.gz" ]; then +# echo "Found! Using manual segmentation." +# sct_resample -i ${FILESEGMANUAL}.nii.gz -mm $interp -x nn -o ${FILESEGMANUAL}_r.nii.gz +# rsync -avzh ${FILESEGMANUAL}_r.nii.gz ${FILESEG}.nii.gz +# sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc $qc +# else +# # Segment spinal cord +# sct_deepseg_sc -i ${file}.nii.gz -c ${contrast} $qc +# fi +# } # SCRIPT STARTS HERE From e745a3cfaceff844118b6a61d2cd893bc5ff5309 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Thu, 1 Aug 2024 10:25:56 -0400 Subject: [PATCH 28/40] change disc file label --- process_data.sh | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/process_data.sh b/process_data.sh index ebc47e2..a6eb8a0 100755 --- a/process_data.sh +++ b/process_data.sh @@ -50,28 +50,28 @@ segment_and_label_if_does_not_exist(){ local contrast="$2" local contrast_str="$3" FILESEG="${file}_seg" - sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic $qc - sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc $qc + sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic -qc $PATH_QC -qc-subject ${SUBJECT} + sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} #segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}" local file_seg=${FILESEG} # Update global variable with segmentation 
file name - FILELABEL="${file}_labels-disc" - FILELABELMANUAL="${path_derivatives}/${SUBJECT}_${contrast_str}_labels-disc-manual" + FILELABEL="${file}_label-discs_dlabel" # TODO change for updated names + FILELABELMANUAL="${path_derivatives}/${SUBJECT}_${contrast_str}_label-discs_dlabel" if [ -e "${FILELABELMANUAL}.nii.gz" ]; then echo "manual labeled file was found: ${FILELABELMANUAL}" # reorienting and resampling image - sct_image -i ${FILELABELMANUAL}.nii.gz -setorient RPI -o "${FILELABELMANUAL}_RPI.nii.gz" - sct_maths -i ${FILELABELMANUAL}_RPI.nii.gz -dilate 2 -o ${FILELABELMANUAL}_RPI_dil.nii.gz - sct_resample -i ${FILELABELMANUAL}_RPI_dil.nii.gz -mm $interp -x nn -o ${FILELABELMANUAL}_RPI_dil_r.nii.gz - rsync -avzh "${FILELABELMANUAL}_RPI_dil_r.nii.gz" ${FILELABEL}.nii.gz + #sct_image -i ${FILELABELMANUAL}.nii.gz -setorient RPI -o "${FILELABELMANUAL}_RPI.nii.gz" + #sct_maths -i ${FILELABELMANUAL}_RPI.nii.gz -dilate 2 -o ${FILELABELMANUAL}_RPI_dil.nii.gz + #sct_resample -i ${FILELABELMANUAL}_RPI_dil.nii.gz -mm $interp -x nn -o ${FILELABELMANUAL}_RPI_dil_r.nii.gz + #rsync -avzh "${FILELABELMANUAL}_RPI_dil_r.nii.gz" ${FILELABEL}.nii.gz # Generate labeled segmentation - sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABELMANUAL}_RPI_dil_r.nii.gz" -qc ${PATH_QC} -qc-subject ${SUBJECT} + sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABELMANUAL}.nii.gz" -qc ${PATH_QC} -qc-subject ${SUBJECT} else # Generate labeled segmentation sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -qc ${PATH_QC} -qc-subject ${SUBJECT} fi # Create labels in the Spinal Cord - sct_label_utils -i ${file_seg}_labeled.nii.gz -vert-body 0 -o ${FILELABEL}.nii.gz + #sct_label_utils -i ${file_seg}_labeled.nii.gz -vert-body 0 -o ${FILELABEL}.nii.gz FILE_SEG_LABEL=${file_seg}_labeled } @@ -101,9 +101,7 @@ segment_and_label_if_does_not_exist(){ # Display useful info for the log, such 
as SCT version, RAM and CPU cores available sct_check_dependencies -short # copy derivatives directory containing manual corrections to PATH_DATA_PROCESSED -mkdir -p "${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat/" -cp -r "${PATH_DATA}/derivatives/labels/${SUBJECT}/anat" "${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}" -path_derivatives="${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat" +path_derivatives="${PATH_DATA}/derivatives/labels/${SUBJECT}/anat" # Go to results folder, where most of the outputs will be located cd $PATH_DATA_PROCESSED # Copy source images @@ -127,13 +125,13 @@ fi file=${SUBJECT}_${contrast_str} # Reorient image to RPI sct_image -i ${file}.nii.gz -setorient RPI -o ${file}_RPI.nii.gz -file=${file}_RPI # Resample image isotropically -sct_resample -i ${file}.nii.gz -mm $interp -o ${file}_r.nii.gz -file=${file}_r -end=`date +%s` -runtime=$((end-start)) -echo "+++++++++++ TIME: Duration of reorienting and resampling: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" +sct_resample -i ${file}_RPI.nii.gz -mm $interp -o ${file}_RPI_r.nii.gz +mv "${file}_RPI_r.nii.gz" "${SUBJECT}_space-other_${contrast_str}.nii.gz" +file=${SUBJECT}_space-other_${contrast_str} +# end=`date +%s` +# runtime=$((end-start)) +# echo "+++++++++++ TIME: Duration of reorienting and resampling: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" # Label spinal cord (only if it does not exist) in dir anat start=`date +%s` @@ -201,9 +199,13 @@ for r_coef in ${R_COEFS[@]}; do file_label_r_t=${file_label_r_t}_crop - # Segment spinal cord (only if it does not exist) + # Segment spinal cord start=`date +%s` - sct_deepseg_sc -i ${file_r_t}.nii.gz -c ${contrast} + #sct_deepseg_sc -i ${file_r_t}.nii.gz -c ${contrast} + sct_deepseg -i ${file_r_t}.nii.gz -task seg_sc_contrast_agnostic -qc $PATH_QC -qc-subject ${SUBJECT} + # TODO: soft csa? 
+ sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} + end=`date +%s` runtime=$((end-start)) echo "+++++++++++ TIME: Duration of segmentation t${i_transfo}: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" From 25524fd9e5b0803ac2464e9e5f96abf159ef680a Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Tue, 6 Aug 2024 10:26:52 -0400 Subject: [PATCH 29/40] add fix for qform sfrom and fix typo disc labels --- process_data.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/process_data.sh b/process_data.sh index a6eb8a0..c692e54 100755 --- a/process_data.sh +++ b/process_data.sh @@ -65,6 +65,9 @@ segment_and_label_if_does_not_exist(){ #sct_resample -i ${FILELABELMANUAL}_RPI_dil.nii.gz -mm $interp -x nn -o ${FILELABELMANUAL}_RPI_dil_r.nii.gz #rsync -avzh "${FILELABELMANUAL}_RPI_dil_r.nii.gz" ${FILELABEL}.nii.gz # Generate labeled segmentation + sct_image -i ${file_seg}.nii.gz -set-sform-to-qform + sct_image -i ${file}.nii.gz -set-sform-to-qform + sct_image -i "${FILELABELMANUAL}.nii.gz" -set-sform-to-qform sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABELMANUAL}.nii.gz" -qc ${PATH_QC} -qc-subject ${SUBJECT} else # Generate labeled segmentation From 656e1748513543fae1ebded2c0dc40ce8735cbf2 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Thu, 8 Aug 2024 10:16:05 -0400 Subject: [PATCH 30/40] change for sct --- config_sct_run_batch.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/config_sct_run_batch.yml b/config_sct_run_batch.yml index f0968be..ba37fda 100644 --- a/config_sct_run_batch.yml +++ b/config_sct_run_batch.yml @@ -1,10 +1,10 @@ # config file for sct_run_batch -path_data: /scratch/pabaua/data-multi-subject-p -path_output: /scratch/pabaua/results_csa_t2 -script: /home/pabaua/csa-atrophy/process_data.sh -script_args: /home/pabaua/csa-atrophy/config_script.yml -jobs: -1 +path_data: 
~/datasets/data-multi-subject +path_output: ~/process_results/results_csa-atrophy_t2-all-2024-08-08 +script: ~/code/csa-atrophy/process_data.sh +script_args: /home/GRAMES.POLYMTL.CA/sebeda/code/csa-atrophy/config_script.yml +jobs: 4 batch_log: sct_run_batch_log.txt continue_on_error: 1 subject_prefix: sub- -zip: true +email_to: sandrine.bedard@polymtl.ca From c849a24691cbc3b17f5eb16644ae93d4c169e200 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Tue, 13 Aug 2024 14:23:21 -0400 Subject: [PATCH 31/40] modify for python 3.9 compatibility --- affine_transfo.py | 6 +++++- config_sct_run_batch.yml | 5 ++--- requirements.txt | 8 ++++---- setup.py | 1 + 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/affine_transfo.py b/affine_transfo.py index ce98e99..5eebf4e 100755 --- a/affine_transfo.py +++ b/affine_transfo.py @@ -120,7 +120,11 @@ def random_values(df, subject_name, config_param): 'shift_PA': shift_PA, 'shift_IS': shift_IS } - df = df.append(transfo_dict, ignore_index=True) + #df = df.append(transfo_dict, ignore_index=True) + print(df) + print(transfo_dict) + df_new_row = pd.DataFrame(transfo_dict, index=[0]) + df = pd.concat([df, df_new_row], ignore_index=True)#.reset_index() return df, angle_IS, angle_PA, angle_LR, shift_LR, shift_PA, shift_IS diff --git a/config_sct_run_batch.yml b/config_sct_run_batch.yml index ba37fda..4cf4ef1 100644 --- a/config_sct_run_batch.yml +++ b/config_sct_run_batch.yml @@ -1,10 +1,9 @@ # config file for sct_run_batch path_data: ~/datasets/data-multi-subject -path_output: ~/process_results/results_csa-atrophy_t2-all-2024-08-08 +path_output: ~/process_results/results_csa-atrophy_t2-all-2024-08-13 script: ~/code/csa-atrophy/process_data.sh script_args: /home/GRAMES.POLYMTL.CA/sebeda/code/csa-atrophy/config_script.yml -jobs: 4 +jobs: 8 batch_log: sct_run_batch_log.txt continue_on_error: 1 subject_prefix: sub- -email_to: sandrine.bedard@polymtl.ca diff --git a/requirements.txt b/requirements.txt index f110068..596ac9e 
100755 --- a/requirements.txt +++ b/requirements.txt @@ -6,8 +6,8 @@ scikit-image nibabel ruamel.yaml yaml-1.3 -argparse~=1.4.0 -setuptools~=49.6.0 -PyYAML~=5.3.1 -coloredlogs~=14.0 +argparse +setuptools +PyYAML +coloredlogs diff --git a/setup.py b/setup.py index f0786f9..2ca613a 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ ], keywords='', install_requires=install_reqs, + packages = [], entry_points={ 'console_scripts': [ 'affine_transfo=affine_transfo:main', From 9511c21330a5f52b365e948324e04f41dc50f92f Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Tue, 13 Aug 2024 17:35:19 -0400 Subject: [PATCH 32/40] setup for compute canada --- config_sct_run_batch_cc.yml | 10 ++++++++++ job_template.sh | 2 +- requirements.txt => requirements_cc.txt | 4 ++-- requirements_manual_pip.txt | 2 ++ 4 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 config_sct_run_batch_cc.yml rename requirements.txt => requirements_cc.txt (92%) create mode 100755 requirements_manual_pip.txt diff --git a/config_sct_run_batch_cc.yml b/config_sct_run_batch_cc.yml new file mode 100644 index 0000000..e9347c0 --- /dev/null +++ b/config_sct_run_batch_cc.yml @@ -0,0 +1,10 @@ +# config file for sct_run_batch +path_data: /home/sabeda/scratch/data-multi-subject +path_output: /home/sabeda/scratch/results_csa-atrophy_t2-all +script: /home/sabeda/csa-atrophy/process_data.sh +script_args: /home/sabeda/csa-atrophy/config_script.yml +jobs: -1 +batch_log: sct_run_batch_log.txt +continue_on_error: 1 +subject_prefix: sub- +zip: true diff --git a/job_template.sh b/job_template.sh index 922c5c0..972ba17 100644 --- a/job_template.sh +++ b/job_template.sh @@ -3,7 +3,7 @@ #SBATCH --nodes=1 #SBATCH --cpus-per-task=32 # number of OpenMP processes #SBATCH --mem=128G -#SBATCH --mail-user=paul.bautin@polymtl.ca +#SBATCH --mail-user=sandrine.bedard@polymtl.ca #SBATCH --mail-type=ALL export OMP_NUM_THREADS=1 export MKL_NUM_THREADS=1 diff --git a/requirements.txt b/requirements_cc.txt similarity 
index 92% rename from requirements.txt rename to requirements_cc.txt index 596ac9e..dacd8a0 100755 --- a/requirements.txt +++ b/requirements_cc.txt @@ -5,9 +5,9 @@ matplotlib scikit-image nibabel ruamel.yaml -yaml-1.3 argparse setuptools PyYAML coloredlogs - +networkx +yaml-1.3 diff --git a/requirements_manual_pip.txt b/requirements_manual_pip.txt new file mode 100755 index 0000000..099fd2e --- /dev/null +++ b/requirements_manual_pip.txt @@ -0,0 +1,2 @@ +networkx==3.2.1 +yaml-1.3 From 96c1b1d969f92812a20d35cb239ff415c8660a3e Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Wed, 14 Aug 2024 14:45:39 -0400 Subject: [PATCH 33/40] rsync disc file --- process_data.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/process_data.sh b/process_data.sh index c692e54..c09e5a3 100755 --- a/process_data.sh +++ b/process_data.sh @@ -59,6 +59,7 @@ segment_and_label_if_does_not_exist(){ FILELABELMANUAL="${path_derivatives}/${SUBJECT}_${contrast_str}_label-discs_dlabel" if [ -e "${FILELABELMANUAL}.nii.gz" ]; then echo "manual labeled file was found: ${FILELABELMANUAL}" + rsync -avzh $FILELABELMANUAL ${FILELABEL}.nii.gz # reorienting and resampling image #sct_image -i ${FILELABELMANUAL}.nii.gz -setorient RPI -o "${FILELABELMANUAL}_RPI.nii.gz" #sct_maths -i ${FILELABELMANUAL}_RPI.nii.gz -dilate 2 -o ${FILELABELMANUAL}_RPI_dil.nii.gz @@ -67,8 +68,8 @@ segment_and_label_if_does_not_exist(){ # Generate labeled segmentation sct_image -i ${file_seg}.nii.gz -set-sform-to-qform sct_image -i ${file}.nii.gz -set-sform-to-qform - sct_image -i "${FILELABELMANUAL}.nii.gz" -set-sform-to-qform - sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABELMANUAL}.nii.gz" -qc ${PATH_QC} -qc-subject ${SUBJECT} + sct_image -i "${FILELABEL}.nii.gz" -set-sform-to-qform + sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABEL}.nii.gz" -qc ${PATH_QC} -qc-subject ${SUBJECT} else # Generate labeled 
segmentation sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -qc ${PATH_QC} -qc-subject ${SUBJECT} From 08bfc542a5e6c3101106f66f7c0db62370f0f53d Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Fri, 16 Aug 2024 13:33:20 -0400 Subject: [PATCH 34/40] rm qc from sct_deepseg --- process_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/process_data.sh b/process_data.sh index c09e5a3..a56e55b 100755 --- a/process_data.sh +++ b/process_data.sh @@ -50,7 +50,7 @@ segment_and_label_if_does_not_exist(){ local contrast="$2" local contrast_str="$3" FILESEG="${file}_seg" - sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic -qc $PATH_QC -qc-subject ${SUBJECT} + sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic # -qc $PATH_QC -qc-subject ${SUBJECT} sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} #segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}" local file_seg=${FILESEG} From 6e36cbfda38bf8c5a2dd3eb67e080d9757ac86db Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Fri, 16 Aug 2024 13:55:52 -0400 Subject: [PATCH 35/40] fix missing eextension --- process_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/process_data.sh b/process_data.sh index a56e55b..436650c 100755 --- a/process_data.sh +++ b/process_data.sh @@ -59,7 +59,7 @@ segment_and_label_if_does_not_exist(){ FILELABELMANUAL="${path_derivatives}/${SUBJECT}_${contrast_str}_label-discs_dlabel" if [ -e "${FILELABELMANUAL}.nii.gz" ]; then echo "manual labeled file was found: ${FILELABELMANUAL}" - rsync -avzh $FILELABELMANUAL ${FILELABEL}.nii.gz + rsync -avzh ${FILELABELMANUAL}.nii.gz ${FILELABEL}.nii.gz # reorienting and resampling image #sct_image -i ${FILELABELMANUAL}.nii.gz -setorient RPI -o "${FILELABELMANUAL}_RPI.nii.gz" #sct_maths -i ${FILELABELMANUAL}_RPI.nii.gz -dilate 2 -o ${FILELABELMANUAL}_RPI_dil.nii.gz From 
a5942d4c81d1aa44a50035908a13e96418186816 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Fri, 16 Aug 2024 16:33:05 -0400 Subject: [PATCH 36/40] remove extra QC --- process_data.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/process_data.sh b/process_data.sh index 436650c..b4e5953 100755 --- a/process_data.sh +++ b/process_data.sh @@ -50,8 +50,8 @@ segment_and_label_if_does_not_exist(){ local contrast="$2" local contrast_str="$3" FILESEG="${file}_seg" - sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic # -qc $PATH_QC -qc-subject ${SUBJECT} - sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} + sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic -qc $PATH_QC -qc-subject ${SUBJECT} + #sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} #segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}" local file_seg=${FILESEG} # Update global variable with segmentation file name @@ -208,7 +208,7 @@ for r_coef in ${R_COEFS[@]}; do #sct_deepseg_sc -i ${file_r_t}.nii.gz -c ${contrast} sct_deepseg -i ${file_r_t}.nii.gz -task seg_sc_contrast_agnostic -qc $PATH_QC -qc-subject ${SUBJECT} # TODO: soft csa? 
- sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} + #sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} end=`date +%s` runtime=$((end-start)) From 28b8c78017f09c5ced6b4af8790d4a5e01f6acde Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Mon, 19 Aug 2024 10:01:52 -0400 Subject: [PATCH 37/40] remove one qc report --- process_data.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/process_data.sh b/process_data.sh index b4e5953..2fb198e 100755 --- a/process_data.sh +++ b/process_data.sh @@ -50,7 +50,7 @@ segment_and_label_if_does_not_exist(){ local contrast="$2" local contrast_str="$3" FILESEG="${file}_seg" - sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic -qc $PATH_QC -qc-subject ${SUBJECT} + sct_deepseg -i ${file}.nii.gz -task seg_sc_contrast_agnostic #-qc $PATH_QC -qc-subject ${SUBJECT} #sct_qc -i ${file}.nii.gz -s ${FILESEG}.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} #segment_if_does_not_exist $file ${contrast} "-qc ${PATH_QC} -qc-subject ${SUBJECT}" local file_seg=${FILESEG} @@ -206,9 +206,9 @@ for r_coef in ${R_COEFS[@]}; do # Segment spinal cord start=`date +%s` #sct_deepseg_sc -i ${file_r_t}.nii.gz -c ${contrast} - sct_deepseg -i ${file_r_t}.nii.gz -task seg_sc_contrast_agnostic -qc $PATH_QC -qc-subject ${SUBJECT} + sct_deepseg -i ${file_r_t}.nii.gz -task seg_sc_contrast_agnostic # -qc $PATH_QC -qc-subject ${SUBJECT} # TODO: soft csa? 
- #sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} + sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} end=`date +%s` runtime=$((end-start)) From 5d37e0af468be93085f166149bfb02250076d73f Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Mon, 19 Aug 2024 11:45:17 -0400 Subject: [PATCH 38/40] remove all QC reports --- process_data.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/process_data.sh b/process_data.sh index 2fb198e..f410242 100755 --- a/process_data.sh +++ b/process_data.sh @@ -69,10 +69,10 @@ segment_and_label_if_does_not_exist(){ sct_image -i ${file_seg}.nii.gz -set-sform-to-qform sct_image -i ${file}.nii.gz -set-sform-to-qform sct_image -i "${FILELABEL}.nii.gz" -set-sform-to-qform - sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABEL}.nii.gz" -qc ${PATH_QC} -qc-subject ${SUBJECT} + sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -discfile "${FILELABEL}.nii.gz" #-qc ${PATH_QC} -qc-subject ${SUBJECT} else # Generate labeled segmentation - sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} -qc ${PATH_QC} -qc-subject ${SUBJECT} + sct_label_vertebrae -i ${file}.nii.gz -s ${file_seg}.nii.gz -c ${contrast} #-qc ${PATH_QC} -qc-subject ${SUBJECT} fi # Create labels in the Spinal Cord #sct_label_utils -i ${file_seg}_labeled.nii.gz -vert-body 0 -o ${FILELABEL}.nii.gz From bc3e88258c52b0c4225edde37e6aacfba5ec28f6 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Mon, 19 Aug 2024 13:33:35 -0400 Subject: [PATCH 39/40] remove all qc reports --- process_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/process_data.sh b/process_data.sh index f410242..2c389f5 100755 --- a/process_data.sh +++ b/process_data.sh @@ -208,7 +208,7 @@ for r_coef in ${R_COEFS[@]}; do #sct_deepseg_sc -i ${file_r_t}.nii.gz -c 
${contrast} sct_deepseg -i ${file_r_t}.nii.gz -task seg_sc_contrast_agnostic # -qc $PATH_QC -qc-subject ${SUBJECT} # TODO: soft csa? - sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} + #sct_qc -i ${file_r_t}.nii.gz -s ${file_r_t}_seg.nii.gz -p sct_deepseg_sc -qc $PATH_QC -qc-subject ${SUBJECT} end=`date +%s` runtime=$((end-start)) From 0b40fb6d5324409c1217cc718127ced0facd6776 Mon Sep 17 00:00:00 2001 From: Sandrine Bedard Date: Tue, 27 Aug 2024 11:12:26 -0400 Subject: [PATCH 40/40] add logging --- csa_rescale_stat.py | 55 ++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/csa_rescale_stat.py b/csa_rescale_stat.py index 1894186..2170846 100755 --- a/csa_rescale_stat.py +++ b/csa_rescale_stat.py @@ -17,13 +17,24 @@ import pandas as pd import numpy as np +import logging import os +import sys import argparse +import matplotlib import matplotlib.pyplot as plt from math import ceil from ruamel.yaml import YAML +# Initialize logging +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # default: logging.DEBUG, logging.INFO +hdlr = logging.StreamHandler(sys.stdout) +logging.root.addHandler(hdlr) + +FNAME_LOG = 'log_stats.txt' + # Parser ######################################################################################### @@ -80,7 +91,7 @@ def concatenate_csv_files(path_results): raise FileExistsError("Folder {} does not contain any results csv file.".format(path_results)) #metrics = pd.concat( #[pd.read_csv(f).assign(rescale=os.path.basename(f).split('_')[4].split('.csv')[0]) for f in files]) - print("Concatenate csv files. This will take a few seconds...") + logger.info("Concatenate csv files. 
This will take a few seconds...") metrics = pd.concat(pd.read_csv(f) for f in files) # output csv file in PATH_RESULTS metrics.to_csv(os.path.join(path_results, r'csa_all.csv')) @@ -109,10 +120,10 @@ def plot_perc_err(df, path_output): plt.xlabel('area rescaling in %') plt.ylabel('STD in %') plt.grid() - plt.tight_layout() + #plt.tight_layout() output_file = os.path.join(path_output, "fig_err.png") plt.savefig(output_file) - print("--> Created figure: {}".format(output_file)) + logger.info(f"--> Created figure: {output_file}") def boxplot_csa(df, path_output): @@ -132,7 +143,7 @@ def boxplot_csa(df, path_output): plt.xlabel('area rescaling in %') output_file = os.path.join(path_output, "fig_boxplot_csa.png") plt.savefig(output_file) - print("--> Created figure: {}".format(output_file)) + logger.info("--> Created figure: {}".format(output_file)) def boxplot_atrophy(df, path_output): @@ -157,7 +168,7 @@ def boxplot_atrophy(df, path_output): # TODO: add diagonal (remove horizontal lines) output_file = os.path.join(path_output, "fig_boxplot_atrophy.png") plt.savefig(output_file) - print("--> Created figure: {}".format(output_file)) + logger.info("--> Created figure: {}".format(output_file)) def plot_sample_size(z_conf, z_power, std, mean_csa, path_output): @@ -186,7 +197,7 @@ def plot_sample_size(z_conf, z_power, std, mean_csa, path_output): mean_csa_sample = mean_csa ax.set_title('minimum number of participants to detect an atrophy with 5% uncertainty\n std = ' + str( - round(std, 2)) + 'mm², mean_csa = ' + str(mean_csa_sample) + 'mm²') + round(std, 2)) + ' mm², mean_csa = ' + str( round(mean_csa_sample, 2)) + ' mm²') ax.legend() ax.grid() def forward(atrophy): @@ -199,7 +210,7 @@ def inverse(atrophy): secax.set_xlabel('atrophy in %') output_file = os.path.join(path_output, "fig_min_subj.png") plt.savefig(output_file, bbox_inches='tight') - print("--> Created figure: {}".format(output_file)) + logger.info("--> Created figure: {}".format(output_file)) def 
sample_size(df, config_param): @@ -260,7 +271,7 @@ def error_function_of_intra_cov(df, path_output): plt.grid() output_file = os.path.join(path_output, "fig_err_in_function_of_cov.png") plt.savefig(output_file, bbox_inches='tight') - print("--> Created figure: {}".format(output_file)) + logger.info("--> Created figure: {}".format(output_file)) def error_function_of_intra_cov_outlier(df, path_output): @@ -305,7 +316,7 @@ def error_function_of_intra_cov_outlier(df, path_output): # save image output_file = os.path.join(path_output, "fig_err_in_function_of_cov_outlier.png") plt.savefig(output_file, bbox_inches='tight') - print("--> Created figure: {}".format(output_file)) + logger.info("--> Created figure: {}".format(output_file)) def add_columns_df_sub(df): @@ -343,6 +354,14 @@ def main(): path_results = os.path.abspath(os.path.expanduser(arguments.i)) vertlevels_input = arguments.l path_output = os.path.abspath(arguments.o) + if not os.path.exists(path_output): + os.mkdir(path_output) + + # Dump log file there + if os.path.exists(FNAME_LOG): + os.remove(FNAME_LOG) + fh = logging.FileHandler(os.path.join(path_output, FNAME_LOG)) + logging.root.addHandler(fh) # aggregate all csv results files concatenate_csv_files(path_results) @@ -355,21 +374,21 @@ def main(): pd.set_option('display.max_rows', None) # identify rows with missing values - print("Remove rows with missing values...") + logger.info("Remove rows with missing values...") lines_to_drop = df_vert[df_vert['MEAN(area)'] == 'None'].index df_vert['subject'] = list(sub.split('data_processed/')[1].split('/anat')[0] for sub in df_vert['Filename']) # remove rows with missing values df_vert = df_vert.drop(df_vert.index[lines_to_drop]) df_vert['MEAN(area)'] = pd.to_numeric(df_vert['MEAN(area)']) - print(" Rows removed: {}".format(lines_to_drop)) + logger.info(" Rows removed: {}".format(lines_to_drop)) # fetch parameters from config.yaml file config_param = yaml_parser(arguments.config) # add useful columns to dataframe 
df_vert['basename'] = list(os.path.basename(path).split('.nii.gz')[0] for path in df_vert['Filename']) - df_vert['rescale'] = list(float(b.split('RPI_r_r')[1].split('_')[0]) for b in df_vert['basename']) + df_vert['rescale'] = list(float(b.split('_r')[1].split('_')[0]) for b in df_vert['basename']) df_vert['slices'] = list(int(slices.split(':')[1]) - int(slices.split(':')[0]) + 1 for slices in df_vert['Slice (I->S)']) # verify if vertlevels of interest were given in input by user @@ -380,7 +399,7 @@ def main(): if not all(elem in set(list(df_vert['VertLevel'].values)) for elem in vertlevels): raise ValueError("\nInput vertebral levels '{}' do not exist in csv files".format(vertlevels)) # register vertebrae levels of interest (Default: all vertebrae levels in csv files) - print("Stats are averaged across vertebral levels: {}".format(vertlevels)) + logger.info(f"Stats are averaged across vertebral levels: {vertlevels}") # Create new dataframe with only selected vertebral levels df = df_vert[df_vert['VertLevel'].isin(vertlevels)] @@ -398,7 +417,7 @@ def main(): df = df.drop('basename', 1) # Create dataframe for computing stats per subject: df_sub - print("\n==================== subject_dataframe ==========================\n") + logger.info("\n==================== subject_dataframe ==========================\n") df_sub = pd.DataFrame() # add necessary columns to df_sub dataframe df_sub['rescale'] = df.groupby(['rescale', 'subject']).mean().reset_index()['rescale'] @@ -414,12 +433,12 @@ def main(): df_sub['rescale_estimated'] = df_sub['mean'].div(df_sub['csa_without_rescale']) df_sub['error'] = (df_sub['mean'] - df_sub['theoretic_csa']).abs() df_sub['perc_error'] = 100 * (df_sub['mean'] - df_sub['theoretic_csa']).abs().div(df_sub['theoretic_csa']) - print(df_sub) + logger.info(df_sub) # save dataframe in a csv file df_sub.to_csv(os.path.join(path_output, r'csa_sub.csv')) # Create dataframe for computing stats across subject: df_rescale - print("\n==================== 
rescaling_dataframe ==========================\n") + logger.info("\n==================== rescaling_dataframe ==========================\n") df_rescale = pd.DataFrame() df_rescale['rescale'] = df_sub.groupby(['rescale']).mean().reset_index()['rescale'] df_rescale['rescale_area'] = df_sub.groupby('rescale_area').mean().reset_index()['rescale_area'] @@ -436,9 +455,9 @@ df_rescale['mean_error'] = df_sub.groupby('rescale').mean()['error'].values df_rescale['std_perc_error'] = df_sub.groupby('rescale').std()['perc_error'].values df_rescale['sample_size'] = sample_size(df_rescale, config_param) - print(df_rescale) + logger.info(df_rescale) # save dataframe in a csv file - df_sub.to_csv(os.path.join(path_output, r'csa_transfo.csv')) + df_rescale.to_csv(os.path.join(path_output, r'csa_rescale.csv')) # plot graph if verbose is present if arguments.fig: