From b0b13d82d98f594fbeed38c49bf36e3dfa4ff54e Mon Sep 17 00:00:00 2001 From: DanielCorralesAlonso Date: Thu, 21 Nov 2024 10:45:06 +0100 Subject: [PATCH] updates --- config.yaml | 14 ++-- full_example.py | 2 +- network_functions.py | 156 --------------------------------------- use_case_new_strategy.py | 4 +- 4 files changed, 10 insertions(+), 166 deletions(-) diff --git a/config.yaml b/config.yaml index 7db63c6..2125f51 100644 --- a/config.yaml +++ b/config.yaml @@ -45,7 +45,7 @@ point_cond_mut_info: PE_prob: 0.7 # Probability of equivalence rel_point_cond_mut_info: - PE_cost: 10 # Probability equivalent cost + PE_cost: 5 # Probability equivalent cost PE_info: 4 # Probability equivalent info PE_prob: 0.7 # Probability of equivalence @@ -150,9 +150,9 @@ colors: # -- Use case -single_run: True -num_runs: 200 -max_workers: 32 +single_run: False +num_runs: 640 +max_workers: 64 all_variables: True from_elicitation: False @@ -161,9 +161,9 @@ operational_limit : { "No_scr_no_col": "inf", "No_scr_col": 3000, "gFOBT": 30000, - "FIT": 40000, + "FIT": 42000, "Blood_based": 7000, - "Stool_DNA": 5000, + "Stool_DNA": 6000, "CTC": 2000, "Colon_capsule": 2000, } @@ -190,5 +190,5 @@ operational_limit_new_test : { "Stool_DNA": 5000, "CTC": 2000, "Colon_capsule": 2000, - "New_test": 25000, + "New_test": 50000, } \ No newline at end of file diff --git a/full_example.py b/full_example.py index 95eaa28..a188edf 100644 --- a/full_example.py +++ b/full_example.py @@ -63,7 +63,7 @@ def full_example(only_counts = False): _ , counts, possible_outcomes = calculate_network_utilities(net, df_test, full_calculation = True) #code from plots.py function plot_screening_counts() - bars1 = axes[i,j].bar(possible_outcomes, counts, color = 'blue', label = 'Number of tests') + bars1 = axes[i,j].bar(possible_outcomes, counts, color = 'steelblue', alpha= 0.3, label = 'Number of tests') for bar in bars1: axes[i,j].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 3000, str(bar.get_height()), ha='center', color='black', fontsize=10) diff --git a/network_functions.py b/network_functions.py index c2c74d2..ac83715 100644 --- a/network_functions.py +++ b/network_functions.py @@ -408,163 +408,7 @@ def compare_strategies(df_selected, net, possible_outcomes, operational_limit = cost += col_costs return df_temp, cost - - - - - - - -# def simulate_test_results(sensitivity_scr, specificity_scr, -# sensitivity_col, specificity_col, y_crc): -# """ -# Simulate test results based on sensitivity, specificity, and actual number of patients -# with and without the disease. - -# Parameters: -# - sensitivity (float): Sensitivity of the test (true positive rate) -# - specificity (float): Specificity of the test (true negative rate) -# - num_with_disease (int): Number of patients who have the disease -# - num_without_disease (int): Number of patients who do not have the disease - -# Returns: -# - pandas DataFrame: A DataFrame with the simulated test results, true conditions, and test outcomes. -# """ - - -# num_with_disease = y_crc.sum() -# num_without_disease = len(y_crc) - num_with_disease - -# # Step 1: Create a list of patients with and without the disease - -# # Step 2: Simulate test results -# scr_results = [] -# col_results = [] - -# for y in y_crc: -# if y == 1: -# # Patient has the disease, test is positive with probability = sensitivity -# scr_result = np.random.choice([1, 0], p=[sensitivity_scr, 1 - sensitivity_scr]) -# else: -# # Patient does not have the disease, test is negative with probability = specificity -# scr_result = np.random.choice([0, 1], p=[specificity_scr, 1 - specificity_scr]) - -# scr_results.append(scr_result) - -# # Step 3: Create a DataFrame to store the results -# df_scr = pd.DataFrame({ -# 'Condition': y_crc, # True condition of the patient -# 'TestResult': scr_results # Simulated test result -# }) - - -# # For FIT positives, perform colonoscopy: -# FIT_positives = df_scr[df_scr["TestResult"] == 1] - -# conditions = FIT_positives["Condition"].to_list() - -# col_results = [] - -# for condition in conditions: -# if condition == 1: -# # Patient has the disease, test is positive with probability = sensitivity -# col_result = np.random.choice([1, 0], p=[sensitivity_col, 1 - sensitivity_col]) -# else: -# # Patient does not have the disease, test is negative with probability = specificity -# col_result = np.random.choice([0, 1], p=[specificity_col, 1 - specificity_col]) - -# col_results.append(col_result) - -# # Step 5: Create a DataFrame to store the results -# df_col = pd.DataFrame({ -# 'Condition': conditions, # True condition of the patient -# 'TestResult': col_results # Simulated test result -# }) - - - -# return df_scr, df_col - - - -# def output_test_results(df_test, y,df_scr, cost_scr, df_col, cost_col, verbose = False): - -# # Add columns to indicate true positives, false positives, etc. -# df_scr['TruePositive'] = (df_scr['Condition'] == 1) & (df_scr['TestResult'] == 1) -# df_scr['FalsePositive'] = (df_scr['Condition'] == 0) & (df_scr['TestResult'] == 1) -# df_scr['TrueNegative'] = (df_scr['Condition'] == 0) & (df_scr['TestResult'] == 0) -# df_scr['FalseNegative'] = (df_scr['Condition'] == 1) & (df_scr['TestResult'] == 0) - -# # Step 4: Calculate confusion matrix components -# TP_scr = df_scr['TruePositive'].sum() -# FP_scr = df_scr['FalsePositive'].sum() -# TN_scr = df_scr['TrueNegative'].sum() -# FN_scr = df_scr['FalseNegative'].sum() - -# # Create confusion matrix -# confusion_matrix_scr = pd.DataFrame({ -# 'Predicted Negative': [TN_scr, FN_scr], -# 'Predicted Positive': [FP_scr, TP_scr] -# }, index=['Actual Negative', 'Actual Positive']) - - -# FIT_positives = df_scr[df_scr["TestResult"] == 1] -# patient_data = df_scr["Condition"] - -# if verbose: -# logger.info("Number of patients considered: ", patient_data.shape[0]) -# logger.info(f"Cost of screening: {cost_scr*(patient_data.shape[0])} €") -# logger.info("Number of FIT positives: ", FIT_positives.shape[0]) -# logger.info("Number of colonoscopies to be done: ", FIT_positives.shape[0]) -# logger.info(f"Cost of colonoscopy program: {cost_col*FIT_positives.shape[0]} €") - - -# # Add columns to indicate true positives, false positives, etc. -# df_col['TruePositive'] = (df_col['Condition'] == 1) & (df_col['TestResult'] == 1) -# df_col['FalsePositive'] = (df_col['Condition'] == 0) & (df_col['TestResult'] == 1) -# df_col['TrueNegative'] = (df_col['Condition'] == 0) & (df_col['TestResult'] == 0) -# df_col['FalseNegative'] = (df_col['Condition'] == 1) & (df_col['TestResult'] == 0) - -# # Step 6: Calculate confusion matrix components -# TP_col = df_col['TruePositive'].sum() -# FP_col = df_col['FalsePositive'].sum() -# TN_col = df_col['TrueNegative'].sum() -# FN_col = df_col['FalseNegative'].sum() - -# # Create confusion matrix -# confusion_matrix_col = pd.DataFrame({ -# 'Predicted Negative': [TN_col, FN_col], -# 'Predicted Positive': [FP_col, TP_col] -# }, index=['Actual Negative', 'Actual Positive']) - -# total_cost = cost_scr*df_scr["Condition"].shape[0] + cost_col*FIT_positives.shape[0] - -# if verbose: -# logger.info("Number of CRC true positive cases detected by colonoscopy: ", TP_scr) -# logger.info("Number of false positives by colonoscopy: ", FP_scr) -# logger.info(f"Total cost of screening and colonoscopy: {total_cost} €") -# logger.info("Proportion of total CRC cases in the whole population detected by the method: ", TP_scr / df_test["CRC"].sum()) -# logger.info("Proportion of cases in the high-risk target population detected by the method: ", TP_scr / y.sum()) - -# combined_confusion_matrix = pd.DataFrame({ -# 'Predicted Negative': [TN_scr + TN_col, FN_scr + FN_col], -# 'Predicted Positive': [FP_col, TP_col] -# }, index=['Actual Negative', 'Actual Positive']) - -# # Calculate sensitivity and specificity using the combined confusion matrix -# sensitivity = TP_col / (TP_col + FN_col + FN_scr) -# specificity = (TN_scr + TN_col) / (TN_scr +TN_col + FP_col) -# PPV = TP_col / (TP_col + FP_col) -# NPV = (TN_scr + TN_col) / (TN_scr + TN_col + FN_scr + FN_col) - -# metrics = { -# "sensitivity": sensitivity, -# "specificity": specificity, -# "PPV": PPV, -# "NPV": NPV -# } -# return confusion_matrix_scr, confusion_matrix_col, combined_confusion_matrix, total_cost, metrics diff --git a/use_case_new_strategy.py b/use_case_new_strategy.py index 35eac1d..e3fd494 100644 --- a/use_case_new_strategy.py +++ b/use_case_new_strategy.py @@ -41,7 +41,7 @@ def use_case_new_strategy(net = None, log_dir = None, run_label = 'run', best_f1_score = {}, - output_dir = None + output_dir = 'logs' ): # check if an element in operational limit is inf @@ -51,7 +51,7 @@ def use_case_new_strategy(net = None, operational_limit_comp = {k: np.inf if v == "inf" else v for k, v in operational_limit_comp.items()} if logger == None: - logger, log_dir = create_folders_logger(single_run = single_run, label="use_case_", date = False, time = False, output_dir=output_dir) + logger, log_dir = create_folders_logger(single_run = single_run, label="use_case_", date = True, time = True, output_dir= output_dir) else: log_dir = os.path.join(log_dir, run_label) if not os.path.exists(log_dir):