From bbac9f1c23f3cf7c9d7cd07e23d29e0408036466 Mon Sep 17 00:00:00 2001
From: Candace Moore
Date: Thu, 14 Nov 2024 07:48:57 +0100
Subject: [PATCH 1/2] properly branched

---
 .../1112024_try_replace_colnames_scorecsvs.R  | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 r_scripts/1112024_try_replace_colnames_scorecsvs.R

diff --git a/r_scripts/1112024_try_replace_colnames_scorecsvs.R b/r_scripts/1112024_try_replace_colnames_scorecsvs.R
new file mode 100644
index 0000000..499d712
--- /dev/null
+++ b/r_scripts/1112024_try_replace_colnames_scorecsvs.R
@@ -0,0 +1,64 @@
+# Goal: replace the Participant Id values in the SCORE export repeating-data files with BRICK participant Ids, so that the files can be uploaded to the BRICK Castor study.
+# The SCORE repeating-data files themselves are read in further below.
+library(dplyr)
+
+# Read the subset key table and rename its Participant.Id column to BRICK_Id
+subset_key_table <- read.csv("Z:/castor_proof_files/csv_castor/current/brick_subset_key_table102024.csv") %>%
+  rename(BRICK_Id = Participant.Id)
+
+# NOTE: the paths in this script are for the Windows machine (Z: drive); on Linux use /mnt/data instead.
+# Paths of all repeating-data files exported from SCORE (one named entry per export file)
+file_paths <- list(
+  visual_hearing = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Visual_and_hearing_disease_Medical_History_Clinical_manifestati_export_20240717.csv",
+  acute_complications = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Acute_complications_export_20240717.csv",
+  bone_extremities = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Bone_and_extremities_Medical_History_Clinical_manifestations_export_20240717.csv",
+  cardiac_pulmonary = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Cardiac_and_pulmonary_disease_Medical_History_Clinical_manifest_export_20240717.csv",
+  comorbidities = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Comorbidities_export_20240717.csv",
+  endocrinological = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Endocrinological_disease_Medical_History_Clinical_manifestation_export_20240717.csv",
+  registry = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__export_20240717.csv",
+  liver_kidney = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Liver_and_kidney_disease_Medical_History_Clinical_manifestation_export_20240717.csv",
+  neurological = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Neurological_disease_Medical_History_Clinical_manifestations_export_20240717.csv",
+  specific_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatment_Use_of_specific_treatment_or_inclusion_in_Clinical_Tr_export_20240717.csv",
+  chelation_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatments_chelation_export_20240717.csv",
+  hydroxyurea_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatments_hydroxyurea_export_20240717.csv",
+  visit = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Visit_export_20240717.csv"
+)
+
+# Read each semicolon-separated file into a data frame; df_list keeps the names of file_paths
+df_list <- lapply(file_paths, function(path) {
+  read.csv(path, sep = ";", stringsAsFactors = FALSE)
+})
+
+# Rename the first column of each data frame to RADeep_id (the join key used below)
+df_list <- lapply(df_list, function(df) {
+  colnames(df)[1] <- "RADeep_id"
+  df
+})
+
+# Left-join each data frame with subset_key_table and expose BRICK_Id as `Participant Id`; rows without a match get NA
+df_list <- lapply(df_list, function(df) {
+  df %>%
+    left_join(subset_key_table, by = "RADeep_id") %>% # Join on RADeep_id (must also be a column of subset_key_table)
+    mutate(`Participant Id` = BRICK_Id) %>% # Create Participant Id from BRICK_Id
+    select(`Participant Id`, everything(), -BRICK_Id) # Participant Id first, BRICK_Id dropped
+})
+
+# lapply() already preserves the names; this assignment is kept as an explicit safeguard
+names(df_list) <- names(file_paths)
+
+# Also expose every data frame as an individual object in the global environment
+list2env(df_list, envir = .GlobalEnv)
+
+# Define the output directory for the updated CSV files
+output_dir <- "Z:/castor_proof_files/csv_castor/current/"
+
+# Ensure the output directory exists; recursive = TRUE also creates missing parent folders (the warning is suppressed, so a failure here would otherwise go unnoticed)
+dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
+
+# Write each data frame to <output_dir><name>_updated.csv
+for (name in names(df_list)) {
+  write.csv(df_list[[name]], file = paste0(output_dir, name, "_updated.csv"), row.names = FALSE)
+}
+
+# Confirm file creation
+cat("CSV files have been created in:", output_dir, "\n")
From 89a951c8c33f7e6838953b464aa9b82c174f0e6b Mon Sep 17 00:00:00 2001
From: Candace Moore
Date: Sat, 23 Nov 2024 12:14:29 +0100
Subject: [PATCH 2/2] add aditional R scripts

---
 dwi_2_environment.yaml                        |  21 +++
 dwi_3_environment.yaml                        |  22 +++
 r_scripts/1162024Tables_paper_1.R             | 154 ++++++++++++++++++
 ...72024_Table2_Growthcurves_script_attempt.R |  78 +++++++++
 ...2112024_Identify_genotypes_bridge_sample.R | 117 +++++++++++++
 .../12112024_script_table1_ash_2024_poster.R  | 101 ++++++++++++
 .../8112024_Script_missing_lab_values_T0.R    |  18 ++
 r_scripts/8112024_Table3_volbrain_stats.R     |  93 +++++++++++
 .../8112024_create_table4_fs_recon-all.R      |  83 ++++++++++
 .../fs_table_marjolein_abstract_28102024.R    |  81 +++++++++
 ...marjolein_abstract_with_genotype_8112024.R |  43 +++++
 r_scripts/organize_BRIDGE_data.R              |  84 ++++++++++
 r_scripts/subset_key_table_castor.R           |   9 +
 13 files changed, 904 insertions(+)
 create mode 100644 dwi_2_environment.yaml
 create mode 100644 dwi_3_environment.yaml
 create mode 100644 r_scripts/1162024Tables_paper_1.R
 create mode 100644 r_scripts/1172024_Table2_Growthcurves_script_attempt.R
 create mode 100644 r_scripts/12112024_Identify_genotypes_bridge_sample.R
 create mode 100644 r_scripts/12112024_script_table1_ash_2024_poster.R
 create mode 100644 r_scripts/8112024_Script_missing_lab_values_T0.R
 create mode 100644 r_scripts/8112024_Table3_volbrain_stats.R
 create mode 100644 r_scripts/8112024_create_table4_fs_recon-all.R
 create mode 100644 r_scripts/fs_table_marjolein_abstract_28102024.R
 create mode 100644 r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R
 create mode 100644 r_scripts/organize_BRIDGE_data.R
 create mode 100644 r_scripts/subset_key_table_castor.R

diff --git a/dwi_2_environment.yaml b/dwi_2_environment.yaml
new file mode 100644
index 0000000..11367d7
--- /dev/null
+++ b/dwi_2_environment.yaml
@@ -0,0 +1,21 @@
+# Conda environment for the dwi work (environment name: witty)
+
+
+name: witty
+channels:
+  - conda-forge
+
+dependencies:
+  - dipy=1.7.0
+  - fury=0.7.1
+  - jupyter
+  - jupyterlab
+  - matplotlib=3.5.3
+  - nilearn=0.7.0
+  - osfclient=0.0.5
+  - python>=3.10
+  - pybids
+
+# If the environment does not resolve, install everything except pybids first, then run `conda install conda-forge::pybids`
+
+
diff --git a/dwi_3_environment.yaml b/dwi_3_environment.yaml
new file mode 100644
index 0000000..11a1409
--- /dev/null
+++ b/dwi_3_environment.yaml
@@ -0,0 +1,22 @@
+# Conda environment for the dwi work (environment name: wittier; same as witty plus a pinned numexpr)
+
+
+name: wittier
+channels:
+  - conda-forge
+
+dependencies:
+  - dipy=1.7.0
+  - fury=0.7.1
+  - jupyter
+  - jupyterlab
+  - matplotlib=3.5.3
+  - nilearn=0.7.0
+  - numexpr=2.8.4
+  - osfclient=0.0.5
+  - python>=3.10
+  - pybids
+
+# If the environment does not resolve, install everything except pybids first, then run `conda install conda-forge::pybids`
+
+
diff --git a/r_scripts/1162024Tables_paper_1.R b/r_scripts/1162024Tables_paper_1.R
new file mode 100644
index 0000000..0e0631b
--- /dev/null
+++ b/r_scripts/1162024Tables_paper_1.R
@@ -0,0 +1,154 @@
+## In this script I try to clean up the Castor data export of November 11th. At this time, the SCORE repeated data, volbrain data and FreeSurfer data have not been uploaded yet.
+# Oddly, age_T0_MRI_months is exported incorrectly by Castor, so the next export will be limited to the baseline_MRI form at T0 and merged in later.
+library(dplyr)
+library(writexl) #to write the tables to excel files
+# Read in the raw data file. For now it lives in my personal folder; it will be moved once the final data structure is settled.
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep= ";")
+ages <-read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep=",") # contains the ages and recon-all data, because calculated fields apparently cannot be exported from Castor
+volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv") #contains volbrain output
+
+# Rename the participant id column of each data frame to Participant_Id so they can be merged later.
+raw <- raw %>%
+  rename(Participant_Id = 1) # first column, renamed by position: its raw name carries a byte-order-mark artefact ("ï..Participant.Id") that depends on the locale/encoding used when reading
+
+ages <- ages %>%
+  rename(Participant_Id = Participant.Id)
+
+volbrain <- volbrain %>%
+  rename(Participant_Id = Participant.Id)
+
+# The export is deliberately not cleaned up further; the tables are built directly from it.
+
+table1 <- raw %>% select(Participant_Id, Hydrea_at_scan_T0, brick_genotype, mothersbirth, fathersbirth)
+
+# Bring in age in months (and gender) from the ages data frame
+table1 <- table1 %>%
+  left_join(select(ages, Participant_Id, Age_at_scan_m_T0X, gender_BRICK), by = "Participant_Id")
+
+# Add age in years (months / 12, rounded to 1 decimal); table1 now holds all ingredients for Table 1
+table1 <- table1 %>%
+  mutate(Age_at_scan_years = round(Age_at_scan_m_T0X / 12, 1))
+
+# Table 2: lab values, including the HPLC HbF and HbS columns
+
+table2 <- raw %>% select(Participant_Id, ERY0, HB0, MCV0, HT0, reticulocyte_count_percentage_1, LEU0, TROMBO0, FE0, FERT0, TRAF0, TSAT0, ALAT0, LDH0, TBIL0, DBIL0, KREA0, SCHWARTZ_Bedsite0, FOLZ0, UREU0, NA0, K00, VITD0, ASAT0, AFOS0, GGT0, CRP0, KREA_U0, TE_U0, TE_U_KR0, ALB_U0, ALB_U_KR0, B120, HPLC_HBF_T0, HPLC_HBF_date_T0, HPLC_HbS_T0, HPLC_HbS_date_T0)
+
+# Table 3: descriptives from the radiology reports (T1w QC scores, WMH, vasculature, microbleeds, incidental findings, remarks)
+table3 <- raw %>% select(Participant_Id,Measurement_moment_QC_T1w_T0, Score_QC_T1w_T0, Exclude_Score_QC_T1w_T0, Remarks_Score_QC_T1w_T0, Screened_for_WMH_T0, WMH_observed_T0, Vasculature_examined_T0, Vascular_malformations_T0, Microbleeds_screened_T0, Microbleeds_present_T0, Incidental_finding_T0, Marjolein_T0, Remarks_MRI_T0)
+
+# Table 4: white matter hyperintensities (lesion count / volume / burden columns from the volbrain output)
+table4 <- volbrain %>%
+  select(
+    Participant_Id, dl_Sex_T0, dl_Age_T0,
+    dl_Total_lesion_count_T0, dl_Total_lesion_volume_.absolute._cm3_T0,
+    dl_Total_lesion_volume_.normalized._._T0, dl_Total_lesion_burden_T0,
+    dl_Periventricular_lesion_count_T0, dl_Periventricular_lesion_volume_.absolute._cm3_T0,
+    dl_Periventricular_lesion_volume_.normalized._._T0, dl_Periventricular_lesion_burden_T0,
+    dl_Deep_white_lesion_count_T0, dl_Deep_white_lesion_volume_.absolute._cm3_T0,
+    dl_Deep_white_lesion_volume_.normalized._._T0, dl_Deep_white_lesion_burden_T0,
+    dl_Juxtacortical_lesion_count_T0, dl_Juxtacortical_lesion_volume_.absolute._cm3_T0,
+    dl_Juxtacortical_lesion_volume_.normalized._._T0, dl_Juxtacortical_lesion_burden_T0,
+    dl_Infratentorial_lesion_count_T0, dl_Infratentorial_lesion_volume_.absolute._cm3_T0,
+    dl_Infratentorial_lesion_volume_.normalized._._T0, dl_Infratentorial_lesion_burden_T0
+  )
+# Table 5: recon-all volumetrics (white, grey and subcortical matter); z-scores for 2 different kinds of growth charts are planned but not computed here
+# Derive Total Cortical Volume and Total Cerebellar Volume from the recon-all columns in ages
+table5 <- ages %>%
+  mutate(
+    Total_Cortical_Volume = CortexVol + Left.Cerebellum.Cortex + Right.Cerebellum.Cortex, # NOTE(review): adds cerebellar cortex to CortexVol - confirm this is the intended definition
+    Total_Cerebellar_Volume = Left.Cerebellum.Cortex + Right.Cerebellum.Cortex
+  ) %>%
+
+  # Keep only the columns needed for the table and give them readable names
+  select(
+    `Participant ID` = Participant_Id,
+    `Age (years)` = Age_at_scan_y_T0X,
+    `Gender` = gender_BRICK,
+
+    # Total volumes
+    `Total Cortical Volume` = Total_Cortical_Volume,
+    `Total White Matter Volume` = CerebralWhiteMatterVol,
+    `Total Gray Matter Volume` = TotalGrayVol,
+    `Total Cerebellar Volume` = Total_Cerebellar_Volume,
+
+    # Subcortical structures, left and right
+    `Thalamus Left` = Left.Thalamus.Proper,
+    `Thalamus Right` = Right.Thalamus.Proper,
+    `Caudate Left` = Left.Caudate,
+    `Caudate Right` = Right.Caudate,
+    `Putamen Left` = Left.Putamen,
+    `Putamen Right` = Right.Putamen,
+    `Pallidum Left` = Left.Pallidum,
+    `Pallidum Right` = Right.Pallidum,
+    `Hippocampus Left` = Left.Hippocampus,
+    `Hippocampus Right` = Right.Hippocampus
+  )
+# Table 5_1: neuropsychological outcomes (WISC-V and WAIS-IV) + education level of the parents
+# WISC-V index scores
+table6_wisc <- raw %>%
+  select(
+    Participant_Id,
+    scorePrimIndex_TIQ_S_1, # Full Scale IQ
+    scorePrimIndex_VBI_IQ_1, # Verbal Comprehension Index
+    scorePrimIndex_VRI_IQ_1, # Visual Spatial Index
+    scorePrimIndex_FRI_IQ_1, # Fluid Reasoning Index
+    scorePrimIndex_WgI_IQ_1, # Working Memory Index
+    scorePrimIndex_VsI_IQ_1, # Processing Speed Index
+    scoreSecIndex_KRI_IQ_1, # Quantitative Reasoning Index
+    scoreSecIndex_AWI_IQ_1, # Auditory Working Memory Index
+    scoreSecIndex_NVI_IQ_1, # Nonverbal Index
+    scoreSecIndex_AVI_IQ_1, # General Ability Index
+  )
+
+# WAIS-IV index scores
+
+table6_wais <- raw %>%
+  select(
+    "Participant_Id",
+    "ScoreTIQ_1", # Full Scale IQ (FSIQ)
+    "ScoreSomVBI_1", # Verbal Comprehension Index (VCI)
+    "ScorePerVBI_1", # Perceptual Verbal Comprehension Index (related to FRI)
+    "ScoreBIVBI_1", # Blocked Verbal Comprehension Index (related to FRI)
+    "ScoreWgI_1", # Working Memory Index (WMI)
+    "ScoreVsI_1" # Processing Speed Index (PSI)
+  )
+
+# Create a real Table 1 with the tableone package
+library(tableone)
+
+# Specify the categorical and continuous variables
+categorical_vars <- c("Hydrea_at_scan_T0", "brick_genotype", "gender_BRICK")
+continuous_vars <- c("Age_at_scan_years")
+
+# Full variable list passed to CreateTableOne
+vars <- c(categorical_vars, continuous_vars)
+
+# Create the table. NOTE(review): combined_data is not defined in this script (it is built in 12112024_Identify_genotypes_bridge_sample.R) while the vars above are columns of table1 - confirm the intended data frame
+table1_summary <- CreateTableOne(vars = vars, data = combined_data, factorVars = categorical_vars)
+
+# Print the table1 summary
+print(table1_summary)
+
+# Median and IQR are wanted for HbS, so they are computed by hand
+# Median and IQR of 'HPLC_HbS_T0' (NAs ignored); same caveat about combined_data as above
+hplc_summary <- combined_data %>%
+  summarise(
+    Median_HPLC_HbS_T0 = median(HPLC_HbS_T0, na.rm = TRUE),
+    IQR_HPLC_HbS_T0 = IQR(HPLC_HbS_T0, na.rm = TRUE)
+  )
+
+# Mean and SD of 'HB0' (NAs ignored)
+hb0_summary <- combined_data %>%
+  summarise(
+    Mean_HB0 = mean(HB0, na.rm = TRUE),
+    SD_HB0 = sd(HB0, na.rm = TRUE)
+  )
+
+# Print the custom summaries for HPLC_HbS_T0 and HB0
+print("HPLC_HbS_T0 - Median and IQR:")
+print(hplc_summary)
+
+print("HB0 - Mean and SD:")
+print(hb0_summary)
+
+
diff --git a/r_scripts/1172024_Table2_Growthcurves_script_attempt.R b/r_scripts/1172024_Table2_Growthcurves_script_attempt.R
new file mode 100644
index 0000000..da6096c
--- /dev/null
+++ b/r_scripts/1172024_Table2_Growthcurves_script_attempt.R
@@ -0,0 +1,78 @@
+library(dplyr)
+library(tableone)
+library(writexl)
+library(tibble) # For rownames_to_column()
+
+# Select the lab columns from table2 (table2 is created by 1162024Tables_paper_1.R, which must be run first)
+table2_filtered <- table2 %>%
+  select(Participant_Id, ERY0, HB0, MCV0, HT0, reticulocyte_count_percentage_1, LEU0,
+         TROMBO0, FE0, FERT0, TRAF0, TSAT0, ALAT0, LDH0, TBIL0, DBIL0, KREA0,
+         FOLZ0, UREU0, NA0, K00, VITD0, ASAT0, AFOS0, GGT0, CRP0, KREA_U0,
+         TE_U0, TE_U_KR0, ALB_U0, ALB_U_KR0, B120)
+
+
+# Define continuous variables (every selected column except Participant_Id)
+continuous_vars <- c("ERY0", "HB0", "MCV0", "HT0", "reticulocyte_count_percentage_1",
+                     "LEU0", "TROMBO0", "FE0", "FERT0", "TRAF0", "TSAT0", "ALAT0",
+                     "LDH0", "TBIL0", "DBIL0", "KREA0", "FOLZ0", "UREU0", "NA0",
+                     "K00", "VITD0", "ASAT0", "AFOS0", "GGT0", "CRP0", "KREA_U0",
+                     "TE_U0", "TE_U_KR0", "ALB_U0", "ALB_U_KR0", "B120")
+
+# Named vector mapping each variable name to a display label with units
+labels <- c(
+  "ERY0" = "Red blood cell (RBC) count (x10^12/L)",
+  "HB0" = "Hemoglobin (g/dL)",
+  "MCV0" = "Mean corpuscular volume (MCV) (fL)",
+  "HT0" = "Hematocrit (HCT) (%)",
+  "reticulocyte_count_percentage_1" = "Reticulocyte count (%)",
+  "LEU0" = "White blood cell (WBC) count (x10^9/L)",
+  "TROMBO0" = "Platelet count (x10^9/L)",
+  "FE0" = "Serum iron (µg/dL)",
+  "FERT0" = "Ferritin serum (ng/mL)",
+  "TRAF0" = "Transferrin (mg/dL)",
+  "TSAT0" = "Transferrin saturation (%)",
+  "ALAT0" = "Alanine transaminase (ALT) (U/L)",
+  "LDH0" = "Lactate Dehydrogenase (LDH) (U/L)",
+  "TBIL0" = "Total bilirubin (mg/dL)",
+  "DBIL0" = "Conjugated/direct bilirubin (mg/dL)",
+  "KREA0" = "Creatinine (mg/dL)",
+  "FOLZ0" = "Folate (ng/mL)",
+  "UREU0" = "Urea (mg/dL)",
+  "NA0" = "Sodium (mmol/L)",
+  "K00" = "Potassium (mmol/L)",
+  "VITD0" = "Vitamin D (ng/mL)",
+  "ASAT0" = "Aspartate transaminase (AST) (U/L)",
+  "AFOS0" = "Alkaline phosphatase (ALP) (U/L)",
+  "GGT0" = "Gamma-glutamyl transferase (GGT) (U/L)",
+  "CRP0" = "C-reactive protein (CRP) (mg/L)",
+  "KREA_U0" = "Creatinine (Urine) (mg/dL)",
+  "TE_U0" = "Iron (Urine) (µg/dL)",
+  "TE_U_KR0" = "Iron (Urine, Kr) (µg/dL)",
+  "ALB_U0" = "Albumin (Urine) (g/dL)",
+  "ALB_U_KR0" = "Albumin (Urine, Kr) (g/dL)",
+  "B120" = "Bilirubin 120 (mg/dL)" # NOTE(review): B120 looks like vitamin B12 at T0 rather than a bilirubin value - confirm label and unit
+)
+
+# Create a summary table with continuous variables
+table2_summary <- CreateTableOne(
+  vars = continuous_vars,
+  data = table2_filtered,
+  factorVars = character(0) # No factor variables
+)
+
+# Convert the printed table summary to a data frame (row names look like "n" or "ERY0 (mean (SD))")
+table2_df <- as.data.frame(print(table2_summary, quote = FALSE, noSpaces = TRUE))
+
+# Replace each row name by its label, matched on the variable name; rows without a label (e.g. the leading "n" row) keep their name, so the row count never has to equal length(labels)
+table2_df <- table2_df %>%
+  rownames_to_column(var = "Variable") %>%
+  mutate(Variable = coalesce(unname(labels[sub(" \\(.*$", "", Variable)]), Variable))
+
+# Rename the 'Overall' column to "Mean (SD)" to indicate the values
+colnames(table2_df)[colnames(table2_df) == "Overall"] <- "Mean (SD)"
+
+
+# View the final table
+print(table2_df)
+# Open question: the results look odd; how should missing values be imputed?
+
diff --git a/r_scripts/12112024_Identify_genotypes_bridge_sample.R b/r_scripts/12112024_Identify_genotypes_bridge_sample.R
new file mode 100644
index 0000000..9cfc342
--- /dev/null
+++ b/r_scripts/12112024_Identify_genotypes_bridge_sample.R
@@ -0,0 +1,117 @@
+# This script finds the genotypes that apply to the BRIDGE subset of BRICK, for the ASH 2024 poster
+library(dplyr)
+library(readr)
+library(tableone)
+
+#load dataset with genotypes
+genotype <-read.csv("Z:/Aida_experiment/combined_BRICK_marjolein_with_genotype.csv")
+
+# Round "Age_at_scan_y_T0X" to 2 decimals in the genotype data frame so it can be matched against the age lists below
+genotype <- genotype %>%
+  mutate(Age_at_scan_y_T0X = round(Age_at_scan_y_T0X, 2))
+
+# Ages (years) of the BRIDGE participants, per gender
+ages_male <- c(9.94, 11.74, 12.73, 12.81, 13.94, 15.09, 15.22, 16.02, 16.65, 17.4, 17.48)
+ages_female <- c(8.05, 9.16, 10.32, 13.72, 13.77, 14.85, 16.25)
+
+# Filter the genotype data frame on matching ages, separately for gender_BRICK == 1 (male list) and == 2 (female list)
+matched_data_male <- genotype %>%
+  filter(Age_at_scan_y_T0X %in% ages_male & gender_BRICK == 1) %>%
+  select(Participant_Id, brick_genotype, Age_at_scan_y_T0X, gender_BRICK)
+
+matched_data_female <- genotype %>%
+  filter(Age_at_scan_y_T0X %in% ages_female & gender_BRICK == 2) %>%
+  select(Participant_Id, brick_genotype, Age_at_scan_y_T0X, gender_BRICK)
+
+# Assign to BRIDGE_male and BRIDGE_female data frames
+BRIDGE_male <- matched_data_male
+BRIDGE_female <- matched_data_female
+
+# Display the resulting data frames
+print("BRIDGE_male:")
+print(BRIDGE_male)
+
+print("BRIDGE_female:")
+print(BRIDGE_female)
+
+# The age match is not exact, so the per-gender subsets below are compared manually instead
+
+genotype_male <- genotype %>% filter(gender_BRICK == 1)
+
+genotype_female <- genotype %>% filter(gender_BRICK == 2)
+
+# Extra columns of interest for Table 1: Hydrea use, Hb and HbS. Below, the BRIDGE subset is loaded; gender, BRICK number and genotype were added to it manually. Run 1162024Tables_paper_1.R and 1172024_Table2_Growthcurves_script_attempt.R
+# beforehand (they create table1, table2_filtered and raw)
+male_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_boys.csv")
+female_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_girls.csv")
+
+# From table1 take "Hydrea_at_scan_T0", from table2_filtered take HB0, from raw take HPLC_HbS_T0. Rows with NA are removed, then everything is merged on Participant_Id into new data frames for males and females
+
+# Extract the relevant columns from table1, table2_filtered, and raw for males
+table1_filtered_male <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0) %>%
+  filter(!is.na(Hydrea_at_scan_T0))
+
+table2_filtered_male <- table2_filtered %>%
+  select(Participant_Id, HB0) %>%
+  filter(!is.na(HB0))
+
+raw_filtered_male <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0) %>%
+  filter(!is.na(HPLC_HbS_T0))
+
+# Merge male data with the additional columns (inner joins: participants missing any of the three values drop out)
+male_data_merged <- male_brick %>%
+  inner_join(table1_filtered_male, by = "Participant_Id") %>%
+  inner_join(table2_filtered_male, by = "Participant_Id") %>%
+  inner_join(raw_filtered_male, by = "Participant_Id")
+
+# Repeat the process for females
+table1_filtered_female <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0) %>%
+  filter(!is.na(Hydrea_at_scan_T0))
+
+table2_filtered_female <- table2_filtered %>%
+  select(Participant_Id, HB0) %>%
+  filter(!is.na(HB0))
+
+raw_filtered_female <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0) %>%
+  filter(!is.na(HPLC_HbS_T0))
+
+# Merge female data with the additional columns (same inner-join behaviour as for males)
+female_data_merged <- female_brick %>%
+  inner_join(table1_filtered_female, by = "Participant_Id") %>%
+  inner_join(table2_filtered_female, by = "Participant_Id") %>%
+  inner_join(raw_filtered_female, by = "Participant_Id")
+
+# Display the resulting data frames
+print("Male Data Merged:")
+print(male_data_merged)
+
+print("Female Data Merged:")
+print(female_data_merged)
+
+
+# Now combine both data frames and create a Table 1
+# Stack the male and female data sets into one, keeping the gender column intact
+combined_data <- bind_rows(male_data_merged, female_data_merged)
+
+# Define categorical and continuous variables
+categorical_vars <- c("Hydrea_at_scan_T0", "genotype", "gender_brick")
+continuous_vars <- c("AgeChild", "HB0", "HPLC_HbS_T0")
+
+# Use the tableone package to create the summary table; CreateTableOne takes variable names (vars / factorVars), not a formula or render.* arguments (those belong to table1::table1)
+table1_summary <- CreateTableOne(
+  vars = c(categorical_vars, continuous_vars),
+  data = combined_data,
+  factorVars = categorical_vars
+)
+
+# Print the table1 summary; nonnormal = ... reports the continuous variables as median [IQR]
+print(table1_summary, nonnormal = continuous_vars)
+
+
+
+
diff --git a/r_scripts/12112024_script_table1_ash_2024_poster.R b/r_scripts/12112024_script_table1_ash_2024_poster.R
new file mode 100644
index 0000000..56d8b3d
--- /dev/null
+++ b/r_scripts/12112024_script_table1_ash_2024_poster.R
@@ -0,0 +1,101 @@
+# In this script the definitive Table 1 for the ASH 2024 poster is made. Hb has 2 missing values, so it is addressed separately.
+library(dplyr)
+library(tableone)
+
+# Read male and female brick data
+male_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_boys.csv")
+female_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_girls.csv")
+
+
+# Extract the relevant columns from table1, table2_filtered, and raw for males (rows with NA are kept)
+table1_filtered_male_withna <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0)
+
+table2_filtered_male_withna <- table2_filtered %>%
+  select(Participant_Id, HB0)
+
+raw_filtered_male_withna <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0)
+
+# Merge male data with additional columns (keeping NAs)
+male_data_merged_withna <- male_brick %>%
+  inner_join(table1_filtered_male_withna, by = "Participant_Id") %>%
+  inner_join(table2_filtered_male_withna, by = "Participant_Id") %>%
+  inner_join(raw_filtered_male_withna, by = "Participant_Id")
+
+# Repeat the process for females (rows with NA are kept)
+table1_filtered_female_withna <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0)
+
+table2_filtered_female_withna <- table2_filtered %>%
+  select(Participant_Id, HB0)
+
+raw_filtered_female_withna <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0)
+
+# Merge female data with additional columns (keeping NAs)
+female_data_merged_withna <- female_brick %>%
+  inner_join(table1_filtered_female_withna, by = "Participant_Id") %>%
+  inner_join(table2_filtered_female_withna, by = "Participant_Id") %>%
+  inner_join(raw_filtered_female_withna, by = "Participant_Id")
+
+# Combine male and female data into one dataset
+combined_data_withna <- bind_rows(male_data_merged_withna, female_data_merged_withna)
+
+# For every column, count the missing (NA) and the observed (non-NA) values
+missing_values <- combined_data_withna %>%
+  summarise(across(everything(),
+                   list(Missing = ~sum(is.na(.)),
+                        Observed = ~sum(!is.na(.)))))
+
+# Print missing values and observed counts for each variable
+print("Missing and Observed Values for Each Variable:")
+print(missing_values)
+
+# Only HB0 has missing values (2). All 18 participants are kept for the rest of Table 1. (The original note ends with "We keep the table1 script from" - source script not named; presumably 12112024_Identify_genotypes_bridge_sample.R, confirm.)
+
+# Define categorical and continuous variables (excluding HB0 from Table 1)
+categorical_vars_withna <- c("Hydrea_at_scan_T0", "genotype", "gender_brick")
+continuous_vars_withna <- c("AgeChild", "HPLC_HbS_T0") # Exclude HB0 from here
+
+# Create Table 1 summary without HB0
+table1_summary_wna <- CreateTableOne(
+  vars = c("Hydrea_at_scan_T0", "genotype", "gender_brick",
+           "AgeChild", "HPLC_HbS_T0"), # Include all variables except HB0
+  data = combined_data_withna,
+  factorVars = categorical_vars_withna # Specify categorical variables
+)
+
+# Print the Table 1 summary
+print("Table 1 Summary (Excluding HB0):")
+print(table1_summary_wna)
+
+# Custom one-row summary of the continuous variables: mean and SD for AgeChild, median and IQR for HPLC_HbS_T0
+custom_summary_withna <- combined_data_withna %>%
+  summarise(
+    AgeChild_mean = mean(AgeChild, na.rm = TRUE), # Mean of AgeChild
+    AgeChild_sd = sd(AgeChild, na.rm = TRUE), # SD of AgeChild
+    HPLC_HbS_T0_median = median(HPLC_HbS_T0, na.rm = TRUE), # Median of HPLC_HbS_T0
+    HPLC_HbS_T0_IQR = IQR(HPLC_HbS_T0, na.rm = TRUE) # IQR of HPLC_HbS_T0
+  )
+
+# Print the custom summary
+print("Custom Summary (Mean, SD for AgeChild; Median, IQR for HPLC_HbS_T0):")
+print(custom_summary_withna)
+
+# Mean and SD of HB0, computed on the non-missing values only
+hb0_summary <- combined_data_withna %>%
+  summarise(
+    HB0_mean = mean(HB0, na.rm = TRUE), # Mean excluding NAs
+    HB0_sd = sd(HB0, na.rm = TRUE) # SD excluding NAs
+  )
+
+# Print the summary for HB0
+print("Summary of HB0 (Excluding Missing Data):")
+print(hb0_summary)
+
+# Footnote about the missing HB0 values (the 16/18 count is hard-coded; update it if the data change)
+footnote <- "Note: For the variable 'HB0', only data from 16/18 participants was included due to missing values."
+
+# Print the footnote
+print(footnote)
diff --git a/r_scripts/8112024_Script_missing_lab_values_T0.R b/r_scripts/8112024_Script_missing_lab_values_T0.R
new file mode 100644
index 0000000..9b3c1a9
--- /dev/null
+++ b/r_scripts/8112024_Script_missing_lab_values_T0.R
@@ -0,0 +1,18 @@
+### This script lists the missing lab values per participant so they can be checked in Castor. First run 1172024_Table2_Growthcurves_script_attempt.R (it creates table2_filtered).
+
+library(dplyr)
+library(tidyr)
+library(writexl)
+
+# List every (Participant_Id, Variable) pair whose value is NA, excluding `reticulocyte_count_percentage_1`
+missing_values <- table2_filtered %>%
+  pivot_longer(cols = -Participant_Id, names_to = "Variable", values_to = "Value") %>% # Long format: one row per participant and variable
+  filter(is.na(Value) & Variable != "reticulocyte_count_percentage_1") %>% # Keep only missing values, skipping reticulocyte_count_percentage_1
+  select(Participant_Id, Variable) # Select only relevant columns
+
+# Write the result to an Excel file
+write_xlsx(missing_values, path = "Z:/Aida_experiment/Growthcurves_paper/Uittezoeken/missing_values_per_participant.xlsx")
+
+
+# Display the result
+print(missing_values)
diff --git a/r_scripts/8112024_Table3_volbrain_stats.R b/r_scripts/8112024_Table3_volbrain_stats.R
new file mode 100644
index 0000000..c0739ac
--- /dev/null
+++ b/r_scripts/8112024_Table3_volbrain_stats.R
@@ -0,0 +1,93 @@
+library(dplyr)
+library(writexl)
+library(tidyr)
+
+# Read in the raw data file, ages, and volbrain data
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep= ";")
+ages <- read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep=",") #this file contains the ages and recon-all data
+volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv") #contains volbrain output
+
+# Rename participant id for later merge
+volbrain <- volbrain %>% rename(Participant_Id = Participant.Id)
+
+# Rename the first column of the raw dataset by position: its raw name carries a byte-order-mark artefact ("ï..Participant.Id") that depends on the locale/encoding used when reading
+raw <- raw %>% rename(Participant_Id = 1)
+
+# Table 4: White matter hyperintensities (volbrain lesion columns plus the two volbrain quality-control columns)
+table4 <- volbrain %>%
+  select(
+    Participant_Id, dl_Sex_T0, dl_Age_T0, dl_Quality_control_T1_T0, dl_Quality_control_FLAIR_T0,
+    dl_Total_lesion_count_T0, dl_Total_lesion_volume_.absolute._cm3_T0,
+    dl_Total_lesion_volume_.normalized._._T0, dl_Total_lesion_burden_T0,
+    dl_Periventricular_lesion_count_T0, dl_Periventricular_lesion_volume_.absolute._cm3_T0,
+    dl_Periventricular_lesion_volume_.normalized._._T0, dl_Periventricular_lesion_burden_T0,
+    dl_Deep_white_lesion_count_T0, dl_Deep_white_lesion_volume_.absolute._cm3_T0,
+    dl_Deep_white_lesion_volume_.normalized._._T0, dl_Deep_white_lesion_burden_T0,
+    dl_Juxtacortical_lesion_count_T0, dl_Juxtacortical_lesion_volume_.absolute._cm3_T0,
+    dl_Juxtacortical_lesion_volume_.normalized._._T0, dl_Juxtacortical_lesion_burden_T0,
+    dl_Infratentorial_lesion_count_T0, dl_Infratentorial_lesion_volume_.absolute._cm3_T0,
+    dl_Infratentorial_lesion_volume_.normalized._._T0, dl_Infratentorial_lesion_burden_T0
+  )
+
+# Merge Exclude_Score_QC_T1w_T0 from raw dataset into table4 (left join: participants absent from raw get NA)
+table4_merged <- table4 %>%
+  left_join(raw %>% select(Participant_Id, Exclude_Score_QC_T1w_T0), by = "Participant_Id")
+
+# Clean the quality control columns: Convert to character and trim whitespace
+table4_merged <- table4_merged %>%
+  mutate(
+    dl_Quality_control_T1_T0 = trimws(as.character(dl_Quality_control_T1_T0)),
+    dl_Quality_control_FLAIR_T0 = trimws(as.character(dl_Quality_control_FLAIR_T0))
+  )
+
+# Exclude low-quality scans: a "C" in either quality-control column, or Exclude_Score_QC_T1w_T0 equal to 1. A missing exclusion flag is not an exclusion, so NA rows are kept explicitly (a bare `!= 1` would drop them silently)
+table4_volbrain_df <- table4_merged %>%
+  filter(
+    !(grepl("C", dl_Quality_control_T1_T0) | grepl("C", dl_Quality_control_FLAIR_T0)) &
+      (is.na(Exclude_Score_QC_T1w_T0) | Exclude_Score_QC_T1w_T0 != 1)
+  )
+
+# View the filtered table
+print(table4_volbrain_df)
+
+# Now create a table with the volbrain descriptives
+# Continuous variables to summarise (mean and SD by default in tableone)
+continuous_vars <- c(
+  "dl_Age_T0",
+  "dl_Total_lesion_count_T0",
+  "dl_Total_lesion_volume_.absolute._cm3_T0",
+  "dl_Total_lesion_volume_.normalized._._T0",
+  "dl_Total_lesion_burden_T0",
+  "dl_Periventricular_lesion_count_T0",
+  "dl_Periventricular_lesion_volume_.absolute._cm3_T0",
+  "dl_Periventricular_lesion_volume_.normalized._._T0",
+  "dl_Periventricular_lesion_burden_T0",
+  "dl_Deep_white_lesion_count_T0",
+  "dl_Deep_white_lesion_volume_.absolute._cm3_T0",
+  "dl_Deep_white_lesion_volume_.normalized._._T0",
+  "dl_Deep_white_lesion_burden_T0",
+  "dl_Juxtacortical_lesion_count_T0",
+  "dl_Juxtacortical_lesion_volume_.absolute._cm3_T0",
+  "dl_Juxtacortical_lesion_volume_.normalized._._T0",
+  "dl_Juxtacortical_lesion_burden_T0",
+  "dl_Infratentorial_lesion_count_T0",
+  "dl_Infratentorial_lesion_volume_.absolute._cm3_T0",
+  "dl_Infratentorial_lesion_volume_.normalized._._T0",
+  "dl_Infratentorial_lesion_burden_T0"
+)
+
+# Define the categorical variables if needed
+categorical_vars <- c("dl_Sex_T0") # If you want to include categorical variables like sex
+
+# Create the descriptive table; called as tableone::CreateTableOne because this script never attaches the tableone package
+table4_volbrain <- tableone::CreateTableOne(
+  vars = c(continuous_vars, categorical_vars), # Include both continuous and categorical variables
+  data = table4_volbrain_df, # The dataset to summarize
+  factorVars = categorical_vars, # Specify categorical variables
+  includeNA = TRUE # Include NA values in the table if desired
+)
+
+# Print the table
+print(table4_volbrain)
+
+
diff --git a/r_scripts/8112024_create_table4_fs_recon-all.R b/r_scripts/8112024_create_table4_fs_recon-all.R
new file mode 100644
index 0000000..0d680f3
--- /dev/null
+++ b/r_scripts/8112024_create_table4_fs_recon-all.R
@@ -0,0 +1,83 @@
+library(dplyr)
+library(writexl)
+library(tidyr)
+
+# Read in the raw data file, ages, and volbrain data
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep= ";")
+ages <- read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep=",") #this file contains the ages and recon-all data
+volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv") #contains volbrain output
+
+# Rename the first column of the raw dataset by position: its raw name carries a byte-order-mark artefact ("ï..Participant.Id") that depends on the locale/encoding used when reading
+raw <- raw %>% rename(Participant_Id = 1)
+
+# Rename participant id for later merge
+ages <- ages %>% rename(Participant_Id = Participant.Id)
+
+
+# Table 5: recon-all volumetrics (white, grey and subcortical matter); z-scores for 2 different kinds of growth charts are planned
+# Calculate the Total Cortical Volume and Total Cerebellar Volume (both are sums of recon-all columns already present in ages) +table5 <- ages %>% + mutate( + Total_Cortical_Volume = CortexVol + Left.Cerebellum.Cortex + Right.Cerebellum.Cortex, + Total_Cerebellar_Volume = Left.Cerebellum.Cortex + Right.Cerebellum.Cortex + ) %>% + + # Select relevant columns for the table and rename for clarity + select( + `Participant_Id` = Participant_Id, + `Age (years)` = Age_at_scan_y_T0X, + `Gender` = gender_BRICK, + + # Total Volumes + `Total Cortical Volume` = Total_Cortical_Volume, + `Total White Matter Volume` = CerebralWhiteMatterVol, + `Total Gray Matter Volume` = TotalGrayVol, + `Total Cerebellar Volume` = Total_Cerebellar_Volume, + + # Subcortical Structures + `Thalamus Left` = Left.Thalamus.Proper, + `Thalamus Right` = Right.Thalamus.Proper, + `Caudate Left` = Left.Caudate, + `Caudate Right` = Right.Caudate, + `Putamen Left` = Left.Putamen, + `Putamen Right` = Right.Putamen, + `Pallidum Left` = Left.Pallidum, + `Pallidum Right` = Right.Pallidum, + `Hippocampus Left` = Left.Hippocampus, + `Hippocampus Right` = Right.Hippocampus + ) + + +#merge table5 with the quality control column from the raw export (left join: every row of table5 is kept) +table5_merged <- table5 %>% + left_join(raw %>% select(Participant_Id, Exclude_Score_QC_T1w_T0), by = "Participant_Id") + + +#throw out the low-quality scans. NOTE(review): `!= 1` also removes rows whose QC value is NA (filter() keeps only TRUE), e.g. participants without a match in raw - confirm that is intended +table5_fs <- table5_merged %>% + filter( + Exclude_Score_QC_T1w_T0 != 1 + ) +#now create the descriptive table + +# Exclude Age (years) and Participant_Id from the variables list. Gender is listed because the CreateTableOne() call below names it in factorVars, which only has an effect for variables that are also in vars +vars <- c( + "Total Cortical Volume", + "Total White Matter Volume", + "Total Gray Matter Volume", + "Total Cerebellar Volume", + "Thalamus Left", "Thalamus Right", + "Caudate Left", "Caudate Right", + "Putamen Left", "Putamen Right", + "Pallidum Left", "Pallidum Right", + "Hippocampus Left", "Hippocampus Right", "Gender" +) + +# Using the tableone package to create the summary table with means and standard deviations +library(tableone) + +# Creating a summary table using CreateTableOne +table_summary <- CreateTableOne(vars = vars, 
data = table5_fs, factorVars = c("Gender")) + +# Print the table +print(table_summary) diff --git a/r_scripts/fs_table_marjolein_abstract_28102024.R b/r_scripts/fs_table_marjolein_abstract_28102024.R new file mode 100644 index 0000000..010f4af --- /dev/null +++ b/r_scripts/fs_table_marjolein_abstract_28102024.R @@ -0,0 +1,81 @@ +library(dplyr) +#this script prepares a table for Marjolein to plot in the reference curves and pull out z-scores +#load relevant datasets +brick_fs <- read.csv("Z:/processed_data/freesurfer_stats/brain_volumes_from_freesurfer_no_qc.csv") #freesurfer output all brick subjects +Koppelfile <-read.csv("Z:/Aida_experiment/Part_ID_PID.csv", sep= ";") #contains PID and BRICK Participant ID +Marjolein_file <- read.csv("Z:/Aida_experiment/SCD_output_vol_table_BRIDGE_Marjolein.csv", sep= ",") #contains BRIDGE data and PID +age_check <- read.csv("Z:/castor_proof_files/csv_castor/current/HbF_TCD_age_months_T020082024_ano_pres_castor.csv",sep=";")#contains ages of brick participants in months +permissions <- read.csv("Z:/Aida_experiment/BRICK_export_permissions_20241029.csv", sep=";") + +# Perform a left join to keep all rows from Koppelfile, adding "SubjectID" from Marjolein_file where PID matches +brickbridge_fs <- merge(Koppelfile, Marjolein_file[, c("PID", "SubjectID")], by = "PID", all.x = TRUE) + +# Remove the PID column +brickbridge_fs <- brickbridge_fs[, !names(brickbridge_fs) %in% "PID"] + +# Move "SubjectID" to the first column +brickbridge_fs <- brickbridge_fs[, c("SubjectID", setdiff(names(brickbridge_fs), "SubjectID"))] + +# Replace hyphens with underscores in Participant.ID of brick_fs so the IDs use the same separator as in brickbridge_fs (presumably underscores - verify against Koppelfile) +brick_fs$Participant.ID <- gsub("-", "_", brick_fs$Participant.ID) + +# Perform a full outer join (all = TRUE) of brickbridge_fs with brick_fs on the participant IDs: unmatched rows from either side are kept and padded with NA +combined_brick <- merge(brickbridge_fs, brick_fs, by.x = "Participant.Id", by.y = "Participant.ID", all = TRUE) + +#now add the age in months to this df + +# Rename the first column 
of age_check to Participant.Id +colnames(age_check)[1] <- "Participant.Id" + +# Trim whitespace and standardize case in both data frames +combined_brick$Participant.Id <- trimws(toupper(combined_brick$Participant.Id)) +age_check$Participant.Id <- trimws(toupper(age_check$Participant.Id)) + + +# Merge combined_brick with age_check based on Participant.Id +# (left join: all rows of combined_brick are kept), including both age and gender columns +combined_brick <- merge(combined_brick, age_check[, c("Participant.Id", "Age_at_scan_m_T0X", "gender_BRICK")], + by = "Participant.Id", all.x = TRUE) + +# Remove the column called "X" if it exists +combined_brick <- combined_brick[, !names(combined_brick) %in% "X"] + +# Create the Age_at_scan_y_T0X column (age in years) +combined_brick$Age_at_scan_y_T0X <- combined_brick$Age_at_scan_m_T0X / 12 + +# Reorder the columns to place Age_at_scan_m_T0X as the third column, +# Age_at_scan_y_T0X as the fourth column, and gender_BRICK as the fifth column; the first two columns are excluded from the remainder so that none of them is selected twice (a repeated name would come back as an extra ".1" column) +column_order <- c(names(combined_brick)[1:2], "Age_at_scan_m_T0X", "Age_at_scan_y_T0X", "gender_BRICK", + setdiff(names(combined_brick), c(names(combined_brick)[1:2], "Age_at_scan_m_T0X", "Age_at_scan_y_T0X", "gender_BRICK"))) +combined_brick <- combined_brick[, column_order] + +# Remove duplicates based on Participant.Id while keeping the first occurrence. That would be brick 3 and 9 +combined_brick <- combined_brick %>% + distinct(Participant.Id, .keep_all = TRUE) + +#add the permissions from castor export +# Rename the first column in permissions by position, as is done for age_check above; its literal header `ï..Participant.Id` depends on platform/encoding +colnames(permissions)[1] <- "Participant Id" + +# Merge the two data frames, keeping only the necessary column from permissions +combined_brick <- merge( + combined_brick, + permissions[, c("Participant Id", "permission_data_sharing_for_other_studies")], + by.x = "Participant.Id", + by.y = "Participant Id", + all.x = TRUE +) + +# Reorder columns to place "permission_data_sharing_for_other_studies" as 
the sixth column +combined_brick <- combined_brick[, c(1:5, ncol(combined_brick), 6:(ncol(combined_brick) - 1))] + +#remove the duplicate SubjectID column if one is present (assigning NULL to a missing column is a no-op) +combined_brick$SubjectID.1 <- NULL + + +# Write the updated combined data to a CSV file +write.csv(combined_brick, "Z:/Aida_experiment/combined_BRICK_marjolein.csv", row.names = FALSE) + + + + diff --git a/r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R b/r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R new file mode 100644 index 0000000..07e3287 --- /dev/null +++ b/r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R @@ -0,0 +1,43 @@ +## this script is for adding the genotype column to the original file I sent to Marjolein for the growth charts paper + +library(dplyr) + +ages <-read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep=",") #this file contains the ages and recon-all data, as I apparently cannot export calculated fields from castor +raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep= ";") #contains a castor-export + +#select the genotype column from the raw df; raw has no Participant_Id column yet at this point (it is only renamed further down), so the ID column is taken by position and named here +genotype <- raw %>% select(Participant_Id = 1, brick_genotype) + +#rename first columns in order to merge later. 
+raw <- raw %>% + rename(Participant_Id = 1) + +ages <- ages %>% + rename(Participant_Id = Participant.Id) + +#now link the brick_genotype to the data in the ages df and put it as a sixth column +# Merge the genotype with the ages dataset by Participant_Id (left join: every row of ages is kept, genotype is NA where there is no match) +ages_with_genotype <- ages %>% + left_join(genotype, by = "Participant_Id") + +# Reorder columns to place 'brick_genotype' as the sixth column +ages_with_genotype <- ages_with_genotype %>% + select(1:5, brick_genotype, everything()) + +# Add the 'severe_genotype' column: 1 for HbSS and HbSb0, 0 for the other genotypes, NA when the genotype itself is missing (so unknown is not silently counted as non-severe) +ages_with_genotype <- ages_with_genotype %>% + mutate(severe_genotype = case_when( + is.na(brick_genotype) ~ NA_real_, + brick_genotype %in% c("HbSS", "HbSb0") ~ 1, + TRUE ~ 0 + )) + +# Reorder columns to place 'severe_genotype' after 'brick_genotype' +ages_with_genotype <- ages_with_genotype %>% + select(1:6, severe_genotype, everything()) + +# Check the first few rows to ensure it looks correct +head(ages_with_genotype) + +# Optionally, save the new dataset with the added genotype column +write.csv(ages_with_genotype, "Z:/Aida_experiment/combined_BRICK_marjolein_with_genotype.csv", row.names = FALSE) \ No newline at end of file diff --git a/r_scripts/organize_BRIDGE_data.R b/r_scripts/organize_BRIDGE_data.R new file mode 100644 index 0000000..060c4ae --- /dev/null +++ b/r_scripts/organize_BRIDGE_data.R @@ -0,0 +1,84 @@ +library(dplyr) +Koppelfile <-read.csv("Z:/Aida_experiment/Part_ID_PID.csv", sep= ";") +Marjolein_file <- read.csv("Z:/Aida_experiment/SCD_output_vol_table_BRIDGE_Marjolein.csv", sep= ",") + +# Join the datasets based on "PID" and bring in "Participant Id" from Koppelfile +combined_data <- merge(Marjolein_file, Koppelfile[, c("PID", "Participant.Id")], by = "PID", all.x = TRUE) + + +#remove PID from the resulting file +combined_data <- combined_data[, !names(combined_data) %in% "PID"] + +# Move "Participant Id" to the first column +combined_data <- combined_data[, c("Participant.Id", 
setdiff(names(combined_data), "Participant.Id"))] + +# Write combined_data to a CSV file in the specified directory +write.csv(combined_data, "Z:/Aida_experiment/BRIDGE_T0.csv", row.names = FALSE) + +## + +#compare the freesurfer output with Marjolein's data +brick_freesurfer <- read.csv("Z:/processed_data/freesurfer_stats/brain_volumes_from_freesurfer_no_qc.csv") + +# Name the ID column of brick_freesurfer "Participant Id" first: read.csv() never returns a header containing a space, and the other script reading this same file addresses the column as Participant.ID, so both dotted spellings are accepted +colnames(brick_freesurfer)[colnames(brick_freesurfer) %in% c("Participant.ID", "Participant.Id")] <- "Participant Id" + +# Rename the 'Participant.Id' column to 'Participant Id' in combined_data, then replace "-" with "_" in the (now existing) "Participant Id" column of brick_freesurfer +colnames(combined_data)[colnames(combined_data) == "Participant.Id"] <- "Participant Id" +brick_freesurfer$`Participant Id` <- gsub("-", "_", brick_freesurfer$`Participant Id`) + + +# Give the "Participant Id" columns in both dataframes a source-specific name +colnames(brick_freesurfer)[colnames(brick_freesurfer) == "Participant Id"] <- "Participant Id_brick" +colnames(combined_data)[colnames(combined_data) == "Participant Id"] <- "Participant Id_bridge" + +# Replace "-" with "_" in the renamed ID column of brick_freesurfer (repeats the substitution above; harmless) +brick_freesurfer$`Participant Id_brick` <- gsub("-", "_", brick_freesurfer$`Participant Id_brick`) + +# Now merge the two dataframes on the renamed Participant Id columns (inner join; only Right.Amygdala is carried over from each side) +merged_data <- merge( + brick_freesurfer[, c("Participant Id_brick", "Right.Amygdala")], + combined_data[, c("Participant Id_bridge", "Right.Amygdala")], + by.x = "Participant Id_brick", + by.y = "Participant Id_bridge", + suffixes = c("_brick", "_bridge") +) + +# View the resulting merged dataframe +head(merged_data) + +###twice BRICK003 appears here. check for duplicates in Marjolein_file. 
These are 5873683 1192976 BRICK_009 and BRICK_003 +# Extract the PID values that occur more than once (every occurrence is flagged, first and later ones alike) +duplicate_values <- Marjolein_file$PID[duplicated(Marjolein_file$PID) | duplicated(Marjolein_file$PID, fromLast = TRUE)] + +# Remove duplicates from the result to get unique duplicate values +unique_duplicates <- unique(duplicate_values) + +# Display the unique duplicate values +print(unique_duplicates) + +# now check ages. everything else looks fine +age_check <- read.csv("Z:/castor_proof_files/csv_castor/current/HbF_TCD_age_months_T020082024_ano_pres_castor.csv",sep=";") + +# Step 1: Rename the first column (it reads as 'ï..BRICK' only under one platform/encoding) to Participant_Id. Required, because Step 2 selects it by that name; done by position, as the other script does for this same file +colnames(age_check)[1] <- "Participant_Id" + +# Step 2: Merge the two dataframes on the relevant columns. NOTE(review): the other script reading this file takes Age_at_scan_m_T0X from it and derives the years column as months / 12 - confirm Age_at_scan_y_T0X really exists in this export +merged_data <- merge( + age_check[, c("Participant_Id", "Age_at_scan_y_T0X")], + combined_data[, c("Participant Id_bridge", "Age.at.time.of.Scan")], + by.x = "Participant_Id", + by.y = "Participant Id_bridge", + suffixes = c("_age_check", "_combined") +) + +# Step 3: Compare the age columns (exact ==, so any rounding difference between the two sources shows up as FALSE) +comparison_result <- merged_data[, c("Participant_Id", "Age_at_scan_y_T0X", "Age.at.time.of.Scan")] +comparison_result$Age_Match <- comparison_result$Age_at_scan_y_T0X == comparison_result$Age.at.time.of.Scan + +# Display the comparison results +print(comparison_result) + +#Marjolein's data is not expressed in months but in decimals (x.something); looks good enough as far as I am concerned + + diff --git a/r_scripts/subset_key_table_castor.R b/r_scripts/subset_key_table_castor.R new file mode 100644 index 0000000..5ef0e52 --- /dev/null +++ b/r_scripts/subset_key_table_castor.R @@ -0,0 +1,9 @@ +#here, we prepare a key table for upload in castor (participation in other studies): the first ten columns of the full key table +library(dplyr) +short_key_table <- read.csv("Z:/castor_proof_files/brick_score_key_26_4_2024.csv") +subset_key_table <- short_key_table[,1:10] +print(subset_key_table) + +write.csv(subset_key_table, "Z:/castor_proof_files/brick_subset_key_table102024.csv", row.names = FALSE) # NOTE(review): the script that consumes this key table reads it from Z:/castor_proof_files/csv_castor/current/ - confirm the file is moved there + +