From bbac9f1c23f3cf7c9d7cd07e23d29e0408036466 Mon Sep 17 00:00:00 2001
From: Candace Moore <doctormakeda@gmail.com>
Date: Thu, 14 Nov 2024 07:48:57 +0100
Subject: [PATCH 1/2] properly branched

---
 .../1112024_try_replace_colnames_scorecsvs.R  | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 r_scripts/1112024_try_replace_colnames_scorecsvs.R

diff --git a/r_scripts/1112024_try_replace_colnames_scorecsvs.R b/r_scripts/1112024_try_replace_colnames_scorecsvs.R
new file mode 100644
index 0000000..499d712
--- /dev/null
+++ b/r_scripts/1112024_try_replace_colnames_scorecsvs.R
@@ -0,0 +1,64 @@
+#Here, we would like to change the Participant Id numbers in the SCORE export repeating data files to BRICK participant Ids. This way, we can upload it to the BRICK castor
+#here we read in the score repeating data files
+library(dplyr)
+
+# Read the subset key table and rename the Participant.Id to BRICK_Id
+subset_key_table <- read.csv("Z:/castor_proof_files/csv_castor/current/brick_subset_key_table102024.csv") %>%
+  rename(BRICK_Id = Participant.Id)
+
+# Read in all of the repeating data files from SCORE.
+# NOTE: the paths below are for the Windows machine (Z:/); on Linux use /mnt/data instead.
+file_paths <- list(
+  visual_hearing = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Visual_and_hearing_disease_Medical_History_Clinical_manifestati_export_20240717.csv",
+  acute_complications = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Acute_complications_export_20240717.csv",
+  bone_extremities = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Bone_and_extremities_Medical_History_Clinical_manifestations_export_20240717.csv",
+  cardiac_pulmonary = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Cardiac_and_pulmonary_disease_Medical_History_Clinical_manifest_export_20240717.csv",
+  comorbidities = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Comorbidities_export_20240717.csv",
+  endocrinological = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Endocrinological_disease_Medical_History_Clinical_manifestation_export_20240717.csv",
+  registry = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__export_20240717.csv",
+  liver_kidney = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Liver_and_kidney_disease_Medical_History_Clinical_manifestation_export_20240717.csv",
+  neurological = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Neurological_disease_Medical_History_Clinical_manifestations_export_20240717.csv",
+  specific_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatment_Use_of_specific_treatment_or_inclusion_in_Clinical_Tr_export_20240717.csv",
+  chelation_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatments_chelation_export_20240717.csv",
+  hydroxyurea_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatments_hydroxyurea_export_20240717.csv",
+  visit = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Visit_export_20240717.csv"
+)
+
+# Convert every SCORE export in a single pass. For each path in file_paths:
+#   1. read the semicolon-separated CSV (strings stay character),
+#   2. label the first column RADeep_id so it can serve as the join key,
+#   3. look up the matching BRICK_Id in subset_key_table (left join keeps every row),
+#   4. expose that id as the leading `Participant Id` column and drop BRICK_Id.
+# lapply() carries over the names of file_paths, so df_list is a named list.
+df_list <- lapply(file_paths, function(csv_path) {
+  score_df <- read.csv(csv_path, sep = ";", stringsAsFactors = FALSE)
+  names(score_df)[1] <- "RADeep_id"
+
+  # Join on RADeep_id
+  with_brick_id <- left_join(score_df, subset_key_table, by = "RADeep_id")
+  # Create Participant Id from BRICK_Id
+  with_brick_id <- mutate(with_brick_id, `Participant Id` = BRICK_Id)
+
+  # `Participant Id` first, then all remaining columns except BRICK_Id
+  select(with_brick_id, `Participant Id`, everything(), -BRICK_Id)
+})
+
+# Optionally assign back to original data frame names
+names(df_list) <- names(file_paths)
+
+# Assign back to individual data frames if needed
+list2env(df_list, envir = .GlobalEnv)
+
+# Define the output directory for the updated CSV files
+output_dir <- "Z:/castor_proof_files/csv_castor/current/"
+
+# Ensure the output directory exists
+dir.create(output_dir, showWarnings = FALSE)
+
+# Write each data frame to a CSV file in the output directory
+for (name in names(df_list)) {
+  write.csv(df_list[[name]], file = paste0(output_dir, name, "_updated.csv"), row.names = FALSE)
+}
+
+# Confirm file creation
+cat("CSV files have been created in:", output_dir, "\n")

From 89a951c8c33f7e6838953b464aa9b82c174f0e6b Mon Sep 17 00:00:00 2001
From: Candace Moore <doctormakeda@gmail.com>
Date: Sat, 23 Nov 2024 12:14:29 +0100
Subject: [PATCH 2/2] add aditional R scripts

---
 dwi_2_environment.yaml                        |  21 +++
 dwi_3_environment.yaml                        |  22 +++
 r_scripts/1162024Tables_paper_1.R             | 154 ++++++++++++++++++
 ...72024_Table2_Growthcurves_script_attempt.R |  78 +++++++++
 ...2112024_Identify_genotypes_bridge_sample.R | 117 +++++++++++++
 .../12112024_script_table1_ash_2024_poster.R  | 101 ++++++++++++
 .../8112024_Script_missing_lab_values_T0.R    |  18 ++
 r_scripts/8112024_Table3_volbrain_stats.R     |  93 +++++++++++
 .../8112024_create_table4_fs_recon-all.R      |  83 ++++++++++
 .../fs_table_marjolein_abstract_28102024.R    |  81 +++++++++
 ...marjolein_abstract_with_genotype_8112024.R |  43 +++++
 r_scripts/organize_BRIDGE_data.R              |  84 ++++++++++
 r_scripts/subset_key_table_castor.R           |   9 +
 13 files changed, 904 insertions(+)
 create mode 100644 dwi_2_environment.yaml
 create mode 100644 dwi_3_environment.yaml
 create mode 100644 r_scripts/1162024Tables_paper_1.R
 create mode 100644 r_scripts/1172024_Table2_Growthcurves_script_attempt.R
 create mode 100644 r_scripts/12112024_Identify_genotypes_bridge_sample.R
 create mode 100644 r_scripts/12112024_script_table1_ash_2024_poster.R
 create mode 100644 r_scripts/8112024_Script_missing_lab_values_T0.R
 create mode 100644 r_scripts/8112024_Table3_volbrain_stats.R
 create mode 100644 r_scripts/8112024_create_table4_fs_recon-all.R
 create mode 100644 r_scripts/fs_table_marjolein_abstract_28102024.R
 create mode 100644 r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R
 create mode 100644 r_scripts/organize_BRIDGE_data.R
 create mode 100644 r_scripts/subset_key_table_castor.R

diff --git a/dwi_2_environment.yaml b/dwi_2_environment.yaml
new file mode 100644
index 0000000..11367d7
--- /dev/null
+++ b/dwi_2_environment.yaml
@@ -0,0 +1,21 @@
+# dwi environment
+
+
+name: witty
+channels:
+  - conda-forge
+  
+dependencies:
+  - dipy=1.7.0
+  - fury=0.7.1
+  - jupyter
+  - jupyterlab
+  - matplotlib=3.5.3
+  - nilearn=0.7.0
+  - osfclient=0.0.5
+  - python>=3.10
+  - pybids
+  
+# if environment does not resolve try installing everything but pybids, then `conda install conda-forge::pybids`
+
+
diff --git a/dwi_3_environment.yaml b/dwi_3_environment.yaml
new file mode 100644
index 0000000..11a1409
--- /dev/null
+++ b/dwi_3_environment.yaml
@@ -0,0 +1,22 @@
+# dwi environment
+
+
+name: wittier
+channels:
+  - conda-forge
+  
+dependencies:
+  - dipy=1.7.0
+  - fury=0.7.1
+  - jupyter
+  - jupyterlab
+  - matplotlib=3.5.3
+  - nilearn=0.7.0
+  - numexpr=2.8.4 
+  - osfclient=0.0.5
+  - python>=3.10
+  - pybids
+  
+# if environment does not resolve try installing everything but pybids, then `conda install conda-forge::pybids`
+
+
diff --git a/r_scripts/1162024Tables_paper_1.R b/r_scripts/1162024Tables_paper_1.R
new file mode 100644
index 0000000..0e0631b
--- /dev/null
+++ b/r_scripts/1162024Tables_paper_1.R
@@ -0,0 +1,154 @@
+## In this script, I will try to clean up the data export from Castor of November 11th. At this time, SCORE repeated data, volBrain data and FreeSurfer data have not been uploaded yet.
+# Weirdly enough, age_T0_MRI_months is exported incorrectly by Castor. I decided to limit my next export to just the baseline_MRI form at T0 and to merge it later.
+library(dplyr)
+library(writexl) #to write the tables to excel files
+# Read in the raw Castor export. For now in my personal folder; will be moved once the final data structure is settled.
+# The `ï..` prefix on the first header came from a UTF-8 byte-order mark; "UTF-8-BOM" strips it so the header is read as Participant.Id.
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep = ";", fileEncoding = "UTF-8-BOM")
+ages <- read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep = ",") # ages and recon-all data, as I apparently cannot export calculated fields from castor
+volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv") # contains volbrain output
+
+# Give the id column the same name in all three data frames so they can be merged later.
+raw <- raw %>%
+  rename(Participant_Id = Participant.Id)
+
+ages <- ages %>%
+  rename(Participant_Id = Participant.Id)
+volbrain <- volbrain %>%
+  rename(Participant_Id = Participant.Id)
+
+#I decided not to clean it up. Instead. Just start with creating the tables.
+
+table1 <- raw %>% select(Participant_Id, Hydrea_at_scan_T0, brick_genotype, mothersbirth, fathersbirth)
+
+#import age in months from the ages df
+table1 <- table1 %>%
+  left_join(select(ages, Participant_Id, Age_at_scan_m_T0X, gender_BRICK), by = "Participant_Id")
+
+#add a column in years. now we have the ingredients for table1
+table1 <- table1 %>%
+  mutate(Age_at_scan_years = round(Age_at_scan_m_T0X / 12, 1))
+
+#Table 2: Lab values including HbF
+
+table2 <- raw %>% select(Participant_Id, ERY0, HB0, MCV0, HT0, reticulocyte_count_percentage_1, LEU0, TROMBO0, FE0, FERT0, TRAF0, TSAT0, ALAT0, LDH0, TBIL0, DBIL0, KREA0, SCHWARTZ_Bedsite0, FOLZ0, UREU0, NA0, K00, VITD0, ASAT0, AFOS0, GGT0, CRP0, KREA_U0, TE_U0, TE_U_KR0, ALB_U0, ALB_U_KR0, B120, HPLC_HBF_T0, HPLC_HBF_date_T0, HPLC_HbS_T0, HPLC_HbS_date_T0)
+                      
+#Table 3: Descriptives on radiology reports
+table3 <- raw %>% select(Participant_Id,Measurement_moment_QC_T1w_T0, Score_QC_T1w_T0, Exclude_Score_QC_T1w_T0, Remarks_Score_QC_T1w_T0, Screened_for_WMH_T0, WMH_observed_T0, Vasculature_examined_T0, Vascular_malformations_T0, Microbleeds_screened_T0, Microbleeds_present_T0, Incidental_finding_T0, Marjolein_T0, Remarks_MRI_T0)
+
+#Table 4: White matter hyperintensities
+table4 <- volbrain %>%
+  select(
+    Participant_Id, dl_Sex_T0, dl_Age_T0,
+    dl_Total_lesion_count_T0, dl_Total_lesion_volume_.absolute._cm3_T0,
+    dl_Total_lesion_volume_.normalized._._T0, dl_Total_lesion_burden_T0,
+    dl_Periventricular_lesion_count_T0, dl_Periventricular_lesion_volume_.absolute._cm3_T0,
+    dl_Periventricular_lesion_volume_.normalized._._T0, dl_Periventricular_lesion_burden_T0,
+    dl_Deep_white_lesion_count_T0, dl_Deep_white_lesion_volume_.absolute._cm3_T0,
+    dl_Deep_white_lesion_volume_.normalized._._T0, dl_Deep_white_lesion_burden_T0,
+    dl_Juxtacortical_lesion_count_T0, dl_Juxtacortical_lesion_volume_.absolute._cm3_T0,
+    dl_Juxtacortical_lesion_volume_.normalized._._T0, dl_Juxtacortical_lesion_burden_T0,
+    dl_Infratentorial_lesion_count_T0, dl_Infratentorial_lesion_volume_.absolute._cm3_T0,
+    dl_Infratentorial_lesion_volume_.normalized._._T0, dl_Infratentorial_lesion_burden_T0
+  )
+#Table 5: Recon-all volumetrics (white, grey and subcortical matter) with z-scores for 2 different kinds of growth charts.
+# Calculate the Total Cortical Volume and Total Cerebellar Volume
+table5 <- ages %>%
+  mutate(
+    Total_Cortical_Volume = CortexVol + Left.Cerebellum.Cortex + Right.Cerebellum.Cortex,
+    Total_Cerebellar_Volume = Left.Cerebellum.Cortex + Right.Cerebellum.Cortex
+  ) %>%
+  
+  # Select relevant columns for the table and rename for clarity
+  select(
+    `Participant ID` = Participant_Id,
+    `Age (years)` = Age_at_scan_y_T0X,
+    `Gender` = gender_BRICK,
+    
+    # Total Volumes
+    `Total Cortical Volume` = Total_Cortical_Volume,
+    `Total White Matter Volume` = CerebralWhiteMatterVol,
+    `Total Gray Matter Volume` = TotalGrayVol,
+    `Total Cerebellar Volume` = Total_Cerebellar_Volume,
+    
+    # Subcortical Structures
+    `Thalamus Left` = Left.Thalamus.Proper,
+    `Thalamus Right` = Right.Thalamus.Proper,
+    `Caudate Left` = Left.Caudate,
+    `Caudate Right` = Right.Caudate,
+    `Putamen Left` = Left.Putamen,
+    `Putamen Right` = Right.Putamen,
+    `Pallidum Left` = Left.Pallidum,
+    `Pallidum Right` = Right.Pallidum,
+    `Hippocampus Left` = Left.Hippocampus,
+    `Hippocampus Right` = Right.Hippocampus
+  )
+#Table 5_1: Neuropsychologic outcomes(WISCV&WAISIV) + Education level parents
+#WiscV
+table6_wisc <- raw %>%
+  select(
+    Participant_Id,
+    scorePrimIndex_TIQ_S_1,    # Full Scale IQ
+    scorePrimIndex_VBI_IQ_1, # Verbal Comprehension Index
+    scorePrimIndex_VRI_IQ_1, # Visual Spatial Index
+    scorePrimIndex_FRI_IQ_1, # Fluid Reasoning Index
+    scorePrimIndex_WgI_IQ_1, # Working Memory Index
+    scorePrimIndex_VsI_IQ_1, # Processing Speed Index
+    scoreSecIndex_KRI_IQ_1,      # Quantitative Reasoning Index
+    scoreSecIndex_AWI_IQ_1,     # Auditory Working Memory Index
+    scoreSecIndex_NVI_IQ_1,      # Nonverbal Index
+    scoreSecIndex_AVI_IQ_1,      # General Ability Index
+   )
+
+#waisIV
+
+table6_wais <- raw %>%
+  select(
+    "Participant_Id",
+    "ScoreTIQ_1",             # Full Scale IQ (FSIQ)
+    "ScoreSomVBI_1",          # Verbal Comprehension Index (VCI)
+    "ScorePerVBI_1",          # Perceptual Verbal Comprehension Index (related to FRI)
+    "ScoreBIVBI_1",           # Blocked Verbal Comprehension Index (related to FRI)
+    "ScoreWgI_1",             # Working Memory Index (WMI)
+    "ScoreVsI_1"              # Processing Speed Index (PSI)
+  )
+
+#create a real table1
+library(tableone)
+
+# Specify the categorical and continuous variables
+categorical_vars <- c("Hydrea_at_scan_T0", "brick_genotype", "gender_BRICK")
+continuous_vars <- c("Age_at_scan_years")
+
+# Specify the categorical and continuous variables for CreateTableOne
+vars <- c(categorical_vars, continuous_vars)
+
+# Create the table from table1 (built above), which holds every variable in vars; combined_data is not defined in this script
+table1_summary <- CreateTableOne(vars = vars, data = table1, factorVars = categorical_vars)
+
+# Print the table1 summary
+print(table1_summary)
+
+# I want the median and IQR for HbS
+# HPLC_HbS_T0 and HB0 are columns of table2 (selected from raw above), so summarise that data frame
+hplc_summary <- table2 %>%
+  summarise(
+    Median_HPLC_HbS_T0 = median(HPLC_HbS_T0, na.rm = TRUE),
+    IQR_HPLC_HbS_T0 = IQR(HPLC_HbS_T0, na.rm = TRUE)
+  )
+
+# Manually calculate mean and SD for 'HB0'
+hb0_summary <- table2 %>%
+  summarise(
+    Mean_HB0 = mean(HB0, na.rm = TRUE),
+    SD_HB0 = sd(HB0, na.rm = TRUE)
+  )
+
+# Print the custom summaries for HPLC_HbS_T0 and HB0
+print("HPLC_HbS_T0 - Median and IQR:")
+print(hplc_summary)
+
+print("HB0 - Mean and SD:")
+print(hb0_summary)
+
+
diff --git a/r_scripts/1172024_Table2_Growthcurves_script_attempt.R b/r_scripts/1172024_Table2_Growthcurves_script_attempt.R
new file mode 100644
index 0000000..da6096c
--- /dev/null
+++ b/r_scripts/1172024_Table2_Growthcurves_script_attempt.R
@@ -0,0 +1,78 @@
+library(dplyr)
+library(tableone)
+library(writexl)
+library(tibble)  # For rownames_to_column()
+
+# Select relevant columns from table2
+table2_filtered <- table2 %>%
+  select(Participant_Id, ERY0, HB0, MCV0, HT0, reticulocyte_count_percentage_1, LEU0, 
+         TROMBO0, FE0, FERT0, TRAF0, TSAT0, ALAT0, LDH0, TBIL0, DBIL0, KREA0, 
+         FOLZ0, UREU0, NA0, K00, VITD0, ASAT0, AFOS0, GGT0, CRP0, KREA_U0, 
+         TE_U0, TE_U_KR0, ALB_U0, ALB_U_KR0, B120)
+
+
+# Define continuous variables
+continuous_vars <- c("ERY0", "HB0", "MCV0", "HT0", "reticulocyte_count_percentage_1", 
+                     "LEU0", "TROMBO0", "FE0", "FERT0", "TRAF0", "TSAT0", "ALAT0", 
+                     "LDH0", "TBIL0", "DBIL0", "KREA0", "FOLZ0", "UREU0", "NA0", 
+                     "K00", "VITD0", "ASAT0", "AFOS0", "GGT0", "CRP0", "KREA_U0", 
+                     "TE_U0", "TE_U_KR0", "ALB_U0", "ALB_U_KR0", "B120")
+
+# Create a named vector of labels for the continuous variables, with units
+labels <- c(
+  "ERY0" = "Red blood cell (RBC) count (x10^12/L)",
+  "HB0" = "Hemoglobin (g/dL)",
+  "MCV0" = "Mean corpuscular volume (MCV) (fL)",
+  "HT0" = "Hematocrit (HCT) (%)",
+  "reticulocyte_count_percentage_1" = "Reticulocyte count (%)",
+  "LEU0" = "White blood cell (WBC) count (x10^9/L)",
+  "TROMBO0" = "Platelet count (x10^9/L)",
+  "FE0" = "Serum iron (µg/dL)",
+  "FERT0" = "Ferritin serum (ng/mL)",
+  "TRAF0" = "Transferrin (mg/dL)",
+  "TSAT0" = "Transferrin saturation (%)",
+  "ALAT0" = "Alanine transaminase (ALT) (U/L)",
+  "LDH0" = "Lactate Dehydrogenase (LDH) (U/L)",
+  "TBIL0" = "Total bilirubin (mg/dL)",
+  "DBIL0" = "Conjugated/direct bilirubin (mg/dL)",
+  "KREA0" = "Creatinine (mg/dL)",
+  "FOLZ0" = "Folate (ng/mL)",
+  "UREU0" = "Urea (mg/dL)",
+  "NA0" = "Sodium (mmol/L)",
+  "K00" = "Potassium (mmol/L)",
+  "VITD0" = "Vitamin D (ng/mL)",
+  "ASAT0" = "Aspartate transaminase (AST) (U/L)",
+  "AFOS0" = "Alkaline phosphatase (ALP) (U/L)",
+  "GGT0" = "Gamma-glutamyl transferase (GGT) (U/L)",
+  "CRP0" = "C-reactive protein (CRP) (mg/L)",
+  "KREA_U0" = "Creatinine (Urine) (mg/dL)",
+  "TE_U0" = "Iron (Urine) (µg/dL)",
+  "TE_U_KR0" = "Iron (Urine, Kr) (µg/dL)",
+  "ALB_U0" = "Albumin (Urine) (g/dL)",
+  "ALB_U_KR0" = "Albumin (Urine, Kr) (g/dL)",
+  "B120" = "Bilirubin 120 (mg/dL)"
+)
+
+# Create a summary table with continuous variables
+table2_summary <- CreateTableOne(
+  vars = continuous_vars,
+  data = table2_filtered,
+  factorVars = character(0)  # No factor variables
+)
+
+# Convert the table summary to a data frame
+table2_df <- as.data.frame(print(table2_summary, quote = FALSE, noSpaces = TRUE))
+
+# Swap each row name for its label. The printed table has an extra leading "n" row, so match on the variable name, not on position.
+table2_df <- table2_df %>%
+  rownames_to_column(var = "Variable") %>%
+  mutate(Variable = coalesce(unname(labels[sub(" \\(.*$", "", Variable)]), Variable))
+
+# Rename the 'Overall' column to "Mean (SD)" to indicate the values
+colnames(table2_df)[colnames(table2_df) == "Overall"] <- "Mean (SD)"
+
+
+# View the final table
+print(table2_df)
+#question: weird results, how do we impute missing values?
+
diff --git a/r_scripts/12112024_Identify_genotypes_bridge_sample.R b/r_scripts/12112024_Identify_genotypes_bridge_sample.R
new file mode 100644
index 0000000..9cfc342
--- /dev/null
+++ b/r_scripts/12112024_Identify_genotypes_bridge_sample.R
@@ -0,0 +1,117 @@
+# This script finds the genotypes that apply to the BRIDGE subset of BRICK for the ASH 2024 poster
+library(dplyr)
+library(readr)
+library(tableone)
+
+#load dataset with genotypes
+genotype <-read.csv("Z:/Aida_experiment/combined_BRICK_marjolein_with_genotype.csv")
+
+# Create a new column with rounded "Age_at_scan_y_T0X" values in the genotype DataFrame. This is needed for an exact match
+genotype <- genotype %>%
+  mutate(Age_at_scan_y_T0X = round(Age_at_scan_y_T0X, 2))
+
+# Define ages for each gender
+ages_male <- c(9.94, 11.74, 12.73, 12.81, 13.94, 15.09, 15.22, 16.02, 16.65, 17.4, 17.48)
+ages_female <- c(8.05, 9.16, 10.32, 13.72, 13.77, 14.85, 16.25)
+
+# Filter genotype DataFrame based on matching ages for males and females separately
+matched_data_male <- genotype %>%
+  filter(Age_at_scan_y_T0X %in% ages_male & gender_BRICK == 1) %>%
+  select(Participant_Id, brick_genotype, Age_at_scan_y_T0X, gender_BRICK)
+
+matched_data_female <- genotype %>%
+  filter(Age_at_scan_y_T0X %in% ages_female & gender_BRICK == 2) %>%
+  select(Participant_Id, brick_genotype, Age_at_scan_y_T0X, gender_BRICK)
+
+# Assign to BRIDGE_male and BRIDGE_female DataFrames
+BRIDGE_male <- matched_data_male
+BRIDGE_female <- matched_data_female
+
+# Display the resulting DataFrames
+print("BRIDGE_male:")
+print(BRIDGE_male)
+
+print("BRIDGE_female:")
+print(BRIDGE_female)
+
+#not an exact match. let's just compare it manually
+
+genotype_male <- genotype %>% filter(gender_BRICK == 1)
+
+genotype_female <- genotype %>% filter(gender_BRICK == 2)
+
+#now some relevant extra rows are the hydrea use row, Hb and HbF that are interesting for table 1
+# Load the BRIDGE subset of participants; I manually added gender, BRICK number and genotype to it. The scripts 1162024Tables_paper_1.R and
+# 1172024_Table2_Growthcurves_script_attempt.R must be run beforehand (they create table1, raw and table2_filtered).
+male_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_boys.csv")
+female_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_girls.csv")
+
+#from df table1 I need "Hydrea_at_scan_T0" and from dataframe "table2_filtered" I need HB0. The NA values need to be kicked out. merge on Participant_Id into new df for males and females
+
+# Extract the relevant columns from table1, table2_filtered, and raw for males
+table1_filtered_male <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0) %>%
+  filter(!is.na(Hydrea_at_scan_T0))
+
+table2_filtered_male <- table2_filtered %>%
+  select(Participant_Id, HB0) %>%
+  filter(!is.na(HB0))
+
+raw_filtered_male <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0) %>%
+  filter(!is.na(HPLC_HbS_T0))
+
+# Merge male data with additional columns
+male_data_merged <- male_brick %>%
+  inner_join(table1_filtered_male, by = "Participant_Id") %>%
+  inner_join(table2_filtered_male, by = "Participant_Id") %>%
+  inner_join(raw_filtered_male, by = "Participant_Id")
+
+# Repeat the process for females
+table1_filtered_female <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0) %>%
+  filter(!is.na(Hydrea_at_scan_T0))
+
+table2_filtered_female <- table2_filtered %>%
+  select(Participant_Id, HB0) %>%
+  filter(!is.na(HB0))
+
+raw_filtered_female <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0) %>%
+  filter(!is.na(HPLC_HbS_T0))
+
+# Merge female data with additional columns
+female_data_merged <- female_brick %>%
+  inner_join(table1_filtered_female, by = "Participant_Id") %>%
+  inner_join(table2_filtered_female, by = "Participant_Id") %>%
+  inner_join(raw_filtered_female, by = "Participant_Id")
+
+# Display the resulting data frames
+print("Male Data Merged:")
+print(male_data_merged)
+
+print("Female Data Merged:")
+print(female_data_merged)
+
+
+#now make the dfs one and create a table 1
+# Merge male and female brick datasets into one, keeping the 'gender' column intact
+combined_data <- bind_rows(male_data_merged, female_data_merged)
+
+# Define categorical and continuous variables
+categorical_vars <- c("Hydrea_at_scan_T0", "genotype", "gender_brick")
+continuous_vars <- c("AgeChild", "HB0", "HPLC_HbS_T0")
+
+# Build the summary with tableone::CreateTableOne(). It takes `vars`/`factorVars`,
+# not the formula and render.* arguments of table1::table1().
+table1_summary <- CreateTableOne(
+  vars = c(categorical_vars, continuous_vars),
+  data = combined_data,
+  factorVars = categorical_vars
+)
+# Print the summary; `nonnormal` reports the continuous variables as median [IQR]
+print(table1_summary, nonnormal = continuous_vars)
+
+
+
+
diff --git a/r_scripts/12112024_script_table1_ash_2024_poster.R b/r_scripts/12112024_script_table1_ash_2024_poster.R
new file mode 100644
index 0000000..56d8b3d
--- /dev/null
+++ b/r_scripts/12112024_script_table1_ash_2024_poster.R
@@ -0,0 +1,101 @@
+# In this script, the definitive Table 1 was made for the ASH 2024 poster. Hb has 2 missing values, so this should be addressed separately.
+library(dplyr)
+library(tableone)
+
+# Read male and female brick data
+male_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_boys.csv")
+female_brick <- read.csv("Z:/Aida_experiment/ASH_poster_2024/12112024_TCV_BRICK_BRIDGE_gen_girls.csv")
+
+
+# Extract the relevant columns from table1, table2_filtered, and raw for males (with NA included)
+table1_filtered_male_withna <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0)
+
+table2_filtered_male_withna <- table2_filtered %>%
+  select(Participant_Id, HB0)
+
+raw_filtered_male_withna <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0)
+
+# Merge male data with additional columns (keeping NAs)
+male_data_merged_withna <- male_brick %>%
+  inner_join(table1_filtered_male_withna, by = "Participant_Id") %>%
+  inner_join(table2_filtered_male_withna, by = "Participant_Id") %>%
+  inner_join(raw_filtered_male_withna, by = "Participant_Id")
+
+# Repeat the process for females (with NA included)
+table1_filtered_female_withna <- table1 %>%
+  select(Participant_Id, Hydrea_at_scan_T0)
+
+table2_filtered_female_withna <- table2_filtered %>%
+  select(Participant_Id, HB0)
+
+raw_filtered_female_withna <- raw %>%
+  select(Participant_Id, HPLC_HbS_T0)
+
+# Merge female data with additional columns (keeping NAs)
+female_data_merged_withna <- female_brick %>%
+  inner_join(table1_filtered_female_withna, by = "Participant_Id") %>%
+  inner_join(table2_filtered_female_withna, by = "Participant_Id") %>%
+  inner_join(raw_filtered_female_withna, by = "Participant_Id")
+
+# Combine male and female data into one dataset
+combined_data_withna <- bind_rows(male_data_merged_withna, female_data_merged_withna)
+
+# Check for missing values in each column and count the number of observations
+missing_values <- combined_data_withna %>%
+  summarise(across(everything(), 
+                   list(Missing = ~sum(is.na(.)), 
+                        Observed = ~sum(!is.na(.)))))
+
+# Print missing values and observed counts for each variable
+print("Missing and Observed Values for Each Variable:")
+print(missing_values)
+
+# Only in Hb0 2 values are missing! Keep the 18 for the rest of table1. We keep the table1 script from 
+
+# Define categorical and continuous variables (excluding HB0 from Table 1)
+categorical_vars_withna <- c("Hydrea_at_scan_T0", "genotype", "gender_brick")
+continuous_vars_withna <- c("AgeChild", "HPLC_HbS_T0")  # Exclude HB0 from here
+
+# Create Table 1 summary without HB0
+table1_summary_wna <- CreateTableOne(
+  vars = c("Hydrea_at_scan_T0", "genotype", "gender_brick", 
+           "AgeChild", "HPLC_HbS_T0"),  # Include all variables except HB0
+  data = combined_data_withna, 
+  factorVars = categorical_vars_withna  # Specify categorical variables
+)
+
+# Print the Table 1 summary
+print("Table 1 Summary (Excluding HB0):")
+print(table1_summary_wna)
+
+# Create a custom summary for the continuous variables (mean and SD for AgeChild, median and IQR for HPLC_HbS_T0)
+custom_summary_withna <- combined_data_withna %>%
+  summarise(
+    AgeChild_mean = mean(AgeChild, na.rm = TRUE),  # Mean of AgeChild
+    AgeChild_sd = sd(AgeChild, na.rm = TRUE),      # SD of AgeChild
+    HPLC_HbS_T0_median = median(HPLC_HbS_T0, na.rm = TRUE),  # Median of HPLC_HbS_T0
+    HPLC_HbS_T0_IQR = IQR(HPLC_HbS_T0, na.rm = TRUE)  # IQR of HPLC_HbS_T0
+  )
+
+# Print the custom summary
+print("Custom Summary (Mean, SD for AgeChild; Median, IQR for HPLC_HbS_T0):")
+print(custom_summary_withna)
+
+# Calculate the mean and SD of HB0, excluding NAs (missing values)
+hb0_summary <- combined_data_withna %>%
+  summarise(
+    HB0_mean = mean(HB0, na.rm = TRUE),  # Mean excluding NAs
+    HB0_sd = sd(HB0, na.rm = TRUE)  # SD excluding NAs
+  )
+
+# Print the summary for HB0
+print("Summary of HB0 (Excluding Missing Data):")
+print(hb0_summary)
+
+# Footnote about missing HB0 values; the observed/total counts are computed from the data so they cannot go stale
+footnote <- sprintf(
+  "Note: For the variable 'HB0', only data from %d/%d participants was included due to missing values.",
+  sum(!is.na(combined_data_withna$HB0)), nrow(combined_data_withna))
+print(footnote)
diff --git a/r_scripts/8112024_Script_missing_lab_values_T0.R b/r_scripts/8112024_Script_missing_lab_values_T0.R
new file mode 100644
index 0000000..9b3c1a9
--- /dev/null
+++ b/r_scripts/8112024_Script_missing_lab_values_T0.R
@@ -0,0 +1,18 @@
+### This script identifies missing lab values so that we can check them in Castor. First run the script 1172024_Table2_Growthcurves_script_attempt.R (it creates table2_filtered).
+
+library(dplyr)
+library(tidyr)
+library(writexl)
+
+# Identify missing values for each Participant_Id, excluding `reticulocyte_count_percentage_1`
+missing_values <- table2_filtered %>%
+  pivot_longer(cols = -Participant_Id, names_to = "Variable", values_to = "Value") %>% # Transform to long format
+  filter(is.na(Value) & Variable != "reticulocyte_count_percentage_1") %>%             # Filter for missing values, excluding reticulocyte_count_percentage_1
+  select(Participant_Id, Variable)                                                     # Select only relevant columns
+
+# Write the result to an Excel file
+write_xlsx(missing_values, path = "Z:/Aida_experiment/Growthcurves_paper/Uittezoeken/missing_values_per_participant.xlsx")
+
+
+# Display the result
+print(missing_values)
diff --git a/r_scripts/8112024_Table3_volbrain_stats.R b/r_scripts/8112024_Table3_volbrain_stats.R
new file mode 100644
index 0000000..c0739ac
--- /dev/null
+++ b/r_scripts/8112024_Table3_volbrain_stats.R
@@ -0,0 +1,93 @@
+library(dplyr)
+library(writexl)
+library(tidyr)
+
+# Read in the raw data file, ages, and volbrain data
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep = ";", fileEncoding = "UTF-8-BOM")
+ages <- read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep = ",") # this file contains the ages and recon-all data
+volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv") # contains volbrain output
+
+# Rename participant id for later merge
+volbrain <- volbrain %>% rename(Participant_Id = Participant.Id)
+
+# The `ï..` prefix on the first raw header came from a UTF-8 byte-order mark; read as "UTF-8-BOM" (above) the header is Participant.Id
+raw <- raw %>% rename(Participant_Id = Participant.Id)
+
+# Table 4: White matter hyperintensities
+table4 <- volbrain %>%
+  select(
+    Participant_Id, dl_Sex_T0, dl_Age_T0, dl_Quality_control_T1_T0, dl_Quality_control_FLAIR_T0,
+    dl_Total_lesion_count_T0, dl_Total_lesion_volume_.absolute._cm3_T0,
+    dl_Total_lesion_volume_.normalized._._T0, dl_Total_lesion_burden_T0,
+    dl_Periventricular_lesion_count_T0, dl_Periventricular_lesion_volume_.absolute._cm3_T0,
+    dl_Periventricular_lesion_volume_.normalized._._T0, dl_Periventricular_lesion_burden_T0,
+    dl_Deep_white_lesion_count_T0, dl_Deep_white_lesion_volume_.absolute._cm3_T0,
+    dl_Deep_white_lesion_volume_.normalized._._T0, dl_Deep_white_lesion_burden_T0,
+    dl_Juxtacortical_lesion_count_T0, dl_Juxtacortical_lesion_volume_.absolute._cm3_T0,
+    dl_Juxtacortical_lesion_volume_.normalized._._T0, dl_Juxtacortical_lesion_burden_T0,
+    dl_Infratentorial_lesion_count_T0, dl_Infratentorial_lesion_volume_.absolute._cm3_T0,
+    dl_Infratentorial_lesion_volume_.normalized._._T0, dl_Infratentorial_lesion_burden_T0
+  )
+
+# Merge Exclude_Score_QC_T1w_T0 from raw dataset into table4
+table4_merged <- table4 %>%
+  left_join(raw %>% select(Participant_Id, Exclude_Score_QC_T1w_T0), by = "Participant_Id")
+
+# Clean the quality control columns: Convert to character and trim whitespace
+table4_merged <- table4_merged %>%
+  mutate(
+    dl_Quality_control_T1_T0 = trimws(as.character(dl_Quality_control_T1_T0)),
+    dl_Quality_control_FLAIR_T0 = trimws(as.character(dl_Quality_control_FLAIR_T0))
+  )
+
+# Exclude low-quality scans: a QC grade containing "C", or Exclude_Score_QC_T1w_T0 equal to 1. A missing flag is kept.
+table4_volbrain_df <- table4_merged %>%
+  filter(
+    !(grepl("C", dl_Quality_control_T1_T0) | grepl("C", dl_Quality_control_FLAIR_T0)) &
+      (is.na(Exclude_Score_QC_T1w_T0) | Exclude_Score_QC_T1w_T0 != 1)  # NA != 1 is NA, and filter() drops NA rows
+  )
+
+# View the filtered table
+print(table4_volbrain_df)
+
+#now create a table with volbrains descriptives
+# Define the continuous variables for which you want to calculate means and SDs
+continuous_vars <- c(
+  "dl_Age_T0", 
+  "dl_Total_lesion_count_T0", 
+  "dl_Total_lesion_volume_.absolute._cm3_T0",
+  "dl_Total_lesion_volume_.normalized._._T0", 
+  "dl_Total_lesion_burden_T0", 
+  "dl_Periventricular_lesion_count_T0", 
+  "dl_Periventricular_lesion_volume_.absolute._cm3_T0",
+  "dl_Periventricular_lesion_volume_.normalized._._T0", 
+  "dl_Periventricular_lesion_burden_T0", 
+  "dl_Deep_white_lesion_count_T0", 
+  "dl_Deep_white_lesion_volume_.absolute._cm3_T0", 
+  "dl_Deep_white_lesion_volume_.normalized._._T0", 
+  "dl_Deep_white_lesion_burden_T0", 
+  "dl_Juxtacortical_lesion_count_T0", 
+  "dl_Juxtacortical_lesion_volume_.absolute._cm3_T0", 
+  "dl_Juxtacortical_lesion_volume_.normalized._._T0", 
+  "dl_Juxtacortical_lesion_burden_T0", 
+  "dl_Infratentorial_lesion_count_T0", 
+  "dl_Infratentorial_lesion_volume_.absolute._cm3_T0", 
+  "dl_Infratentorial_lesion_volume_.normalized._._T0", 
+  "dl_Infratentorial_lesion_burden_T0"
+)
+
+# Define the categorical variables if needed
+categorical_vars <- c("dl_Sex_T0")  # If you want to include categorical variables like sex
+
+# Create the descriptive table; the call is namespace-qualified because this script does not load tableone
+table4_volbrain <- tableone::CreateTableOne(
+  vars = c(continuous_vars, categorical_vars),  # Include both continuous and categorical variables
+  data = table4_volbrain_df,                        # The dataset to summarize
+  factorVars = categorical_vars,                 # Specify categorical variables
+  includeNA = TRUE                               # Include NA values in the table if desired
+)
+
+# Print the table
+print(table4_volbrain)
+
+
diff --git a/r_scripts/8112024_create_table4_fs_recon-all.R b/r_scripts/8112024_create_table4_fs_recon-all.R
new file mode 100644
index 0000000..0d680f3
--- /dev/null
+++ b/r_scripts/8112024_create_table4_fs_recon-all.R
@@ -0,0 +1,83 @@
+library(dplyr)
+library(writexl)
+library(tidyr)
+
+# Read in the raw castor export, the ages/recon-all table, and the volbrain output
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep = ";")
+ages <- read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep = ",") # ages and recon-all data
+volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv") # volbrain output
+
+# The id header of raw is read with a garbled prefix (presumably a byte-order mark) whose
+# spelling depends on the session encoding; check the suffix and rename by position.
+stopifnot(endsWith(names(raw)[1], "Participant.Id"))
+names(raw)[1] <- "Participant_Id"
+ages <- ages %>% rename(Participant_Id = Participant.Id)
+
+
+# Table 5: recon-all volumetrics (white, grey and subcortical matter). The z-scores
+# for the growth charts are not computed in this step.
+table5 <- ages %>%
+  # Derived totals are created directly under their display names
+  mutate(
+    `Total Cortical Volume` = CortexVol + Left.Cerebellum.Cortex + Right.Cerebellum.Cortex,
+    `Total Cerebellar Volume` = Left.Cerebellum.Cortex + Right.Cerebellum.Cortex
+  ) %>%
+  # Keep only the table columns, renaming the source columns for readability
+  select(
+    Participant_Id,
+    `Age (years)` = Age_at_scan_y_T0X,
+    Gender = gender_BRICK,
+
+    # Total volumes
+    `Total Cortical Volume`,
+    `Total White Matter Volume` = CerebralWhiteMatterVol,
+    `Total Gray Matter Volume` = TotalGrayVol,
+    `Total Cerebellar Volume`,
+
+    # Subcortical structures, left and right
+    `Thalamus Left` = Left.Thalamus.Proper,
+    `Thalamus Right` = Right.Thalamus.Proper,
+    `Caudate Left` = Left.Caudate,
+    `Caudate Right` = Right.Caudate,
+    `Putamen Left` = Left.Putamen,
+    `Putamen Right` = Right.Putamen,
+    `Pallidum Left` = Left.Pallidum,
+    `Pallidum Right` = Right.Pallidum,
+    `Hippocampus Left` = Left.Hippocampus,
+    `Hippocampus Right` = Right.Hippocampus
+  )
+
+
+# Attach the T1w quality-control flag from raw to every row of table5
+table5_merged <- left_join(
+  table5,
+  select(raw, Participant_Id, Exclude_Score_QC_T1w_T0),
+  by = "Participant_Id"
+)
+
+# Throw out the scans flagged for exclusion (flag equal to 1). filter() also drops rows
+# whose flag is NA, e.g. participants without a match in raw.
+table5_fs <- filter(table5_merged, Exclude_Score_QC_T1w_T0 != 1)
+# Now create the descriptive table
+
+# Volumetric variables to summarise; Age (years) and Participant_Id are left out
+vars <- c(
+  "Total Cortical Volume", 
+  "Total White Matter Volume", 
+  "Total Gray Matter Volume", 
+  "Total Cerebellar Volume",
+  "Thalamus Left", "Thalamus Right", 
+  "Caudate Left", "Caudate Right", 
+  "Putamen Left", "Putamen Right", 
+  "Pallidum Left", "Pallidum Right", 
+  "Hippocampus Left", "Hippocampus Right"
+)
+
+# The tableone package provides CreateTableOne
+library(tableone)
+
+# Gender is named in factorVars, and tableone expects every factorVars entry to be listed
+# in vars as well; without that Gender is never tabulated. Append it to the variable list.
+table_summary <- CreateTableOne(vars = c(vars, "Gender"), data = table5_fs, factorVars = c("Gender"))
+
+print(table_summary)
diff --git a/r_scripts/fs_table_marjolein_abstract_28102024.R b/r_scripts/fs_table_marjolein_abstract_28102024.R
new file mode 100644
index 0000000..010f4af
--- /dev/null
+++ b/r_scripts/fs_table_marjolein_abstract_28102024.R
@@ -0,0 +1,81 @@
+library(dplyr)
+# Prepare a table for Marjolein to plot in the reference curves and pull out z-scores
+# Input datasets
+brick_fs <- read.csv("Z:/processed_data/freesurfer_stats/brain_volumes_from_freesurfer_no_qc.csv") # freesurfer output all brick subjects
+Koppelfile <- read.csv("Z:/Aida_experiment/Part_ID_PID.csv", sep = ";") # contains PID and BRICK Participant ID
+Marjolein_file <- read.csv("Z:/Aida_experiment/SCD_output_vol_table_BRIDGE_Marjolein.csv", sep = ",") # contains BRIDGE data and PID
+age_check <- read.csv("Z:/castor_proof_files/csv_castor/current/HbF_TCD_age_months_T020082024_ano_pres_castor.csv", sep = ";") # ages of brick participants in months
+permissions <- read.csv("Z:/Aida_experiment/BRICK_export_permissions_20241029.csv", sep = ";")
+
+# Left join on PID: every Koppelfile row is kept and gains the matching SubjectID
+brickbridge_fs <- merge(
+  x = Koppelfile, y = Marjolein_file[, c("PID", "SubjectID")],
+  by = "PID", all.x = TRUE
+)
+
+# Drop PID and put SubjectID in front of the remaining columns
+brickbridge_fs <- brickbridge_fs[, c("SubjectID", setdiff(names(brickbridge_fs), c("PID", "SubjectID")))]
+
+# Replace hyphens with underscores in the brick_fs ids (presumably to match the other tables)
+brick_fs$Participant.ID <- gsub("-", "_", brick_fs$Participant.ID, fixed = TRUE)
+
+# Full outer join (all = TRUE): rows without a match in the other table are kept too
+combined_brick <- merge(brickbridge_fs, brick_fs, by.x = "Participant.Id", by.y = "Participant.ID", all = TRUE)
+
+# Add age (in months) and gender from age_check
+
+# The id is the first column of age_check; give it the name used in combined_brick
+names(age_check)[1] <- "Participant.Id"
+
+# Upper-case the ids and strip surrounding whitespace on both sides of the join
+age_check$Participant.Id <- trimws(toupper(age_check$Participant.Id))
+combined_brick$Participant.Id <- trimws(toupper(combined_brick$Participant.Id))
+
+# Left join on Participant.Id: all combined_brick rows are kept and only the age
+# and gender columns are taken from age_check
+combined_brick <- merge(
+  combined_brick,
+  age_check[, c("Participant.Id", "Age_at_scan_m_T0X", "gender_BRICK")],
+  by = "Participant.Id", all.x = TRUE
+)
+
+# Drop a column named "X" when present, then derive the age in years from the months
+combined_brick <- combined_brick[, names(combined_brick) != "X"]
+combined_brick[["Age_at_scan_y_T0X"]] <- combined_brick[["Age_at_scan_m_T0X"]] / 12
+
+# Reorder: the two leading columns stay in front, followed by age in months, age in
+# years and gender. Every column already listed is removed from the remainder; listing
+# a column twice makes `[` return a duplicated copy of it (e.g. "SubjectID.1").
+column_order <- c(names(combined_brick)[1:2], "Age_at_scan_m_T0X", "Age_at_scan_y_T0X", "gender_BRICK")
+column_order <- c(column_order, setdiff(names(combined_brick), column_order))
+combined_brick <- combined_brick[, column_order]
+
+# Keep the first row per Participant.Id (duplicates were noted for brick 3 and 9)
+combined_brick <- combined_brick %>% distinct(Participant.Id, .keep_all = TRUE)
+
+# Add the data-sharing permission from the castor export
+# The id is the first column of the export, but its header is read with a garbled prefix
+# that depends on the session encoding ("ï..Participant.Id" is one spelling), so check
+# the suffix and rename by position, as is done for age_check.
+stopifnot(endsWith(colnames(permissions)[1], "Participant.Id"))
+colnames(permissions)[1] <- "Participant Id"
+# combined_brick ids were upper-cased and trimmed earlier; normalise this side the same way
+permissions[["Participant Id"]] <- trimws(toupper(permissions[["Participant Id"]]))
+
+# Left join that brings in only the permission column
+combined_brick <- merge(
+  combined_brick,
+  permissions[, c("Participant Id", "permission_data_sharing_for_other_studies")],
+  by.x = "Participant.Id", by.y = "Participant Id", all.x = TRUE
+)
+
+# Place "permission_data_sharing_for_other_studies" (last after the merge) as the sixth column
+combined_brick <- combined_brick[, c(1:5, ncol(combined_brick), 6:(ncol(combined_brick) - 1))]
+# Remove the duplicated SubjectID column if an earlier reordering produced one
+combined_brick$SubjectID.1 <- NULL
+
+write.csv(combined_brick, "Z:/Aida_experiment/combined_BRICK_marjolein.csv", row.names = FALSE)
+
+
+
+
diff --git a/r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R b/r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R
new file mode 100644
index 0000000..07e3287
--- /dev/null
+++ b/r_scripts/fs_table_marjolein_abstract_with_genotype_8112024.R
@@ -0,0 +1,43 @@
+## this script is for adding the genotype column to the original file I sent to Marjolein for the growth charts paper
+
+library(dplyr)
+
+ages <- read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep = ",") # ages and recon-all data (calculated fields could apparently not be exported from castor)
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep = ";") # contains a castor-export
+
+# Rename the id columns first: genotype is selected by Participant_Id below, and raw has
+# no column of that name until this rename has run. The id header of raw is read with a
+# garbled, encoding-dependent prefix, so check its suffix and rename by position.
+stopifnot(endsWith(names(raw)[1], "Participant.Id"))
+names(raw)[1] <- "Participant_Id"
+ages <- ages %>% rename(Participant_Id = Participant.Id)
+
+# Select the genotype column from the raw df
+genotype <- raw %>% select(Participant_Id, brick_genotype)
+
+# Link brick_genotype to the data in the ages df
+# Left join: every row of ages is kept; ids without a match in genotype get NA
+ages_with_genotype <- ages %>%
+  left_join(genotype, by = "Participant_Id")
+
+# Reorder columns to place 'brick_genotype' as the sixth column
+ages_with_genotype <- ages_with_genotype %>%
+  select(1:5, brick_genotype, everything())
+
+# Add 'severe_genotype': 1 for HbSS and HbSb0, 0 for any other recorded genotype.
+# A missing or empty genotype gives NA instead of 0, so unknown is not counted as non-severe.
+ages_with_genotype <- ages_with_genotype %>%
+  mutate(severe_genotype = case_when(
+    is.na(brick_genotype) | brick_genotype == "" ~ NA_real_,
+    brick_genotype %in% c("HbSS", "HbSb0") ~ 1,
+    TRUE ~ 0
+  ))
+
+# Reorder columns to place 'severe_genotype' after 'brick_genotype'
+ages_with_genotype <- ages_with_genotype %>%
+  select(1:6, severe_genotype, everything())
+
+# Check the first few rows to ensure it looks correct
+head(ages_with_genotype)
+
+write.csv(ages_with_genotype, "Z:/Aida_experiment/combined_BRICK_marjolein_with_genotype.csv", row.names = FALSE)
\ No newline at end of file
diff --git a/r_scripts/organize_BRIDGE_data.R b/r_scripts/organize_BRIDGE_data.R
new file mode 100644
index 0000000..060c4ae
--- /dev/null
+++ b/r_scripts/organize_BRIDGE_data.R
@@ -0,0 +1,84 @@
+library(dplyr)
+# Inputs: the table linking PID to Participant.Id, and the BRIDGE volume table
+Koppelfile <- read.csv("Z:/Aida_experiment/Part_ID_PID.csv", sep = ";")
+Marjolein_file <- read.csv("Z:/Aida_experiment/SCD_output_vol_table_BRIDGE_Marjolein.csv", sep = ",")
+
+# Left join on PID: every Marjolein_file row is kept and gains its Participant.Id
+combined_data <- merge(
+  x = Marjolein_file, y = Koppelfile[, c("PID", "Participant.Id")],
+  by = "PID", all.x = TRUE
+)
+
+# Drop PID and move Participant.Id in front of the remaining columns
+combined_data <- combined_data[, c("Participant.Id", setdiff(names(combined_data), c("PID", "Participant.Id")))]
+
+# Save the result
+write.csv(combined_data, "Z:/Aida_experiment/BRIDGE_T0.csv", row.names = FALSE)
+
+##
+
+# Compare the freesurfer output with Marjolein's data
+brick_freesurfer <- read.csv("Z:/processed_data/freesurfer_stats/brain_volumes_from_freesurfer_no_qc.csv")
+
+# Locate the participant-id column of brick_freesurfer case-insensitively. read.csv (with
+# its default check.names) cannot produce a name containing a space, and the capitalisation
+# of "Id" in this CSV is not fixed here; stop unless exactly one column matches.
+fs_id_col <- grepl("^participant[. ]id$", names(brick_freesurfer), ignore.case = TRUE)
+stopifnot(sum(fs_id_col) == 1)
+
+# Give the id columns of the two data frames source-specific names
+colnames(brick_freesurfer)[fs_id_col] <- "Participant Id_brick"
+colnames(combined_data)[colnames(combined_data) == "Participant.Id"] <- "Participant Id_bridge"
+
+# Replace "-" with "_" in the ids of brick_freesurfer; this runs after the rename
+# because assigning from a column that does not exist yet fails
+brick_freesurfer$`Participant Id_brick` <- gsub("-", "_", brick_freesurfer$`Participant Id_brick`)
+
+# Merge the Right.Amygdala values of both sources on the participant id; the
+# suffixes mark which source each value comes from
+merged_data <- merge(
+  brick_freesurfer[, c("Participant Id_brick", "Right.Amygdala")],
+  combined_data[, c("Participant Id_bridge", "Right.Amygdala")],
+  by.x = "Participant Id_brick",
+  by.y = "Participant Id_bridge",
+  suffixes = c("_brick", "_bridge")
+)
+
+# View the resulting merged dataframe
+head(merged_data)
+
+# BRICK003 appears twice here, so check Marjolein_file for duplicated PIDs.
+# These are 5873683 1192976, BRICK_009 and BRICK_003.
+# Take the PID of every row whose PID occurs more than once (first occurrences included)
+duplicate_values <- with(
+  Marjolein_file,
+  PID[duplicated(PID) | duplicated(PID, fromLast = TRUE)]
+)
+unique_duplicates <- unique(duplicate_values)
+print(unique_duplicates)
+
+# Now check the ages; everything else looks fine
+age_check <- read.csv("Z:/castor_proof_files/csv_castor/current/HbF_TCD_age_months_T020082024_ano_pres_castor.csv", sep = ";")
+
+# Step 1: the id header is read with a garbled, encoding-dependent prefix
+# ("ï..BRICK" is one spelling), so check its suffix and rename by position
+stopifnot(endsWith(colnames(age_check)[1], "BRICK"))
+colnames(age_check)[1] <- "Participant_Id"
+
+# Step 2: Merge the two dataframes on the participant id (matching rows only)
+merged_data <- merge(
+  age_check[, c("Participant_Id", "Age_at_scan_y_T0X")],
+  combined_data[, c("Participant Id_bridge", "Age.at.time.of.Scan")],
+  by.x = "Participant_Id",
+  by.y = "Participant Id_bridge",
+  suffixes = c("_age_check", "_combined")
+)
+
+# Step 3: Compare the age columns (exact equality) and display the result
+comparison_result <- merged_data[, c("Participant_Id", "Age_at_scan_y_T0X", "Age.at.time.of.Scan")]
+comparison_result$Age_Match <- comparison_result$Age_at_scan_y_T0X == comparison_result$Age.at.time.of.Scan
+print(comparison_result)
+
+# Marjolein's data is not expressed in months but as a decimal fraction; looks good enough as far as I am concerned
+
+ 
diff --git a/r_scripts/subset_key_table_castor.R b/r_scripts/subset_key_table_castor.R
new file mode 100644
index 0000000..5ef0e52
--- /dev/null
+++ b/r_scripts/subset_key_table_castor.R
@@ -0,0 +1,9 @@
+# Prepare a key table for upload in castor (participation in other studies)
+library(dplyr)
+short_key_table <- read.csv(file = "Z:/castor_proof_files/brick_score_key_26_4_2024.csv")
+subset_key_table <- short_key_table[, seq_len(10)] # first ten columns only
+print(subset_key_table)
+
+write.csv(x = subset_key_table, file = "Z:/castor_proof_files/brick_subset_key_table102024.csv", row.names = FALSE)
+
+