brickstudymasons · brickstudymasons · Nov 14, 2024 · Nov 23, 2024
diff --git a/dwi_2_environment.yaml b/dwi_2_environment.yaml
@@ -0,0 +1,21 @@
+# dwi environment
+
+
+name: witty
+channels:
+  - conda-forge
+
+dependencies:
+  - dipy=1.7.0
+  - fury=0.7.1
+  - jupyter
+  - jupyterlab
+  - matplotlib=3.5.3
+  - nilearn=0.7.0
+  - osfclient=0.0.5
+  - python>=3.10
+  - pybids
+
+# If the environment does not resolve, try installing everything except pybids first, then run `conda install conda-forge::pybids`.
+
+
diff --git a/dwi_3_environment.yaml b/dwi_3_environment.yaml
@@ -0,0 +1,22 @@
+# dwi environment
+
+
+name: wittier
+channels:
+  - conda-forge
+
+dependencies:
+  - dipy=1.7.0
+  - fury=0.7.1
+  - jupyter
+  - jupyterlab
+  - matplotlib=3.5.3
+  - nilearn=0.7.0
+  - numexpr=2.8.4 
+  - osfclient=0.0.5
+  - python>=3.10
+  - pybids
+
+# If the environment does not resolve, try installing everything except pybids first, then run `conda install conda-forge::pybids`.
+
+
diff --git a/r_scripts/1112024_try_replace_colnames_scorecsvs.R b/r_scripts/1112024_try_replace_colnames_scorecsvs.R
@@ -0,0 +1,64 @@
+# Replace the Participant Ids in the SCORE repeating-data export files with BRICK participant Ids, so that the files can be uploaded to the BRICK Castor.
+# The script reads the SCORE repeating-data files, maps the Ids through the key table and writes the updated files as CSV.
+library(dplyr)
+
+# Load the BRICK key table and expose its Participant.Id column under the name BRICK_Id
+subset_key_table <- rename(
+  read.csv("Z:/castor_proof_files/csv_castor/current/brick_subset_key_table102024.csv"), BRICK_Id = Participant.Id)
+
+# Named list of the SCORE repeating-data export files; the list names are reused further down for the output file names.
+# NOTE: these are Windows paths (Z: drive); per the original note the same data sits under /mnt/data on Linux.
+file_paths <- list(
+  visual_hearing = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Visual_and_hearing_disease_Medical_History_Clinical_manifestati_export_20240717.csv",
+  acute_complications = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Acute_complications_export_20240717.csv",
+  bone_extremities = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Bone_and_extremities_Medical_History_Clinical_manifestations_export_20240717.csv",
+  cardiac_pulmonary = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Cardiac_and_pulmonary_disease_Medical_History_Clinical_manifest_export_20240717.csv",
+  comorbidities = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Comorbidities_export_20240717.csv",
+  endocrinological = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Endocrinological_disease_Medical_History_Clinical_manifestation_export_20240717.csv",
+  registry = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__export_20240717.csv",
+  liver_kidney = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Liver_and_kidney_disease_Medical_History_Clinical_manifestation_export_20240717.csv",
+  neurological = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Neurological_disease_Medical_History_Clinical_manifestations_export_20240717.csv",
+  specific_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatment_Use_of_specific_treatment_or_inclusion_in_Clinical_Tr_export_20240717.csv",
+  chelation_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatments_chelation_export_20240717.csv",
+  hydroxyurea_treatment = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Treatments_hydroxyurea_export_20240717.csv",
+  visit = "Z:/score_clinical_manifestationTOBEREVIEWED/SCORE_RADeep-registry__Visit_export_20240717.csv"
+)
+
+# For every SCORE export: read it, call its first column RADeep_id, and swap in the BRICK id.
+# lapply() keeps the names of file_paths, so df_list is named like file_paths.
+df_list <- lapply(file_paths, function(path) {
+  score_df <- read.csv(path, sep = ";", stringsAsFactors = FALSE)
+  # Rename by position: the first column is treated as the RADeep participant id
+  colnames(score_df)[1] <- "RADeep_id"
+
+  # Look up the BRICK id in the key table and put it first as `Participant Id`
+  score_df <- score_df %>%
+    left_join(subset_key_table, by = "RADeep_id") %>%
+    mutate(`Participant Id` = BRICK_Id) %>%
+    select(`Participant Id`, everything(), -BRICK_Id)
+
+  # A left join keeps unmatched rows with an NA id; report them instead of exporting them unnoticed
+  n_unmatched <- sum(is.na(score_df[["Participant Id"]]))
+  if (n_unmatched > 0) {
+    warning(n_unmatched, " row(s) without a BRICK id in ", path, call. = FALSE)
+  }
+  score_df
+})
+names(df_list) <- names(file_paths)
+
+# Also expose every data frame as a variable named after its list entry
+list2env(df_list, envir = .GlobalEnv)
+
+# Directory that receives the updated CSV files (paste0 below relies on the trailing slash)
+output_dir <- "Z:/castor_proof_files/csv_castor/current/"
+
+# Create the directory if needed; recursive = TRUE also creates missing parent folders
+dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
+
+# Write one "<list name>_updated.csv" per data frame, without row names
+for (name in names(df_list)) {
+  write.csv(df_list[[name]], file = paste0(output_dir, name, "_updated.csv"), row.names = FALSE)
+}
+
+# Confirm file creation
+cat("CSV files have been created in:", output_dir, "\n")
diff --git a/r_scripts/1162024Tables_paper_1.R b/r_scripts/1162024Tables_paper_1.R
@@ -0,0 +1,154 @@
+## In this script, I try to clean up the Castor data export of November 11th. At this time, the SCORE repeated data, volBrain data and FreeSurfer data have not been uploaded yet.
+# Oddly, age_T0_MRI_months is exported incorrectly by Castor, so I decided to limit my next export to just the baseline_MRI form at T0 and to merge it later.
+library(dplyr)
+library(writexl) #to write the tables to excel files
+# Read the raw Castor export. For now it lives in my personal folder; it will move once the final data structure is settled.
+# The first header used to come in as `ï..Participant.Id`, i.e. a UTF-8 byte-order mark; fileEncoding = "UTF-8-BOM" strips it.
+raw <- read.csv("Z:/Aida_experiment/6112024_brick_castor_data/BRICK_export_20241106.csv", sep = ";", fileEncoding = "UTF-8-BOM")
+# Ages and recon-all data (calculated fields apparently cannot be exported from Castor)
+ages <- read.csv("Z:/Aida_experiment/combined_BRICK_marjolein.csv", sep = ",")
+# volBrain output
+volbrain <- read.csv("Z:/castor_proof_files/csv_castor/current/volbrains_castor.csv")
+
+# Give the id column the same name in all three tables so they can be merged later.
+raw <- rename(raw, Participant_Id = Participant.Id)
+
+ages <- rename(ages, Participant_Id = Participant.Id)
+
+volbrain <- rename(volbrain, Participant_Id = Participant.Id)
+
+# I decided not to clean the export up; instead, just start creating the tables.
+
+# Table 1 ingredients in one chain: pick the columns from raw, bring in age (months) and
+# gender from ages, then add the age in years (months / 12, rounded to one decimal).
+table1 <- raw %>%
+  select(Participant_Id, Hydrea_at_scan_T0, brick_genotype, mothersbirth, fathersbirth) %>%
+  left_join(
+    select(ages, Participant_Id, Age_at_scan_m_T0X, gender_BRICK),
+    by = "Participant_Id"
+  ) %>%
+  mutate(Age_at_scan_years = round(Age_at_scan_m_T0X / 12, 1))
+
+# Table 2: lab values including HbF
+table2 <- select(
+  raw,
+  Participant_Id, ERY0, HB0, MCV0, HT0, reticulocyte_count_percentage_1, LEU0, TROMBO0, FE0, FERT0, TRAF0, TSAT0, ALAT0, LDH0, TBIL0, DBIL0, KREA0, SCHWARTZ_Bedsite0, FOLZ0, UREU0, NA0, K00, VITD0, ASAT0, AFOS0, GGT0, CRP0, KREA_U0, TE_U0, TE_U_KR0, ALB_U0, ALB_U_KR0, B120, HPLC_HBF_T0, HPLC_HBF_date_T0, HPLC_HbS_T0, HPLC_HbS_date_T0
+)
+
+# Table 3: descriptives on radiology reports
+table3 <- select(
+  raw,
+  Participant_Id, Measurement_moment_QC_T1w_T0, Score_QC_T1w_T0, Exclude_Score_QC_T1w_T0, Remarks_Score_QC_T1w_T0, Screened_for_WMH_T0, WMH_observed_T0, Vasculature_examined_T0, Vascular_malformations_T0, Microbleeds_screened_T0, Microbleeds_present_T0, Incidental_finding_T0, Marjolein_T0, Remarks_MRI_T0
+)
+
+# Table 4: white matter hyperintensities (lesion columns of the volbrain data frame)
+table4 <- select(
+  volbrain,
+  Participant_Id, dl_Sex_T0, dl_Age_T0,
+  dl_Total_lesion_count_T0, dl_Total_lesion_volume_.absolute._cm3_T0, dl_Total_lesion_volume_.normalized._._T0, dl_Total_lesion_burden_T0,
+  dl_Periventricular_lesion_count_T0, dl_Periventricular_lesion_volume_.absolute._cm3_T0, dl_Periventricular_lesion_volume_.normalized._._T0, dl_Periventricular_lesion_burden_T0,
+  dl_Deep_white_lesion_count_T0, dl_Deep_white_lesion_volume_.absolute._cm3_T0, dl_Deep_white_lesion_volume_.normalized._._T0, dl_Deep_white_lesion_burden_T0,
+  dl_Juxtacortical_lesion_count_T0, dl_Juxtacortical_lesion_volume_.absolute._cm3_T0, dl_Juxtacortical_lesion_volume_.normalized._._T0, dl_Juxtacortical_lesion_burden_T0,
+  dl_Infratentorial_lesion_count_T0, dl_Infratentorial_lesion_volume_.absolute._cm3_T0, dl_Infratentorial_lesion_volume_.normalized._._T0, dl_Infratentorial_lesion_burden_T0
+)
+# Table 5: Recon-all volumetrics (white, grey and subcortical matter) with z-scores for 2 different kinds of growth charts.
+# NOTE(review): no z-score columns are selected below yet.
+# Step 1: add the two derived totals to the ages data frame
+table5 <- mutate(
+  ages,
+  Total_Cortical_Volume = CortexVol + Left.Cerebellum.Cortex + Right.Cerebellum.Cortex,
+  Total_Cerebellar_Volume = Left.Cerebellum.Cortex + Right.Cerebellum.Cortex
+)
+
+# Step 2: keep the reported columns, renamed to their display names
+table5 <- select(
+  table5,
+  `Participant ID` = Participant_Id,
+  `Age (years)` = Age_at_scan_y_T0X,
+  `Gender` = gender_BRICK,
+  # Total volumes
+  `Total Cortical Volume` = Total_Cortical_Volume,
+  `Total White Matter Volume` = CerebralWhiteMatterVol,
+  `Total Gray Matter Volume` = TotalGrayVol,
+  `Total Cerebellar Volume` = Total_Cerebellar_Volume,
+  # Subcortical structures
+  `Thalamus Left` = Left.Thalamus.Proper,
+  `Thalamus Right` = Right.Thalamus.Proper,
+  `Caudate Left` = Left.Caudate,
+  `Caudate Right` = Right.Caudate,
+  `Putamen Left` = Left.Putamen,
+  `Putamen Right` = Right.Putamen,
+  `Pallidum Left` = Left.Pallidum,
+  `Pallidum Right` = Right.Pallidum,
+  `Hippocampus Left` = Left.Hippocampus,
+  `Hippocampus Right` = Right.Hippocampus
+)
+# Table 5_1: Neuropsychological outcomes (WISC-V & WAIS-IV) + education level of the parents
+# NOTE(review): the objects below are named table6_*, while this heading says "5_1" - align the numbering.
+# WISC-V index scores
+table6_wisc <- select(
+  raw,
+  Participant_Id,
+  scorePrimIndex_TIQ_S_1,   # Full Scale IQ
+  scorePrimIndex_VBI_IQ_1,  # Verbal Comprehension Index
+  scorePrimIndex_VRI_IQ_1,  # Visual Spatial Index
+  scorePrimIndex_FRI_IQ_1,  # Fluid Reasoning Index
+  scorePrimIndex_WgI_IQ_1,  # Working Memory Index
+  scorePrimIndex_VsI_IQ_1,  # Processing Speed Index
+  scoreSecIndex_KRI_IQ_1,   # Quantitative Reasoning Index
+  scoreSecIndex_AWI_IQ_1,   # Auditory Working Memory Index
+  scoreSecIndex_NVI_IQ_1,   # Nonverbal Index
+  scoreSecIndex_AVI_IQ_1    # General Ability Index
+)
+
+# WAIS-IV index scores
+table6_wais <- select(
+  raw,
+  Participant_Id,
+  ScoreTIQ_1,     # Full Scale IQ (FSIQ)
+  ScoreSomVBI_1,  # Verbal Comprehension Index (VCI)
+  ScorePerVBI_1,  # Perceptual Verbal Comprehension Index (related to FRI) - NOTE(review): unverified, check the Castor codebook
+  ScoreBIVBI_1,   # Blocked Verbal Comprehension Index (related to FRI) - NOTE(review): unverified, check the Castor codebook
+  ScoreWgI_1,     # Working Memory Index (WMI)
+  ScoreVsI_1      # Processing Speed Index (PSI)
+)
+
+# Create a real Table 1 with the tableone package
+library(tableone)
+
+# combined_data was used without ever being defined in this script: build it from table1 plus the two
+# lab columns of table2 summarised further down (assumes one row per Participant_Id - TODO confirm).
+combined_data <- left_join(table1, select(table2, Participant_Id, HPLC_HbS_T0, HB0), by = "Participant_Id")
+
+# Categorical and continuous variables for CreateTableOne
+categorical_vars <- c("Hydrea_at_scan_T0", "brick_genotype", "gender_BRICK")
+continuous_vars <- c("Age_at_scan_years")
+vars <- c(categorical_vars, continuous_vars)
+
+# Create the table and print it
+table1_summary <- CreateTableOne(vars = vars, data = combined_data, factorVars = categorical_vars)
+print(table1_summary)
+
+# HbS: median and IQR of HPLC_HbS_T0, ignoring missing values.
+# combined_data must already exist when this part runs.
+hplc_summary <- summarise(
+  combined_data,
+  Median_HPLC_HbS_T0 = median(HPLC_HbS_T0, na.rm = TRUE),
+  IQR_HPLC_HbS_T0 = IQR(HPLC_HbS_T0, na.rm = TRUE)
+)
+
+# HB0: mean and SD, ignoring missing values
+hb0_summary <- summarise(
+  combined_data,
+  Mean_HB0 = mean(HB0, na.rm = TRUE),
+  SD_HB0 = sd(HB0, na.rm = TRUE)
+)
+
+# Print the custom summaries for HPLC_HbS_T0 and HB0
+print("HPLC_HbS_T0 - Median and IQR:")
+print(hplc_summary)
+
+print("HB0 - Mean and SD:")
+print(hb0_summary)
+
+
diff --git a/r_scripts/1172024_Table2_Growthcurves_script_attempt.R b/r_scripts/1172024_Table2_Growthcurves_script_attempt.R
@@ -0,0 +1,78 @@
+library(dplyr)
+library(tableone)
+library(writexl)
+library(tibble)  # For rownames_to_column()
+
+# Lab variables (all treated as continuous) that go into the summary table
+continuous_vars <- c(
+  "ERY0", "HB0", "MCV0", "HT0", "reticulocyte_count_percentage_1", "LEU0", "TROMBO0",
+  "FE0", "FERT0", "TRAF0", "TSAT0", "ALAT0", "LDH0", "TBIL0", "DBIL0", "KREA0",
+  "FOLZ0", "UREU0", "NA0", "K00", "VITD0", "ASAT0", "AFOS0", "GGT0", "CRP0",
+  "KREA_U0", "TE_U0", "TE_U_KR0", "ALB_U0", "ALB_U_KR0", "B120"
+)
+
+# Keep the participant id plus exactly those variables, in the same order.
+# table2 is not created in this script; it has to be in the workspace already
+# (r_scripts/1162024Tables_paper_1.R builds an object with that name).
+table2_filtered <- select(
+  table2, Participant_Id, all_of(continuous_vars)
+)
+
+# Named vector mapping each lab variable to a display label with unit. NOTE(review): labels and units are not verified here - confirm against the lab codebook.
+labels <- c(
+  "ERY0" = "Red blood cell (RBC) count (x10^12/L)",
+  "HB0" = "Hemoglobin (g/dL)",
+  "MCV0" = "Mean corpuscular volume (MCV) (fL)",
+  "HT0" = "Hematocrit (HCT) (%)",
+  "reticulocyte_count_percentage_1" = "Reticulocyte count (%)",
+  "LEU0" = "White blood cell (WBC) count (x10^9/L)",
+  "TROMBO0" = "Platelet count (x10^9/L)",
+  "FE0" = "Serum iron (µg/dL)",
+  "FERT0" = "Ferritin serum (ng/mL)",
+  "TRAF0" = "Transferrin (mg/dL)",
+  "TSAT0" = "Transferrin saturation (%)",
+  "ALAT0" = "Alanine transaminase (ALT) (U/L)",
+  "LDH0" = "Lactate Dehydrogenase (LDH) (U/L)",
+  "TBIL0" = "Total bilirubin (mg/dL)",
+  "DBIL0" = "Conjugated/direct bilirubin (mg/dL)",
+  "KREA0" = "Creatinine (mg/dL)",
+  "FOLZ0" = "Folate (ng/mL)",
+  "UREU0" = "Urea (mg/dL)",
+  "NA0" = "Sodium (mmol/L)",
+  "K00" = "Potassium (mmol/L)",
+  "VITD0" = "Vitamin D (ng/mL)",
+  "ASAT0" = "Aspartate transaminase (AST) (U/L)",
+  "AFOS0" = "Alkaline phosphatase (ALP) (U/L)",
+  "GGT0" = "Gamma-glutamyl transferase (GGT) (U/L)",
+  "CRP0" = "C-reactive protein (CRP) (mg/L)",
+  "KREA_U0" = "Creatinine (Urine) (mg/dL)",
+  "TE_U0" = "Iron (Urine) (µg/dL)",  # NOTE(review): confirm what TE_U stands for in the lab export
+  "TE_U_KR0" = "Iron (Urine, Kr) (µg/dL)",  # NOTE(review): "_KR" looks like a creatinine ratio - confirm label and unit
+  "ALB_U0" = "Albumin (Urine) (g/dL)",
+  "ALB_U_KR0" = "Albumin (Urine, Kr) (g/dL)",  # NOTE(review): "_KR" looks like a creatinine ratio - confirm label and unit
+  "B120" = "Bilirubin 120 (mg/dL)"  # NOTE(review): B120 may be vitamin B12 at T0 (cf. HB0, NA0) - confirm
+)
+
+# Create a summary table with continuous variables
+table2_summary <- CreateTableOne(
+  vars = continuous_vars,
+  data = table2_filtered,
+  factorVars = character(0)  # No factor variables
+)
+
+# Convert the printed summary (a character matrix with row names) to a data frame
+table2_df <- as.data.frame(print(table2_summary, quote = FALSE, noSpaces = TRUE))
+
+# Label rows by name instead of position: the printout has an extra "n" row and names the others like
+# "HB0 (mean (SD))", so labels[continuous_vars] does not line up. Unlabelled rows keep their name.
+table2_df <- table2_df %>%
+  rownames_to_column(var = "Variable") %>%
+  mutate(Variable = coalesce(unname(labels[sub(" \\(.*$", "", Variable)]), Variable))
+
+# Rename the 'Overall' column to "Mean (SD)" to indicate the values
+colnames(table2_df)[colnames(table2_df) == "Overall"] <- "Mean (SD)"
+
+# View the final table
+print(table2_df)
+# Question: weird results, how do we impute missing values?
+