fix bug, rerun

rokitalab · Nov 12, 2024 · ea5ffeb · ea5ffeb
1 parent 7e1521d
commit ea5ffeb
Show file tree

Hide file tree

Showing 4 changed files with 570 additions and 13 deletions.
diff --git a/analyses/molecular-subtyping-PB/01-molecular-subtype-pineoblastoma.html b/analyses/molecular-subtyping-PB/01-molecular-subtype-pineoblastoma.html
@@ -360,13 +360,13 @@ <h4 class="date">2023-12-06</h4>
 <div id="load-library" class="section level2">
 <h2>load library</h2>
 <pre class="r"><code>library(tidyverse)</code></pre>
-<pre><code>## ── Attaching core tidyverse packages ─────────────────────────────────────────────── tidyverse 2.0.0 ──
+<pre><code>## ── Attaching core tidyverse packages ───────────────────────────────────────── tidyverse 2.0.0 ──
 ## ✔ dplyr     1.1.4     ✔ readr     2.1.5
 ## ✔ forcats   1.0.0     ✔ stringr   1.5.1
 ## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
 ## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
 ## ✔ purrr     1.0.2     
-## ── Conflicts ───────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
+## ── Conflicts ─────────────────────────────────────────────────────────── tidyverse_conflicts() ──
 ## ✖ dplyr::filter() masks stats::filter()
 ## ✖ dplyr::lag()    masks stats::lag()
 ## ℹ Use the conflicted package (&lt;http://conflicted.r-lib.org/&gt;) to force all conflicts to become errors</code></pre>
@@ -385,7 +385,7 @@ <h2>set directories</h2>
 <h2>read files</h2>
 <pre class="r"><code>histo &lt;- readr::read_tsv(file.path(data_dir, &quot;histologies-base.tsv&quot;))</code></pre>
 <pre><code>## Rows: 47895 Columns: 64
-## ── Column specification ───────────────────────────────────────────────────────────────────────────────
+## ── Column specification ─────────────────────────────────────────────────────────────────────────
 ## Delimiter: &quot;\t&quot;
 ## chr (41): Kids_First_Participant_ID, Kids_First_Biospecimen_ID, sample_id, a...
 ## dbl (21): cell_line_passage, OS_days, EFS_days, age_at_diagnosis_days, age_a...

diff --git a/analyses/molecular-subtyping-PB/02-pineoblastoma-umap.Rmd b/analyses/molecular-subtyping-PB/02-pineoblastoma-umap.Rmd
@@ -48,33 +48,32 @@ Filter hist for methyl samples, and append to subtype df
 ```{r}
 hist_methyl <- hist %>%
   dplyr::filter(pathology_diagnosis == "Pineoblastoma",
-                experimental_strategy == "Methylation") 
+                experimental_strategy == "Methylation",
+                grepl("PB_", dkfz_v12_methylation_subclass)) 
 
 subtypes <- read_tsv(subtype_file) %>%
-  right_join(hist_methyl) %>%
-  filter(!is.na(molecular_subtype_methyl)) %>%
-  dplyr::rename(Kids_First_Biospecimen_ID_methyl = Kids_First_Biospecimen_ID) %>%
+  filter(!is.na(molecular_subtype_methyl),
+         Kids_First_Biospecimen_ID %in% hist_methyl$Kids_First_Biospecimen_ID) %>%
   # remove anything not PB and also the to be classified sample
   # update one with germline pathogenic variant (GPV) reported in Fiorca, et al 2024
-  mutate(molecular_subtype = case_when(Kids_First_Biospecimen_ID_methyl == "BS_SG2X2XQB" ~ paste0(molecular_subtype, " + DROSHA GPV"),
+  mutate(molecular_subtype = case_when(Kids_First_Biospecimen_ID == "BS_SG2X2XQB" ~ paste0(molecular_subtype, " + DROSHA GPV"),
          TRUE ~ molecular_subtype),
-         molecular_subtype_methyl = case_when(Kids_First_Biospecimen_ID_methyl == "BS_SG2X2XQB" ~ paste0(molecular_subtype_methyl, " + DROSHA GPV"),
+         molecular_subtype_methyl = case_when(Kids_First_Biospecimen_ID == "BS_SG2X2XQB" ~ paste0(molecular_subtype_methyl, " + DROSHA GPV"),
          TRUE ~ molecular_subtype_methyl))
 
 ```
 
 Get number of samples by pineoblastoma molecular subtype
 
 ```{r}
-table(hist_methyl$molecular_subtype)
+table(subtypes$molecular_subtype)
 ```
 
 Load methylation data and filter for ids in `subtypes`
 
 ```{r load methyl}
 methyl <- readRDS(methyl_file)
-
-pineo_methyl <- methyl[,colnames(methyl) %in% c("Probe_ID", subtypes$Kids_First_Biospecimen_ID_methyl)]
+pineo_methyl <- methyl[,colnames(methyl) %in% c("Probe_ID", subtypes$Kids_First_Biospecimen_ID)]
 
 pineo_methyl <- pineo_methyl %>%
   distinct(Probe_ID, .keep_all = TRUE) %>%
@@ -96,7 +95,7 @@ set.seed(2024)
 umap_results <- umap::umap(t(pineo_methyl[var_probes, ]), n_neighbors = 5)
 
 umap_plot_df <- data.frame(umap_results$layout) %>%
-  tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>%
+  tibble::rownames_to_column("Kids_First_Biospecimen_ID") %>%
   left_join(subtypes)
 ```
 

diff --git a/analyses/molecular-subtyping-PB/02-pineoblastoma-umap.html b/analyses/molecular-subtyping-PB/02-pineoblastoma-umap.html
diff --git a/analyses/molecular-subtyping-PB/plot/umap_pineo.pdf b/analyses/molecular-subtyping-PB/plot/umap_pineo.pdf