Skip to content

Commit

Permalink
Merge pull request #62 from FredHutch/handle_lm_issue
Browse files Browse the repository at this point in the history
add filtering
  • Loading branch information
cansavvy authored Sep 30, 2024
2 parents 588b580 + fa738ba commit 68c0f98
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 27 deletions.
7 changes: 7 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,15 @@ export(get_example_data)
export(gimap_annotate)
export(gimap_filter)
export(gimap_normalize)
export(qc_cdf)
export(qc_constructs_countzero_bar)
export(qc_cor_heatmap)
export(qc_plasmid_histogram)
export(qc_sample_hist)
export(qc_variance_hist)
export(run_qc)
export(setup_data)
export(supported_cell_lines)
import(dplyr)
import(ggplot2)
import(kableExtra)
Expand Down
7 changes: 1 addition & 6 deletions R/03-annotate.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,7 @@ gimap_annotate <- function(.data = NULL,
)
)

if (gimap_dataset$filtered_data$filter_step_run) {
keep_for_annotdf <- annotation_df$pgRNA_id %in% unlist(gimap_dataset$filtered_data$metadata_pg_ids)
annotation_df <- annotation_df[keep_for_annotdf, ]
}

################################ STORE IT ####################################
################################ STORE IT ####################################

if (gimap_dataset$filtered_data$filter_step_run) {
keep_for_annotdf <- annotation_df$pgRNA_id %in% unlist(gimap_dataset$filtered_data$metadata_pg_ids)
Expand Down
32 changes: 27 additions & 5 deletions R/04-normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#' @param replicates Specifies the column name of the metadata set up in `$metadata$sample_metadata`
#' that has a factor that represents column that specifies replicates. These replicates will be kept separate
#' for the late but the early and plasmid others will be averaged and used for normalization.
#' @param num_ids_wo_annot default is 20; the number of pgRNA IDs to display to console if they don't have corresponding annotation data;
#' ff there are more IDs without annotation data than this number, the output will be sent to a file rather than the console.
#' @param rm_ids_wo_annot default is TRUE; whether or not to filter out pgRNA IDs from the input dataset that don't have corresponding annotation data available
#' @export
#' @examples \dontrun{
#'
Expand All @@ -31,7 +34,9 @@
gimap_normalize <- function(.data = NULL,
gimap_dataset,
timepoints = NULL,
replicates = NULL) {
replicates = NULL,
num_ids_wo_annot = 20,
rm_ids_wo_annot = TRUE) {
# Code adapted from
# https://github.com/FredHutch/GI_mapping/blob/main/workflow/scripts/03-filter_and_calculate_LFC.Rmd

Expand Down Expand Up @@ -83,6 +88,7 @@ gimap_normalize <- function(.data = NULL,
dataset <- gimap_dataset$transformed_data$log2_cpm
pg_ids <- gimap_dataset$metadata$pg_ids
}

# Doing some reshaping to get one handy data frame
lfc_df <- dataset %>%
as.data.frame() %>%
Expand All @@ -94,18 +100,34 @@ gimap_normalize <- function(.data = NULL,
tidyr::pivot_wider(values_from = "log2_cpm",
names_from = c(timepoints, replicates))



missing_ids <- data.frame(
missing_ids = setdiff(lfc_df$pg_ids, gimap_dataset$annotation$pgRNA_id)
)

if ((nrow(missing_ids) > 0) & (nrow(missing_ids) < num_ids_wo_annot)){
message("The following ", nrow(missing_ids), " IDs were not found in the annotation data: \n", paste0(missing_ids, collapse = ", "))
} else {
missing_ids_file <- file.path("missing_ids_file.csv")
readr::write_csv(missing_ids, missing_ids_file)
}

if ((nrow(missing_ids) > 0) & (rm_ids_wo_annot == TRUE)){
lfc_df <- lfc_df %>%
filter(!pg_ids %in% missing_ids)
message("The input data for the IDs which were not found in the annotation data has been filtered out and will not be included in the analysis output.")
} else{
message("The input data for the IDs which were not found in the annotation data will be kept throughout the analysis, but any data from the annotation won't be available for them.")
}

# Calculate the means for each construct across the groups
plasmid_df <- apply(dplyr::select(lfc_df, starts_with("plasmid")), 1, mean)


late_vs_plasmid_df <- lfc_df %>%
dplyr::mutate_at(dplyr::vars(dplyr::starts_with("late")), ~.x - plasmid_df) %>%
dplyr::select(pg_ids, dplyr::starts_with("late")) %>%
dplyr::left_join(gimap_dataset$annotation, by = c("pg_ids" = "pgRNA_id"))


########################### Perform adjustments #############################

### Calculate medians
Expand All @@ -128,7 +150,7 @@ gimap_normalize <- function(.data = NULL,
# Then, divide by the median of negative controls (double non-targeting) minus
# median of positive controls (targeting 1 essential gene).
# This will effectively set the median of the positive controls (essential genes) to -1.
lfc_adj = lfc_adj1 / (median(lfc_adj1[norm_ctrl_flag == "negative_control"]) - median(lfc_adj1[norm_ctrl_flag == "positive_control"]))
lfc_adj = lfc_adj1 / (median(lfc_adj1[norm_ctrl_flag == "negative_control"], na.rm = TRUE) - median(lfc_adj1[norm_ctrl_flag == "positive_control"]))
) %>%
ungroup()

Expand Down
2 changes: 1 addition & 1 deletion R/06-calculate_gi.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ calc_gi <- function(.data = NULL,

if (!("gimap_dataset" %in% class(gimap_dataset))) stop("This function only works with gimap_dataset objects which can be made with the setup_data() function.")


## calculate expected double-targeting GI score by summing the two mean single-targeting
## CRISPR scores for that paralog pair
gi_calc_df <- gimap_dataset$crispr_score %>%
Expand All @@ -39,6 +38,7 @@ calc_gi <- function(.data = NULL,
target_type == "gene_ctrl" ~ mean_single_target_crispr_1 + mean_double_control_crispr_2,
target_type == "ctrl_gene" ~ mean_double_control_crispr_1 + mean_single_target_crispr_2
))

# Calculating mean crisprs
overall_results <- gi_calc_df %>%
dplyr::group_by(rep, pgRNA_target_double) %>%
Expand Down
2 changes: 1 addition & 1 deletion man/calc_crispr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/calc_gi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/gimap_annotate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions man/gimap_normalize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 1 addition & 8 deletions man/setup_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 68c0f98

Please sign in to comment.