diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index b6cd16c..f04c9c1 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -20,9 +20,7 @@ ## * GHA: GitHub Action ## * OS: operating system -on: - push: - pull_request: +on: workflow_dispatch name: R-CMD-check-bioc @@ -56,6 +54,7 @@ jobs: - { os: windows-latest, r: '4.1', bioc: '3.13'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM_ROOT: https://packagemanager.posit.co. RSPM: ${{ matrix.config.rspm }} NOT_CRAN: true TZ: UTC @@ -76,19 +75,19 @@ jobs: ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml ## If they update their steps, we will also need to update ours. - name: Checkout Repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 ## R is already included in the Bioconductor docker images - name: Setup R from r-lib if: runner.os != 'Linux' - uses: r-lib/actions/setup-r@master + uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} ## pandoc is already included in the Bioconductor docker images - name: Setup pandoc from r-lib if: runner.os != 'Linux' - uses: r-lib/actions/setup-pandoc@master + uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies run: | @@ -96,21 +95,21 @@ jobs: saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) shell: Rscript {0} - - name: Cache R packages + - name: Restore R package cache if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ${{ env.R_LIBS_USER }} - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_17-r-4.3-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_17-r-4.3- - name: Cache R packages on Linux if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /home/runner/work/_temp/Library - key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0- + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_17-r-4.3-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_17-r-4.3- - name: Install Linux system dependencies if: runner.os == 'Linux' @@ -126,6 +125,7 @@ jobs: sysreqs=$(Rscript -e 'cat("apt-get update -y && apt-get install -y", paste(gsub("apt-get install -y ", "", remotes::system_requirements("ubuntu", "20.04")), collapse = " "))') echo $sysreqs sudo -s eval "$sysreqs" + - name: Install macOS system dependencies if: matrix.config.os == 'macOS-latest' run: | @@ -160,7 +160,7 @@ jobs: - name: Set BiocVersion run: | - BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE) + BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE, update = FALSE) shell: Rscript {0} - name: Install dependencies pass 1 @@ -178,6 +178,7 @@ jobs: BiocManager::install("ComplexHeatmap", update = FALSE) remotes::install_cran("circlize") remotes::install_cran("tidyverse") + remotes::install_cran("DiagrammeR", type = ifelse("${{ runner.os }}" == "Windows", "win.binary", getOption("pkgType"))) remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) continue-on-error: true shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index 68b27c4..2e54515 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,16 +1,16 @@ Package: nichenetr Type: Package Title: NicheNet: Modeling Intercellular Communication by Linking Ligands to Target Genes -Version: 1.1.1 -Authors@R: person("Robin", "Browaeys", email = "robin.browaeys@ugent.be", - role = c("aut", "cre")) +Version: 2.0.0 +Authors@R: c(person("Robin", "Browaeys", role = c("aut")), + person("Chananchida", "Sang-aram", role = c("aut", "cre"), email = "chananchida.sangaram@ugent.be")) Description: This package allows you the investigate intercellular communication from a computational perspective. More specifically, it allows to investigate how interacting cells influence each other's gene expression. Functionalities of this package (e.g. including predicting extracellular upstream regulators and their affected target genes) build upon a probabilistic model of ligand-target links that was inferred by data-integration. License: GPL-3 Encoding: UTF-8 LazyData: true URL: https://github.com/saeyslab/nichenetr BugReports: https://github.com/saeyslab/nichenetr/issues -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 Depends: R (>= 3.0.0) Imports: dplyr, @@ -41,11 +41,16 @@ Imports: magrittr, circlize, ComplexHeatmap, - grDevices -Suggests: knitr, + grDevices, + ggnewscale, + ggforce, + shadowtext +Suggests: + knitr, rmarkdown, testthat, doMC, + mco, parallel, covr, tidyverse diff --git a/NAMESPACE b/NAMESPACE index ee9d40c..4c815ef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(alias_to_symbol_seurat) export(apply_hub_corrections) export(assess_influence_source) export(assess_rf_class_probabilities) +export(calculate_de) export(calculate_fraction_top_predicted) export(calculate_fraction_top_predicted_fisher) export(calculate_niche_de) @@ -53,12 +54,14 @@ export(extract_top_fraction_ligands) export(extract_top_fraction_targets) export(extract_top_n_ligands) export(extract_top_n_targets) +export(generate_prioritization_tables) export(get_active_ligand_receptor_network) export(get_active_ligand_target_df) export(get_active_ligand_target_matrix) export(get_active_regulatory_network) export(get_active_signaling_network) export(get_expressed_genes) +export(get_exprs_avg) export(get_lfc_celltype) export(get_ligand_activities_targets) export(get_ligand_signaling_path) @@ -88,6 +91,7 @@ export(make_heatmap_ggplot) export(make_ligand_activity_target_exprs_plot) export(make_ligand_receptor_lfc_plot) export(make_ligand_receptor_lfc_spatial_plot) +export(make_mushroom_plot) export(make_threecolor_heatmap_ggplot) export(mlrmbo_optimization) export(model_based_ligand_activity_prediction) @@ -115,6 +119,7 @@ export(process_mlrmbo_nichenet_optimization) export(process_niche_de) export(process_receiver_target_de) export(process_spatial_de) +export(process_table_to_ic) export(randomize_complete_network_source_specific) export(randomize_datasource_network) export(randomize_network) @@ -128,10 +133,14 @@ export(wrapper_average_performances) export(wrapper_evaluate_single_importances_ligand_prediction) import(Seurat) import(caret) +import(cowplot) import(dplyr) import(e1071) +import(ggforce) +import(ggnewscale) import(ggplot2) import(readr) +import(shadowtext) import(tibble) import(tidyr) importFrom(ComplexHeatmap,Legend) diff --git a/R/application_prediction.R b/R/application_prediction.R index a935648..401a66c 100644 --- a/R/application_prediction.R +++ b/R/application_prediction.R @@ -1,2220 +1,2024 @@ -#' @title Convert cluster assignment to settings format suitable for target gene prediction. -#' -#' @description \code{convert_cluster_to_settings} Convert cluster assignment to settings format suitable for target gene prediction. -#' -#' @usage -#' convert_cluster_to_settings(i, cluster_vector, setting_name, setting_from, background = NULL) -#' -#' @param i The cluster number of the cluster of interest to which genes should belong -#' @param cluster_vector Named vector containing the cluster number to which every gene belongs -#' @param setting_name Base name of the setting -#' @param setting_from Active ligands for the specific setting -#' @param background NULL or a character vector of genes belonging to the background. When NULL: the background will be formed by genes belonging to other clusters that the cluster of interest. Default NULL. If not NULL and genes present in the cluster of interest are in this vector of background gene names, these genes will be removed from the background. -#' -#' @return A list with following elements: $name (indicating the cluster id), $from, $response. $response is a gene-named logical vector indicating whether the gene is part of the respective cluster. -#' -#' @examples -#' \dontrun{ -#' genes_clusters = c("TGFB1" = 1,"TGFB2" = 1,"TGFB3" = 2) -#' cluster_settings = lapply(seq(length(unique(genes_clusters))), convert_cluster_to_settings, cluster_vector = genes_clusters, setting_name = "example", setting_from = "BMP2") -#' } -#' -#' @export -#' -convert_cluster_to_settings = function(i, cluster_vector, setting_name, setting_from, background = NULL){ - - # input check - if(!is.numeric(i) | length(i) != 1 | i <= 0) - stop("i should be a number higher than 0") - if(!is.numeric(cluster_vector) | is.null(names(cluster_vector))) - stop("cluster_vector should be a named numeric vector") - if(!is.character(setting_name)) - stop("setting_name should be a character vector") - if(!is.character(setting_from)) - stop("setting_from should be a character vector") - if(!is.character(background) & !is.null(background)) - stop("background should be a character vector or NULL") - - requireNamespace("dplyr") - - - genes_cluster_oi = cluster_vector[cluster_vector == i] %>% names() - - if (is.null(background)){ - response = names(cluster_vector) %in% genes_cluster_oi - names(response) = names(cluster_vector) - } else { - background = background[(background %in% genes_cluster_oi) == FALSE] - background_logical = rep(FALSE,times = length(background)) - names(background_logical) = background - cluster_logical = rep(TRUE,times = length(genes_cluster_oi)) - names(cluster_logical) = genes_cluster_oi - response = c(background_logical,cluster_logical) - } - return(list(name = paste0(setting_name,"_cluster_",i), from = setting_from, response = response)) -} -#' @title Predict activities of ligands in regulating expression of a gene set of interest -#' -#' @description \code{predict_ligand_activities} Predict activities of ligands in regulating expression of a gene set of interest. Ligand activities are defined as how well they predict the observed transcriptional response (i.e. gene set) according to the NicheNet model. -#' -#' @usage -#' predict_ligand_activities(geneset, background_expressed_genes,ligand_target_matrix, potential_ligands, single = TRUE,...) -#' -#' @param geneset Character vector of the gene symbols of genes of which the expression is potentially affected by ligands from the interacting cell. -#' @param background_expressed_genes Character vector of gene symbols of the background, non-affected, genes (can contain the symbols of the affected genes as well). -#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). -#' @param potential_ligands Character vector giving the gene symbols of the potentially active ligands you want to define ligand activities for. -#' @param single TRUE if you want to calculate ligand activity scores by considering every ligand individually (recommended). FALSE if you want to calculate ligand activity scores as variable importances of a multi-ligand classification model. -#' @param ... Additional parameters for get_multi_ligand_importances if single = FALSE. -#' -#' @return A tibble giving several ligand activity scores. Following columns in the tibble: $test_ligand, $auroc, $aupr and $pearson. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' ligand_activities = predict_ligand_activities(geneset = geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -#' } -#' -#' @export -#' -predict_ligand_activities = function(geneset,background_expressed_genes,ligand_target_matrix, potential_ligands, single = TRUE,...){ - setting = list(geneset) %>% - lapply(convert_gene_list_settings_evaluation, name = "gene set", ligands_oi = potential_ligands, background = background_expressed_genes) - if (single == TRUE){ - settings_ligand_prediction = setting %>% - convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = TRUE) - ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE) %>% bind_rows() - - } else { - settings_ligand_prediction = setting %>% - convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = FALSE) - ligand_importances = settings_ligand_prediction %>% lapply(get_multi_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE, ...) %>% bind_rows() - - } - return(ligand_importances %>% select(test_ligand,auroc,aupr,aupr_corrected, pearson)) -} -#' @title Infer weighted active ligand-target links between a possible ligand and target genes of interest -#' -#' @description \code{get_weighted_ligand_target_links} Infer active ligand target links between possible lignands and genes belonging to a gene set of interest: consider the intersect between the top n targets of a ligand and the gene set. -#' -#' @usage -#' get_weighted_ligand_target_links(ligand, geneset,ligand_target_matrix,n = 250) -#' -#' @param geneset Character vector of the gene symbols of genes of which the expression is potentially affected by ligands from the interacting cell. -#' @param ligand Character vector giving the gene symbols of the potentially active ligand for which you want to find target genes. -#' @param n The top n of targets per ligand that will be considered. Default: 250. -#' @inheritParams predict_ligand_activities -#' -#' @return A tibble with columns ligand, target and weight (i.e. regulatory potential score). -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligand = "TNF" -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' active_ligand_target_links_df = get_weighted_ligand_target_links(ligand = potential_ligand, geneset = geneset, ligand_target_matrix = ligand_target_matrix, n = 250) -#' } -#' -#' @export -#' -get_weighted_ligand_target_links = function(ligand, geneset,ligand_target_matrix,n = 250){ - top_n_score = ligand_target_matrix[,ligand] %>% sort(decreasing = T) %>% head(n) %>% min() - targets = intersect(ligand_target_matrix[,ligand] %>% .[. >= top_n_score ] %>% names(),geneset) - if (length(targets) == 0){ - ligand_target_weighted_df = tibble(ligand = ligand, target = NA, weight = NA) - } else if (length(targets) == 1) { - ligand_target_weighted_df = tibble(ligand = ligand, target = targets, weight = ligand_target_matrix[targets,ligand]) - } else { - ligand_target_weighted_df = tibble(ligand = ligand, target = names(ligand_target_matrix[targets,ligand])) %>% inner_join(tibble(target = names(ligand_target_matrix[targets,ligand]), weight = ligand_target_matrix[targets,ligand]), by = "target") - } - return(ligand_target_weighted_df) -} -#' @title Prepare heatmap visualization of the ligand-target links starting from a ligand-target tibble. -#' -#' @description \code{prepare_ligand_target_visualization} Prepare heatmap visualization of the ligand-target links starting from a ligand-target tibble. Get regulatory potential scores between all pairs of ligands and targets documented in this tibble. For better visualization, we propose to define a quantile cutoff on the ligand-target scores. -#' -#' @usage -#' prepare_ligand_target_visualization(ligand_target_df, ligand_target_matrix, cutoff = 0.25) -#' -#' @param cutoff Quantile cutoff on the ligand-target scores of the input weighted ligand-target network. Scores under this cutoff will be set to 0. -#' @param ligand_target_df Tibble with columns 'ligand', 'target' and 'weight' to indicate ligand-target regulatory potential scores of interest. -#' @inheritParams predict_ligand_activities -#' -#' @return A matrix giving the ligand-target regulatory potential scores between ligands of interest and their targets genes part of the gene set of interest. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' active_ligand_target_links_df = potential_ligands %>% lapply(get_weighted_ligand_target_links, geneset = geneset, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows() -#' active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25) -#' } -#' -#' @export -#' -prepare_ligand_target_visualization = function(ligand_target_df, ligand_target_matrix, cutoff = 0.25){ - - # define a cutoff on the ligand-target links - cutoff_include_all_ligands = ligand_target_df$weight %>% quantile(cutoff) - - # give a score of 0 to ligand-target links not higher than the defined cutoff - ligand_target_matrix_oi = ligand_target_matrix - ligand_target_matrix_oi[ligand_target_matrix_oi < cutoff_include_all_ligands] = 0 - - # consider only targets belonging to the top250 targets of individual ligands and with at least one ligand-link with score higher than the defined cutoff - ligand_target_vis = ligand_target_matrix_oi[ligand_target_df$target %>% unique(),ligand_target_df$ligand %>% unique()] - dim(ligand_target_vis) = c(length(ligand_target_df$target %>% unique()), length(ligand_target_df$ligand %>% unique())) - all_targets = ligand_target_df$target %>% unique() - all_ligands = ligand_target_df$ligand %>% unique() - rownames(ligand_target_vis) = all_targets - colnames(ligand_target_vis) = all_ligands - - keep_targets = all_targets[ligand_target_vis %>% apply(1,sum) > 0] - keep_ligands = all_ligands[ligand_target_vis %>% apply(2,sum) > 0] - - - ligand_target_vis_filtered = ligand_target_vis[keep_targets,keep_ligands] - - - if(is.matrix(ligand_target_vis_filtered)){ - rownames(ligand_target_vis_filtered) = keep_targets - colnames(ligand_target_vis_filtered) = keep_ligands - - } else { - dim(ligand_target_vis_filtered) = c(length(keep_targets), length(keep_ligands)) - rownames(ligand_target_vis_filtered) = keep_targets - colnames(ligand_target_vis_filtered) = keep_ligands - } - - if(nrow(ligand_target_vis_filtered) > 1 & ncol(ligand_target_vis_filtered) > 1){ - distoi = dist(1-cor(t(ligand_target_vis_filtered))) - hclust_obj = hclust(distoi, method = "ward.D2") - order_targets = hclust_obj$labels[hclust_obj$order] - - distoi_targets = dist(1-cor(ligand_target_vis_filtered)) - hclust_obj = hclust(distoi_targets, method = "ward.D2") - order_ligands = hclust_obj$labels[hclust_obj$order] - - } else { - order_targets = rownames(ligand_target_vis_filtered) - order_ligands = colnames(ligand_target_vis_filtered) - } - - vis_ligand_target_network = ligand_target_vis_filtered[order_targets,order_ligands] - dim(vis_ligand_target_network) = c(length(order_targets), length(order_ligands)) - rownames(vis_ligand_target_network) = order_targets - colnames(vis_ligand_target_network) = order_ligands - return(vis_ligand_target_network) - -} -#' @title Assess probability that a target gene belongs to the geneset based on a multi-ligand random forest model -#' -#' @description \code{assess_rf_class_probabilities} Assess probability that a target gene belongs to the geneset based on a multi-ligand random forest model (with cross-validation). Target genes and background genes will be split in different groups in a stratified way. -#' -#' @usage -#' assess_rf_class_probabilities(round,folds,geneset,background_expressed_genes,ligands_oi,ligand_target_matrix) -#' -#' @param ligands_oi Character vector giving the gene symbols of the ligands you want to build the multi-ligand with. -#' @param round Integer describing which fold of the cross-validation scheme it is. -#' @param folds Integer describing how many folds should be used. -#' @inheritParams predict_ligand_activities -#' -#' @return A tibble with columns: $gene, $response, $prediction. Response indicates whether the gene belongs to the geneset of interest, prediction gives the probability this gene belongs to the geneset according to the random forest model. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' fold1_rf_prob = assess_rf_class_probabilities(round = 1,folds = 2,geneset = geneset,background_expressed_genes = background_expressed_genes ,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) -#' } -#' -#' @export -#' -assess_rf_class_probabilities = function(round,folds,geneset,background_expressed_genes,ligands_oi, ligand_target_matrix){ - set.seed(round) - geneset_shuffled = sample(geneset, size = length(geneset)) - geneset_grouped = split(geneset_shuffled,1:folds) - - strict_background_expressed_genes = background_expressed_genes[!background_expressed_genes %in% geneset] - set.seed(round) - strict_background_expressed_genes_shuffled = sample(strict_background_expressed_genes, size = length(strict_background_expressed_genes)) - strict_background_expressed_genes_grouped = split(strict_background_expressed_genes_shuffled,1:folds) - - geneset_predictions_all = seq(length(geneset_grouped)) %>% lapply(rf_target_prediction,geneset_grouped,strict_background_expressed_genes_grouped,ligands_oi,ligand_target_matrix) %>% bind_rows() - geneset_predictions_all = geneset_predictions_all %>% mutate(response = gsub("\\.","",response) %>% as.logical()) -} -#' @title Assess how well classification predictions accord to the expected response -#' -#' @description \code{classification_evaluation_continuous_pred_wrapper} Assess how well classification predictions accord to the expected response. -#' -#' @usage -#' classification_evaluation_continuous_pred_wrapper(response_prediction_tibble) -#' -#' @param response_prediction_tibble Tibble with columns "response" and "prediction" (e.g. output of function `assess_rf_class_probabilities`) -#' -#' @return A tibble showing several classification evaluation metrics. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' fold1_rf_prob = assess_rf_class_probabilities(round = 1,folds = 2,geneset = geneset,background_expressed_genes = background_expressed_genes ,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) -# classification_evaluation_continuous_pred_wrapper(fold1_rf_prob) -#' } -#' -#' @export -#' -classification_evaluation_continuous_pred_wrapper = function(response_prediction_tibble) { - prediction_performances = classification_evaluation_continuous_pred(response_prediction_tibble$prediction, response_prediction_tibble$response, iregulon = FALSE) - return(prediction_performances) -} -#' @title Find which genes were among the top-predicted targets genes in a specific cross-validation round and see whether these genes belong to the gene set of interest as well. -#' -#' @description \code{get_top_predicted_genes} Find which genes were among the top-predicted targets genes in a specific cross-validation round and see whether these genes belong to the gene set of interest as well. -#' -#' @usage -#' get_top_predicted_genes(round,gene_prediction_list, quantile_cutoff = 0.95) -#' -#' @param gene_prediction_list List with per round of cross-validation: a tibble with columns "gene", "prediction" and "response" (e.g. output of function `assess_rf_class_probabilities`) -#' @param round Integer describing which fold of the cross-validation scheme it is. -#' @param quantile_cutoff Quantile of which genes should be considered as top-predicted targets. Default: 0.95, thus considering the top 5 percent predicted genes as predicted targets. -#' -#' @return A tibble indicating for every gene whether it belongs to the geneset and whether it belongs to the top-predicted genes in a specific cross-validation round. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' gene_predictions_list = seq(2) %>% lapply(assess_rf_class_probabilities,2, geneset = geneset,background_expressed_genes = background_expressed_genes,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) -#' seq(length(gene_predictions_list)) %>% lapply(get_top_predicted_genes,gene_predictions_list) -#' } -#' -#' @export -#' -get_top_predicted_genes = function(round,gene_prediction_list, quantile_cutoff = 0.95){ - affected_gene_predictions = gene_prediction_list[[round]] - predicted_positive = affected_gene_predictions %>% - arrange(-prediction) %>% - mutate(predicted_top_target = prediction >= quantile(prediction,quantile_cutoff)) %>% - filter(predicted_top_target) %>% rename(true_target = response) %>% - select(gene,true_target,predicted_top_target) - colnames(predicted_positive) = c("gene","true_target",paste0("predicted_top_target_round",round)) - return(predicted_positive) -} -#' @title Determine the fraction of genes belonging to the geneset or background and to the top-predicted genes. -#' -#' @description \code{calculate_fraction_top_predicted} Defines the fraction of genes belonging to the geneset or background and to the top-predicted genes. -#' -#' @usage -#' calculate_fraction_top_predicted(affected_gene_predictions, quantile_cutoff = 0.95) -#' -#' @param affected_gene_predictions Tibble with columns "gene", "prediction" and "response" (e.g. output of function `assess_rf_class_probabilities`) -#' @param quantile_cutoff Quantile of which genes should be considered as top-predicted targets. Default: 0.95, thus considering the top 5 percent predicted genes as predicted targets. -#' -#' @return A tibble indicating the number of genes belonging to the gene set of interest or background (true_target column), the number and fraction of genes of these gruops that were part of the top predicted targets in a specific cross-validation round. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' gene_predictions_list = seq(2) %>% lapply(assess_rf_class_probabilities,2, geneset = geneset,background_expressed_genes = background_expressed_genes,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) -#' target_prediction_performances_discrete_cv = gene_predictions_list %>% lapply(calculate_fraction_top_predicted) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(gene_predictions_list), each = 2)) - -#' } -#' -#' @export -#' -calculate_fraction_top_predicted = function(affected_gene_predictions, quantile_cutoff = 0.95){ - predicted_positive = affected_gene_predictions %>% arrange(-prediction) %>% filter(prediction >= quantile(prediction,quantile_cutoff)) %>% group_by(response) %>% count() %>% rename(positive_prediction = n) %>% rename(true_target = response) - all = affected_gene_predictions %>% arrange(-prediction) %>% rename(true_target = response) %>% group_by(true_target) %>% count() - inner_join(all,predicted_positive, by = "true_target") %>% mutate(fraction_positive_predicted = positive_prediction/n) -} -#' @title Perform a Fisher's exact test to determine whether genes belonging to the gene set of interest are more likely to be part of the top-predicted targets. -#' -#' @description \code{calculate_fraction_top_predicted_fisher} Performs a Fisher's exact test to determine whether genes belonging to the gene set of interest are more likely to be part of the top-predicted targets. -#' -#' @usage -#' calculate_fraction_top_predicted_fisher(affected_gene_predictions, quantile_cutoff = 0.95, p_value_output = TRUE) -#' -#' @param p_value_output Should total summary or p-value be returned as output? Default: TRUE. -#' @inheritParams calculate_fraction_top_predicted -#' -#' @return Summary of the Fisher's exact test or just the p-value -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' geneset = c("SOCS2","SOCS3", "IRF1") -#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' gene_predictions_list = seq(2) %>% lapply(assess_rf_class_probabilities,2, geneset = geneset,background_expressed_genes = background_expressed_genes,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) -#' target_prediction_performances_fisher_pval = gene_predictions_list %>% lapply(calculate_fraction_top_predicted_fisher) %>% unlist() %>% mean() -#' } -#' -#' @export -#' -calculate_fraction_top_predicted_fisher = function(affected_gene_predictions, quantile_cutoff = 0.95, p_value_output = TRUE){ - predicted_positive = affected_gene_predictions %>% arrange(-prediction) %>% filter(prediction >= quantile(prediction,quantile_cutoff)) %>% group_by(response) %>% count() %>% rename(positive_prediction = n) - all = affected_gene_predictions %>% arrange(-prediction) %>% group_by(response) %>% count() - results_df = inner_join(all,predicted_positive, by = "response") %>% mutate(fraction_positive_predicted = positive_prediction/n) - tp = results_df %>% filter(response == TRUE) %>% .$positive_prediction - fp = results_df %>% filter(response == FALSE) %>% .$positive_prediction - fn = (results_df %>% filter(response == TRUE) %>% .$n) - (results_df %>% filter(response == TRUE) %>% .$positive_prediction) - tn = (results_df %>% filter(response == FALSE) %>% .$n) - (results_df %>% filter(response == FALSE) %>% .$positive_prediction) - contingency_table = matrix(c(tp,fp,fn,tn), nrow = 2,dimnames = list(c("geneset", "background"), c("top-predicted", "no-top-predicted"))) - summary = fisher.test(contingency_table, alternative = "greater") - if(p_value_output == TRUE){ - return(summary$p.value) - } else { - return(summary) - } -} -#' @title Cut off outer quantiles and rescale to a [0, 1] range -#' -#' @description \code{scale_quantile} Cut off outer quantiles and rescale to a [0, 1] range -#' -#' @usage -#' scale_quantile(x, outlier_cutoff = .05) -#' -#' @param x A numeric vector, matrix or data frame. -#' @param outlier_cutoff The quantile cutoff for outliers (default 0.05). -#' -#' @return The centered, scaled matrix or vector. The numeric centering and scalings used are returned as attributes. -#' -#' @examples -#' \dontrun{ -#' ## Generate a matrix from a normal distribution -#' ## with a large standard deviation, centered at c(5, 5) -#' x <- matrix(rnorm(200*2, sd = 10, mean = 5), ncol = 2) -#' -#' ## Scale the dataset between [0,1] -#' x_scaled <- scale_quantile(x) -#' -#' ## Show ranges of each column -#' apply(x_scaled, 2, range) -#' } -#' @export -scale_quantile <- function(x, outlier_cutoff = .05) { - # same function as scale_quantile from dynutils (copied here for use in vignette to avoid having dynutils as dependency) - # credits to the amazing (w/z)outer and r(obrecht)cannood(t) from dynverse (https://github.com/dynverse)! - if (is.null(dim(x))) { - sc <- scale_quantile(matrix(x, ncol = 1), outlier_cutoff = outlier_cutoff) - out <- sc[,1] - names(out) <- names(x) - attr(out, "addend") <- attr(sc, "addend") - attr(out, "multiplier") <- attr(sc, "multiplier") - out - } else { - quants <- apply(x, 2, stats::quantile, c(outlier_cutoff, 1 - outlier_cutoff), na.rm = TRUE) - - addend <- -quants[1,] - divisor <- apply(quants, 2, diff) - divisor[divisor == 0] <- 1 - - apply_quantile_scale(x, addend, 1 / divisor) - } -} -#' @title Prepare single-cell expression data to perform ligand activity analysis -#' -#' @description \code{convert_single_cell_expression_to_settings} Prepare single-cell expression data to perform ligand activity analysis -#' -#' @usage -#' convert_single_cell_expression_to_settings(cell_id, expression_matrix, setting_name, setting_from, regression = FALSE) -#' -#' @param cell_id Identity of the cell of interest -#' @param setting_name Name of the dataset -#' @param expression_matrix Gene expression matrix of single-cells -#' @param setting_from Character vector giving the gene symbols of the potentially active ligands you want to define ligand activities for. -#' @param regression Perform regression-based ligand activity analysis (TRUE) or classification-based ligand activity analysis (FALSE) by considering the genes expressed higher than the 0.975 quantiles as genes of interest. Default: FALSE. -#' -#' @return A list with slots $name, $from and $response respectively containing the setting name, potentially active ligands and the response to predict (whether genes belong to gene set of interest; i.e. most strongly expressed genes in a cell) -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' cell_ids = c("cell1","cell2") -#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) -#' rownames(expression_scaled) = cell_ids -#' colnames(expression_scaled) = genes -#' settings = convert_single_cell_expression_to_settings(cell_id = cell_ids[1], expression_matrix = expression_scaled, setting_name = "test", setting_from = potential_ligands) -#' } -#' -#' @export -#' -convert_single_cell_expression_to_settings = function(cell_id, expression_matrix, setting_name, setting_from, regression = FALSE){ - # input check - requireNamespace("dplyr") - - if (regression == TRUE){ - response = expression_matrix[cell_id,] - } else { - response_continuous = expression_matrix[cell_id,] - response = response_continuous >= quantile(response_continuous,0.975) - } - return(list(name = paste0(setting_name,"_",cell_id), from = setting_from, response = response)) -} -#' @title Single-cell ligand activity prediction -#' -#' @description \code{predict_single_cell_ligand_activities} For every individual cell of interest, predict activities of ligands in regulating expression of genes that are stronger expressed in that cell compared to other cells (0.975 quantile). Ligand activities are defined as how well they predict the observed transcriptional response (i.e. gene set) according to the NicheNet model. -#' -#' @usage -#' predict_single_cell_ligand_activities(cell_ids, expression_scaled,ligand_target_matrix, potential_ligands, single = TRUE,...) -#' -#' @param cell_ids Identities of cells for which the ligand activities should be calculated. -#' @param expression_scaled Scaled expression matrix of single-cells (scaled such that high values indicate that a gene is stronger expressed in that cell compared to others) -#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). -#' @param potential_ligands Character vector giving the gene symbols of the potentially active ligands you want to define ligand activities for. -#' @param single TRUE if you want to calculate ligand activity scores by considering every ligand individually (recommended). FALSE if you want to calculate ligand activity scores as variable importances of a multi-ligand classification model. -#' @param ... Additional parameters for get_multi_ligand_importances if single = FALSE. -#' -#' @return A tibble giving several ligand activity scores for single cells. Following columns in the tibble: $setting, $test_ligand, $auroc, $aupr and $pearson. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' cell_ids = c("cell1","cell2") -#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) -#' rownames(expression_scaled) = cell_ids -#' colnames(expression_scaled) = genes -#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -#' } -#' -#' @export -#' -predict_single_cell_ligand_activities = function(cell_ids, expression_scaled,ligand_target_matrix, potential_ligands, single = TRUE,...){ - settings_single_cell_ligand_pred = cell_ids %>% lapply(convert_single_cell_expression_to_settings, expression_scaled, "", potential_ligands) - if (single == TRUE){ - settings_ligand_prediction = settings_single_cell_ligand_pred %>% convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = TRUE) - - ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE) %>% bind_rows() %>% mutate(setting = gsub("^_","",setting)) - - } else { - settings_ligand_prediction = settings_single_cell_ligand_pred %>% convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = FALSE) - - ligand_importances = settings_ligand_prediction %>% lapply(get_multi_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE, ...) %>% bind_rows() %>% mutate(setting = gsub("^_","",setting)) - - } - return(ligand_importances %>% select(setting,test_ligand,auroc,aupr,pearson)) -} -#' @title Normalize single-cell ligand activities -#' -#' @description \code{normalize_single_cell_ligand_activities} Normalize single-cell ligand activities to make ligand activities over different cells comparable. -#' @usage -#' normalize_single_cell_ligand_activities(ligand_activities) -#' -#' @param ligand_activities Output from the function `predict_single_cell_ligand_activities`. -#' -#' @return A tibble giving the normalized ligand activity scores for single cells. Following columns in the tibble: $cell, $ligand, $pearson, which is the normalized ligand activity value. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' cell_ids = c("cell1","cell2") -#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) -#' rownames(expression_scaled) = cell_ids -#' colnames(expression_scaled) = genes -#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -#' normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) -#' } -#' -#' @export -#' -normalize_single_cell_ligand_activities = function(ligand_activities){ - single_ligand_activities_pearson_norm = ligand_activities %>% - group_by(setting) %>% - mutate(pearson = nichenetr::scaling_modified_zscore(pearson)) %>% - ungroup() %>% - rename(cell = setting, ligand = test_ligand) %>% - distinct(cell,ligand,pearson) - - single_ligand_activities_pearson_norm_df = single_ligand_activities_pearson_norm %>% - spread(cell, pearson,fill = min(.$pearson)) - - single_ligand_activities_pearson_norm_matrix = single_ligand_activities_pearson_norm_df %>% - select(-ligand) %>% - t() %>% - magrittr::set_colnames(single_ligand_activities_pearson_norm_df$ligand) - - single_cell_ligand_activities_pearson_norm_df = single_ligand_activities_pearson_norm_matrix %>% - data.frame() %>% - rownames_to_column("cell") %>% - as_tibble() -} -#' @title Perform a correlation and regression analysis between cells' ligand activities and property scores of interest -#' -#' @description \code{single_ligand_activity_score_regression} Performs a correlation and regression analysis between cells' ligand activities and property scores of interest. -#' @usage -#' single_ligand_activity_score_regression(ligand_activities, scores_tbl) -#' -#' @param ligand_activities Output from the function `normalize_single_cell_ligand_activities`. -#' @param scores_tbl a tibble containing scores for every cell (columns: $cell and $score). The score should correspond to the property of interest -#' -#' @return A tibble giving for every ligand, the correlation/regression coefficients giving information about the relation between its activity and the property of interest. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' cell_ids = c("cell1","cell2") -#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) -#' rownames(expression_scaled) = cell_ids -#' colnames(expression_scaled) = genes -#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -#' normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) -#' cell_scores_tbl = tibble(cell = cell_ids, score = c(1,4)) -#' regression_analysis_output = single_ligand_activity_score_regression(normalized_ligand_activities,cell_scores_tbl) -#' } -#' -#' @export -#' -single_ligand_activity_score_regression = function(ligand_activities, scores_tbl){ - combined = inner_join(scores_tbl,ligand_activities) - output = lapply(combined %>% select(-cell, -score), function(activity_prediction, combined){ - geneset_score = combined$score - metrics = regression_evaluation(activity_prediction,geneset_score) - }, combined) - ligands = names(output) - output_df = output %>% bind_rows() %>% mutate(ligand = ligands) - return(output_df) -} -#' @title Assess how well cells' ligand activities predict a binary property of interest of cells. -#' -#' @description \code{single_ligand_activity_score_classification} Evaluates classification performances: it assesses how well cells' ligand activities can predict a binary property of interest. -#' @usage -#' single_ligand_activity_score_classification(ligand_activities, scores_tbl) -#' -#' @param ligand_activities Output from the function `normalize_single_cell_ligand_activities`. -#' @param scores_tbl a tibble indicating for every cell whether the property of interests holds TRUE or FALSE (columns: $cell: character vector with cell ids and $score: logical vector according to property of interest). -#' -#' @return A tibble giving for every ligand, the classification performance metrics giving information about the relation between its activity and the property of interest. -#' -#' @examples -#' \dontrun{ -#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) -#' ligands = list("TNF","BMP2","IL4") -#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) -#' potential_ligands = c("TNF","BMP2","IL4") -#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") -#' cell_ids = c("cell1","cell2") -#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) -#' rownames(expression_scaled) = cell_ids -#' colnames(expression_scaled) = genes -#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -#' normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) -#' cell_scores_tbl = tibble(cell = cell_ids, score = c(TRUE,FALSE)) -#' classification_analysis_output = single_ligand_activity_score_classification(normalized_ligand_activities,cell_scores_tbl) -#' } -#' -#' @export -#' -single_ligand_activity_score_classification = function(ligand_activities, scores_tbl){ - combined = inner_join(scores_tbl, ligand_activities) - output = lapply(combined %>% select(-cell, -score), function(activity_prediction, - combined) { - geneset_score = combined$score - - metrics = classification_evaluation_continuous_pred(activity_prediction, - geneset_score, iregulon = F) - }, combined) - - - ligands = names(output) - output_df = output %>% bind_rows() %>% mutate(ligand = ligands) - return(output_df) -} -single_ligand_activity_score_regression = function(ligand_activities, scores_tbl){ - combined = inner_join(scores_tbl,ligand_activities) - output = lapply(combined %>% select(-cell, -score), function(activity_prediction, combined){ - geneset_score = combined$score - metrics = regression_evaluation(activity_prediction,geneset_score) - }, combined) - ligands = names(output) - output_df = output %>% bind_rows() %>% mutate(ligand = ligands) - return(output_df) -} -#' @title Perform NicheNet analysis on Seurat object: explain DE between conditions -#' -#' @description \code{nichenet_seuratobj_aggregate} Perform NicheNet analysis on Seurat object: explain differential expression (DE) in a receiver celltype between two different conditions by ligands expressed by sender cells -#' @usage -#' nichenet_seuratobj_aggregate(receiver, seurat_obj, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20,top_n_targets = 200, cutoff_visualization = 0.33,organism = "human",verbose = TRUE, assay_oi = NULL) -#' -#' @param receiver Name of cluster identity/identities of cells that are presumably affected by intercellular communication with other cells -#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. -#' @param condition_colname Name of the column in the meta data dataframe that indicates which condition/sample cells were coming from. -#' @param condition_oi Condition of interest in which receiver cells were presumably affected by other cells. Should be a name present in the `condition_colname` column of the metadata. -#' @param condition_reference The second condition (e.g. reference or steady-state condition). Should be a name present in the `condition_colname` column of the metadata. -#' @param sender Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all". -#' @param expression_pct To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 -#' @param lfc_cutoff Cutoff on log fold change in the wilcoxon differential expression test. Default: 0.25. -#' @param geneset Indicate whether to consider all DE genes between condition 1 and 2 ("DE"), or only genes upregulated in condition 1 ("up"), or only genes downregulad in condition 1 ("down"). -#' @param filter_top_ligands Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE. -#' @param top_n_ligands Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 20. -#' @param top_n_targets To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200. -#' @param cutoff_visualization Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33. -#' @param organism Organism from which cells originate."human" (default) or "mouse". -#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). -#' @param lr_network The ligand-receptor network (columns that should be present: $from, $to). -#' @param weighted_networks The NicheNet weighted networks denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network. -#' @param verbose Print out the current analysis stage. Default: TRUE. -#' @inheritParams get_expressed_genes -#' -#' @return A list with the following elements: -#' $ligand_activities: data frame with output ligand activity analysis; -#' $top_ligands: top_n ligands based on ligand activity; -#' $top_targets: active, affected target genes of these ligands; -#' $top_receptors: receptors of these ligands; -#' $ligand_target_matrix: matrix indicating regulatory potential scores between active ligands and their predicted targets; -#' $ligand_target_heatmap: heatmap of ligand-target regulatory potential; -#' $ligand_target_df: data frame showing regulatory potential scores of predicted active ligand-target network; -#' $ligand_activity_target_heatmap: heatmap showing both ligand activity scores and target genes of these top ligands; -#' $ligand_expression_dotplot: expression dotplot of the top ligands; -#' $ligand_differential_expression_heatmap = differential expression heatmap of the top ligands; -#' $ligand_receptor_matrix: matrix of ligand-receptor interactions; -#' $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; -#' $ligand_receptor_df: data frame of ligand-receptor interactions; -#' $ligand_receptor_matrix_bonafide: ligand-receptor matrix, after filtering out interactions predicted by PPI; -#' $ligand_receptor_heatmap_bonafide: heatmap of ligand-receptor interactions after filtering out interactions predicted by PPI; -#' $ligand_receptor_df_bonafide: data frame of ligand-receptor interactions, after filtering out interactions predicted by PPI; -#' $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; -#' $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. -#' -#' @import Seurat -#' @import dplyr -#' @importFrom magrittr set_rownames set_colnames -#' -#' @examples -#' \dontrun{ -#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) -#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -#' lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -#' weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) -#' nichenet_seuratobj_aggregate(receiver = "CD8 T", seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") -#' } -#' -#' @export -#' -nichenet_seuratobj_aggregate = function(receiver, seurat_obj, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks, - expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE ,top_n_ligands = 20, - top_n_targets = 200, cutoff_visualization = 0.33, - organism = "human",verbose = TRUE, assay_oi = NULL) -{ - requireNamespace("Seurat") - requireNamespace("dplyr") - - # input check - if(! "RNA" %in% names(seurat_obj@assays)){ - if ("Spatial" %in% names(seurat_obj@assays)){ - warning("You are going to apply NicheNet on a spatial seurat object. Be sure it's ok to use NicheNet the way you are planning to do it. So this means: you should have changes in gene expression in receiver cells caused by cell-cell interactions. Note that in the case of spatial transcriptomics, you are not dealing with single cells but with 'spots' containing multiple cells of the same of different cell types.") - - if (class(seurat_obj@assays$Spatial@data) != "matrix" & class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { - warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") - } - if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") - } - }} else { - if (class(seurat_obj@assays$RNA@data) != "matrix" & - class(seurat_obj@assays$RNA@data) != "dgCMatrix") { - warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") - } - - if ("integrated" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == - 0) - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") - } - else if ("SCT" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == - 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") - } - } - else { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") - } - } - } - - if(!condition_colname %in% colnames(seurat_obj@meta.data)) - stop("Your column indicating the conditions/samples of interest should be in the metadata dataframe") - if(sum(condition_oi %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_oi)) - stop("condition_oi should be in the condition-indicating column") - if(sum(condition_reference %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_reference)) - stop("condition_reference should be in the condition-indicating column") - if(sum(receiver %in% unique(Idents(seurat_obj))) != length(receiver)) - stop("The defined receiver cell type should be an identity class of your seurat object") - if(length(sender) == 1){ - if(sender != "all" & sender != "undefined"){ - if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ - stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") - } - } - } else { - if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ - stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") - } - } - if(organism != "mouse" & organism != "human") - stop("Organism should be 'mouse' or 'human'") - if(geneset != "DE" & geneset != "up" & geneset != "down") - stop("geneset should be 'DE', 'up' or 'down'") - if("integrated" %in% names(seurat_obj@assays)){ - warning("Seurat object is result from the Seurat integration workflow. Make sure that the way of defining expressed and differentially expressed genes in this wrapper is appropriate for your integrated data.") - } - # Read in and process NicheNet networks, define ligands and receptors - if (verbose == TRUE){print("Read in and process NicheNet's networks")} - weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) - - if (organism == "mouse"){ - lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] - weighted_networks_lr = weighted_networks_lr %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() - } - - lr_network_strict = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") - - ligands = lr_network %>% pull(from) %>% unique() - receptors = lr_network %>% pull(to) %>% unique() - ligands_bona_fide = lr_network_strict %>% pull(from) %>% unique() - receptors_bona_fide = lr_network_strict %>% pull(to) %>% unique() - - if (verbose == TRUE){print("Define expressed ligands and receptors in receiver and sender cells")} - - # step1 nichenet analysis: get expressed genes in sender and receiver cells - - ## receiver - list_expressed_genes_receiver = receiver %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_receiver) = receiver %>% unique() - expressed_genes_receiver = list_expressed_genes_receiver %>% unlist() %>% unique() - - ## sender - if (length(sender) == 1){ - if (sender == "all"){ - sender_celltypes = Idents(seurat_obj) %>% levels() - list_expressed_genes_sender = sender_celltypes %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - - } else if (sender == "undefined") { - if("integrated" %in% names(seurat_obj@assays)){ - expressed_genes_sender = union(seurat_obj@assays$integrated@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) - } else { - expressed_genes_sender = union(seurat_obj@assays$RNA@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) - } - } else if (sender != "all" & sender != "undefined") { - sender_celltypes = sender - list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes %>% unique() - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - } - } else { - sender_celltypes = sender - list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes %>% unique() - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - } - - # step2 nichenet analysis: define background and gene list of interest: here differential expression between two conditions of cell type of interest - if (verbose == TRUE){print("Perform DE analysis in receiver cell")} - - seurat_obj_receiver= subset(seurat_obj, idents = receiver) - seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[[condition_colname]]) - DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = expression_pct) %>% rownames_to_column("gene") - - SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_receiver) - - if(SeuratV4 == TRUE){ - if (geneset == "DE"){ - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "up") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "down") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC <= lfc_cutoff) %>% pull(gene) - } - } else { - if (geneset == "DE"){ - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_logFC) >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "up") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "down") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC <= lfc_cutoff) %>% pull(gene) - } - } - - - geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] - if (length(geneset_oi) == 0){ - stop("No genes were differentially expressed") - } - background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] - - # step3 nichenet analysis: define potential ligands - expressed_ligands = intersect(ligands,expressed_genes_sender) - expressed_receptors = intersect(receptors,expressed_genes_receiver) - if (length(expressed_ligands) == 0){ - stop("No ligands expressed in sender cell") - } - if (length(expressed_receptors) == 0){ - stop("No receptors expressed in receiver cell") - } - potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() - if (length(potential_ligands) == 0){ - stop("No potentially active ligands") - } - - - if (verbose == TRUE){print("Perform NicheNet ligand activity analysis")} - - # step4 perform NicheNet's ligand activity analysis - ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) - ligand_activities = ligand_activities %>% - arrange(-pearson) %>% - mutate(rank = rank(desc(pearson)), - bona_fide_ligand = test_ligand %in% ligands_bona_fide) - - if(filter_top_ligands == TRUE){ - best_upstream_ligands = ligand_activities %>% top_n(top_n_ligands, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() - } else { - best_upstream_ligands = ligand_activities %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() - } - - if (verbose == TRUE){print("Infer active target genes of the prioritized ligands")} - - # step5 infer target genes of the top-ranked ligands - active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = top_n_targets) %>% bind_rows() %>% drop_na() - if(nrow(active_ligand_target_links_df) > 0){ - active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = cutoff_visualization) - order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() - order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() - rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() - colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() - - order_targets = order_targets %>% intersect(rownames(active_ligand_target_links)) - order_ligands = order_ligands %>% intersect(colnames(active_ligand_target_links)) - - vis_ligand_target = active_ligand_target_links[order_targets,order_ligands,drop=FALSE] %>% t() - p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) #+ scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.006,0.012)) - } else { - vis_ligand_target = NULL - p_ligand_target_network = NULL - print("no highly likely active targets found for top ligands") - } - # combined heatmap: overlay ligand activities - ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) - - rownames(ligand_pearson_matrix) = rownames(ligand_pearson_matrix) %>% make.names() - colnames(ligand_pearson_matrix) = colnames(ligand_pearson_matrix) %>% make.names() - - vis_ligand_pearson = ligand_pearson_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("Pearson") - p_ligand_pearson = vis_ligand_pearson %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Pearson correlation coefficient\ntarget gene prediction ability)") + theme(legend.text = element_text(size = 9)) - p_ligand_pearson - - figures_without_legend = cowplot::plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), - align = "hv", - nrow = 1, - rel_widths = c(ncol(vis_ligand_pearson)+10, ncol(vis_ligand_target))) - legends = cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_pearson)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), - nrow = 1, - align = "h") - - combined_plot = cowplot::plot_grid(figures_without_legend, - legends, - rel_heights = c(10,2), nrow = 2, align = "hv") - - # ligand-receptor plot - # get the ligand-receptor network of the top-ranked ligands - if (verbose == TRUE){print("Infer receptors of the prioritized ligands")} - - lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) - best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() - - lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) - - lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) - lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) - - if (nrow(lr_network_top_matrix) > 1){ - dist_receptors = dist(lr_network_top_matrix, method = "binary") - hclust_receptors = hclust(dist_receptors, method = "ward.D2") - order_receptors = hclust_receptors$labels[hclust_receptors$order] - } else { - order_receptors = rownames(lr_network_top_matrix) - } - if (ncol(lr_network_top_matrix) > 1) { - dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") - hclust_ligands = hclust(dist_ligands, method = "ward.D2") - order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - } else { - order_ligands_receptor = colnames(lr_network_top_matrix) - } - - order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) - order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) - - vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] - dim(vis_ligand_receptor_network) = c(length(order_receptors), length(order_ligands_receptor)) - rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() - colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() - - p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") - - # bona fide ligand-receptor - lr_network_top_df_large_strict = lr_network_top_df_large %>% distinct(from,to) %>% inner_join(lr_network_strict, by = c("from","to")) %>% distinct(from,to) - lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% inner_join(lr_network_top_df_large, by = c("from","to")) - - lr_network_top_df_strict = lr_network_top_df_large_strict %>% spread("from","weight",fill = 0) - lr_network_top_matrix_strict = lr_network_top_df_strict %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df_strict$to) - - if (nrow(lr_network_top_df_large_strict) == 0){ - print("Remark: no bona fide receptors of top ligands") - vis_ligand_receptor_network_strict = NULL - p_ligand_receptor_network_strict = NULL - lr_network_top_df_large_strict = NULL - - } else { - if (nrow(lr_network_top_matrix_strict) > 1){ - dist_receptors = dist(lr_network_top_matrix_strict, method = "binary") - hclust_receptors = hclust(dist_receptors, method = "ward.D2") - order_receptors = hclust_receptors$labels[hclust_receptors$order] - } else { - order_receptors = rownames(lr_network_top_matrix) - } - if (ncol(lr_network_top_matrix_strict) > 1) { - dist_ligands = dist(lr_network_top_matrix_strict %>% t(), method = "binary") - hclust_ligands = hclust(dist_ligands, method = "ward.D2") - order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - } else { - order_ligands_receptor = colnames(lr_network_top_matrix_strict) - } - order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix_strict)) - order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix_strict)) - - vis_ligand_receptor_network_strict = lr_network_top_matrix_strict[order_receptors, order_ligands_receptor] - dim(vis_ligand_receptor_network_strict) = c(length(order_receptors), length(order_ligands_receptor)) - - rownames(vis_ligand_receptor_network_strict) = order_receptors %>% make.names() - colnames(vis_ligand_receptor_network_strict) = order_ligands_receptor %>% make.names() - - p_ligand_receptor_network_strict = vis_ligand_receptor_network_strict %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential\n(bona fide)") - - lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% rename(ligand = from, receptor = to) - } - - # DE analysis for each sender cell type -- of course only possible when having sender cell types - if (length(sender) > 1){ - are_there_senders = TRUE - } - if(length(sender) == 1){ - if(sender != "undefined"){ - are_there_senders = TRUE - } else { - are_there_senders = FALSE - } - } - - if (are_there_senders == TRUE){ - if (verbose == TRUE){print("Perform DE analysis in sender cells")} - seurat_obj = subset(seurat_obj, features= potential_ligands) - - DE_table_all = Idents(seurat_obj) %>% levels() %>% intersect(sender_celltypes) %>% lapply(get_lfc_celltype, seurat_obj = seurat_obj, condition_colname = condition_colname, condition_oi = condition_oi, condition_reference = condition_reference, expression_pct = expression_pct, celltype_col = NULL) %>% reduce(full_join, by = "gene") # use this if cell type labels are the identities of your Seurat object -- if not: indicate the celltype_col properly - DE_table_all[is.na(DE_table_all)] = 0 - - # Combine ligand activities with DE information - ligand_activities_de = ligand_activities %>% select(test_ligand, pearson) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene), by = "ligand") - ligand_activities_de[is.na(ligand_activities_de)] = 0 - - # make LFC heatmap - lfc_matrix = ligand_activities_de %>% select(-ligand, -pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) - rownames(lfc_matrix) = rownames(lfc_matrix) %>% make.names() - - order_ligands = order_ligands[order_ligands %in% rownames(lfc_matrix)] - vis_ligand_lfc = lfc_matrix[order_ligands,] - vis_ligand_lfc = vis_ligand_lfc %>% as.matrix(ncol = length(Idents(seurat_obj) %>% levels() %>% intersect(sender_celltypes))) - colnames(vis_ligand_lfc) = vis_ligand_lfc %>% colnames() %>% make.names() - - p_ligand_lfc = vis_ligand_lfc %>% make_threecolor_heatmap_ggplot("Prioritized ligands","LFC in Sender", low_color = "midnightblue",mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red",legend_position = "top", x_axis_position = "top", legend_title = "LFC") + theme(axis.text.y = element_text(face = "italic")) - - # ligand expression Seurat dotplot - real_makenames_conversion = lr_network$from %>% unique() %>% magrittr::set_names(lr_network$from %>% unique() %>% make.names()) - order_ligands_adapted = real_makenames_conversion[order_ligands] - names(order_ligands_adapted) = NULL - - seurat_obj_subset = seurat_obj %>% subset(idents = sender_celltypes) - seurat_obj_subset = SetIdent(seurat_obj_subset, value = seurat_obj_subset[[condition_colname]]) %>% subset(idents = condition_oi) ## only shows cells of the condition of interest - rotated_dotplot = DotPlot(seurat_obj %>% subset(cells = Cells(seurat_obj_subset)), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots - rm(seurat_obj_subset) - - # combined plot - figures_without_legend = cowplot::plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), - p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), - align = "hv", - nrow = 1, - rel_widths = c(ncol(vis_ligand_pearson)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) - - legends = cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_pearson)), - ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), - nrow = 1, - align = "h", rel_widths = c(1.5, 1, 1, 1)) - - combined_plot = cowplot::plot_grid(figures_without_legend, legends, rel_heights = c(10,5), nrow = 2, align = "hv") - combined_plot - - } else { - rotated_dotplot = NULL - p_ligand_lfc = NULL - } - - return(list( - ligand_activities = ligand_activities, - top_ligands = best_upstream_ligands, - top_targets = active_ligand_target_links_df$target %>% unique(), - top_receptors = lr_network_top_df_large$to %>% unique(), - ligand_target_matrix = vis_ligand_target, - ligand_target_heatmap = p_ligand_target_network, - ligand_target_df = active_ligand_target_links_df, - ligand_expression_dotplot = rotated_dotplot, - ligand_differential_expression_heatmap = p_ligand_lfc, - ligand_activity_target_heatmap = combined_plot, - ligand_receptor_matrix = vis_ligand_receptor_network, - ligand_receptor_heatmap = p_ligand_receptor_network, - ligand_receptor_df = lr_network_top_df_large %>% rename(ligand = from, receptor = to), - ligand_receptor_matrix_bonafide = vis_ligand_receptor_network_strict, - ligand_receptor_heatmap_bonafide = p_ligand_receptor_network_strict, - ligand_receptor_df_bonafide = lr_network_top_df_large_strict, - geneset_oi = geneset_oi, - background_expressed_genes = background_expressed_genes - )) -} -#' @title Determine expressed genes of a cell type from a Seurat object single-cell RNA seq dataset or Seurat spatial transcriptomics dataset -#' -#' @description \code{get_expressed_genes} Return the genes that are expressed in a given cell cluster based on the fraction of cells in that cluster that should express the cell. -#' @usage -#' get_expressed_genes(ident, seurat_obj, pct = 0.10, assay_oi = NULL) -#' -#' @param ident Name of cluster identity/identities of cells -#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. -#' @param pct We consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data. -#' @param assay_oi If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes. -#' -#' @return A character vector with the gene symbols of the expressed genes -#' -#' @import Seurat -#' @import dplyr -#' -#' @examples -#' \dontrun{ -#' get_expressed_genes(ident = "CD8 T", seurat_obj = seuratObj, pct = 0.10) -#' } -#' -#' @export -#' -get_expressed_genes = function(ident, seurat_obj, pct = 0.1, assay_oi = NULL){ - requireNamespace("Seurat") - requireNamespace("dplyr") - - # input check - - - if (!"RNA" %in% names(seurat_obj@assays)) { - if ("Spatial" %in% names(seurat_obj@assays)) { - if (class(seurat_obj@assays$Spatial@data) != "matrix" & - class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { - warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") - } - if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") - } - } - } - else { - if (class(seurat_obj@assays$RNA@data) != "matrix" & - class(seurat_obj@assays$RNA@data) != "dgCMatrix") { - warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") - } - if ("integrated" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == - 0) - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") - } - else if ("SCT" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == - 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") - } - } - else { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") - } - } - } - if (sum(ident %in% unique(Idents(seurat_obj))) != length(ident)) { - stop("One or more provided cell clusters is not part of the 'Idents' of your Seurat object") - } - - if(!is.null(assay_oi)){ - if(! assay_oi %in% Seurat::Assays(seurat_obj)){ - stop("assay_oi should be an assay of your Seurat object") - } - } - - # Get cell identities of cluster of interest - - - cells_oi = Idents(seurat_obj) %>% .[Idents(seurat_obj) %in% - ident] %>% names() - - # Get exprs matrix: from assay oi or from most advanced assay if assay oi not specifcied - - if(!is.null(assay_oi)){ - cells_oi_in_matrix = intersect(colnames(seurat_obj[[assay_oi]]@data), cells_oi) - exprs_mat = seurat_obj[[assay_oi]]@data %>% .[, cells_oi_in_matrix] - } else { - if ("integrated" %in% names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat integration workflow. The expressed genes are now defined based on the integrated slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$integrated@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$integrated@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$integrated@data %>% .[, - cells_oi_in_matrix] - } - else if ("SCT" %in% names(seurat_obj@assays) & !"Spatial" %in% - names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat single-cell transform workflow. The expressed genes are defined based on the SCT slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$SCT@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix] - } - else if ("Spatial" %in% names(seurat_obj@assays) & - !"SCT" %in% names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat spatial object. The expressed genes are defined based on the Spatial slot. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! ;-) )") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$Spatial@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$Spatial@data %>% .[, cells_oi_in_matrix] - } - else if ("Spatial" %in% names(seurat_obj@assays) & - "SCT" %in% names(seurat_obj@assays)) { - warning("Seurat object is result from the Seurat spatial object, followed by the SCT workflow. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! The expressed genes are defined based on the SCT slot, but this can be changed via the assay_oi parameter.") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix] - } - else { - if (sum(cells_oi %in% colnames(seurat_obj@assays$RNA@data)) == - 0) - stop("None of the cells are in colnames of 'seurat_obj@assays$RNA@data'. The expression matrix should contain cells in columns and genes in rows.") - cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$RNA@data), - cells_oi) - if (length(cells_oi_in_matrix) != length(cells_oi)) - stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$RNA@data). Please check that the expression matrix contains cells in columns and genes in rows.") - exprs_mat = seurat_obj@assays$RNA@data %>% .[, cells_oi_in_matrix] - } - - } - - # use defined cells and exprs matrix to get expressed genes - - n_cells_oi_in_matrix = length(cells_oi_in_matrix) - if (n_cells_oi_in_matrix < 5000) { - genes = exprs_mat %>% apply(1, function(x) { - sum(x > 0)/n_cells_oi_in_matrix - }) %>% .[. >= pct] %>% names() - } - else { - splits = split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100)) - genes = splits %>% lapply(function(genes_indices, exprs, - pct, n_cells_oi_in_matrix) { - begin_i = genes_indices[1] - end_i = genes_indices[length(genes_indices)] - exprs = exprs[begin_i:end_i, , drop = FALSE] - genes = exprs %>% apply(1, function(x) { - sum(x > 0)/n_cells_oi_in_matrix - }) %>% .[. >= pct] %>% names() - }, exprs_mat, pct, n_cells_oi_in_matrix) %>% unlist() %>% - unname() - } - return(genes) -} -#' @title Perform NicheNet analysis on Seurat object: explain DE between two cell clusters -#' -#' @description \code{nichenet_seuratobj_cluster_de} Perform NicheNet analysis on Seurat object: explain differential expression (DE) between two 'receiver' cell clusters by ligands expressed by neighboring cells. -#' @usage -#' nichenet_seuratobj_cluster_de(seurat_obj, receiver_affected, receiver_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20,top_n_targets = 200, cutoff_visualization = 0.33,organism = "human",verbose = TRUE, assay_oi = NULL) -#' -#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. -#' @param receiver_reference Name of cluster identity/identities of "steady-state" cells, before they are affected by intercellular communication with other cells -#' @param receiver_affected Name of cluster identity/identities of "affected" cells that were presumably affected by intercellular communication with other cells -#' @param sender Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all". -#' @param expression_pct To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 -#' @param lfc_cutoff Cutoff on log fold change in the wilcoxon differential expression test. Default: 0.25. -#' @param geneset Indicate whether to consider all DE genes between condition 1 and 2 ("DE"), or only genes upregulated in condition 1 ("up"), or only genes downregulad in condition 1 ("down"). -#' @param filter_top_ligands Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE. -#' @param top_n_ligands Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 20. -#' @param top_n_targets To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200. -#' @param cutoff_visualization Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33. -#' @param organism Organism from which cells originate."human" (default) or "mouse". -#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). -#' @param lr_network The ligand-receptor network (columns that should be present: $from, $to). -#' @param weighted_networks The NicheNet weighted networks denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network. -#' @param verbose Print out the current analysis stage. Default: TRUE. -#' @inheritParams get_expressed_genes -#' -#' @return A list with the following elements: -#' $ligand_activities: data frame with output ligand activity analysis; -#' $top_ligands: top_n ligands based on ligand activity; -#' $top_targets: active, affected target genes of these ligands; -#' $top_receptors: receptors of these ligands; -#' $ligand_target_matrix: matrix indicating regulatory potential scores between active ligands and their predicted targets; -#' $ligand_target_heatmap: heatmap of ligand-target regulatory potential; -#' $ligand_target_df: data frame showing regulatory potential scores of predicted active ligand-target network; -#' $ligand_activity_target_heatmap: heatmap showing both ligand activity scores and target genes of these top ligands; -#' $ligand_expression_dotplot: expression dotplot of the top ligands; -#' $ligand_receptor_matrix: matrix of ligand-receptor interactions; -#' $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; -#' $ligand_receptor_df: data frame of ligand-receptor interactions; -#' $ligand_receptor_matrix_bonafide: ligand-receptor matrix, after filtering out interactions predicted by PPI; -#' $ligand_receptor_heatmap_bonafide: heatmap of ligand-receptor interactions after filtering out interactions predicted by PPI; -#' $ligand_receptor_df_bonafide: data frame of ligand-receptor interactions, after filtering out interactions predicted by PPI; -#' $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; -#' $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. -#' -#' @import Seurat -#' @import dplyr -#' @importFrom magrittr set_rownames set_colnames -#' -#' @examples -#' \dontrun{ -#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) -#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -#' lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -#' weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) -#' # works, but does not make sense -#' nichenet_seuratobj_cluster_de(seurat_obj = seuratObj, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) -#' # type of analysis for which this would make sense -#' nichenet_seuratobj_cluster_de(seurat_obj = seuratObj, receiver_affected = "p-EMT-pos-cancer", receiver_reference = "p-EMT-neg-cancer", sender = "Fibroblast", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) -#' } -#' -#' @export -#' -nichenet_seuratobj_cluster_de = function(seurat_obj, receiver_affected, receiver_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks, - expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20, - top_n_targets = 200, cutoff_visualization = 0.33, - organism = "human",verbose = TRUE, assay_oi = NULL) -{ - requireNamespace("Seurat") - requireNamespace("dplyr") - - # input check - # input check - if(! "RNA" %in% names(seurat_obj@assays)){ - if ("Spatial" %in% names(seurat_obj@assays)){ - warning("You are going to apply NicheNet on a spatial seurat object. Be sure it's ok to use NicheNet the way you are planning to do it. So this means: you should have changes in gene expression in receiver cells caused by cell-cell interactions. Note that in the case of spatial transcriptomics, you are not dealing with single cells but with 'spots' containing multiple cells of the same of different cell types.") - - if (class(seurat_obj@assays$Spatial@data) != "matrix" & class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { - warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") - } - if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") - } - }} else { - if (class(seurat_obj@assays$RNA@data) != "matrix" & - class(seurat_obj@assays$RNA@data) != "dgCMatrix") { - warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") - } - - if ("integrated" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == - 0) - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") - } - else if ("SCT" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == - 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") - } - } - else { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") - } - } - } - - - if(sum(receiver_affected %in% unique(Idents(seurat_obj))) != length(receiver_affected)) - stop("The defined receiver_affected cell type should be an identity class of your seurat object") - if(sum(receiver_reference %in% unique(Idents(seurat_obj))) != length(receiver_reference)) - stop("The defined receiver_reference cell type should be an identity class of your seurat object") - if(length(sender) == 1){ - if(sender != "all" & sender != "undefined"){ - if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ - stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") - } - } - } else { - if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ - stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") - } - } - if(organism != "mouse" & organism != "human") - stop("Organism should be 'mouse' or 'human'") - if(geneset != "DE" & geneset != "up" & geneset != "down") - stop("geneset should be 'DE', 'up' or 'down'") - - if("integrated" %in% names(seurat_obj@assays)){ - warning("Seurat object is result from the Seurat integration workflow. Make sure that the way of defining expressed and differentially expressed genes in this wrapper is appropriate for your integrated data.") - } - - # Read in and process NicheNet networks, define ligands and receptors - if (verbose == TRUE){print("Read in and process NicheNet's networks")} - weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) - - if (organism == "mouse"){ - lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] - weighted_networks_lr = weighted_networks_lr %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() - } - lr_network_strict = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") - - ligands = lr_network %>% pull(from) %>% unique() - receptors = lr_network %>% pull(to) %>% unique() - ligands_bona_fide = lr_network_strict %>% pull(from) %>% unique() - receptors_bona_fide = lr_network_strict %>% pull(to) %>% unique() - - if (verbose == TRUE){print("Define expressed ligands and receptors in receiver and sender cells")} - - # step1 nichenet analysis: get expressed genes in sender and receiver cells - - ## receiver - # expressed genes: only in steady state population (for determining receptors) - list_expressed_genes_receiver_ss = c(receiver_reference) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_receiver_ss) = c(receiver_reference) %>% unique() - expressed_genes_receiver_ss = list_expressed_genes_receiver_ss %>% unlist() %>% unique() - - # expressed genes: both in steady state and affected population (for determining background of expressed genes) - list_expressed_genes_receiver = c(receiver_reference,receiver_affected) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_receiver) = c(receiver_reference,receiver_affected) %>% unique() - expressed_genes_receiver = list_expressed_genes_receiver %>% unlist() %>% unique() - - ## sender - if (length(sender) == 1){ - if (sender == "all"){ - sender_celltypes = Idents(seurat_obj) %>% levels() - list_expressed_genes_sender = sender_celltypes %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - - } else if (sender == "undefined") { - if("integrated" %in% names(seurat_obj@assays)){ - expressed_genes_sender = union(seurat_obj@assays$integrated@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) - } else { - expressed_genes_sender = union(seurat_obj@assays$RNA@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) - } - } else if (sender != "all" & sender != "undefined") { - sender_celltypes = sender - list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes %>% unique() - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - } - } else { - sender_celltypes = sender - list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes %>% unique() - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - } - - # step2 nichenet analysis: define background and gene list of interest: here differential expression between two conditions of cell type of interest - if (verbose == TRUE){print("Perform DE analysis between two receiver cell clusters")} - - DE_table_receiver = FindMarkers(object = seurat_obj, ident.1 = receiver_affected, ident.2 = receiver_reference, min.pct = expression_pct) %>% rownames_to_column("gene") - - SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_receiver) - - if(SeuratV4 == TRUE){ - if (geneset == "DE"){ - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "up") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "down") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC <= lfc_cutoff) %>% pull(gene) - } - } else { - if (geneset == "DE"){ - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_logFC) >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "up") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "down") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC <= lfc_cutoff) %>% pull(gene) - } - } - - - - geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] - if (length(geneset_oi) == 0){ - stop("No genes were differentially expressed") - } - background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] - - # step3 nichenet analysis: define potential ligands - expressed_ligands = intersect(ligands,expressed_genes_sender) - expressed_receptors = intersect(receptors,expressed_genes_receiver) - if (length(expressed_ligands) == 0){ - stop("No ligands expressed in sender cell") - } - if (length(expressed_receptors) == 0){ - stop("No receptors expressed in receiver cell") - } - potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() - if (length(potential_ligands) == 0){ - stop("No potentially active ligands") - } - - if (verbose == TRUE){print("Perform NicheNet ligand activity analysis")} - - # step4 perform NicheNet's ligand activity analysis - ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) - ligand_activities = ligand_activities %>% - arrange(-pearson) %>% - mutate(rank = rank(desc(pearson)), - bona_fide_ligand = test_ligand %in% ligands_bona_fide) - - if(filter_top_ligands == TRUE){ - best_upstream_ligands = ligand_activities %>% top_n(top_n_ligands, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() - } else { - best_upstream_ligands = ligand_activities %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() - } - if (verbose == TRUE){print("Infer active target genes of the prioritized ligands")} - - # step5 infer target genes of the top-ranked ligands - active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = top_n_targets) %>% bind_rows() %>% drop_na() - - if(nrow(active_ligand_target_links_df) > 0){ - active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = cutoff_visualization) - order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() - order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() - rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() - colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() - - order_targets = order_targets %>% intersect(rownames(active_ligand_target_links)) - order_ligands = order_ligands %>% intersect(colnames(active_ligand_target_links)) - - vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() - p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) #+ scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.006,0.012)) - } else { - vis_ligand_target = NULL - p_ligand_target_network = NULL - print("no highly likely active targets found for top ligands") - } - - # combined heatmap: overlay ligand activities - ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) - - rownames(ligand_pearson_matrix) = rownames(ligand_pearson_matrix) %>% make.names() - colnames(ligand_pearson_matrix) = colnames(ligand_pearson_matrix) %>% make.names() - - vis_ligand_pearson = ligand_pearson_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("Pearson") - p_ligand_pearson = vis_ligand_pearson %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Pearson correlation coefficient\ntarget gene prediction ability)") + theme(legend.text = element_text(size = 9)) - p_ligand_pearson - - figures_without_legend = cowplot::plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), - align = "hv", - nrow = 1, - rel_widths = c(ncol(vis_ligand_pearson)+10, ncol(vis_ligand_target))) - legends = cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_pearson)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), - nrow = 1, - align = "h") - - combined_plot = cowplot::plot_grid(figures_without_legend, - legends, - rel_heights = c(10,2), nrow = 2, align = "hv") - - # ligand-receptor plot - # get the ligand-receptor network of the top-ranked ligands - if (verbose == TRUE){print("Infer receptors of the prioritized ligands")} - - lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) - best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() - - lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) - - lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) - lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) - - if (nrow(lr_network_top_matrix) > 1){ - dist_receptors = dist(lr_network_top_matrix, method = "binary") - hclust_receptors = hclust(dist_receptors, method = "ward.D2") - order_receptors = hclust_receptors$labels[hclust_receptors$order] - } else { - order_receptors = rownames(lr_network_top_matrix) - } - if (ncol(lr_network_top_matrix) > 1) { - dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") - hclust_ligands = hclust(dist_ligands, method = "ward.D2") - order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - } else { - order_ligands_receptor = colnames(lr_network_top_matrix) - } - - order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) - order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) - - vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] - dim(vis_ligand_receptor_network) = c(length(order_receptors), length(order_ligands_receptor)) - - rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() - colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() - - p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") - - # bona fide ligand-receptor - lr_network_top_df_large_strict = lr_network_top_df_large %>% distinct(from,to) %>% inner_join(lr_network_strict, by = c("from","to")) %>% distinct(from,to) - lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% inner_join(lr_network_top_df_large, by = c("from","to")) - - lr_network_top_df_strict = lr_network_top_df_large_strict %>% spread("from","weight",fill = 0) - lr_network_top_matrix_strict = lr_network_top_df_strict %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df_strict$to) - - if (nrow(lr_network_top_df_large_strict) == 0){ - print("Remark: no bona fide receptors of top ligands") - vis_ligand_receptor_network_strict = NULL - p_ligand_receptor_network_strict = NULL - lr_network_top_df_large_strict = NULL - - } else { - - if (nrow(lr_network_top_matrix_strict) > 1){ - dist_receptors = dist(lr_network_top_matrix_strict, method = "binary") - hclust_receptors = hclust(dist_receptors, method = "ward.D2") - order_receptors = hclust_receptors$labels[hclust_receptors$order] - } else { - order_receptors = rownames(lr_network_top_matrix) - } - if (ncol(lr_network_top_matrix_strict) > 1) { - dist_ligands = dist(lr_network_top_matrix_strict %>% t(), method = "binary") - hclust_ligands = hclust(dist_ligands, method = "ward.D2") - order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - } else { - order_ligands_receptor = colnames(lr_network_top_matrix_strict) - } - - order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix_strict)) - order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix_strict)) - - vis_ligand_receptor_network_strict = lr_network_top_matrix_strict[order_receptors, order_ligands_receptor] - dim(vis_ligand_receptor_network_strict) = c(length(order_receptors), length(order_ligands_receptor)) - - rownames(vis_ligand_receptor_network_strict) = order_receptors %>% make.names() - colnames(vis_ligand_receptor_network_strict) = order_ligands_receptor %>% make.names() - - p_ligand_receptor_network_strict = vis_ligand_receptor_network_strict %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential\n(bona fide)") - - lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% rename(ligand = from, receptor = to) - - } - - # ligand expression Seurat dotplot - if (length(sender) > 1){ - are_there_senders = TRUE - } - if(length(sender) == 1){ - if(sender != "undefined"){ - are_there_senders = TRUE - } else { - are_there_senders = FALSE - } - } - - if (are_there_senders == TRUE){ - real_makenames_conversion = lr_network$from %>% unique() %>% magrittr::set_names(lr_network$from %>% unique() %>% make.names()) - order_ligands_adapted = real_makenames_conversion[order_ligands] - names(order_ligands_adapted) = NULL - rotated_dotplot = DotPlot(seurat_obj %>% subset(idents = sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots - - } else { - rotated_dotplot = NULL - } - - - return(list( - ligand_activities = ligand_activities, - top_ligands = best_upstream_ligands, - top_targets = active_ligand_target_links_df$target %>% unique(), - top_receptors = lr_network_top_df_large$to %>% unique(), - ligand_target_matrix = vis_ligand_target, - ligand_target_heatmap = p_ligand_target_network, - ligand_target_df = active_ligand_target_links_df, - ligand_expression_dotplot = rotated_dotplot, - ligand_activity_target_heatmap = combined_plot, - ligand_receptor_matrix = vis_ligand_receptor_network, - ligand_receptor_heatmap = p_ligand_receptor_network, - ligand_receptor_df = lr_network_top_df_large %>% rename(ligand = from, receptor = to), - ligand_receptor_matrix_bonafide = vis_ligand_receptor_network_strict, - ligand_receptor_heatmap_bonafide = p_ligand_receptor_network_strict, - ligand_receptor_df_bonafide = lr_network_top_df_large_strict, - geneset_oi = geneset_oi, - background_expressed_genes = background_expressed_genes - - )) -} -#' @title Perform NicheNet analysis on Seurat object: explain DE between two cell clusters from separate conditions -#' -#' @description \code{nichenet_seuratobj_aggregate_cluster_de} Perform NicheNet analysis on Seurat object: explain differential expression (DE) between two 'receiver' cell clusters coming from different conditions, by ligands expressed by neighboring cells. -#' @usage -#' nichenet_seuratobj_aggregate_cluster_de(seurat_obj, receiver_affected, receiver_reference, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20,top_n_targets = 200, cutoff_visualization = 0.33,organism = "human",verbose = TRUE, assay_oi = NULL) -#' -#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. -#' @param receiver_reference Name of cluster identity/identities of "steady-state" cells, before they are affected by intercellular communication with other cells -#' @param receiver_affected Name of cluster identity/identities of "affected" cells that were presumably affected by intercellular communication with other cells -#' @param condition_colname Name of the column in the meta data dataframe that indicates which condition/sample cells were coming from. -#' @param condition_oi Condition of interest in which receiver cells were presumably affected by other cells. Should be a name present in the `condition_colname` column of the metadata. -#' @param condition_reference The second condition (e.g. reference or steady-state condition). Should be a name present in the `condition_colname` column of the metadata. -#' @param sender Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all". -#' @param expression_pct To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 -#' @param lfc_cutoff Cutoff on log fold change in the wilcoxon differential expression test. Default: 0.25. -#' @param geneset Indicate whether to consider all DE genes between condition 1 and 2 ("DE"), or only genes upregulated in condition 1 ("up"), or only genes downregulad in condition 1 ("down"). -#' @param filter_top_ligands Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE. -#' @param top_n_ligands Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 20. -#' @param top_n_targets To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200. -#' @param cutoff_visualization Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33. -#' @param organism Organism from which cells originate."human" (default) or "mouse". -#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). -#' @param lr_network The ligand-receptor network (columns that should be present: $from, $to). -#' @param weighted_networks The NicheNet weighted networks denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network. -#' @param verbose Print out the current analysis stage. Default: TRUE. -#' @inheritParams get_expressed_genes -#' -#' @return A list with the following elements: -#' $ligand_activities: data frame with output ligand activity analysis; -#' $top_ligands: top_n ligands based on ligand activity; -#' $top_targets: active, affected target genes of these ligands; -#' $top_receptors: receptors of these ligands; -#' $ligand_target_matrix: matrix indicating regulatory potential scores between active ligands and their predicted targets; -#' $ligand_target_heatmap: heatmap of ligand-target regulatory potential; -#' $ligand_target_df: data frame showing regulatory potential scores of predicted active ligand-target network; -#' $ligand_activity_target_heatmap: heatmap showing both ligand activity scores and target genes of these top ligands; -#' $ligand_expression_dotplot: expression dotplot of the top ligands; -#' $ligand_receptor_matrix: matrix of ligand-receptor interactions; -#' $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; -#' $ligand_receptor_df: data frame of ligand-receptor interactions; -#' $ligand_receptor_matrix_bonafide: ligand-receptor matrix, after filtering out interactions predicted by PPI; -#' $ligand_receptor_heatmap_bonafide: heatmap of ligand-receptor interactions after filtering out interactions predicted by PPI; -#' $ligand_receptor_df_bonafide: data frame of ligand-receptor interactions, after filtering out interactions predicted by PPI; -#' $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; -#' $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. -#' -#' @import Seurat -#' @import dplyr -#' @importFrom magrittr set_rownames set_colnames -#' -#' @examples -#' \dontrun{ -#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) -#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -#' lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -#' weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) -#' nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seuratObj, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) -#' } -#' -#' @export -#' -nichenet_seuratobj_aggregate_cluster_de = function(seurat_obj, receiver_affected, receiver_reference, - condition_colname, condition_oi, condition_reference, sender = "all", - ligand_target_matrix,lr_network,weighted_networks, - expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20, - top_n_targets = 200, cutoff_visualization = 0.33, - organism = "human",verbose = TRUE, assay_oi = NULL) -{ - - requireNamespace("Seurat") - requireNamespace("dplyr") - - # input check - if(! "RNA" %in% names(seurat_obj@assays)){ - if ("Spatial" %in% names(seurat_obj@assays)){ - warning("You are going to apply NicheNet on a spatial seurat object. Be sure it's ok to use NicheNet the way you are planning to do it. So this means: you should have changes in gene expression in receiver cells caused by cell-cell interactions. Note that in the case of spatial transcriptomics, you are not dealing with single cells but with 'spots' containing multiple cells of the same of different cell types.") - - if (class(seurat_obj@assays$Spatial@data) != "matrix" & class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { - warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") - } - if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") - } - }} else { - if (class(seurat_obj@assays$RNA@data) != "matrix" & - class(seurat_obj@assays$RNA@data) != "dgCMatrix") { - warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") - } - - if ("integrated" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == - 0) - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") - } - else if ("SCT" %in% names(seurat_obj@assays)) { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == - 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") - } - } - else { - if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { - stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") - } - } - } - - - if(sum(receiver_affected %in% unique(Idents(seurat_obj))) != length(receiver_affected)) - stop("The defined receiver_affected cell type should be an identity class of your seurat object") - if(sum(receiver_reference %in% unique(Idents(seurat_obj))) != length(receiver_reference)) - stop("The defined receiver_reference cell type should be an identity class of your seurat object") - if(!condition_colname %in% colnames(seurat_obj@meta.data)) - stop("Your column indicating the conditions/samples of interest should be in the metadata dataframe") - if(sum(condition_oi %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_oi)) - stop("condition_oi should be in the condition-indicating column") - if(sum(condition_reference %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_reference)) - stop("condition_reference should be in the condition-indicating column") - if(length(sender) == 1){ - if(sender != "all" & sender != "undefined"){ - if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ - stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") - } - } - } else { - if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ - stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") - } - } - if(organism != "mouse" & organism != "human") - stop("Organism should be 'mouse' or 'human'") - if(geneset != "DE" & geneset != "up" & geneset != "down") - stop("geneset should be 'DE', 'up' or 'down'") - - if("integrated" %in% names(seurat_obj@assays)){ - warning("Seurat object is result from the Seurat integration workflow. Make sure that the way of defining expressed and differentially expressed genes in this wrapper is appropriate for your integrated data.") - } - # Read in and process NicheNet networks, define ligands and receptors - if (verbose == TRUE){print("Read in and process NicheNet's networks")} - weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) - - if (organism == "mouse"){ - lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] - weighted_networks_lr = weighted_networks_lr %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() - } - lr_network_strict = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") - - ligands = lr_network %>% pull(from) %>% unique() - receptors = lr_network %>% pull(to) %>% unique() - ligands_bona_fide = lr_network_strict %>% pull(from) %>% unique() - receptors_bona_fide = lr_network_strict %>% pull(to) %>% unique() - - if (verbose == TRUE){print("Define expressed ligands and receptors in receiver and sender cells")} - - # step1 nichenet analysis: get expressed genes in sender and receiver cells - - ## receiver - # expressed genes: only in steady state population (for determining receptors) - list_expressed_genes_receiver_ss = c(receiver_reference) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_receiver_ss) = c(receiver_reference) %>% unique() - expressed_genes_receiver_ss = list_expressed_genes_receiver_ss %>% unlist() %>% unique() - - # expressed genes: both in steady state and affected population (for determining background of expressed genes) - list_expressed_genes_receiver = c(receiver_reference,receiver_affected) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_receiver) = c(receiver_reference,receiver_affected) %>% unique() - expressed_genes_receiver = list_expressed_genes_receiver %>% unlist() %>% unique() - - ## sender - if (length(sender) == 1){ - if (sender == "all"){ - sender_celltypes = Idents(seurat_obj) %>% levels() - list_expressed_genes_sender = sender_celltypes %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - - } else if (sender == "undefined") { - - if("integrated" %in% names(seurat_obj@assays)){ - expressed_genes_sender = union(seurat_obj@assays$integrated@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) - } else { - expressed_genes_sender = union(seurat_obj@assays$RNA@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) - } - - } else if (sender != "all" & sender != "undefined") { - sender_celltypes = sender - list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes %>% unique() - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - } - } else { - sender_celltypes = sender - list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) - names(list_expressed_genes_sender) = sender_celltypes %>% unique() - expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() - } - - # step2 nichenet analysis: define background and gene list of interest: here differential expression between two conditions of cell type of interest - if (verbose == TRUE){print("Perform DE analysis between two receiver cell clusters")} - - seurat_obj_receiver_affected= subset(seurat_obj, idents = receiver_affected) - seurat_obj_receiver_affected = SetIdent(seurat_obj_receiver_affected, value = seurat_obj_receiver_affected[[condition_colname]]) - seurat_obj_receiver_affected= subset(seurat_obj_receiver_affected, idents = condition_oi) - - seurat_obj_receiver_reference= subset(seurat_obj, idents = receiver_reference) - seurat_obj_receiver_reference = SetIdent(seurat_obj_receiver_reference, value = seurat_obj_receiver_reference[[condition_colname]]) - seurat_obj_receiver_reference= subset(seurat_obj_receiver_reference, idents = condition_reference) - - seurat_obj_receiver = merge(seurat_obj_receiver_affected, seurat_obj_receiver_reference) - - DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = expression_pct) %>% rownames_to_column("gene") - - - SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_receiver) - - if(SeuratV4 == TRUE){ - if (geneset == "DE"){ - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "up") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "down") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC <= lfc_cutoff) %>% pull(gene) - } - } else { - if (geneset == "DE"){ - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_logFC) >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "up") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC >= lfc_cutoff) %>% pull(gene) - } else if (geneset == "down") { - geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC <= lfc_cutoff) %>% pull(gene) - } - } - - geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] - if (length(geneset_oi) == 0){ - stop("No genes were differentially expressed") - } - background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] - - # step3 nichenet analysis: define potential ligands - expressed_ligands = intersect(ligands,expressed_genes_sender) - expressed_receptors = intersect(receptors,expressed_genes_receiver) - if (length(expressed_ligands) == 0){ - stop("No ligands expressed in sender cell") - } - if (length(expressed_receptors) == 0){ - stop("No receptors expressed in receiver cell") - } - potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() - if (length(potential_ligands) == 0){ - stop("No potentially active ligands") - } - - if (verbose == TRUE){print("Perform NicheNet ligand activity analysis")} - - # step4 perform NicheNet's ligand activity analysis - ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) - ligand_activities = ligand_activities %>% - arrange(-pearson) %>% - mutate(rank = rank(desc(pearson)), - bona_fide_ligand = test_ligand %in% ligands_bona_fide) - - if(filter_top_ligands == TRUE){ - best_upstream_ligands = ligand_activities %>% top_n(top_n_ligands, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() - } else { - best_upstream_ligands = ligand_activities %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() - } - if (verbose == TRUE){print("Infer active target genes of the prioritized ligands")} - - # step5 infer target genes of the top-ranked ligands - active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = top_n_targets) %>% bind_rows() %>% drop_na() - - if(nrow(active_ligand_target_links_df) > 0){ - active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = cutoff_visualization) - order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() - order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() - rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() - colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() - - order_targets = order_targets %>% intersect(rownames(active_ligand_target_links)) - order_ligands = order_ligands %>% intersect(colnames(active_ligand_target_links)) - - vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() - p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) #+ scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.006,0.012)) - } else { - vis_ligand_target = NULL - p_ligand_target_network = NULL - print("no highly likely active targets found for top ligands") - } - # combined heatmap: overlay ligand activities - ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) - - rownames(ligand_pearson_matrix) = rownames(ligand_pearson_matrix) %>% make.names() - colnames(ligand_pearson_matrix) = colnames(ligand_pearson_matrix) %>% make.names() - - vis_ligand_pearson = ligand_pearson_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("Pearson") - p_ligand_pearson = vis_ligand_pearson %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Pearson correlation coefficient\ntarget gene prediction ability)") + theme(legend.text = element_text(size = 9)) - p_ligand_pearson - - figures_without_legend = cowplot::plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), - align = "hv", - nrow = 1, - rel_widths = c(ncol(vis_ligand_pearson)+10, ncol(vis_ligand_target))) - legends = cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_pearson)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), - nrow = 1, - align = "h") - - combined_plot = cowplot::plot_grid(figures_without_legend, - legends, - rel_heights = c(10,2), nrow = 2, align = "hv") - - # ligand-receptor plot - # get the ligand-receptor network of the top-ranked ligands - if (verbose == TRUE){print("Infer receptors of the prioritized ligands")} - - lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) - best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() - - lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) - - lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) - lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) - - if (nrow(lr_network_top_matrix) > 1){ - dist_receptors = dist(lr_network_top_matrix, method = "binary") - hclust_receptors = hclust(dist_receptors, method = "ward.D2") - order_receptors = hclust_receptors$labels[hclust_receptors$order] - } else { - order_receptors = rownames(lr_network_top_matrix) - } - if (ncol(lr_network_top_matrix) > 1) { - dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") - hclust_ligands = hclust(dist_ligands, method = "ward.D2") - order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - } else { - order_ligands_receptor = colnames(lr_network_top_matrix) - } - - order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) - order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) - - vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] - dim(vis_ligand_receptor_network) = c(length(order_receptors), length(order_ligands_receptor)) - - rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() - colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() - - p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") - - # bona fide ligand-receptor - lr_network_top_df_large_strict = lr_network_top_df_large %>% distinct(from,to) %>% inner_join(lr_network_strict, by = c("from","to")) %>% distinct(from,to) - lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% inner_join(lr_network_top_df_large, by = c("from","to")) - - lr_network_top_df_strict = lr_network_top_df_large_strict %>% spread("from","weight",fill = 0) - lr_network_top_matrix_strict = lr_network_top_df_strict %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df_strict$to) - - if (nrow(lr_network_top_df_large_strict) == 0){ - print("Remark: no bona fide receptors of top ligands") - vis_ligand_receptor_network_strict = NULL - p_ligand_receptor_network_strict = NULL - lr_network_top_df_large_strict = NULL - - } else { - if (nrow(lr_network_top_matrix_strict) > 1){ - dist_receptors = dist(lr_network_top_matrix_strict, method = "binary") - hclust_receptors = hclust(dist_receptors, method = "ward.D2") - order_receptors = hclust_receptors$labels[hclust_receptors$order] - } else { - order_receptors = rownames(lr_network_top_matrix) - } - if (ncol(lr_network_top_matrix_strict) > 1) { - dist_ligands = dist(lr_network_top_matrix_strict %>% t(), method = "binary") - hclust_ligands = hclust(dist_ligands, method = "ward.D2") - order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - } else { - order_ligands_receptor = colnames(lr_network_top_matrix_strict) - } - - order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix_strict)) - order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix_strict)) - - vis_ligand_receptor_network_strict = lr_network_top_matrix_strict[order_receptors, order_ligands_receptor] - dim(vis_ligand_receptor_network_strict) = c(length(order_receptors), length(order_ligands_receptor)) - - rownames(vis_ligand_receptor_network_strict) = order_receptors %>% make.names() - colnames(vis_ligand_receptor_network_strict) = order_ligands_receptor %>% make.names() - - p_ligand_receptor_network_strict = vis_ligand_receptor_network_strict %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential\n(bona fide)") - - lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% rename(ligand = from, receptor = to) - - } - - # ligand expression Seurat dotplot - if (length(sender) > 1){ - are_there_senders = TRUE - } - if(length(sender) == 1){ - if(sender != "undefined"){ - are_there_senders = TRUE - } else { - are_there_senders = FALSE - } - } - - if (are_there_senders == TRUE){ - real_makenames_conversion = lr_network$from %>% unique() %>% magrittr::set_names(lr_network$from %>% unique() %>% make.names()) - order_ligands_adapted = real_makenames_conversion[order_ligands] - names(order_ligands_adapted) = NULL - rotated_dotplot = DotPlot(seurat_obj %>% subset(idents = sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots - - } else { - rotated_dotplot = NULL - } - return(list( - ligand_activities = ligand_activities, - top_ligands = best_upstream_ligands, - top_targets = active_ligand_target_links_df$target %>% unique(), - top_receptors = lr_network_top_df_large$to %>% unique(), - ligand_target_matrix = vis_ligand_target, - ligand_target_heatmap = p_ligand_target_network, - ligand_target_df = active_ligand_target_links_df, - ligand_expression_dotplot = rotated_dotplot, - ligand_activity_target_heatmap = combined_plot, - ligand_receptor_matrix = vis_ligand_receptor_network, - ligand_receptor_heatmap = p_ligand_receptor_network, - ligand_receptor_df = lr_network_top_df_large %>% rename(ligand = from, receptor = to), - ligand_receptor_matrix_bonafide = vis_ligand_receptor_network_strict, - ligand_receptor_heatmap_bonafide = p_ligand_receptor_network_strict, - ligand_receptor_df_bonafide = lr_network_top_df_large_strict, - geneset_oi = geneset_oi, - background_expressed_genes = background_expressed_genes - )) -} -#' @title Get log fold change values of genes in cell type of interest -#' -#' @description \code{get_lfc_celltype} Get log fold change of genes between two conditions in cell type of interest when using a Seurat single-cell object. -#' -#' @usage -#' get_lfc_celltype(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype", expression_pct = 0.10) -#' #' -#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. -#' @param celltype_oi Name of celltype of interest. Should be present in the celltype metadata dataframe. -#' @param condition_colname Name of the column in the meta data dataframe that indicates which condition/sample cells were coming from. -#' @param condition_oi Condition of interest. Should be a name present in the "condition_colname" column of the metadata. -#' @param condition_reference The second condition (e.g. reference or steady-state condition). Should be a name present in the "condition_colname" column of the metadata. -#' @param celltype_col Metadata colum name where the cell type identifier is stored. Default: "celltype". If this is NULL, the Idents() of the seurat object will be considered as your cell type identifier. -#' @param expression_pct To consider only genes if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 -#' -#' @return A tbl with the log fold change values of genes. Positive lfc values: higher in condition_oi compared to condition_reference. -#' -#' @import Seurat -#' @import dplyr -#' -#' @examples -#' \dontrun{ -#' requireNamespace("dplyr") -#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) -#' get_lfc_celltype(seurat_obj = seuratObj, celltype_oi = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", celltype_col = "celltype", expression_pct = 0.10) -#' } -#' @export -#' -get_lfc_celltype = function(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype", expression_pct = 0.10){ - requireNamespace("Seurat") - requireNamespace("dplyr") - if(!is.null(celltype_col)){ - seurat_obj_celltype = SetIdent(seurat_obj, value = seurat_obj[[celltype_col]]) - seuratObj_sender = subset(seurat_obj_celltype, idents = celltype_oi) - - } else { - seuratObj_sender = subset(seurat_obj, idents = celltype_oi) - - } - seuratObj_sender = SetIdent(seuratObj_sender, value = seuratObj_sender[[condition_colname]]) - DE_table_sender = FindMarkers(object = seuratObj_sender, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = expression_pct, logfc.threshold = 0.05) %>% rownames_to_column("gene") - - SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_sender) - - if(SeuratV4 == TRUE){ - DE_table_sender = DE_table_sender %>% as_tibble() %>% select(-p_val) %>% select(gene, avg_log2FC) - } else { - DE_table_sender = DE_table_sender %>% as_tibble() %>% select(-p_val) %>% select(gene, avg_logFC) - } - - colnames(DE_table_sender) = c("gene",celltype_oi) - return(DE_table_sender) -} +#' @title Convert cluster assignment to settings format suitable for target gene prediction. +#' +#' @description \code{convert_cluster_to_settings} Convert cluster assignment to settings format suitable for target gene prediction. +#' +#' @usage +#' convert_cluster_to_settings(i, cluster_vector, setting_name, setting_from, background = NULL) +#' +#' @param i The cluster number of the cluster of interest to which genes should belong +#' @param cluster_vector Named vector containing the cluster number to which every gene belongs +#' @param setting_name Base name of the setting +#' @param setting_from Active ligands for the specific setting +#' @param background NULL or a character vector of genes belonging to the background. When NULL: the background will be formed by genes belonging to other clusters that the cluster of interest. Default NULL. If not NULL and genes present in the cluster of interest are in this vector of background gene names, these genes will be removed from the background. +#' +#' @return A list with following elements: $name (indicating the cluster id), $from, $response. $response is a gene-named logical vector indicating whether the gene is part of the respective cluster. +#' +#' @examples +#' \dontrun{ +#' genes_clusters = c("TGFB1" = 1,"TGFB2" = 1,"TGFB3" = 2) +#' cluster_settings = lapply(seq(length(unique(genes_clusters))), convert_cluster_to_settings, cluster_vector = genes_clusters, setting_name = "example", setting_from = "BMP2") +#' } +#' +#' @export +#' +convert_cluster_to_settings = function(i, cluster_vector, setting_name, setting_from, background = NULL){ + + # input check + if(!is.numeric(i) | length(i) != 1 | i <= 0) + stop("i should be a number higher than 0") + if(!is.numeric(cluster_vector) | is.null(names(cluster_vector))) + stop("cluster_vector should be a named numeric vector") + if(!is.character(setting_name)) + stop("setting_name should be a character vector") + if(!is.character(setting_from)) + stop("setting_from should be a character vector") + if(!is.character(background) & !is.null(background)) + stop("background should be a character vector or NULL") + + requireNamespace("dplyr") + + + genes_cluster_oi = cluster_vector[cluster_vector == i] %>% names() + + if (is.null(background)){ + response = names(cluster_vector) %in% genes_cluster_oi + names(response) = names(cluster_vector) + } else { + background = background[(background %in% genes_cluster_oi) == FALSE] + background_logical = rep(FALSE,times = length(background)) + names(background_logical) = background + cluster_logical = rep(TRUE,times = length(genes_cluster_oi)) + names(cluster_logical) = genes_cluster_oi + response = c(background_logical,cluster_logical) + } + return(list(name = paste0(setting_name,"_cluster_",i), from = setting_from, response = response)) +} +#' @title Predict activities of ligands in regulating expression of a gene set of interest +#' +#' @description \code{predict_ligand_activities} Predict activities of ligands in regulating expression of a gene set of interest. Ligand activities are defined as how well they predict the observed transcriptional response (i.e. gene set) according to the NicheNet model. +#' +#' @usage +#' predict_ligand_activities(geneset, background_expressed_genes,ligand_target_matrix, potential_ligands, single = TRUE,...) +#' +#' @param geneset Character vector of the gene symbols of genes of which the expression is potentially affected by ligands from the interacting cell. +#' @param background_expressed_genes Character vector of gene symbols of the background, non-affected, genes (can contain the symbols of the affected genes as well). +#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). +#' @param potential_ligands Character vector giving the gene symbols of the potentially active ligands you want to define ligand activities for. +#' @param single TRUE if you want to calculate ligand activity scores by considering every ligand individually (recommended). FALSE if you want to calculate ligand activity scores as variable importances of a multi-ligand classification model. +#' @param ... Additional parameters for get_multi_ligand_importances if single = FALSE. +#' +#' @return A tibble giving several ligand activity scores. Following columns in the tibble: $test_ligand, $auroc, $aupr and $pearson. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' ligand_activities = predict_ligand_activities(geneset = geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +#' } +#' +#' @export +#' +predict_ligand_activities = function(geneset,background_expressed_genes,ligand_target_matrix, potential_ligands, single = TRUE,...){ + setting = list(geneset) %>% + lapply(convert_gene_list_settings_evaluation, name = "gene set", ligands_oi = potential_ligands, background = background_expressed_genes) + if (single == TRUE){ + settings_ligand_prediction = setting %>% + convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = TRUE) + ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE) %>% bind_rows() + + } else { + settings_ligand_prediction = setting %>% + convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = FALSE) + ligand_importances = settings_ligand_prediction %>% lapply(get_multi_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE, ...) %>% bind_rows() + + } + return(ligand_importances %>% select(test_ligand,auroc,aupr,aupr_corrected, pearson)) +} +#' @title Infer weighted active ligand-target links between a possible ligand and target genes of interest +#' +#' @description \code{get_weighted_ligand_target_links} Infer active ligand target links between possible lignands and genes belonging to a gene set of interest: consider the intersect between the top n targets of a ligand and the gene set. +#' +#' @usage +#' get_weighted_ligand_target_links(ligand, geneset,ligand_target_matrix,n = 250) +#' +#' @param geneset Character vector of the gene symbols of genes of which the expression is potentially affected by ligands from the interacting cell. +#' @param ligand Character vector giving the gene symbols of the potentially active ligand for which you want to find target genes. +#' @param n The top n of targets per ligand that will be considered. Default: 250. +#' @inheritParams predict_ligand_activities +#' +#' @return A tibble with columns ligand, target and weight (i.e. regulatory potential score). +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligand = "TNF" +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' active_ligand_target_links_df = get_weighted_ligand_target_links(ligand = potential_ligand, geneset = geneset, ligand_target_matrix = ligand_target_matrix, n = 250) +#' } +#' +#' @export +#' +get_weighted_ligand_target_links = function(ligand, geneset,ligand_target_matrix,n = 250){ + top_n_score = ligand_target_matrix[,ligand] %>% sort(decreasing = T) %>% head(n) %>% min() + targets = intersect(ligand_target_matrix[,ligand] %>% .[. >= top_n_score ] %>% names(),geneset) + if (length(targets) == 0){ + ligand_target_weighted_df = tibble(ligand = ligand, target = NA, weight = NA) + } else if (length(targets) == 1) { + ligand_target_weighted_df = tibble(ligand = ligand, target = targets, weight = ligand_target_matrix[targets,ligand]) + } else { + ligand_target_weighted_df = tibble(ligand = ligand, target = names(ligand_target_matrix[targets,ligand])) %>% inner_join(tibble(target = names(ligand_target_matrix[targets,ligand]), weight = ligand_target_matrix[targets,ligand]), by = "target") + } + return(ligand_target_weighted_df) +} +#' @title Prepare heatmap visualization of the ligand-target links starting from a ligand-target tibble. +#' +#' @description \code{prepare_ligand_target_visualization} Prepare heatmap visualization of the ligand-target links starting from a ligand-target tibble. Get regulatory potential scores between all pairs of ligands and targets documented in this tibble. For better visualization, we propose to define a quantile cutoff on the ligand-target scores. +#' +#' @usage +#' prepare_ligand_target_visualization(ligand_target_df, ligand_target_matrix, cutoff = 0.25) +#' +#' @param cutoff Quantile cutoff on the ligand-target scores of the input weighted ligand-target network. Scores under this cutoff will be set to 0. +#' @param ligand_target_df Tibble with columns 'ligand', 'target' and 'weight' to indicate ligand-target regulatory potential scores of interest. +#' @inheritParams predict_ligand_activities +#' +#' @return A matrix giving the ligand-target regulatory potential scores between ligands of interest and their targets genes part of the gene set of interest. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' active_ligand_target_links_df = potential_ligands %>% lapply(get_weighted_ligand_target_links, geneset = geneset, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows() +#' active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25) +#' } +#' +#' @export +#' +prepare_ligand_target_visualization = function(ligand_target_df, ligand_target_matrix, cutoff = 0.25){ + + # define a cutoff on the ligand-target links + cutoff_include_all_ligands = ligand_target_df$weight %>% quantile(cutoff) + + # give a score of 0 to ligand-target links not higher than the defined cutoff + ligand_target_matrix_oi = ligand_target_matrix + ligand_target_matrix_oi[ligand_target_matrix_oi < cutoff_include_all_ligands] = 0 + + # consider only targets belonging to the top250 targets of individual ligands and with at least one ligand-link with score higher than the defined cutoff + ligand_target_vis = ligand_target_matrix_oi[ligand_target_df$target %>% unique(),ligand_target_df$ligand %>% unique()] + dim(ligand_target_vis) = c(length(ligand_target_df$target %>% unique()), length(ligand_target_df$ligand %>% unique())) + all_targets = ligand_target_df$target %>% unique() + all_ligands = ligand_target_df$ligand %>% unique() + rownames(ligand_target_vis) = all_targets + colnames(ligand_target_vis) = all_ligands + + keep_targets = all_targets[ligand_target_vis %>% apply(1,sum) > 0] + keep_ligands = all_ligands[ligand_target_vis %>% apply(2,sum) > 0] + + + ligand_target_vis_filtered = ligand_target_vis[keep_targets,keep_ligands] + + + if(is.matrix(ligand_target_vis_filtered)){ + rownames(ligand_target_vis_filtered) = keep_targets + colnames(ligand_target_vis_filtered) = keep_ligands + + } else { + dim(ligand_target_vis_filtered) = c(length(keep_targets), length(keep_ligands)) + rownames(ligand_target_vis_filtered) = keep_targets + colnames(ligand_target_vis_filtered) = keep_ligands + } + + if(nrow(ligand_target_vis_filtered) > 1 & ncol(ligand_target_vis_filtered) > 1){ + distoi = dist(1-cor(t(ligand_target_vis_filtered))) + hclust_obj = hclust(distoi, method = "ward.D2") + order_targets = hclust_obj$labels[hclust_obj$order] + + distoi_targets = dist(1-cor(ligand_target_vis_filtered)) + hclust_obj = hclust(distoi_targets, method = "ward.D2") + order_ligands = hclust_obj$labels[hclust_obj$order] + + } else { + order_targets = rownames(ligand_target_vis_filtered) + order_ligands = colnames(ligand_target_vis_filtered) + } + + vis_ligand_target_network = ligand_target_vis_filtered[order_targets,order_ligands] + dim(vis_ligand_target_network) = c(length(order_targets), length(order_ligands)) + rownames(vis_ligand_target_network) = order_targets + colnames(vis_ligand_target_network) = order_ligands + return(vis_ligand_target_network) + +} +#' @title Assess probability that a target gene belongs to the geneset based on a multi-ligand random forest model +#' +#' @description \code{assess_rf_class_probabilities} Assess probability that a target gene belongs to the geneset based on a multi-ligand random forest model (with cross-validation). Target genes and background genes will be split in different groups in a stratified way. +#' +#' @usage +#' assess_rf_class_probabilities(round,folds,geneset,background_expressed_genes,ligands_oi,ligand_target_matrix) +#' +#' @param ligands_oi Character vector giving the gene symbols of the ligands you want to build the multi-ligand with. +#' @param round Integer describing which fold of the cross-validation scheme it is. +#' @param folds Integer describing how many folds should be used. +#' @inheritParams predict_ligand_activities +#' +#' @return A tibble with columns: $gene, $response, $prediction. Response indicates whether the gene belongs to the geneset of interest, prediction gives the probability this gene belongs to the geneset according to the random forest model. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' fold1_rf_prob = assess_rf_class_probabilities(round = 1,folds = 2,geneset = geneset,background_expressed_genes = background_expressed_genes ,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) +#' } +#' +#' @export +#' +assess_rf_class_probabilities = function(round,folds,geneset,background_expressed_genes,ligands_oi, ligand_target_matrix){ + set.seed(round) + geneset_shuffled = sample(geneset, size = length(geneset)) + geneset_grouped = split(geneset_shuffled,1:folds) + + strict_background_expressed_genes = background_expressed_genes[!background_expressed_genes %in% geneset] + set.seed(round) + strict_background_expressed_genes_shuffled = sample(strict_background_expressed_genes, size = length(strict_background_expressed_genes)) + strict_background_expressed_genes_grouped = split(strict_background_expressed_genes_shuffled,1:folds) + + geneset_predictions_all = seq(length(geneset_grouped)) %>% lapply(rf_target_prediction,geneset_grouped,strict_background_expressed_genes_grouped,ligands_oi,ligand_target_matrix) %>% bind_rows() + geneset_predictions_all = geneset_predictions_all %>% mutate(response = gsub("\\.","",response) %>% as.logical()) +} +#' @title Assess how well classification predictions accord to the expected response +#' +#' @description \code{classification_evaluation_continuous_pred_wrapper} Assess how well classification predictions accord to the expected response. +#' +#' @usage +#' classification_evaluation_continuous_pred_wrapper(response_prediction_tibble) +#' +#' @param response_prediction_tibble Tibble with columns "response" and "prediction" (e.g. output of function `assess_rf_class_probabilities`) +#' +#' @return A tibble showing several classification evaluation metrics. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' fold1_rf_prob = assess_rf_class_probabilities(round = 1,folds = 2,geneset = geneset,background_expressed_genes = background_expressed_genes ,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) +# classification_evaluation_continuous_pred_wrapper(fold1_rf_prob) +#' } +#' +#' @export +#' +classification_evaluation_continuous_pred_wrapper = function(response_prediction_tibble) { + prediction_performances = classification_evaluation_continuous_pred(response_prediction_tibble$prediction, response_prediction_tibble$response, iregulon = FALSE) + return(prediction_performances) +} +#' @title Find which genes were among the top-predicted targets genes in a specific cross-validation round and see whether these genes belong to the gene set of interest as well. +#' +#' @description \code{get_top_predicted_genes} Find which genes were among the top-predicted targets genes in a specific cross-validation round and see whether these genes belong to the gene set of interest as well. +#' +#' @usage +#' get_top_predicted_genes(round,gene_prediction_list, quantile_cutoff = 0.95) +#' +#' @param gene_prediction_list List with per round of cross-validation: a tibble with columns "gene", "prediction" and "response" (e.g. output of function `assess_rf_class_probabilities`) +#' @param round Integer describing which fold of the cross-validation scheme it is. +#' @param quantile_cutoff Quantile of which genes should be considered as top-predicted targets. Default: 0.95, thus considering the top 5 percent predicted genes as predicted targets. +#' +#' @return A tibble indicating for every gene whether it belongs to the geneset and whether it belongs to the top-predicted genes in a specific cross-validation round. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' gene_predictions_list = seq(2) %>% lapply(assess_rf_class_probabilities,2, geneset = geneset,background_expressed_genes = background_expressed_genes,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) +#' seq(length(gene_predictions_list)) %>% lapply(get_top_predicted_genes,gene_predictions_list) +#' } +#' +#' @export +#' +get_top_predicted_genes = function(round,gene_prediction_list, quantile_cutoff = 0.95){ + affected_gene_predictions = gene_prediction_list[[round]] + predicted_positive = affected_gene_predictions %>% + arrange(-prediction) %>% + mutate(predicted_top_target = prediction >= quantile(prediction,quantile_cutoff)) %>% + filter(predicted_top_target) %>% rename(true_target = response) %>% + select(gene,true_target,predicted_top_target) + colnames(predicted_positive) = c("gene","true_target",paste0("predicted_top_target_round",round)) + return(predicted_positive) +} +#' @title Determine the fraction of genes belonging to the geneset or background and to the top-predicted genes. +#' +#' @description \code{calculate_fraction_top_predicted} Defines the fraction of genes belonging to the geneset or background and to the top-predicted genes. +#' +#' @usage +#' calculate_fraction_top_predicted(affected_gene_predictions, quantile_cutoff = 0.95) +#' +#' @param affected_gene_predictions Tibble with columns "gene", "prediction" and "response" (e.g. output of function `assess_rf_class_probabilities`) +#' @param quantile_cutoff Quantile of which genes should be considered as top-predicted targets. Default: 0.95, thus considering the top 5 percent predicted genes as predicted targets. +#' +#' @return A tibble indicating the number of genes belonging to the gene set of interest or background (true_target column), the number and fraction of genes of these gruops that were part of the top predicted targets in a specific cross-validation round. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' gene_predictions_list = seq(2) %>% lapply(assess_rf_class_probabilities,2, geneset = geneset,background_expressed_genes = background_expressed_genes,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) +#' target_prediction_performances_discrete_cv = gene_predictions_list %>% lapply(calculate_fraction_top_predicted) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(gene_predictions_list), each = 2)) + +#' } +#' +#' @export +#' +calculate_fraction_top_predicted = function(affected_gene_predictions, quantile_cutoff = 0.95){ + predicted_positive = affected_gene_predictions %>% arrange(-prediction) %>% filter(prediction >= quantile(prediction,quantile_cutoff)) %>% group_by(response) %>% count() %>% rename(positive_prediction = n) %>% rename(true_target = response) + all = affected_gene_predictions %>% arrange(-prediction) %>% rename(true_target = response) %>% group_by(true_target) %>% count() + inner_join(all,predicted_positive, by = "true_target") %>% mutate(fraction_positive_predicted = positive_prediction/n) +} +#' @title Perform a Fisher's exact test to determine whether genes belonging to the gene set of interest are more likely to be part of the top-predicted targets. +#' +#' @description \code{calculate_fraction_top_predicted_fisher} Performs a Fisher's exact test to determine whether genes belonging to the gene set of interest are more likely to be part of the top-predicted targets. +#' +#' @usage +#' calculate_fraction_top_predicted_fisher(affected_gene_predictions, quantile_cutoff = 0.95, p_value_output = TRUE) +#' +#' @param p_value_output Should total summary or p-value be returned as output? Default: TRUE. +#' @inheritParams calculate_fraction_top_predicted +#' +#' @return Summary of the Fisher's exact test or just the p-value +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' geneset = c("SOCS2","SOCS3", "IRF1") +#' background_expressed_genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' gene_predictions_list = seq(2) %>% lapply(assess_rf_class_probabilities,2, geneset = geneset,background_expressed_genes = background_expressed_genes,ligands_oi = potential_ligands,ligand_target_matrix = ligand_target_matrix) +#' target_prediction_performances_fisher_pval = gene_predictions_list %>% lapply(calculate_fraction_top_predicted_fisher) %>% unlist() %>% mean() +#' } +#' +#' @export +#' +calculate_fraction_top_predicted_fisher = function(affected_gene_predictions, quantile_cutoff = 0.95, p_value_output = TRUE){ + predicted_positive = affected_gene_predictions %>% arrange(-prediction) %>% filter(prediction >= quantile(prediction,quantile_cutoff)) %>% group_by(response) %>% count() %>% rename(positive_prediction = n) + all = affected_gene_predictions %>% arrange(-prediction) %>% group_by(response) %>% count() + results_df = inner_join(all,predicted_positive, by = "response") %>% mutate(fraction_positive_predicted = positive_prediction/n) + tp = results_df %>% filter(response == TRUE) %>% .$positive_prediction + fp = results_df %>% filter(response == FALSE) %>% .$positive_prediction + fn = (results_df %>% filter(response == TRUE) %>% .$n) - (results_df %>% filter(response == TRUE) %>% .$positive_prediction) + tn = (results_df %>% filter(response == FALSE) %>% .$n) - (results_df %>% filter(response == FALSE) %>% .$positive_prediction) + contingency_table = matrix(c(tp,fp,fn,tn), nrow = 2,dimnames = list(c("geneset", "background"), c("top-predicted", "no-top-predicted"))) + summary = fisher.test(contingency_table, alternative = "greater") + if(p_value_output == TRUE){ + return(summary$p.value) + } else { + return(summary) + } +} +#' @title Cut off outer quantiles and rescale to a [0, 1] range +#' +#' @description \code{scale_quantile} Cut off outer quantiles and rescale to a [0, 1] range +#' +#' @usage +#' scale_quantile(x, outlier_cutoff = .05) +#' +#' @param x A numeric vector, matrix or data frame. +#' @param outlier_cutoff The quantile cutoff for outliers (default 0.05). +#' +#' @return The centered, scaled matrix or vector. The numeric centering and scalings used are returned as attributes. +#' +#' @examples +#' \dontrun{ +#' ## Generate a matrix from a normal distribution +#' ## with a large standard deviation, centered at c(5, 5) +#' x <- matrix(rnorm(200*2, sd = 10, mean = 5), ncol = 2) +#' +#' ## Scale the dataset between [0,1] +#' x_scaled <- scale_quantile(x) +#' +#' ## Show ranges of each column +#' apply(x_scaled, 2, range) +#' } +#' @export +scale_quantile <- function(x, outlier_cutoff = .05) { + # same function as scale_quantile from dynutils (copied here for use in vignette to avoid having dynutils as dependency) + # credits to the amazing (w/z)outer and r(obrecht)cannood(t) from dynverse (https://github.com/dynverse)! + if (is.null(dim(x))) { + sc <- scale_quantile(matrix(x, ncol = 1), outlier_cutoff = outlier_cutoff) + out <- sc[,1] + names(out) <- names(x) + attr(out, "addend") <- attr(sc, "addend") + attr(out, "multiplier") <- attr(sc, "multiplier") + out + } else { + quants <- apply(x, 2, stats::quantile, c(outlier_cutoff, 1 - outlier_cutoff), na.rm = TRUE) + + addend <- -quants[1,] + divisor <- apply(quants, 2, diff) + divisor[divisor == 0] <- 1 + + apply_quantile_scale(x, addend, 1 / divisor) + } +} +#' @title Prepare single-cell expression data to perform ligand activity analysis +#' +#' @description \code{convert_single_cell_expression_to_settings} Prepare single-cell expression data to perform ligand activity analysis +#' +#' @usage +#' convert_single_cell_expression_to_settings(cell_id, expression_matrix, setting_name, setting_from, regression = FALSE) +#' +#' @param cell_id Identity of the cell of interest +#' @param setting_name Name of the dataset +#' @param expression_matrix Gene expression matrix of single-cells +#' @param setting_from Character vector giving the gene symbols of the potentially active ligands you want to define ligand activities for. +#' @param regression Perform regression-based ligand activity analysis (TRUE) or classification-based ligand activity analysis (FALSE) by considering the genes expressed higher than the 0.975 quantiles as genes of interest. Default: FALSE. +#' +#' @return A list with slots $name, $from and $response respectively containing the setting name, potentially active ligands and the response to predict (whether genes belong to gene set of interest; i.e. most strongly expressed genes in a cell) +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' cell_ids = c("cell1","cell2") +#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) +#' rownames(expression_scaled) = cell_ids +#' colnames(expression_scaled) = genes +#' settings = convert_single_cell_expression_to_settings(cell_id = cell_ids[1], expression_matrix = expression_scaled, setting_name = "test", setting_from = potential_ligands) +#' } +#' +#' @export +#' +convert_single_cell_expression_to_settings = function(cell_id, expression_matrix, setting_name, setting_from, regression = FALSE){ + # input check + requireNamespace("dplyr") + + if (regression == TRUE){ + response = expression_matrix[cell_id,] + } else { + response_continuous = expression_matrix[cell_id,] + response = response_continuous >= quantile(response_continuous,0.975) + } + return(list(name = paste0(setting_name,"_",cell_id), from = setting_from, response = response)) +} +#' @title Single-cell ligand activity prediction +#' +#' @description \code{predict_single_cell_ligand_activities} For every individual cell of interest, predict activities of ligands in regulating expression of genes that are stronger expressed in that cell compared to other cells (0.975 quantile). Ligand activities are defined as how well they predict the observed transcriptional response (i.e. gene set) according to the NicheNet model. +#' +#' @usage +#' predict_single_cell_ligand_activities(cell_ids, expression_scaled,ligand_target_matrix, potential_ligands, single = TRUE,...) +#' +#' @param cell_ids Identities of cells for which the ligand activities should be calculated. +#' @param expression_scaled Scaled expression matrix of single-cells (scaled such that high values indicate that a gene is stronger expressed in that cell compared to others) +#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). +#' @param potential_ligands Character vector giving the gene symbols of the potentially active ligands you want to define ligand activities for. +#' @param single TRUE if you want to calculate ligand activity scores by considering every ligand individually (recommended). FALSE if you want to calculate ligand activity scores as variable importances of a multi-ligand classification model. +#' @param ... Additional parameters for get_multi_ligand_importances if single = FALSE. +#' +#' @return A tibble giving several ligand activity scores for single cells. Following columns in the tibble: $setting, $test_ligand, $auroc, $aupr and $pearson. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' cell_ids = c("cell1","cell2") +#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) +#' rownames(expression_scaled) = cell_ids +#' colnames(expression_scaled) = genes +#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +#' } +#' +#' @export +#' +predict_single_cell_ligand_activities = function(cell_ids, expression_scaled,ligand_target_matrix, potential_ligands, single = TRUE,...){ + settings_single_cell_ligand_pred = cell_ids %>% lapply(convert_single_cell_expression_to_settings, expression_scaled, "", potential_ligands) + if (single == TRUE){ + settings_ligand_prediction = settings_single_cell_ligand_pred %>% convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = TRUE) + + ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE) %>% bind_rows() %>% mutate(setting = gsub("^_","",setting)) + + } else { + settings_ligand_prediction = settings_single_cell_ligand_pred %>% convert_settings_ligand_prediction(all_ligands = potential_ligands, validation = FALSE, single = FALSE) + + ligand_importances = settings_ligand_prediction %>% lapply(get_multi_ligand_importances,ligand_target_matrix = ligand_target_matrix, known = FALSE, ...) %>% bind_rows() %>% mutate(setting = gsub("^_","",setting)) + + } + return(ligand_importances %>% select(setting,test_ligand,auroc,aupr,pearson)) +} +#' @title Normalize single-cell ligand activities +#' +#' @description \code{normalize_single_cell_ligand_activities} Normalize single-cell ligand activities to make ligand activities over different cells comparable. +#' @usage +#' normalize_single_cell_ligand_activities(ligand_activities) +#' +#' @param ligand_activities Output from the function `predict_single_cell_ligand_activities`. +#' +#' @return A tibble giving the normalized ligand activity scores for single cells. Following columns in the tibble: $cell, $ligand, $pearson, which is the normalized ligand activity value. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' cell_ids = c("cell1","cell2") +#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) +#' rownames(expression_scaled) = cell_ids +#' colnames(expression_scaled) = genes +#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +#' normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) +#' } +#' +#' @export +#' +normalize_single_cell_ligand_activities = function(ligand_activities){ + single_ligand_activities_aupr_norm = ligand_activities %>% + group_by(setting) %>% + mutate(aupr = nichenetr::scaling_modified_zscore(aupr)) %>% + ungroup() %>% + rename(cell = setting, ligand = test_ligand) %>% + distinct(cell,ligand,aupr) + + single_ligand_activities_aupr_norm_df = single_ligand_activities_aupr_norm %>% + spread(cell, aupr,fill = min(.$aupr)) + + single_ligand_activities_aupr_norm_matrix = single_ligand_activities_aupr_norm_df %>% + select(-ligand) %>% + t() %>% + magrittr::set_colnames(single_ligand_activities_aupr_norm_df$ligand) + + single_ligand_activities_aupr_norm_df = single_ligand_activities_aupr_norm_matrix %>% + data.frame() %>% + rownames_to_column("cell") %>% + as_tibble() +} +#' @title Perform a correlation and regression analysis between cells' ligand activities and property scores of interest +#' +#' @description \code{single_ligand_activity_score_regression} Performs a correlation and regression analysis between cells' ligand activities and property scores of interest. +#' @usage +#' single_ligand_activity_score_regression(ligand_activities, scores_tbl) +#' +#' @param ligand_activities Output from the function `normalize_single_cell_ligand_activities`. +#' @param scores_tbl a tibble containing scores for every cell (columns: $cell and $score). The score should correspond to the property of interest +#' +#' @return A tibble giving for every ligand, the correlation/regression coefficients giving information about the relation between its activity and the property of interest. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' cell_ids = c("cell1","cell2") +#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) +#' rownames(expression_scaled) = cell_ids +#' colnames(expression_scaled) = genes +#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +#' normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) +#' cell_scores_tbl = tibble(cell = cell_ids, score = c(1,4)) +#' regression_analysis_output = single_ligand_activity_score_regression(normalized_ligand_activities,cell_scores_tbl) +#' } +#' +#' @export +#' +single_ligand_activity_score_regression = function(ligand_activities, scores_tbl){ + combined = inner_join(scores_tbl,ligand_activities) + output = lapply(combined %>% select(-cell, -score), function(activity_prediction, combined){ + geneset_score = combined$score + metrics = regression_evaluation(activity_prediction,geneset_score) + }, combined) + ligands = names(output) + output_df = output %>% bind_rows() %>% mutate(ligand = ligands) + return(output_df) +} +#' @title Assess how well cells' ligand activities predict a binary property of interest of cells. +#' +#' @description \code{single_ligand_activity_score_classification} Evaluates classification performances: it assesses how well cells' ligand activities can predict a binary property of interest. +#' @usage +#' single_ligand_activity_score_classification(ligand_activities, scores_tbl) +#' +#' @param ligand_activities Output from the function `normalize_single_cell_ligand_activities`. +#' @param scores_tbl a tibble indicating for every cell whether the property of interests holds TRUE or FALSE (columns: $cell: character vector with cell ids and $score: logical vector according to property of interest). +#' +#' @return A tibble giving for every ligand, the classification performance metrics giving information about the relation between its activity and the property of interest. +#' +#' @examples +#' \dontrun{ +#' weighted_networks = construct_weighted_networks(lr_network, sig_network, gr_network,source_weights_df) +#' ligands = list("TNF","BMP2","IL4") +#' ligand_target_matrix = construct_ligand_target_matrix(weighted_networks, ligands, ltf_cutoff = 0, algorithm = "PPR", damping_factor = 0.5, secondary_targets = FALSE) +#' potential_ligands = c("TNF","BMP2","IL4") +#' genes = c("SOCS2","SOCS3","IRF1","ICAM1","ID1","ID2","ID3") +#' cell_ids = c("cell1","cell2") +#' expression_scaled = matrix(rnorm(length(genes)*2, sd = 0.5, mean = 0.5), nrow = 2) +#' rownames(expression_scaled) = cell_ids +#' colnames(expression_scaled) = genes +#' ligand_activities = predict_single_cell_ligand_activities(cell_ids = cell_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +#' normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) +#' cell_scores_tbl = tibble(cell = cell_ids, score = c(TRUE,FALSE)) +#' classification_analysis_output = single_ligand_activity_score_classification(normalized_ligand_activities,cell_scores_tbl) +#' } +#' +#' @export +#' +single_ligand_activity_score_classification = function(ligand_activities, scores_tbl){ + combined = inner_join(scores_tbl, ligand_activities) + output = lapply(combined %>% select(-cell, -score), function(activity_prediction, + combined) { + geneset_score = combined$score + + metrics = classification_evaluation_continuous_pred(activity_prediction, + geneset_score, iregulon = F) + }, combined) + + + ligands = names(output) + output_df = output %>% bind_rows() %>% mutate(ligand = ligands) + return(output_df) +} +single_ligand_activity_score_regression = function(ligand_activities, scores_tbl){ + combined = inner_join(scores_tbl,ligand_activities) + output = lapply(combined %>% select(-cell, -score), function(activity_prediction, combined){ + geneset_score = combined$score + metrics = regression_evaluation(activity_prediction,geneset_score) + }, combined) + ligands = names(output) + output_df = output %>% bind_rows() %>% mutate(ligand = ligands) + return(output_df) +} +#' @title Perform NicheNet analysis on Seurat object: explain DE between conditions +#' +#' @description \code{nichenet_seuratobj_aggregate} Perform NicheNet analysis on Seurat object: explain differential expression (DE) in a receiver celltype between two different conditions by ligands expressed by sender cells +#' @usage +#' nichenet_seuratobj_aggregate(receiver, seurat_obj, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30,top_n_targets = 200, cutoff_visualization = 0.33,verbose = TRUE, assay_oi = NULL) +#' +#' @param receiver Name of cluster identity/identities of cells that are presumably affected by intercellular communication with other cells +#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. +#' @param condition_colname Name of the column in the meta data dataframe that indicates which condition/sample cells were coming from. +#' @param condition_oi Condition of interest in which receiver cells were presumably affected by other cells. Should be a name present in the `condition_colname` column of the metadata. +#' @param condition_reference The second condition (e.g. reference or steady-state condition). Should be a name present in the `condition_colname` column of the metadata. +#' @param sender Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all". +#' @param expression_pct To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 +#' @param lfc_cutoff Cutoff on log fold change in the wilcoxon differential expression test. Default: 0.25. +#' @param geneset Indicate whether to consider all DE genes between condition 1 and 2 ("DE"), or only genes upregulated in condition 1 ("up"), or only genes downregulad in condition 1 ("down"). +#' @param filter_top_ligands Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE. +#' @param top_n_ligands Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 30. +#' @param top_n_targets To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200. +#' @param cutoff_visualization Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33. +#' @param ligand_target_matrix The NicheNet ligand-target matrix of the organism of interest denoting regulatory potential scores between ligands and targets (ligands in columns). +#' @param lr_network The ligand-receptor network (columns that should be present: $from, $to) of the organism of interest. +#' @param weighted_networks The NicheNet weighted networks of the organism of interest denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network. +#' @param verbose Print out the current analysis stage. Default: TRUE. +#' @inheritParams get_expressed_genes +#' +#' @return A list with the following elements: +#' $ligand_activities: data frame with output ligand activity analysis; +#' $top_ligands: top_n ligands based on ligand activity; +#' $top_targets: active, affected target genes of these ligands; +#' $top_receptors: receptors of these ligands; +#' $ligand_target_matrix: matrix indicating regulatory potential scores between active ligands and their predicted targets; +#' $ligand_target_heatmap: heatmap of ligand-target regulatory potential; +#' $ligand_target_df: data frame showing regulatory potential scores of predicted active ligand-target network; +#' $ligand_activity_target_heatmap: heatmap showing both ligand activity scores and target genes of these top ligands; +#' $ligand_expression_dotplot: expression dotplot of the top ligands; +#' $ligand_differential_expression_heatmap = differential expression heatmap of the top ligands; +#' $ligand_receptor_matrix: matrix of ligand-receptor interactions; +#' $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; +#' $ligand_receptor_df: data frame of ligand-receptor interactions; +#' $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; +#' $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. +#' +#' @import Seurat +#' @import dplyr +#' @importFrom magrittr set_rownames set_colnames +#' +#' @examples +#' \dontrun{ +#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) +#' lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +#' weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) +#' nichenet_seuratobj_aggregate(receiver = "CD8 T", seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) +#' } +#' +#' @export +#' +nichenet_seuratobj_aggregate = function(receiver, seurat_obj, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks, + expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE ,top_n_ligands = 30, + top_n_targets = 200, cutoff_visualization = 0.33, + verbose = TRUE, assay_oi = NULL) +{ + requireNamespace("Seurat") + requireNamespace("dplyr") + + # input check + if(! "RNA" %in% names(seurat_obj@assays)){ + if ("Spatial" %in% names(seurat_obj@assays)){ + warning("You are going to apply NicheNet on a spatial seurat object. Be sure it's ok to use NicheNet the way you are planning to do it. So this means: you should have changes in gene expression in receiver cells caused by cell-cell interactions. Note that in the case of spatial transcriptomics, you are not dealing with single cells but with 'spots' containing multiple cells of the same of different cell types.") + + if (class(seurat_obj@assays$Spatial@data) != "matrix" & class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { + warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") + } + if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") + } + }} else { + if (class(seurat_obj@assays$RNA@data) != "matrix" & + class(seurat_obj@assays$RNA@data) != "dgCMatrix") { + warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") + } + + if ("integrated" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == + 0) + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") + } + else if ("SCT" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == + 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") + } + } + else { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") + } + } + } + + if(!condition_colname %in% colnames(seurat_obj@meta.data)) + stop("Your column indicating the conditions/samples of interest should be in the metadata dataframe") + if(sum(condition_oi %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_oi)) + stop("condition_oi should be in the condition-indicating column") + if(sum(condition_reference %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_reference)) + stop("condition_reference should be in the condition-indicating column") + if(sum(receiver %in% unique(Idents(seurat_obj))) != length(receiver)) + stop("The defined receiver cell type should be an identity class of your seurat object") + if(length(sender) == 1){ + if(sender != "all" & sender != "undefined"){ + if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ + stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") + } + } + } else { + if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ + stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") + } + } + if(geneset != "DE" & geneset != "up" & geneset != "down") + stop("geneset should be 'DE', 'up' or 'down'") + if("integrated" %in% names(seurat_obj@assays)){ + warning("Seurat object is result from the Seurat integration workflow. Make sure that the way of defining expressed and differentially expressed genes in this wrapper is appropriate for your integrated data.") + } + # Read in and process NicheNet networks, define ligands and receptors + if (verbose == TRUE){print("Read in and process NicheNet's networks")} + weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) + + ligands = lr_network %>% pull(from) %>% unique() + receptors = lr_network %>% pull(to) %>% unique() + if (verbose == TRUE){print("Define expressed ligands and receptors in receiver and sender cells")} + + # step1 nichenet analysis: get expressed genes in sender and receiver cells + + ## receiver + list_expressed_genes_receiver = receiver %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_receiver) = receiver %>% unique() + expressed_genes_receiver = list_expressed_genes_receiver %>% unlist() %>% unique() + + ## sender + if (length(sender) == 1){ + if (sender == "all"){ + sender_celltypes = Idents(seurat_obj) %>% levels() + list_expressed_genes_sender = sender_celltypes %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + + } else if (sender == "undefined") { + if("integrated" %in% names(seurat_obj@assays)){ + expressed_genes_sender = union(seurat_obj@assays$integrated@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) + } else { + expressed_genes_sender = union(seurat_obj@assays$RNA@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) + } + } else if (sender != "all" & sender != "undefined") { + sender_celltypes = sender + list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes %>% unique() + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + } + } else { + sender_celltypes = sender + list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes %>% unique() + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + } + + # step2 nichenet analysis: define background and gene list of interest: here differential expression between two conditions of cell type of interest + if (verbose == TRUE){print("Perform DE analysis in receiver cell")} + + seurat_obj_receiver= subset(seurat_obj, idents = receiver) + seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[[condition_colname]]) + DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = expression_pct) %>% rownames_to_column("gene") + + SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_receiver) + + if(SeuratV4 == TRUE){ + if (geneset == "DE"){ + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "up") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "down") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC <= lfc_cutoff) %>% pull(gene) + } + } else { + if (geneset == "DE"){ + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_logFC) >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "up") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "down") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC <= lfc_cutoff) %>% pull(gene) + } + } + + + geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] + if (length(geneset_oi) == 0){ + stop("No genes were differentially expressed") + } + background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] + + # step3 nichenet analysis: define potential ligands + expressed_ligands = intersect(ligands,expressed_genes_sender) + expressed_receptors = intersect(receptors,expressed_genes_receiver) + if (length(expressed_ligands) == 0){ + stop("No ligands expressed in sender cell") + } + if (length(expressed_receptors) == 0){ + stop("No receptors expressed in receiver cell") + } + potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() + if (length(potential_ligands) == 0){ + stop("No potentially active ligands") + } + + + if (verbose == TRUE){print("Perform NicheNet ligand activity analysis")} + + # step4 perform NicheNet's ligand activity analysis + ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + ligand_activities = ligand_activities %>% + arrange(-aupr_corrected) %>% + mutate(rank = rank(desc(aupr_corrected))) + + if(filter_top_ligands == TRUE){ + best_upstream_ligands = ligand_activities %>% top_n(top_n_ligands, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() + } else { + best_upstream_ligands = ligand_activities %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() + } + + if (verbose == TRUE){print("Infer active target genes of the prioritized ligands")} + + # step5 infer target genes of the top-ranked ligands + active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = top_n_targets) %>% bind_rows() %>% drop_na() + if(nrow(active_ligand_target_links_df) > 0){ + active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = cutoff_visualization) + order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() + order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() + rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() + colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() + + order_targets = order_targets %>% intersect(rownames(active_ligand_target_links)) + order_ligands = order_ligands %>% intersect(colnames(active_ligand_target_links)) + + vis_ligand_target = active_ligand_target_links[order_targets,order_ligands,drop=FALSE] %>% t() + p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) #+ scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.006,0.012)) + } else { + vis_ligand_target = NULL + p_ligand_target_network = NULL + print("no highly likely active targets found for top ligands") + } + # combined heatmap: overlay ligand activities + ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) + + rownames(ligand_aupr_matrix) = rownames(ligand_aupr_matrix) %>% make.names() + colnames(ligand_aupr_matrix) = colnames(ligand_aupr_matrix) %>% make.names() + + vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") + p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") + theme(legend.text = element_text(size = 9)) + p_ligand_aupr + + figures_without_legend = cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), + align = "hv", + nrow = 1, + rel_widths = c(ncol(vis_ligand_aupr)+10, ncol(vis_ligand_target))) + legends = cowplot::plot_grid( + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), + nrow = 1, + align = "h") + + combined_plot = cowplot::plot_grid(figures_without_legend, + legends, + rel_heights = c(10,2), nrow = 2, align = "hv") + + # ligand-receptor plot + # get the ligand-receptor network of the top-ranked ligands + if (verbose == TRUE){print("Infer receptors of the prioritized ligands")} + + lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) + best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() + + lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) + + lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) + lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) + + if (nrow(lr_network_top_matrix) > 1){ + dist_receptors = dist(lr_network_top_matrix, method = "binary") + hclust_receptors = hclust(dist_receptors, method = "ward.D2") + order_receptors = hclust_receptors$labels[hclust_receptors$order] + } else { + order_receptors = rownames(lr_network_top_matrix) + } + if (ncol(lr_network_top_matrix) > 1) { + dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") + hclust_ligands = hclust(dist_ligands, method = "ward.D2") + order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] + } else { + order_ligands_receptor = colnames(lr_network_top_matrix) + } + + order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) + order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) + + vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] + dim(vis_ligand_receptor_network) = c(length(order_receptors), length(order_ligands_receptor)) + rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() + colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() + + p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") + + # DE analysis for each sender cell type -- of course only possible when having sender cell types + if (length(sender) > 1){ + are_there_senders = TRUE + } + if(length(sender) == 1){ + if(sender != "undefined"){ + are_there_senders = TRUE + } else { + are_there_senders = FALSE + } + } + + if (are_there_senders == TRUE){ + if (verbose == TRUE){print("Perform DE analysis in sender cells")} + seurat_obj = subset(seurat_obj, features= potential_ligands) + + DE_table_all = Idents(seurat_obj) %>% levels() %>% intersect(sender_celltypes) %>% lapply(get_lfc_celltype, seurat_obj = seurat_obj, condition_colname = condition_colname, condition_oi = condition_oi, condition_reference = condition_reference, expression_pct = expression_pct, celltype_col = NULL) %>% reduce(full_join, by = "gene") # use this if cell type labels are the identities of your Seurat object -- if not: indicate the celltype_col properly + DE_table_all[is.na(DE_table_all)] = 0 + + # Combine ligand activities with DE information + ligand_activities_de = ligand_activities %>% select(test_ligand, pearson) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene), by = "ligand") + ligand_activities_de[is.na(ligand_activities_de)] = 0 + + # make LFC heatmap + lfc_matrix = ligand_activities_de %>% select(-ligand, -pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) + rownames(lfc_matrix) = rownames(lfc_matrix) %>% make.names() + + order_ligands = order_ligands[order_ligands %in% rownames(lfc_matrix)] + vis_ligand_lfc = lfc_matrix[order_ligands,] + vis_ligand_lfc = vis_ligand_lfc %>% as.matrix(ncol = length(Idents(seurat_obj) %>% levels() %>% intersect(sender_celltypes))) + colnames(vis_ligand_lfc) = vis_ligand_lfc %>% colnames() %>% make.names() + + p_ligand_lfc = vis_ligand_lfc %>% make_threecolor_heatmap_ggplot("Prioritized ligands","LFC in Sender", low_color = "midnightblue",mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red",legend_position = "top", x_axis_position = "top", legend_title = "LFC") + theme(axis.text.y = element_text(face = "italic")) + + # ligand expression Seurat dotplot + real_makenames_conversion = lr_network$from %>% unique() %>% magrittr::set_names(lr_network$from %>% unique() %>% make.names()) + order_ligands_adapted = real_makenames_conversion[order_ligands] + names(order_ligands_adapted) = NULL + + seurat_obj_subset = seurat_obj %>% subset(idents = sender_celltypes) + seurat_obj_subset = SetIdent(seurat_obj_subset, value = seurat_obj_subset[[condition_colname]]) %>% subset(idents = condition_oi) ## only shows cells of the condition of interest + rotated_dotplot = DotPlot(seurat_obj %>% subset(cells = Cells(seurat_obj_subset)), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots + rm(seurat_obj_subset) + + # combined plot + figures_without_legend = cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), + p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), + p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), + align = "hv", + nrow = 1, + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) + + legends = cowplot::plot_grid( + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), + ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), + nrow = 1, + align = "h", rel_widths = c(1.5, 1, 1, 1)) + + combined_plot = cowplot::plot_grid(figures_without_legend, legends, rel_heights = c(10,5), nrow = 2, align = "hv") + combined_plot + + } else { + rotated_dotplot = NULL + p_ligand_lfc = NULL + } + + return(list( + ligand_activities = ligand_activities, + top_ligands = best_upstream_ligands, + top_targets = active_ligand_target_links_df$target %>% unique(), + top_receptors = lr_network_top_df_large$to %>% unique(), + ligand_target_matrix = vis_ligand_target, + ligand_target_heatmap = p_ligand_target_network, + ligand_target_df = active_ligand_target_links_df, + ligand_expression_dotplot = rotated_dotplot, + ligand_differential_expression_heatmap = p_ligand_lfc, + ligand_activity_target_heatmap = combined_plot, + ligand_receptor_matrix = vis_ligand_receptor_network, + ligand_receptor_heatmap = p_ligand_receptor_network, + ligand_receptor_df = lr_network_top_df_large %>% rename(ligand = from, receptor = to), + geneset_oi = geneset_oi, + background_expressed_genes = background_expressed_genes + )) +} +#' @title Determine expressed genes of a cell type from a Seurat object single-cell RNA seq dataset or Seurat spatial transcriptomics dataset +#' +#' @description \code{get_expressed_genes} Return the genes that are expressed in a given cell cluster based on the fraction of cells in that cluster that should express the cell. +#' @usage +#' get_expressed_genes(ident, seurat_obj, pct = 0.10, assay_oi = NULL) +#' +#' @param ident Name of cluster identity/identities of cells +#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. +#' @param pct We consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data. +#' @param assay_oi If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes. +#' +#' @return A character vector with the gene symbols of the expressed genes +#' +#' @import Seurat +#' @import dplyr +#' +#' @examples +#' \dontrun{ +#' get_expressed_genes(ident = "CD8 T", seurat_obj = seuratObj, pct = 0.10) +#' } +#' +#' @export +#' +get_expressed_genes = function(ident, seurat_obj, pct = 0.1, assay_oi = NULL){ + requireNamespace("Seurat") + requireNamespace("dplyr") + + # input check + + + if (!"RNA" %in% names(seurat_obj@assays)) { + if ("Spatial" %in% names(seurat_obj@assays)) { + if (class(seurat_obj@assays$Spatial@data) != "matrix" & + class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { + warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") + } + if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") + } + } + } + else { + if (class(seurat_obj@assays$RNA@data) != "matrix" & + class(seurat_obj@assays$RNA@data) != "dgCMatrix") { + warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") + } + if ("integrated" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == + 0) + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") + } + else if ("SCT" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == + 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") + } + } + else { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") + } + } + } + if (sum(ident %in% unique(Idents(seurat_obj))) != length(ident)) { + stop("One or more provided cell clusters is not part of the 'Idents' of your Seurat object") + } + + if(!is.null(assay_oi)){ + if(! assay_oi %in% Seurat::Assays(seurat_obj)){ + stop("assay_oi should be an assay of your Seurat object") + } + } + + # Get cell identities of cluster of interest + + + cells_oi = Idents(seurat_obj) %>% .[Idents(seurat_obj) %in% + ident] %>% names() + + # Get exprs matrix: from assay oi or from most advanced assay if assay oi not specifcied + + if(!is.null(assay_oi)){ + cells_oi_in_matrix = intersect(colnames(seurat_obj[[assay_oi]]@data), cells_oi) + exprs_mat = seurat_obj[[assay_oi]]@data %>% .[, cells_oi_in_matrix] + } else { + if ("integrated" %in% names(seurat_obj@assays)) { + warning("Seurat object is result from the Seurat integration workflow. The expressed genes are now defined based on the integrated slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT") + cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$integrated@data), + cells_oi) + if (length(cells_oi_in_matrix) != length(cells_oi)) + stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$integrated@data). Please check that the expression matrix contains cells in columns and genes in rows.") + exprs_mat = seurat_obj@assays$integrated@data %>% .[, + cells_oi_in_matrix] + } + else if ("SCT" %in% names(seurat_obj@assays) & !"Spatial" %in% + names(seurat_obj@assays)) { + warning("Seurat object is result from the Seurat single-cell transform workflow. The expressed genes are defined based on the SCT slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT") + cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data), + cells_oi) + if (length(cells_oi_in_matrix) != length(cells_oi)) + stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$SCT@data). Please check that the expression matrix contains cells in columns and genes in rows.") + exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix] + } + else if ("Spatial" %in% names(seurat_obj@assays) & + !"SCT" %in% names(seurat_obj@assays)) { + warning("Seurat object is result from the Seurat spatial object. The expressed genes are defined based on the Spatial slot. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! ;-) )") + cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$Spatial@data), + cells_oi) + if (length(cells_oi_in_matrix) != length(cells_oi)) + stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.") + exprs_mat = seurat_obj@assays$Spatial@data %>% .[, cells_oi_in_matrix] + } + else if ("Spatial" %in% names(seurat_obj@assays) & + "SCT" %in% names(seurat_obj@assays)) { + warning("Seurat object is result from the Seurat spatial object, followed by the SCT workflow. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! The expressed genes are defined based on the SCT slot, but this can be changed via the assay_oi parameter.") + cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data), + cells_oi) + if (length(cells_oi_in_matrix) != length(cells_oi)) + stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.") + exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix] + } + else { + if (sum(cells_oi %in% colnames(seurat_obj@assays$RNA@data)) == + 0) + stop("None of the cells are in colnames of 'seurat_obj@assays$RNA@data'. The expression matrix should contain cells in columns and genes in rows.") + cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$RNA@data), + cells_oi) + if (length(cells_oi_in_matrix) != length(cells_oi)) + stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$RNA@data). Please check that the expression matrix contains cells in columns and genes in rows.") + exprs_mat = seurat_obj@assays$RNA@data %>% .[, cells_oi_in_matrix] + } + + } + + # use defined cells and exprs matrix to get expressed genes + + n_cells_oi_in_matrix = length(cells_oi_in_matrix) + if (n_cells_oi_in_matrix < 5000) { + genes = exprs_mat %>% apply(1, function(x) { + sum(x > 0)/n_cells_oi_in_matrix + }) %>% .[. >= pct] %>% names() + } + else { + splits = split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100)) + genes = splits %>% lapply(function(genes_indices, exprs, + pct, n_cells_oi_in_matrix) { + begin_i = genes_indices[1] + end_i = genes_indices[length(genes_indices)] + exprs = exprs[begin_i:end_i, , drop = FALSE] + genes = exprs %>% apply(1, function(x) { + sum(x > 0)/n_cells_oi_in_matrix + }) %>% .[. >= pct] %>% names() + }, exprs_mat, pct, n_cells_oi_in_matrix) %>% unlist() %>% + unname() + } + return(genes) +} +#' @title Perform NicheNet analysis on Seurat object: explain DE between two cell clusters +#' +#' @description \code{nichenet_seuratobj_cluster_de} Perform NicheNet analysis on Seurat object: explain differential expression (DE) between two 'receiver' cell clusters by ligands expressed by neighboring cells. +#' @usage +#' nichenet_seuratobj_cluster_de(seurat_obj, receiver_affected, receiver_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30,top_n_targets = 200, cutoff_visualization = 0.33,verbose = TRUE, assay_oi = NULL) +#' +#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. +#' @param receiver_reference Name of cluster identity/identities of "steady-state" cells, before they are affected by intercellular communication with other cells +#' @param receiver_affected Name of cluster identity/identities of "affected" cells that were presumably affected by intercellular communication with other cells +#' @param sender Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all". +#' @param expression_pct To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 +#' @param lfc_cutoff Cutoff on log fold change in the wilcoxon differential expression test. Default: 0.25. +#' @param geneset Indicate whether to consider all DE genes between condition 1 and 2 ("DE"), or only genes upregulated in condition 1 ("up"), or only genes downregulad in condition 1 ("down"). +#' @param filter_top_ligands Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE. +#' @param top_n_ligands Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 30. +#' @param top_n_targets To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200. +#' @param cutoff_visualization Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33. +#' @param ligand_target_matrix The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns). +#' @param lr_network The ligand-receptor network (columns that should be present: $from, $to). +#' @param weighted_networks The NicheNet weighted networks denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network. +#' @param verbose Print out the current analysis stage. Default: TRUE. +#' @inheritParams get_expressed_genes +#' +#' @return A list with the following elements: +#' $ligand_activities: data frame with output ligand activity analysis; +#' $top_ligands: top_n ligands based on ligand activity; +#' $top_targets: active, affected target genes of these ligands; +#' $top_receptors: receptors of these ligands; +#' $ligand_target_matrix: matrix indicating regulatory potential scores between active ligands and their predicted targets; +#' $ligand_target_heatmap: heatmap of ligand-target regulatory potential; +#' $ligand_target_df: data frame showing regulatory potential scores of predicted active ligand-target network; +#' $ligand_activity_target_heatmap: heatmap showing both ligand activity scores and target genes of these top ligands; +#' $ligand_expression_dotplot: expression dotplot of the top ligands; +#' $ligand_receptor_matrix: matrix of ligand-receptor interactions; +#' $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; +#' $ligand_receptor_df: data frame of ligand-receptor interactions; +#' $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; +#' $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. +#' +#' @import Seurat +#' @import dplyr +#' @importFrom magrittr set_rownames set_colnames +#' +#' @examples +#' \dontrun{ +#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) +#' lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +#' weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) +#' # works, but does not make sense +#' nichenet_seuratobj_cluster_de(seurat_obj = seuratObj, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) +#' # type of analysis for which this would make sense +#' nichenet_seuratobj_cluster_de(seurat_obj = seuratObj, receiver_affected = "p-EMT-pos-cancer", receiver_reference = "p-EMT-neg-cancer", sender = "Fibroblast", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) +#' } +#' +#' @export +#' +nichenet_seuratobj_cluster_de = function(seurat_obj, receiver_affected, receiver_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks, + expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30, + top_n_targets = 200, cutoff_visualization = 0.33, + verbose = TRUE, assay_oi = NULL) +{ + requireNamespace("Seurat") + requireNamespace("dplyr") + + # input check + # input check + if(! "RNA" %in% names(seurat_obj@assays)){ + if ("Spatial" %in% names(seurat_obj@assays)){ + warning("You are going to apply NicheNet on a spatial seurat object. Be sure it's ok to use NicheNet the way you are planning to do it. So this means: you should have changes in gene expression in receiver cells caused by cell-cell interactions. Note that in the case of spatial transcriptomics, you are not dealing with single cells but with 'spots' containing multiple cells of the same of different cell types.") + + if (class(seurat_obj@assays$Spatial@data) != "matrix" & class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { + warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") + } + if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") + } + }} else { + if (class(seurat_obj@assays$RNA@data) != "matrix" & + class(seurat_obj@assays$RNA@data) != "dgCMatrix") { + warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") + } + + if ("integrated" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == + 0) + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") + } + else if ("SCT" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == + 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") + } + } + else { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") + } + } + } + + + if(sum(receiver_affected %in% unique(Idents(seurat_obj))) != length(receiver_affected)) + stop("The defined receiver_affected cell type should be an identity class of your seurat object") + if(sum(receiver_reference %in% unique(Idents(seurat_obj))) != length(receiver_reference)) + stop("The defined receiver_reference cell type should be an identity class of your seurat object") + if(length(sender) == 1){ + if(sender != "all" & sender != "undefined"){ + if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ + stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") + } + } + } else { + if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ + stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") + } + } + if(geneset != "DE" & geneset != "up" & geneset != "down") + stop("geneset should be 'DE', 'up' or 'down'") + + if("integrated" %in% names(seurat_obj@assays)){ + warning("Seurat object is result from the Seurat integration workflow. Make sure that the way of defining expressed and differentially expressed genes in this wrapper is appropriate for your integrated data.") + } + + # Read in and process NicheNet networks, define ligands and receptors + if (verbose == TRUE){print("Read in and process NicheNet's networks")} + weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) + + ligands = lr_network %>% pull(from) %>% unique() + receptors = lr_network %>% pull(to) %>% unique() + + if (verbose == TRUE){print("Define expressed ligands and receptors in receiver and sender cells")} + + # step1 nichenet analysis: get expressed genes in sender and receiver cells + + ## receiver + # expressed genes: only in steady state population (for determining receptors) + list_expressed_genes_receiver_ss = c(receiver_reference) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_receiver_ss) = c(receiver_reference) %>% unique() + expressed_genes_receiver_ss = list_expressed_genes_receiver_ss %>% unlist() %>% unique() + + # expressed genes: both in steady state and affected population (for determining background of expressed genes) + list_expressed_genes_receiver = c(receiver_reference,receiver_affected) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_receiver) = c(receiver_reference,receiver_affected) %>% unique() + expressed_genes_receiver = list_expressed_genes_receiver %>% unlist() %>% unique() + + ## sender + if (length(sender) == 1){ + if (sender == "all"){ + sender_celltypes = Idents(seurat_obj) %>% levels() + list_expressed_genes_sender = sender_celltypes %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + + } else if (sender == "undefined") { + if("integrated" %in% names(seurat_obj@assays)){ + expressed_genes_sender = union(seurat_obj@assays$integrated@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) + } else { + expressed_genes_sender = union(seurat_obj@assays$RNA@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) + } + } else if (sender != "all" & sender != "undefined") { + sender_celltypes = sender + list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes %>% unique() + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + } + } else { + sender_celltypes = sender + list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes %>% unique() + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + } + + # step2 nichenet analysis: define background and gene list of interest: here differential expression between two conditions of cell type of interest + if (verbose == TRUE){print("Perform DE analysis between two receiver cell clusters")} + + DE_table_receiver = FindMarkers(object = seurat_obj, ident.1 = receiver_affected, ident.2 = receiver_reference, min.pct = expression_pct) %>% rownames_to_column("gene") + + SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_receiver) + + if(SeuratV4 == TRUE){ + if (geneset == "DE"){ + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "up") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "down") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC <= lfc_cutoff) %>% pull(gene) + } + } else { + if (geneset == "DE"){ + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_logFC) >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "up") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "down") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC <= lfc_cutoff) %>% pull(gene) + } + } + + + + geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] + if (length(geneset_oi) == 0){ + stop("No genes were differentially expressed") + } + background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] + + # step3 nichenet analysis: define potential ligands + expressed_ligands = intersect(ligands,expressed_genes_sender) + expressed_receptors = intersect(receptors,expressed_genes_receiver) + if (length(expressed_ligands) == 0){ + stop("No ligands expressed in sender cell") + } + if (length(expressed_receptors) == 0){ + stop("No receptors expressed in receiver cell") + } + potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() + if (length(potential_ligands) == 0){ + stop("No potentially active ligands") + } + + if (verbose == TRUE){print("Perform NicheNet ligand activity analysis")} + + # step4 perform NicheNet's ligand activity analysis + ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + ligand_activities = ligand_activities %>% + arrange(-aupr_corrected) %>% + mutate(rank = rank(desc(aupr_corrected))) + + if(filter_top_ligands == TRUE){ + best_upstream_ligands = ligand_activities %>% top_n(top_n_ligands, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() + } else { + best_upstream_ligands = ligand_activities %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() + } + if (verbose == TRUE){print("Infer active target genes of the prioritized ligands")} + + # step5 infer target genes of the top-ranked ligands + active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = top_n_targets) %>% bind_rows() %>% drop_na() + + if(nrow(active_ligand_target_links_df) > 0){ + active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = cutoff_visualization) + order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() + order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() + rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() + colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() + + order_targets = order_targets %>% intersect(rownames(active_ligand_target_links)) + order_ligands = order_ligands %>% intersect(colnames(active_ligand_target_links)) + + vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() + p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) #+ scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.006,0.012)) + } else { + vis_ligand_target = NULL + p_ligand_target_network = NULL + print("no highly likely active targets found for top ligands") + } + + # combined heatmap: overlay ligand activities + ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) + + rownames(ligand_aupr_matrix) = rownames(ligand_aupr_matrix) %>% make.names() + colnames(ligand_aupr_matrix) = colnames(ligand_aupr_matrix) %>% make.names() + + vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") + p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") + theme(legend.text = element_text(size = 9)) + p_ligand_aupr + + figures_without_legend = cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), + align = "hv", + nrow = 1, + rel_widths = c(ncol(vis_ligand_aupr)+10, ncol(vis_ligand_target))) + legends = cowplot::plot_grid( + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), + nrow = 1, + align = "h") + + combined_plot = cowplot::plot_grid(figures_without_legend, + legends, + rel_heights = c(10,2), nrow = 2, align = "hv") + + # ligand-receptor plot + # get the ligand-receptor network of the top-ranked ligands + if (verbose == TRUE){print("Infer receptors of the prioritized ligands")} + + lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) + best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() + + lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) + + lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) + lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) + + if (nrow(lr_network_top_matrix) > 1){ + dist_receptors = dist(lr_network_top_matrix, method = "binary") + hclust_receptors = hclust(dist_receptors, method = "ward.D2") + order_receptors = hclust_receptors$labels[hclust_receptors$order] + } else { + order_receptors = rownames(lr_network_top_matrix) + } + if (ncol(lr_network_top_matrix) > 1) { + dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") + hclust_ligands = hclust(dist_ligands, method = "ward.D2") + order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] + } else { + order_ligands_receptor = colnames(lr_network_top_matrix) + } + + order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) + order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) + + vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] + dim(vis_ligand_receptor_network) = c(length(order_receptors), length(order_ligands_receptor)) + + rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() + colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() + + p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") + + # ligand expression Seurat dotplot + if (length(sender) > 1){ + are_there_senders = TRUE + } + if(length(sender) == 1){ + if(sender != "undefined"){ + are_there_senders = TRUE + } else { + are_there_senders = FALSE + } + } + + if (are_there_senders == TRUE){ + real_makenames_conversion = lr_network$from %>% unique() %>% magrittr::set_names(lr_network$from %>% unique() %>% make.names()) + order_ligands_adapted = real_makenames_conversion[order_ligands] + names(order_ligands_adapted) = NULL + rotated_dotplot = DotPlot(seurat_obj %>% subset(idents = sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots + + } else { + rotated_dotplot = NULL + } + + + return(list( + ligand_activities = ligand_activities, + top_ligands = best_upstream_ligands, + top_targets = active_ligand_target_links_df$target %>% unique(), + top_receptors = lr_network_top_df_large$to %>% unique(), + ligand_target_matrix = vis_ligand_target, + ligand_target_heatmap = p_ligand_target_network, + ligand_target_df = active_ligand_target_links_df, + ligand_expression_dotplot = rotated_dotplot, + ligand_activity_target_heatmap = combined_plot, + ligand_receptor_matrix = vis_ligand_receptor_network, + ligand_receptor_heatmap = p_ligand_receptor_network, + ligand_receptor_df = lr_network_top_df_large %>% rename(ligand = from, receptor = to), + geneset_oi = geneset_oi, + background_expressed_genes = background_expressed_genes + + )) +} +#' @title Perform NicheNet analysis on Seurat object: explain DE between two cell clusters from separate conditions +#' +#' @description \code{nichenet_seuratobj_aggregate_cluster_de} Perform NicheNet analysis on Seurat object: explain differential expression (DE) between two 'receiver' cell clusters coming from different conditions, by ligands expressed by neighboring cells. +#' @usage +#' nichenet_seuratobj_aggregate_cluster_de(seurat_obj, receiver_affected, receiver_reference, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30,top_n_targets = 200, cutoff_visualization = 0.33,verbose = TRUE, assay_oi = NULL) +#' +#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. +#' @param receiver_reference Name of cluster identity/identities of "steady-state" cells, before they are affected by intercellular communication with other cells +#' @param receiver_affected Name of cluster identity/identities of "affected" cells that were presumably affected by intercellular communication with other cells +#' @param condition_colname Name of the column in the meta data dataframe that indicates which condition/sample cells were coming from. +#' @param condition_oi Condition of interest in which receiver cells were presumably affected by other cells. Should be a name present in the `condition_colname` column of the metadata. +#' @param condition_reference The second condition (e.g. reference or steady-state condition). Should be a name present in the `condition_colname` column of the metadata. +#' @param sender Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all". +#' @param expression_pct To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 +#' @param lfc_cutoff Cutoff on log fold change in the wilcoxon differential expression test. Default: 0.25. +#' @param geneset Indicate whether to consider all DE genes between condition 1 and 2 ("DE"), or only genes upregulated in condition 1 ("up"), or only genes downregulad in condition 1 ("down"). +#' @param filter_top_ligands Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE. +#' @param top_n_ligands Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 30. +#' @param top_n_targets To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200. +#' @param cutoff_visualization Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33. +#' @param ligand_target_matrix The NicheNet ligand-target matrix of the organism of interest denoting regulatory potential scores between ligands and targets (ligands in columns). +#' @param lr_network The ligand-receptor network (columns that should be present: $from, $to) of the organism of interest. +#' @param weighted_networks The NicheNet weighted networks of the organism of interest denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network. +#' @param verbose Print out the current analysis stage. Default: TRUE. +#' @inheritParams get_expressed_genes +#' +#' @return A list with the following elements: +#' $ligand_activities: data frame with output ligand activity analysis; +#' $top_ligands: top_n ligands based on ligand activity; +#' $top_targets: active, affected target genes of these ligands; +#' $top_receptors: receptors of these ligands; +#' $ligand_target_matrix: matrix indicating regulatory potential scores between active ligands and their predicted targets; +#' $ligand_target_heatmap: heatmap of ligand-target regulatory potential; +#' $ligand_target_df: data frame showing regulatory potential scores of predicted active ligand-target network; +#' $ligand_activity_target_heatmap: heatmap showing both ligand activity scores and target genes of these top ligands; +#' $ligand_expression_dotplot: expression dotplot of the top ligands; +#' $ligand_receptor_matrix: matrix of ligand-receptor interactions; +#' $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; +#' $ligand_receptor_df: data frame of ligand-receptor interactions; +#' $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; +#' $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. +#' +#' @import Seurat +#' @import dplyr +#' @importFrom magrittr set_rownames set_colnames +#' +#' @examples +#' \dontrun{ +#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) +#' lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +#' weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) +#' nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seuratObj, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) +#' } +#' +#' @export +#' +nichenet_seuratobj_aggregate_cluster_de = function(seurat_obj, receiver_affected, receiver_reference, + condition_colname, condition_oi, condition_reference, sender = "all", + ligand_target_matrix,lr_network,weighted_networks, + expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30, + top_n_targets = 200, cutoff_visualization = 0.33, + verbose = TRUE, assay_oi = NULL) +{ + + requireNamespace("Seurat") + requireNamespace("dplyr") + + # input check + if(! "RNA" %in% names(seurat_obj@assays)){ + if ("Spatial" %in% names(seurat_obj@assays)){ + warning("You are going to apply NicheNet on a spatial seurat object. Be sure it's ok to use NicheNet the way you are planning to do it. So this means: you should have changes in gene expression in receiver cells caused by cell-cell interactions. Note that in the case of spatial transcriptomics, you are not dealing with single cells but with 'spots' containing multiple cells of the same of different cell types.") + + if (class(seurat_obj@assays$Spatial@data) != "matrix" & class(seurat_obj@assays$Spatial@data) != "dgCMatrix") { + warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied") + } + if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'") + } + }} else { + if (class(seurat_obj@assays$RNA@data) != "matrix" & + class(seurat_obj@assays$RNA@data) != "dgCMatrix") { + warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied") + } + + if ("integrated" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) == + 0) + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data") + } + else if ("SCT" %in% names(seurat_obj@assays)) { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) == + 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT") + } + } + else { + if (sum(dim(seurat_obj@assays$RNA@data)) == 0) { + stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'") + } + } + } + + + if(sum(receiver_affected %in% unique(Idents(seurat_obj))) != length(receiver_affected)) + stop("The defined receiver_affected cell type should be an identity class of your seurat object") + if(sum(receiver_reference %in% unique(Idents(seurat_obj))) != length(receiver_reference)) + stop("The defined receiver_reference cell type should be an identity class of your seurat object") + if(!condition_colname %in% colnames(seurat_obj@meta.data)) + stop("Your column indicating the conditions/samples of interest should be in the metadata dataframe") + if(sum(condition_oi %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_oi)) + stop("condition_oi should be in the condition-indicating column") + if(sum(condition_reference %in% c(seurat_obj[[condition_colname]] %>% unlist() %>% as.character() %>% unique())) != length(condition_reference)) + stop("condition_reference should be in the condition-indicating column") + if(length(sender) == 1){ + if(sender != "all" & sender != "undefined"){ + if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ + stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") + } + } + } else { + if(sum(sender %in% unique(Idents(seurat_obj))) != length(sender)){ + stop("The sender argument should be 'all' or 'undefined' or an identity class of your seurat object") + } + } + if(geneset != "DE" & geneset != "up" & geneset != "down") + stop("geneset should be 'DE', 'up' or 'down'") + + if("integrated" %in% names(seurat_obj@assays)){ + warning("Seurat object is result from the Seurat integration workflow. Make sure that the way of defining expressed and differentially expressed genes in this wrapper is appropriate for your integrated data.") + } + # Read in and process NicheNet networks, define ligands and receptors + if (verbose == TRUE){print("Read in and process NicheNet's networks")} + weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) + + ligands = lr_network %>% pull(from) %>% unique() + receptors = lr_network %>% pull(to) %>% unique() + + if (verbose == TRUE){print("Define expressed ligands and receptors in receiver and sender cells")} + + # step1 nichenet analysis: get expressed genes in sender and receiver cells + + ## receiver + # expressed genes: only in steady state population (for determining receptors) + list_expressed_genes_receiver_ss = c(receiver_reference) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_receiver_ss) = c(receiver_reference) %>% unique() + expressed_genes_receiver_ss = list_expressed_genes_receiver_ss %>% unlist() %>% unique() + + # expressed genes: both in steady state and affected population (for determining background of expressed genes) + list_expressed_genes_receiver = c(receiver_reference,receiver_affected) %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_receiver) = c(receiver_reference,receiver_affected) %>% unique() + expressed_genes_receiver = list_expressed_genes_receiver %>% unlist() %>% unique() + + ## sender + if (length(sender) == 1){ + if (sender == "all"){ + sender_celltypes = Idents(seurat_obj) %>% levels() + list_expressed_genes_sender = sender_celltypes %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + + } else if (sender == "undefined") { + + if("integrated" %in% names(seurat_obj@assays)){ + expressed_genes_sender = union(seurat_obj@assays$integrated@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) + } else { + expressed_genes_sender = union(seurat_obj@assays$RNA@data %>% rownames(),rownames(ligand_target_matrix)) %>% union(colnames(ligand_target_matrix)) + } + + } else if (sender != "all" & sender != "undefined") { + sender_celltypes = sender + list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes %>% unique() + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + } + } else { + sender_celltypes = sender + list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, expression_pct, assay_oi) + names(list_expressed_genes_sender) = sender_celltypes %>% unique() + expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + } + + # step2 nichenet analysis: define background and gene list of interest: here differential expression between two conditions of cell type of interest + if (verbose == TRUE){print("Perform DE analysis between two receiver cell clusters")} + + seurat_obj_receiver_affected= subset(seurat_obj, idents = receiver_affected) + seurat_obj_receiver_affected = SetIdent(seurat_obj_receiver_affected, value = seurat_obj_receiver_affected[[condition_colname]]) + seurat_obj_receiver_affected= subset(seurat_obj_receiver_affected, idents = condition_oi) + + seurat_obj_receiver_reference= subset(seurat_obj, idents = receiver_reference) + seurat_obj_receiver_reference = SetIdent(seurat_obj_receiver_reference, value = seurat_obj_receiver_reference[[condition_colname]]) + seurat_obj_receiver_reference= subset(seurat_obj_receiver_reference, idents = condition_reference) + + seurat_obj_receiver = merge(seurat_obj_receiver_affected, seurat_obj_receiver_reference) + + DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = expression_pct) %>% rownames_to_column("gene") + + + SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_receiver) + + if(SeuratV4 == TRUE){ + if (geneset == "DE"){ + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "up") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "down") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_log2FC <= lfc_cutoff) %>% pull(gene) + } + } else { + if (geneset == "DE"){ + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_logFC) >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "up") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC >= lfc_cutoff) %>% pull(gene) + } else if (geneset == "down") { + geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & avg_logFC <= lfc_cutoff) %>% pull(gene) + } + } + + geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] + if (length(geneset_oi) == 0){ + stop("No genes were differentially expressed") + } + background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] + + # step3 nichenet analysis: define potential ligands + expressed_ligands = intersect(ligands,expressed_genes_sender) + expressed_receptors = intersect(receptors,expressed_genes_receiver) + if (length(expressed_ligands) == 0){ + stop("No ligands expressed in sender cell") + } + if (length(expressed_receptors) == 0){ + stop("No receptors expressed in receiver cell") + } + potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() + if (length(potential_ligands) == 0){ + stop("No potentially active ligands") + } + + if (verbose == TRUE){print("Perform NicheNet ligand activity analysis")} + + # step4 perform NicheNet's ligand activity analysis + ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + ligand_activities = ligand_activities %>% + arrange(-aupr_corrected) %>% + mutate(rank = rank(desc(aupr_corrected))) + + if(filter_top_ligands == TRUE){ + best_upstream_ligands = ligand_activities %>% top_n(top_n_ligands, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() + } else { + best_upstream_ligands = ligand_activities %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() + } + if (verbose == TRUE){print("Infer active target genes of the prioritized ligands")} + + # step5 infer target genes of the top-ranked ligands + active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = top_n_targets) %>% bind_rows() %>% drop_na() + + if(nrow(active_ligand_target_links_df) > 0){ + active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = cutoff_visualization) + order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() + order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() + rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() + colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() + + order_targets = order_targets %>% intersect(rownames(active_ligand_target_links)) + order_ligands = order_ligands %>% intersect(colnames(active_ligand_target_links)) + + vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() + p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) #+ scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.006,0.012)) + } else { + vis_ligand_target = NULL + p_ligand_target_network = NULL + print("no highly likely active targets found for top ligands") + } + # combined heatmap: overlay ligand activities + ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) + + rownames(ligand_aupr_matrix) = rownames(ligand_aupr_matrix) %>% make.names() + colnames(ligand_aupr_matrix) = colnames(ligand_aupr_matrix) %>% make.names() + + vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") + p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") + theme(legend.text = element_text(size = 9)) + p_ligand_aupr + + figures_without_legend = cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), + align = "hv", + nrow = 1, + rel_widths = c(ncol(vis_ligand_aupr)+10, ncol(vis_ligand_target))) + legends = cowplot::plot_grid( + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), + nrow = 1, + align = "h") + + combined_plot = cowplot::plot_grid(figures_without_legend, + legends, + rel_heights = c(10,2), nrow = 2, align = "hv") + + # ligand-receptor plot + # get the ligand-receptor network of the top-ranked ligands + if (verbose == TRUE){print("Infer receptors of the prioritized ligands")} + + lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) + best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() + + lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) + + lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) + lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) + + if (nrow(lr_network_top_matrix) > 1){ + dist_receptors = dist(lr_network_top_matrix, method = "binary") + hclust_receptors = hclust(dist_receptors, method = "ward.D2") + order_receptors = hclust_receptors$labels[hclust_receptors$order] + } else { + order_receptors = rownames(lr_network_top_matrix) + } + if (ncol(lr_network_top_matrix) > 1) { + dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") + hclust_ligands = hclust(dist_ligands, method = "ward.D2") + order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] + } else { + order_ligands_receptor = colnames(lr_network_top_matrix) + } + + order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) + order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) + + vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] + dim(vis_ligand_receptor_network) = c(length(order_receptors), length(order_ligands_receptor)) + + rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() + colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() + + p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") + + # ligand expression Seurat dotplot + if (length(sender) > 1){ + are_there_senders = TRUE + } + if(length(sender) == 1){ + if(sender != "undefined"){ + are_there_senders = TRUE + } else { + are_there_senders = FALSE + } + } + + if (are_there_senders == TRUE){ + real_makenames_conversion = lr_network$from %>% unique() %>% magrittr::set_names(lr_network$from %>% unique() %>% make.names()) + order_ligands_adapted = real_makenames_conversion[order_ligands] + names(order_ligands_adapted) = NULL + rotated_dotplot = DotPlot(seurat_obj %>% subset(idents = sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots + + } else { + rotated_dotplot = NULL + } + return(list( + ligand_activities = ligand_activities, + top_ligands = best_upstream_ligands, + top_targets = active_ligand_target_links_df$target %>% unique(), + top_receptors = lr_network_top_df_large$to %>% unique(), + ligand_target_matrix = vis_ligand_target, + ligand_target_heatmap = p_ligand_target_network, + ligand_target_df = active_ligand_target_links_df, + ligand_expression_dotplot = rotated_dotplot, + ligand_activity_target_heatmap = combined_plot, + ligand_receptor_matrix = vis_ligand_receptor_network, + ligand_receptor_heatmap = p_ligand_receptor_network, + ligand_receptor_df = lr_network_top_df_large %>% rename(ligand = from, receptor = to), + geneset_oi = geneset_oi, + background_expressed_genes = background_expressed_genes + )) +} +#' @title Get log fold change values of genes in cell type of interest +#' +#' @description \code{get_lfc_celltype} Get log fold change of genes between two conditions in cell type of interest when using a Seurat single-cell object. +#' +#' @usage +#' get_lfc_celltype(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype", expression_pct = 0.10) +#' #' +#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. +#' @param celltype_oi Name of celltype of interest. Should be present in the celltype metadata dataframe. +#' @param condition_colname Name of the column in the meta data dataframe that indicates which condition/sample cells were coming from. +#' @param condition_oi Condition of interest. Should be a name present in the "condition_colname" column of the metadata. +#' @param condition_reference The second condition (e.g. reference or steady-state condition). Should be a name present in the "condition_colname" column of the metadata. +#' @param celltype_col Metadata colum name where the cell type identifier is stored. Default: "celltype". If this is NULL, the Idents() of the seurat object will be considered as your cell type identifier. +#' @param expression_pct To consider only genes if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 +#' +#' @return A tbl with the log fold change values of genes. Positive lfc values: higher in condition_oi compared to condition_reference. +#' +#' @import Seurat +#' @import dplyr +#' +#' @examples +#' \dontrun{ +#' requireNamespace("dplyr") +#' seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) +#' get_lfc_celltype(seurat_obj = seuratObj, celltype_oi = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", celltype_col = "celltype", expression_pct = 0.10) +#' } +#' @export +#' +get_lfc_celltype = function(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype", expression_pct = 0.10){ + requireNamespace("Seurat") + requireNamespace("dplyr") + if(!is.null(celltype_col)){ + seurat_obj_celltype = SetIdent(seurat_obj, value = seurat_obj[[celltype_col]]) + seuratObj_sender = subset(seurat_obj_celltype, idents = celltype_oi) + + } else { + seuratObj_sender = subset(seurat_obj, idents = celltype_oi) + + } + seuratObj_sender = SetIdent(seuratObj_sender, value = seuratObj_sender[[condition_colname]]) + DE_table_sender = FindMarkers(object = seuratObj_sender, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = expression_pct, logfc.threshold = 0.05) %>% rownames_to_column("gene") + + SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_sender) + + if(SeuratV4 == TRUE){ + DE_table_sender = DE_table_sender %>% as_tibble() %>% select(-p_val) %>% select(gene, avg_log2FC) + } else { + DE_table_sender = DE_table_sender %>% as_tibble() %>% select(-p_val) %>% select(gene, avg_logFC) + } + + colnames(DE_table_sender) = c("gene",celltype_oi) + return(DE_table_sender) +} diff --git a/R/application_visualization.R b/R/application_visualization.R index 7e8857b..6ae5eec 100644 --- a/R/application_visualization.R +++ b/R/application_visualization.R @@ -542,7 +542,200 @@ make_heatmap_bidir_lt_ggplot = function(matrix, y_name, x_name, y_axis = TRUE, x } } +#' @title Make a "mushroom plot" of ligand-receptor interactions +#' +#' @description \code{make_mushroom_plot} Make a plot in which each glyph consists of two semicircles corresponding to ligand- and receptor- information. The size of the semicircle is the percentage of cells that express the protein, while the saturation corresponds to the scaled average expression value. +#' +#' @usage +#' make_mushroom_plot(prioritization_table, top_n = 30, show_ranking = FALSE, show_all_datapoints = FALSE, true_color_range = FALSE, size = "scaled_avg_exprs", color = "scaled_lfc", +#' ligand_fill_colors = c("#DEEBF7", "#08306B"), receptor_fill_colors = c("#FEE0D2", "#A50F15"), +#' unranked_ligand_fill_colors = c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2)), unranked_receptor_fill_colors = c( alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))) +#' +#' @param prioritization_table A prioritization table as generated by \code{\link{generate_prioritization_tables}} +#' @param top_n An integer indicating how many ligand-receptor pairs to show +#' @param show_ranking A logical indicating whether to show the ranking of the ligand-receptor pairs (default: FALSE) +#' @param show_all_datapoints A logical indicating whether to show all ligand-receptor pairs (default: FALSE, if true they will be grayed out) +#' @param true_color_range A logical indicating whether to use the true color range for the ligand-receptor pairs (default: FALSE; range 0-1 is used) +#' @param size A string indicating which column to use for the size of the semicircles (default: "scaled_avg_exprs"; use column name without "_ligand" or "_receptor" suffix) +#' @param color A string indicating which column to use for the color of the semicircles (default: "scaled_lfc"; use column name without "_ligand" or "_receptor" suffix) +#' @param ligand_fill_colors A vector of the low and high colors to use for the ligand semicircle fill gradient (default: c("#DEEBF7", "#08306B")) +#' @param receptor_fill_colors A vector of the low and high colors to use for the receptor semicircle fill gradient (default: c("#FEE0D2", "#A50F15")) +#' @param unranked_ligand_fill_colors A vector of the low and high colors to use for the unranked ligands when show_all_datapoints is TRUE (default: c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))) +#' @param unranked_receptor_fill_colors A vector of the low and high colors to use for the unkraed receptors when show_all_datapoints is TRUE (default: c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))) +#' +#' @return A ggplot object +#' +#' @import ggplot2 +#' @import ggforce +#' @import ggnewscale +#' @import shadowtext +#' @import cowplot +#' +#' @examples +#' \dontrun{ +#' # Create a prioritization table +#' prior_table <- generate_prioritization_tables(processed_expr_table, processed_DE_table, ligand_activities, processed_condition_markers, prioritizing_weights) +#' make_mushroom_plot(prior_table) +#' +#' # Show only top 20, and write rankings on the plot +#' make_mushroom_plot(prior_table, top_n = 20, show_ranking = TRUE) +#' +#' # Show all datapoints, and use true color range +#' make_mushroom_plot(prior_table, show_all_datapoints = TRUE, true_color_range = TRUE) +#' +#' # Change the size and color columns +#' make_mushroom_plot(prior_table, size = "pct_expressed", color = "scaled_avg_exprs") +#' } +#' @export +#' +make_mushroom_plot = function(prioritization_table, top_n = 30, show_rankings = FALSE, + show_all_datapoints = FALSE, true_color_range = FALSE, + size = "scaled_avg_exprs", color = "scaled_lfc", + ligand_fill_colors = c("#DEEBF7", "#08306B"), + receptor_fill_colors = c("#FEE0D2", "#A50F15"), + unranked_ligand_fill_colors = c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2)), + unranked_receptor_fill_colors = c( alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))){ + size_ext <- c("ligand", "receptor"); color_ext <- c("ligand", "receptor") + if (size == "pct_expressed") size_ext <- c("sender", "receiver") + if (color == "pct_expressed") color_ext <- c("sender", "receiver") + + cols_to_use <- c("sender", "ligand", "receptor", paste0(size, "_", size_ext), paste0(color, "_", color_ext)) + + if (!all(cols_to_use %in% colnames(prioritization_table))){ + stop(paste(paste0("`", cols_to_use %>% .[!. %in% colnames(prioritization_table)], "`", collapse =", "), "column not in prioritization table")) + } + if(!is.logical(show_rankings) | length(show_rankings) != 1) + stop("show_rankings should be a TRUE or FALSE") + if(!is.logical(show_all_datapoints) | length(show_all_datapoints) != 1) + stop("show_all_datapoints should be a TRUE or FALSE") + if(!is.logical(true_color_range) | length(true_color_range) != 1) + stop("true_color_range should be a TRUE or FALSE") + if(!is.numeric(top_n) | length(top_n) != 1) + stop("top_n should be a numeric vector of length 1") + if(length(ligand_fill_colors) != 2) + stop("ligand_fill_colors should be a vector of length 2") + if(length(receptor_fill_colors) != 2) + stop("receptor_fill_colors should be a vector of length 2") + if(length(unranked_ligand_fill_colors) != 2) + stop("unranked_ligand_fill_colors should be a vector of length 2") + if(length(unranked_receptor_fill_colors) != 2) + stop("unranked_receptor_fill_colors should be a vector of length 2") + requireNamespace("dplyr") + requireNamespace("ggplot2") + requireNamespace("ggnewscale") + requireNamespace("ggforce") + requireNamespace("shadowtext") + requireNamespace("cowplot") + + # Filter to top_n, create a new column of ligand-receptor interactions + filtered_table <- prioritization_table %>% dplyr::mutate(prioritization_rank = rank(desc(prioritization_score))) %>% + dplyr::mutate(lr_interaction = paste(ligand, receptor, sep = " - ")) + order_interactions <- unique(filtered_table %>% filter(prioritization_rank <= top_n) %>% pull(lr_interaction)) + filtered_table <- filtered_table %>% filter(lr_interaction %in% order_interactions) %>% + mutate(lr_interaction = factor(lr_interaction, levels = rev(order_interactions))) + + celltypes_vec <- 1:length(unique(filtered_table$sender)) %>% setNames(sort(unique(filtered_table$sender))) + lr_interaction_vec <- 1:length(order_interactions) %>% setNames(order_interactions) + + # Make each ligand and receptor into separate rows (to draw 1 semicircle per row) + filtered_table <- filtered_table %>% select(c("lr_interaction", all_of(cols_to_use), "prioritization_rank")) %>% + pivot_longer(c(ligand, receptor), names_to = "type", values_to = "protein") %>% + mutate(size = ifelse(type == "ligand", get(paste0(size, "_", size_ext[1])), get(paste0(size, "_", size_ext[2]))), + color = ifelse(type == "ligand", get(paste0(color, "_", color_ext[1])), get(paste0(color, "_", color_ext[2])))) %>% + select(-contains(c("_ligand", "_receptor", "_sender", "_receiver"))) %>% + mutate(start = rep(c(-pi, 0), nrow(filtered_table))) %>% + mutate(x = celltypes_vec[sender], y = lr_interaction_vec[lr_interaction]) + + # Rename size and color columns to be more human-readable + keywords_adj <- c("LFC", "p-val", "product", "mean", "adjusted", "expression") %>% setNames(c("lfc", "pval", "prod", "avg", "adj", "exprs")) + size_title <- sapply(stringr::str_split(size, "_")[[1]], function(k) ifelse(is.na(keywords_adj[k]), k, keywords_adj[k])) %>% + paste0(., collapse = " ") %>% R.utils::capitalize() + color_title <- sapply(stringr::str_split(color, "_")[[1]], function(k) ifelse(is.na(keywords_adj[k]), k, keywords_adj[k])) %>% + paste0(., collapse = " ") %>% R.utils::capitalize() + + color_lims <- c(0,1) + if (true_color_range) color_lims <- NULL + + scale <- 0.5 + p1 <- ggplot() + + # Draw ligand semicircle + geom_arc_bar(data = filtered_table %>% filter(type=="ligand", prioritization_rank <= top_n), + aes(x0 = x, y0 = y, r0 = 0, r = sqrt(size)*scale, + start = start, end = start + pi, fill=color), + color = "white") + + scale_fill_gradient(low = ligand_fill_colors[1] , high=ligand_fill_colors[2] , + limits=color_lims, oob=scales::squish, + name=paste0(color_title, " (", color_ext[1], ")") %>% str_wrap(width=15)) + + # Create new fill scale for receptor semicircles + new_scale_fill() + + geom_arc_bar(data = filtered_table %>% filter(type=="receptor", prioritization_rank <= top_n), + aes(x0 = x, y0 = y, r0 = 0, r = sqrt(size)*scale, + start = start, end = start + pi, fill=color), + color = "white") + + scale_fill_gradient(low = receptor_fill_colors[1], high=receptor_fill_colors[2] , limits=color_lims, oob=scales::squish, + name=paste0(color_title, " (", color_ext[2], ")") %>% str_wrap(width=15)) + + # Other plot information + scale_y_continuous(breaks=length(lr_interaction_vec):1, labels=names(lr_interaction_vec)) + + scale_x_continuous(breaks=1:length(celltypes_vec), labels=names(celltypes_vec), position="top") + + xlab("Sender cell types") + ylab("Ligand-receptor interaction") + + coord_fixed() + + theme_bw() + + theme(panel.grid.major = element_blank(), + legend.box = "horizontal") + + + # Add unranked ligand and receptor semicircles if requested + if (show_all_datapoints){ + p1 <- p1 + new_scale_fill() + + geom_arc_bar(data = filtered_table %>% filter(type=="ligand", prioritization_rank > top_n), + aes(x0 = x, y0 = y, r0 = 0, r = sqrt(size)*scale, + start = start, end = start + pi, fill=color), + color = "white") + + scale_fill_gradient(low = unranked_ligand_fill_colors[1], high=unranked_ligand_fill_colors[2], + limits=color_lims, oob=scales::squish, guide = "none") + + new_scale_fill() + + geom_arc_bar(data = filtered_table %>% filter(type=="receptor", prioritization_rank > top_n), + aes(x0 = x, y0 = y, r0 = 0, r = sqrt(size)*scale, + start = start, end = start + pi, fill=color), + color = "white") + + scale_fill_gradient(low=unranked_receptor_fill_colors[1], high=unranked_receptor_fill_colors[2], + limits=color_lims, oob=scales::squish, guide = "none") + } + + # Add ranking numbers if requested + if (show_rankings){ + p1 <- p1 + geom_shadowtext(data = filtered_table %>% filter(prioritization_rank <= top_n), + aes(x=x, y=y, label=prioritization_rank)) + } + + legend1 <- ggpubr::as_ggplot(ggpubr::get_legend(p1)) + + # For the size legend, create a new plot + legend2 <- ggplot(data.frame(values = c(0.25, 0.5, 0.75, 1), x=1:4, y=1, start=-pi)) + + geom_rect(aes(xmin=x-0.5, xmax=x+0.5, ymin=y-0.5, ymax=y+0.5), color="gray80", fill=NA) + + geom_arc_bar(aes(x0=x, y0=y, r0=0, r=sqrt(values)*scale, start=start, end=start+pi), fill="black") + + geom_text(aes(label=values, x=x, y=y-0.6), vjust=1) + + labs(tag = size_title) + + scale_x_continuous(breaks = 1:4, labels=c(0.25, 0.5, 0.75, 1)) + + scale_y_continuous(limits=c(-0.5, 1.5)) + + labs(x="Percent expressed") + + coord_fixed() + theme_classic() + + theme(panel.background = element_blank(), + plot.background = element_blank(), + plot.margin = margin(0, 0, 10, 0), + plot.tag.position = "top", + plot.tag = element_text(margin=margin(0, 0, 5,0), size=10), + axis.text = element_blank(), + axis.line = element_blank(), + axis.ticks = element_blank(), + axis.title = element_blank()) + + # Combine the two legends + legends <- cowplot::plot_grid(NULL, legend1, legend2, NULL, nrow=4, scale=c(1,1,0.5,1), + rel_heights = c(2, 1, 2, 2), align = "v", axis="tb") + cowplot::plot_grid(p1 + theme(legend.position="none"), legends) +} diff --git a/R/differential_nichenet.R b/R/differential_nichenet.R index e92d8f1..b34c3d9 100644 --- a/R/differential_nichenet.R +++ b/R/differential_nichenet.R @@ -3,9 +3,10 @@ #' @description \code{scale_quantile_adapted} Normalize values in a vector by quantile scaling. Add a pseudovalue of 0.001 to avoid having a score of 0 for the lowest value. #' #' @usage -#' scale_quantile_adapted(x) +#' scale_quantile_adapted(x, outlier_cutoff = 0) #' #' @param x A numeric vector. +#' @param outlier_cutoff The quantile cutoff for outliers (default 0). #' #' @return A quantile-scaled numeric vector. #' @@ -16,8 +17,8 @@ #' #' @export #' -scale_quantile_adapted = function(x){ - y = scale_quantile(x,outlier_cutoff = 0) +scale_quantile_adapted = function(x, outlier_cutoff = 0){ + y = scale_quantile(x, outlier_cutoff = outlier_cutoff) y = y + 0.001 return(y) } @@ -384,7 +385,7 @@ process_niche_de = function(DE_table, niches, type, expression_pct){ #' #' @param DE_sender_processed Output of `process_niche_de` with `type = receiver` #' @param DE_receiver_processed Output of `process_niche_de` with `type = receiver` -#' @param lr_network Ligand-Receptor Network in tibble format: ligand, receptor, bonafide as columns +#' @param lr_network Ligand-Receptor Network in tibble format: ligand, receptor as columns #' @param specificity_score Defines which score will be used to prioritze ligand-receptor pairs and consider their differential expression. Default and recommended: "min_lfc". #' "min_lfc" looks at the minimal logFC of the ligand/receptor between the celltype of interest and all the other celltypes. #' Alternatives: "mean_lfc", "min_score", and "mean_score". Mean uses the average/mean instead of minimum. @@ -587,7 +588,7 @@ get_ligand_activities_targets = function(niche_geneset_list, ligand_target_matri print(paste0("Calculate Ligand activities for: ",receiver_oi)) ligand_activities = nichenetr::predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = ligands) - ligand_activities = ligand_activities %>% dplyr::rename(ligand = test_ligand, activity = pearson) %>% dplyr::select(-aupr, -auroc) %>% filter(!is.na(activity)) + ligand_activities = ligand_activities %>% dplyr::rename(ligand = test_ligand, activity = aupr_corrected) %>% dplyr::select(-pearson, -auroc, -aupr) %>% filter(!is.na(activity)) ligand_target_df = ligand_activities$ligand %>% unique() %>% lapply(nichenetr::get_weighted_ligand_target_links, geneset_oi, ligand_target_matrix, top_n_target) %>% dplyr::bind_rows() %>% dplyr::rename(ligand_target_weight = weight) ligand_activities = ligand_activities %>% dplyr::inner_join(ligand_target_df, by = c("ligand")) %>% dplyr::mutate(receiver = receiver_oi) %>% dplyr::group_by(receiver) %>% dplyr::mutate(activity_normalized = nichenetr::scaling_zscore(activity)) @@ -777,8 +778,7 @@ get_non_spatial_de = function(niches, spatial_info, type, lr_network){ # "ligand_scaled_receptor_expression_fraction" = 1, # "scaled_receptor_score_spatial" = 0, # "scaled_activity" = 0, -# "scaled_activity_normalized" = 1, -# "bona_fide" = 1) +# "scaled_activity_normalized" = 1) #' #' @return A list containing a prioritization table for ligand-receptor interactions, and one for ligand-target interactions #' @@ -794,8 +794,7 @@ get_non_spatial_de = function(niches, spatial_info, type, lr_network){ # "ligand_scaled_receptor_expression_fraction" = 1, # "scaled_receptor_score_spatial" = 0, # "scaled_activity" = 0, -# "scaled_activity_normalized" = 1, -# "bona_fide" = 1) +# "scaled_activity_normalized" = 1) #' output_nichenet_analysis = list(DE_sender_receiver = DE_sender_receiver, ligand_scaled_receptor_expression_fraction_df = ligand_scaled_receptor_expression_fraction_df, sender_spatial_DE_processed = sender_spatial_DE_processed, receiver_spatial_DE_processed = receiver_spatial_DE_processed, # ligand_activities_targets = ligand_activities_targets, DE_receiver_processed_targets = DE_receiver_processed_targets, exprs_tbl_ligand = exprs_tbl_ligand, exprs_tbl_receptor = exprs_tbl_receptor, exprs_tbl_target = exprs_tbl_target) #' prioritization_tables = get_prioritization_tables(output_nichenet_analysis, prioritizing_weights) @@ -816,13 +815,12 @@ get_prioritization_tables = function(output_nichenet_analysis, prioritizing_weig # reorder the columns - combined_information = combined_information %>% mutate(ligand_receptor = paste(ligand, receptor, sep = "--")) %>% mutate(bonafide_score = 1) %>% mutate_cond(bonafide == FALSE, bonafide_score = 0.5) - + combined_information = combined_information %>% mutate(ligand_receptor = paste(ligand, receptor, sep = "--")) combined_information = combined_information %>% select( - niche, receiver, sender, ligand_receptor, ligand, receptor, bonafide, target, + niche, receiver, sender, ligand_receptor, ligand, receptor, target, ligand_score,ligand_significant, ligand_present, ligand_expression, ligand_expression_scaled, ligand_fraction, ligand_score_spatial, receptor_score, receptor_significant, receptor_present, receptor_expression, receptor_expression_scaled, receptor_fraction, receptor_score_spatial, - ligand_scaled_receptor_expression_fraction, avg_score_ligand_receptor, bonafide_score, + ligand_scaled_receptor_expression_fraction, avg_score_ligand_receptor, target_score, target_significant, target_present, target_expression, target_expression_scaled, target_fraction, ligand_target_weight, activity, activity_normalized, scaled_ligand_score, scaled_ligand_expression_scaled, scaled_receptor_score, scaled_receptor_expression_scaled, scaled_avg_score_ligand_receptor, @@ -848,11 +846,10 @@ get_prioritization_tables = function(output_nichenet_analysis, prioritizing_weig (prioritizing_weights["scaled_activity"] * scaled_activity) + (prioritizing_weights["scaled_activity_normalized"] * scaled_activity_normalized) + (prioritizing_weights["ligand_fraction"] * scaled_ligand_fraction_adapted ) + - (prioritizing_weights["receptor_fraction"] * scaled_receptor_fraction_adapted ) + - (prioritizing_weights["bona_fide"] * bonafide_score) + (prioritizing_weights["receptor_fraction"] * scaled_receptor_fraction_adapted ) )* (1/length(prioritizing_weights))) %>% dplyr::arrange(-prioritization_score) - prioritization_tbl_ligand_receptor = combined_information_prioritized %>% select(niche, receiver, sender, ligand_receptor, ligand, receptor, bonafide, + prioritization_tbl_ligand_receptor = combined_information_prioritized %>% select(niche, receiver, sender, ligand_receptor, ligand, receptor, ligand_score,ligand_significant, ligand_present, ligand_expression, ligand_expression_scaled, ligand_fraction, ligand_score_spatial, receptor_score, receptor_significant, receptor_present, receptor_expression, receptor_expression_scaled, receptor_fraction, receptor_score_spatial, ligand_scaled_receptor_expression_fraction, avg_score_ligand_receptor, @@ -862,7 +859,7 @@ get_prioritization_tables = function(output_nichenet_analysis, prioritizing_weig scaled_ligand_fraction_adapted, scaled_receptor_fraction_adapted, scaled_activity, scaled_activity_normalized, prioritization_score) %>% distinct() - prioritization_tbl_ligand_target = combined_information_prioritized %>% select(niche, receiver, sender, ligand_receptor, ligand, receptor, bonafide, target, + prioritization_tbl_ligand_target = combined_information_prioritized %>% select(niche, receiver, sender, ligand_receptor, ligand, receptor, target, target_score, target_significant, target_present, target_expression, target_expression_scaled, target_fraction, ligand_target_weight, activity, activity_normalized, scaled_activity, scaled_activity_normalized, prioritization_score) %>% distinct() diff --git a/R/prioritization.R b/R/prioritization.R new file mode 100644 index 0000000..aed608a --- /dev/null +++ b/R/prioritization.R @@ -0,0 +1,423 @@ +check_names <- function(column, seurat_obj = NULL){ + # If seurat_obj is NA + if (is.null(seurat_obj)) { + if (column != make.names(column)) { + stop(paste0("'", column, "' is not a syntactically valid R name - check make.names")) + } + } else { + if (!all(unique(seurat_obj[[column, drop=TRUE]]) == make.names(unique(seurat_obj[[column, drop=TRUE]])))){ + stop(paste0("'", column, "' column should have syntactically valid R names - see make.names")) + } + } +} + +#' @title Calculate differential expression of one cell type versus all other cell types +#' +#' @description \code{calculate_de} Calculate differential expression of one cell type versus all other cell types using Seurat::FindAllMarkers. If condition_oi is provided, only consider cells from that condition. +#' +#' @usage +#' calculate_de(seurat_obj, celltype_colname, condition_oi = NA, condition_colname = NA, assay_oi = "RNA", ...) +#' +#' @param seurat_obj Seurat object +#' @param celltype_colname Name of the meta data column that indicates the cell type of a cell +#' @param condition_oi If provided, subset seurat_obj so DE is only calculated for cells belonging to condition_oi +#' @param condition_colname Name of the meta data column that indicates from which group/condition a cell comes from +#' @param assay_oi Which assay need to be used for DE calculation. Default RNA, alternatives: SCT. +#' @param ... Arguments passed to Seurat::FindAllMarkers(by default: features = NULL, min.pct = 0, logfc.threshold = 0, return.thresh = 1) +#' +#' @return A dataframe containing the DE results +#' +#' @examples +#' \dontrun{ +#' seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +#' seurat_obj$celltype <- make.names(seurat_obj$celltype) +#' # Calculate cell-type specific markers across conditions +#' calculate_de(seurat_obj, "celltype") +#' # Calculate LCMV-specific cell-type markers +#' calculate_de(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +#' } +#' +#' @export +#' +calculate_de = function(seurat_obj, celltype_colname, + condition_oi = NA, condition_colname = NA, + assay_oi = "RNA", + ...){ + + # Default settings to return all genes with their p-val and LFC + FindAllMarkers_args = list(object = seurat_obj, assay = assay_oi, + features = NULL, min.pct = 0, logfc.threshold = 0, return.thresh = 1) + + # Replace this with user arguments + FindAllMarkers_args[names(list(...))] = list(...) + + if (any(!is.na(condition_colname), !is.na(condition_oi)) & !all(!is.na(condition_colname), !is.na(condition_oi))){ + stop("Please input both condition_colname and condition_oi") + } + + # Check names + sapply(c(celltype_colname, condition_oi, condition_colname) %>% .[!is.na(.)], check_names) + sapply(celltype_colname, check_names, seurat_obj) + + # Subset seurat obj to condition of interest + if (!is.na(condition_oi)) { + seurat_obj = seurat_obj[,seurat_obj[[condition_colname]] == condition_oi] + } + + # Set celltype as identity class + Idents(seurat_obj) <- seuratObj[[celltype_colname, drop=TRUE]] + + DE_table = do.call(FindAllMarkers, FindAllMarkers_args) %>% + rename(cluster_id = cluster) + + SeuratV4 = c("avg_log2FC") %in% colnames(DE_table) + if(!SeuratV4){ + DE_table = DE_table %>% dplyr::rename(avg_log2FC = avg_logFC) + } + + return(DE_table) + +} +#' @title Calculate average of gene expression per cell type. +#' +#' @description \code{get_exprs_avg} Calculate average of gene expression per cell type. If condition_oi is provided, only consider cells from that condition. +#' @usage +#' get_exprs_avg(seurat_obj, celltype_colname, condition_oi = NA, condition_colname = NA) +#' +#' @inheritParams calculate_de +#' @param condition_oi If provided, subset seurat_obj so average expression is only calculated for cells belonging to condition_oi +#' +#' @return Data frame with average gene expression per cell type. +#' +#' @import dplyr +#' @import tibble +#' @import tidyr +#' +#' @examples +#' \dontrun{ +#' seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +#' seurat_obj$celltype <- make.names(seuratObj$celltype) +#' # Calculate average expression across conditions +#' expression_info = get_exprs_avg(seurat_obj, "celltype") +#' # Calculate LCMV-specific average expression +#' expression_info = get_exprs_avg(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +#' } +#' +#' @export +#' +get_exprs_avg = function(seurat_obj, celltype_colname, + condition_oi = NA, condition_colname = NA){ + + requireNamespace("dplyr") + + if (any(!is.na(condition_colname), !is.na(condition_oi)) & !all(!is.na(condition_colname), !is.na(condition_oi))){ + stop("Please input both condition_colname and condition_oi") + } + + # Check names + sapply(c(celltype_colname, condition_oi, condition_colname) %>% .[!is.na(.)], check_names) + sapply(celltype_colname, check_names, seurat_obj) + + # Subset seurat object + if (!is.na(condition_oi)) { + seurat_obj = seurat_obj[,seurat_obj[[condition_colname]] == condition_oi] + } + + seurat_obj <- NormalizeData(seurat_obj, verbose = FALSE) + avg_celltype <- AverageExpression(seurat_obj, assays = "RNA", slot = "data", group.by = celltype_colname) %>% + .$RNA %>% data.frame() %>% rownames_to_column("gene") %>% + pivot_longer(!gene, names_to = "cluster_id", values_to = "avg_expr") + + return (avg_celltype) + + + +} +#' @title Process DE or expression information into intercellular communication focused information. +#' +#' @description \code{process_table_to_ic} First, only keep information of ligands for senders_oi, and information of receptors for receivers_oi. +#' Then, combine information for senders and receivers by linking ligands to receptors based on the prior knowledge ligand-receptor network. +#' @usage process_table_to_ic(table_object, table_type = "expression", lr_network, senders_oi = NULL, receivers_oi = NULL) +#' @param table_object Output of `get_exprs_avg`, `calculate_de`, or `FindMarkers` +#' @param table_type "expression", "celltype_DE", or "group_DE": indicates whether the table contains expression, celltype markers, or condition-specific information +#' @param lr_network Prior knowledge Ligand-Receptor network (columns: ligand, receptor) +#' @param senders_oi Default NULL: all celltypes will be considered as senders. If you want to select specific senders of interest: you can add this here as character vector. +#' @param receivers_oi Default NULL: all celltypes will be considered as receivers If you want to select specific receivers of interest: you can add this here as character vector. +#' @return Dataframe combining sender and receiver information linked to each other through joining by the ligand-receptor network. +#' +#' @import dplyr +#' +#' @examples +#' \dontrun{ +#' library(dplyr) +#' lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +#' lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% dplyr::distinct(ligand, receptor) +#' seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +#' seurat_obj$celltype <- make.names(seuratObj$celltype) +#' # Calculate LCMV-specific average expression +#' expression_info = get_exprs_avg(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +#' # Calculate LCMV-specific cell-type markers +#' DE_table = calculate_de(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +#' # Calculate LCMV-specific genes across cell types +#' condition_markers <- FindMarkers(object = seuratObj, ident.1 = "LCMV", ident.2 = "SS", +#' group.by = "aggregate", min.pct = 0, logfc.threshold = 0) %>% rownames_to_column("gene") +#' processed_expr_info = process_table_to_ic(expression_info, table_type = "expression", lr_network) +#' processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network, +#' senders_oi = c("CD4.T", "Treg", "Mono", "NK", "B", "DC"), receivers_oi = "CD8.T") +#' processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "condition_DE", lr_network) +#' } +#' +#' @export +#' +process_table_to_ic = function(table_object, table_type = "expression", + lr_network, senders_oi = NULL, receivers_oi = NULL){ + + ligands = lr_network %>% dplyr::pull(ligand) %>% unique() + receptors = lr_network %>% dplyr::pull(receptor) %>% unique() + + if (table_type == "expression"){ + if (!is.null(senders_oi)) warning("senders_oi is given. The expression data will be scaled with all remaining cell types, so it is recommended that senders_oi = NULL") + if (!is.null(receivers_oi)) warning("receivers_oi is given. The expression data will be scaled with all remaining cell types, so it is recommended that receivers_oi = NULL") + + sender_table <- table_object %>% dplyr::rename(sender = cluster_id, ligand = gene, avg_ligand = avg_expr) + receiver_table <- table_object %>% dplyr::rename(receiver = cluster_id, receptor = gene, avg_receptor = avg_expr) + columns_select <- c("sender", "receiver", "ligand", "receptor", "avg_ligand", "avg_receptor", "ligand_receptor_prod") + + } else if (table_type == "celltype_DE"){ + if (is.null(senders_oi)) warning("senders_oi is NULL For DE filtering, it is best if this parameter is given.") + if (is.null(receivers_oi)) warning("receivers_oi is NULL For DE filtering, it is best if this parameter is given.") + + sender_table <- table_object %>% dplyr::rename(sender = cluster_id, ligand = gene, avg_ligand = avg_log2FC, p_val_ligand = p_val, p_adj_ligand = p_val_adj, pct_expressed_sender = pct.1) + receiver_table <- table_object %>% dplyr::rename(receiver = cluster_id, receptor = gene, avg_receptor = avg_log2FC, p_val_receptor = p_val, p_adj_receptor = p_val_adj, pct_expressed_receiver = pct.1) + columns_select <- c("sender", "receiver", "ligand", "receptor", "lfc_ligand", "lfc_receptor", "ligand_receptor_lfc_avg", "p_val_ligand", "p_adj_ligand", "p_val_receptor", "p_adj_receptor", "pct_expressed_sender", "pct_expressed_receiver") + + } else if (table_type == "group_DE") { + if (!is.null(senders_oi)) stop("senders_oi is given. Since we do not consider cell type specificity, please change this to NULL") + if (!is.null(receivers_oi)) stop("receivers_oi is given. Since we do not consider cell type specificity, please change this to NULL") + + sender_table = table_object %>% dplyr::rename(ligand = gene, avg_ligand = avg_log2FC, p_val_ligand = p_val, p_adj_ligand = p_val_adj) + receiver_table = table_object %>% dplyr::rename(receptor = gene, avg_receptor = avg_log2FC, p_val_receptor = p_val, p_adj_receptor = p_val_adj) + columns_select <- c("ligand", "receptor", "lfc_ligand", "lfc_receptor", "ligand_receptor_lfc_avg", "p_val_ligand", "p_adj_ligand", "p_val_receptor", "p_adj_receptor") + + } + + # Filter senders and receivers if it is not NA + sender_table <- sender_table %>% {if (!is.null(senders_oi)) filter(., sender %in% senders_oi) else (.)} + receiver_table <- receiver_table %>% {if (!is.null(receivers_oi)) filter(., receiver %in% receivers_oi) else (.)} + + # Join sender-ligand-receptor-receiver + sender_receiver_table <- sender_table %>% dplyr::inner_join(lr_network, by = "ligand") %>% + dplyr::inner_join(receiver_table, by = "receptor") + + # Calculate average expression + sender_receiver_table <- sender_receiver_table %>% + mutate(ligand_receptor_avg = case_when( + table_type == "expression" ~ avg_ligand * avg_receptor, + grepl("DE$", table_type) ~ (avg_ligand + avg_ligand)/2 + ) + ) %>% arrange(-ligand_receptor_avg) %>% + # Rename columns appropriately + {if (table_type == "expression") rename(., "ligand_receptor_prod" = "ligand_receptor_avg") + else rename(., "ligand_receptor_lfc_avg" = "ligand_receptor_avg", "lfc_ligand" = "avg_ligand", "lfc_receptor" = "avg_receptor")} %>% + select(all_of(columns_select)) %>% dplyr::distinct() + + return(sender_receiver_table) + +} + + +#' @title generate_prioritization_tables +#' +#' @description \code{generate_prioritization_tables} Perform a prioritization of cell-cell interactions (similar to MultiNicheNet). +#' User can choose the importance attached to each of the following prioritization criteria: differential expression of ligand and receptor, cell-type specificity of expression of ligand and receptor, NicheNet ligand activity +#' @usage generate_prioritization_tables(sender_receiver_info, sender_receiver_de, ligand_activities, lr_condition_de = NULL, +#' prioritizing_weights = c("de_ligand" = 1,"de_receptor" = 1,"activity_scaled" = 2, +#' "exprs_ligand" = 2,"exprs_receptor" = 2, +#' "ligand_condition_specificity" = 0, "receptor_condition_specificity"=0)) +#' +#' @param sender_receiver_info Output of `get_exprs_avg` -> `process_table_to_ic` +#' @param sender_receiver_de Output of`calculate_de` -> `process_table_to_ic` +#' @param ligand_activities Output of `predict_ligand_activities` +#' @param lr_condition_de Output of `FindMarkers` -> `process_table_to_ic` +#' @param prioritizing_weights Named vector indicating the relative weights of each prioritization criterion +#' +#' @return Data frames of prioritized sender-ligand-receiver-receptor interactions. +#' +#' @import dplyr +#' +#' @examples +#' \dontrun{ +#' library(dplyr) +#' lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +#' lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% dplyr::distinct(ligand, receptor) +#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +#' seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +#' seurat_obj$celltype <- make.names(seuratObj$celltype) +#' sender_celltypes = c("CD4.T","Treg", "Mono", "NK", "B", "DC") +#' receiver = "CD8.T" +#' +#' # Convert lr_network from mouse to human +#' lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() +#' colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() +#' rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() +#' ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] +#' +#' # Ligand activity analysis +#' seurat_obj_receiver = subset(seurat_obj, idents = receiver) %>% SetIdent(value = .[["aggregate"]]) +#' geneset_oi = FindMarkers(object = seurat_obj_receiver, ident.1 = "LCMV, ident.2 = "SS, min.pct = 0.10) %>% rownames_to_column("gene") %>% +#' filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] +#' expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, 0.10) %>% unlist() %>% unique() +#' expressed_genes_receiver = get_expressed_genes(receiver, seurat_obj, pct = 0.10) +#' expressed_ligands = intersect(lr_network %>% pull(ligand) %>% unique(), expressed_genes_sender) +#' expressed_receptors = intersect(lr_network %>% pull(receiver) %>% unique(), expressed_genes_receiver) +#' potential_ligands = lr_network %>% filter(ligand %in% expressed_ligands & receptor %in% expressed_receptors) %>% pull(from) %>% unique() +#' ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)], +#' ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +#' +#' # Calculate LCMV-specific average expression +#' expression_info = get_exprs_avg(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +#' +#' # Calculate LCMV-specific cell-type markers +#' DE_table = calculate_de(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +#' +#' # Calculate condition-specific markers +#' condition_markers <- FindMarkers(object = seuratObj, ident.1 = "LCMV", ident.2 = "SS", +#' group.by = "aggregate", min.pct = 0, logfc.threshold = 0) %>% rownames_to_column("gene") +#' +#' # Process tables +#' processed_expr_info = process_table_to_ic(expression_info, table_type = "expression", lr_network) +#' processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network, +#' senders_oi = sender_celltypes, receivers_oi = receiver) +#' processed_condition_DE_table <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network) +#' +#' # Generate prioritization tables +#' prioritizing_weights = c("de_ligand" = 1, "de_receptor" = 1, "activity_scaled" = 2, "exprs_ligand" = 1, "exprs_receptor" = 1, "ligand_condition_specificity" = 0, "receptor_condition_specificity" = 0) +#' generate_prioritization_tables(processed_expr_info, +#' processed_DE_table, +#' ligand_activities, +#' processed_condition_DE_table, +#' prioritizing_weights) +#'} +#' @export +#' +#' +generate_prioritization_tables = function(sender_receiver_info, sender_receiver_de, ligand_activities, lr_condition_de = NULL, + prioritizing_weights = c("de_ligand" = 1,"de_receptor" = 1,"activity_scaled" = 2,"exprs_ligand" = 2,"exprs_receptor" = 2, + "ligand_condition_specificity" = 0, "receptor_condition_specificity"=0)){ + + requireNamespace("dplyr") + sender_receiver_tbl = sender_receiver_de %>% dplyr::distinct(sender, receiver) + + # Ligand DE prioritization + sender_ligand_prioritization = sender_receiver_de %>% dplyr::ungroup() %>% dplyr::select(sender, ligand, lfc_ligand, p_val_ligand) %>% dplyr::distinct() %>% + dplyr::mutate(lfc_pval_ligand = -log10(p_val_ligand)*lfc_ligand, + p_val_ligand_adapted = -log10(p_val_ligand)*sign(lfc_ligand)) + sender_ligand_prioritization = sender_ligand_prioritization %>% dplyr::mutate(scaled_lfc_ligand = rank(lfc_ligand, ties.method = "average", na.last = FALSE)/max(rank(lfc_ligand, ties.method = "average", na.last = FALSE)), + scaled_p_val_ligand = rank(desc(p_val_ligand), ties.method = "average", na.last = FALSE)/max(rank(desc(p_val_ligand), ties.method = "average", na.last = FALSE)), + scaled_lfc_pval_ligand = rank(lfc_pval_ligand, ties.method = "average", na.last = FALSE)/max(rank(lfc_pval_ligand, ties.method = "average", na.last = FALSE)), + scaled_p_val_ligand_adapted = rank(p_val_ligand_adapted, ties.method = "average", na.last = FALSE)/max(rank(p_val_ligand_adapted, ties.method = "average", na.last = FALSE))) %>% + dplyr::arrange(-lfc_pval_ligand) + + # Receptor DE prioritization + receiver_receptor_prioritization = sender_receiver_de %>% dplyr::ungroup() %>% dplyr::select(receiver, receptor, lfc_receptor, p_val_receptor) %>% dplyr::distinct() %>% + dplyr::mutate(lfc_pval_receptor = -log10(p_val_receptor)*lfc_receptor, + p_val_receptor_adapted = -log10(p_val_receptor)*sign(lfc_receptor) ) + receiver_receptor_prioritization = receiver_receptor_prioritization %>% dplyr::mutate(scaled_lfc_receptor = rank(lfc_receptor, ties.method = "average", na.last = FALSE)/max(rank(lfc_receptor, ties.method = "average", na.last = FALSE)), + scaled_p_val_receptor = rank(desc(p_val_receptor), ties.method = "average", na.last = FALSE)/max(rank(desc(p_val_receptor), ties.method = "average", na.last = FALSE)), + scaled_lfc_pval_receptor = rank(lfc_pval_receptor, ties.method = "average", na.last = FALSE)/max(rank(lfc_pval_receptor, ties.method = "average", na.last = FALSE)), + scaled_p_val_receptor_adapted = rank(p_val_receptor_adapted, ties.method = "average", na.last = FALSE)/max(rank(p_val_receptor_adapted, ties.method = "average", na.last = FALSE))) %>% dplyr::arrange(-lfc_pval_receptor) + + # Ligand activity prioritization + ligand_activity_prioritization = ligand_activities %>% select(test_ligand, pearson, rank) %>% rename(activity = pearson, ligand=test_ligand) %>% + dplyr::mutate(activity_zscore = nichenetr::scaling_zscore(activity), + scaled_activity = scale_quantile_adapted(activity, outlier_cutoff = 0.01)) %>% dplyr::arrange(-activity_zscore) + + + # Cell-type specificity of expression of ligand: per ligand: score each sender combination based on expression + ligand_celltype_specificity_prioritization = sender_receiver_info %>% dplyr::select(sender, ligand, avg_ligand) %>% dplyr::distinct() %>% dplyr::group_by(ligand) %>% + dplyr::mutate(scaled_avg_exprs_ligand = scale_quantile_adapted(avg_ligand)) %>% dplyr::arrange(-scaled_avg_exprs_ligand) + + # Cell-type specificity of expression of receptor: per receptor: score each receiver combination based on expression + receptor_celltype_specificity_prioritization = sender_receiver_info %>% dplyr::select(receiver, receptor, avg_receptor) %>% dplyr::distinct() %>% dplyr::group_by(receptor) %>% + dplyr::mutate(scaled_avg_exprs_receptor = scale_quantile_adapted(avg_receptor)) %>% dplyr::arrange(-scaled_avg_exprs_receptor) + + if (!is.null(lr_condition_de)){ + # Condition specificity of ligand + ligand_condition_prioritization = lr_condition_de %>% dplyr::ungroup() %>% dplyr::select(ligand, lfc_ligand, p_val_ligand) %>% dplyr::distinct() %>% + dplyr::mutate(lfc_pval_ligand = -log10(p_val_ligand)*lfc_ligand, + p_val_ligand_adapted = -log10(p_val_ligand)*sign(lfc_ligand)) + ligand_condition_prioritization = ligand_condition_prioritization %>% dplyr::mutate(scaled_lfc_ligand = rank(lfc_ligand, ties.method = "average", na.last = FALSE)/max(rank(lfc_ligand, ties.method = "average", na.last = FALSE)), + scaled_p_val_ligand = rank(desc(p_val_ligand), ties.method = "average", na.last = FALSE)/max(rank(desc(p_val_ligand), ties.method = "average", na.last = FALSE)), + scaled_lfc_pval_ligand = rank(lfc_pval_ligand, ties.method = "average", na.last = FALSE)/max(rank(lfc_pval_ligand, ties.method = "average", na.last = FALSE)), + scaled_p_val_ligand_adapted = rank(p_val_ligand_adapted, ties.method = "average", na.last = FALSE)/max(rank(p_val_ligand_adapted, ties.method = "average", na.last = FALSE))) %>% + dplyr::arrange(-lfc_pval_ligand) %>% rename_with(.fn = function(column_name) paste0(column_name, "_group"), .cols = -ligand) + + # Condition specificity of receptor + receptor_condition_prioritization = lr_condition_de %>% dplyr::ungroup() %>% dplyr::select(receptor, lfc_receptor, p_val_receptor) %>% dplyr::distinct() %>% + dplyr::mutate(lfc_pval_receptor = -log10(p_val_receptor)*lfc_receptor, + p_val_receptor_adapted = -log10(p_val_receptor)*sign(lfc_receptor)) + receptor_condition_prioritization = receptor_condition_prioritization %>% dplyr::mutate(scaled_lfc_receptor = rank(lfc_receptor, ties.method = "average", na.last = FALSE)/max(rank(lfc_receptor, ties.method = "average", na.last = FALSE)), + scaled_p_val_receptor = rank(desc(p_val_receptor), ties.method = "average", na.last = FALSE)/max(rank(desc(p_val_receptor), ties.method = "average", na.last = FALSE)), + scaled_lfc_pval_receptor = rank(lfc_pval_receptor, ties.method = "average", na.last = FALSE)/max(rank(lfc_pval_receptor, ties.method = "average", na.last = FALSE)), + scaled_p_val_receptor_adapted = rank(p_val_receptor_adapted, ties.method = "average", na.last = FALSE)/max(rank(p_val_receptor_adapted, ties.method = "average", na.last = FALSE))) %>% + dplyr::arrange(-lfc_pval_receptor) %>% rename_with(.fn = function(column_name) paste0(column_name, "_group"), .cols = -receptor) + + } else { + if (any(prioritizing_weights[grep("specificity", names(prioritizing_weights))] > 0)) { + stop("No lr_condition_de table given, yet the relevant weights are nonzero.\nEither set weights of 'ligand_condition_specificity' and 'receptor_condition_specificity' to zero or provide lr_condition_de.") + } + } + + + weights <- prioritizing_weights + # final group-based prioritization + group_prioritization_tbl = sender_receiver_de %>% + dplyr::inner_join(sender_receiver_info) %>% + {if (weights["de_ligand"] > 0) dplyr::inner_join(., sender_ligand_prioritization) else (.)} %>% + {if (weights["activity_scaled"] > 0) dplyr::inner_join(., ligand_activity_prioritization) else (.)} %>% + {if (weights["de_receptor"] > 0) dplyr::inner_join(., receiver_receptor_prioritization) else (.)} %>% + {if (weights["exprs_ligand"] > 0) dplyr::inner_join(., ligand_celltype_specificity_prioritization) else (.)} %>% + {if (weights["exprs_receptor"] > 0) dplyr::inner_join(., receptor_celltype_specificity_prioritization) else (.)} %>% + {if (weights["ligand_condition_specificity"] > 0) dplyr::inner_join(., ligand_condition_prioritization) else (.)} %>% + {if (weights["receptor_condition_specificity"] > 0) dplyr::inner_join(., receptor_condition_prioritization) else (.)} + + + # have a weighted average the final score (no product!!) + sum_prioritization_weights = 2*weights["de_ligand"] + 2*weights["de_receptor"] + weights["activity_scaled"] + weights["exprs_ligand"] + weights["exprs_receptor"] + weights["ligand_condition_specificity"] + weights["receptor_condition_specificity"] + group_prioritization_tbl = group_prioritization_tbl %>% rowwise() %>% + dplyr::mutate(prioritization_score = + ( + (prioritizing_weights["de_ligand"] * ifelse("scaled_lfc_ligand" %in% names(group_prioritization_tbl), scaled_lfc_ligand, 0)) + + (prioritizing_weights["de_receptor"] * ifelse("scaled_lfc_receptor" %in% names(group_prioritization_tbl), scaled_lfc_receptor, 0)) + + (prioritizing_weights["de_ligand"] * ifelse("scaled_p_val_ligand_adapted" %in% names(group_prioritization_tbl), scaled_p_val_ligand_adapted, 0)) + + (prioritizing_weights["de_receptor"] * ifelse("scaled_p_val_receptor_adapted" %in% names(group_prioritization_tbl), scaled_p_val_receptor_adapted, 0)) + + (prioritizing_weights["activity_scaled"] * ifelse("scaled_activity" %in% names(group_prioritization_tbl), scaled_activity, 0)) + + (prioritizing_weights["exprs_ligand"] * ifelse("scaled_avg_exprs_ligand" %in% names(group_prioritization_tbl), scaled_avg_exprs_ligand, 0)) + + (prioritizing_weights["exprs_receptor"] * ifelse("scaled_avg_exprs_receptor" %in% names(group_prioritization_tbl), scaled_avg_exprs_receptor, 0)) + + (prioritizing_weights["ligand_condition_specificity"] * ifelse("scaled_lfc_ligand_group" %in% names(group_prioritization_tbl), scaled_lfc_ligand_group, 0)) + + (prioritizing_weights["receptor_condition_specificity"] * ifelse("scaled_lfc_receptor_group" %in% names(group_prioritization_tbl), scaled_lfc_receptor_group, 0)) + )* (1/sum_prioritization_weights)) %>% dplyr::arrange(-prioritization_score) %>% + ungroup() + + return (group_prioritization_tbl) + +} + +get_top_n_lr_pairs = function(prioritization_tables, top_n, groups_oi = NULL, senders_oi = NULL, receivers_oi = NULL, rank_per_group = TRUE){ + prioritization_tbl_oi = prioritization_tables$group_prioritization_tbl %>% dplyr::filter(group == top_group & fraction_expressing_ligand_receptor > 0) %>% dplyr::distinct(group, sender, receiver, ligand, receptor, receiver, id, prioritization_score) + if(!is.null(groups_oi)){ + prioritization_tbl_oi = prioritization_tbl_oi %>% dplyr::filter(group %in% groups_oi) + } + if(!is.null(senders_oi)){ + prioritization_tbl_oi = prioritization_tbl_oi %>% dplyr::filter(sender %in% senders_oi) + } + if(!is.null(receivers_oi)){ + prioritization_tbl_oi = prioritization_tbl_oi %>% dplyr::filter(receiver %in% receivers_oi) + } + if(rank_per_group == TRUE){ + prioritization_tbl_oi = prioritization_tbl_oi %>% dplyr::group_by(group) %>% dplyr::mutate(prioritization_rank = rank(desc(prioritization_score))) %>% dplyr::filter(prioritization_rank <= top_n) + } else { + prioritization_tbl_oi = prioritization_tbl_oi %>% dplyr::mutate(prioritization_rank = rank(desc(prioritization_score))) %>% dplyr::filter(prioritization_rank <= top_n) + } + return(prioritization_tbl_oi) +} diff --git a/R/supporting_functions.R b/R/supporting_functions.R index 5664a51..ddfadf5 100644 --- a/R/supporting_functions.R +++ b/R/supporting_functions.R @@ -217,7 +217,7 @@ alias_to_symbol_seurat = function(seurat_obj, organism) { requireNamespace("Seurat") RNA = seurat_obj@assays$RNA - newnames = convert_alias_to_symbols(rownames(RNA@counts), organism = organism) + newnames = convert_alias_to_symbols(rownames(RNA@counts), organism = organism, verbose = FALSE) # sometimes: there are doubles: doubles = newnames %>% table() %>% .[. > 1] %>% names() diff --git a/README.Rmd b/README.Rmd index 6c15737..4f41f99 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,7 +1,7 @@ --- output: github_document: - html_preview: false + html_preview: true --- @@ -16,6 +16,7 @@ rmarkdown::render("README.Rmd",output_format = "md_document") [![R build status](https://github.com/saeyslab/nichenetr/workflows/R-CMD-check-bioc/badge.svg)](https://github.com/saeyslab/nichenetr/actions) [![Coverage Status](https://codecov.io/gh/saeyslab/nichenetr/branch/master/graph/badge.svg)](https://codecov.io/gh/saeyslab/nichenetr) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291) @@ -23,16 +24,14 @@ rmarkdown::render("README.Rmd",output_format = "md_document") We describe the NicheNet algorithm in the following paper: [NicheNet: modeling intercellular communication by linking ligands to target genes](https://www.nature.com/articles/s41592-019-0667-5). -Bonnardel, T'Jonck et al. already used NicheNet to predict upstream niche signals driving Kupffer cell differentiation [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). -### Important update! - -12-01-2022: In the Liver Atlas paper from Guilliams et al.: [Spatial proteogenomics reveals distinct and evolutionarily conserved hepatic macrophage niches](https://www.sciencedirect.com/science/article/pii/S0092867421014811), we used Differential NicheNet, an extension to the default NicheNet algorithm. **Differential NicheNet** can be used to compare cell-cell interactions between different niches and better predict niche-specific ligand-receptor (L-R) pairs. It was used in that paper to predict ligand-receptor pairs specific for the Kupffer cell niche in mouse and human. - -The main difference between the classic NicheNet pipeline and the Differential NicheNet pipeline is that Differential NicheNet also uses the differential expression between the conditions/niches of the ligand-receptor pairs for prioritization in addition to the ligand activities. The classic NicheNet pipeline on the contrary uses only ligand acivity for prioritization (and shows differential expression only in visualizations). - -So if you have data of multiple conditions or niches, and you want to include differential expression of the ligand-receptor pairs in the prioritization, we recommend you check out Differential NicheNet (update nichenetr to the 1.1.0 version). At the bottom of this page, you can find the links to two vignettes illustrating a Differential NicheNet analysis. We recommend these vignettes if you want to apply Differential NicheNet on your own data. If you want to see the code used for the analyses used in the Guilliams et al. paper, see https://github.com/saeyslab/NicheNet_LiverCellAtlas. +### Major updates (20-06-2023)! +* MultiNicheNet - a multi-sample, multi-condition extension of NicheNet - is now available on [biorxiv](https://www.biorxiv.org/content/10.1101/2023.06.13.544751v1) and [Github](https://github.com/saeyslab/multinichenetr). +* MultiNicheNet uses an [updated prior model (v2)](https://zenodo.org/record/7074291/) consisting of additional ligand-receptor interactions from the [Omnipath database](https://omnipathdb.org/) and from [Verschueren et al. (2020)](https://www.sciencedirect.com/science/article/pii/S0092867420306942?via%3Dihub). We have now also updated the vignettes of NicheNet to use the new model instead. +* **New functionality:** we have included additional functions to prioritize ligands not only based on the ligand activity, but also on the ligand and receptor expression, cell type specificity, and condition specificity. This is similar to the criteria used in Differential NicheNet and MultiNicheNet. See the [Prioritizing ligands based on expression values](vignettes/seurat_steps_prioritization.md) vignette for more information. +* Due to this more generalizable prioritization scheme, we will no longer provide support for Differential NicheNet. +* We included code for making a ligand-receptor-target circos plot in the [Circos plot visualization](vignettes/circos.md) vignette. ## Introduction to NicheNet @@ -66,11 +65,11 @@ Moreover, we provide instructions on how to make intuitive visualizations of the

![](vignettes/circos_plot_adapted.jpg) - ## Installation of nichenetr -Installation typically takes a few minutes, depending on the number of dependencies that has already been installed on your pc. -You can install nichenetr (and required dependencies) from github with: +Installation typically takes a few minutes, depending on the number of +dependencies that has already been installed on your pc. You can install +nichenetr (and required dependencies) from github with: ```{r gh-installation, eval = FALSE} # install.packages("devtools") @@ -94,6 +93,7 @@ To facilitate the use of NicheNet on single-cell data, we demonstrate the use of Following vignettes contain explanation on how to do some follow-up analyses after performing the most basic analysis: +* [Prioritization of ligands based on expression values](vignettes/seurat_steps_prioritization.md): `vignette("seurat_steps_prioritization", package="nichenetr")` * [Inferring ligand-to-target signaling paths](vignettes/ligand_target_signaling_path.md): `vignette("ligand_target_signaling_path", package="nichenetr")` * [Assess how well top-ranked ligands can predict a gene set of interest](vignettes/target_prediction_evaluation_geneset.md): `vignette("target_prediction_evaluation_geneset", package="nichenetr")` * [Single-cell NicheNet's ligand activity analysis](vignettes/ligand_activity_single_cell.md): `vignette("ligand_activity_single_cell", package="nichenetr")` @@ -109,19 +109,32 @@ People interested in building own models or benchmark own models against NicheNe * [Model evaluation: target gene and ligand activity prediction](vignettes/model_evaluation.md): `vignette("model_evaluation", package="nichenetr")` * [Parameter optimization via mlrMBO](vignettes/parameter_optimization.md): `vignette("parameter_optimization", package="nichenetr")` -People working with mouse data can see in the following vignette how to convert NicheNet's ligand-target model (given in human symbols) to mouse symbols: - -* [Converting NicheNet's model from human to mouse symbols](vignettes/symbol_conversion.md): `vignette("symbol_conversion", package="nichenetr")` -Differential NicheNet vignettes: +##### Deprecated vignettes +Differential NicheNet has been deprecated; you may want to consider using the [general prioritization scheme](vignettes/seurat_steps_prioritization.md) instead. * [Differential NicheNet analysis between niches of interest](vignettes/differential_nichenet.md):`vignette("differential_nichenet", package="nichenetr")` * [Differential NicheNet analysis between conditions of interest](vignettes/differential_nichenet_pEMT.md):`vignette("differential_nichenet_pEMT", package="nichenetr")` +In NicheNet v2, the mouse and human ligand-target models are uploaded separately so symbol conversion is not necessary. If you are still using the NicheNet v1 model, you can check the following vignette on how to convert the model (given in human symbols) to mouse symbols: + +* [Converting NicheNet's model from human to mouse symbols](vignettes/symbol_conversion.md): `vignette("symbol_conversion", package="nichenetr")` + ## FAQ Check the FAQ page at [FAQ NicheNet](vignettes/faq.md): `vignette("faq", package="nichenetr")` +## Previous updates + +**12-01-2022:** In the Liver Atlas paper from Guilliams et al.: [Spatial proteogenomics reveals distinct and evolutionarily conserved hepatic macrophage niches](https://www.sciencedirect.com/science/article/pii/S0092867421014811), we used Differential NicheNet, an extension to the default NicheNet algorithm. **Differential NicheNet** can be used to compare cell-cell interactions between different niches and better predict niche-specific ligand-receptor (L-R) pairs. It was used in that paper to predict ligand-receptor pairs specific for the Kupffer cell niche in mouse and human. + +The main difference between the classic NicheNet pipeline and the Differential NicheNet pipeline is that Differential NicheNet also uses the differential expression between the conditions/niches of the ligand-receptor pairs for prioritization in addition to the ligand activities. The classic NicheNet pipeline on the contrary uses only ligand acivity for prioritization (and shows differential expression only in visualizations). + +So if you have data of multiple conditions or niches, and you want to include differential expression of the ligand-receptor pairs in the prioritization, we recommend you check out Differential NicheNet (update nichenetr to the 1.1.0 version). At the bottom of this page, you can find the links to two vignettes illustrating a Differential NicheNet analysis. We recommend these vignettes if you want to apply Differential NicheNet on your own data. If you want to see the code used for the analyses used in the Guilliams et al. paper, see https://github.com/saeyslab/NicheNet_LiverCellAtlas. + +**15-10-2019:** Bonnardel, T'Jonck et al. used NicheNet to predict upstream niche signals driving Kupffer cell differentiation [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). + + ## References Browaeys, R., Saelens, W. & Saeys, Y. NicheNet: modeling intercellular communication by linking ligands to target genes. Nat Methods (2019) doi:10.1038/s41592-019-0667-5 diff --git a/README.md b/README.md index afa01fc..0b5a021 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ + **nichenetr: the R implementation of the NicheNet method.** The goal of @@ -26,42 +28,30 @@ We describe the NicheNet algorithm in the following paper: [NicheNet: modeling intercellular communication by linking ligands to target genes](https://www.nature.com/articles/s41592-019-0667-5). -Bonnardel, T’Jonck et al. already used NicheNet to predict upstream -niche signals driving Kupffer cell differentiation [Stellate Cells, -Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on -Monocytes Colonizing the Liver Macrophage -Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). - -### Important update! - -12-01-2022: In the Liver Atlas paper from Guilliams et al.: [Spatial -proteogenomics reveals distinct and evolutionarily conserved hepatic -macrophage -niches](https://www.sciencedirect.com/science/article/pii/S0092867421014811), -we used Differential NicheNet, an extension to the default NicheNet -algorithm. **Differential NicheNet** can be used to compare cell-cell -interactions between different niches and better predict niche-specific -ligand-receptor (L-R) pairs. It was used in that paper to predict -ligand-receptor pairs specific for the Kupffer cell niche in mouse and -human. - -The main difference between the classic NicheNet pipeline and the -Differential NicheNet pipeline is that Differential NicheNet also uses -the differential expression between the conditions/niches of the -ligand-receptor pairs for prioritization in addition to the ligand -activities. The classic NicheNet pipeline on the contrary uses only -ligand acivity for prioritization (and shows differential expression -only in visualizations). - -So if you have data of multiple conditions or niches, and you want to -include differential expression of the ligand-receptor pairs in the -prioritization, we recommend you check out Differential NicheNet (update -nichenetr to the 1.1.0 version). At the bottom of this page, you can -find the links to two vignettes illustrating a Differential NicheNet -analysis. We recommend these vignettes if you want to apply Differential -NicheNet on your own data. If you want to see the code used for the -analyses used in the Guilliams et al. paper, see -. +### Major updates (20-06-2023)! + +- MultiNicheNet - a multi-sample, multi-condition extension of + NicheNet - is now available on + [biorxiv](https://www.biorxiv.org/content/10.1101/2023.06.13.544751v1) + and [Github](https://github.com/saeyslab/multinichenetr). +- MultiNicheNet uses an [updated prior model + (v2)](https://zenodo.org/record/7074291/) consisting of additional + ligand-receptor interactions from the [Omnipath + database](https://omnipathdb.org/) and from [Verschueren et + al. (2020)](https://www.sciencedirect.com/science/article/pii/S0092867420306942?via%3Dihub). + We have now also updated the vignettes of NicheNet to use the new + model instead. +- **New functionality:** we have included additional functions to + prioritize ligands not only based on the ligand activity, but also on + the ligand and receptor expression, cell type specificity, and + condition specificity. This is similar to the criteria used in + Differential NicheNet and MultiNicheNet. See the [Prioritizing ligands + based on expression values](vignettes/seurat_steps_prioritization.md) + vignette for more information. +- Due to this more generalizable prioritization scheme, we will no + longer provide support for Differential NicheNet. +- We included code for making a ligand-receptor-target circos plot in + the [Circos plot visualization](vignettes/circos.md) vignette. ## Introduction to NicheNet @@ -114,16 +104,16 @@ effects. Specific functionalities of this package include: -- assessing how well ligands expressed by a sender cell can predict - changes in gene expression in the receiver cell -- prioritizing ligands based on their effect on gene expression -- inferring putative ligand-target links active in the system under - study -- inferring potential signaling paths between ligands and target genes - of interest: to generate causal hypotheses and check which data - sources support the predictions -- validation of the prior ligand-target model -- construction of user-defined prior ligand-target models +- assessing how well ligands expressed by a sender cell can predict + changes in gene expression in the receiver cell +- prioritizing ligands based on their effect on gene expression +- inferring putative ligand-target links active in the system under + study +- inferring potential signaling paths between ligands and target genes + of interest: to generate causal hypotheses and check which data + sources support the predictions +- validation of the prior ligand-target model +- construction of user-defined prior ligand-target models Moreover, we provide instructions on how to make intuitive visualizations of the main predictions (e.g., via circos plots as shown @@ -137,8 +127,10 @@ Installation typically takes a few minutes, depending on the number of dependencies that has already been installed on your pc. You can install nichenetr (and required dependencies) from github with: - # install.packages("devtools") - devtools::install_github("saeyslab/nichenetr") +``` r +# install.packages("devtools") +devtools::install_github("saeyslab/nichenetr") +``` nichenetr was tested on both Windows and Linux (most recently tested R version: R 4.0.0) @@ -153,76 +145,120 @@ NicheNet analysis. This includes prioritizing ligands and predicting target genes of prioritized ligands. This demo analysis takes only a few minutes to run: -- [NicheNet’s ligand activity analysis on a gene set of interest: - predict active ligands and their target - genes](vignettes/ligand_activity_geneset.md): - `vignette("ligand_activity_geneset", package="nichenetr")` +- [NicheNet’s ligand activity analysis on a gene set of interest: + predict active ligands and their target + genes](vignettes/ligand_activity_geneset.md): + `vignette("ligand_activity_geneset", package="nichenetr")` To facilitate the use of NicheNet on single-cell data, we demonstrate the use of NicheNet on a Seurat object in following vignettes. One demonstrates the use of a single wrapper function, the other demonstrates what’s behind the wrapper (recommended). -- [Perform NicheNet analysis starting from a Seurat - object](vignettes/seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")` -- [Perform NicheNet analysis starting from a Seurat object: - step-by-step - analysis](vignettes/seurat_steps.md):`vignette("seurat_steps", package="nichenetr")` +- [Perform NicheNet analysis starting from a Seurat + object](vignettes/seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")` +- [Perform NicheNet analysis starting from a Seurat object: step-by-step + analysis](vignettes/seurat_steps.md):`vignette("seurat_steps", package="nichenetr")` Following vignettes contain explanation on how to do some follow-up analyses after performing the most basic analysis: -- [Inferring ligand-to-target signaling - paths](vignettes/ligand_target_signaling_path.md): - `vignette("ligand_target_signaling_path", package="nichenetr")` -- [Assess how well top-ranked ligands can predict a gene set of - interest](vignettes/target_prediction_evaluation_geneset.md): - `vignette("target_prediction_evaluation_geneset", package="nichenetr")` -- [Single-cell NicheNet’s ligand activity - analysis](vignettes/ligand_activity_single_cell.md): - `vignette("ligand_activity_single_cell", package="nichenetr")` +- [Prioritization of ligands based on expression + values](vignettes/seurat_steps_prioritization.md): + `vignette("seurat_steps_prioritization", package="nichenetr")` +- [Inferring ligand-to-target signaling + paths](vignettes/ligand_target_signaling_path.md): + `vignette("ligand_target_signaling_path", package="nichenetr")` +- [Assess how well top-ranked ligands can predict a gene set of + interest](vignettes/target_prediction_evaluation_geneset.md): + `vignette("target_prediction_evaluation_geneset", package="nichenetr")` +- [Single-cell NicheNet’s ligand activity + analysis](vignettes/ligand_activity_single_cell.md): + `vignette("ligand_activity_single_cell", package="nichenetr")` If you want to make a circos plot visualization of the NicheNet output, you can check following vignettes: -- [Circos plot visualization to show active ligand-target links - between interacting - cells](vignettes/circos.md):`vignette("circos", package="nichenetr")`. -- [Seurat Wrapper + Circos - visualization](vignettes/seurat_wrapper_circos.md):`vignette("seurat_wrapper_circos", package="nichenetr")`. +- [Circos plot visualization to show active ligand-target links between + interacting + cells](vignettes/circos.md):`vignette("circos", package="nichenetr")`. +- [Seurat Wrapper + Circos + visualization](vignettes/seurat_wrapper_circos.md):`vignette("seurat_wrapper_circos", package="nichenetr")`. People interested in building own models or benchmark own models against NicheNet can read one of the following vignettes: -- [Model construction](vignettes/model_construction.md): - `vignette("model_construction", package="nichenetr")` -- [Model evaluation: target gene and ligand activity - prediction](vignettes/model_evaluation.md): - `vignette("model_evaluation", package="nichenetr")` -- [Parameter optimization via - mlrMBO](vignettes/parameter_optimization.md): - `vignette("parameter_optimization", package="nichenetr")` +- [Model construction](vignettes/model_construction.md): + `vignette("model_construction", package="nichenetr")` +- [Model evaluation: target gene and ligand activity + prediction](vignettes/model_evaluation.md): + `vignette("model_evaluation", package="nichenetr")` +- [Parameter optimization via + mlrMBO](vignettes/parameter_optimization.md): + `vignette("parameter_optimization", package="nichenetr")` + +##### Deprecated vignettes -People working with mouse data can see in the following vignette how to -convert NicheNet’s ligand-target model (given in human symbols) to mouse -symbols: +Differential NicheNet has been deprecated; you may want to consider +using the [general prioritization +scheme](vignettes/seurat_steps_prioritization.md) instead. -- [Converting NicheNet’s model from human to mouse - symbols](vignettes/symbol_conversion.md): - `vignette("symbol_conversion", package="nichenetr")` +- [Differential NicheNet analysis between niches of + interest](vignettes/differential_nichenet.md):`vignette("differential_nichenet", package="nichenetr")` +- [Differential NicheNet analysis between conditions of + interest](vignettes/differential_nichenet_pEMT.md):`vignette("differential_nichenet_pEMT", package="nichenetr")` -Differential NicheNet vignettes: +In NicheNet v2, the mouse and human ligand-target models are uploaded +separately so symbol conversion is not necessary. If you are still using +the NicheNet v1 model, you can check the following vignette on how to +convert the model (given in human symbols) to mouse symbols: -- [Differential NicheNet analysis between niches of - interest](vignettes/differential_nichenet.md):`vignette("differential_nichenet", package="nichenetr")` -- [Differential NicheNet analysis between conditions of - interest](vignettes/differential_nichenet_pEMT.md):`vignette("differential_nichenet_pEMT", package="nichenetr")` +- [Converting NicheNet’s model from human to mouse + symbols](vignettes/symbol_conversion.md): + `vignette("symbol_conversion", package="nichenetr")` ## FAQ Check the FAQ page at [FAQ NicheNet](vignettes/faq.md): `vignette("faq", package="nichenetr")` +## Previous updates + +**12-01-2022:** In the Liver Atlas paper from Guilliams et al.: [Spatial +proteogenomics reveals distinct and evolutionarily conserved hepatic +macrophage +niches](https://www.sciencedirect.com/science/article/pii/S0092867421014811), +we used Differential NicheNet, an extension to the default NicheNet +algorithm. **Differential NicheNet** can be used to compare cell-cell +interactions between different niches and better predict niche-specific +ligand-receptor (L-R) pairs. It was used in that paper to predict +ligand-receptor pairs specific for the Kupffer cell niche in mouse and +human. + +The main difference between the classic NicheNet pipeline and the +Differential NicheNet pipeline is that Differential NicheNet also uses +the differential expression between the conditions/niches of the +ligand-receptor pairs for prioritization in addition to the ligand +activities. The classic NicheNet pipeline on the contrary uses only +ligand acivity for prioritization (and shows differential expression +only in visualizations). + +So if you have data of multiple conditions or niches, and you want to +include differential expression of the ligand-receptor pairs in the +prioritization, we recommend you check out Differential NicheNet (update +nichenetr to the 1.1.0 version). At the bottom of this page, you can +find the links to two vignettes illustrating a Differential NicheNet +analysis. We recommend these vignettes if you want to apply Differential +NicheNet on your own data. If you want to see the code used for the +analyses used in the Guilliams et al. paper, see +. + +**15-10-2019:** Bonnardel, T’Jonck et al. used NicheNet to predict +upstream niche signals driving Kupffer cell differentiation [Stellate +Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell +Identity on Monocytes Colonizing the Liver Macrophage +Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). + ## References Browaeys, R., Saelens, W. & Saeys, Y. NicheNet: modeling intercellular diff --git a/data/annotation_data_sources.rda b/data/annotation_data_sources.rda index 51e8689..654324c 100644 Binary files a/data/annotation_data_sources.rda and b/data/annotation_data_sources.rda differ diff --git a/data/hyperparameter_list.rda b/data/hyperparameter_list.rda index bdb21e4..785d225 100644 Binary files a/data/hyperparameter_list.rda and b/data/hyperparameter_list.rda differ diff --git a/data/optimized_source_weights_df.rda b/data/optimized_source_weights_df.rda index d79749e..7d6cb39 100644 Binary files a/data/optimized_source_weights_df.rda and b/data/optimized_source_weights_df.rda differ diff --git a/data/source_weights_df.rda b/data/source_weights_df.rda index 6389c1a..64e4727 100644 Binary files a/data/source_weights_df.rda and b/data/source_weights_df.rda differ diff --git a/man/calculate_de.Rd b/man/calculate_de.Rd new file mode 100644 index 0000000..d209060 --- /dev/null +++ b/man/calculate_de.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prioritization.R +\name{calculate_de} +\alias{calculate_de} +\title{Calculate differential expression of one cell type versus all other cell types} +\usage{ +calculate_de(seurat_obj, celltype_colname, condition_oi = NA, condition_colname = NA, assay_oi = "RNA", ...) +} +\arguments{ +\item{seurat_obj}{Seurat object} + +\item{celltype_colname}{Name of the meta data column that indicates the cell type of a cell} + +\item{condition_oi}{If provided, subset seurat_obj so DE is only calculated for cells belonging to condition_oi} + +\item{condition_colname}{Name of the meta data column that indicates from which group/condition a cell comes from} + +\item{assay_oi}{Which assay need to be used for DE calculation. Default RNA, alternatives: SCT.} + +\item{...}{Arguments passed to Seurat::FindAllMarkers(by default: features = NULL, min.pct = 0, logfc.threshold = 0, return.thresh = 1)} +} +\value{ +A dataframe containing the DE results +} +\description{ +\code{calculate_de} Calculate differential expression of one cell type versus all other cell types using Seurat::FindAllMarkers. If condition_oi is provided, only consider cells from that condition. +} +\examples{ +\dontrun{ +seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seurat_obj$celltype <- make.names(seurat_obj$celltype) +# Calculate cell-type specific markers across conditions +calculate_de(seurat_obj, "celltype") +# Calculate LCMV-specific cell-type markers +calculate_de(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +} + +} diff --git a/man/combine_sender_receiver_de.Rd b/man/combine_sender_receiver_de.Rd index f0aa47b..b99d4f0 100644 --- a/man/combine_sender_receiver_de.Rd +++ b/man/combine_sender_receiver_de.Rd @@ -11,7 +11,7 @@ combine_sender_receiver_de(DE_sender_processed, DE_receiver_processed, lr_networ \item{DE_receiver_processed}{Output of `process_niche_de` with `type = receiver`} -\item{lr_network}{Ligand-Receptor Network in tibble format: ligand, receptor, bonafide as columns} +\item{lr_network}{Ligand-Receptor Network in tibble format: ligand, receptor as columns} \item{specificity_score}{Defines which score will be used to prioritze ligand-receptor pairs and consider their differential expression. Default and recommended: "min_lfc". "min_lfc" looks at the minimal logFC of the ligand/receptor between the celltype of interest and all the other celltypes. diff --git a/man/generate_prioritization_tables.Rd b/man/generate_prioritization_tables.Rd new file mode 100644 index 0000000..151df14 --- /dev/null +++ b/man/generate_prioritization_tables.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prioritization.R +\name{generate_prioritization_tables} +\alias{generate_prioritization_tables} +\title{generate_prioritization_tables} +\usage{ +generate_prioritization_tables(sender_receiver_info, sender_receiver_de, ligand_activities, lr_condition_de = NULL, + prioritizing_weights = c("de_ligand" = 1,"de_receptor" = 1,"activity_scaled" = 2, + "exprs_ligand" = 2,"exprs_receptor" = 2, + "ligand_condition_specificity" = 0, "receptor_condition_specificity"=0)) +} +\arguments{ +\item{sender_receiver_info}{Output of `get_exprs_avg` -> `process_table_to_ic`} + +\item{sender_receiver_de}{Output of`calculate_de` -> `process_table_to_ic`} + +\item{ligand_activities}{Output of `predict_ligand_activities`} + +\item{lr_condition_de}{Output of `FindMarkers` -> `process_table_to_ic`} + +\item{prioritizing_weights}{Named vector indicating the relative weights of each prioritization criterion} +} +\value{ +Data frames of prioritized sender-ligand-receiver-receptor interactions. +} +\description{ +\code{generate_prioritization_tables} Perform a prioritization of cell-cell interactions (similar to MultiNicheNet). +User can choose the importance attached to each of the following prioritization criteria: differential expression of ligand and receptor, cell-type specificity of expression of ligand and receptor, NicheNet ligand activity +} +\examples{ +\dontrun{ +library(dplyr) +lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = lr_network \%>\% dplyr::rename(ligand = from, receptor = to) \%>\% dplyr::distinct(ligand, receptor) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seurat_obj$celltype <- make.names(seuratObj$celltype) +sender_celltypes = c("CD4.T","Treg", "Mono", "NK", "B", "DC") +receiver = "CD8.T" + +# Convert lr_network from mouse to human +lr_network = lr_network \%>\% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) \%>\% drop_na() +colnames(ligand_target_matrix) = ligand_target_matrix \%>\% colnames() \%>\% convert_human_to_mouse_symbols() +rownames(ligand_target_matrix) = ligand_target_matrix \%>\% rownames() \%>\% convert_human_to_mouse_symbols() +ligand_target_matrix = ligand_target_matrix \%>\% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] + +# Ligand activity analysis +seurat_obj_receiver = subset(seurat_obj, idents = receiver) \%>\% SetIdent(value = .[["aggregate"]]) +geneset_oi = FindMarkers(object = seurat_obj_receiver, ident.1 = "LCMV, ident.2 = "SS, min.pct = 0.10) \%>\% rownames_to_column("gene") \%>\% + filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) \%>\% pull(gene) \%>\% .[. \%in\% rownames(ligand_target_matrix)] +expressed_genes_sender = sender_celltypes \%>\% unique() \%>\% lapply(get_expressed_genes, seurat_obj, 0.10) \%>\% unlist() \%>\% unique() +expressed_genes_receiver = get_expressed_genes(receiver, seurat_obj, pct = 0.10) +expressed_ligands = intersect(lr_network \%>\% pull(ligand) \%>\% unique(), expressed_genes_sender) +expressed_receptors = intersect(lr_network \%>\% pull(receiver) \%>\% unique(), expressed_genes_receiver) +potential_ligands = lr_network \%>\% filter(ligand \%in\% expressed_ligands & receptor \%in\% expressed_receptors) \%>\% pull(from) \%>\% unique() +ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = expressed_genes_receiver \%>\% .[. \%in\% rownames(ligand_target_matrix)], + ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + +# Calculate LCMV-specific average expression +expression_info = get_exprs_avg(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") + +# Calculate LCMV-specific cell-type markers +DE_table = calculate_de(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") + +# Calculate condition-specific markers +condition_markers <- FindMarkers(object = seuratObj, ident.1 = "LCMV", ident.2 = "SS", + group.by = "aggregate", min.pct = 0, logfc.threshold = 0) \%>\% rownames_to_column("gene") + +# Process tables +processed_expr_info = process_table_to_ic(expression_info, table_type = "expression", lr_network) +processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network, + senders_oi = sender_celltypes, receivers_oi = receiver) +processed_condition_DE_table <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network) + +# Generate prioritization tables +prioritizing_weights = c("de_ligand" = 1, "de_receptor" = 1, "activity_scaled" = 2, "exprs_ligand" = 1, "exprs_receptor" = 1, "ligand_condition_specificity" = 0, "receptor_condition_specificity" = 0) +generate_prioritization_tables(processed_expr_info, + processed_DE_table, + ligand_activities, + processed_condition_DE_table, + prioritizing_weights) +} +} diff --git a/man/get_exprs_avg.Rd b/man/get_exprs_avg.Rd new file mode 100644 index 0000000..0557126 --- /dev/null +++ b/man/get_exprs_avg.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prioritization.R +\name{get_exprs_avg} +\alias{get_exprs_avg} +\title{Calculate average of gene expression per cell type.} +\usage{ +get_exprs_avg(seurat_obj, celltype_colname, condition_oi = NA, condition_colname = NA) +} +\arguments{ +\item{seurat_obj}{Seurat object} + +\item{celltype_colname}{Name of the meta data column that indicates the cell type of a cell} + +\item{condition_oi}{If provided, subset seurat_obj so average expression is only calculated for cells belonging to condition_oi} + +\item{condition_colname}{Name of the meta data column that indicates from which group/condition a cell comes from} +} +\value{ +Data frame with average gene expression per cell type. +} +\description{ +\code{get_exprs_avg} Calculate average of gene expression per cell type. If condition_oi is provided, only consider cells from that condition. +} +\examples{ +\dontrun{ +seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seurat_obj$celltype <- make.names(seuratObj$celltype) +# Calculate average expression across conditions +expression_info = get_exprs_avg(seurat_obj, "celltype") +# Calculate LCMV-specific average expression +expression_info = get_exprs_avg(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +} + +} diff --git a/man/get_ligand_activities_targets.Rd b/man/get_ligand_activities_targets.Rd index 6f91a04..a90641c 100644 --- a/man/get_ligand_activities_targets.Rd +++ b/man/get_ligand_activities_targets.Rd @@ -9,7 +9,7 @@ get_ligand_activities_targets(niche_geneset_list, ligand_target_matrix, top_n_ta \arguments{ \item{niche_geneset_list}{List of lists/niches giving the geneset of interest for the receiver cell type in each niche.} -\item{ligand_target_matrix}{The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns).} +\item{ligand_target_matrix}{The NicheNet ligand-target matrix of the organism of interest denoting regulatory potential scores between ligands and targets (ligands in columns).} \item{top_n_target}{To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200.} } diff --git a/man/get_non_spatial_de.Rd b/man/get_non_spatial_de.Rd index 42fecfd..8e7a787 100644 --- a/man/get_non_spatial_de.Rd +++ b/man/get_non_spatial_de.Rd @@ -13,7 +13,7 @@ get_non_spatial_de(niches, spatial_info, type, lr_network) \item{type}{For what type of cellype is the DE analysis: "sender" or "receiver"?} -\item{lr_network}{Ligand-Receptor Network in tibble format: ligand, receptor, bonafide as columns} +\item{lr_network}{Ligand-Receptor Network in tibble format: ligand, receptor as columns} } \value{ A tibble of mock processed spatial DE information in case you don't have spatial information for the sender and/or receiver celltype. diff --git a/man/make_mushroom_plot.Rd b/man/make_mushroom_plot.Rd new file mode 100644 index 0000000..479f741 --- /dev/null +++ b/man/make_mushroom_plot.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/application_visualization.R +\name{make_mushroom_plot} +\alias{make_mushroom_plot} +\title{Make a "mushroom plot" of ligand-receptor interactions} +\usage{ +make_mushroom_plot(prioritization_table, top_n = 30, show_ranking = FALSE, show_all_datapoints = FALSE, true_color_range = FALSE, size = "scaled_avg_exprs", color = "scaled_lfc", + ligand_fill_colors = c("#DEEBF7", "#08306B"), receptor_fill_colors = c("#FEE0D2", "#A50F15"), + unranked_ligand_fill_colors = c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2)), unranked_receptor_fill_colors = c( alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))) +} +\arguments{ +\item{prioritization_table}{A prioritization table as generated by \code{\link{generate_prioritization_tables}}} + +\item{top_n}{An integer indicating how many ligand-receptor pairs to show} + +\item{show_all_datapoints}{A logical indicating whether to show all ligand-receptor pairs (default: FALSE, if true they will be grayed out)} + +\item{true_color_range}{A logical indicating whether to use the true color range for the ligand-receptor pairs (default: FALSE; range 0-1 is used)} + +\item{size}{A string indicating which column to use for the size of the semicircles (default: "scaled_avg_exprs"; use column name without "_ligand" or "_receptor" suffix)} + +\item{color}{A string indicating which column to use for the color of the semicircles (default: "scaled_lfc"; use column name without "_ligand" or "_receptor" suffix)} + +\item{ligand_fill_colors}{A vector of the low and high colors to use for the ligand semicircle fill gradient (default: c("#DEEBF7", "#08306B"))} + +\item{receptor_fill_colors}{A vector of the low and high colors to use for the receptor semicircle fill gradient (default: c("#FEE0D2", "#A50F15"))} + +\item{unranked_ligand_fill_colors}{A vector of the low and high colors to use for the unranked ligands when show_all_datapoints is TRUE (default: c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2)))} + +\item{unranked_receptor_fill_colors}{A vector of the low and high colors to use for the unkraed receptors when show_all_datapoints is TRUE (default: c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2)))} + +\item{show_ranking}{A logical indicating whether to show the ranking of the ligand-receptor pairs (default: FALSE)} +} +\value{ +A ggplot object +} +\description{ +\code{make_mushroom_plot} Make a plot in which each glyph consists of two semicircles corresponding to ligand- and receptor- information. The size of the semicircle is the percentage of cells that express the protein, while the saturation corresponds to the scaled average expression value. +} +\examples{ +\dontrun{ +# Create a prioritization table +prior_table <- generate_prioritization_tables(processed_expr_table, processed_DE_table, ligand_activities, processed_condition_markers, prioritizing_weights) +make_mushroom_plot(prior_table) + +# Show only top 20, and write rankings on the plot +make_mushroom_plot(prior_table, top_n = 20, show_ranking = TRUE) + +# Show all datapoints, and use true color range +make_mushroom_plot(prior_table, show_all_datapoints = TRUE, true_color_range = TRUE) + +# Change the size and color columns +make_mushroom_plot(prior_table, size = "pct_expressed", color = "scaled_avg_exprs") +} +} diff --git a/man/nichenet_seuratobj_aggregate.Rd b/man/nichenet_seuratobj_aggregate.Rd index b525a7c..d616437 100644 --- a/man/nichenet_seuratobj_aggregate.Rd +++ b/man/nichenet_seuratobj_aggregate.Rd @@ -4,7 +4,7 @@ \alias{nichenet_seuratobj_aggregate} \title{Perform NicheNet analysis on Seurat object: explain DE between conditions} \usage{ -nichenet_seuratobj_aggregate(receiver, seurat_obj, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20,top_n_targets = 200, cutoff_visualization = 0.33,organism = "human",verbose = TRUE, assay_oi = NULL) +nichenet_seuratobj_aggregate(receiver, seurat_obj, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30,top_n_targets = 200, cutoff_visualization = 0.33,verbose = TRUE, assay_oi = NULL) } \arguments{ \item{receiver}{Name of cluster identity/identities of cells that are presumably affected by intercellular communication with other cells} @@ -19,11 +19,11 @@ nichenet_seuratobj_aggregate(receiver, seurat_obj, condition_colname, condition_ \item{sender}{Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all".} -\item{ligand_target_matrix}{The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns).} +\item{ligand_target_matrix}{The NicheNet ligand-target matrix of the organism of interest denoting regulatory potential scores between ligands and targets (ligands in columns).} -\item{lr_network}{The ligand-receptor network (columns that should be present: $from, $to).} +\item{lr_network}{The ligand-receptor network (columns that should be present: $from, $to) of the organism of interest.} -\item{weighted_networks}{The NicheNet weighted networks denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network.} +\item{weighted_networks}{The NicheNet weighted networks of the organism of interest denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network.} \item{expression_pct}{To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10} @@ -33,14 +33,12 @@ nichenet_seuratobj_aggregate(receiver, seurat_obj, condition_colname, condition_ \item{filter_top_ligands}{Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE.} -\item{top_n_ligands}{Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 20.} +\item{top_n_ligands}{Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 30.} \item{top_n_targets}{To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200.} \item{cutoff_visualization}{Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33.} -\item{organism}{Organism from which cells originate."human" (default) or "mouse".} - \item{verbose}{Print out the current analysis stage. Default: TRUE.} \item{assay_oi}{If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.} @@ -60,9 +58,6 @@ $ligand_differential_expression_heatmap = differential expression heatmap of the $ligand_receptor_matrix: matrix of ligand-receptor interactions; $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; $ligand_receptor_df: data frame of ligand-receptor interactions; -$ligand_receptor_matrix_bonafide: ligand-receptor matrix, after filtering out interactions predicted by PPI; -$ligand_receptor_heatmap_bonafide: heatmap of ligand-receptor interactions after filtering out interactions predicted by PPI; -$ligand_receptor_df_bonafide: data frame of ligand-receptor interactions, after filtering out interactions predicted by PPI; $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. } @@ -72,10 +67,10 @@ $background_expressed_genes: the background of genes to which the geneset will b \examples{ \dontrun{ seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) -nichenet_seuratobj_aggregate(receiver = "CD8 T", seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) +nichenet_seuratobj_aggregate(receiver = "CD8 T", seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) } } diff --git a/man/nichenet_seuratobj_aggregate_cluster_de.Rd b/man/nichenet_seuratobj_aggregate_cluster_de.Rd index d4230ba..23f66c0 100644 --- a/man/nichenet_seuratobj_aggregate_cluster_de.Rd +++ b/man/nichenet_seuratobj_aggregate_cluster_de.Rd @@ -4,7 +4,7 @@ \alias{nichenet_seuratobj_aggregate_cluster_de} \title{Perform NicheNet analysis on Seurat object: explain DE between two cell clusters from separate conditions} \usage{ -nichenet_seuratobj_aggregate_cluster_de(seurat_obj, receiver_affected, receiver_reference, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20,top_n_targets = 200, cutoff_visualization = 0.33,organism = "human",verbose = TRUE, assay_oi = NULL) +nichenet_seuratobj_aggregate_cluster_de(seurat_obj, receiver_affected, receiver_reference, condition_colname, condition_oi, condition_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30,top_n_targets = 200, cutoff_visualization = 0.33,verbose = TRUE, assay_oi = NULL) } \arguments{ \item{seurat_obj}{Single-cell expression dataset as Seurat object https://satijalab.org/seurat/.} @@ -21,11 +21,11 @@ nichenet_seuratobj_aggregate_cluster_de(seurat_obj, receiver_affected, receiver_ \item{sender}{Determine the potential sender cells. Name of cluster identity/identities of cells that presumably affect expression in the receiver cell type. In case you want to look at all possible sender cell types in the data, you can give this argument the value "all". "all" indicates thus that all cell types in the dataset will be considered as possible sender cells. As final option, you could give this argument the value "undefined"."undefined" won't look at ligands expressed by sender cells, but at all ligands for which a corresponding receptor is expressed. This could be useful if the presumably active sender cell is not profiled. Default: "all".} -\item{ligand_target_matrix}{The NicheNet ligand-target matrix denoting regulatory potential scores between ligands and targets (ligands in columns).} +\item{ligand_target_matrix}{The NicheNet ligand-target matrix of the organism of interest denoting regulatory potential scores between ligands and targets (ligands in columns).} -\item{lr_network}{The ligand-receptor network (columns that should be present: $from, $to).} +\item{lr_network}{The ligand-receptor network (columns that should be present: $from, $to) of the organism of interest.} -\item{weighted_networks}{The NicheNet weighted networks denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network.} +\item{weighted_networks}{The NicheNet weighted networks of the organism of interest denoting interactions and their weights/confidences in the ligand-signaling and gene regulatory network.} \item{expression_pct}{To determine ligands and receptors expressed by sender and receiver cells, we consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10} @@ -35,14 +35,12 @@ nichenet_seuratobj_aggregate_cluster_de(seurat_obj, receiver_affected, receiver_ \item{filter_top_ligands}{Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE.} -\item{top_n_ligands}{Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 20.} +\item{top_n_ligands}{Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 30.} \item{top_n_targets}{To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200.} \item{cutoff_visualization}{Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33.} -\item{organism}{Organism from which cells originate."human" (default) or "mouse".} - \item{verbose}{Print out the current analysis stage. Default: TRUE.} \item{assay_oi}{If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.} @@ -61,9 +59,6 @@ $ligand_expression_dotplot: expression dotplot of the top ligands; $ligand_receptor_matrix: matrix of ligand-receptor interactions; $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; $ligand_receptor_df: data frame of ligand-receptor interactions; -$ligand_receptor_matrix_bonafide: ligand-receptor matrix, after filtering out interactions predicted by PPI; -$ligand_receptor_heatmap_bonafide: heatmap of ligand-receptor interactions after filtering out interactions predicted by PPI; -$ligand_receptor_df_bonafide: data frame of ligand-receptor interactions, after filtering out interactions predicted by PPI; $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. } @@ -73,9 +68,9 @@ $background_expressed_genes: the background of genes to which the geneset will b \examples{ \dontrun{ seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seuratObj, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) } diff --git a/man/nichenet_seuratobj_cluster_de.Rd b/man/nichenet_seuratobj_cluster_de.Rd index 3386dcc..c7aad0e 100644 --- a/man/nichenet_seuratobj_cluster_de.Rd +++ b/man/nichenet_seuratobj_cluster_de.Rd @@ -4,7 +4,7 @@ \alias{nichenet_seuratobj_cluster_de} \title{Perform NicheNet analysis on Seurat object: explain DE between two cell clusters} \usage{ -nichenet_seuratobj_cluster_de(seurat_obj, receiver_affected, receiver_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 20,top_n_targets = 200, cutoff_visualization = 0.33,organism = "human",verbose = TRUE, assay_oi = NULL) +nichenet_seuratobj_cluster_de(seurat_obj, receiver_affected, receiver_reference, sender = "all",ligand_target_matrix,lr_network,weighted_networks,expression_pct = 0.10, lfc_cutoff = 0.25, geneset = "DE", filter_top_ligands = TRUE, top_n_ligands = 30,top_n_targets = 200, cutoff_visualization = 0.33,verbose = TRUE, assay_oi = NULL) } \arguments{ \item{seurat_obj}{Single-cell expression dataset as Seurat object https://satijalab.org/seurat/.} @@ -29,14 +29,12 @@ nichenet_seuratobj_cluster_de(seurat_obj, receiver_affected, receiver_reference, \item{filter_top_ligands}{Indicate whether output tables for ligand-target and ligand-receptor networks should be done for a filtered set of top ligands (TRUE) or for all ligands (FALSE). Default: TRUE.} -\item{top_n_ligands}{Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 20.} +\item{top_n_ligands}{Indicate how many ligands should be extracted as top-ligands after ligand activity analysis. Only for these ligands, target genes and receptors will be returned. Default: 30.} \item{top_n_targets}{To predict active, affected targets of the prioritized ligands, consider only DE genes if they also belong to the a priori top n ("top_n_targets") targets of a ligand. Default = 200.} \item{cutoff_visualization}{Because almost no ligand-target scores have a regulatory potential score of 0, we clarify the heatmap visualization by giving the links with the lowest scores a score of 0. The cutoff_visualization paramter indicates this fraction of links that are given a score of zero. Default = 0.33.} -\item{organism}{Organism from which cells originate."human" (default) or "mouse".} - \item{verbose}{Print out the current analysis stage. Default: TRUE.} \item{assay_oi}{If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.} @@ -55,9 +53,6 @@ $ligand_expression_dotplot: expression dotplot of the top ligands; $ligand_receptor_matrix: matrix of ligand-receptor interactions; $ligand_receptor_heatmap: heatmap showing ligand-receptor interactions; $ligand_receptor_df: data frame of ligand-receptor interactions; -$ligand_receptor_matrix_bonafide: ligand-receptor matrix, after filtering out interactions predicted by PPI; -$ligand_receptor_heatmap_bonafide: heatmap of ligand-receptor interactions after filtering out interactions predicted by PPI; -$ligand_receptor_df_bonafide: data frame of ligand-receptor interactions, after filtering out interactions predicted by PPI; $geneset_oi: a vector containing the set of genes used as input for the ligand activity analysis; $background_expressed_genes: the background of genes to which the geneset will be compared in the ligand activity analysis. } @@ -67,9 +62,9 @@ $background_expressed_genes: the background of genes to which the geneset will b \examples{ \dontrun{ seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) # works, but does not make sense nichenet_seuratobj_cluster_de(seurat_obj = seuratObj, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "Mono", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) # type of analysis for which this would make sense diff --git a/man/process_spatial_de.Rd b/man/process_spatial_de.Rd index 0b6b7f2..301a660 100644 --- a/man/process_spatial_de.Rd +++ b/man/process_spatial_de.Rd @@ -11,7 +11,7 @@ process_spatial_de(DE_table, type, lr_network, expression_pct, specificity_score \item{type}{For what type of cellype is the DE analysis: "sender" or "receiver"?} -\item{lr_network}{Ligand-Receptor Network in tibble format: ligand, receptor, bonafide as columns} +\item{lr_network}{Ligand-Receptor Network in tibble format: ligand, receptor as columns} \item{expression_pct}{Percentage of cells of a cell type having a non-zero expression value for a gene such that a gene can be considered expressed by that cell type.} diff --git a/man/process_table_to_ic.Rd b/man/process_table_to_ic.Rd new file mode 100644 index 0000000..3292ba8 --- /dev/null +++ b/man/process_table_to_ic.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prioritization.R +\name{process_table_to_ic} +\alias{process_table_to_ic} +\title{Process DE or expression information into intercellular communication focused information.} +\usage{ +process_table_to_ic(table_object, table_type = "expression", lr_network, senders_oi = NULL, receivers_oi = NULL) +} +\arguments{ +\item{table_object}{Output of `get_exprs_avg`, `calculate_de`, or `FindMarkers`} + +\item{table_type}{"expression", "celltype_DE", or "group_DE": indicates whether the table contains expression, celltype markers, or condition-specific information} + +\item{lr_network}{Prior knowledge Ligand-Receptor network (columns: ligand, receptor)} + +\item{senders_oi}{Default NULL: all celltypes will be considered as senders. If you want to select specific senders of interest: you can add this here as character vector.} + +\item{receivers_oi}{Default NULL: all celltypes will be considered as receivers If you want to select specific receivers of interest: you can add this here as character vector.} +} +\value{ +Dataframe combining sender and receiver information linked to each other through joining by the ligand-receptor network. +} +\description{ +\code{process_table_to_ic} First, only keep information of ligands for senders_oi, and information of receptors for receivers_oi. +Then, combine information for senders and receivers by linking ligands to receptors based on the prior knowledge ligand-receptor network. +} +\examples{ +\dontrun{ +library(dplyr) +lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = lr_network \%>\% dplyr::rename(ligand = from, receptor = to) \%>\% dplyr::distinct(ligand, receptor) +seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seurat_obj$celltype <- make.names(seuratObj$celltype) +# Calculate LCMV-specific average expression +expression_info = get_exprs_avg(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +# Calculate LCMV-specific cell-type markers +DE_table = calculate_de(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") +# Calculate LCMV-specific genes across cell types +condition_markers <- FindMarkers(object = seuratObj, ident.1 = "LCMV", ident.2 = "SS", + group.by = "aggregate", min.pct = 0, logfc.threshold = 0) \%>\% rownames_to_column("gene") +processed_expr_info = process_table_to_ic(expression_info, table_type = "expression", lr_network) +processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network, +senders_oi = c("CD4.T", "Treg", "Mono", "NK", "B", "DC"), receivers_oi = "CD8.T") +processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "condition_DE", lr_network) +} + +} diff --git a/man/scale_quantile_adapted.Rd b/man/scale_quantile_adapted.Rd index 1691e96..e8d7380 100644 --- a/man/scale_quantile_adapted.Rd +++ b/man/scale_quantile_adapted.Rd @@ -4,10 +4,12 @@ \alias{scale_quantile_adapted} \title{Normalize values in a vector by quantile scaling and add a pseudovalue of 0.001} \usage{ -scale_quantile_adapted(x) +scale_quantile_adapted(x, outlier_cutoff = 0) } \arguments{ \item{x}{A numeric vector.} + +\item{outlier_cutoff}{The quantile cutoff for outliers (default 0).} } \value{ A quantile-scaled numeric vector. diff --git a/tests/testthat/test-application_prediction.R b/tests/testthat/test-application_prediction.R index f0899a9..1d4fbfe 100644 --- a/tests/testthat/test-application_prediction.R +++ b/tests/testthat/test-application_prediction.R @@ -1,75 +1,75 @@ context("NicheNet analysis on Seurat objects") test_that("Seurat wrapper works", { - - ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) - lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) + options(timeout = 3600) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) seurat_object_lite = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) - nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse",geneset = "up") + nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network,geneset = "up") expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse",geneset = "down") + nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network,geneset = "down") expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "all", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "all", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse", filter_top_ligands = FALSE) + nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, filter_top_ligands = FALSE) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse",geneset = "up") + nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network,geneset = "up") expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse",geneset = "down") + nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network,geneset = "down") expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "all", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "all", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse", filter_top_ligands = FALSE) + nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, filter_top_ligands = FALSE) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse",geneset = "up") + nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network,geneset = "up") expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse",geneset = "down") + nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network,geneset = "down") expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "all", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "all", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse", filter_top_ligands = FALSE) + nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, filter_top_ligands = FALSE) expect_type(nichenet_output,"list") seurat_object_lite@meta.data$aggregate = seurat_object_lite@meta.data$aggregate %>% as.factor() seurat_object_lite@meta.data$celltype = seurat_object_lite@meta.data$celltype %>% as.factor() - nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seurat_object_lite, receiver = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = c("Mono"), ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_aggregate_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "CD8 T", condition_oi = "LCMV", condition_reference = "SS", condition_colname = "aggregate", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") - nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network, organism = "mouse") + nichenet_output = nichenet_seuratobj_cluster_de(seurat_obj = seurat_object_lite, receiver_affected = "CD8 T", receiver_reference = "Mono", sender = "undefined", ligand_target_matrix = ligand_target_matrix, weighted_networks = weighted_networks, lr_network = lr_network) expect_type(nichenet_output,"list") lfc_output = get_lfc_celltype(seurat_obj = seurat_object_lite, celltype_oi = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", expression_pct = 0.10) @@ -140,7 +140,7 @@ test_that("Target gene prediction can be predicted by multi-ligand models", { target_prediction_performances_discrete_cv = gene_predictions_list %>% lapply(calculate_fraction_top_predicted,quantile_cutoff = 0.66) %>% bind_rows() expect_type(target_prediction_performances_discrete_cv,"list") - target_prediction_performances_fisher_pval = gene_predictions_list %>% lapply(calculate_fraction_top_predicted_fisher,quantile_cutoff = 0.66) %>% unlist() %>% mean() + target_prediction_performances_fisher_pval = gene_predictions_list %>% lapply(calculate_fraction_top_predicted_fisher,quantile_cutoff = 0.5) %>% unlist() %>% mean() expect_type(target_prediction_performances_fisher_pval,"double") }) @@ -174,6 +174,8 @@ test_that("Single-cell ligand activity prediction functions work a bit OK", { expect_type(normalized_ligand_activities,"list") cell_scores_tbl = tibble(cell = cell_ids, score = c(1,4,2,3)) + # Since changing the metric, these return zeros so standard deviation is zero + normalized_ligand_activities <- cbind(normalized_ligand_activities %>% select(cell), normalized_ligand_activities %>% select(-cell) + matrix(rnorm(12, sd=0.01),4)) regression_analysis_output = single_ligand_activity_score_regression(normalized_ligand_activities,cell_scores_tbl) expect_type(regression_analysis_output,"list") diff --git a/tests/testthat/test-differential_nichenet.R b/tests/testthat/test-differential_nichenet.R index e7924ab..c46a567 100644 --- a/tests/testthat/test-differential_nichenet.R +++ b/tests/testthat/test-differential_nichenet.R @@ -1,15 +1,11 @@ context("Differential NicheNet") test_that("Differential NicheNet pipeline works", { - - ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() + options(timeout = 3600) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] - lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) - lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% tidyr::drop_na() - lr_network = lr_network %>% mutate(bonafide = ! database %in% c("ppi_prediction","ppi_prediction_go")) - lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor, bonafide) + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) seurat_object_lite = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) seurat_object_lite@meta.data$celltype_aggregate = paste(seurat_object_lite@meta.data$celltype, seurat_object_lite@meta.data$aggregate,sep = "_") # user adaptation required on own dataset @@ -94,7 +90,7 @@ test_that("Differential NicheNet pipeline works", { length(geneset_niche2) top_n_target = 250 - + niche_geneset_list = list( "LCMV_niche" = list( "receiver" = niches[[1]]$receiver, @@ -128,7 +124,7 @@ test_that("Differential NicheNet pipeline works", { inner_join(exprs_tbl_ligand, by = c("ligand")) %>% inner_join(exprs_tbl_receptor, by = c("receptor")) %>% inner_join(DE_sender_receiver %>% distinct(niche, sender, receiver)) - ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction, bonafide) %>% distinct() %>% ungroup() + ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction) %>% distinct() %>% ungroup() prioritizing_weights = c("scaled_ligand_score" = 5, "scaled_ligand_expression_scaled" = 1, @@ -140,8 +136,7 @@ test_that("Differential NicheNet pipeline works", { "ligand_scaled_receptor_expression_fraction" = 1, "scaled_receptor_score_spatial" = 0, "scaled_activity" = 0, - "scaled_activity_normalized" = 1, - "bona_fide" = 1) + "scaled_activity_normalized" = 1) output = list(DE_sender_receiver = DE_sender_receiver, ligand_scaled_receptor_expression_fraction_df = ligand_scaled_receptor_expression_fraction_df, sender_spatial_DE_processed = sender_spatial_DE_processed, receiver_spatial_DE_processed = receiver_spatial_DE_processed, ligand_activities_targets = ligand_activities_targets, DE_receiver_processed_targets = DE_receiver_processed_targets, exprs_tbl_ligand = exprs_tbl_ligand, exprs_tbl_receptor = exprs_tbl_receptor, exprs_tbl_target = exprs_tbl_target) diff --git a/tests/testthat/test-symbol_conversion.R b/tests/testthat/test-symbol_conversion.R index 95fc4ce..f3bf60f 100644 --- a/tests/testthat/test-symbol_conversion.R +++ b/tests/testthat/test-symbol_conversion.R @@ -27,6 +27,7 @@ test_that("human-mouse and symbol-alias conversion works", { expect_type(aliases,"character") }) test_that("Seurat alias conversion works", { + options(timeout = 3600) seurat_object_lite = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj_test.rds")) seurat_object_lite2 = seurat_object_lite %>% alias_to_symbol_seurat(organism = "mouse") testthat::expect_equal(typeof(seurat_object_lite2), "S4") diff --git a/vignettes/circos.rmd b/vignettes/circos.Rmd similarity index 58% rename from vignettes/circos.rmd rename to vignettes/circos.Rmd index 31d49f2..e98ee2a 100644 --- a/vignettes/circos.rmd +++ b/vignettes/circos.Rmd @@ -1,7 +1,7 @@ --- title: "Circos plot visualization to show active ligand-target links between interacting cells" -author: "Robin Browaeys" -date: "3-7-2019" +author: "Robin Browaeys & Chananchida Sang-aram" +date: "2023-07-20" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Circos plot visualization to show active ligand-target links between interacting cells} @@ -61,7 +61,7 @@ expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){1 ### Load the ligand-target model we want to use ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ``` @@ -70,8 +70,7 @@ ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns As gene set of interest, we consider the genes of which the expression is possibly affected due to communication with other cells. -Because we here want to investigate how CAFs and endothelial cells regulate the expression of p-EMT genes in malignant cells, we will use the p-EMT gene set defined by Puram et al. as gene set of interset and use all genes expressed in malignant cells as background of genes. - +Because we here want to investigate how CAFs and endothelial cells regulate the expression of p-EMT genes in malignant cells, we will use the p-EMT gene set defined by Puram et al. as gene set of interest and use all genes expressed in malignant cells as background of genes. ```{r} pemt_geneset = readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), col_names = "gene") %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] # only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. @@ -88,7 +87,7 @@ In a first step, we will define a set of potentially active ligands. As potentia Note that we combine the ligands from CAFs and endothelial cells in one ligand activity analysis now. Later on, we will look which of the top-ranked ligands is mainly expressed by which of both cell types. ```{r} -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) ligands = lr_network %>% pull(from) %>% unique() expressed_ligands_CAFs = intersect(ligands,expressed_genes_CAFs) @@ -108,11 +107,11 @@ Now perform the ligand activity analysis: infer how well NicheNet's ligand-targe ligand_activities = predict_ligand_activities(geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) ``` -Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the pearson correlation between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, we will rank the ligands based on their pearson correlation coefficient. +Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the AUPR between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, we will rank the ligands based on their AUPR. We will choose the top 20 ligands here - as opposed to the top 30 in the main vignette - to avoid overcrowding the circos plot. ```{r} -ligand_activities %>% arrange(-pearson) -best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) +ligand_activities %>% arrange(-aupr_corrected) +best_upstream_ligands = ligand_activities %>% top_n(20, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) head(best_upstream_ligands) ``` @@ -238,7 +237,7 @@ chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.de circos.track(track.index = 1, panel.fun = function(x, y) { circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # +}, bg.border = NA) circos.clear() ``` @@ -272,6 +271,7 @@ circos.clear() dev.off() ``` + ### Visualize ligand-receptor interactions of the prioritized ligands in a circos plot ```{r} @@ -346,7 +346,7 @@ Render the circos plot (all links same transparancy). Only the widths of the blo ```{r, fig.width=8, fig.height=8} circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", +chordDiagram(links_circle, directional = 1, order=order, link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", preAllocateTracks = list(track.height = 0.075)) # we go back to the first track and customize sector labels circos.track(track.index = 1, panel.fun = function(x, y) { @@ -386,11 +386,276 @@ circos.clear() dev.off() ``` -### Remark on making a ligand-receptor-target circos plot -In the paper of Bonnardel, T'Jonck et al. [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1), we showed in Fig. 6B a ligand-receptor-target circos plot to visualize the main NicheNet predictions. This “ligand-receptor-target” circos plot was made by making first two separate circos plots: the ligand-target and ligand-receptor circos plot. Then these circos plots were overlayed in Inkscape (with the center of the two circles at the same location and the ligand-receptor circos plot bigger than the ligand-target one). To generate the combined circos plot as shown ni Fig. 6B, we then manually removed all elements of the ligand-receptor circos plot except the outer receptor layer. In the near future, we will be working on a solution to generate this ligand-receptor-target circos plot in a fully automated manner. +### Adding an outer track to the circos plot (ligand-receptor-target circos plot) + +In the paper of Bonnardel, T'Jonck et al. [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1), we showed in Fig. 6B a ligand-receptor-target circos plot to visualize the main NicheNet predictions. This “ligand-receptor-target” circos plot was made by making first two separate circos plots: the ligand-target and ligand-receptor circos plot. Then these circos plots were overlayed in Inkscape (with the center of the two circles at the same location and the ligand-receptor circos plot bigger than the ligand-target one). To generate the combined circos plot as shown in Fig. 6B, we then manually removed all elements of the ligand-receptor circos plot except the outer receptor layer. + +It is also possible to generate this kind of plot programmatically, given that you are able to group your ligands. Below, we demonstrate two approaches that uses either the `circlize::draw.sector` or `circlize::highlight.sector` function. The former is more complicated but gives you the flexibility to draw receptor arcs of different lengths, while the `highlight.sector` function is constrained to the widths of the targets in the inner track. + +First, let's rerun a code chunk from above to redefine `circos_links` and the color scheme. + +```{r} +circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove) + +grid_col_ligand =c("General" = "lawngreen", + "CAF-specific" = "royalblue", + "Endothelial-specific" = "gold") +grid_col_target =c( + "p_emt" = "tomato") + +grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) +grid_col_tbl_target = tibble(target_type = grid_col_target %>% names(), color_target_type = grid_col_target) + +circos_links = circos_links %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as target! +circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_target) +links_circle = circos_links %>% select(ligand, target, weight) + +ligand_color = circos_links %>% distinct(ligand,color_ligand_type) +grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) +target_color = circos_links %>% distinct(target,color_target_type) +grid_target_color = target_color$color_target_type %>% set_names(target_color$target) -If you would want to split up target genes and receptors in different groups according to signaling pathway (as done in mentioned paper), then you first need to define these groups in a specific data frame in advance (cf what is shown for ligands in the `ligand_type_indication_df `in the vignette). When you then want to overlay receptors in this case, you need to make sure that the ligand-receptor weights of receptors in one group are proportional to the ligand-target weights of the targets in that group (to generate the nice overlay effect). So in that case, the ligand-receptor weights are proportional to the ‘underlying’ ligand-target regulatory potential scores and not reflective of prior information supporting the specific ligand-receptor interaction (as shown in this vignette for ligand-receptor circos plots). +grid_col = c(grid_ligand_color,grid_target_color) + +ligand_order = c(CAF_specific_ligands,general_ligands,endothelial_specific_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) +``` + + +#### Using the `draw.sector` function + +For demonstration purposes, we will use a subset of ligands and divide them into four groups that differ in the signaling pathways they interact with. Then, we assign targets and receptors to each ligand group based on their relative rankings (and summed weights in case the rankings are the same). The way we assigned targets and receptors to ligand groups here does not always lead to the most biologically meaningful results, as you will see in the final plot. Hence, it is best if you curate this list manually, e.g., through prior knowledge and by also looking at the ligand-target and ligand-receptor heatmap. Also keep in mind that there is no real correspondence between receptors and targets, as explained more [here](https://github.com/saeyslab/nichenetr/issues/20#issuecomment-611601039). + +```{r} +groups <- list(group1 = c("TGFB2", "ENG"), + group2 = grep("BMP|GDF|INHBA", best_upstream_ligands, value = TRUE), + group3 = grep("COL|MMP|TIMP", best_upstream_ligands, value = TRUE), + group4 = "CXCL12") + +# Create list of targets for each group of ligand +targets <- lapply(names(groups), function(i) { + # Rank each target for each ligand + active_ligand_target_links_df %>% group_by(ligand) %>% mutate(target_rank = dense_rank(desc(weight))) %>% + filter(ligand %in% groups[[i]]) %>% + # Make two metrics for each target -> summed weight and avg_rank + group_by(target) %>% + summarise(n = n(), summed_weight=sum(weight), avg_rank = mean(target_rank)) %>% + # Only keep targets that are connected to at least half of ligands in the group + filter(n > (length(groups[[i]])/2)) %>% mutate(type = i) +}) %>% do.call(rbind, .) + +# Check if any targets are distinct per group (groups 2 and 4 have some) +lapply(paste0("group", 1:5), function(group_name) setdiff(targets %>% filter(type == group_name) %>% pull(target), targets %>% filter(type != group_name) %>% pull(target))) + +# Assign target to a specific group, first based on ranking and second based on weight +targets_filtered <- targets %>% group_by(target) %>% filter(avg_rank == min(avg_rank)) %>% + filter(summed_weight == max(summed_weight)) %>% ungroup() + +# Do the same for receptors +receptor_colors <- c("#387D7A", "#9DA9A0", "#DD7373", "#725752") %>% setNames(names(groups)) +receptors <- lapply(names(groups), function(i) { + weighted_networks$lr_sig %>% filter(from %in% groups[[i]] & to %in% best_upstream_receptors) %>% + group_by(from) %>% mutate(receptor_rank = dense_rank(desc(weight))) %>% + group_by(to) %>% summarise(summed_weight=sum(weight), avg_weight=mean(weight), avg_rank = mean(receptor_rank)) %>% + mutate(type=i, color = receptor_colors[i]) %>% rename(receptor = to) +}) %>% do.call(rbind, .) + +# Check distinct receptors per group +lapply(paste0("group", 1:5), function(group_name) setdiff(receptors %>% filter(type == group_name) %>% pull(receptor), receptors %>% filter(type != group_name) %>% pull(receptor))) + +# Assign receptor to a specific group +receptors_filtered <- receptors %>% group_by(receptor) %>% filter(avg_rank == min(avg_rank)) %>% + filter(summed_weight == max(summed_weight)) %>% ungroup() +``` + +We will then have to redefine some variables. + +```{r} +# Filter out targets and ligands that are no longer present +links_circle_approach1 <- links_circle %>% filter(ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) + +order <- c(ligand_order %>%.[. %in% paste0(unlist(groups), " ")], targets_filtered$target) + +# Redefine gaps between sectors +width_same_cell_same_ligand_type = 0.6 +width_different_cell = 4.5 +width_ligand_target = 12 +width_same_cell_same_target_type = 0.6 # Added +width_different_target = 4.5 # Added + +group_widths <- sapply(paste0("group", 1:4), function(group) { + # Gap between targets of the same group + paste0(rep(width_same_cell_same_target_type, times =(targets_filtered %>% filter(type==group) %>% nrow)-1), collapse=",")}) %>% + # Separate this with gap of different group + paste0(., collapse=paste0(",",width_different_target,",")) %>% + str_split(., ",") %>% .[[1]] %>% as.numeric + +gaps = c( + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific", + ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) %>% + distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General", + ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) %>% + distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific", + ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) %>% + distinct(ligand) %>% nrow() -1)), + width_ligand_target, + group_widths, + width_ligand_target + ) + +gaps = gaps %>% .[!is.na(.)] + +``` + +Finally, we create the plot. What's different here is we add an extra layer in `preAllocateTracks`, and we add a `for` loop at the end to draw the outer layer. As mentioned previously, the resulting plot will require some further manual tweaking (e.g., ITG receptors from group 1 should be moved to group 3). + +```{r, fig.width=8, fig.height=8} +circos.par(gap.degree = gaps) + +chordDiagram(links_circle_approach1, order=order, transparency=0, directional = 1, link.sort = TRUE, link.decreasing = FALSE, + grid.col = grid_col, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"), + link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands, annotationTrack = "grid", + # Add extra track for outer layer + preAllocateTracks = list(list(track.height = 0.025), + list(track.height = 0.25))) + +# we go back to the first track and customize sector labels +circos.track(track.index = 2, panel.fun = function(x, y) { + circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, + facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) +}, bg.border = NA) # + + +padding <- 0.05 # Gaps between receptor arcs +rou_adjustment <- 0.08 # Might need adjustment +for (group in unique(targets_filtered$type)){ + # Subset target and receptor + targets_subset <- targets_filtered %>% filter(type == group) + receptors_subset <- receptors_filtered %>% filter(type == group) + + # Get approximate position of outer ring + pos <- circlize(c(0, 1), c(0, 1), track.index = 1) + rou1 <- pos[1, "rou"]-rou_adjustment + rou2 <- pos[2, "rou"]-rou_adjustment + + # Get range of angles from first to last target of the current group + theta1 <- circlize:::get.sector.data((targets_subset %>% pull(target) %>% .[1]))["start.degree"] + theta2 <- circlize:::get.sector.data((targets_subset %>% pull(target) %>% .[length(.)]))["end.degree"] + + # Scale the arc lengths according to the summed ligand-receptor weights + receptors_subset_scaled <- receptors_subset %>% mutate(scaled_weight = summed_weight/sum(summed_weight)*(theta1-theta2)) + # For each receptor + current_theta <- theta1 + for (i in 1:nrow(receptors_subset_scaled)){ + # Get end angle of the arc + end_theta <- current_theta-(receptors_subset_scaled %>% slice(i) %>% pull(scaled_weight)) + d1 <- abs(end_theta-current_theta) # For gaps + + # Main function - we draw the arc here + draw.sector(current_theta+(d1*-padding), end_theta-(d1*-padding), + rou1, rou2, + col = receptors_subset_scaled %>% slice(i) %>% pull(color), + clock.wise = TRUE, border=NA) + + # Add text containing receptor name + pos_text <- reverse.circlize((current_theta + end_theta)/2 + ifelse(current_theta < end_theta, 180, 0), (rou1 + rou2)/2) + # It's going to give a Note that point is out of plotting region + suppressMessages(circos.text(pos_text[1,1], pos_text[1,2]+convert_y(7, "mm"), + labels = receptors_subset_scaled %>% slice(i) %>% pull(receptor), + niceFacing=TRUE, facing="clockwise", cex=0.6)) + + current_theta <- end_theta + } + +} + + +circos.clear() +``` + +#### Using the `highlight.sector` function + +With this function, it is not possible to draw receptor arcs that end at different points from the target arcs. +To demonstrate this, we will randomly assign the target genes into one of three groups (Receptors A, B, and C). + +```{r} +target_gene_groups <- sample(c("Receptor A", "Receptor B", "Receptor C"), length(unique(circos_links$target)), replace = TRUE) %>% + setNames(unique(circos_links$target)) +target_gene_groups + +target_gene_group_colors <- c("#387D7A", "#9DA9A0", "#704E2E") %>% setNames(unique(target_gene_groups)) +``` + +Again, we will redefine some variables. + +```{r} +# Order targets according to receptor they belong to +order = c(ligand_order, target_gene_groups %>% sort %>% names) + +# Redefine gaps between sectors +width_same_cell_same_ligand_type = 0.6 +width_different_cell = 4.5 +width_ligand_target = 12 +width_same_cell_same_target_type = 0.6 # Added +width_different_target = 4.5 # Added + +# Add this to circos_links +circos_links = circos_links %>% mutate(target_receptor = target_gene_groups[target]) + +gaps = c( + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific") %>% distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific") %>% distinct(ligand) %>% nrow() -1)), + width_ligand_target, + # Add code to define gaps between different target groups + rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor A") %>% distinct(target) %>% nrow() -1)), + width_different_target, + rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor B") %>% distinct(target) %>% nrow() -1)), + width_different_target, + rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor C") %>% distinct(target) %>% nrow() -1)), + width_ligand_target + ) + +``` + +The general idea here is similar - adding an extra layer in `preAllocateTracks` and a `for` loop at the end to draw the outer layer - but the function allows for much cleaner code. + +```{r, fig.width=8, fig.height=8} +circos.par(gap.degree = gaps) +chordDiagram(links_circle, directional = 1, transparency=0, order=order,link.sort = TRUE, link.decreasing = FALSE, + grid.col = grid_col, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"), + link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", + # Add extra track for outer layer + preAllocateTracks = list(list(track.height = 0.025), + list(track.height = 0.2))) + +# we go back to the first track and customize sector labels +circos.track(track.index = 2, panel.fun = function(x, y) { + circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, + facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) +}, bg.border = NA) # + +# Add outer layer +for (target_gene_group in unique(target_gene_groups)){ + highlight.sector(target_gene_groups %>% .[. == target_gene_group] %>% names, + track.index = 1, + col = target_gene_group_colors[target_gene_group], + text = target_gene_group, + cex = 0.8, facing="bending.inside", niceFacing = TRUE, text.vjust = "5mm") +} + +circos.clear() +``` ### References diff --git a/vignettes/circos.md b/vignettes/circos.md index 7f4325f..78ed868 100644 --- a/vignettes/circos.md +++ b/vignettes/circos.md @@ -1,8 +1,8 @@ Circos plot visualization to show active ligand-target links between interacting cells ================ -Robin Browaeys -3-7-2019 +Robin Browaeys & Chananchida Sang-aram +2023-07-20 +![](circos_files/figure-gfm/unnamed-chunk-94-1.png) ``` r circos.clear() @@ -327,7 +322,7 @@ circos.track(track.index = 1, panel.fun = function(x, y) { }, bg.border = NA) # ``` -![](circos_files/figure-gfm/unnamed-chunk-16-1.png) +![](circos_files/figure-gfm/unnamed-chunk-95-1.png) ``` r circos.clear() @@ -427,7 +422,7 @@ interaction). ``` r circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", +chordDiagram(links_circle, directional = 1, order=order, link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", preAllocateTracks = list(track.height = 0.075)) # we go back to the first track and customize sector labels circos.track(track.index = 1, panel.fun = function(x, y) { @@ -436,7 +431,7 @@ circos.track(track.index = 1, panel.fun = function(x, y) { }, bg.border = NA) # ``` -![](circos_files/figure-gfm/unnamed-chunk-22-1.png) +![](circos_files/figure-gfm/unnamed-chunk-101-1.png) ``` r circos.clear() @@ -457,7 +452,7 @@ circos.track(track.index = 1, panel.fun = function(x, y) { }, bg.border = NA) # ``` -![](circos_files/figure-gfm/unnamed-chunk-23-1.png) +![](circos_files/figure-gfm/unnamed-chunk-102-1.png) ``` r circos.clear() @@ -481,37 +476,345 @@ dev.off() ## 2 ``` -### Remark on making a ligand-receptor-target circos plot +### Adding an outer track to the circos plot (ligand-receptor-target circos plot) In the paper of Bonnardel, T’Jonck et al. [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage -Niche](https://www.cell.com/immunity/fulltext/S1074-7613\(19\)30368-1), -we showed in Fig. 6B a ligand-receptor-target circos plot to visualize -the main NicheNet predictions. This “ligand-receptor-target” circos plot -was made by making first two separate circos plots: the ligand-target -and ligand-receptor circos plot. Then these circos plots were overlayed -in Inkscape (with the center of the two circles at the same location and +Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1), we +showed in Fig. 6B a ligand-receptor-target circos plot to visualize the +main NicheNet predictions. This “ligand-receptor-target” circos plot was +made by making first two separate circos plots: the ligand-target and +ligand-receptor circos plot. Then these circos plots were overlayed in +Inkscape (with the center of the two circles at the same location and the ligand-receptor circos plot bigger than the ligand-target one). To -generate the combined circos plot as shown ni Fig. 6B, we then manually +generate the combined circos plot as shown in Fig. 6B, we then manually removed all elements of the ligand-receptor circos plot except the outer -receptor layer. In the near future, we will be working on a solution to -generate this ligand-receptor-target circos plot in a fully automated -manner. - -If you would want to split up target genes and receptors in different -groups according to signaling pathway (as done in mentioned paper), then -you first need to define these groups in a specific data frame in -advance (cf what is shown for ligands in the -`ligand_type_indication_df`in the vignette). When you then want to -overlay receptors in this case, you need to make sure that the -ligand-receptor weights of receptors in one group are proportional to -the ligand-target weights of the targets in that group (to generate the -nice overlay effect). So in that case, the ligand-receptor weights are -proportional to the ‘underlying’ ligand-target regulatory potential -scores and not reflective of prior information supporting the specific -ligand-receptor interaction (as shown in this vignette for -ligand-receptor circos plots). +receptor layer. + +It is also possible to generate this kind of plot programmatically, +given that you are able to group your ligands. Below, we demonstrate two +approaches that uses either the `circlize::draw.sector` or +`circlize::highlight.sector` function. The former is more complicated +but gives you the flexibility to draw receptor arcs of different +lengths, while the `highlight.sector` function is constrained to the +widths of the targets in the inner track. + +First, let’s rerun a code chunk from above to redefine `circos_links` +and the color scheme. + +``` r +circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove) + +grid_col_ligand =c("General" = "lawngreen", + "CAF-specific" = "royalblue", + "Endothelial-specific" = "gold") +grid_col_target =c( + "p_emt" = "tomato") + +grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) +grid_col_tbl_target = tibble(target_type = grid_col_target %>% names(), color_target_type = grid_col_target) + +circos_links = circos_links %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as target! +circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_target) +links_circle = circos_links %>% select(ligand, target, weight) + +ligand_color = circos_links %>% distinct(ligand,color_ligand_type) +grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) +target_color = circos_links %>% distinct(target,color_target_type) +grid_target_color = target_color$color_target_type %>% set_names(target_color$target) + +grid_col = c(grid_ligand_color,grid_target_color) + +ligand_order = c(CAF_specific_ligands,general_ligands,endothelial_specific_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) +``` + +#### Using the `draw.sector` function + +For demonstration purposes, we will use a subset of ligands and divide them into four groups that differ in the signaling pathways they interact with. Then, we assign targets and receptors to each ligand group based on their relative rankings (and summed weights in case the rankings are the same). The way we assigned targets and receptors to ligand groups here does not always lead to the most biologically meaningful results, as you will see in the final plot. Hence, it is best if you curate this list manually, e.g., through prior knowledge and by also looking at the ligand-target and ligand-receptor heatmap. Also keep in mind that there is no real correspondence between receptors and targets, as explained more [here](https://github.com/saeyslab/nichenetr/issues/20#issuecomment-611601039). + +``` r +groups <- list(group1 = c("TGFB2", "ENG"), + group2 = grep("BMP|GDF|INHBA", best_upstream_ligands, value = TRUE), + group3 = grep("COL|MMP|TIMP", best_upstream_ligands, value = TRUE), + group4 = "CXCL12") + +# Create list of targets for each group of ligand +targets <- lapply(names(groups), function(i) { + # Rank each target for each ligand + active_ligand_target_links_df %>% group_by(ligand) %>% mutate(target_rank = dense_rank(desc(weight))) %>% + filter(ligand %in% groups[[i]]) %>% + # Make two metrics for each target -> summed weight and avg_rank + group_by(target) %>% + summarise(n = n(), summed_weight=sum(weight), avg_rank = mean(target_rank)) %>% + # Only keep targets that are connected to at least half of ligands in the group + filter(n > (length(groups[[i]])/2)) %>% mutate(type = i) +}) %>% do.call(rbind, .) + +# Check if any targets are distinct per group (groups 2 and 4 have some) +lapply(paste0("group", 1:5), function(group_name) setdiff(targets %>% filter(type == group_name) %>% pull(target), targets %>% filter(type != group_name) %>% pull(target))) +## [[1]] +## character(0) +## +## [[2]] +## [1] "DKK3" "GJA1" "HTRA1" "SEMA3C" +## +## [[3]] +## character(0) +## +## [[4]] +## [1] "LAMC2" "MMP10" "PRSS23" "SLC31A2" "TPM4" +## +## [[5]] +## character(0) + +# Assign target to a specific group, first based on ranking and second based on weight +targets_filtered <- targets %>% group_by(target) %>% filter(avg_rank == min(avg_rank)) %>% + filter(summed_weight == max(summed_weight)) %>% ungroup() + +# Do the same for receptors +receptor_colors <- c("#387D7A", "#9DA9A0", "#DD7373", "#725752") %>% setNames(names(groups)) +receptors <- lapply(names(groups), function(i) { + weighted_networks$lr_sig %>% filter(from %in% groups[[i]] & to %in% best_upstream_receptors) %>% + group_by(from) %>% mutate(receptor_rank = dense_rank(desc(weight))) %>% + group_by(to) %>% summarise(summed_weight=sum(weight), avg_weight=mean(weight), avg_rank = mean(receptor_rank)) %>% + mutate(type=i, color = receptor_colors[i]) %>% rename(receptor = to) +}) %>% do.call(rbind, .) + +# Check distinct receptors per group +lapply(paste0("group", 1:5), function(group_name) setdiff(receptors %>% filter(type == group_name) %>% pull(receptor), receptors %>% filter(type != group_name) %>% pull(receptor))) +## [[1]] +## [1] "MET" +## +## [[2]] +## character(0) +## +## [[3]] +## [1] "CD47" "ITGA2" "ITGA3" "ITGB5" "ITGB6" "ITGB8" +## +## [[4]] +## [1] "BDKRB2" +## +## [[5]] +## character(0) + +# Assign receptor to a specific group +receptors_filtered <- receptors %>% group_by(receptor) %>% filter(avg_rank == min(avg_rank)) %>% + filter(summed_weight == max(summed_weight)) %>% ungroup() +``` + +We will then have to redefine some variables. + +``` r +# Filter out targets and ligands that are no longer present +links_circle_approach1 <- links_circle %>% filter(ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) + +order <- c(ligand_order %>%.[. %in% paste0(unlist(groups), " ")], targets_filtered$target) + +# Redefine gaps between sectors +width_same_cell_same_ligand_type = 0.6 +width_different_cell = 4.5 +width_ligand_target = 12 +width_same_cell_same_target_type = 0.6 # Added +width_different_target = 4.5 # Added + +group_widths <- sapply(paste0("group", 1:4), function(group) { + # Gap between targets of the same group + paste0(rep(width_same_cell_same_target_type, times =(targets_filtered %>% filter(type==group) %>% nrow)-1), collapse=",")}) %>% + # Separate this with gap of different group + paste0(., collapse=paste0(",",width_different_target,",")) %>% + str_split(., ",") %>% .[[1]] %>% as.numeric + +gaps = c( + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific", + ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) %>% + distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General", + ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) %>% + distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific", + ligand %in% paste0(unlist(groups), " "), + target %in% targets_filtered$target) %>% + distinct(ligand) %>% nrow() -1)), + width_ligand_target, + group_widths, + width_ligand_target + ) + +gaps = gaps %>% .[!is.na(.)] +``` + +Finally, we create the plot. What’s different here is we add an extra +layer in `preAllocateTracks`, and we add a `for` loop at the end to draw +the outer layer. As mentioned previously, the resulting plot will +require some further manual tweaking (e.g., ITG receptors from group 1 +should be moved to group 3). + +``` r +circos.par(gap.degree = gaps) + +chordDiagram(links_circle_approach1, order=order, transparency=0, directional = 1, link.sort = TRUE, link.decreasing = FALSE, + grid.col = grid_col, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"), + link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands, annotationTrack = "grid", + # Add extra track for outer layer + preAllocateTracks = list(list(track.height = 0.025), + list(track.height = 0.25))) + +# we go back to the first track and customize sector labels +circos.track(track.index = 2, panel.fun = function(x, y) { + circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, + facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) +}, bg.border = NA) # + + +padding <- 0.05 # Gaps between receptor arcs +rou_adjustment <- 0.08 # Might need adjustment +for (group in unique(targets_filtered$type)){ + # Subset target and receptor + targets_subset <- targets_filtered %>% filter(type == group) + receptors_subset <- receptors_filtered %>% filter(type == group) + + # Get approximate position of outer ring + pos <- circlize(c(0, 1), c(0, 1), track.index = 1) + rou1 <- pos[1, "rou"]-rou_adjustment + rou2 <- pos[2, "rou"]-rou_adjustment + + # Get range of angles from first to last target of the current group + theta1 <- circlize:::get.sector.data((targets_subset %>% pull(target) %>% .[1]))["start.degree"] + theta2 <- circlize:::get.sector.data((targets_subset %>% pull(target) %>% .[length(.)]))["end.degree"] + + # Scale the arc lengths according to the summed ligand-receptor weights + receptors_subset_scaled <- receptors_subset %>% mutate(scaled_weight = summed_weight/sum(summed_weight)*(theta1-theta2)) + # For each receptor + current_theta <- theta1 + for (i in 1:nrow(receptors_subset_scaled)){ + # Get end angle of the arc + end_theta <- current_theta-(receptors_subset_scaled %>% slice(i) %>% pull(scaled_weight)) + d1 <- abs(end_theta-current_theta) # For gaps + + # Main function - we draw the arc here + draw.sector(current_theta+(d1*-padding), end_theta-(d1*-padding), + rou1, rou2, + col = receptors_subset_scaled %>% slice(i) %>% pull(color), + clock.wise = TRUE, border=NA) + + # Add text containing receptor name + pos_text <- reverse.circlize((current_theta + end_theta)/2 + ifelse(current_theta < end_theta, 180, 0), (rou1 + rou2)/2) + # It's going to give a Note that point is out of plotting region + suppressMessages(circos.text(pos_text[1,1], pos_text[1,2]+convert_y(7, "mm"), + labels = receptors_subset_scaled %>% slice(i) %>% pull(receptor), + niceFacing=TRUE, facing="clockwise", cex=0.6)) + + current_theta <- end_theta + } + +} +``` + +![](circos_files/figure-gfm/unnamed-chunk-107-1.png) + +``` r + + +circos.clear() +``` + +#### Using the `highlight.sector` function + +With this function, it is not possible to draw receptor arcs that end at +different points from the target arcs. To demonstrate this, we will +randomly assign the target genes into one of three groups (Receptors A, +B, and C). + +``` r +target_gene_groups <- sample(c("Receptor A", "Receptor B", "Receptor C"), length(unique(circos_links$target)), replace = TRUE) %>% + setNames(unique(circos_links$target)) +target_gene_groups +## ACTN1 C1S COL17A1 COL1A1 COL4A2 F3 FSTL3 IGFBP3 ITGA5 LAMC2 MFAP2 MMP2 MYH9 PDLIM7 +## "Receptor B" "Receptor A" "Receptor A" "Receptor B" "Receptor C" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor C" +## PSMD2 PTHLH SERPINE1 SERPINE2 TAGLN TGFBI TNC TPM1 MMP1 MMP10 MT2A PRSS23 SLC31A2 THBS1 +## "Receptor A" "Receptor B" "Receptor C" "Receptor A" "Receptor C" "Receptor A" "Receptor C" "Receptor B" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor C" "Receptor B" +## TPM4 APP COL5A2 DKK3 GJA1 HTRA1 PLAU SEMA3C VIM CAV1 ITGA6 MAGED1 MAGED2 PLOD2 +## "Receptor A" "Receptor B" "Receptor B" "Receptor B" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor B" "Receptor A" "Receptor B" "Receptor A" "Receptor B" "Receptor A" +## SLC39A14 FSTL1 LGALS1 P4HA2 IL32 FHL2 ITGB1 +## "Receptor B" "Receptor C" "Receptor C" "Receptor C" "Receptor C" "Receptor B" "Receptor C" + +target_gene_group_colors <- c("#387D7A", "#9DA9A0", "#704E2E") %>% setNames(unique(target_gene_groups)) +``` + +Again, we will redefine some variables. + +``` r +# Order targets according to receptor they belong to +order = c(ligand_order, target_gene_groups %>% sort %>% names) + +# Redefine gaps between sectors +width_same_cell_same_ligand_type = 0.6 +width_different_cell = 4.5 +width_ligand_target = 12 +width_same_cell_same_target_type = 0.6 # Added +width_different_target = 4.5 # Added + +# Add this to circos_links +circos_links = circos_links %>% mutate(target_receptor = target_gene_groups[target]) + +gaps = c( + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific") %>% distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), + width_different_cell, + rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific") %>% distinct(ligand) %>% nrow() -1)), + width_ligand_target, + # Add code to define gaps between different target groups + rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor A") %>% distinct(target) %>% nrow() -1)), + width_different_target, + rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor B") %>% distinct(target) %>% nrow() -1)), + width_different_target, + rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor C") %>% distinct(target) %>% nrow() -1)), + width_ligand_target + ) +``` + +The general idea here is similar - adding an extra layer in +`preAllocateTracks` and a `for` loop at the end to draw the outer +layer - but the function allows for much cleaner code. + +``` r +circos.par(gap.degree = gaps) +chordDiagram(links_circle, directional = 1, transparency=0, order=order,link.sort = TRUE, link.decreasing = FALSE, + grid.col = grid_col, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"), + link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", + # Add extra track for outer layer + preAllocateTracks = list(list(track.height = 0.025), + list(track.height = 0.2))) + +# we go back to the first track and customize sector labels +circos.track(track.index = 2, panel.fun = function(x, y) { + circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, + facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) +}, bg.border = NA) # + +# Add outer layer +for (target_gene_group in unique(target_gene_groups)){ + highlight.sector(target_gene_groups %>% .[. == target_gene_group] %>% names, + track.index = 1, + col = target_gene_group_colors[target_gene_group], + text = target_gene_group, + cex = 0.8, facing="bending.inside", niceFacing = TRUE, text.vjust = "5mm") +} +``` + +![](circos_files/figure-gfm/unnamed-chunk-110-1.png) + +``` r + +circos.clear() +``` ### References @@ -520,9 +823,9 @@ and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://doi.org/10.1016/j.immuni.2019.08.017) -
+
-
+
Puram, Sidharth V., Itay Tirosh, Anuraag S. Parikh, Anoop P. Patel, Keren Yizhak, Shawn Gillespie, Christopher Rodman, et al. 2017. diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-101-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-101-1.png new file mode 100644 index 0000000..90e6ebd Binary files /dev/null and b/vignettes/circos_files/figure-gfm/unnamed-chunk-101-1.png differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-102-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-102-1.png new file mode 100644 index 0000000..738b22b Binary files /dev/null and b/vignettes/circos_files/figure-gfm/unnamed-chunk-102-1.png differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-107-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-107-1.png new file mode 100644 index 0000000..592c468 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/unnamed-chunk-107-1.png differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-110-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-110-1.png new file mode 100644 index 0000000..1dce9c9 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/unnamed-chunk-110-1.png differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-15-1.png deleted file mode 100644 index 7bc3631..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-15-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-16-1.png deleted file mode 100644 index 9945e98..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-16-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-203-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-203-1.png deleted file mode 100644 index 564fbac..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-203-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-204-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-204-1.png deleted file mode 100644 index 70fd703..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-204-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-22-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-22-1.png deleted file mode 100644 index df08581..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-22-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-23-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-23-1.png deleted file mode 100644 index b32791c..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-23-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-25-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-25-1.png deleted file mode 100644 index e843f41..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-25-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-25-2.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-25-2.png deleted file mode 100644 index 819a7f7..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-25-2.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-32-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-32-1.png deleted file mode 100644 index 7bc3631..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-32-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-33-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-33-1.png deleted file mode 100644 index 9945e98..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-33-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-45-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-45-1.png deleted file mode 100644 index 7bc3631..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-45-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-46-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-46-1.png deleted file mode 100644 index 9945e98..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-46-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-52-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-52-1.png deleted file mode 100644 index df08581..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-52-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-53-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-53-1.png deleted file mode 100644 index b32791c..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-53-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-94-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-94-1.png new file mode 100644 index 0000000..41fca5e Binary files /dev/null and b/vignettes/circos_files/figure-gfm/unnamed-chunk-94-1.png differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-95-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-95-1.png new file mode 100644 index 0000000..f949f34 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/unnamed-chunk-95-1.png differ diff --git a/vignettes/data_sources.xlsx b/vignettes/data_sources.xlsx old mode 100644 new mode 100755 index 346c39a..e5630e9 Binary files a/vignettes/data_sources.xlsx and b/vignettes/data_sources.xlsx differ diff --git a/vignettes/differential_nichenet.Rmd b/vignettes/differential_nichenet.Rmd index 4bd7519..7c42f17 100644 --- a/vignettes/differential_nichenet.Rmd +++ b/vignettes/differential_nichenet.Rmd @@ -52,38 +52,20 @@ As you can see, the LSECs, hepatocytes and Stellate cells are each divided in tw ## Read in the NicheNet ligand-receptor network and ligand-target matrix -The used ligand-receptor network and ligand-target matrix can be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). -The Seurat object containing expression data of interacting cells in HNSCC can also be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4675430.svg)](https://doi.org/10.5281/zenodo.4675430). +The used ligand-receptor network and ligand-target matrix can be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ``` ```{r} -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -lr_network = lr_network %>% mutate(bonafide = ! database %in% c("ppi_prediction","ppi_prediction_go")) -lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor, bonafide) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) head(lr_network) ``` -Note: because the data is of mouse origin: we need to convert human gene symbols to their murine one-to-one orthologs - -```{r} -organism = "mouse" -``` - -```{r} -if(organism == "mouse"){ - lr_network = lr_network %>% mutate(ligand = convert_human_to_mouse_symbols(ligand), receptor = convert_human_to_mouse_symbols(receptor)) %>% drop_na() - - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] -} -``` - # 1. Define the niches/microenvironments of interest Each niche should have at least one “sender/niche” cell population and one “receiver/target” cell population (present in your expression data) @@ -125,8 +107,11 @@ DE will be calculated for each pairwise sender (or receiver) cell type comparisi ```{r} assay_oi = "SCT" # other possibilities: RNA,... -seurat_obj = PrepSCTFindMarkers(seurat_obj, assay = "SCT", verbose = FALSE) +# If you use convert_to_alias before here, this one won't work +seurat_obj = Seurat::PrepSCTFindMarkers(seurat_obj, assay = "SCT", verbose = FALSE) + +seurat_obj = alias_to_symbol_seurat(seurat_obj, organism = "mouse") DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% intersect(rownames(seurat_obj))), niches = niches, type = "sender", assay_oi = assay_oi) # only ligands important for sender cell types DE_receiver = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), niches = niches, type = "receiver", assay_oi = assay_oi) # only receptors now, later on: DE analysis to find targets @@ -315,7 +300,7 @@ exprs_sender_receiver = lr_network %>% inner_join(exprs_tbl_ligand, by = c("ligand")) %>% inner_join(exprs_tbl_receptor, by = c("receptor")) %>% inner_join(DE_sender_receiver %>% distinct(niche, sender, receiver)) -ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction, bonafide) %>% distinct() %>% ungroup() +ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction) %>% distinct() %>% ungroup() ``` # 7. Prioritization of ligand-receptor and ligand-target links @@ -346,8 +331,6 @@ We provide the user the option to consider the following properties for prioriti * Normalized ligand activity: to further prioritize ligand-receptor pairs based on their predicted effect of the ligand-receptor interaction on the gene expression in the receiver cell type - normalization of activity is done because we found that some datasets/conditions/niches have higher baseline activity values than others - normalized ligand activity accords to 'relative' enrichment of target genes of a ligand within the affected receiver genes. `prioritizing_weights` argument: `"scaled_activity_normalized"`. Recommended weight: at least 1. -* Prior knowledge quality of the L-R interaction: the NicheNet LR network consists of two types of interactions: L-R pairs documented in curated databases, and L-R pairs predicted based on gene annotation and PPIs. The former are categorized as 'bona fide' interactions. To rank bona fide interactions higher, but not exlude potentially interesting non-bona-fide ones, we give bona fide interactions a score of 1, and non-bona-fide interactions a score fof 0.5. `prioritizing_weights` argument: `"bona_fide"` Recommend weight: at least 1. - ```{r} prioritizing_weights = c("scaled_ligand_score" = 5, @@ -360,8 +343,7 @@ prioritizing_weights = c("scaled_ligand_score" = 5, "ligand_scaled_receptor_expression_fraction" = 1, "scaled_receptor_score_spatial" = 0, "scaled_activity" = 0, - "scaled_activity_normalized" = 1, - "bona_fide" = 1) + "scaled_activity_normalized" = 1) ``` ```{r} @@ -504,7 +486,7 @@ For the opposite pairs with low-DE and high-activity that are not strongly prior When Ligand-Receptor pairs have both high DE and high activity, we can consider them to be very good candidates in regulating the process of interest, and we recommend testing these candidates for further experimental validation. -# References +### References Browaeys, R., Saelens, W. & Saeys, Y. NicheNet: modeling intercellular communication by linking ligands to target genes. Nat Methods (2019) doi:10.1038/s41592-019-0667-5 diff --git a/vignettes/differential_nichenet.md b/vignettes/differential_nichenet.md index 24f5fb1..a3380df 100644 --- a/vignettes/differential_nichenet.md +++ b/vignettes/differential_nichenet.md @@ -64,54 +64,33 @@ pericentral). The used ligand-receptor network and ligand-target matrix can be downloaded from Zenodo -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). -The Seurat object containing expression data of interacting cells in -HNSCC can also be downloaded from Zenodo -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4675430.svg)](https://doi.org/10.5281/zenodo.4675430). +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 +## 2300002M23Rik 2610528A11Rik 9530003J23Rik a A2m +## 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 1.390053e-05 +## 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 1.345536e-05 +## 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 9.802264e-05 +## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 +## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 ``` ``` r -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -lr_network = lr_network %>% mutate(bonafide = ! database %in% c("ppi_prediction","ppi_prediction_go")) -lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor, bonafide) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) head(lr_network) -## # A tibble: 6 x 3 -## ligand receptor bonafide -## -## 1 CXCL1 CXCR2 TRUE -## 2 CXCL2 CXCR2 TRUE -## 3 CXCL3 CXCR2 TRUE -## 4 CXCL5 CXCR2 TRUE -## 5 PPBP CXCR2 TRUE -## 6 CXCL6 CXCR2 TRUE -``` - -Note: because the data is of mouse origin: we need to convert human gene -symbols to their murine one-to-one orthologs - -``` r -organism = "mouse" -``` - -``` r -if(organism == "mouse"){ - lr_network = lr_network %>% mutate(ligand = convert_human_to_mouse_symbols(ligand), receptor = convert_human_to_mouse_symbols(receptor)) %>% drop_na() - - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] -} +## # A tibble: 6 × 2 +## ligand receptor +## +## 1 2300002M23Rik Ddr1 +## 2 2610528A11Rik Gpr15 +## 3 9530003J23Rik Itgal +## 4 a Atrn +## 5 a F11r +## 6 a Mc1r ``` # 1. Define the niches/microenvironments of interest @@ -175,8 +154,11 @@ analysis will be driven by the most abundant cell types. ``` r assay_oi = "SCT" # other possibilities: RNA,... -seurat_obj = PrepSCTFindMarkers(seurat_obj, assay = "SCT", verbose = FALSE) +# If you use convert_to_alias before here, this one won't work +seurat_obj = Seurat::PrepSCTFindMarkers(seurat_obj, assay = "SCT", verbose = FALSE) + +seurat_obj = alias_to_symbol_seurat(seurat_obj, organism = "mouse") DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% intersect(rownames(seurat_obj))), niches = niches, type = "sender", assay_oi = assay_oi) # only ligands important for sender cell types ## [1] "Calculate Sender DE between: LSECs_portal and Cholangiocytes" ## [2] "Calculate Sender DE between: LSECs_portal and Fibroblast 2" @@ -211,7 +193,7 @@ DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_ ## [4] "Calculate Sender DE between: Mesothelial cells and Cholangiocytes" ## [5] "Calculate Sender DE between: Mesothelial cells and Fibroblast 2" DE_receiver = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), niches = niches, type = "receiver", assay_oi = assay_oi) # only receptors now, later on: DE analysis to find targets -## # A tibble: 3 x 2 +## # A tibble: 3 × 2 ## receiver receiver_other_niche ## ## 1 KCs MoMac2 @@ -379,16 +361,11 @@ geneset_MoMac1 = DE_receiver_processed_targets %>% filter(receiver == niches$MoM # Good idea to check which genes will be left out of the ligand activity analysis (=when not present in the rownames of the ligand-target matrix). # If many genes are left out, this might point to some issue in the gene naming (eg gene aliases and old gene symbols, bad human-mouse mapping) geneset_KC %>% setdiff(rownames(ligand_target_matrix)) -## [1] "Fcna" "Wfdc17" "AW112010" "mt-Co1" "mt-Nd2" "C4b" "Adgre4" "mt-Co3" -## [9] "Pira2" "mt-Co2" "mt-Nd4" "mt-Atp6" "mt-Nd1" "mt-Nd3" "Ear2" "2900097C17Rik" -## [17] "Iigp1" "Trim30a" "B430306N03Rik" "mt-Cytb" "Pilrb2" "Anapc15" "Arf2" "Gbp8" -## [25] "AC149090.1" "Cd209f" "Xlr" "Ifitm6" +## [1] "Wfdc17" "AW112010" "2900097C17Rik" "B430306N03Rik" "AC149090.1" geneset_MoMac2 %>% setdiff(rownames(ligand_target_matrix)) -## [1] "Chil3" "Lyz1" "Ccl9" "Tmsb10" "Ly6c2" "Gm21188" "Gm10076" "Ms4a6c" -## [9] "Calm3" "Atp5e" "Ftl1-ps1" "S100a11" "Clec4a3" "Snrpe" "Cox6c" "Ly6i" -## [17] "1810058I24Rik" "Rpl34" "Aph1c" "Atp5o.1" +## [1] "Gm21188" "Gm10076" "Rpl41" "Atp5o.1" "H2afy" geneset_MoMac1 %>% setdiff(rownames(ligand_target_matrix)) -## [1] "H2-Ab1" "Malat1" "H2-Aa" "Hspa1b" "Gm26522" "Ly6a" "H2-D1" "Klra2" "Bcl2a1d" "Kcnq1ot1" +## [1] "Gm26522" length(geneset_KC) ## [1] 443 @@ -474,7 +451,7 @@ exprs_sender_receiver = lr_network %>% inner_join(exprs_tbl_ligand, by = c("ligand")) %>% inner_join(exprs_tbl_receptor, by = c("receptor")) %>% inner_join(DE_sender_receiver %>% distinct(niche, sender, receiver)) -ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction, bonafide) %>% distinct() %>% ungroup() +ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction) %>% distinct() %>% ungroup() ``` # 7. Prioritization of ligand-receptor and ligand-target links @@ -488,125 +465,111 @@ We provide the user the option to consider the following properties for prioritization (of which the weights are defined in `prioritizing_weights`) : -- Ligand DE score: niche-specific expression of the ligand: by - default, this the minimum logFC between the sender of interest and - all the senders of the other niche(s). The higher the min logFC, the - higher the niche-specificity of the ligand. Therefore we recommend - to give this factor a very high weight. `prioritizing_weights` - argument: `"scaled_ligand_score"`. Recommended weight: 5 (at least - 1, max 5). - -- Scaled ligand expression: scaled expression of a ligand in one - sender compared to the other cell types in the dataset. This might - be useful to rescue potentially interesting ligands that have a high - scaled expression value, but a relatively small min logFC compared - to the other niche. One reason why this logFC might be small occurs - when (some) genes are not picked up efficiently by the used - sequencing technology (or other reasons for low RNA expression of - ligands). For example, we have observed that many ligands from the - Tgf-beta/BMP family are not picked up efficiently with single-nuclei - RNA sequencing compared to single-cell sequencing. - `prioritizing_weights` argument: - `"scaled_ligand_expression_scaled"`. Recommended weight: 1 (unless - technical reason for lower gene detection such as while using - Nuc-seq: then recommended to use a higher weight: 2). - -- Ligand expression fraction: Ligands that are expressed in a smaller - fraction of cells of a cell type than defined by - `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional - to their fraction (eg ligand expressed in 9% of cells will be ranked - higher than ligand expressed in 0.5% of cells). We opted for this - weighting based on fraction, instead of removing ligands that are - not expressed in more cells than this cutoff, because some - interesting ligands could be removed that way. Fraction of - expression is not taken into account for the prioritization if it is - already higher than the cutoff. `prioritizing_weights` argument: - `"ligand_fraction"`. Recommended weight: 1. - -- Ligand spatial DE score: spatial expression specificity of the - ligand. If the niche of interest is at a specific tissue location, - but some of the sender cell types of that niche are also present in - other locations, it can be very informative to further prioritize - ligands of that sender by looking how they are DE between the - spatial location of interest compared to the other locations. - `prioritizing_weights` argument: `"scaled_ligand_score_spatial"`. - Recommended weight: 2 (or 0 if not applicable). - -- Receptor DE score: niche-specific expression of the receptor: by - default, this the minimum logFC between the receiver of interest and - all the receiver of the other niche(s). The higher the min logFC, - the higher the niche-specificity of the receptor. Based on our - experience, we don’t suggest to give this as high importance as the - ligand DE, but this might depend on the specific case study. - `prioritizing_weights` argument: `"scaled_receptor_score"`. - Recommended weight: 0.5 (at least 0.5, and lower than - `"scaled_ligand_score"`). - -- Scaled receptor expression: scaled expression of a receptor in one - receiver compared to the other cell types in the dataset. This might - be useful to rescue potentially interesting receptors that have a - high scaled expression value, but a relatively small min logFC - compared to the other niche. One reason why this logFC might be - small occurs when (some) genes are not picked up efficiently by the - used sequencing technology. `prioritizing_weights` argument: - `"scaled_receptor_expression_scaled"`. Recommended weight: 0.5. - -- Receptor expression fraction: Receptors that are expressed in a - smaller fraction of cells of a cell type than defined by - `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional - to their fraction (eg receptor expressed in 9% of cells will be - ranked higher than receptor expressed in 0.5% of cells). We opted - for this weighting based on fraction, instead of removing receptors - that are not expressed in more cells than this cutoff, because some - interesting receptors could be removed that way. Fraction of - expression is not taken into account for the prioritization if it is - already higher than the cutoff. `prioritizing_weights` argument: - `"receptor_fraction"`. Recommended weight: 1. - -- Receptor expression strength: this factor let us give higher weights - to the most highly expressed receptor of a ligand in the receiver. - This let us rank higher one member of a receptor family if it higher - expressed than the other members. `prioritizing_weights` argument: - `"ligand_scaled_receptor_expression_fraction"`. Recommended value: 1 - (minimum: 0.5). - -- Receptor spatial DE score: spatial expression specificity of the - receptor. If the niche of interest is at a specific tissue location, - but the receiver cell type of that niche is also present in other - locations, it can be very informative to further prioritize - receptors of that receiver by looking how they are DE between the - spatial location of interest compared to the other locations. - `prioritizing_weights` argument: `"scaled_receptor_score_spatial"`. - Recommended weight: 1 (or 0 if not applicable). - -- Absolute ligand activity: to further prioritize ligand-receptor - pairs based on their predicted effect of the ligand-receptor - interaction on the gene expression in the receiver cell type - - absolute ligand activity accords to ‘absolute’ enrichment of target - genes of a ligand within the affected receiver genes. - `prioritizing_weights` argument: `"scaled_activity"`. Recommended - weight: 0, unless absolute enrichment of target genes is of specific - interest. - -- Normalized ligand activity: to further prioritize ligand-receptor - pairs based on their predicted effect of the ligand-receptor - interaction on the gene expression in the receiver cell type - - normalization of activity is done because we found that some - datasets/conditions/niches have higher baseline activity values than - others - normalized ligand activity accords to ‘relative’ enrichment - of target genes of a ligand within the affected receiver genes. - `prioritizing_weights` argument: `"scaled_activity_normalized"`. - Recommended weight: at least 1. - -- Prior knowledge quality of the L-R interaction: the NicheNet LR - network consists of two types of interactions: L-R pairs documented - in curated databases, and L-R pairs predicted based on gene - annotation and PPIs. The former are categorized as ‘bona fide’ - interactions. To rank bona fide interactions higher, but not exlude - potentially interesting non-bona-fide ones, we give bona fide - interactions a score of 1, and non-bona-fide interactions a score - fof 0.5. `prioritizing_weights` argument: `"bona_fide"` Recommend - weight: at least 1. +- Ligand DE score: niche-specific expression of the ligand: by default, + this the minimum logFC between the sender of interest and all the + senders of the other niche(s). The higher the min logFC, the higher + the niche-specificity of the ligand. Therefore we recommend to give + this factor a very high weight. `prioritizing_weights` argument: + `"scaled_ligand_score"`. Recommended weight: 5 (at least 1, max 5). + +- Scaled ligand expression: scaled expression of a ligand in one sender + compared to the other cell types in the dataset. This might be useful + to rescue potentially interesting ligands that have a high scaled + expression value, but a relatively small min logFC compared to the + other niche. One reason why this logFC might be small occurs when + (some) genes are not picked up efficiently by the used sequencing + technology (or other reasons for low RNA expression of ligands). For + example, we have observed that many ligands from the Tgf-beta/BMP + family are not picked up efficiently with single-nuclei RNA sequencing + compared to single-cell sequencing. `prioritizing_weights` argument: + `"scaled_ligand_expression_scaled"`. Recommended weight: 1 (unless + technical reason for lower gene detection such as while using Nuc-seq: + then recommended to use a higher weight: 2). + +- Ligand expression fraction: Ligands that are expressed in a smaller + fraction of cells of a cell type than defined by + `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional + to their fraction (eg ligand expressed in 9% of cells will be ranked + higher than ligand expressed in 0.5% of cells). We opted for this + weighting based on fraction, instead of removing ligands that are not + expressed in more cells than this cutoff, because some interesting + ligands could be removed that way. Fraction of expression is not taken + into account for the prioritization if it is already higher than the + cutoff. `prioritizing_weights` argument: `"ligand_fraction"`. + Recommended weight: 1. + +- Ligand spatial DE score: spatial expression specificity of the ligand. + If the niche of interest is at a specific tissue location, but some of + the sender cell types of that niche are also present in other + locations, it can be very informative to further prioritize ligands of + that sender by looking how they are DE between the spatial location of + interest compared to the other locations. `prioritizing_weights` + argument: `"scaled_ligand_score_spatial"`. Recommended weight: 2 (or 0 + if not applicable). + +- Receptor DE score: niche-specific expression of the receptor: by + default, this the minimum logFC between the receiver of interest and + all the receiver of the other niche(s). The higher the min logFC, the + higher the niche-specificity of the receptor. Based on our experience, + we don’t suggest to give this as high importance as the ligand DE, but + this might depend on the specific case study. `prioritizing_weights` + argument: `"scaled_receptor_score"`. Recommended weight: 0.5 (at least + 0.5, and lower than `"scaled_ligand_score"`). + +- Scaled receptor expression: scaled expression of a receptor in one + receiver compared to the other cell types in the dataset. This might + be useful to rescue potentially interesting receptors that have a high + scaled expression value, but a relatively small min logFC compared to + the other niche. One reason why this logFC might be small occurs when + (some) genes are not picked up efficiently by the used sequencing + technology. `prioritizing_weights` argument: + `"scaled_receptor_expression_scaled"`. Recommended weight: 0.5. + +- Receptor expression fraction: Receptors that are expressed in a + smaller fraction of cells of a cell type than defined by + `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional + to their fraction (eg receptor expressed in 9% of cells will be ranked + higher than receptor expressed in 0.5% of cells). We opted for this + weighting based on fraction, instead of removing receptors that are + not expressed in more cells than this cutoff, because some interesting + receptors could be removed that way. Fraction of expression is not + taken into account for the prioritization if it is already higher than + the cutoff. `prioritizing_weights` argument: `"receptor_fraction"`. + Recommended weight: 1. + +- Receptor expression strength: this factor let us give higher weights + to the most highly expressed receptor of a ligand in the receiver. + This let us rank higher one member of a receptor family if it higher + expressed than the other members. `prioritizing_weights` argument: + `"ligand_scaled_receptor_expression_fraction"`. Recommended value: 1 + (minimum: 0.5). + +- Receptor spatial DE score: spatial expression specificity of the + receptor. If the niche of interest is at a specific tissue location, + but the receiver cell type of that niche is also present in other + locations, it can be very informative to further prioritize receptors + of that receiver by looking how they are DE between the spatial + location of interest compared to the other locations. + `prioritizing_weights` argument: `"scaled_receptor_score_spatial"`. + Recommended weight: 1 (or 0 if not applicable). + +- Absolute ligand activity: to further prioritize ligand-receptor pairs + based on their predicted effect of the ligand-receptor interaction on + the gene expression in the receiver cell type - absolute ligand + activity accords to ‘absolute’ enrichment of target genes of a ligand + within the affected receiver genes. `prioritizing_weights` argument: + `"scaled_activity"`. Recommended weight: 0, unless absolute enrichment + of target genes is of specific interest. + +- Normalized ligand activity: to further prioritize ligand-receptor + pairs based on their predicted effect of the ligand-receptor + interaction on the gene expression in the receiver cell type - + normalization of activity is done because we found that some + datasets/conditions/niches have higher baseline activity values than + others - normalized ligand activity accords to ‘relative’ enrichment + of target genes of a ligand within the affected receiver genes. + `prioritizing_weights` argument: `"scaled_activity_normalized"`. + Recommended weight: at least 1. ``` r prioritizing_weights = c("scaled_ligand_score" = 5, @@ -619,8 +582,7 @@ prioritizing_weights = c("scaled_ligand_score" = 5, "ligand_scaled_receptor_expression_fraction" = 1, "scaled_receptor_score_spatial" = 0, "scaled_activity" = 0, - "scaled_activity_normalized" = 1, - "bona_fide" = 1) + "scaled_activity_normalized" = 1) ``` ``` r @@ -629,118 +591,121 @@ output = list(DE_sender_receiver = DE_sender_receiver, ligand_scaled_receptor_ex prioritization_tables = get_prioritization_tables(output, prioritizing_weights) prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == niches[[1]]$receiver) %>% head(10) -## # A tibble: 10 x 37 -## niche receiver sender ligand_receptor ligand receptor bonafide ligand_score ligand_signific~ ligand_present ligand_expressi~ -## -## 1 KC_niche KCs Hepatocytes~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE 3.18 1 1 14.7 -## 2 KC_niche KCs Hepatocytes~ Apoa1--Msr1 Apoa1 Msr1 FALSE 3.18 1 1 14.7 -## 3 KC_niche KCs Hepatocytes~ Apoa1--Abca1 Apoa1 Abca1 FALSE 3.18 1 1 14.7 -## 4 KC_niche KCs Hepatocytes~ Apoa1--Scarb1 Apoa1 Scarb1 FALSE 3.18 1 1 14.7 -## 5 KC_niche KCs Hepatocytes~ Apoa1--Derl1 Apoa1 Derl1 FALSE 3.18 1 1 14.7 -## 6 KC_niche KCs Hepatocytes~ Serpina1a--Lrp1 Serpina~ Lrp1 TRUE 2.64 1 1 6.97 -## 7 KC_niche KCs Hepatocytes~ Apoa1--Atp5b Apoa1 Atp5b FALSE 3.18 1 1 14.7 -## 8 KC_niche KCs Hepatocytes~ Trf--Tfrc Trf Tfrc TRUE 1.61 1 1 6.19 -## 9 KC_niche KCs Hepatocytes~ Apoa1--Cd36 Apoa1 Cd36 FALSE 3.18 1 1 14.7 -## 10 KC_niche KCs LSECs_portal Cxcl10--Fpr1 Cxcl10 Fpr1 FALSE 1.66 1 1 2.35 -## # ... with 26 more variables: ligand_expression_scaled , ligand_fraction , ligand_score_spatial , receptor_score , -## # receptor_significant , receptor_present , receptor_expression , receptor_expression_scaled , -## # receptor_fraction , receptor_score_spatial , ligand_scaled_receptor_expression_fraction , +## # A tibble: 10 × 36 +## niche receiver sender ligand_receptor ligand receptor ligand_score ligand_signific… ligand_present +## +## 1 KC_niche KCs Hepatocytes_portal Apoc3--Lrp1 Apoc3 Lrp1 3.33 1 1 +## 2 KC_niche KCs Hepatocytes_portal Apoa2--Lrp1 Apoa2 Lrp1 4.07 1 1 +## 3 KC_niche KCs Hepatocytes_portal Apoa1--Lrp1 Apoa1 Lrp1 3.18 1 1 +## 4 KC_niche KCs Hepatocytes_portal Serpina1e--Lrp1 Serpi… Lrp1 3.63 1 1 +## 5 KC_niche KCs Hepatocytes_portal Apoc3--Tlr2 Apoc3 Tlr2 3.33 1 1 +## 6 KC_niche KCs Hepatocytes_portal Apoa1--Abca1 Apoa1 Abca1 3.18 1 1 +## 7 KC_niche KCs Hepatocytes_portal Hpx--Lrp1 Hpx Lrp1 1.87 1 1 +## 8 KC_niche KCs Hepatocytes_portal Serpina1b--Lrp1 Serpi… Lrp1 2.70 1 1 +## 9 KC_niche KCs Hepatocytes_portal Fgb--Cdh5 Fgb Cdh5 1.98 1 1 +## 10 KC_niche KCs Stellate cells_po… Ntm--Cd79b Ntm Cd79b 2.65 1 1 +## # … with 27 more variables: ligand_expression , ligand_expression_scaled , ligand_fraction , +## # ligand_score_spatial , receptor_score , receptor_significant , receptor_present , +## # receptor_expression , receptor_expression_scaled , receptor_fraction , +## # receptor_score_spatial , ligand_scaled_receptor_expression_fraction , ## # avg_score_ligand_receptor , activity , activity_normalized , scaled_ligand_score , ## # scaled_ligand_expression_scaled , scaled_receptor_score , scaled_receptor_expression_scaled , -## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , scaled_receptor_score_spatial , -## # scaled_ligand_fraction_adapted , scaled_receptor_fraction_adapted , scaled_activity , ... +## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , … prioritization_tables$prioritization_tbl_ligand_target %>% filter(receiver == niches[[1]]$receiver) %>% head(10) -## # A tibble: 10 x 20 -## niche receiver sender ligand_receptor ligand receptor bonafide target target_score target_signific~ target_present target_expressi~ -## -## 1 KC_niche KCs Hepato~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE Abca1 0.197 1 1 0.979 -## 2 KC_niche KCs Hepato~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE Actb 0.279 1 1 21.6 -## 3 KC_niche KCs Hepato~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE Ehd1 0.272 1 1 0.402 -## 4 KC_niche KCs Hepato~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE Hmox1 1.16 1 1 5.23 -## 5 KC_niche KCs Hepato~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE Sgk1 0.265 1 1 0.629 -## 6 KC_niche KCs Hepato~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE Tcf7l2 0.811 1 1 1.32 -## 7 KC_niche KCs Hepato~ Apoa1--Lrp1 Apoa1 Lrp1 FALSE Tsc22~ 0.263 1 1 0.635 -## 8 KC_niche KCs Hepato~ Apoa1--Msr1 Apoa1 Msr1 FALSE Abca1 0.197 1 1 0.979 -## 9 KC_niche KCs Hepato~ Apoa1--Msr1 Apoa1 Msr1 FALSE Actb 0.279 1 1 21.6 -## 10 KC_niche KCs Hepato~ Apoa1--Msr1 Apoa1 Msr1 FALSE Ehd1 0.272 1 1 0.402 -## # ... with 8 more variables: target_expression_scaled , target_fraction , ligand_target_weight , activity , -## # activity_normalized , scaled_activity , scaled_activity_normalized , prioritization_score +## # A tibble: 10 × 19 +## niche receiver sender ligand_receptor ligand receptor target target_score target_signific… target_present +## +## 1 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Abca1 0.197 1 1 +## 2 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Hmox1 1.16 1 1 +## 3 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Il1a 0.152 1 1 +## 4 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Pten 0.378 1 1 +## 5 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Sgk1 0.265 1 1 +## 6 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Stat1 0.273 1 1 +## 7 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Tcf7l2 0.811 1 1 +## 8 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Txnip 0.342 1 1 +## 9 KC_niche KCs Hepatocyte… Apoc3--Lrp1 Apoc3 Lrp1 Vcam1 0.820 1 1 +## 10 KC_niche KCs Hepatocyte… Apoa2--Lrp1 Apoa2 Lrp1 Abca1 0.197 1 1 +## # … with 9 more variables: target_expression , target_expression_scaled , target_fraction , +## # ligand_target_weight , activity , activity_normalized , scaled_activity , +## # scaled_activity_normalized , prioritization_score prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == niches[[2]]$receiver) %>% head(10) -## # A tibble: 10 x 37 -## niche receiver sender ligand_receptor ligand receptor bonafide ligand_score ligand_significa~ ligand_present ligand_expressi~ -## -## 1 MoMac2_niche MoMac2 Cholangi~ Spp1--Itga4 Spp1 Itga4 TRUE 6.09 1 1 72.4 -## 2 MoMac2_niche MoMac2 Cholangi~ Spp1--Cd44 Spp1 Cd44 TRUE 6.09 1 1 72.4 -## 3 MoMac2_niche MoMac2 Cholangi~ Spp1--Itgb5 Spp1 Itgb5 TRUE 6.09 1 1 72.4 -## 4 MoMac2_niche MoMac2 Cholangi~ Spp1--Itgav Spp1 Itgav TRUE 6.09 1 1 72.4 -## 5 MoMac2_niche MoMac2 Cholangi~ Spp1--Itgb1 Spp1 Itgb1 TRUE 6.09 1 1 72.4 -## 6 MoMac2_niche MoMac2 Cholangi~ Spp1--Itga9 Spp1 Itga9 TRUE 6.09 1 1 72.4 -## 7 MoMac2_niche MoMac2 Cholangi~ Spp1--Ncstn Spp1 Ncstn FALSE 6.09 1 1 72.4 -## 8 MoMac2_niche MoMac2 Cholangi~ Spp1--Itga5 Spp1 Itga5 FALSE 6.09 1 1 72.4 -## 9 MoMac2_niche MoMac2 Fibrobla~ Lama2--Rpsa Lama2 Rpsa TRUE 1.51 1 1 3.19 -## 10 MoMac2_niche MoMac2 Cholangi~ Cyr61--Itgb2 Cyr61 Itgb2 TRUE 0.812 1 1 3.11 -## # ... with 26 more variables: ligand_expression_scaled , ligand_fraction , ligand_score_spatial , receptor_score , -## # receptor_significant , receptor_present , receptor_expression , receptor_expression_scaled , -## # receptor_fraction , receptor_score_spatial , ligand_scaled_receptor_expression_fraction , +## # A tibble: 10 × 36 +## niche receiver sender ligand_receptor ligand receptor ligand_score ligand_signific… ligand_present +## +## 1 MoMac2_niche MoMac2 Cholangiocytes Spp1--Cd44 Spp1 Cd44 6.09 1 1 +## 2 MoMac2_niche MoMac2 Cholangiocytes Spp1--Itga4 Spp1 Itga4 6.09 1 1 +## 3 MoMac2_niche MoMac2 Cholangiocytes Spp1--Itgb5 Spp1 Itgb5 6.09 1 1 +## 4 MoMac2_niche MoMac2 Cholangiocytes Spp1--Itgav Spp1 Itgav 6.09 1 1 +## 5 MoMac2_niche MoMac2 Cholangiocytes Spp1--Itgb1 Spp1 Itgb1 6.09 1 1 +## 6 MoMac2_niche MoMac2 Cholangiocytes Clu--Trem2 Clu Trem2 3.79 1 1 +## 7 MoMac2_niche MoMac2 Cholangiocytes Spp1--Itga9 Spp1 Itga9 6.09 1 1 +## 8 MoMac2_niche MoMac2 Cholangiocytes Spp1--Itga5 Spp1 Itga5 6.09 1 1 +## 9 MoMac2_niche MoMac2 Cholangiocytes Spp1--S1pr1 Spp1 S1pr1 6.09 1 1 +## 10 MoMac2_niche MoMac2 Fibroblast 2 Lama2--Rpsa Lama2 Rpsa 1.51 1 1 +## # … with 27 more variables: ligand_expression , ligand_expression_scaled , ligand_fraction , +## # ligand_score_spatial , receptor_score , receptor_significant , receptor_present , +## # receptor_expression , receptor_expression_scaled , receptor_fraction , +## # receptor_score_spatial , ligand_scaled_receptor_expression_fraction , ## # avg_score_ligand_receptor , activity , activity_normalized , scaled_ligand_score , ## # scaled_ligand_expression_scaled , scaled_receptor_score , scaled_receptor_expression_scaled , -## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , scaled_receptor_score_spatial , -## # scaled_ligand_fraction_adapted , scaled_receptor_fraction_adapted , scaled_activity , ... +## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , … prioritization_tables$prioritization_tbl_ligand_target %>% filter(receiver == niches[[2]]$receiver) %>% head(10) -## # A tibble: 10 x 20 -## niche receiver sender ligand_receptor ligand receptor bonafide target target_score target_signific~ target_present target_expressi~ -## -## 1 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Ahnak 1.05 1 1 1.36 -## 2 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Cdkn1a 0.609 1 1 0.801 -## 3 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Cxcr4 0.374 1 1 0.717 -## 4 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Dhrs3 0.371 1 1 0.743 -## 5 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Fn1 0.360 1 1 0.456 -## 6 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Gadd4~ 0.180 1 1 0.474 -## 7 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Gapdh 0.656 1 1 3.27 -## 8 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Gdf15 0.479 1 1 0.521 -## 9 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Gsn 0.221 1 1 0.647 -## 10 MoMac2~ MoMac2 Cholang~ Spp1--Itga4 Spp1 Itga4 TRUE Plec 0.154 1 1 0.164 -## # ... with 8 more variables: target_expression_scaled , target_fraction , ligand_target_weight , activity , -## # activity_normalized , scaled_activity , scaled_activity_normalized , prioritization_score +## # A tibble: 10 × 19 +## niche receiver sender ligand_receptor ligand receptor target target_score target_signific… target_present +## +## 1 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Alox5… 0.382 1 1 +## 2 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Bax 0.334 1 1 +## 3 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Bcl2l… 0.280 1 1 +## 4 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Cdkn1a 0.609 1 1 +## 5 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Cxcr4 0.374 1 1 +## 6 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Dhrs3 0.371 1 1 +## 7 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Emp1 0.398 1 1 +## 8 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Fn1 0.360 1 1 +## 9 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Gadd4… 0.180 1 1 +## 10 MoMac2_niche MoMac2 Cholan… Spp1--Cd44 Spp1 Cd44 Gdf15 0.479 1 1 +## # … with 9 more variables: target_expression , target_expression_scaled , target_fraction , +## # ligand_target_weight , activity , activity_normalized , scaled_activity , +## # scaled_activity_normalized , prioritization_score prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == niches[[3]]$receiver) %>% head(10) -## # A tibble: 10 x 37 -## niche receiver sender ligand_receptor ligand receptor bonafide ligand_score ligand_signific~ ligand_present ligand_expressi~ -## -## 1 MoMac1_niche MoMac1 Mesotheli~ C3--C3ar1 C3 C3ar1 TRUE 3.52 1 1 22.6 -## 2 MoMac1_niche MoMac1 Capsule f~ C3--C3ar1 C3 C3ar1 TRUE 3.42 1 1 20.9 -## 3 MoMac1_niche MoMac1 Mesotheli~ C3--Itgb2 C3 Itgb2 TRUE 3.52 1 1 22.6 -## 4 MoMac1_niche MoMac1 Mesotheli~ C3--Itgax C3 Itgax TRUE 3.52 1 1 22.6 -## 5 MoMac1_niche MoMac1 Mesotheli~ C3--Lrp1 C3 Lrp1 TRUE 3.52 1 1 22.6 -## 6 MoMac1_niche MoMac1 Capsule f~ C3--Itgb2 C3 Itgb2 TRUE 3.42 1 1 20.9 -## 7 MoMac1_niche MoMac1 Capsule f~ C3--Itgax C3 Itgax TRUE 3.42 1 1 20.9 -## 8 MoMac1_niche MoMac1 Capsule f~ C3--Lrp1 C3 Lrp1 TRUE 3.42 1 1 20.9 -## 9 MoMac1_niche MoMac1 Capsule f~ Rarres2--Cmklr1 Rarre~ Cmklr1 TRUE 2.50 1 1 15.8 -## 10 MoMac1_niche MoMac1 Mesotheli~ C3--Ccr5 C3 Ccr5 FALSE 3.52 1 1 22.6 -## # ... with 26 more variables: ligand_expression_scaled , ligand_fraction , ligand_score_spatial , receptor_score , -## # receptor_significant , receptor_present , receptor_expression , receptor_expression_scaled , -## # receptor_fraction , receptor_score_spatial , ligand_scaled_receptor_expression_fraction , +## # A tibble: 10 × 36 +## niche receiver sender ligand_receptor ligand receptor ligand_score ligand_signific… ligand_present +## +## 1 MoMac1_niche MoMac1 Mesothelial c… C3--C3ar1 C3 C3ar1 3.52 1 1 +## 2 MoMac1_niche MoMac1 Capsule fibro… C3--C3ar1 C3 C3ar1 3.42 1 1 +## 3 MoMac1_niche MoMac1 Capsule fibro… Lgals1--Ptprc Lgals1 Ptprc 2.80 1 1 +## 4 MoMac1_niche MoMac1 Capsule fibro… Slpi--Cd4 Slpi Cd4 4.37 1 1 +## 5 MoMac1_niche MoMac1 Mesothelial c… C3--Itgb2 C3 Itgb2 3.52 1 1 +## 6 MoMac1_niche MoMac1 Mesothelial c… Slpi--Cd4 Slpi Cd4 4.26 1 1 +## 7 MoMac1_niche MoMac1 Mesothelial c… C3--Lrp1 C3 Lrp1 3.52 1 1 +## 8 MoMac1_niche MoMac1 Mesothelial c… C3--Itgax C3 Itgax 3.52 1 1 +## 9 MoMac1_niche MoMac1 Capsule fibro… C3--Itgb2 C3 Itgb2 3.42 1 1 +## 10 MoMac1_niche MoMac1 Capsule fibro… C3--Lrp1 C3 Lrp1 3.42 1 1 +## # … with 27 more variables: ligand_expression , ligand_expression_scaled , ligand_fraction , +## # ligand_score_spatial , receptor_score , receptor_significant , receptor_present , +## # receptor_expression , receptor_expression_scaled , receptor_fraction , +## # receptor_score_spatial , ligand_scaled_receptor_expression_fraction , ## # avg_score_ligand_receptor , activity , activity_normalized , scaled_ligand_score , ## # scaled_ligand_expression_scaled , scaled_receptor_score , scaled_receptor_expression_scaled , -## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , scaled_receptor_score_spatial , -## # scaled_ligand_fraction_adapted , scaled_receptor_fraction_adapted , scaled_activity , ... +## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , … prioritization_tables$prioritization_tbl_ligand_target %>% filter(receiver == niches[[3]]$receiver) %>% head(10) -## # A tibble: 10 x 20 -## niche receiver sender ligand_receptor ligand receptor bonafide target target_score target_signific~ target_present target_expressi~ -## -## 1 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Btg2 0.615 1 1 1.51 -## 2 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Ccnd2 0.505 1 1 0.490 -## 3 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Cdk6 0.221 1 1 0.320 -## 4 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Ier5 0.396 1 1 1.16 -## 5 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Il1b 0.956 1 1 3.74 -## 6 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Jun 0.765 1 1 1.93 -## 7 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Pdgfb 0.243 1 1 0.510 -## 8 MoMac1~ MoMac1 Mesothe~ C3--C3ar1 C3 C3ar1 TRUE Ubc 0.306 1 1 2.16 -## 9 MoMac1~ MoMac1 Capsule~ C3--C3ar1 C3 C3ar1 TRUE Btg2 0.615 1 1 1.51 -## 10 MoMac1~ MoMac1 Capsule~ C3--C3ar1 C3 C3ar1 TRUE Ccnd2 0.505 1 1 0.490 -## # ... with 8 more variables: target_expression_scaled , target_fraction , ligand_target_weight , activity , -## # activity_normalized , scaled_activity , scaled_activity_normalized , prioritization_score +## # A tibble: 10 × 19 +## niche receiver sender ligand_receptor ligand receptor target target_score target_signific… target_present +## +## 1 MoMac1_niche MoMac1 Mesoth… C3--C3ar1 C3 C3ar1 Btg2 0.615 1 1 +## 2 MoMac1_niche MoMac1 Mesoth… C3--C3ar1 C3 C3ar1 Ccnd2 0.505 1 1 +## 3 MoMac1_niche MoMac1 Mesoth… C3--C3ar1 C3 C3ar1 Cdk6 0.221 1 1 +## 4 MoMac1_niche MoMac1 Mesoth… C3--C3ar1 C3 C3ar1 H2-D1 0.318 1 1 +## 5 MoMac1_niche MoMac1 Mesoth… C3--C3ar1 C3 C3ar1 Il1b 0.956 1 1 +## 6 MoMac1_niche MoMac1 Mesoth… C3--C3ar1 C3 C3ar1 Jun 0.765 1 1 +## 7 MoMac1_niche MoMac1 Capsul… C3--C3ar1 C3 C3ar1 Btg2 0.615 1 1 +## 8 MoMac1_niche MoMac1 Capsul… C3--C3ar1 C3 C3ar1 Ccnd2 0.505 1 1 +## 9 MoMac1_niche MoMac1 Capsul… C3--C3ar1 C3 C3ar1 Cdk6 0.221 1 1 +## 10 MoMac1_niche MoMac1 Capsul… C3--C3ar1 C3 C3ar1 H2-D1 0.318 1 1 +## # … with 9 more variables: target_expression , target_expression_scaled , target_fraction , +## # ligand_target_weight , activity , activity_normalized , scaled_activity , +## # scaled_activity_normalized , prioritization_score prioritization_tables$prioritization_tbl_ligand_receptor = prioritization_tables$prioritization_tbl_ligand_receptor %>% mutate(receiver = factor(receiver, levels = c("KCs","MoMac1","MoMac2")), niche = factor(niche, levels = c("KC_niche","MoMac1_niche","MoMac2_niche"))) prioritization_tables$prioritization_tbl_ligand_target = prioritization_tables$prioritization_tbl_ligand_target %>% mutate(receiver = factor(receiver, levels = c("KCs","MoMac1","MoMac2")), niche = factor(niche, levels = c("KC_niche","MoMac1_niche","MoMac2_niche"))) @@ -781,7 +746,7 @@ lfc_plot = make_ligand_receptor_lfc_plot(receiver_oi, prioritized_tbl_oi, priori lfc_plot ``` -![](differential_nichenet_files/figure-gfm/unnamed-chunk-25-1.png) +![](differential_nichenet_files/figure-gfm/unnamed-chunk-23-1.png) Show the spatialDE as additional information @@ -790,7 +755,7 @@ lfc_plot_spatial = make_ligand_receptor_lfc_spatial_plot(receiver_oi, prioritize lfc_plot_spatial ``` -![](differential_nichenet_files/figure-gfm/unnamed-chunk-26-1.png) +![](differential_nichenet_files/figure-gfm/unnamed-chunk-24-1.png) From this plot, you can see that some KC-niche ligands like Dll4 (by LSEC) and Il34 (by Stellate cells) are higher expressed in the @@ -814,7 +779,7 @@ exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, exprs_activity_target_plot$combined_plot ``` -![](differential_nichenet_files/figure-gfm/unnamed-chunk-27-1.png) +![](differential_nichenet_files/figure-gfm/unnamed-chunk-25-1.png) On this plot, we can see that some strongly DE ligand-receptor pairs in the KC niche, have also high scaled ligand activity on KCs - making them @@ -841,7 +806,7 @@ exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, exprs_activity_target_plot$combined_plot ``` -![](differential_nichenet_files/figure-gfm/unnamed-chunk-28-1.png) +![](differential_nichenet_files/figure-gfm/unnamed-chunk-26-1.png) ## Circos plot of prioritized ligand-receptor pairs @@ -849,6 +814,7 @@ Because a top50 is too much to visualize in a circos plot, we will only visualize the top 15. ``` r + filtered_ligands = ligand_prioritized_tbl_oi %>% filter(receiver == receiver_oi) %>% top_n(15, prioritization_score) %>% pull(ligand) %>% unique() prioritized_tbl_oi = prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(ligand %in% filtered_ligands) %>% select(niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct() %>% inner_join(top_ligand_receptor_niche_df) %>% group_by(ligand) %>% filter(receiver == receiver_oi) %>% top_n(2, prioritization_score) %>% ungroup() @@ -859,7 +825,7 @@ colors_receiver = c("lavender") %>% magrittr::set_names(prioritized_tbl_oi$rece circos_output = make_circos_lr(prioritized_tbl_oi, colors_sender, colors_receiver) ``` -![](differential_nichenet_files/figure-gfm/unnamed-chunk-29-1.png)![](differential_nichenet_files/figure-gfm/unnamed-chunk-29-2.png) +![](differential_nichenet_files/figure-gfm/unnamed-chunk-27-1.png)![](differential_nichenet_files/figure-gfm/unnamed-chunk-27-2.png) ``` r # circos_output$p_circos @@ -877,7 +843,7 @@ lfc_plot = make_ligand_receptor_lfc_plot(receiver_oi, prioritized_tbl_oi, priori lfc_plot ``` -![](differential_nichenet_files/figure-gfm/unnamed-chunk-30-1.png) +![](differential_nichenet_files/figure-gfm/unnamed-chunk-28-1.png) ## Visualization for the other liver macrophages: bile duct @@ -891,7 +857,7 @@ lfc_plot = make_ligand_receptor_lfc_plot(receiver_oi, prioritized_tbl_oi, priori lfc_plot ``` -![](differential_nichenet_files/figure-gfm/unnamed-chunk-31-1.png) +![](differential_nichenet_files/figure-gfm/unnamed-chunk-29-1.png) # Notes, limitations, and comparison to default NicheNet. @@ -934,7 +900,7 @@ consider them to be very good candidates in regulating the process of interest, and we recommend testing these candidates for further experimental validation. -# References +### References Browaeys, R., Saelens, W. & Saeys, Y. NicheNet: modeling intercellular communication by linking ligands to target genes. Nat Methods (2019) diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-2-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-2-1.png index 3d1b41a..993daae 100644 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-2-1.png and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-2-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-2-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-2-2.png deleted file mode 100644 index b6e29cd..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-2-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-23-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-23-1.png new file mode 100644 index 0000000..939c881 Binary files /dev/null and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-23-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-24-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-24-1.png index ec59d4b..37177b7 100644 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-24-1.png and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-24-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-24-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-24-2.png deleted file mode 100644 index b6e29cd..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-24-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-25-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-25-1.png index 378d3cb..56742cc 100644 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-25-1.png and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-25-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-26-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-26-1.png index 8464cfe..6ab9de2 100644 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-26-1.png and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-26-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-27-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-27-1.png index e8a8cd4..1e65f11 100644 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-27-1.png and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-27-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-27-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-27-2.png new file mode 100644 index 0000000..c5c6883 Binary files /dev/null and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-27-2.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-28-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-28-1.png index 4ca5df7..4e2d83e 100644 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-28-1.png and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-28-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-28-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-28-2.png deleted file mode 100644 index 73f6803..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-28-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-1.png index aef8000..d13d462 100644 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-1.png and b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-1.png differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-2.png deleted file mode 100644 index 73f6803..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-3.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-3.png deleted file mode 100644 index dfff8bb..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-29-3.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-30-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-30-1.png deleted file mode 100644 index 7c9c393..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-30-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-30-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-30-2.png deleted file mode 100644 index 019d4b0..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-30-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-31-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-31-1.png deleted file mode 100644 index b625827..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-31-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-4-1.png deleted file mode 100644 index 8bcbc91..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-4-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-55-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-55-1.png deleted file mode 100644 index ec59d4b..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-55-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-55-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-55-2.png deleted file mode 100644 index b6e29cd..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-55-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-57-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-57-1.png deleted file mode 100644 index 8bcbc91..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-57-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-80-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-80-1.png deleted file mode 100644 index 9494c38..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-80-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-81-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-81-1.png deleted file mode 100644 index 06f15f0..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-81-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-82-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-82-1.png deleted file mode 100644 index 6b3cf9d..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-82-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-1.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-1.png deleted file mode 100644 index 70aa06a..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-2.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-2.png deleted file mode 100644 index 019d4b0..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-3.png b/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-3.png deleted file mode 100644 index 70aa06a..0000000 Binary files a/vignettes/differential_nichenet_files/figure-gfm/unnamed-chunk-83-3.png and /dev/null differ diff --git a/vignettes/differential_nichenet_pEMT.Rmd b/vignettes/differential_nichenet_pEMT.Rmd index 164824a..0f3460e 100644 --- a/vignettes/differential_nichenet_pEMT.Rmd +++ b/vignettes/differential_nichenet_pEMT.Rmd @@ -29,7 +29,7 @@ The goal of Differential NicheNet is to predict ligand-receptors pairs that are This vignette guides you in detail through all the steps of a Differential NicheNet analysis. As example expression data of interacting cells, we will use data from Puram et al. to explore intercellular communication in the tumor microenvironment in head and neck squamous cell carcinoma (HNSCC) [@puram_single-cell_2017]. More specifically, we will look at cell-cell communication differences between pEMT-high and pEMT-low tumors (pEMT = partial epithelial-mesenschymal transition). In this data, we thus have 2 conditions/niches, but this pipeline is also usable for more conditions/niches. -The used ligand-receptor network and ligand-target matrix can be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). +The used ligand-receptor network and ligand-target matrix can be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). The Seurat object containing expression data of interacting cells in HNSCC can also be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4675430.svg)](https://doi.org/10.5281/zenodo.4675430). # 0. Read in the expression data of interest, and the NicheNet ligand-receptor network and ligand-target matrix @@ -76,33 +76,28 @@ seurat_obj = SetIdent(seurat_obj, value = seurat_obj[[celltype_id]]) ## Read in the NicheNet ligand-receptor network and ligand-target matrix ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ``` ```{r} -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -lr_network = lr_network %>% mutate(bonafide = ! database %in% c("ppi_prediction","ppi_prediction_go")) -lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor, bonafide) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) head(lr_network) ``` -Note: if your data is of mouse origin: convert human gene symbols to their one-to-one orthologs +Note: if your data is of mouse origin: use the mouse networks ```{r} organism = "human" # user adaptation required on own dataset -``` -```{r} if(organism == "mouse"){ - lr_network = lr_network %>% mutate(ligand = convert_human_to_mouse_symbols(ligand), receptor = convert_human_to_mouse_symbols(receptor)) %>% drop_na() - - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) } -``` + +``` # 1. Define the niches/microenvironments of interest @@ -136,6 +131,7 @@ DE will be calculated for each pairwise sender (or receiver) cell type comparisi ```{r} assay_oi = "SCT" # other possibilities: RNA,... +seurat_obj = alias_to_symbol_seurat(seurat_obj, organism = "human") DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% unique()), niches = niches, type = "sender", assay_oi = assay_oi) # only ligands important for sender cell types DE_receiver = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), niches = niches, type = "receiver", assay_oi = assay_oi) # only receptors now, later on: DE analysis to find targets DE_sender = DE_sender %>% mutate(avg_log2FC = ifelse(avg_log2FC == Inf, max(avg_log2FC[is.finite(avg_log2FC)]), ifelse(avg_log2FC == -Inf, min(avg_log2FC[is.finite(avg_log2FC)]), avg_log2FC))) @@ -335,7 +331,7 @@ exprs_sender_receiver = lr_network %>% inner_join(exprs_tbl_ligand, by = c("ligand")) %>% inner_join(exprs_tbl_receptor, by = c("receptor")) %>% inner_join(DE_sender_receiver %>% distinct(niche, sender, receiver)) -ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction, bonafide) %>% distinct() %>% ungroup() +ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction) %>% distinct() %>% ungroup() ``` # 7. Prioritization of ligand-receptor and ligand-target links @@ -366,9 +362,6 @@ We provide the user the option to consider the following properties for prioriti * Normalized ligand activity: to further prioritize ligand-receptor pairs based on their predicted effect of the ligand-receptor interaction on the gene expression in the receiver cell type - normalization of activity is done because we found that some datasets/conditions/niches have higher baseline activity values than others - normalized ligand activity accords to 'relative' enrichment of target genes of a ligand within the affected receiver genes. `prioritizing_weights` argument: `"scaled_activity_normalized"`. Recommended weight: at least 1. -* Prior knowledge quality of the L-R interaction: the NicheNet LR network consists of two types of interactions: L-R pairs documented in curated databases, and L-R pairs predicted based on gene annotation and PPIs. The former are categorized as 'bona fide' interactions. To rank bona fide interactions higher, but not exlude potentially interesting non-bona-fide ones, we give bona fide interactions a score of 1, and non-bona-fide interactions a score fof 0.5. `prioritizing_weights` argument: `"bona_fide"` Recommend weight: at least 1. - - ```{r} prioritizing_weights = c("scaled_ligand_score" = 5, "scaled_ligand_expression_scaled" = 1, @@ -380,8 +373,7 @@ prioritizing_weights = c("scaled_ligand_score" = 5, "ligand_scaled_receptor_expression_fraction" = 1, "scaled_receptor_score_spatial" = 0, "scaled_activity" = 0, - "scaled_activity_normalized" = 1, - "bona_fide" = 1) + "scaled_activity_normalized" = 1) ``` Note: these settings will give substantially more weight to DE ligand-receptor pairs compared to activity. Users can change this if wanted, just like other settings can be changed if that would be better to tackle the specific biological question you want to address. @@ -512,7 +504,7 @@ For the opposite pairs with low-DE and high-activity that are not strongly prior When Ligand-Receptor pairs have both high DE and high activity, we can consider them to be very good candidates in regulating the process of interest, and we recommend testing these candidates for further experimental validation. -# References +### References Browaeys, R., Saelens, W. & Saeys, Y. NicheNet: modeling intercellular communication by linking ligands to target genes. Nat Methods (2019) doi:10.1038/s41592-019-0667-5 diff --git a/vignettes/differential_nichenet_pEMT.md b/vignettes/differential_nichenet_pEMT.md index c54f75e..1248600 100644 --- a/vignettes/differential_nichenet_pEMT.md +++ b/vignettes/differential_nichenet_pEMT.md @@ -29,7 +29,7 @@ conditions/niches. The used ligand-receptor network and ligand-target matrix can be downloaded from Zenodo -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). The Seurat object containing expression data of interacting cells in HNSCC can also be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4675430.svg)](https://doi.org/10.5281/zenodo.4675430). @@ -95,10 +95,10 @@ DimPlot(seurat_obj, group.by = "celltype_aggregate") ``` r seurat_obj@meta.data$celltype_aggregate %>% table() %>% sort(decreasing = TRUE) ## . -## Malignant_High T.cell_High Malignant_Low CAF_High myofibroblast_High Endothelial_High CAF_Low -## 1093 689 549 396 382 105 104 -## Myeloid_High myofibroblast_Low Endothelial_Low Myeloid_Low T.cell_Low -## 92 61 53 7 3 +## Malignant_High T.cell_High Malignant_Low CAF_High myofibroblast_High Endothelial_High +## 1093 689 549 396 382 105 +## CAF_Low Myeloid_High myofibroblast_Low Endothelial_Low Myeloid_Low T.cell_Low +## 104 92 61 53 7 3 ``` ``` r @@ -109,47 +109,40 @@ seurat_obj = SetIdent(seurat_obj, value = seurat_obj[[celltype_id]]) ## Read in the NicheNet ligand-receptor network and ligand-target matrix ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 +## A2M AANAT ABCA1 ACE ACE2 +## A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 +## A1BG 0.0018503922 0.0011108718 0.0014225077 0.0028594037 0.001139013 +## A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 +## A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 +## A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 ``` ``` r -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -lr_network = lr_network %>% mutate(bonafide = ! database %in% c("ppi_prediction","ppi_prediction_go")) -lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor, bonafide) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% distinct(ligand, receptor) head(lr_network) -## # A tibble: 6 x 3 -## ligand receptor bonafide -## -## 1 CXCL1 CXCR2 TRUE -## 2 CXCL2 CXCR2 TRUE -## 3 CXCL3 CXCR2 TRUE -## 4 CXCL5 CXCR2 TRUE -## 5 PPBP CXCR2 TRUE -## 6 CXCL6 CXCR2 TRUE +## # A tibble: 6 × 2 +## ligand receptor +## +## 1 A2M MMP2 +## 2 A2M MMP9 +## 3 A2M LRP1 +## 4 A2M KLK3 +## 5 AANAT MTNR1A +## 6 AANAT MTNR1B ``` -Note: if your data is of mouse origin: convert human gene symbols to -their one-to-one orthologs +Note: if your data is of mouse origin: use the mouse networks ``` r organism = "human" # user adaptation required on own dataset -``` -``` r if(organism == "mouse"){ - lr_network = lr_network %>% mutate(ligand = convert_human_to_mouse_symbols(ligand), receptor = convert_human_to_mouse_symbols(receptor)) %>% drop_na() - - colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() - rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) } ``` @@ -207,6 +200,7 @@ most abundant cell types. ``` r assay_oi = "SCT" # other possibilities: RNA,... +seurat_obj = alias_to_symbol_seurat(seurat_obj, organism = "human") DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$ligand %>% unique()), niches = niches, type = "sender", assay_oi = assay_oi) # only ligands important for sender cell types ## [1] "Calculate Sender DE between: myofibroblast_High and myofibroblast_Low" ## [2] "Calculate Sender DE between: myofibroblast_High and Endothelial_Low" @@ -214,11 +208,14 @@ DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_ ## [1] "Calculate Sender DE between: Endothelial_High and myofibroblast_Low" ## [2] "Calculate Sender DE between: Endothelial_High and Endothelial_Low" ## [3] "Calculate Sender DE between: Endothelial_High and CAF_Low" -## [1] "Calculate Sender DE between: CAF_High and myofibroblast_Low" "Calculate Sender DE between: CAF_High and Endothelial_Low" +## [1] "Calculate Sender DE between: CAF_High and myofibroblast_Low" +## [2] "Calculate Sender DE between: CAF_High and Endothelial_Low" ## [3] "Calculate Sender DE between: CAF_High and CAF_Low" -## [1] "Calculate Sender DE between: T.cell_High and myofibroblast_Low" "Calculate Sender DE between: T.cell_High and Endothelial_Low" +## [1] "Calculate Sender DE between: T.cell_High and myofibroblast_Low" +## [2] "Calculate Sender DE between: T.cell_High and Endothelial_Low" ## [3] "Calculate Sender DE between: T.cell_High and CAF_Low" -## [1] "Calculate Sender DE between: Myeloid_High and myofibroblast_Low" "Calculate Sender DE between: Myeloid_High and Endothelial_Low" +## [1] "Calculate Sender DE between: Myeloid_High and myofibroblast_Low" +## [2] "Calculate Sender DE between: Myeloid_High and Endothelial_Low" ## [3] "Calculate Sender DE between: Myeloid_High and CAF_Low" ## [1] "Calculate Sender DE between: myofibroblast_Low and myofibroblast_High" ## [2] "Calculate Sender DE between: myofibroblast_Low and Endothelial_High" @@ -230,11 +227,13 @@ DE_sender = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_ ## [3] "Calculate Sender DE between: Endothelial_Low and CAF_High" ## [4] "Calculate Sender DE between: Endothelial_Low and T.cell_High" ## [5] "Calculate Sender DE between: Endothelial_Low and Myeloid_High" -## [1] "Calculate Sender DE between: CAF_Low and myofibroblast_High" "Calculate Sender DE between: CAF_Low and Endothelial_High" -## [3] "Calculate Sender DE between: CAF_Low and CAF_High" "Calculate Sender DE between: CAF_Low and T.cell_High" +## [1] "Calculate Sender DE between: CAF_Low and myofibroblast_High" +## [2] "Calculate Sender DE between: CAF_Low and Endothelial_High" +## [3] "Calculate Sender DE between: CAF_Low and CAF_High" +## [4] "Calculate Sender DE between: CAF_Low and T.cell_High" ## [5] "Calculate Sender DE between: CAF_Low and Myeloid_High" DE_receiver = calculate_niche_de(seurat_obj = seurat_obj %>% subset(features = lr_network$receptor %>% unique()), niches = niches, type = "receiver", assay_oi = assay_oi) # only receptors now, later on: DE analysis to find targets -## # A tibble: 1 x 2 +## # A tibble: 1 × 2 ## receiver receiver_other_niche ## ## 1 Malignant_High Malignant_Low @@ -392,40 +391,23 @@ geneset_niche2 = DE_receiver_processed_targets %>% filter(receiver == niches[[2] # Good idea to check which genes will be left out of the ligand activity analysis (=when not present in the rownames of the ligand-target matrix). # If many genes are left out, this might point to some issue in the gene naming (eg gene aliases and old gene symbols, bad human-mouse mapping) geneset_niche1 %>% setdiff(rownames(ligand_target_matrix)) -## [1] "ANXA8L2" "PRKCDBP" "IL8" "PTRF" "SEPP1" "C1orf186" "CCDC109B" "C10orf54" -## [9] "LEPREL1" "ZNF812" "LOC645638" "LOC401397" "LINC00162" "DFNA5" "PLK1S1" "ZMYM6NB" -## [17] "C19orf10" "CTSL1" "SQRDL" "LOC375295" "WBP5" "LOC100505633" "AIM1" "C1orf63" -## [25] "LOC100507463" "GPR115" "VIMP" "SEP15" "C1orf172" "NAPRT1" "LHFP" "KRT16P1" -## [33] "C7orf10" "PTPLA" "GRAMD3" "CPSF3L" "MESDC2" "C10orf10" "KIAA1609" "CCDC53" -## [41] "TXLNG2P" "NGFRAP1" "ERO1L" "FAM134A" "LSMD1" "TCEB2" "B3GALTL" "HN1L" -## [49] "LOC550643" "KIAA0922" "GLT25D1" "FAM127A" "C1orf151-NBL1" "SEPW1" "GPR126" "LOC100505806" -## [57] "LINC00478" "TCEB1" "GRAMD2" "GNB2L1" "KIRREL" +## [1] "ANXA8L2" "LOC645638" "LOC401397" "LOC375295" "LOC100505633" "LOC100507463" "KRT16P1" +## [8] "LSMD1" "HN1L" "LOC550643" "LOC100505806" "SARS" "LOC100130476" geneset_niche2 %>% setdiff(rownames(ligand_target_matrix)) -## [1] "LOC344887" "AGPAT9" "C1orf110" "KIAA1467" "LOC100292680" "EPT1" "CT45A4" "LOC654433" -## [9] "UPK3BL" "LINC00340" "LOC100128338" "FAM60A" "CCDC144C" "LOC401109" "LOC286467" "LEPREL4" -## [17] "LOC731275" "LOC642236" "LINC00516" "LOC101101776" "SC5DL" "PVRL4" "LOC100130093" "LINC00338" -## [25] "LOC100132891" "PPAP2C" "C6orf1" "C2orf47" "WHSC1L1" "LOC100289019" "SETD8" "KDM5B-AS1" -## [33] "SPG20" "CXCR7" "LOC100216479" "LOC100505761" "MGC57346" "LPHN3" "CENPC1" "C11orf93" -## [41] "C14orf169" "LOC100506060" "FLJ31485" "LOC440905" "MLF1IP" "TMEM194A" "RRP7B" "REXO1L1" -## [49] "LOC100129269" "KIAA1715" "CTAGE5" "LOC202781" "LOC100506714" "LOC401164" "UTS2D" "LOC146880" -## [57] "KIAA1804" "C5orf55" "C21orf119" "PRUNE" "LRRC16A" "LOC339240" "FLJ35024" "C5orf28" -## [65] "LOC100505876" "MGC21881" "LOC100133985" "PPAPDC2" "FRG1B" "CECR5" "LOC100129361" "CCBL1" -## [73] "PTPLAD1" "MST4" "LOC550112" "LOC389791" "CCDC90A" "KIAA0195" "LOC100506469" "LOC100133161" -## [81] "LOC646719" "LOC728819" "BRE" "LOC284581" "LOC441081" "LOC728377" "LOC100134229" "C3orf65" -## [89] "SMEK2" "KIAA1737" "C17orf70" "PLEKHM1P" "LOC338758" "PCNXL2" "LOC91948" "C17orf89" -## [97] "LOC100505783" "SMCR7L" "C8orf4" "GPR56" "ATHL1" "LOC339535" "PPAPDC1B" "DAK" -## [105] "LOC100507173" "CRHR1-IT1" "PPAP2B" "ADCK4" "KIAA0146" "GYLTL1B" "LOC100272216" "LOC400027" -## [113] "WHSC1" "LOC100130855" "C7orf55" "C19orf40" "ADCK3" "C9orf142" "SGOL1" "LOC90834" -## [121] "PTPLAD2" "KIAA1967" "LOC100132352" "LOC100630918" "ADRBK2" "LINC00263" "FAM64A" "LOC401074" -## [129] "FAM179B" "RP1-177G6.2" "METTL21D" "ERO1LB" "FLJ45445" "NADKD1" "LOC100506233" "LOC100652772" -## [137] "FAM175A" "LINC00630" "C11orf82" "SETD5-AS1" "SGK196" "FLJ14186" "CCDC104" "FAM63A" -## [145] "NARG2" "MTERFD1" "CCDC74B-AS1" "LOC286186" "WDR67" "C12orf52" "FLJ30403" "KIAA2018" -## [153] "GCN1L1" "FLJ43681" "LOC152217" "FONG" "C18orf8" "ALG1L9P" "GTDC2" "LOC100507217" -## [161] "NBPF24" "WBSCR27" "C14orf1" "LOC284889" "KIAA0317" "FAM65A" "PMS2L2" "LUST" -## [169] "C15orf52" "FAM195A" "LOC399744" "PYCRL" "LOC338799" "LOC100506190" "C9orf91" "FLJ45340" -## [177] "LOC349196" "LOC100128881" "TOMM70A" "ALS2CR8" "LDOC1L" "HDGFRP3" "ZNF767" "LOC728558" -## [185] "LOC283693" "LEPREL2" "QTRTD1" "SELM" "C6orf25" "C1orf86" "HNRPLL" "LOC145820" -## [193] "LOC100289341" "C17orf85" "C3orf72" "C14orf64" "C9orf9" "LOC100506394" +## [1] "LOC344887" "AGPAT9" "LOC100292680" "CT45A4" "LOC654433" "LOC100128338" "LOC401109" +## [8] "LOC286467" "LOC100133331" "LOC440173" "LOC731275" "LOC642236" "LINC00516" "LOC101101776" +## [15] "LOC100130093" "LOC100132891" "C2orf47" "LOC100289019" "LOC100216479" "LOC100505761" "MGC57346" +## [22] "LOC100506060" "LOC728752" "FLJ31485" "LOC440905" "LOC100129269" "LOC285074" "LOC202781" +## [29] "LOC100506714" "LOC401164" "LOC146880" "KIAA1804" "LOC339240" "FLJ35024" "LOC100505876" +## [36] "MGC21881" "LOC100133985" "LOC100129361" "MST4" "LOC283922" "LOC550112" "LOC389791" +## [43] "LOC100506469" "LOC100133161" "LOC646719" "LOC728819" "LOC728377" "LOC100134229" "MUM1" +## [50] "LOC338758" "LOC91948" "LOC100505783" "LOC339535" "LOC150776" "LOC100507173" "LOC645513" +## [57] "LOC100272216" "LOC400027" "LOC100130855" "LOC100288748" "C7orf55" "LOC90834" "LOC100132352" +## [64] "LOC100630918" "MGC27345" "LOC401074" "RP1-177G6.2" "FLJ45445" "LOC100506233" "LOC100652772" +## [71] "FLJ14186" "MARS" "LOC100287042" "LOC286186" "FLJ30403" "FLJ43681" "LOC152217" +## [78] "LOC100507217" "NBPF24" "LOC284889" "PMS2L2" "LOC100130451" "LOC399744" "LOC338799" +## [85] "LOC100506190" "LOC400927" "FLJ45340" "LOC349196" "LOC100128881" "LOC728558" "LOC283693" +## [92] "LOC145820" "LOC100289341" "LOC100506394" length(geneset_niche1) ## [1] 1668 @@ -462,9 +444,9 @@ geneset_niche2 = DE_receiver_processed_targets %>% filter(receiver == niches[[2] # Good idea to check which genes will be left out of the ligand activity analysis (=when not present in the rownames of the ligand-target matrix). # If many genes are left out, this might point to some issue in the gene naming (eg gene aliases and old gene symbols, bad human-mouse mapping) geneset_niche1 %>% setdiff(rownames(ligand_target_matrix)) -## [1] "ANXA8L2" "PRKCDBP" "IL8" "PTRF" "SEPP1" "C1orf186" +## [1] "ANXA8L2" geneset_niche2 %>% setdiff(rownames(ligand_target_matrix)) -## [1] "LOC344887" "AGPAT9" "C1orf110" "KIAA1467" "LOC100292680" "EPT1" "CT45A4" +## [1] "LOC344887" "AGPAT9" "LOC100292680" "CT45A4" length(geneset_niche1) ## [1] 169 @@ -532,7 +514,7 @@ exprs_sender_receiver = lr_network %>% inner_join(exprs_tbl_ligand, by = c("ligand")) %>% inner_join(exprs_tbl_receptor, by = c("receptor")) %>% inner_join(DE_sender_receiver %>% distinct(niche, sender, receiver)) -ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction, bonafide) %>% distinct() %>% ungroup() +ligand_scaled_receptor_expression_fraction_df = exprs_sender_receiver %>% group_by(ligand, receiver) %>% mutate(rank_receptor_expression = dense_rank(receptor_expression), rank_receptor_fraction = dense_rank(receptor_fraction)) %>% mutate(ligand_scaled_receptor_expression_fraction = 0.5*( (rank_receptor_fraction / max(rank_receptor_fraction)) + ((rank_receptor_expression / max(rank_receptor_expression))) ) ) %>% distinct(ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction) %>% distinct() %>% ungroup() ``` # 7. Prioritization of ligand-receptor and ligand-target links @@ -546,125 +528,111 @@ We provide the user the option to consider the following properties for prioritization (of which the weights are defined in `prioritizing_weights`) : -- Ligand DE score: niche-specific expression of the ligand: by - default, this the minimum logFC between the sender of interest and - all the senders of the other niche(s). The higher the min logFC, the - higher the niche-specificity of the ligand. Therefore we recommend - to give this factor a very high weight. `prioritizing_weights` - argument: `"scaled_ligand_score"`. Recommended weight: 5 (at least - 1, max 5). - -- Scaled ligand expression: scaled expression of a ligand in one - sender compared to the other cell types in the dataset. This might - be useful to rescue potentially interesting ligands that have a high - scaled expression value, but a relatively small min logFC compared - to the other niche. One reason why this logFC might be small occurs - when (some) genes are not picked up efficiently by the used - sequencing technology (or other reasons for low RNA expression of - ligands). For example, we have observed that many ligands from the - Tgf-beta/BMP family are not picked up efficiently with single-nuclei - RNA sequencing compared to single-cell sequencing. - `prioritizing_weights` argument: - `"scaled_ligand_expression_scaled"`. Recommended weight: 1 (unless - technical reason for lower gene detection such as while using - Nuc-seq: then recommended to use a higher weight: 2). - -- Ligand expression fraction: Ligands that are expressed in a smaller - fraction of cells of a cell type than defined by - `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional - to their fraction (eg ligand expressed in 9% of cells will be ranked - higher than ligand expressed in 0.5% of cells). We opted for this - weighting based on fraction, instead of removing ligands that are - not expressed in more cells than this cutoff, because some - interesting ligands could be removed that way. Fraction of - expression is not taken into account for the prioritization if it is - already higher than the cutoff. `prioritizing_weights` argument: - `"ligand_fraction"`. Recommended weight: 1. - -- Ligand spatial DE score: spatial expression specificity of the - ligand. If the niche of interest is at a specific tissue location, - but some of the sender cell types of that niche are also present in - other locations, it can be very informative to further prioritize - ligands of that sender by looking how they are DE between the - spatial location of interest compared to the other locations. - `prioritizing_weights` argument: `"scaled_ligand_score_spatial"`. - Recommended weight: 2 (or 0 if not applicable). - -- Receptor DE score: niche-specific expression of the receptor: by - default, this the minimum logFC between the receiver of interest and - all the receiver of the other niche(s). The higher the min logFC, - the higher the niche-specificity of the receptor. Based on our - experience, we don’t suggest to give this as high importance as the - ligand DE, but this might depend on the specific case study. - `prioritizing_weights` argument: `"scaled_receptor_score"`. - Recommended weight: 0.5 (at least 0.5, and lower than - `"scaled_ligand_score"`). - -- Scaled receptor expression: scaled expression of a receptor in one - receiver compared to the other cell types in the dataset. This might - be useful to rescue potentially interesting receptors that have a - high scaled expression value, but a relatively small min logFC - compared to the other niche. One reason why this logFC might be - small occurs when (some) genes are not picked up efficiently by the - used sequencing technology. `prioritizing_weights` argument: - `"scaled_receptor_expression_scaled"`. Recommended weight: 0.5. - -- Receptor expression fraction: Receptors that are expressed in a - smaller fraction of cells of a cell type than defined by - `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional - to their fraction (eg receptor expressed in 9% of cells will be - ranked higher than receptor expressed in 0.5% of cells). We opted - for this weighting based on fraction, instead of removing receptors - that are not expressed in more cells than this cutoff, because some - interesting receptors could be removed that way. Fraction of - expression is not taken into account for the prioritization if it is - already higher than the cutoff. `prioritizing_weights` argument: - `"receptor_fraction"`. Recommended weight: 1. - -- Receptor expression strength: this factor let us give higher weights - to the most highly expressed receptor of a ligand in the receiver. - This let us rank higher one member of a receptor family if it higher - expressed than the other members. `prioritizing_weights` argument: - `"ligand_scaled_receptor_expression_fraction"`. Recommended value: 1 - (minimum: 0.5). - -- Receptor spatial DE score: spatial expression specificity of the - receptor. If the niche of interest is at a specific tissue location, - but the receiver cell type of that niche is also present in other - locations, it can be very informative to further prioritize - receptors of that receiver by looking how they are DE between the - spatial location of interest compared to the other locations. - `prioritizing_weights` argument: `"scaled_receptor_score_spatial"`. - Recommended weight: 1 (or 0 if not applicable). - -- Absolute ligand activity: to further prioritize ligand-receptor - pairs based on their predicted effect of the ligand-receptor - interaction on the gene expression in the receiver cell type - - absolute ligand activity accords to ‘absolute’ enrichment of target - genes of a ligand within the affected receiver genes. - `prioritizing_weights` argument: `"scaled_activity"`. Recommended - weight: 0, unless absolute enrichment of target genes is of specific - interest. - -- Normalized ligand activity: to further prioritize ligand-receptor - pairs based on their predicted effect of the ligand-receptor - interaction on the gene expression in the receiver cell type - - normalization of activity is done because we found that some - datasets/conditions/niches have higher baseline activity values than - others - normalized ligand activity accords to ‘relative’ enrichment - of target genes of a ligand within the affected receiver genes. - `prioritizing_weights` argument: `"scaled_activity_normalized"`. - Recommended weight: at least 1. - -- Prior knowledge quality of the L-R interaction: the NicheNet LR - network consists of two types of interactions: L-R pairs documented - in curated databases, and L-R pairs predicted based on gene - annotation and PPIs. The former are categorized as ‘bona fide’ - interactions. To rank bona fide interactions higher, but not exlude - potentially interesting non-bona-fide ones, we give bona fide - interactions a score of 1, and non-bona-fide interactions a score - fof 0.5. `prioritizing_weights` argument: `"bona_fide"` Recommend - weight: at least 1. +- Ligand DE score: niche-specific expression of the ligand: by default, + this the minimum logFC between the sender of interest and all the + senders of the other niche(s). The higher the min logFC, the higher + the niche-specificity of the ligand. Therefore we recommend to give + this factor a very high weight. `prioritizing_weights` argument: + `"scaled_ligand_score"`. Recommended weight: 5 (at least 1, max 5). + +- Scaled ligand expression: scaled expression of a ligand in one sender + compared to the other cell types in the dataset. This might be useful + to rescue potentially interesting ligands that have a high scaled + expression value, but a relatively small min logFC compared to the + other niche. One reason why this logFC might be small occurs when + (some) genes are not picked up efficiently by the used sequencing + technology (or other reasons for low RNA expression of ligands). For + example, we have observed that many ligands from the Tgf-beta/BMP + family are not picked up efficiently with single-nuclei RNA sequencing + compared to single-cell sequencing. `prioritizing_weights` argument: + `"scaled_ligand_expression_scaled"`. Recommended weight: 1 (unless + technical reason for lower gene detection such as while using Nuc-seq: + then recommended to use a higher weight: 2). + +- Ligand expression fraction: Ligands that are expressed in a smaller + fraction of cells of a cell type than defined by + `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional + to their fraction (eg ligand expressed in 9% of cells will be ranked + higher than ligand expressed in 0.5% of cells). We opted for this + weighting based on fraction, instead of removing ligands that are not + expressed in more cells than this cutoff, because some interesting + ligands could be removed that way. Fraction of expression is not taken + into account for the prioritization if it is already higher than the + cutoff. `prioritizing_weights` argument: `"ligand_fraction"`. + Recommended weight: 1. + +- Ligand spatial DE score: spatial expression specificity of the ligand. + If the niche of interest is at a specific tissue location, but some of + the sender cell types of that niche are also present in other + locations, it can be very informative to further prioritize ligands of + that sender by looking how they are DE between the spatial location of + interest compared to the other locations. `prioritizing_weights` + argument: `"scaled_ligand_score_spatial"`. Recommended weight: 2 (or 0 + if not applicable). + +- Receptor DE score: niche-specific expression of the receptor: by + default, this the minimum logFC between the receiver of interest and + all the receiver of the other niche(s). The higher the min logFC, the + higher the niche-specificity of the receptor. Based on our experience, + we don’t suggest to give this as high importance as the ligand DE, but + this might depend on the specific case study. `prioritizing_weights` + argument: `"scaled_receptor_score"`. Recommended weight: 0.5 (at least + 0.5, and lower than `"scaled_ligand_score"`). + +- Scaled receptor expression: scaled expression of a receptor in one + receiver compared to the other cell types in the dataset. This might + be useful to rescue potentially interesting receptors that have a high + scaled expression value, but a relatively small min logFC compared to + the other niche. One reason why this logFC might be small occurs when + (some) genes are not picked up efficiently by the used sequencing + technology. `prioritizing_weights` argument: + `"scaled_receptor_expression_scaled"`. Recommended weight: 0.5. + +- Receptor expression fraction: Receptors that are expressed in a + smaller fraction of cells of a cell type than defined by + `exprs_cutoff`(default: 0.10) will get a lower ranking, proportional + to their fraction (eg receptor expressed in 9% of cells will be ranked + higher than receptor expressed in 0.5% of cells). We opted for this + weighting based on fraction, instead of removing receptors that are + not expressed in more cells than this cutoff, because some interesting + receptors could be removed that way. Fraction of expression is not + taken into account for the prioritization if it is already higher than + the cutoff. `prioritizing_weights` argument: `"receptor_fraction"`. + Recommended weight: 1. + +- Receptor expression strength: this factor let us give higher weights + to the most highly expressed receptor of a ligand in the receiver. + This let us rank higher one member of a receptor family if it higher + expressed than the other members. `prioritizing_weights` argument: + `"ligand_scaled_receptor_expression_fraction"`. Recommended value: 1 + (minimum: 0.5). + +- Receptor spatial DE score: spatial expression specificity of the + receptor. If the niche of interest is at a specific tissue location, + but the receiver cell type of that niche is also present in other + locations, it can be very informative to further prioritize receptors + of that receiver by looking how they are DE between the spatial + location of interest compared to the other locations. + `prioritizing_weights` argument: `"scaled_receptor_score_spatial"`. + Recommended weight: 1 (or 0 if not applicable). + +- Absolute ligand activity: to further prioritize ligand-receptor pairs + based on their predicted effect of the ligand-receptor interaction on + the gene expression in the receiver cell type - absolute ligand + activity accords to ‘absolute’ enrichment of target genes of a ligand + within the affected receiver genes. `prioritizing_weights` argument: + `"scaled_activity"`. Recommended weight: 0, unless absolute enrichment + of target genes is of specific interest. + +- Normalized ligand activity: to further prioritize ligand-receptor + pairs based on their predicted effect of the ligand-receptor + interaction on the gene expression in the receiver cell type - + normalization of activity is done because we found that some + datasets/conditions/niches have higher baseline activity values than + others - normalized ligand activity accords to ‘relative’ enrichment + of target genes of a ligand within the affected receiver genes. + `prioritizing_weights` argument: `"scaled_activity_normalized"`. + Recommended weight: at least 1. ``` r prioritizing_weights = c("scaled_ligand_score" = 5, @@ -677,8 +645,7 @@ prioritizing_weights = c("scaled_ligand_score" = 5, "ligand_scaled_receptor_expression_fraction" = 1, "scaled_receptor_score_spatial" = 0, "scaled_activity" = 0, - "scaled_activity_normalized" = 1, - "bona_fide" = 1) + "scaled_activity_normalized" = 1) ``` Note: these settings will give substantially more weight to DE @@ -692,80 +659,82 @@ output = list(DE_sender_receiver = DE_sender_receiver, ligand_scaled_receptor_ex prioritization_tables = get_prioritization_tables(output, prioritizing_weights) prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == niches[[1]]$receiver) %>% head(10) -## # A tibble: 10 x 37 -## niche receiver sender ligand_receptor ligand receptor bonafide ligand_score ligand_signific~ ligand_present ligand_expressi~ -## -## 1 pEMT_High_niche Malignan~ T.cel~ PTPRC--MET PTPRC MET FALSE 3.22 1 1 9.32 -## 2 pEMT_High_niche Malignan~ T.cel~ PTPRC--EGFR PTPRC EGFR FALSE 3.22 1 1 9.32 -## 3 pEMT_High_niche Malignan~ T.cel~ PTPRC--CD44 PTPRC CD44 FALSE 3.22 1 1 9.32 -## 4 pEMT_High_niche Malignan~ T.cel~ PTPRC--ERBB2 PTPRC ERBB2 FALSE 3.22 1 1 9.32 -## 5 pEMT_High_niche Malignan~ T.cel~ PTPRC--IFNAR1 PTPRC IFNAR1 FALSE 3.22 1 1 9.32 -## 6 pEMT_High_niche Malignan~ T.cel~ TNF--TNFRSF21 TNF TNFRSF21 TRUE 1.74 1 1 2.34 -## 7 pEMT_High_niche Malignan~ Myelo~ SERPINA1--LRP1 SERPI~ LRP1 TRUE 2.52 1 1 4.83 -## 8 pEMT_High_niche Malignan~ Myelo~ IL1B--IL1RAP IL1B IL1RAP TRUE 1.50 1 1 1.93 -## 9 pEMT_High_niche Malignan~ Myelo~ IL1RN--IL1R2 IL1RN IL1R2 TRUE 1.62 1 1 2.07 -## 10 pEMT_High_niche Malignan~ T.cel~ PTPRC--INSR PTPRC INSR FALSE 3.22 1 1 9.32 -## # ... with 26 more variables: ligand_expression_scaled , ligand_fraction , ligand_score_spatial , receptor_score , -## # receptor_significant , receptor_present , receptor_expression , receptor_expression_scaled , -## # receptor_fraction , receptor_score_spatial , ligand_scaled_receptor_expression_fraction , +## # A tibble: 10 × 36 +## niche receiver sender ligand_receptor ligand receptor ligand_score ligand_signific… ligand_present +## +## 1 pEMT_High_niche Malignant_Hi… T.cel… PTPRC--DPP4 PTPRC DPP4 3.22 1 1 +## 2 pEMT_High_niche Malignant_Hi… T.cel… CD96--NECTIN1 CD96 NECTIN1 2.39 1 1 +## 3 pEMT_High_niche Malignant_Hi… Myelo… C1QA--CSPG4 C1QA CSPG4 2.52 1 1 +## 4 pEMT_High_niche Malignant_Hi… Myelo… SERPINA1--F12 SERPI… F12 2.52 1 1 +## 5 pEMT_High_niche Malignant_Hi… Myelo… C1QB--LRP1 C1QB LRP1 2.69 1 1 +## 6 pEMT_High_niche Malignant_Hi… Myelo… ITGB2--CD82 ITGB2 CD82 2.48 1 1 +## 7 pEMT_High_niche Malignant_Hi… Myelo… TYROBP--KLRD1 TYROBP KLRD1 2.77 1 1 +## 8 pEMT_High_niche Malignant_Hi… T.cel… TNF--TNFRSF21 TNF TNFRSF21 1.74 1 1 +## 9 pEMT_High_niche Malignant_Hi… Myelo… TYROBP--TREM1 TYROBP TREM1 2.77 1 1 +## 10 pEMT_High_niche Malignant_Hi… T.cel… SIRPG--CD47 SIRPG CD47 2.06 1 1 +## # … with 27 more variables: ligand_expression , ligand_expression_scaled , ligand_fraction , +## # ligand_score_spatial , receptor_score , receptor_significant , receptor_present , +## # receptor_expression , receptor_expression_scaled , receptor_fraction , +## # receptor_score_spatial , ligand_scaled_receptor_expression_fraction , ## # avg_score_ligand_receptor , activity , activity_normalized , scaled_ligand_score , ## # scaled_ligand_expression_scaled , scaled_receptor_score , scaled_receptor_expression_scaled , -## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , scaled_receptor_score_spatial , -## # scaled_ligand_fraction_adapted , scaled_receptor_fraction_adapted , scaled_activity , ... +## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , … prioritization_tables$prioritization_tbl_ligand_target %>% filter(receiver == niches[[1]]$receiver) %>% head(10) -## # A tibble: 10 x 20 -## niche receiver sender ligand_receptor ligand receptor bonafide target target_score target_signific~ target_present target_expressi~ -## -## 1 pEMT_H~ Malignan~ T.cell~ PTPRC--MET PTPRC MET FALSE EHF 1.04 1 1 1.88 -## 2 pEMT_H~ Malignan~ T.cell~ PTPRC--MET PTPRC MET FALSE GADD4~ 0.836 1 1 2.42 -## 3 pEMT_H~ Malignan~ T.cell~ PTPRC--MET PTPRC MET FALSE SERPI~ 0.889 1 1 1.79 -## 4 pEMT_H~ Malignan~ T.cell~ PTPRC--EGFR PTPRC EGFR FALSE EHF 1.04 1 1 1.88 -## 5 pEMT_H~ Malignan~ T.cell~ PTPRC--EGFR PTPRC EGFR FALSE GADD4~ 0.836 1 1 2.42 -## 6 pEMT_H~ Malignan~ T.cell~ PTPRC--EGFR PTPRC EGFR FALSE SERPI~ 0.889 1 1 1.79 -## 7 pEMT_H~ Malignan~ T.cell~ PTPRC--CD44 PTPRC CD44 FALSE EHF 1.04 1 1 1.88 -## 8 pEMT_H~ Malignan~ T.cell~ PTPRC--CD44 PTPRC CD44 FALSE GADD4~ 0.836 1 1 2.42 -## 9 pEMT_H~ Malignan~ T.cell~ PTPRC--CD44 PTPRC CD44 FALSE SERPI~ 0.889 1 1 1.79 -## 10 pEMT_H~ Malignan~ T.cell~ PTPRC--ERBB2 PTPRC ERBB2 FALSE EHF 1.04 1 1 1.88 -## # ... with 8 more variables: target_expression_scaled , target_fraction , ligand_target_weight , activity , -## # activity_normalized , scaled_activity , scaled_activity_normalized , prioritization_score +## # A tibble: 10 × 19 +## niche receiver sender ligand_receptor ligand receptor target target_score target_signific… target_present +## +## 1 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 AIM2 0.815 1 1 +## 2 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 APP 0.770 1 1 +## 3 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 CXCL2 1.07 1 1 +## 4 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 CXCL8 0.897 1 1 +## 5 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 GADD4… 0.836 1 1 +## 6 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 IGFBP3 1.06 1 1 +## 7 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 ITGA5 1.05 1 1 +## 8 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 MMP1 0.918 1 1 +## 9 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 MMP13 0.855 1 1 +## 10 pEMT_High_ni… Maligna… T.cel… PTPRC--DPP4 PTPRC DPP4 NDRG1 0.906 1 1 +## # … with 9 more variables: target_expression , target_expression_scaled , target_fraction , +## # ligand_target_weight , activity , activity_normalized , scaled_activity , +## # scaled_activity_normalized , prioritization_score prioritization_tables$prioritization_tbl_ligand_receptor %>% filter(receiver == niches[[2]]$receiver) %>% head(10) -## # A tibble: 10 x 37 -## niche receiver sender ligand_receptor ligand receptor bonafide ligand_score ligand_signific~ ligand_present ligand_expressi~ -## -## 1 pEMT_Low_niche Malignan~ Endoth~ F8--LRP1 F8 LRP1 TRUE 0.952 1 1 2.17 -## 2 pEMT_Low_niche Malignan~ Endoth~ PLAT--LRP1 PLAT LRP1 TRUE 0.913 1 1 2.70 -## 3 pEMT_Low_niche Malignan~ CAF_Low FGF10--FGFR2 FGF10 FGFR2 TRUE 0.385 0.8 1 1.07 -## 4 pEMT_Low_niche Malignan~ CAF_Low NLGN2--NRXN3 NLGN2 NRXN3 TRUE 0.140 0.2 1 0.269 -## 5 pEMT_Low_niche Malignan~ CAF_Low RSPO3--LGR6 RSPO3 LGR6 TRUE 0.557 0.8 1 1.27 -## 6 pEMT_Low_niche Malignan~ CAF_Low COMP--SDC1 COMP SDC1 TRUE 0.290 0.8 1 1.27 -## 7 pEMT_Low_niche Malignan~ CAF_Low SEMA3C--NRP2 SEMA3C NRP2 TRUE 0.652 1 1 1.73 -## 8 pEMT_Low_niche Malignan~ CAF_Low SLIT2--SDC1 SLIT2 SDC1 TRUE 0.494 1 1 0.846 -## 9 pEMT_Low_niche Malignan~ Endoth~ IL33--IL1RAP IL33 IL1RAP FALSE 1.34 1 1 2.75 -## 10 pEMT_Low_niche Malignan~ CAF_Low C3--LRP1 C3 LRP1 TRUE 0.480 1 1 4.79 -## # ... with 26 more variables: ligand_expression_scaled , ligand_fraction , ligand_score_spatial , receptor_score , -## # receptor_significant , receptor_present , receptor_expression , receptor_expression_scaled , -## # receptor_fraction , receptor_score_spatial , ligand_scaled_receptor_expression_fraction , +## # A tibble: 10 × 36 +## niche receiver sender ligand_receptor ligand receptor ligand_score ligand_signific… ligand_present +## +## 1 pEMT_Low_niche Malignant_Low Endoth… IL33--IL1RAP IL33 IL1RAP 1.34 1 1 +## 2 pEMT_Low_niche Malignant_Low Endoth… F8--LDLR F8 LDLR 0.952 1 1 +## 3 pEMT_Low_niche Malignant_Low Endoth… POSTN--PTK7 POSTN PTK7 0.891 1 1 +## 4 pEMT_Low_niche Malignant_Low CAF_Low IGSF10--MILR1 IGSF10 MILR1 0.859 1 1 +## 5 pEMT_Low_niche Malignant_Low Endoth… PLAT--LRP1 PLAT LRP1 0.913 1 1 +## 6 pEMT_Low_niche Malignant_Low myofib… IGFBPL1--DCC IGFBP… DCC 0.789 0.4 1 +## 7 pEMT_Low_niche Malignant_Low Endoth… EPHA4--FGFR2 EPHA4 FGFR2 0.464 0.8 1 +## 8 pEMT_Low_niche Malignant_Low CAF_Low SLIT2--SDC1 SLIT2 SDC1 0.494 1 1 +## 9 pEMT_Low_niche Malignant_Low Endoth… F8--LRP4 F8 LRP4 0.952 1 1 +## 10 pEMT_Low_niche Malignant_Low CAF_Low FGF14--SCN9A FGF14 SCN9A 0.200 0.4 1 +## # … with 27 more variables: ligand_expression , ligand_expression_scaled , ligand_fraction , +## # ligand_score_spatial , receptor_score , receptor_significant , receptor_present , +## # receptor_expression , receptor_expression_scaled , receptor_fraction , +## # receptor_score_spatial , ligand_scaled_receptor_expression_fraction , ## # avg_score_ligand_receptor , activity , activity_normalized , scaled_ligand_score , ## # scaled_ligand_expression_scaled , scaled_receptor_score , scaled_receptor_expression_scaled , -## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , scaled_receptor_score_spatial , -## # scaled_ligand_fraction_adapted , scaled_receptor_fraction_adapted , scaled_activity , ... +## # scaled_avg_score_ligand_receptor , scaled_ligand_score_spatial , … prioritization_tables$prioritization_tbl_ligand_target %>% filter(receiver == niches[[2]]$receiver) %>% head(10) -## # A tibble: 10 x 20 -## niche receiver sender ligand_receptor ligand receptor bonafide target target_score target_signific~ target_present target_expressi~ -## -## 1 pEMT_L~ Malignan~ Endoth~ F8--LRP1 F8 LRP1 TRUE ETV4 0.771 1 1 1.00 -## 2 pEMT_L~ Malignan~ Endoth~ PLAT--LRP1 PLAT LRP1 TRUE CLDN7 0.835 1 1 2.30 -## 3 pEMT_L~ Malignan~ Endoth~ PLAT--LRP1 PLAT LRP1 TRUE ETV4 0.771 1 1 1.00 -## 4 pEMT_L~ Malignan~ CAF_Low FGF10--FGFR2 FGF10 FGFR2 TRUE ETV4 0.771 1 1 1.00 -## 5 pEMT_L~ Malignan~ CAF_Low FGF10--FGFR2 FGF10 FGFR2 TRUE WNT5A 1.40 1 1 2.01 -## 6 pEMT_L~ Malignan~ CAF_Low NLGN2--NRXN3 NLGN2 NRXN3 TRUE CLDN5 0.979 1 1 0.991 -## 7 pEMT_L~ Malignan~ CAF_Low NLGN2--NRXN3 NLGN2 NRXN3 TRUE ETV4 0.771 1 1 1.00 -## 8 pEMT_L~ Malignan~ CAF_Low RSPO3--LGR6 RSPO3 LGR6 TRUE DDC 0.832 1 1 0.785 -## 9 pEMT_L~ Malignan~ CAF_Low RSPO3--LGR6 RSPO3 LGR6 TRUE EGFL7 0.763 1 1 1.09 -## 10 pEMT_L~ Malignan~ CAF_Low COMP--SDC1 COMP SDC1 TRUE CLDN7 0.835 1 1 2.30 -## # ... with 8 more variables: target_expression_scaled , target_fraction , ligand_target_weight , activity , -## # activity_normalized , scaled_activity , scaled_activity_normalized , prioritization_score +## # A tibble: 10 × 19 +## niche receiver sender ligand_receptor ligand receptor target target_score target_signific… target_present +## +## 1 pEMT_Low_nic… Maligna… Endot… IL33--IL1RAP IL33 IL1RAP F2RL2 0.923 1 1 +## 2 pEMT_Low_nic… Maligna… Endot… IL33--IL1RAP IL33 IL1RAP MSC 1.04 1 1 +## 3 pEMT_Low_nic… Maligna… Endot… IL33--IL1RAP IL33 IL1RAP SHISA2 0.805 1 1 +## 4 pEMT_Low_nic… Maligna… Endot… IL33--IL1RAP IL33 IL1RAP SLC7A… 1.05 1 1 +## 5 pEMT_Low_nic… Maligna… Endot… F8--LDLR F8 LDLR NA NA NA +## 6 pEMT_Low_nic… Maligna… Endot… F8--LDLR F8 LDLR NA NA NA +## 7 pEMT_Low_nic… Maligna… Endot… F8--LDLR F8 LDLR NA NA NA +## 8 pEMT_Low_nic… Maligna… Endot… F8--LDLR F8 LDLR NA NA NA +## 9 pEMT_Low_nic… Maligna… Endot… F8--LDLR F8 LDLR NA NA NA +## 10 pEMT_Low_nic… Maligna… Endot… F8--LDLR F8 LDLR NA NA NA +## # … with 9 more variables: target_expression , target_expression_scaled , target_fraction , +## # ligand_target_weight , activity , activity_normalized , scaled_activity , +## # scaled_activity_normalized , prioritization_score ``` # 8. Visualization of the Differential NicheNet output @@ -803,7 +772,7 @@ lfc_plot = make_ligand_receptor_lfc_plot(receiver_oi, prioritized_tbl_oi, priori lfc_plot ``` -![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-29-1.png) +![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-28-1.png) Show the spatialDE as additional information @@ -812,7 +781,7 @@ lfc_plot_spatial = make_ligand_receptor_lfc_spatial_plot(receiver_oi, prioritize lfc_plot_spatial ``` -![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-1.png) +![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-29-1.png) ## Ligand expression, activity and target genes @@ -825,7 +794,7 @@ exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, exprs_activity_target_plot$combined_plot ``` -![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-31-1.png) +![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-1.png) Based on this plot, we can infer many hypotheses such as the following: “Interestingly, IL1 family ligands seem to have activity in inducing the DE genes between high pEMT and low pEMT malignant cells; and they are @@ -850,7 +819,7 @@ exprs_activity_target_plot = make_ligand_activity_target_exprs_plot(receiver_oi, exprs_activity_target_plot$combined_plot ``` -![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-1.png) +![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-31-1.png) ## Circos plot of prioritized ligand-receptor pairs @@ -868,7 +837,7 @@ colors_receiver = c("lavender") %>% magrittr::set_names(prioritized_tbl_oi$rece circos_output = make_circos_lr(prioritized_tbl_oi, colors_sender, colors_receiver) ``` -![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-1.png)![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-2.png) +![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-1.png)![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-2.png) ## Interpretation of these results @@ -905,7 +874,7 @@ lfc_plot = make_ligand_receptor_lfc_plot(receiver_oi, prioritized_tbl_oi, priori lfc_plot ``` -![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-34-1.png) +![](differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-1.png) # Notes, limitations, and comparison to default NicheNet. @@ -948,7 +917,7 @@ consider them to be very good candidates in regulating the process of interest, and we recommend testing these candidates for further experimental validation. -# References +### References Browaeys, R., Saelens, W. & Saeys, Y. NicheNet: modeling intercellular communication by linking ligands to target genes. Nat Methods (2019) diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-1.png index ec59d4b..7824779 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-1.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-2.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-2.png index b6e29cd..89d66af 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-2.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-2-2.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-27-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-27-1.png deleted file mode 100644 index 801d51d..0000000 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-27-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-28-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-28-1.png index 346dcfb..699efd4 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-28-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-28-1.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-29-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-29-1.png index 6d68598..931f41e 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-29-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-29-1.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-1.png index 2ff60f2..be3f69e 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-1.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-2.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-2.png deleted file mode 100644 index 019d4b0..0000000 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-30-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-31-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-31-1.png index e799e7c..f4b3e9f 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-31-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-31-1.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-1.png index 76144c8..a60a357 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-1.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-2.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-2.png new file mode 100644 index 0000000..fe6402e Binary files /dev/null and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-32-2.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-1.png index 794f677..2e2ce5b 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-1.png differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-2.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-2.png deleted file mode 100644 index 32fa2b9..0000000 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-33-2.png and /dev/null differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-34-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-34-1.png deleted file mode 100644 index 1ffa58e..0000000 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-34-1.png and /dev/null differ diff --git a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-4-1.png index 8bcbc91..46ed85d 100644 Binary files a/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-4-1.png and b/vignettes/differential_nichenet_pEMT_files/figure-gfm/unnamed-chunk-4-1.png differ diff --git a/vignettes/evaluation_datasets.xlsx b/vignettes/evaluation_datasets.xlsx old mode 100644 new mode 100755 index b3ebbff..b73d6ca Binary files a/vignettes/evaluation_datasets.xlsx and b/vignettes/evaluation_datasets.xlsx differ diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd index 6920d5e..8bde538 100644 --- a/vignettes/faq.Rmd +++ b/vignettes/faq.Rmd @@ -179,4 +179,4 @@ I recommend using all cells of the receiver cell type of interest as receiver (s ## My question is not in these list? What should I do now? -First, you can check the open and closed issues (https://github.com/saeyslab/nichenetr/issues) of this package on github to see whether your question might be addressed in one of these. If not, don’t hesitate to open a new issue. If you would prefer to keep the discussion private, you can send an email to the corresponding author of the NicheNet paper (yvan.saeys[at]ugent.be), but I prefer that you open an issue so other users can learn from it as well! +First, you can check the open and closed issues (https://github.com/saeyslab/nichenetr/issues) of this package on github to see whether your question might be addressed in one of these. If not, don’t hesitate to open a new issue. If you would prefer to keep the discussion private, you can send an email to info[at]nichenet.be, but we prefer that you open an issue so other users can learn from it as well! diff --git a/vignettes/faq.md b/vignettes/faq.md index 271f74a..348dd61 100644 --- a/vignettes/faq.md +++ b/vignettes/faq.md @@ -582,6 +582,5 @@ First, you can check the open and closed issues () of this package on github to see whether your question might be addressed in one of these. If not, don’t hesitate to open a new issue. If you would prefer to keep -the discussion private, you can send an email to the corresponding -author of the NicheNet paper (yvan.saeys\[at\]ugent.be), but I prefer +the discussion private, you can send an email to info\[at\]nichenet.be, but we prefer that you open an issue so other users can learn from it as well! diff --git a/vignettes/ligand_activity_geneset.Rmd b/vignettes/ligand_activity_geneset.Rmd index e9c0015..e9708f3 100644 --- a/vignettes/ligand_activity_geneset.Rmd +++ b/vignettes/ligand_activity_geneset.Rmd @@ -37,7 +37,7 @@ The pipeline of a basic NicheNet analysis consist mainly of the following steps: This vignette guides you in detail through all these steps. As example expression data of interacting cells, we will use data from Puram et al. to explore intercellular communication in the tumor microenvironment in head and neck squamous cell carcinoma (HNSCC) [See @puram_single-cell_2017]. More specifically, we will look at which ligands expressed by cancer-associated fibroblasts (CAFs) can induce a specific gene program in neighboring malignant cells. This program, a partial epithelial-mesenschymal transition (p-EMT) program, could be linked to metastasis by Puram et al. -The used ligand-target matrix and example expression data of interacting cells can be downloaded from Zenodo. [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and example [expression data](https://doi.org/10.5281/zenodo.3260758) of interacting cells can be downloaded from Zenodo. ## Step 0: Load required packages, NicheNet's ligand-target prior model and processed expression data of interacting cells @@ -51,12 +51,26 @@ library(tidyverse) Ligand-target model: This model denotes the prior potential that a particular ligand might regulate the expression of a specific target gene. +In Nichenet v2, networks and matrices for both mouse and human are made separately [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +options(timeout = 600) +organism = "human" + +if(organism == "human"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) +} else if(organism == "mouse"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + +} + +lr_network = lr_network %>% distinct(from, to) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ``` + Expression data of interacting cells: publicly available single-cell data from CAF and malignant cells from HNSCC tumors: ```{r} @@ -65,6 +79,13 @@ expression = hnscc_expression$expression sample_info = hnscc_expression$sample_info # contains meta-information about the cells ``` +Because the NicheNet 2.0. networks are in the most recent version of the official gene symbols, we will make sure that the gene symbols used in the expression data are also updated (= converted from their “aliases” to official gene symbols). + +```{r} +# If this is not done, there will be 35 genes fewer in lr_network_expressed! +colnames(expression) = convert_alias_to_symbols(colnames(expression), "human", verbose = FALSE) +``` + ## Step 1: Define expressed genes in sender and receiver cell populations Our research question is to prioritize which ligands expressed by CAFs can induce p-EMT in neighboring malignant cells. Therefore, CAFs are the sender cells in this example and malignant cells are the receiver cells. This is an example of paracrine signaling. Note that autocrine signaling can be considered if sender and receiver cell type are the same. @@ -106,8 +127,6 @@ head(background_expressed_genes) As potentially active ligands, we will use ligands that are 1) expressed by CAFs and 2) can bind a (putative) receptor expressed by malignant cells. Putative ligand-receptor links were gathered from NicheNet's ligand-receptor data sources. ```{r} -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) - # If wanted, users can remove ligand-receptor interactions that were predicted based on protein-protein interactions and only keep ligand-receptor interactions that are described in curated databases. To do this: uncomment following line of code: # lr_network = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") @@ -137,24 +156,24 @@ Now perform the ligand activity analysis: in this analysis, we will calculate th ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) ``` -Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the pearson correlation coefficient (PCC) between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, we will rank the ligands based on their pearson correlation coefficient. This allows us to prioritize p-EMT-regulating ligands. +Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity (this was the Pearson correlation for v1). Therefore, we will rank the ligands based on their AUPR. This allows us to prioritize p-EMT-regulating ligands. ```{r} -ligand_activities %>% arrange(-pearson) -best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) +ligand_activities %>% arrange(-aupr_corrected) +best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) head(best_upstream_ligands) ``` -We see here that the performance metrics indicate that the 20 top-ranked ligands can predict the p-EMT genes reasonably, this implies that ranking of the ligands might be accurate as shown in our study. However, it is possible that for some gene sets, the target gene prediction performance of the top-ranked ligands would not be much better than random prediction. In that case, prioritization of ligands will be less trustworthy. +We see here that the performance metrics indicate that the 30 top-ranked ligands can predict the p-EMT genes reasonably, this implies that ranking of the ligands might be accurate as shown in our study. However, it is possible that for some gene sets, the target gene prediction performance of the top-ranked ligands would not be much better than random prediction. In that case, prioritization of ligands will be less trustworthy. -Additional note: we looked at the top 20 ligands here and will continue the analysis by inferring p-EMT target genes of these 20 ligands. However, the choice of looking only at the 20 top-ranked ligands for further biological interpretation is based on biological intuition and is quite arbitrary. Therefore, users can decide to continue the analysis with a different number of ligands. We recommend to check the selected cutoff by looking at the distribution of the ligand activity values. Here, we show the ligand activity histogram (the score for the 20th ligand is indicated via the dashed line). +Additional note: we looked at the top 30 ligands here and will continue the analysis by inferring p-EMT target genes of these 30 ligands. However, the choice of looking only at the 30 top-ranked ligands for further biological interpretation is based on biological intuition and is quite arbitrary. Therefore, users can decide to continue the analysis with a different number of ligands. We recommend to check the selected cutoff by looking at the distribution of the ligand activity values. Here, we show the ligand activity histogram (the score for the 30th ligand is indicated via the dashed line). ```{r} # show histogram of ligand activity scores -p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) + +p_hist_lig_activity = ggplot(ligand_activities, aes(x=aupr_corrected)) + geom_histogram(color="black", fill="darkorange") + # geom_density(alpha=.1, fill="orange") + - geom_vline(aes(xintercept=min(ligand_activities %>% top_n(20, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) + + geom_vline(aes(xintercept=min(ligand_activities %>% top_n(30, aupr_corrected) %>% pull(aupr_corrected))), color="red", linetype="dashed", size=1) + labs(x="ligand activity (PCC)", y = "# ligands") + theme_classic() p_hist_lig_activity @@ -172,7 +191,7 @@ nrow(active_ligand_target_links_df) head(active_ligand_target_links_df) ``` -For visualization purposes, we adapted the ligand-target regulatory potential matrix as follows. Regulatory potential scores were set as 0 if their score was below a predefined threshold, which was here the 0.25 quantile of scores of interactions between the 20 top-ranked ligands and each of their respective top targets (see the ligand-target network defined in the data frame). +For visualization purposes, we adapted the ligand-target regulatory potential matrix as follows. Regulatory potential scores were set as 0 if their score was below a predefined threshold, which was here the 0.25 quantile of scores of interactions between the 30 top-ranked ligands and each of their respective top targets (see the ligand-target network defined in the data frame). ```{r} active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25) @@ -213,7 +232,7 @@ lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() # get the weights of the ligand-receptor interactions as used in the NicheNet model -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) lr_network_top_df = weighted_networks$lr_sig %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) # convert to a matrix @@ -254,14 +273,14 @@ library(ggpubr) #### Prepare the ligand activity matrix ```{r} -ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) +ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) -vis_ligand_pearson = ligand_pearson_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("Pearson") +vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") ``` ```{r, fig.width=5, fig.height=6} -p_ligand_pearson = vis_ligand_pearson %>% make_heatmap_ggplot("Prioritized CAF-ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Pearson correlation coefficient\ntarget gene prediction ability)") -p_ligand_pearson +p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized CAF-ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") +p_ligand_aupr ``` #### Prepare expression of ligands in fibroblast per tumor @@ -312,7 +331,7 @@ p_target_tumor_scaled_expression ```{r, fig.width=13, fig.height=7} figures_without_legend = plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), p_ligand_tumor_expression + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), NULL, @@ -320,11 +339,11 @@ figures_without_legend = plot_grid( p_target_tumor_scaled_expression + theme(legend.position = "none", axis.ticks = element_blank()) + xlab(""), align = "hv", nrow = 2, - rel_widths = c(ncol(vis_ligand_pearson)+ 4.5, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target)) -2, - rel_heights = c(nrow(vis_ligand_pearson), nrow(vis_target_tumor_expression_scaled) + 3)) + rel_widths = c(ncol(vis_ligand_aupr)+ 4.5, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target)) -2, + rel_heights = c(nrow(vis_ligand_aupr), nrow(vis_target_tumor_expression_scaled) + 3)) legends = plot_grid( - as_ggplot(get_legend(p_ligand_pearson)), + as_ggplot(get_legend(p_ligand_aupr)), as_ggplot(get_legend(p_ligand_tumor_expression)), as_ggplot(get_legend(p_ligand_target_network)), as_ggplot(get_legend(p_target_tumor_scaled_expression)), diff --git a/vignettes/ligand_activity_geneset.md b/vignettes/ligand_activity_geneset.md index ed6745e..ef52c4f 100644 --- a/vignettes/ligand_activity_geneset.md +++ b/vignettes/ligand_activity_geneset.md @@ -26,40 +26,39 @@ cells. The pipeline of a basic NicheNet analysis consist mainly of the following steps: - - 1. Define a “sender/niche” cell population and a “receiver/target” - cell population present in your expression data and determine - which genes are expressed in both populations +- 1. Define a “sender/niche” cell population and a “receiver/target” + cell population present in your expression data and determine + which genes are expressed in both populations - - 2. Define a gene set of interest: these are the genes in the - “receiver/target” cell population that are potentially - affected by ligands expressed by interacting cells (e.g. genes - differentially expressed upon cell-cell interaction) +- 2. Define a gene set of interest: these are the genes in the + “receiver/target” cell population that are potentially affected by + ligands expressed by interacting cells (e.g. genes differentially + expressed upon cell-cell interaction) - - 3. Define a set of potential ligands: these are ligands that are - expressed by the “sender/niche” cell population and bind a - (putative) receptor expressed by the “receiver/target” - population +- 3. Define a set of potential ligands: these are ligands that are + expressed by the “sender/niche” cell population and bind a + (putative) receptor expressed by the “receiver/target” population - - 4) Perform NicheNet ligand activity analysis: rank the potential - ligands based on the presence of their target genes in the gene - set of interest (compared to the background set of genes) +- 4) Perform NicheNet ligand activity analysis: rank the potential + ligands based on the presence of their target genes in the gene + set of interest (compared to the background set of genes) - - 5) Infer top-predicted target genes of ligands that are top-ranked - in the ligand activity analysis +- 5) Infer top-predicted target genes of ligands that are top-ranked in + the ligand activity analysis This vignette guides you in detail through all these steps. As example expression data of interacting cells, we will use data from Puram et -al. to explore intercellular communication in the tumor -microenvironment in head and neck squamous cell carcinoma (HNSCC) (See -Puram et al. 2017). More specifically, we will look at which ligands -expressed by cancer-associated fibroblasts (CAFs) can induce a specific -gene program in neighboring malignant cells. This program, a partial +al. to explore intercellular communication in the tumor microenvironment +in head and neck squamous cell carcinoma (HNSCC) (See Puram et al. +2017). More specifically, we will look at which ligands expressed by +cancer-associated fibroblasts (CAFs) can induce a specific gene program +in neighboring malignant cells. This program, a partial epithelial-mesenschymal transition (p-EMT) program, could be linked to metastasis by Puram et al.  -The used ligand-target matrix and example expression data of interacting -cells can be downloaded from Zenodo. -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) +and example [expression data](https://doi.org/10.5281/zenodo.3260758) of +interacting cells can be downloaded from Zenodo. ## Step 0: Load required packages, NicheNet’s ligand-target prior model and processed expression data of interacting cells @@ -73,17 +72,31 @@ library(tidyverse) Ligand-target model: This model denotes the prior potential that a particular ligand might -regulate the expression of a specific target gene. +regulate the expression of a specific target gene. In Nichenet v2, +networks and matrices for both mouse and human are made separately +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +options(timeout = 600) +organism = "human" + +if(organism == "human"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) +} else if(organism == "mouse"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + +} + +lr_network = lr_network %>% distinct(from, to) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 +## A2M AANAT ABCA1 ACE ACE2 +## A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 +## A1BG 0.0018503922 0.0011108718 0.0014225077 0.0028594037 0.001139013 +## A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 +## A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 +## A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 ``` Expression data of interacting cells: publicly available single-cell @@ -95,6 +108,16 @@ expression = hnscc_expression$expression sample_info = hnscc_expression$sample_info # contains meta-information about the cells ``` +Because the NicheNet 2.0. networks are in the most recent version of the +official gene symbols, we will make sure that the gene symbols used in +the expression data are also updated (= converted from their “aliases” +to official gene symbols). + +``` r +# If this is not done, there will be 35 genes fewer in lr_network_expressed! +colnames(expression) = convert_alias_to_symbols(colnames(expression), "human", verbose = FALSE) +``` + ## Step 1: Define expressed genes in sender and receiver cell populations Our research question is to prioritize which ligands expressed by CAFs @@ -119,8 +142,7 @@ consider genes to be expressed in a cell type when they have non-zero values in at least 10% of the cells from that cell type. This is described as well in the other vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step -analysis](seurat_steps.md):`vignette("seurat_steps", -package="nichenetr")`. +analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")`. ``` r tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") @@ -168,8 +190,6 @@ cells. Putative ligand-receptor links were gathered from NicheNet’s ligand-receptor data sources. ``` r -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) - # If wanted, users can remove ligand-receptor interactions that were predicted based on protein-protein interactions and only keep ligand-receptor interactions that are described in curated databases. To do this: uncomment following line of code: # lr_network = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") @@ -181,15 +201,15 @@ expressed_receptors = intersect(receptors,expressed_genes_receiver) lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) head(lr_network_expressed) -## # A tibble: 6 x 4 -## from to source database -## -## 1 HGF MET kegg_cytokines kegg -## 2 TNFSF10 TNFRSF10A kegg_cytokines kegg -## 3 TNFSF10 TNFRSF10B kegg_cytokines kegg -## 4 TGFB2 TGFBR1 kegg_cytokines kegg -## 5 TGFB3 TGFBR1 kegg_cytokines kegg -## 6 INHBA ACVR2A kegg_cytokines kegg +## # A tibble: 6 × 2 +## from to +## +## 1 A2M MMP2 +## 2 A2M MMP9 +## 3 ADAM10 APP +## 4 ADAM10 CD44 +## 5 ADAM10 TSPAN5 +## 6 ADAM10 TSPAN15 ``` This ligand-receptor network contains the expressed ligand-receptor @@ -199,7 +219,7 @@ we will consider the ligands from this network. ``` r potential_ligands = lr_network_expressed %>% pull(from) %>% unique() head(potential_ligands) -## [1] "HGF" "TNFSF10" "TGFB2" "TGFB3" "INHBA" "CD99" +## [1] "A2M" "ADAM10" "ADAM12" "ADAM15" "ADAM17" "ADAM9" ``` ## Step 4: Perform NicheNet’s ligand activity analysis on the gene set of interest @@ -215,35 +235,35 @@ ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_e ``` Now, we want to rank the ligands based on their ligand activity. In our -validation study, we showed that the pearson correlation coefficient -(PCC) between a ligand’s target predictions and the observed +validation study, we showed that the area under the precision-recall +curve (AUPR) between a ligand’s target predictions and the observed transcriptional response was the most informative measure to define -ligand activity. Therefore, we will rank the ligands based on their -pearson correlation coefficient. This allows us to prioritize +ligand activity (this was the Pearson correlation for v1). Therefore, we +will rank the ligands based on their AUPR. This allows us to prioritize p-EMT-regulating ligands. ``` r -ligand_activities %>% arrange(-pearson) -## # A tibble: 131 x 4 -## test_ligand auroc aupr pearson -## -## 1 PTHLH 0.667 0.0720 0.128 -## 2 CXCL12 0.680 0.0507 0.123 -## 3 AGT 0.676 0.0581 0.120 -## 4 TGFB3 0.689 0.0454 0.117 -## 5 IL6 0.693 0.0510 0.115 -## 6 INHBA 0.695 0.0502 0.113 -## 7 ADAM17 0.672 0.0526 0.113 -## 8 TNC 0.700 0.0444 0.109 -## 9 CTGF 0.680 0.0473 0.108 -## 10 FN1 0.679 0.0505 0.108 -## # ... with 121 more rows -best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) +ligand_activities %>% arrange(-aupr_corrected) +## # A tibble: 212 × 5 +## test_ligand auroc aupr aupr_corrected pearson +## +## 1 TGFB2 0.772 0.120 0.105 0.195 +## 2 BMP8A 0.774 0.0852 0.0699 0.175 +## 3 INHBA 0.777 0.0837 0.0685 0.122 +## 4 CXCL12 0.714 0.0829 0.0676 0.141 +## 5 LTBP1 0.727 0.0762 0.0609 0.160 +## 6 CCN2 0.736 0.0734 0.0581 0.141 +## 7 TNXB 0.719 0.0717 0.0564 0.157 +## 8 ENG 0.764 0.0703 0.0551 0.145 +## 9 BMP5 0.750 0.0691 0.0538 0.148 +## 10 VCAN 0.720 0.0687 0.0534 0.140 +## # … with 202 more rows +best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) head(best_upstream_ligands) -## [1] "PTHLH" "CXCL12" "AGT" "TGFB3" "IL6" "INHBA" +## [1] "TGFB2" "BMP8A" "INHBA" "CXCL12" "LTBP1" "CCN2" ``` -We see here that the performance metrics indicate that the 20 top-ranked +We see here that the performance metrics indicate that the 30 top-ranked ligands can predict the p-EMT genes reasonably, this implies that ranking of the ligands might be accurate as shown in our study. However, it is possible that for some gene sets, the target gene prediction @@ -251,28 +271,28 @@ performance of the top-ranked ligands would not be much better than random prediction. In that case, prioritization of ligands will be less trustworthy. -Additional note: we looked at the top 20 ligands here and will continue -the analysis by inferring p-EMT target genes of these 20 ligands. -However, the choice of looking only at the 20 top-ranked ligands for +Additional note: we looked at the top 30 ligands here and will continue +the analysis by inferring p-EMT target genes of these 30 ligands. +However, the choice of looking only at the 30 top-ranked ligands for further biological interpretation is based on biological intuition and is quite arbitrary. Therefore, users can decide to continue the analysis with a different number of ligands. We recommend to check the selected cutoff by looking at the distribution of the ligand activity values. -Here, we show the ligand activity histogram (the score for the 20th +Here, we show the ligand activity histogram (the score for the 30th ligand is indicated via the dashed line). ``` r # show histogram of ligand activity scores -p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) + +p_hist_lig_activity = ggplot(ligand_activities, aes(x=aupr_corrected)) + geom_histogram(color="black", fill="darkorange") + # geom_density(alpha=.1, fill="orange") + - geom_vline(aes(xintercept=min(ligand_activities %>% top_n(20, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) + + geom_vline(aes(xintercept=min(ligand_activities %>% top_n(30, aupr_corrected) %>% pull(aupr_corrected))), color="red", linetype="dashed", size=1) + labs(x="ligand activity (PCC)", y = "# ligands") + theme_classic() p_hist_lig_activity ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-10-1.png) +![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png) ## Step 5: Infer target genes of top-ranked ligands and visualize in a heatmap @@ -292,23 +312,23 @@ not be shown on the heatmap. active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows() nrow(active_ligand_target_links_df) -## [1] 143 +## [1] 460 head(active_ligand_target_links_df) -## # A tibble: 6 x 3 +## # A tibble: 6 × 3 ## ligand target weight ## -## 1 PTHLH COL1A1 0.00399 -## 2 PTHLH MMP1 0.00425 -## 3 PTHLH MMP2 0.00210 -## 4 PTHLH MYH9 0.00116 -## 5 PTHLH P4HA2 0.00190 -## 6 PTHLH PLAU 0.00401 +## 1 TGFB2 ACTN1 0.0849 +## 2 TGFB2 C1S 0.124 +## 3 TGFB2 COL17A1 0.0732 +## 4 TGFB2 COL1A1 0.243 +## 5 TGFB2 COL4A2 0.148 +## 6 TGFB2 F3 0.0747 ``` For visualization purposes, we adapted the ligand-target regulatory potential matrix as follows. Regulatory potential scores were set as 0 if their score was below a predefined threshold, which was here the 0.25 -quantile of scores of interactions between the 20 top-ranked ligands and +quantile of scores of interactions between the 30 top-ranked ligands and each of their respective top targets (see the ligand-target network defined in the data frame). @@ -316,17 +336,17 @@ defined in the data frame). active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25) nrow(active_ligand_target_links_df) -## [1] 143 +## [1] 460 head(active_ligand_target_links_df) -## # A tibble: 6 x 3 +## # A tibble: 6 × 3 ## ligand target weight ## -## 1 PTHLH COL1A1 0.00399 -## 2 PTHLH MMP1 0.00425 -## 3 PTHLH MMP2 0.00210 -## 4 PTHLH MYH9 0.00116 -## 5 PTHLH P4HA2 0.00190 -## 6 PTHLH PLAU 0.00401 +## 1 TGFB2 ACTN1 0.0849 +## 2 TGFB2 C1S 0.124 +## 3 TGFB2 COL17A1 0.0732 +## 4 TGFB2 COL1A1 0.243 +## 5 TGFB2 COL4A2 0.148 +## 6 TGFB2 F3 0.0747 ``` The putatively active ligand-target links will now be visualized in a @@ -343,7 +363,7 @@ p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized p_ligand_target_network ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-13-1.png) +![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png) Note that the choice of these cutoffs for visualization is quite arbitrary. We recommend users to test several cutoff values. @@ -372,7 +392,7 @@ lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() # get the weights of the ligand-receptor interactions as used in the NicheNet model -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) lr_network_top_df = weighted_networks$lr_sig %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) # convert to a matrix @@ -397,7 +417,7 @@ p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap p_ligand_receptor_network ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-15-1.png) +![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png) ## Follow-up analysis 2: Visualize expression of top-predicted ligands and their target genes in a combined heatmap @@ -421,17 +441,17 @@ library(ggpubr) #### Prepare the ligand activity matrix ``` r -ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) +ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) -vis_ligand_pearson = ligand_pearson_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("Pearson") +vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") ``` ``` r -p_ligand_pearson = vis_ligand_pearson %>% make_heatmap_ggplot("Prioritized CAF-ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Pearson correlation coefficient\ntarget gene prediction ability)") -p_ligand_pearson +p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized CAF-ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") +p_ligand_aupr ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-18-1.png) +![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png) #### Prepare expression of ligands in fibroblast per tumor @@ -458,7 +478,7 @@ p_ligand_tumor_expression = vis_ligand_tumor_expression %>% make_heatmap_ggplot( p_ligand_tumor_expression ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-20-1.png) +![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png) #### Prepare expression of target genes in malignant cells per tumor @@ -479,13 +499,13 @@ p_target_tumor_scaled_expression = vis_target_tumor_expression_scaled %>% make_ p_target_tumor_scaled_expression ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-22-1.png) +![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png) #### Combine the different heatmaps in one overview figure ``` r figures_without_legend = plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), p_ligand_tumor_expression + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), NULL, @@ -493,11 +513,11 @@ figures_without_legend = plot_grid( p_target_tumor_scaled_expression + theme(legend.position = "none", axis.ticks = element_blank()) + xlab(""), align = "hv", nrow = 2, - rel_widths = c(ncol(vis_ligand_pearson)+ 4.5, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target)) -2, - rel_heights = c(nrow(vis_ligand_pearson), nrow(vis_target_tumor_expression_scaled) + 3)) + rel_widths = c(ncol(vis_ligand_aupr)+ 4.5, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target)) -2, + rel_heights = c(nrow(vis_ligand_aupr), nrow(vis_target_tumor_expression_scaled) + 3)) legends = plot_grid( - as_ggplot(get_legend(p_ligand_pearson)), + as_ggplot(get_legend(p_ligand_aupr)), as_ggplot(get_legend(p_ligand_tumor_expression)), as_ggplot(get_legend(p_ligand_target_network)), as_ggplot(get_legend(p_target_tumor_scaled_expression)), @@ -509,35 +529,33 @@ plot_grid(figures_without_legend, rel_heights = c(10,2), nrow = 2, align = "hv") ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png) +![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png) ## Other follow-up analyses: As another follow-up analysis, you can infer possible signaling paths between ligands and targets of interest. You can read how to do this in the following vignette [Inferring ligand-to-target signaling -paths](ligand_target_signaling_path.md):`vignette("ligand_target_signaling_path", -package="nichenetr")`. +paths](ligand_target_signaling_path.md):`vignette("ligand_target_signaling_path", package="nichenetr")`. Another follow-up analysis is getting a “tangible” measure of how well top-ranked ligands predict the gene set of interest and assess which genes of the gene set can be predicted well. You can read how to do this in the following vignette [Assess how well top-ranked ligands can predict a gene set of -interest](target_prediction_evaluation_geneset.md):`vignette("target_prediction_evaluation_geneset", -package="nichenetr")`. +interest](target_prediction_evaluation_geneset.md):`vignette("target_prediction_evaluation_geneset", package="nichenetr")`. In case you want to visualize ligand-target links between multiple interacting cells, you can make an appealing circos plot as shown in vignette [Circos plot visualization to show active ligand-target links -between interacting cells](circos.md):`vignette("circos", -package="nichenetr")`. +between interacting +cells](circos.md):`vignette("circos", package="nichenetr")`. ## References -
+
-
+
Puram, Sidharth V., Itay Tirosh, Anuraag S. Parikh, Anoop P. Patel, Keren Yizhak, Shawn Gillespie, Christopher Rodman, et al. 2017. diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-10-1.png deleted file mode 100644 index 66cc080..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-10-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png index de08417..f3fea91 100644 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-12-1.png deleted file mode 100644 index 94f4bee..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-12-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-13-1.png deleted file mode 100644 index 5d9a412..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-13-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png index b92bf38..066a883 100644 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png and b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-15-1.png deleted file mode 100644 index c777566..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-15-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png new file mode 100644 index 0000000..2a5b064 Binary files /dev/null and b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-17-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-17-1.png deleted file mode 100644 index 062aa22..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-17-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-18-1.png deleted file mode 100644 index 28aa7f6..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-18-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png index 833b6ea..cfd4718 100644 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png and b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-20-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-20-1.png deleted file mode 100644 index 4a19232..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-20-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png index babce31..2470147 100644 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png and b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-22-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-22-1.png deleted file mode 100644 index d027174..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-22-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png index bc7f615..7b4d239 100644 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png and b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png index de08417..1fda2ea 100644 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png and b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-25-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-25-1.png deleted file mode 100644 index 833b6ea..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-25-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-26-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-26-1.png deleted file mode 100644 index 77ec01e..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-26-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-27-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-27-1.png deleted file mode 100644 index babce31..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-27-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-28-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-28-1.png deleted file mode 100644 index 276b973..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-28-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-30-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-30-1.png deleted file mode 100644 index 567b986..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-30-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-9-1.png deleted file mode 100644 index 47bd3e6..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-9-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-markdown_github/unnamed-chunk-10-1.png b/vignettes/ligand_activity_geneset_files/figure-markdown_github/unnamed-chunk-10-1.png deleted file mode 100644 index ff1d3cd..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-markdown_github/unnamed-chunk-10-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-markdown_github/unnamed-chunk-30-1.png b/vignettes/ligand_activity_geneset_files/figure-markdown_github/unnamed-chunk-30-1.png deleted file mode 100644 index 22f7fef..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-markdown_github/unnamed-chunk-30-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_single_cell.Rmd b/vignettes/ligand_activity_single_cell.Rmd index 1d1c55a..39d198a 100644 --- a/vignettes/ligand_activity_single_cell.Rmd +++ b/vignettes/ligand_activity_single_cell.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ) ``` -This vignette shows how NicheNet can be used to predict which ligands might be active in single-cells. If a ligand has a high activity in a cell, this means that target genes of that ligand are stronger expressed in that cell than in other cells. In this example, we will use data from Puram et al. to explore intercellular communication in the tumor microenvironment in head and neck squamous cell carcinoma (HNSCC) [See @puram_single-cell_2017]. More specifically, we will assess the activity of cancer-associated fibroblast (CAF) ligands in malignant cells. The used ligand-target matrix and example expression data of interacting cells can be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). +This vignette shows how NicheNet can be used to predict which ligands might be active in single-cells. If a ligand has a high activity in a cell, this means that target genes of that ligand are stronger expressed in that cell than in other cells. In this example, we will use data from Puram et al. to explore intercellular communication in the tumor microenvironment in head and neck squamous cell carcinoma (HNSCC) [See @puram_single-cell_2017]. More specifically, we will assess the activity of cancer-associated fibroblast (CAF) ligands in malignant cells. The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and example [expression data](https://doi.org/10.5281/zenodo.3260758) of interacting cells can be downloaded from Zenodo. In order to prioritize ligands regulating a process of interest, you can perform a regression/correlation analysis between ligand activities in cells, and scores of a cell corresponding to the process of interest. For example, in this case study we were interested in finding ligands regulating p-EMT. Therefore we correlated ligand activities to the p-EMT scores of cells. @@ -62,7 +62,7 @@ expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){1 ### Load the ligand-target model we want to use ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ``` @@ -71,7 +71,7 @@ ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns In a first step, we will define a set of potentially active ligands. As potentially active ligands, we will use ligands that are 1) expressed by CAFs and 2) can bind a (putative) receptor expressed by malignant cells. Putative ligand-receptor links were gathered from NicheNet's ligand-receptor data sources. ```{r} -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) ligands = lr_network$from %>% unique() expressed_ligands = intersect(ligands,expressed_genes_CAFs) receptors = lr_network$to %>% unique() @@ -108,7 +108,7 @@ cell_scores_tbl = tibble(cell = malignant_hn5_ids, score = expression_scaled[mal Then, we will determine the correlation between these p-EMT scores and ligand activities over all cells to prioritize p-EMT-inducing ligands. We hypothesize that ligands might be potential regulators of the p-EMT program if higher ligand activities are associated with higher p-EMT scores. Based on this correlation, we obtained a ranking of potential p-EMT-inducing ligands. -To do so, we frist need to process and normalize the ligand activities (i.e. pearson correlation values) to make different cells comparable. Here we use modified z-score normalization. +To do so, we first need to process and normalize the ligand activities (i.e. area under the precision-recall curve) to make different cells comparable. Here we use modified z-score normalization. ```{r} normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) diff --git a/vignettes/ligand_activity_single_cell.md b/vignettes/ligand_activity_single_cell.md index a87bbca..6c1a4a1 100644 --- a/vignettes/ligand_activity_single_cell.md +++ b/vignettes/ligand_activity_single_cell.md @@ -15,9 +15,9 @@ Puram et al. to explore intercellular communication in the tumor microenvironment in head and neck squamous cell carcinoma (HNSCC) (See Puram et al. 2017). More specifically, we will assess the activity of cancer-associated fibroblast (CAF) ligands in malignant cells. The used -ligand-target matrix and example expression data of interacting cells -can be downloaded from Zenodo -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). +[ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and +example [expression data](https://doi.org/10.5281/zenodo.3260758) of +interacting cells can be downloaded from Zenodo. In order to prioritize ligands regulating a process of interest, you can perform a regression/correlation analysis between ligand activities in @@ -41,8 +41,7 @@ library(tidyverse) ### Read in expression data of interacting cells First, we will read in the single-cell data from CAF and malignant cells -from HNSCC tumors (See Puram et al. -2017). +from HNSCC tumors (See Puram et al. 2017). ``` r hnscc_expression = readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) @@ -54,8 +53,7 @@ Secondly, we will determine which genes are expressed in CAFs and malignant cells from high quality primary tumors. Therefore, we wil not consider cells from tumor samples of less quality or from lymph node metastases. To determine expressed genes, we use the definition used by -of Puram et -al. +of Puram et al. ``` r tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") @@ -70,14 +68,14 @@ expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){1 ### Load the ligand-target model we want to use ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 +## A2M AANAT ABCA1 ACE ACE2 +## A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 +## A1BG 0.0018503922 0.0011108718 0.0014225077 0.0028594037 0.001139013 +## A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 +## A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 +## A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 ``` ### Perform NicheNet’s single-cell ligand activity analysis @@ -86,11 +84,10 @@ In a first step, we will define a set of potentially active ligands. As potentially active ligands, we will use ligands that are 1) expressed by CAFs and 2) can bind a (putative) receptor expressed by malignant cells. Putative ligand-receptor links were gathered from NicheNet’s -ligand-receptor data -sources. +ligand-receptor data sources. ``` r -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) ligands = lr_network$from %>% unique() expressed_ligands = intersect(ligands,expressed_genes_CAFs) receptors = lr_network$to %>% unique() @@ -98,12 +95,11 @@ expressed_receptors = intersect(receptors,expressed_genes_malignant) potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% .$from %>% unique() head(potential_ligands) -## [1] "HGF" "TNFSF10" "TGFB2" "TGFB3" "INHBA" "CD99" +## [1] "A2M" "ADAM10" "ADAM12" "ADAM15" "ADAM17" "ADAM9" ``` In a second step, we will scale the single-cell expression data -(including only expressed -genes). +(including only expressed genes). ``` r background_expressed_genes = expressed_genes_malignant %>% .[. %in% rownames(ligand_target_matrix)] @@ -118,8 +114,7 @@ only on 10 example cells from the HN5 tumor. This vignette’s only purpose is to illustrate the analysis. In practice, ligand activity analysis for several cells can be better -run in parallel (via -e.g. parallel::mclapply)\! +run in parallel (via e.g. parallel::mclapply)! ``` r malignant_hn5_ids = sample_info %>% filter(tumor == "HN5") %>% filter(`Lymph node` == 0) %>% filter(`classified as cancer cell` == 1) %>% .$cell %>% head(10) @@ -146,10 +141,9 @@ program if higher ligand activities are associated with higher p-EMT scores. Based on this correlation, we obtained a ranking of potential p-EMT-inducing ligands. -To do so, we frist need to process and normalize the ligand activities -(i.e. pearson correlation values) to make different cells comparable. -Here we use modified z-score -normalization. +To do so, we first need to process and normalize the ligand activities +(i.e. area under the precision-recall curve) to make different cells +comparable. Here we use modified z-score normalization. ``` r normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_activities) @@ -158,42 +152,41 @@ normalized_ligand_activities = normalize_single_cell_ligand_activities(ligand_ac Then, we combine the ligand activities and cell property scores and perform correlation and regression analysis. We can prioritize ligands by ranking them based on the pearson correlation between activity scores -and property -scores. +and property scores. ``` r output_correlation_analysis = single_ligand_activity_score_regression(normalized_ligand_activities,cell_scores_tbl) output_correlation_analysis %>% arrange(-pearson_regression) %>% select(pearson_regression, ligand) -## # A tibble: 131 x 2 -## pearson_regression ligand -## -## 1 0.525 TNC -## 2 0.497 TFPI -## 3 0.491 SEMA5A -## 4 0.488 ANXA1 -## 5 0.473 TNFSF13B -## 6 0.462 IBSP -## 7 0.449 HDGF -## 8 0.443 HSP90B1 -## 9 0.431 CALM3 -## 10 0.428 CXCL11 -## # ... with 121 more rows +## # A tibble: 203 × 2 +## pearson_regression ligand +## +## 1 0.830 OGN +## 2 0.740 ANGPTL2 +## 3 0.583 CXCL10 +## 4 0.577 NID1 +## 5 0.572 MMP14 +## 6 0.560 CXCL12 +## 7 0.555 COL11A1 +## 8 0.550 BGN +## 9 0.526 CLCF1 +## 10 0.510 TFPI +## # … with 193 more rows ``` -Visualize the relation between ligand activity -and the cell's property score of interest +Visualize the relation between ligand activity and the cell’s property +score of interest ``` r inner_join(cell_scores_tbl,normalized_ligand_activities) %>% ggplot(aes(score,TNC)) + geom_point() + geom_smooth(method = "lm") ``` -![](ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-22-1.png) +![](ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-80-1.png) ### References -
+
-
+
Puram, Sidharth V., Itay Tirosh, Anuraag S. Parikh, Anoop P. Patel, Keren Yizhak, Shawn Gillespie, Christopher Rodman, et al. 2017. diff --git a/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-11-1.png deleted file mode 100644 index 6a50282..0000000 Binary files a/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-11-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-22-1.png b/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-22-1.png deleted file mode 100644 index 6a50282..0000000 Binary files a/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-22-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-80-1.png b/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-80-1.png new file mode 100644 index 0000000..67580a7 Binary files /dev/null and b/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-80-1.png differ diff --git a/vignettes/ligand_activity_single_cell_files/figure-markdown_github/unnamed-chunk-22-1.png b/vignettes/ligand_activity_single_cell_files/figure-markdown_github/unnamed-chunk-22-1.png deleted file mode 100644 index eab1f0c..0000000 Binary files a/vignettes/ligand_activity_single_cell_files/figure-markdown_github/unnamed-chunk-22-1.png and /dev/null differ diff --git a/vignettes/ligand_receptor_circos.svg b/vignettes/ligand_receptor_circos.svg index 199474b..0879746 100644 --- a/vignettes/ligand_receptor_circos.svg +++ b/vignettes/ligand_receptor_circos.svg @@ -3,1293 +3,1309 @@ - + - + - + - + - + + + + - + - + - + - + - + - + - + - + - + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - + - + - + - + - - - - - - - - - - + - + - + - + - + - - - - + - + - + - + - + - + + + + - + - + - + - + - + - + - - - - + - + - + + + + + + + + + + + + + - + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - + - + + + + - + - + - + - + - + - + - + - + - + - + - - - - - - - + - + - + - + - - - - - - - + - + - + - + - - - - - - - + - + - + - + - + - - - - + - + - + - - - - + - + - + - + - + - - - - - - - + - + - + - + - - - - + - + - + - + - + - + + + + - + - + - + - + - + - + + + + - + - + - + - - - - - - - + - + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + - + - + - + - + - + + + + - + - + - + - + - + - - - - + - + - + - + - + - + + + + - + - + - + - + - - - - + - + - + - + - + - - - - + - + - + - + - + - - - - + - + - + - + - + + + + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - - - - - - - + - + - + - + - + + + + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + - - - - - - - - + + + + + + + - - - - - - - - + + + + + + - - - - - - - + + + + + - - - - - - - - + + + + + + - - - - + + + + + + + + - - - - - - - + + + + + + + - - - - - - + + + + + + + + - - - - - - - + + + + + + - - - - - - - - + + + + + + - - - - - - + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - + + + + + + - - - - - - - + + + + + + - - - - - - - + + + + + - - - - - - + + + + + - - - - - - + + + + + + - - - - + + + + - - - - + + + + - - - - - + + + + - - - - - + + + + + + - - - - - - + + + + + + - - - - - + + + + - - - - - + + + + + - - - - - + + + + + - - - - + + + + + - - - - - - + + + + + - - - - + + + + + - - - - - + + + + + - - - - - + + + + + - - - - - + + + + - - - + + + + - - - - - + + + + - - - - + + + + - - - - - + + + + + + - - - + + + + - - - - + + + - - - - - - + + + + - - - - - + + + + diff --git a/vignettes/ligand_target_circos.svg b/vignettes/ligand_target_circos.svg index 5105267..5929276 100644 --- a/vignettes/ligand_target_circos.svg +++ b/vignettes/ligand_target_circos.svg @@ -3,1421 +3,2010 @@ - + - + - + - + - + + + + - + - + - + - + - + - + - + - + - + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - + - - - - - - - + - + - + - + - - - - - - - - - - + - + - + + + + + + + - + - + - + - + - + + + + - + - + - + - + - + - + + + + - + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - + - + + + + - + - + - + - + - + - - - - + - + - + - + + + + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + - + - + - + - - - - - - - + - + - + - + - + - + + + + + + + - + - + - + - + - + + + + - + - + - + - + - + + + + + + + - + - + - - - - - - - - - - - - - + - + - + - + - + - + + + + - + - + - + - + + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - + - + + + + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + + + + + + + + + + - + - + - + - + - + + + + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + - + - + - + - + - + - + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - - - - + - + - + - + - - - - - - - + - + - + - + - + + + + - + - + - + - - - - + - + - + - + - + - - - - + - + - + - + - + - + - + - + - + - + + + + + + + - + - + - + - + - + - + + + + + + + - + - + - + - + - + - - - - + - + - + - + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + - - - - - - - + + + + + + - - - - - - - + + + + + - - - - - - - + + + + + - - - - - - + + + + + + + + - - - - - + + + + + + + + - - - - + + + + + - - - - - + + + + + - - - + + + - - - - - + + + + - - - - + + + + - - - - + + + + + - - - - + + + + - - - - - - + + + + + + - - - - - + + + + + + + - - - + + + + + - - - - - + + + + - - - - - - + + + - - - - - + + + + + + - - - - - + + + + - - - - + + + + - - - + + + + + - - - - + + + + - - - - + + + + + + - - - - + + + - - - - - + + + + - - - - - + + + + + - - - - - - + + + + + + - - - - - + + + + + + - - - + + + + + - - - - + + + + + + + + - - - - - + + + + + - - - - + + + + + + - - - + + + + + - - - - - - + + + + - - - - - - + + + + - - - - - + + + + + diff --git a/vignettes/ligand_target_signaling_path.Rmd b/vignettes/ligand_target_signaling_path.Rmd index 54ce0fb..097ed9e 100644 --- a/vignettes/ligand_target_signaling_path.Rmd +++ b/vignettes/ligand_target_signaling_path.Rmd @@ -7,6 +7,7 @@ vignette: > %\VignetteIndexEntry{Inferring ligand-to-target signaling paths} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} +always_allow_html: true --- +We will now compare performances between NicheNet v1 and v2 on both +ligand treatment datasets. Note that although the performance of v2 is +much better here, the CytoSig experiments were also included during +model construction of v2. To get the results in the MultiNicheNet paper, you will have +to follow the `model_construction.Rmd` vignette and filter out the +CytoSig data sources during model construction. + +``` r + +performances_df <- lapply(c("nichenet_gs", "cytosig_gs"), function(gs) { + + # Choose 'settings' based on type of gold standard + if (gs == "nichenet_gs") { + settings = expression_settings_validation %>% lapply(convert_expression_settings_evaluation) + settings = settings %>% discard(~length(.$from) > 1) + } else { + settings = cytosig_settings + } + + lapply(c("v1", "v2"), function(ver){ + # Get the ligand_target_matrix according to the version, Evaluate transcriptional response prediction on every dataset + # + performances = settings %>% lapply(evaluate_target_prediction, get(paste0("ligand_target_matrix_", ver))) %>% bind_rows() + + # Select some classification evaluation metrics showing the target gene prediction performance + performances = performances %>% select(-aupr, -auc_iregulon,-pearson_log_pval,-spearman_log_pval ,-sensitivity_roc, -specificity_roc) %>% + pivot_longer(auroc:spearman, names_to = "scorename", values_to = "scorevalue") %>% mutate(dataset = gs, version = ver) + + }) %>% do.call(rbind, .) +}) %>% do.call(rbind, .) + + +scorelabels = str_wrap(c("AUROC", "AUPR (corrected)", "Mean-rank gene-set enrichment", "AUC-iRegulon (corrected)","Pearson correlation", "Spearman's rank correlation"), width=10) %>% + setNames(performances$scorename %>% unique) +scorerandom = c(auroc=0.5, aupr_corrected=0, auc_iregulon_corrected = 0, pearson = 0, spearman = 0,mean_rank_GST_log_pval = 0) %>% data.frame(scorevalue=.) %>% rownames_to_column("scorename") + +ggplot(performances_df, aes(y=scorevalue, x=version)) + + geom_violin(aes(fill=version)) + + geom_boxplot(width = 0.05) + + labs(y="Target prediction score", x="NicheNet version") + + facet_grid(scorename~dataset, scales = "free", + labeller=labeller(scorename=scorelabels, + dataset=c(cytosig_gs="CytoSig datasets", nichenet_gs="NicheNet datasets"))) + + geom_hline(aes(yintercept=scorevalue), data=scorerandom, linetype = 2, linewidth=0.2, color = "red") + + theme_bw() + + theme(strip.text.y = element_text(angle=0), + strip.background.x = element_blank(), + strip.background.y = element_rect(fill="white"), + panel.grid.major = element_blank(), + legend.position = "none") +``` + +![](model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png) + ### Example: ligand activity prediction evaluation Now we will show how to assess the accuracy of the model in predicting @@ -118,26 +171,26 @@ ligand activity scores as should be for a good ligand-target model. A graphical summary of this procedure is visualized here below: -![](vignettes/ligand_activity_prediction_workflow_new.png) +![](ligand_activity_prediction_workflow_new.png) Step 1: convert expression datasets to the required format to perform -ligand activity -prediction +ligand activity prediction ``` r # convert expression datasets to correct format for ligand activity prediction +settings = expression_settings_validation %>% lapply(convert_expression_settings_evaluation) +settings = settings %>% discard(~length(.$from) > 1) all_ligands = settings %>% extract_ligands_from_settings(combination = FALSE) %>% unlist() settings_ligand_prediction = settings %>% convert_settings_ligand_prediction(all_ligands = all_ligands, validation = TRUE) ``` Step 2: calculate the ligand importances (these are classification evaluation metrics indicating how well a ligand can predict the observed -DE genes in a specific ligand treatment -dataset) +DE genes in a specific ligand treatment dataset) ``` r # infer ligand importances: for all ligands of interest, we assess how well a ligand explains the differential expression in a specific datasets (and we do this for all datasets). -ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances,ligand_target_matrix) %>% bind_rows() +ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances,ligand_target_matrix_v2) %>% bind_rows() ``` Step 3: evaluate how separate ligand importances can predict ligand @@ -145,14 +198,17 @@ activity ``` r # Look at predictive performance of single/individual importance measures to predict ligand activity: of all ligands tested, the ligand that is truly active in a dataset should get the highest activity score (i.e. best target gene prediction performance) + +# Replace infinite values with 10000 +ligand_importances = ligand_importances %>% mutate(across(ends_with("log_pval"), ~ifelse(is.infinite(.x), 10000, .x))) + evaluation_ligand_prediction = ligand_importances$setting %>% unique() %>% lapply(function(x){x}) %>% lapply(wrapper_evaluate_single_importances_ligand_prediction,ligand_importances) %>% bind_rows() %>% inner_join(ligand_importances %>% distinct(setting,ligand)) ``` Step 4: visualize the results: show here different classification -evaluation -metrics +evaluation metrics ``` r # Visualize some classification evaluation metrics showing the ligand activity prediction performance @@ -173,8 +229,74 @@ evaluation_ligand_prediction %>% theme(axis.text.x = element_text(angle = 90)) ``` -![](model_evaluation_files/figure-gfm/unnamed-chunk-8-1.png) +![](model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png) + +We will again compare performances between NicheNet v1 and v2 on both +ligand treatment datasets. + +``` r + +performances_df <- lapply(c("nichenet_gs", "cytosig_gs"), function(gs) { + + # Choose 'settings' based on type of gold standard + if (gs == "nichenet_gs") { + settings = expression_settings_validation %>% lapply(convert_expression_settings_evaluation) + settings = settings %>% discard(~length(.$from) > 1) + } else { + settings = cytosig_settings + } + + all_ligands = settings %>% extract_ligands_from_settings(combination = FALSE) %>% unlist() + settings_ligand_prediction = settings %>% convert_settings_ligand_prediction(all_ligands = all_ligands, validation = TRUE) + + lapply(c("v1", "v2"), function(ver){ + + # Look at predictive performance of single/individual importance measures to predict ligand activity: of all ligands tested, + # the ligand that is truly active in a dataset should get the highest activity score (i.e. best target gene prediction performance) + # infer ligand importances: for all ligands of interest, we assess how well a ligand explains the differential expression in a specific datasets (and we do this for all datasets). + ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances, get(paste0("ligand_target_matrix_", ver))) %>% bind_rows() + + # Filter out column with infinite values + ligand_importances = ligand_importances %>% mutate(across(ends_with("log_pval"), ~ifelse(is.infinite(.x), 10000, .x))) + + evaluation_ligand_prediction = ligand_importances$setting %>% unique() %>% lapply(function(x){x}) %>% + lapply(wrapper_evaluate_single_importances_ligand_prediction,ligand_importances) %>% + bind_rows() %>% inner_join(ligand_importances %>% distinct(setting,ligand)) + + # Select some classification evaluation metrics showing the ligand activity prediction performance + evaluation_ligand_prediction %>% select(-aupr, -sensitivity_roc, -specificity_roc, -pearson, -spearman, -mean_rank_GST_log_pval) %>% pivot_longer(auroc:aupr_corrected, names_to = "scorename", values_to = "scorevalue") %>% mutate(dataset = gs, version = ver) + + }) %>% do.call(rbind, .) +}) %>% do.call(rbind, .) +``` + +``` r +scorelabels = str_wrap(c("AUROC", "AUPR", "AUPR (corrected)", "Sensitivity ROC", "Specificity ROC", + "Mean-rank gene-set enrichment", "AUC-iRegulon", "AUC-iRegulon (corrected)", + "Pearson log p-val", "Spearman log p-val", "Pearson correlation", "Spearman's rank correlation"), width=15) %>% setNames(performances_df$importance_measure %>% unique) +scorerandom = c(auroc=0.5, aupr_corrected=0) %>% data.frame(scorevalue=.) %>% rownames_to_column("scorename") + +ggplot(performances_df %>% filter(importance_measure %in% + c("auroc", "aupr_corrected", "mean_rank_GST_log_pval", + "auc_iregulon_corrected", "pearson", "spearman")), + aes(y=scorevalue, x=version, color=importance_measure, group=interaction(importance_measure, version))) + + geom_violin(position=position_dodge(0.75)) + + geom_boxplot(width = 0.05, position=position_dodge(0.75)) + + scale_x_discrete(guide = guide_axis(angle = 30)) + + scale_fill_discrete(labels=scorelabels) + + guides(color="none") + + labs(y="Evaluation ligand activity prediction", x="Ligand activity measure for two NicheNet versions", + fill = "Importance\nmeasure") + + facet_grid(scorename~dataset, scales = "free", + labeller=labeller(scorename=scorelabels, + dataset=c(cytosig_gs="CytoSig datasets", nichenet_gs="NicheNet datasets"))) + + geom_hline(aes(yintercept=scorevalue), data=scorerandom, linetype = 2, linewidth=0.2, color = "red") + + theme_bw() + + theme(strip.text.y = element_text(angle=0), + strip.background.x = element_blank(), + strip.background.y = element_rect(fill="white"), + panel.grid.major = element_blank(), + legend.position = "bottom") +``` -This plots shows that using the pearson correlation coefficient target -prediction metric is the best metric to use for ranking ligands -according to predicted ligand activity. +![](model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png) diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png index c668159..d59054b 100644 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-15-1.png deleted file mode 100644 index 91dc326..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-15-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-2-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-2-1.png deleted file mode 100644 index e2f6940..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-2-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-3-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-3-1.png deleted file mode 100644 index 8e34316..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-3-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png index 86bdec6..f6cbb63 100644 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png and b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png new file mode 100644 index 0000000..72e5cd6 Binary files /dev/null and b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-8-1.png deleted file mode 100644 index a92b2e5..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-8-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png new file mode 100644 index 0000000..0febf8b Binary files /dev/null and b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/model_evaluation_files/figure-markdown_github/unnamed-chunk-2-1.png b/vignettes/model_evaluation_files/figure-markdown_github/unnamed-chunk-2-1.png deleted file mode 100644 index 0c2e19b..0000000 Binary files a/vignettes/model_evaluation_files/figure-markdown_github/unnamed-chunk-2-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-markdown_github/unnamed-chunk-3-1.png b/vignettes/model_evaluation_files/figure-markdown_github/unnamed-chunk-3-1.png deleted file mode 100644 index 78387f7..0000000 Binary files a/vignettes/model_evaluation_files/figure-markdown_github/unnamed-chunk-3-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_stephanie.Rmd b/vignettes/model_evaluation_stephanie.Rmd deleted file mode 100644 index be39d33..0000000 --- a/vignettes/model_evaluation_stephanie.Rmd +++ /dev/null @@ -1,155 +0,0 @@ ---- -title: "Evaluation of NicheNet's ligand-target predictions" -author: "Robin Browaeys" -date: "2018-11-12" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Evaluation of NicheNet's ligand-target predictions} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - - - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - # comment = "#>", - warning = FALSE, - message = FALSE -) -``` - -This vignette shows how the ligand-target predictions of NicheNet were evaluated. For validation, we collected transcriptome data of cells before and after they were treated by one or two ligands in culture. Using these ligand treatment datasets for validation has the advantage that observed gene expression changes can be directly attributed to the addition of the ligand(s). Hence, differentially expressed genes can be considered as a gold standard of target genes of a particular ligand. - -You can use the procedure shown here to evaluate your own model and compare its performance to NicheNet. Ligand treatment validation datasets and NicheNet's ligand-target model can be downloaded from Zenodo [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). - - -### Load nichenetr, the model we want to evaluate, and the datasets on which we want to evaluate it. -```{r} -library(nichenetr) -library(tidyverse) - -# Load in the ligand-target model -library(RcppCNPy) - -mat = npyLoad("C:/Users/rbrowaey/work/Research/NicheNet/StephanieChen/new_lt_matrix.npy") -column_names = read.csv("C:/Users/rbrowaey/work/Research/NicheNet/StephanieChen/column_names.txt",sep = ";") %>% colnames() -row_names = read.csv("C:/Users/rbrowaey/work/Research/NicheNet/StephanieChen/row_names.txt",sep = ";") %>% colnames() -colnames(mat) = column_names -rownames(mat) = row_names - -mat[1:10,1:20] - -ligand_target_matrix = mat %>% t() -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns - -# The ligand treatment expression datasets used for validation can be downloaded from Zenodo: -expression_settings_validation = readRDS(url("https://zenodo.org/record/3260758/files/expression_settings.rds")) - -#Ligand treatment datasets show the log fold change in expression of genes after treatment with one or more specific ligands. Here: example for the ligand NODAL: -head(expression_settings_validation$nodal_Nodal$diffexp) -``` - -### Example: transcriptional response prediction evaluation - -First, we will demonstrate how to evaluate the transcriptional response (i.e. target gene prediction) performance for all ligand treatment expression datasets. For this, we determine how well the model predicts which genes are differentially expressed after treatment with a ligand. Ideally, target genes with high regulatory potential scores for a ligand, should be differentially expressed in response to that ligand. - -For information of all collected ligand treatment datasets, see [Dataset information](evaluation_datasets.xlsx) - -For the sake of simplicity, we exclude in this vignette the ligand-treatment datasets profiling the response to multiple ligands. To see how to build a ligand-target model with target predictions for multiple ligands at once: see vignette [Construction of NicheNet's ligand-target model](model_construction.md): `vignette("model_construction", package="nichenetr")`. - -Step 1: convert expression datasets to the required format to perform target gene prediction - -```{r} -settings = expression_settings_validation %>% lapply(convert_expression_settings_evaluation) -settings = settings %>% discard(~length(.$from) > 1) -# settings = settings %>% keep(.$from %in% colnames(ligand_target_matrix)) - -``` - -Step 2: calculate the target gene prediction performances - -```{r} -# Evaluate transcriptional response prediction on every dataset -performances = settings %>% lapply(evaluate_target_prediction, ligand_target_matrix) %>% bind_rows() - - -``` - -Step 3: visualize the results: show here different classification evaluation metrics - -```{r, fig.width=8, fig.height=8} -# Visualize some classification evaluation metrics showing the target gene prediction performance -performances = performances %>% select(-aupr, -auc_iregulon,-pearson_log_pval,-spearman_log_pval ,-sensitivity_roc, -specificity_roc) %>% gather(key = scorename, value = scorevalue, auroc:spearman) -scorelabels = c(auroc="AUROC", aupr_corrected="AUPR (corrected)", auc_iregulon_corrected = "AUC-iRegulon (corrected)",pearson = "Pearson correlation", spearman = "Spearman's rank correlation",mean_rank_GST_log_pval = "Mean-rank gene-set enrichment") -scorerandom = c(auroc=0.5, aupr_corrected=0, auc_iregulon_corrected = 0, pearson = 0, spearman = 0,mean_rank_GST_log_pval = 0) %>% data.frame(scorevalue=.) %>% rownames_to_column("scorename") - -performances %>% - mutate(model = "NicheNet") %>% - ggplot() + - geom_violin(aes(model, scorevalue, group=model, fill = model)) + - geom_boxplot(aes(model, scorevalue, group = model),width = 0.05) + - scale_y_continuous("Score target prediction") + - facet_wrap(~scorename, scales = "free", labeller=as_labeller(scorelabels)) + - geom_hline(aes(yintercept=scorevalue), data=scorerandom, linetype = 2, color = "red") + - theme_bw() - -``` - -### Example: ligand activity prediction evaluation - -Now we will show how to assess the accuracy of the model in predicting whether cells were treated by a particular ligand or not. In other words, we will evaluate how well NicheNet prioritizes active ligand(s), given a set of differentially expressed genes. For this procedure, we assume the following: the better a ligand predicts the transcriptional response compared to other ligands, the more likely it is that this ligand is active. Therefore, we first get ligand activity (or ligand importance or feature importance) scores for all ligands on all ligand-treatment expression datasets of which the true acive ligand is known. Then we assess whether the truly active ligands get indeed higher ligand activity scores as should be for a good ligand-target model. - -A graphical summary of this procedure is visualized here below: - -![](ligand_activity_prediction_workflow_new.png) - -Step 1: convert expression datasets to the required format to perform ligand activity prediction - -```{r} -# convert expression datasets to correct format for ligand activity prediction -all_ligands = settings %>% extract_ligands_from_settings(combination = FALSE) %>% unlist() -settings_ligand_prediction = settings %>% convert_settings_ligand_prediction(all_ligands = all_ligands, validation = TRUE) -``` - -Step 2: calculate the ligand importances (these are classification evaluation metrics indicating how well a ligand can predict the observed DE genes in a specific ligand treatment dataset) - -```{r} -# infer ligand importances: for all ligands of interest, we assess how well a ligand explains the differential expression in a specific datasets (and we do this for all datasets). -ligand_importances = settings_ligand_prediction %>% lapply(get_single_ligand_importances,ligand_target_matrix) %>% bind_rows() -``` - -Step 3: evaluate how separate ligand importances can predict ligand activity - -```{r} -# Look at predictive performance of single/individual importance measures to predict ligand activity: of all ligands tested, the ligand that is truly active in a dataset should get the highest activity score (i.e. best target gene prediction performance) -evaluation_ligand_prediction = ligand_importances$setting %>% unique() %>% lapply(function(x){x}) %>% - lapply(wrapper_evaluate_single_importances_ligand_prediction,ligand_importances) %>% - bind_rows() %>% inner_join(ligand_importances %>% distinct(setting,ligand)) -``` - -Step 4: visualize the results: show here different classification evaluation metrics - -```{r,fig.width=8, fig.height=8} -# Visualize some classification evaluation metrics showing the ligand activity prediction performance -evaluation_ligand_prediction = evaluation_ligand_prediction %>% select(-aupr, -sensitivity_roc, -specificity_roc, -pearson, -spearman, -mean_rank_GST_log_pval) %>% gather(key = scorename, value = scorevalue, auroc:aupr_corrected) -scorelabels = c(auroc="AUROC", aupr_corrected="AUPR (corrected)") -scorerandom = c(auroc=0.5, aupr_corrected=0) %>% data.frame(scorevalue=.) %>% rownames_to_column("scorename") - -evaluation_ligand_prediction %>% - filter(importance_measure %in% c("auroc", "aupr_corrected", "mean_rank_GST_log_pval", "auc_iregulon_corrected", "pearson", "spearman")) %>% - ggplot() + - geom_violin(aes(importance_measure, scorevalue, group=importance_measure, fill = importance_measure)) + - geom_boxplot(aes(importance_measure, scorevalue, group = importance_measure),width = 0.1) + - scale_y_continuous("Evaluation ligand activity prediction") + - scale_x_discrete("Ligand activity measure") + - facet_wrap(~scorename, scales = "free", labeller=as_labeller(scorelabels)) + - geom_hline(aes(yintercept=scorevalue), data=scorerandom, linetype = 2, color = "red") + - theme_bw() + - theme(axis.text.x = element_text(angle = 90)) -``` - -This plots shows that using the pearson correlation coefficient target prediction metric is the best metric to use for ranking ligands according to predicted ligand activity. diff --git a/vignettes/parameter_optimization.Rmd b/vignettes/parameter_optimization.Rmd index 6a29b1f..ca89c9b 100644 --- a/vignettes/parameter_optimization.Rmd +++ b/vignettes/parameter_optimization.Rmd @@ -18,13 +18,14 @@ knitr::opts_chunk$set( collapse = TRUE, # comment = "#>", warning = FALSE, - message = FALSE + message = FALSE, + eval = FALSE ) ``` This vignette shows how we optimized both hyperparameters and data source weights via model-based optimization (see manuscript for more information). Because the optimization requires intensive parallel computation, we performed optimization in parallel on a gridengine cluster via the qsub package (https://cran.r-project.org/web/packages/qsub/qsub.pdf). This script is merely illustrative and should be adapted by the user to work on its own system. -The input data used in this vignette can be found at: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). +The [ligand treatment validation datasets](https://doi.org/10.5281/zenodo.3260758), and [NicheNet’s v2 ligand-target model](https://doi.org/10.5281/zenodo.7074290) can be downloaded from Zenodo. First, we will load in the required packages and networks we will use to construct the models which we will evaluate during the optimization procedure. ```{r} @@ -34,9 +35,9 @@ library(qsub) library(mlrMBO) # in the NicheNet framework, ligand-target links are predicted based on collected biological knowledge on ligand-receptor, signaling and gene regulatory interactions -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -sig_network = readRDS(url("https://zenodo.org/record/3260758/files/signaling_network.rds")) -gr_network = readRDS(url("https://zenodo.org/record/3260758/files/gr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +sig_network = readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) +gr_network = readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) ``` We will load in the ligand treatment validation datasets and try to optimize the parameters to maximize both target gene and ligand activity prediction. In this vignette, we do the optimization on all datasets. Alternatively, you can select a specific subset of datasets and evaluate the final performance on the left-out datasets. diff --git a/vignettes/parameter_optimization.md b/vignettes/parameter_optimization.md index ccde842..b7aa113 100644 --- a/vignettes/parameter_optimization.md +++ b/vignettes/parameter_optimization.md @@ -7,32 +7,48 @@ Robin Browaeys rmarkdown::render("vignettes/parameter_optimization.Rmd", output_format = "github_document") # please, don't run this!! --> -This vignette shows how we optimized both hyperparameters and data source weights via model-based optimization (see manuscript for more information). Because the optimization requires intensive parallel computation, we performed optimization in parallel on a gridengine cluster via the qsub package (https://cran.r-project.org/web/packages/qsub/qsub.pdf). This script is merely illustrative and should be adapted by the user to work on its own system. - -The input data used in this vignette can be found at: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758). - -First, we will load in the required packages and networks we will use to construct the models which we will evaluate during the optimization procedure. -```{r} +This vignette shows how we optimized both hyperparameters and data +source weights via model-based optimization (see manuscript for more +information). Because the optimization requires intensive parallel +computation, we performed optimization in parallel on a gridengine +cluster via the qsub package +(). This script +is merely illustrative and should be adapted by the user to work on its +own system. + +The [ligand treatment validation datasets](https://doi.org/10.5281/zenodo.3260758), and [NicheNet’s v2 ligand-target model](https://doi.org/10.5281/zenodo.7074290) can be downloaded from Zenodo. + +First, we will load in the required packages and networks we will use to +construct the models which we will evaluate during the optimization +procedure. + +``` r library(nichenetr) library(tidyverse) library(qsub) library(mlrMBO) # in the NicheNet framework, ligand-target links are predicted based on collected biological knowledge on ligand-receptor, signaling and gene regulatory interactions -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -sig_network = readRDS(url("https://zenodo.org/record/3260758/files/signaling_network.rds")) -gr_network = readRDS(url("https://zenodo.org/record/3260758/files/gr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +sig_network = readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) +gr_network = readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) ``` -We will load in the ligand treatment validation datasets and try to optimize the parameters to maximize both target gene and ligand activity prediction. In this vignette, we do the optimization on all datasets. Alternatively, you can select a specific subset of datasets and evaluate the final performance on the left-out datasets. +We will load in the ligand treatment validation datasets and try to +optimize the parameters to maximize both target gene and ligand activity +prediction. In this vignette, we do the optimization on all datasets. +Alternatively, you can select a specific subset of datasets and evaluate +the final performance on the left-out datasets. -```{r} +``` r # The ligand treatment expression datasets used for validation can be downloaded from Zenodo: expression_settings_validation = readRDS(url("https://zenodo.org/record/3260758/files/expression_settings.rds")) ``` -Define the optimization wrapper function and config information for the qsub package -```{r} +Define the optimization wrapper function and config information for the +qsub package + +``` r mlrmbo_optimization_wrapper = function(...){ library(nichenetr) library(mlrMBO) @@ -55,7 +71,7 @@ qsub_config = create_qsub_config( Perform optimization: -```{r} +``` r additional_arguments_topology_correction = list(source_names = source_weights_df$source %>% unique(), algorithm = "PPR", correct_topology = FALSE, @@ -95,16 +111,28 @@ job_mlrmbo = qsub_lapply(X = 1, 50, 24, 240, additional_arguments_topology_correction) ``` -Once the job is finised (which can take a few days - for shorter running time: reduce the number of iterations), run: -```{r} +Once the job is finised (which can take a few days - for shorter running +time: reduce the number of iterations), run: + +``` r res_job_mlrmbo = qsub_retrieve(job_mlrmbo) ``` Get now the most optimal parameter setting as a result of this analysis -```{r} +``` r optimized_parameters = res_job_mlrmbo %>% process_mlrmbo_nichenet_optimization(source_names = source_weights_df$source %>% unique()) ``` -When you would be interested to generate a context-specific model, it could be possible that you would like to optimize the parameters specifically on your dataset of interest and not on the general ligand treatment datasets (be aware for overfitting, though!). Because for your own data, you don't know the true active ligands, you could only optimize target gene prediction performance and not ligand activity performance. In order to this, you would need to change the expression settings in the optimization functions such that they include your data, and use the function `model_evaluation_optimization_application` instead of `model_evaluation_optimization` (define this as the function parameter in `makeMultiObjectiveFunction` shown here above). +When you would be interested to generate a context-specific model, it +could be possible that you would like to optimize the parameters +specifically on your dataset of interest and not on the general ligand +treatment datasets (be aware for overfitting, though!). Because for your +own data, you don’t know the true active ligands, you could only +optimize target gene prediction performance and not ligand activity +performance. In order to this, you would need to change the expression +settings in the optimization functions such that they include your data, +and use the function `model_evaluation_optimization_application` instead +of `model_evaluation_optimization` (define this as the function +parameter in `makeMultiObjectiveFunction` shown here above). diff --git a/vignettes/png2jpg.zip b/vignettes/png2jpg.zip deleted file mode 100644 index aee2bf4..0000000 Binary files a/vignettes/png2jpg.zip and /dev/null differ diff --git a/vignettes/seurat_steps.Rmd b/vignettes/seurat_steps.Rmd index fd74fdd..9c9f044 100644 --- a/vignettes/seurat_steps.Rmd +++ b/vignettes/seurat_steps.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ) ``` -In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat v3 object. Such a NicheNet analysis can help you to generate hypotheses about an intercellular communication process of interest for which you have single-cell gene expression data as a Seurat object. Specifically, NicheNet can predict 1) which ligands from one or more cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. +In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat v3/v4 object. Such a NicheNet analysis can help you to generate hypotheses about an intercellular communication process of interest for which you have single-cell gene expression data as a Seurat object. Specifically, NicheNet can predict 1) which ligands from one or more cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. Because NicheNet studies how ligands affect gene expression in putatively neighboring/interacting cells, you need to have data about this effect in gene expression you want to study. So, there need to be 'some kind of' differential expression in a receiver cell population, caused by ligands from one of more interacting sender cell populations. @@ -34,7 +34,7 @@ As example expression data of interacting cells, we will use mouse NICHE-seq dat In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. -The used NicheNet networks, ligand-target matrix and example expression data of interacting cells can be downloaded from Zenodo. The NicheNet networks and ligand-target matrix at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) and the Seurat object of the processed NICHE-seq single-cell data at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3531889.svg)](https://doi.org/10.5281/zenodo.3531889). +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. # Prepare NicheNet analysis @@ -54,7 +54,7 @@ If you would use and load other packages, we recommend to load these 3 packages ### Read in the expression data of interacting cells: -The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a Seurat v3 object and that gene should be named by their official mouse/human gene symbol. +The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a Seurat v3/v4 object and that gene should be named by their official mouse/human gene symbol. ```{r} seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) @@ -77,30 +77,33 @@ DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") ### Read in NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks: ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -head(lr_network) +organism = "mouse" -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) +if(organism == "human"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) +} else if(organism == "mouse"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) +} + +lr_network = lr_network %>% distinct(from, to) +head(lr_network) +ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns + +weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network head(weighted_networks$gr) # interactions and their weights in the gene regulatory network - ``` -Because the expression data is of mouse origin, we will convert the NicheNet network gene symbols from human to mouse based on one-to-one orthology: +If your expression data has the older gene symbols, you may want to use our alias conversion function to avoid the loss of gene names. ```{r} -lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() -colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() -rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - -ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] - -weighted_networks_lr = weighted_networks_lr %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() +seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") ``` # Perform the NicheNet analysis @@ -166,16 +169,16 @@ potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% ```{r} ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson))) +ligand_activities = ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) ligand_activities ``` -The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of expressed genes. In our validation study, we showed that the pearson correlation coefficient between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, NicheNet ranks the ligands based on their pearson correlation coefficient. This allows us to prioritize ligands inducing the antiviral response in CD8 T cells. +The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of expressed genes. In our validation study, we showed that the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity (this was the Pearson correlation for v1). Therefore, NicheNet ranks the ligands based on their AUPR. This allows us to prioritize ligands inducing the antiviral response in CD8 T cells. -The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is here 20. +The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is here 30. ```{r} -best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() +best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() ``` These ligands are expressed by one or more of the input sender cells. To see which cell population expresses which of these top-ranked ligands, you can run the following: @@ -209,7 +212,7 @@ p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized p_ligand_target_network ``` -Note that not all ligands from the top 20 are present in this ligand-target heatmap. The left-out ligands are ligands that don't have target genes with high enough regulatory potential scores. Therefore, they did not survive the used cutoffs. To include them, you can be less stringent in the used cutoffs. +Note that not all ligands from the top 30 are present in this ligand-target heatmap. The left-out ligands are ligands that don't have target genes with high enough regulatory potential scores. Therefore, they did not survive the used cutoffs. To include them, you can be less stringent in the used cutoffs. ### Receptors of top-ranked ligands @@ -243,41 +246,6 @@ p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap p_ligand_receptor_network ``` -### Receptors of top-ranked ligands, but after considering only bona fide ligand-receptor interactions documented in literature and publicly available databases - -```{r} -lr_network_strict = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") -ligands_bona_fide = lr_network_strict %>% pull(from) %>% unique() -receptors_bona_fide = lr_network_strict %>% pull(to) %>% unique() - -lr_network_top_df_large_strict = lr_network_top_df_large %>% distinct(from,to) %>% inner_join(lr_network_strict, by = c("from","to")) %>% distinct(from,to) -lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% inner_join(lr_network_top_df_large, by = c("from","to")) - -lr_network_top_df_strict = lr_network_top_df_large_strict %>% spread("from","weight",fill = 0) -lr_network_top_matrix_strict = lr_network_top_df_strict %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df_strict$to) - -dist_receptors = dist(lr_network_top_matrix_strict, method = "binary") -hclust_receptors = hclust(dist_receptors, method = "ward.D2") -order_receptors = hclust_receptors$labels[hclust_receptors$order] - -dist_ligands = dist(lr_network_top_matrix_strict %>% t(), method = "binary") -hclust_ligands = hclust(dist_ligands, method = "ward.D2") -order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - -order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix_strict)) -order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix_strict)) - -vis_ligand_receptor_network_strict = lr_network_top_matrix_strict[order_receptors, order_ligands_receptor] -rownames(vis_ligand_receptor_network_strict) = order_receptors %>% make.names() -colnames(vis_ligand_receptor_network_strict) = order_ligands_receptor %>% make.names() - -``` - -```{r} -p_ligand_receptor_network_strict = vis_ligand_receptor_network_strict %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential\n(bona fide)") -p_ligand_receptor_network_strict -``` - ## 6) Add log fold change information of ligands from sender cells In some cases, it might be possible to also check upregulation of ligands in sender cells. This can add a useful extra layer of information next to the ligand activities defined by NicheNet, because you can assume that some of the ligands inducing DE in receiver cells, will be DE themselves in the sender cells. @@ -318,13 +286,13 @@ For example, you can make a combined heatmap of ligand activities, ligand expres ```{r} # ligand activity heatmap -ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) +ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) -rownames(ligand_pearson_matrix) = rownames(ligand_pearson_matrix) %>% make.names() -colnames(ligand_pearson_matrix) = colnames(ligand_pearson_matrix) %>% make.names() +rownames(ligand_aupr_matrix) = rownames(ligand_aupr_matrix) %>% make.names() +colnames(ligand_aupr_matrix) = colnames(ligand_aupr_matrix) %>% make.names() -vis_ligand_pearson = ligand_pearson_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("Pearson") -p_ligand_pearson = vis_ligand_pearson %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Pearson correlation coefficient\ntarget gene prediction ability)") + theme(legend.text = element_text(size = 9)) +vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") +p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") + theme(legend.text = element_text(size = 9)) ``` @@ -339,16 +307,16 @@ rotated_dotplot = DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), ```{r, fig.width=12, fig.height=10} figures_without_legend = cowplot::plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), align = "hv", nrow = 1, - rel_widths = c(ncol(vis_ligand_pearson)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) legends = cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_pearson)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), @@ -362,7 +330,7 @@ combined_plot # Remarks -1. Top-ranked ligands and target genes shown here differ from the predictions shown in the respective case study in the NicheNet paper because a different definition of expressed genes was used. +Top-ranked ligands and target genes shown here differ from the predictions shown in the respective case study in the NicheNet paper because 1) a different definition of expressed genes was used, and 2) we have updated the ligand-target matrix to include more data sources. # References diff --git a/vignettes/seurat_steps.md b/vignettes/seurat_steps.md index d32ecbb..5c55653 100644 --- a/vignettes/seurat_steps.md +++ b/vignettes/seurat_steps.md @@ -9,14 +9,14 @@ rmarkdown::render("vignettes/seurat_steps.Rmd", output_format = "github_document --> In this vignette, you can learn how to perform a basic NicheNet analysis -on a Seurat v3 object. Such a NicheNet analysis can help you to generate -hypotheses about an intercellular communication process of interest for -which you have single-cell gene expression data as a Seurat object. -Specifically, NicheNet can predict 1) which ligands from one or more -cell population(s) (“sender/niche”) are most likely to affect target -gene expression in an interacting cell population (“receiver/target”) -and 2) which specific target genes are affected by which of these -predicted ligands. +on a Seurat v3/v4 object. Such a NicheNet analysis can help you to +generate hypotheses about an intercellular communication process of +interest for which you have single-cell gene expression data as a Seurat +object. Specifically, NicheNet can predict 1) which ligands from one or +more cell population(s) (“sender/niche”) are most likely to affect +target gene expression in an interacting cell population +(“receiver/target”) and 2) which specific target genes are affected by +which of these predicted ligands. Because NicheNet studies how ligands affect gene expression in putatively neighboring/interacting cells, you need to have data about @@ -54,12 +54,10 @@ regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. -The used NicheNet networks, ligand-target matrix and example expression -data of interacting cells can be downloaded from Zenodo. The NicheNet -networks and ligand-target matrix at -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) -and the Seurat object of the processed NICHE-seq single-cell data at -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3531889.svg)](https://doi.org/10.5281/zenodo.3531889). +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) +and the [Seurat object of the processed NICHE-seq single-cell +data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from +Zenodo. # Prepare NicheNet analysis @@ -91,7 +89,7 @@ The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a -Seurat v3 object and that gene should be named by their official +Seurat v3/v4 object and that gene should be named by their official mouse/human gene symbol. ``` r @@ -118,11 +116,11 @@ seuratObj@meta.data$celltype %>% table() # note that the number of cells of some DimPlot(seuratObj, reduction = "tsne") ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-23-1.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png) Visualize the data to see to which condition cells belong. The metadata dataframe column that denotes the condition (steady-state or after LCMV -infection) is here called ‘aggregate.’ +infection) is here called ‘aggregate’. ``` r seuratObj@meta.data$aggregate %>% table() @@ -132,69 +130,72 @@ seuratObj@meta.data$aggregate %>% table() DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-24-1.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png) ### Read in NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks: ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 - -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -head(lr_network) -## # A tibble: 6 x 4 -## from to source database -## -## 1 CXCL1 CXCR2 kegg_cytokines kegg -## 2 CXCL2 CXCR2 kegg_cytokines kegg -## 3 CXCL3 CXCR2 kegg_cytokines kegg -## 4 CXCL5 CXCR2 kegg_cytokines kegg -## 5 PPBP CXCR2 kegg_cytokines kegg -## 6 CXCL6 CXCR2 kegg_cytokines kegg - -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network %>% distinct(from,to), by = c("from","to")) +organism = "mouse" + +if(organism == "human"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) +} else if(organism == "mouse"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) + +} + +lr_network = lr_network %>% distinct(from, to) +head(lr_network) +## # A tibble: 6 × 2 +## from to +## +## 1 2300002M23Rik Ddr1 +## 2 2610528A11Rik Gpr15 +## 3 9530003J23Rik Itgal +## 4 a Atrn +## 5 a F11r +## 6 a Mc1r +ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns +## 2300002M23Rik 2610528A11Rik 9530003J23Rik a A2m +## 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 1.390053e-05 +## 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 1.345536e-05 +## 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 9.802264e-05 +## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 +## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 + +weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network -## # A tibble: 6 x 3 -## from to weight -## -## 1 A1BG ABCC6 0.422 -## 2 A1BG ACE2 0.101 -## 3 A1BG ADAM10 0.0970 -## 4 A1BG AGO1 0.0525 -## 5 A1BG AKT1 0.0855 -## 6 A1BG ANXA7 0.457 +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010F05Rik App 0.110 +## 2 0610010F05Rik Cat 0.0673 +## 3 0610010F05Rik H1f2 0.0660 +## 4 0610010F05Rik Lrrc49 0.0829 +## 5 0610010F05Rik Nicn1 0.0864 +## 6 0610010F05Rik Srpk1 0.123 head(weighted_networks$gr) # interactions and their weights in the gene regulatory network -## # A tibble: 6 x 3 -## from to weight -## -## 1 A1BG A2M 0.0294 -## 2 AAAS GFAP 0.0290 -## 3 AADAC CYP3A4 0.0422 -## 4 AADAC IRF8 0.0275 -## 5 AATF ATM 0.0330 -## 6 AATF ATR 0.0355 +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010K14Rik 0610010K14Rik 0.121 +## 2 0610010K14Rik 2510039O18Rik 0.121 +## 3 0610010K14Rik 2610021A01Rik 0.0256 +## 4 0610010K14Rik 9130401M01Rik 0.0263 +## 5 0610010K14Rik Alg1 0.127 +## 6 0610010K14Rik Alox12 0.128 ``` -Because the expression data is of mouse origin, we will convert the -NicheNet network gene symbols from human to mouse based on one-to-one -orthology: +If your expression data has the older gene symbols, you may want to use +our alias conversion function to avoid the loss of gene names. ``` r -lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() -colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() -rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - -ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] - -weighted_networks_lr = weighted_networks_lr %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() +seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") ``` # Perform the NicheNet analysis @@ -210,8 +211,8 @@ analysis consist of the following steps: ## 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations In this case study, the receiver cell population is the ‘CD8 T’ cell -population, whereas the sender cell populations are ‘CD4 T,’ ‘Treg,’ -‘Mono,’ ‘NK,’ ‘B’ and ‘DC.’ We will consider a gene to be expressed when +population, whereas the sender cell populations are ‘CD4 T’, ‘Treg’, +‘Mono’, ‘NK’, ‘B’ and ‘DC’. We will consider a gene to be expressed when it is expressed in at least 10% of cells in one cluster. ``` r @@ -234,8 +235,8 @@ expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() Here, the gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus -‘LCMV,’ whereas the reference/steady-state condition is ‘SS.’ The notion -of conditions can be extracted from the metadata column ‘aggregate.’ The +‘LCMV’, whereas the reference/steady-state condition is ‘SS’. The notion +of conditions can be extracted from the metadata column ‘aggregate’. The method to calculate the differential expression is here the standard Seurat Wilcoxon test, but this can be changed if necessary. @@ -275,39 +276,40 @@ potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% ``` r ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson))) +ligand_activities = ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) ligand_activities -## # A tibble: 44 x 5 -## test_ligand auroc aupr pearson rank -## -## 1 Ebi3 0.638 0.234 0.197 1 -## 2 Il15 0.582 0.163 0.0961 2 -## 3 Crlf2 0.549 0.163 0.0758 3 -## 4 App 0.499 0.141 0.0655 4 -## 5 Tgfb1 0.494 0.140 0.0558 5 -## 6 Ptprc 0.536 0.149 0.0554 6 -## 7 H2-M3 0.525 0.157 0.0528 7 -## 8 Icam1 0.543 0.142 0.0486 8 -## 9 Cxcl10 0.531 0.141 0.0408 9 -## 10 Adam17 0.517 0.137 0.0359 10 -## # ... with 34 more rows +## # A tibble: 73 × 6 +## test_ligand auroc aupr aupr_corrected pearson rank +## +## 1 Ebi3 0.663 0.390 0.244 0.301 1 +## 2 Ptprc 0.642 0.310 0.165 0.167 2 +## 3 H2-M3 0.608 0.292 0.146 0.179 3 +## 4 H2-M2 0.611 0.279 0.133 0.153 5 +## 5 H2-T10 0.611 0.279 0.133 0.153 5 +## 6 H2-T22 0.611 0.279 0.133 0.153 5 +## 7 H2-T23 0.611 0.278 0.132 0.153 7 +## 8 H2-K1 0.605 0.268 0.122 0.142 8 +## 9 H2-Q4 0.605 0.268 0.122 0.141 10 +## 10 H2-Q6 0.605 0.268 0.122 0.141 10 +## # … with 63 more rows ``` The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of -expressed genes. In our validation study, we showed that the pearson -correlation coefficient between a ligand’s target predictions and the -observed transcriptional response was the most informative measure to -define ligand activity. Therefore, NicheNet ranks the ligands based on -their pearson correlation coefficient. This allows us to prioritize -ligands inducing the antiviral response in CD8 T cells. +expressed genes. In our validation study, we showed that the area under +the precision-recall curve (AUPR) between a ligand’s target predictions +and the observed transcriptional response was the most informative +measure to define ligand activity (this was the Pearson correlation for +v1). Therefore, NicheNet ranks the ligands based on their AUPR. This +allows us to prioritize ligands inducing the antiviral response in CD8 T +cells. The number of top-ranked ligands that are further used to predict active -target genes and construct an active ligand-receptor network is here 20. +target genes and construct an active ligand-receptor network is here 30. ``` r -best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique() +best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() ``` These ligands are expressed by one or more of the input sender cells. To @@ -318,7 +320,7 @@ you can run the following: DotPlot(seuratObj, features = best_upstream_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-33-1.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png) As you can see, most op the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. @@ -345,9 +347,9 @@ p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized p_ligand_target_network ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-35-1.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png) -Note that not all ligands from the top 20 are present in this +Note that not all ligands from the top 30 are present in this ligand-target heatmap. The left-out ligands are ligands that don’t have target genes with high enough regulatory potential scores. Therefore, they did not survive the used cutoffs. To include them, you can be less @@ -385,43 +387,7 @@ p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap p_ligand_receptor_network ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-37-1.png) - -### Receptors of top-ranked ligands, but after considering only bona fide ligand-receptor interactions documented in literature and publicly available databases - -``` r -lr_network_strict = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") -ligands_bona_fide = lr_network_strict %>% pull(from) %>% unique() -receptors_bona_fide = lr_network_strict %>% pull(to) %>% unique() - -lr_network_top_df_large_strict = lr_network_top_df_large %>% distinct(from,to) %>% inner_join(lr_network_strict, by = c("from","to")) %>% distinct(from,to) -lr_network_top_df_large_strict = lr_network_top_df_large_strict %>% inner_join(lr_network_top_df_large, by = c("from","to")) - -lr_network_top_df_strict = lr_network_top_df_large_strict %>% spread("from","weight",fill = 0) -lr_network_top_matrix_strict = lr_network_top_df_strict %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df_strict$to) - -dist_receptors = dist(lr_network_top_matrix_strict, method = "binary") -hclust_receptors = hclust(dist_receptors, method = "ward.D2") -order_receptors = hclust_receptors$labels[hclust_receptors$order] - -dist_ligands = dist(lr_network_top_matrix_strict %>% t(), method = "binary") -hclust_ligands = hclust(dist_ligands, method = "ward.D2") -order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] - -order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix_strict)) -order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix_strict)) - -vis_ligand_receptor_network_strict = lr_network_top_matrix_strict[order_receptors, order_ligands_receptor] -rownames(vis_ligand_receptor_network_strict) = order_receptors %>% make.names() -colnames(vis_ligand_receptor_network_strict) = order_ligands_receptor %>% make.names() -``` - -``` r -p_ligand_receptor_network_strict = vis_ligand_receptor_network_strict %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential\n(bona fide)") -p_ligand_receptor_network_strict -``` - -![](seurat_steps_files/figure-gfm/unnamed-chunk-39-1.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png) ## 6) Add log fold change information of ligands from sender cells @@ -458,15 +424,16 @@ p_ligand_lfc = vis_ligand_lfc %>% make_threecolor_heatmap_ggplot("Prioritized li p_ligand_lfc ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-40-1.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png) ``` r + # change colors a bit to make them more stand out p_ligand_lfc = p_ligand_lfc + scale_fill_gradientn(colors = c("midnightblue","blue", "grey95", "grey99","firebrick1","red"),values = c(0,0.1,0.2,0.25, 0.40, 0.7,1), limits = c(vis_ligand_lfc %>% min() - 0.1, vis_ligand_lfc %>% max() + 0.1)) p_ligand_lfc ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-40-2.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png) ## 7) Summary visualizations of the NicheNet analysis @@ -478,13 +445,13 @@ for expression. ``` r # ligand activity heatmap -ligand_pearson_matrix = ligand_activities %>% select(pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) +ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) -rownames(ligand_pearson_matrix) = rownames(ligand_pearson_matrix) %>% make.names() -colnames(ligand_pearson_matrix) = colnames(ligand_pearson_matrix) %>% make.names() +rownames(ligand_aupr_matrix) = rownames(ligand_aupr_matrix) %>% make.names() +colnames(ligand_aupr_matrix) = colnames(ligand_aupr_matrix) %>% make.names() -vis_ligand_pearson = ligand_pearson_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("Pearson") -p_ligand_pearson = vis_ligand_pearson %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "Pearson correlation coefficient\ntarget gene prediction ability)") + theme(legend.text = element_text(size = 9)) +vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") +p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") + theme(legend.text = element_text(size = 9)) ``` ``` r @@ -496,17 +463,18 @@ rotated_dotplot = DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), ``` ``` r + figures_without_legend = cowplot::plot_grid( - p_ligand_pearson + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), align = "hv", nrow = 1, - rel_widths = c(ncol(vis_ligand_pearson)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) legends = cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_pearson)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), @@ -517,13 +485,14 @@ combined_plot = cowplot::plot_grid(figures_without_legend, legends, rel_heights combined_plot ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-43-1.png) +![](seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png) # Remarks -1. Top-ranked ligands and target genes shown here differ from the - predictions shown in the respective case study in the NicheNet paper - because a different definition of expressed genes was used. +Top-ranked ligands and target genes shown here differ from the +predictions shown in the respective case study in the NicheNet paper +because 1) a different definition of expressed genes was used, and 2) we +have updated the ligand-target matrix to include more data sources. # References diff --git a/vignettes/seurat_steps_TF_activity.Rmd b/vignettes/seurat_steps_TF_activity.Rmd deleted file mode 100644 index 06854e4..0000000 --- a/vignettes/seurat_steps_TF_activity.Rmd +++ /dev/null @@ -1,238 +0,0 @@ ---- -title: "Perform NicheNet-TF activity analysis starting from a Seurat object: step-by-step analysis" -author: "Robin Browaeys" -date: "2021-03-31" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Perform NicheNet-TF activity analysis starting from a Seurat object: step-by-step analysis} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} -bibliography: library.bib ---- - - - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - # comment = "#>", - warning = FALSE, - message = FALSE -) -``` - -In this vignette, you can learn how to perform a TF activity analysis within the NicheNet framework on a Seurat object. -This is very similar to NicheNet's ligand activity, except that we will now predict activities of TFs in the receiver cell type of interest itself. This is very similar because we can just use the NicheNet TF-target matrix instead of the ligand-target matrix. - -It is very important to know that the TFs within the TF-target model of NicheNet are not only 'strict sense bona fide' TFs - they should more be considered as regulators than as TFs (eg in this matrix some signaling molecules are also considered as regulators even though they are not bona fide TFs). - -In this vignette, we demonstrate the use of NicheNet for TF activity analysis on a Seurat Object. - -As example expression data, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [See @medaglia_spatial_2017]. We will use NicheNet to predict TF activity in response to this LCMV infection - based on differential expression between CD8 T cells in steady-state and CD8 T cells after LCMV infection. - -The used NicheNet networks, tf-target matrix and example expression data of interacting cells can be downloaded from Zenodo. The NicheNet networks and tf-target matrix at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) and the Seurat object of the processed NICHE-seq single-cell data at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3531889.svg)](https://doi.org/10.5281/zenodo.3531889). - -# Prepare NicheNet analysis - -## Load required packages, read in the Seurat object with processed expression data and NicheNet's and TF-target prior model. - -The NicheNet TF-target matrix denotes the prior potential that particular tfs might regulate the expression of particular target genes. This matrix is necessary to prioritize possible TFs based on observed gene expression effects (i.e. NicheNet's TF activity analysis) and infer affected target genes of these prioritized TFs. - -### Load Packages: - -```{r} -library(nichenetr) -library(Seurat) # please update to Seurat V4 -library(tidyverse) -``` - -If you would use and load other packages, we recommend to load these 3 packages after the others. - -### Read in the expression data of interacting cells: - -The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a Seurat v3 object and that gene should be named by their official mouse/human gene symbol. - -```{r} -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) -seuratObj@meta.data %>% head() -``` - -Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells (DCs) and inflammatory monocytes -```{r} -seuratObj@meta.data$celltype %>% table() # note that the number of cells of some cell types is very low and should preferably be higher for a real application -DimPlot(seuratObj, reduction = "tsne") -``` - -Visualize the data to see to which condition cells belong. The metadata dataframe column that denotes the condition (steady-state or after LCMV infection) is here called 'aggregate'. - -```{r} -seuratObj@meta.data$aggregate %>% table() -DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") -``` - -### Read in NicheNet's tf-target prior model: - -```{r} -tf_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/tf_target_matrix.rds")) - -tf_target_matrix[1:5,1:5] # target genes in rows, tfs in columns -``` - -Because the expression data is of mouse origin, we will convert the NicheNet network gene symbols from human to mouse based on one-to-one orthology: - -```{r} -colnames(tf_target_matrix) = tf_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() -rownames(tf_target_matrix) = tf_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() - -tf_target_matrix = tf_target_matrix %>% .[!is.na(rownames(tf_target_matrix)), !is.na(colnames(tf_target_matrix))] - -``` - -You can check top target genes of TFs (based on prior information) as follows: - -eg 25 top targets of Srebf2: - -```{r} - -extract_top_n_targets("Srebf2", 25, tf_target_matrix) - -``` - -# Perform the NicheNet analysis - -In this case study, we want to apply NicheNet to predict which TFs could have induced the differential expression in CD8 T cells after LCMV infection (and are thus active). - -The pipeline of a TF activity analysis consist of the following steps: - -## 1. Define a “receiver/target” cell population present in your expression data and determine which genes are expressed in this population - -In this case study, the receiver cell population is the 'CD8 T' cell population -We will consider a gene to be expressed when it is expressed in at least 10% of cells in one cluster. This can also be lower, such as 5%. - -```{r} -## receiver -receiver = "CD8 T" -expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) # can be changed to 0.05 - -background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(tf_target_matrix)] -``` - -## 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are regulated by differentially active TFs - -Here, the gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus 'LCMV', whereas the reference/steady-state condition is 'SS'. The notion of conditions can be extracted from the metadata column 'aggregate'. The method to calculate the differential expression is here the standard Seurat Wilcoxon test, but this can be changed if necessary. - -```{r} -seurat_obj_receiver= subset(seuratObj, idents = receiver) -seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[["aggregate"]]) - -condition_oi = "LCMV" -condition_reference = "SS" - -DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10) %>% rownames_to_column("gene") - -geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) -geneset_oi = geneset_oi %>% .[. %in% rownames(tf_target_matrix)] -``` - -## 3. Define a set of potential tfs: these are tfs that are expressed by by the “receiver/target” population - -Note: expression levels of some TFs is very low, so it might be interesting to rerun the analysis with all TFs in the database, and not only with the expressed ones! - -```{r} -tfs = colnames(tf_target_matrix) - -expressed_tfs = intersect(tfs,expressed_genes_receiver) - -potential_tfs = expressed_tfs -``` - -## 4) Perform NicheNet tf activity analysis: rank the potential tfs based on the presence of their target genes in the gene set of interest (compared to the background set of genes) - -We just use the `predict_ligand_activities` as for classic NicheNet analyses, but now with the TF-target matrix as input. - -```{r} -tf_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = tf_target_matrix, potential_ligands = potential_tfs) - -tf_activities = tf_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson))) -tf_activities -``` - -The different tf activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a tf can predict the observed differentially expressed genes compared to the background of expressed genes. In our validation study, we showed that the pearson correlation coefficient between a tf's target predictions and the observed transcriptional response was the most informative measure to define tf activity. Therefore, NicheNet ranks the tfs based on their pearson correlation coefficient. This allows us to prioritize tfs inducing the antiviral response in CD8 T cells. - -The number of top-ranked tfs that are further used to predict active target genes and construct an active tf-receptor network is here 20. - -We can also include some additional top TFs based on the ranking of the AUPR and the AUROC. -For important TFs, the number of 20 might also be a bit too low, and considering more TFs might be useful. - -```{r} -best_upstream_tfs = c(tf_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) %>% unique(), tf_activities %>% top_n(20, auroc) %>% arrange(-auroc) %>% pull(test_ligand) %>% unique(), tf_activities %>% top_n(20, aupr) %>% arrange(-aupr) %>% pull(test_ligand) %>% unique()) %>% unique() -``` - - -## 5) Infer top-predicted target genes of tfs that are top-ranked in the tf activity analysis - -### Active target gene inference - -n = 200 --> set to 100 to become more stringent - -```{r} -active_tf_target_links_df = best_upstream_tfs %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = tf_target_matrix, n = 200) %>% bind_rows() %>% drop_na() - -active_tf_target_links = prepare_ligand_target_visualization(ligand_target_df = active_tf_target_links_df, ligand_target_matrix = tf_target_matrix, cutoff = 0.33) - -order_tfs = intersect(best_upstream_tfs, colnames(active_tf_target_links)) %>% rev() %>% make.names() -order_targets = active_tf_target_links_df$target %>% unique() %>% intersect(rownames(active_tf_target_links)) %>% make.names() -rownames(active_tf_target_links) = rownames(active_tf_target_links) %>% make.names() # make.names() for heatmap visualization of genes like H2-T23 -colnames(active_tf_target_links) = colnames(active_tf_target_links) %>% make.names() # make.names() for heatmap visualization of genes like H2-T23 - -vis_tf_target = active_tf_target_links[order_targets,order_tfs] %>% t() -``` - -```{r, fig.height = 11, fig.width=21} -p_tf_target_network = vis_tf_target %>% make_heatmap_ggplot("Prioritized tfs","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.0045,0.0090)) -p_tf_target_network -``` - - - -## 6) Add log fold change information of tfs from receiver cells - -In some cases, it might be possible to also check upregulation of tfs in receiver cells. -Here this is possible: we will define the log fold change between LCMV and steady-state in all sender cell types and visualize this as extra information. - -In other case studies, it could be possible to plot expression of the TF across different cell types / sub cell types - -```{r, fig.width=10} -# DE analysis for each sender cell type -# this uses a new nichenetr function - reinstall nichenetr if necessary! -DE_table_all = Idents(seuratObj) %>% levels() %>% intersect(receiver) %>% lapply(get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = condition_oi, condition_reference = condition_reference, expression_pct = 0.10) %>% reduce(full_join) -DE_table_all[is.na(DE_table_all)] = 0 - -# Combine tf activities with DE information -tf_activities_de = tf_activities %>% select(test_ligand, pearson) %>% rename(tf = test_ligand) %>% left_join(DE_table_all %>% rename(tf = gene)) -tf_activities_de[is.na(tf_activities_de)] = 0 - -# make LFC heatmap -lfc_matrix = tf_activities_de %>% select(-tf, -pearson) %>% as.matrix() %>% magrittr::set_rownames(tf_activities_de$tf) -rownames(lfc_matrix) = rownames(lfc_matrix) %>% make.names() - -order_tfs = order_tfs[order_tfs %in% rownames(lfc_matrix)] -vis_tf_lfc = lfc_matrix[order_tfs,] - -vis_tf_lfc = matrix(vis_tf_lfc, ncol = 1) -rownames(vis_tf_lfc) = order_tfs -colnames(vis_tf_lfc) = receiver - -p_tf_lfc = vis_tf_lfc %>% make_threecolor_heatmap_ggplot("Prioritized tfs","LFC in receiver", low_color = "midnightblue",mid_color = "white", mid = median(vis_tf_lfc), high_color = "red",legend_position = "top", x_axis_position = "top", legend_title = "LFC") + theme(axis.text.y = element_text(face = "italic")) -p_tf_lfc - -# change colors a bit to make them more stand out -p_tf_lfc = p_tf_lfc + scale_fill_gradientn(colors = c("midnightblue","blue", "grey95", "grey99","firebrick1","red"),values = c(0,0.1,0.2,0.25, 0.40, 0.7,1), limits = c(vis_tf_lfc %>% min() - 0.1, vis_tf_lfc %>% max() + 0.1)) -p_tf_lfc -``` - -Interestingly, the top active TFs with many target genes among DE genes are DE as well (but know this should not be the case necessarily) - - diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-113-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-113-1.png deleted file mode 100644 index f21a1e3..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-113-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-114-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-114-1.png deleted file mode 100644 index ad0a5d3..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-114-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-123-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-123-1.png deleted file mode 100644 index 1409bef..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-123-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-125-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-125-1.png deleted file mode 100644 index 851b747..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-125-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-127-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-127-1.png deleted file mode 100644 index 464b42e..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-127-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-129-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-129-1.png deleted file mode 100644 index d30faa0..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-129-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png index 1409bef..5d19a7e 100644 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-130-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-130-1.png deleted file mode 100644 index 948d548..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-130-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-130-2.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-130-2.png deleted file mode 100644 index 1c8dbaf..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-130-2.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-133-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-133-1.png deleted file mode 100644 index 877ba3b..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-133-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-14-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-14-1.png deleted file mode 100644 index 434936b..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-14-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png index 851b747..e113a6e 100644 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-16-1.png deleted file mode 100644 index cd391e8..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-16-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png index 464b42e..fcd8523 100644 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png index 464b42e..30241ad 100644 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png new file mode 100644 index 0000000..35c85ab Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-19-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-19-1.png deleted file mode 100644 index d30faa0..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-19-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-20-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-20-1.png deleted file mode 100644 index 948d548..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-20-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-20-2.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-20-2.png deleted file mode 100644 index 1c8dbaf..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-20-2.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png index a85935a..ad57ef8 100644 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-2.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-2.png deleted file mode 100644 index e444bb1..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-2.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-23-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-23-1.png deleted file mode 100644 index f21a1e3..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-23-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-24-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-24-1.png deleted file mode 100644 index ad0a5d3..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-24-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-28-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-28-1.png deleted file mode 100644 index e7b0816..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-28-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-29-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-29-1.png deleted file mode 100644 index 8e031bf..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-29-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png index f21a1e3..b5db526 100644 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-33-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-33-1.png deleted file mode 100644 index 1409bef..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-33-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-35-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-35-1.png deleted file mode 100644 index 851b747..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-35-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-37-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-37-1.png deleted file mode 100644 index 464b42e..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-37-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-38-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-38-1.png deleted file mode 100644 index abce3a2..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-38-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-39-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-39-1.png deleted file mode 100644 index d30faa0..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-39-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png index ad0a5d3..12ca0a9 100644 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png and b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-40-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-40-1.png deleted file mode 100644 index 948d548..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-40-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-40-2.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-40-2.png deleted file mode 100644 index 1c8dbaf..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-40-2.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-42-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-42-1.png deleted file mode 100644 index 914c681..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-42-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-43-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-43-1.png deleted file mode 100644 index 877ba3b..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-43-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-44-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-44-1.png deleted file mode 100644 index 7c60ab5..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-44-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-46-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-46-1.png deleted file mode 100644 index 9775530..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-46-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-5-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-5-1.png deleted file mode 100644 index d0aa3bd..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-5-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_prioritization.Rmd b/vignettes/seurat_steps_prioritization.Rmd new file mode 100644 index 0000000..8e6ccff --- /dev/null +++ b/vignettes/seurat_steps_prioritization.Rmd @@ -0,0 +1,281 @@ +--- +title: "Perform NicheNet analysis with prioritization" +author: "Robin Browaeys & Chananchida Sang-aram" +date: "2023-07-20" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Perform NicheNet analysis starting from a Seurat object: step-by-step analysis with prioritization} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +bibliography: library.bib +--- + + + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + # comment = "#>", + warning = FALSE, + message = FALSE +) +``` + +```{r} +### Load Packages +library(nichenetr) +library(Seurat) # please update to Seurat V4 +library(tidyverse) + +### Read in Seurat object +seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj$celltype <- make.names(seuratObj$celltype) +Idents(seuratObj) <- seuratObj$celltype + +``` + +In this vignette, we will extend the basic NicheNet analysis analysis from [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md) by incorporating gene expression as part of the prioritization This is a generalization of the [Differential NicheNet](differential_nichenet.md) and [MultiNicheNet](https://github.com/saeyslab/multinichenetr) approach. While the original NicheNet only ranks ligands based on the ligand activity analysis, it is now also possible to prioritize ligands based on upregulation of the ligand/receptor, and the cell-type and condition specificity of hte ligand and receptor. + +Make sure you understand the different steps in a NicheNet analysis that are described in that vignette before proceeding with this vignette and performing a real NicheNet analysis on your data. + +We will again make use of mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. We will NicheNet to explore immune cell crosstalk in response to this LCMV infection. In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. + +Hence, we have to make some additional calculations, including DE of the ligand/receptor in a sender/receiver cell type, and the average expression of each ligand/receptor in each sender/receiver cell type. The DE analysis boils down to computing pairwise tests between the cell type of interest and other cell types in the dataset. We will subset the data to only the condition of interest, "LCMV". For this analysis we will consider all cell types as both sender and receiver, as we want the ligand/receptor to be specific. + +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. + +## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks. + +The NicheNet ligand-receptor network and weighted networks are necessary to define and show possible ligand-receptor interactions between two cell populations. The ligand-target matrix denotes the prior potential that particular ligands might regulate the expression of particular target genes. This matrix is necessary to prioritize possible ligand-receptor interactions based on observed gene expression effects (i.e. NicheNet's ligand activity analysis) and infer affected target genes of these prioritized ligands. + +```{r} +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) +lr_network = lr_network %>% distinct(from, to) +head(lr_network) +ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns + +weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) +head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network +head(weighted_networks$gr) # interactions and their weights in the gene regulatory network + +seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") +``` + + +# Perform the NicheNet analysis + +In this case study, we want to apply NicheNet to predict which ligands expressed by all immune cells in the T cell area of the lymph node are most likely to have induced the differential expression in CD8 T cells after LCMV infection. + +As described in the main vignette, the pipeline of a basic NicheNet analysis consist of the following steps: + +In this case study, the receiver cell population is the 'CD8 T' cell population, whereas the sender cell populations are 'CD4 T', 'Treg', 'Mono', 'NK', 'B' and 'DC'. +We will consider a gene to be expressed when it is expressed in at least 10% of cells in one cluster. + +```{r} +# 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations +## receiver +receiver = "CD8.T" +expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) +background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] + +## sender +sender_celltypes = c("CD4.T","Treg", "Mono", "NK", "B", "DC") + +list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here +expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + +# 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) + +seurat_obj_receiver= subset(seuratObj, idents = receiver) +seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[["aggregate"]]) + +condition_oi = "LCMV" +condition_reference = "SS" + +DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10) %>% rownames_to_column("gene") + +geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) +geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] + +# 3. Define a set of potential ligands +ligands = lr_network %>% pull(from) %>% unique() +receptors = lr_network %>% pull(to) %>% unique() + +expressed_ligands = intersect(ligands,expressed_genes_sender) +expressed_receptors = intersect(receptors,expressed_genes_receiver) + +potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() + +# 4. Perform NicheNet ligand activity analysis +ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + +ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson))) +ligand_activities +``` + + +## Perform prioritization of ligand-receptor pairs + +In addition to the NicheNet ligand activity (`activity_scaled`), you can prioritize based on: + +* Upregulation of the ligand in a sender cell type compared to other cell types: `de_ligand` +* Upregulation of the receptor in a receiver cell type: `de_receptor` +* Average expression of the ligand in the sender cell type: `exprs_ligand` +* Average expression of the receptor in the receiver cell type: `exprs_receptor` +* Condition-specificity of the ligand across all cell types: `ligand_condition_specificity` +* Condition-specificity of the receptor across all cell types: `receptor_condition_specificity` + +Note that the first four criteria are calculated only in the condition of interest. + +```{r} +# Default weights +prioritizing_weights = c("de_ligand" = 1, + "de_receptor" = 1, + "activity_scaled" = 2, + "exprs_ligand" = 1, + "exprs_receptor" = 1, + "ligand_condition_specificity" = 0.5, + "receptor_condition_specificity" = 0.5) + +``` + +We provide helper functions to calculate these values, including `calculate_de` and `get_exprs_avg`. `process_table_to_ic` transforms these different dataframes so they are compatible with the `generate_prioritization_tables` function. + +```{r} +celltypes <- unique(seuratObj$celltype) +lr_network_renamed <- lr_network %>% rename(ligand=from, receptor=to) + +# Only calculate DE for LCMV condition, with genes that are in the ligand-receptor network +DE_table <- calculate_de(seuratObj, celltype_colname = "celltype", + condition_colname = "aggregate", condition_oi = condition_oi, + features = union(expressed_ligands, expressed_receptors)) + +# Average expression information - only for LCMV condition +expression_info <- get_exprs_avg(seuratObj, "celltype", condition_colname = "aggregate", condition_oi = condition_oi) + +# Calculate condition specificity - only for datasets with two conditions! +condition_markers <- FindMarkers(object = seuratObj, ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", min.pct = 0, logfc.threshold = 0, + features = union(expressed_ligands, expressed_receptors)) %>% rownames_to_column("gene") + +# Combine DE of senders and receivers -> used for prioritization +processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network_renamed, + senders_oi = sender_celltypes, receivers_oi = receiver) + +processed_expr_table <- process_table_to_ic(expression_info, table_type = "expression", lr_network_renamed) + +processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network_renamed) +``` + +Finally we generate the prioritization table. +```{r} +prior_table <- generate_prioritization_tables(processed_expr_table, + processed_DE_table, + ligand_activities, + processed_condition_markers, + prioritizing_weights) + +prior_table +``` + +As you can see, the resulting table now show the rankings for *ligand-receptor interactions of a sender-receiver cell type pair*, instead of just the prioritized ligands. We included all columns here, but if you just want relevant columns that were used to calculate the ranking: + +```{r} +prior_table %>% select(c('sender', 'receiver', 'ligand', 'receptor', 'scaled_lfc_ligand', 'scaled_lfc_receptor', 'scaled_p_val_ligand_adapted', 'scaled_p_val_receptor_adapted', 'scaled_avg_exprs_ligand', 'scaled_avg_exprs_receptor', 'scaled_lfc_ligand_group', 'scaled_lfc_receptor_group', 'scaled_activity')) +``` + +Cxcl10 now went up in the rankings due to both the high expression of its potential receptor Dpp4 and its high celltype specificity (`scaled_lfc_ligand`). You can also see this in the dotplot and heatmap below. + +```{r fig.width = 15, fig.height = 10} +best_upstream_ligands = ligand_activities %>% top_n(20, aupr_corrected) %>% arrange(desc(aupr_corrected)) %>% pull(test_ligand) %>% unique() + +# DE analysis for each sender cell type +DE_table_all = Idents(seuratObj) %>% levels() %>% intersect(sender_celltypes) %>% + lapply(get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = condition_oi, condition_reference = condition_reference, + expression_pct = 0.10, celltype_col = NULL) %>% reduce(full_join) +DE_table_all[is.na(DE_table_all)] = 0 + +order_ligands <- intersect(make.names(best_upstream_ligands), colnames(active_ligand_target_links)) %>% rev() + +# ligand activity heatmap +ligand_aupr_matrix <- ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) %>% + `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) + +vis_ligand_aupr <- as.matrix(ligand_aupr_matrix[order_ligands, ], ncol=1) %>% magrittr::set_colnames("AUPR") +p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, "Prioritized ligands","Ligand activity", + color = "darkorange",legend_position = "top", x_axis_position = "top", + legend_title = "AUPR\ntarget gene prediction ability)") + + theme(legend.text = element_text(size = 9)) + + +# LFC heatmap +# First combine ligand activities with DE information and make +ligand_activities_de <- ligand_activities %>% select(test_ligand, aupr_corrected) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene)) +ligand_activities_de[is.na(ligand_activities_de)] <- 0 +lfc_matrix <- ligand_activities_de %>% select(-ligand, -aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) %>% + `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) +vis_ligand_lfc <- lfc_matrix[order_ligands,] + +p_ligand_lfc <- make_threecolor_heatmap_ggplot(vis_ligand_lfc, "Prioritized ligands","LFC in Sender", + low_color = "midnightblue", mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red", + legend_position = "top", x_axis_position = "top", legend_title = "LFC") + + theme(axis.text.y = element_text(face = "italic")) + + +# ligand expression Seurat dotplot +order_ligands_adapted <- str_replace_all(order_ligands, "\\.", "-") +rotated_dotplot <- DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + + # flip of coordinates necessary because we want to show ligands in the rows when combining all plots + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) + +# Combine figures and legend separately +figures_without_legend <- cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), + axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), + p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), + align = "hv", + nrow = 1, + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8)) + +legends <- cowplot::plot_grid( + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), + ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), + nrow = 1, + align = "h", rel_widths = c(1.5, 1, 1)) + +combined_plot <- cowplot::plot_grid(figures_without_legend, legends, nrow = 2, align = "hv") +print(combined_plot) +``` + +### Extra visualization of ligand-receptor pairs + +We provide the function `make_mushroom_plot` which allows you to display expression of ligand-receptor pairs in semicircles. By default, the fill gradient shows the LFC between cell types, while the size of the semicircle corresponds to the scaled mean expression. + +```{r fig.height=8, fig.width=6} +make_mushroom_plot(prior_table, top_n = 30) +``` + +We provide multiple ways to customize this plot, including changing the "size" and "fill" values to certain columns from the prioritization table (but without the `_ligand` or `_receptor` suffix). In addition, you can also choose to show the rankings of each ligand-receptor-sender pair, as well as show all data points for context. + +```{r fig.height=8, fig.width=6} +print(paste0("Column names that you can use are: ", paste0(prior_table %>% select(ends_with(c("_ligand", "_receptor", "_sender", "_receiver"))) %>% colnames() %>% + str_remove("_ligand|_receptor|_sender|_receiver") %>% unique, collapse = ", "))) + +# Change size and color columns +make_mushroom_plot(prior_table, top_n = 30, size = "pct_expressed", color = "scaled_avg_exprs") + +# Show rankings and other datapoints +make_mushroom_plot(prior_table, top_n = 30, show_rankings = TRUE, show_all_datapoints = TRUE) + +# Show true limits instead of having it from 0 to 1 +make_mushroom_plot(prior_table, top_n = 30, true_color_range = TRUE) +``` + +### References diff --git a/vignettes/seurat_steps_prioritization.md b/vignettes/seurat_steps_prioritization.md new file mode 100644 index 0000000..766e7e7 --- /dev/null +++ b/vignettes/seurat_steps_prioritization.md @@ -0,0 +1,443 @@ +Perform NicheNet analysis with prioritization +================ +Robin Browaeys & Chananchida Sang-aram +2023-07-20 + + + +``` r +### Load Packages +library(nichenetr) +library(Seurat) # please update to Seurat V4 +library(tidyverse) + +### Read in Seurat object +seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj$celltype <- make.names(seuratObj$celltype) +Idents(seuratObj) <- seuratObj$celltype +``` + +In this vignette, we will extend the basic NicheNet analysis analysis +from [Perform NicheNet analysis starting from a Seurat object: +step-by-step analysis](seurat_steps.md) by incorporating gene expression +as part of the prioritization This is a generalization of the +[Differential NicheNet](differential_nichenet.md) and +[MultiNicheNet](https://github.com/saeyslab/multinichenetr) approach. +While the original NicheNet only ranks ligands based on the ligand +activity analysis, it is now also possible to prioritize ligands based +on upregulation of the ligand/receptor, and the cell-type and condition +specificity of hte ligand and receptor. + +Make sure you understand the different steps in a NicheNet analysis that +are described in that vignette before proceeding with this vignette and +performing a real NicheNet analysis on your data. + +We will again make use of mouse NICHE-seq data from Medaglia et al. to +explore intercellular communication in the T cell area in the inguinal +lymph node before and 72 hours after lymphocytic choriomeningitis virus +(LCMV) infection (Medaglia et al. 2017). We will NicheNet to explore +immune cell crosstalk in response to this LCMV infection. In this +dataset, differential expression is observed between CD8 T cells in +steady-state and CD8 T cells after LCMV infection. NicheNet can be +applied to look at how several immune cell populations in the lymph node +(i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can +regulate and induce these observed gene expression changes. NicheNet +will specifically prioritize ligands from these immune cells and their +target genes that change in expression upon LCMV infection. + +Hence, we have to make some additional calculations, including DE of the +ligand/receptor in a sender/receiver cell type, and the average +expression of each ligand/receptor in each sender/receiver cell type. +The DE analysis boils down to computing pairwise tests between the cell +type of interest and other cell types in the dataset. We will subset the +data to only the condition of interest, “LCMV”. For this analysis we +will consider all cell types as both sender and receiver, as we want the +ligand/receptor to be specific. + +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) +and the [Seurat object of the processed NICHE-seq single-cell +data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from +Zenodo. + +## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks. + +The NicheNet ligand-receptor network and weighted networks are necessary +to define and show possible ligand-receptor interactions between two +cell populations. The ligand-target matrix denotes the prior potential +that particular ligands might regulate the expression of particular +target genes. This matrix is necessary to prioritize possible +ligand-receptor interactions based on observed gene expression effects +(i.e. NicheNet’s ligand activity analysis) and infer affected target +genes of these prioritized ligands. + +``` r +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) +lr_network = lr_network %>% distinct(from, to) +head(lr_network) +## # A tibble: 6 × 2 +## from to +## +## 1 2300002M23Rik Ddr1 +## 2 2610528A11Rik Gpr15 +## 3 9530003J23Rik Itgal +## 4 a Atrn +## 5 a F11r +## 6 a Mc1r +ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns +## 2300002M23Rik 2610528A11Rik 9530003J23Rik a A2m +## 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 1.390053e-05 +## 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 1.345536e-05 +## 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 9.802264e-05 +## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 +## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 + +weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) +head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010F05Rik App 0.110 +## 2 0610010F05Rik Cat 0.0673 +## 3 0610010F05Rik H1f2 0.0660 +## 4 0610010F05Rik Lrrc49 0.0829 +## 5 0610010F05Rik Nicn1 0.0864 +## 6 0610010F05Rik Srpk1 0.123 +head(weighted_networks$gr) # interactions and their weights in the gene regulatory network +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010K14Rik 0610010K14Rik 0.121 +## 2 0610010K14Rik 2510039O18Rik 0.121 +## 3 0610010K14Rik 2610021A01Rik 0.0256 +## 4 0610010K14Rik 9130401M01Rik 0.0263 +## 5 0610010K14Rik Alg1 0.127 +## 6 0610010K14Rik Alox12 0.128 + +seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") +``` + +# Perform the NicheNet analysis + +In this case study, we want to apply NicheNet to predict which ligands +expressed by all immune cells in the T cell area of the lymph node are +most likely to have induced the differential expression in CD8 T cells +after LCMV infection. + +As described in the main vignette, the pipeline of a basic NicheNet +analysis consist of the following steps: + +In this case study, the receiver cell population is the ‘CD8 T’ cell +population, whereas the sender cell populations are ‘CD4 T’, ‘Treg’, +‘Mono’, ‘NK’, ‘B’ and ‘DC’. We will consider a gene to be expressed when +it is expressed in at least 10% of cells in one cluster. + +``` r +# 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations +## receiver +receiver = "CD8.T" +expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) +background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] + +## sender +sender_celltypes = c("CD4.T","Treg", "Mono", "NK", "B", "DC") + +list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here +expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() + +# 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) + +seurat_obj_receiver= subset(seuratObj, idents = receiver) +seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[["aggregate"]]) + +condition_oi = "LCMV" +condition_reference = "SS" + +DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10) %>% rownames_to_column("gene") + +geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) +geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] + +# 3. Define a set of potential ligands +ligands = lr_network %>% pull(from) %>% unique() +receptors = lr_network %>% pull(to) %>% unique() + +expressed_ligands = intersect(ligands,expressed_genes_sender) +expressed_receptors = intersect(receptors,expressed_genes_receiver) + +potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() + +# 4. Perform NicheNet ligand activity analysis +ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + +ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson))) +ligand_activities +## # A tibble: 73 × 6 +## test_ligand auroc aupr aupr_corrected pearson rank +## +## 1 Ebi3 0.663 0.390 0.244 0.301 1 +## 2 Tgfb1 0.596 0.254 0.108 0.202 2 +## 3 H2-M3 0.608 0.292 0.146 0.179 3 +## 4 Ptprc 0.642 0.310 0.165 0.167 4 +## 5 H2-M2 0.611 0.279 0.133 0.153 6 +## 6 H2-T10 0.611 0.279 0.133 0.153 6 +## 7 H2-T22 0.611 0.279 0.133 0.153 6 +## 8 H2-T23 0.611 0.278 0.132 0.153 8 +## 9 App 0.590 0.248 0.102 0.150 9 +## 10 H2-K1 0.605 0.268 0.122 0.142 10 +## # … with 63 more rows +``` + +## Perform prioritization of ligand-receptor pairs + +In addition to the NicheNet ligand activity (`activity_scaled`), you can +prioritize based on: + +- Upregulation of the ligand in a sender cell type compared to other + cell types: `de_ligand` +- Upregulation of the receptor in a receiver cell type: `de_receptor` +- Average expression of the ligand in the sender cell type: + `exprs_ligand` +- Average expression of the receptor in the receiver cell type: + `exprs_receptor` +- Condition-specificity of the ligand across all cell types: + `ligand_condition_specificity` +- Condition-specificity of the receptor across all cell types: + `receptor_condition_specificity` + +Note that the first four criteria are calculated only in the condition +of interest. + +``` r +# Default weights +prioritizing_weights = c("de_ligand" = 1, + "de_receptor" = 1, + "activity_scaled" = 2, + "exprs_ligand" = 1, + "exprs_receptor" = 1, + "ligand_condition_specificity" = 0.5, + "receptor_condition_specificity" = 0.5) +``` + +We provide helper functions to calculate these values, including +`calculate_de` and `get_exprs_avg`. `process_table_to_ic` transforms +these different dataframes so they are compatible with the +`generate_prioritization_tables` function. + +``` r +celltypes <- unique(seuratObj$celltype) +lr_network_renamed <- lr_network %>% rename(ligand=from, receptor=to) + +# Only calculate DE for LCMV condition, with genes that are in the ligand-receptor network +DE_table <- calculate_de(seuratObj, celltype_colname = "celltype", + condition_colname = "aggregate", condition_oi = condition_oi, + features = union(expressed_ligands, expressed_receptors)) + +# Average expression information - only for LCMV condition +expression_info <- get_exprs_avg(seuratObj, "celltype", condition_colname = "aggregate", condition_oi = condition_oi) + +# Calculate condition specificity - only for datasets with two conditions! +condition_markers <- FindMarkers(object = seuratObj, ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", min.pct = 0, logfc.threshold = 0, + features = union(expressed_ligands, expressed_receptors)) %>% rownames_to_column("gene") + +# Combine DE of senders and receivers -> used for prioritization +processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network_renamed, + senders_oi = sender_celltypes, receivers_oi = receiver) + +processed_expr_table <- process_table_to_ic(expression_info, table_type = "expression", lr_network_renamed) + +processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network_renamed) +``` + +Finally we generate the prioritization table. + +``` r +prior_table <- generate_prioritization_tables(processed_expr_table, + processed_DE_table, + ligand_activities, + processed_condition_markers, + prioritizing_weights) + +prior_table +## # A tibble: 858 × 51 +## sender receiver ligand receptor lfc_ligand lfc_receptor ligand_receptor_lfc_avg p_val_ligand p_adj_ligand p_val_receptor p_adj_receptor pct_expressed_s… pct_expressed_r… avg_ligand +## +## 1 NK CD8.T Ptprc Dpp4 0.527 0.194 0.527 1.14e- 7 1.54e- 3 1.84e- 4 1 e+ 0 0.832 0.133 16.6 +## 2 Mono CD8.T Cxcl10 Dpp4 4.23 0.194 4.23 1.34e- 80 1.82e- 76 1.84e- 4 1 e+ 0 0.744 0.133 54.8 +## 3 Mono CD8.T Ebi3 Il27ra 0.503 0.0580 0.503 1.14e- 52 1.54e- 48 7.60e- 4 1 e+ 0 0.122 0.139 0.546 +## 4 Mono CD8.T Cxcl9 Dpp4 4.12 0.194 4.12 2.18e-121 2.95e-117 1.84e- 4 1 e+ 0 0.456 0.133 23.8 +## 5 B CD8.T H2-M3 Cd8a 0.372 1.88 0.372 1.49e- 3 1 e+ 0 1.97e-266 2.67e-262 0.152 0.669 1.59 +## 6 Mono CD8.T Ptprc Dpp4 0.473 0.194 0.473 6.18e- 6 8.37e- 2 1.84e- 4 1 e+ 0 0.844 0.133 14.9 +## 7 DC CD8.T Ccl22 Dpp4 2.57 0.194 2.57 3.59e-298 4.87e-294 1.84e- 4 1 e+ 0 0.389 0.133 6.37 +## 8 DC CD8.T H2-M2 Cd8a 3.59 1.88 3.59 0 0 1.97e-266 2.67e-262 0.556 0.669 9.73 +## 9 DC CD8.T H2-D1 Cd8a 1.21 1.88 1.21 3.65e- 8 4.94e- 4 1.97e-266 2.67e-262 1 0.669 60.7 +## 10 Mono CD8.T Cxcl11 Dpp4 2.17 0.194 2.17 4.27e-130 5.78e-126 1.84e- 4 1 e+ 0 0.256 0.133 4.37 +## # … with 848 more rows, and 37 more variables: avg_receptor , ligand_receptor_prod , lfc_pval_ligand , p_val_ligand_adapted , scaled_lfc_ligand , +## # scaled_p_val_ligand , scaled_lfc_pval_ligand , scaled_p_val_ligand_adapted , activity , rank , activity_zscore , scaled_activity , +## # lfc_pval_receptor , p_val_receptor_adapted , scaled_lfc_receptor , scaled_p_val_receptor , scaled_lfc_pval_receptor , +## # scaled_p_val_receptor_adapted , scaled_avg_exprs_ligand , scaled_avg_exprs_receptor , lfc_ligand_group , p_val_ligand_group , +## # lfc_pval_ligand_group , p_val_ligand_adapted_group , scaled_lfc_ligand_group , scaled_p_val_ligand_group , scaled_lfc_pval_ligand_group , +## # scaled_p_val_ligand_adapted_group , lfc_receptor_group , p_val_receptor_group , lfc_pval_receptor_group , p_val_receptor_adapted_group , +## # scaled_lfc_receptor_group , scaled_p_val_receptor_group , scaled_lfc_pval_receptor_group , scaled_p_val_receptor_adapted_group , prioritization_score +``` + +As you can see, the resulting table now show the rankings for +*ligand-receptor interactions of a sender-receiver cell type pair*, +instead of just the prioritized ligands. We included all columns here, +but if you just want relevant columns that were used to calculate the +ranking: + +``` r +prior_table %>% select(c('sender', 'receiver', 'ligand', 'receptor', 'scaled_lfc_ligand', 'scaled_lfc_receptor', 'scaled_p_val_ligand_adapted', 'scaled_p_val_receptor_adapted', 'scaled_avg_exprs_ligand', 'scaled_avg_exprs_receptor', 'scaled_lfc_ligand_group', 'scaled_lfc_receptor_group', 'scaled_activity')) +## # A tibble: 858 × 13 +## sender receiver ligand receptor scaled_lfc_ligand scaled_lfc_receptor scaled_p_val_ligand_adap… scaled_p_val_re… scaled_avg_expr… scaled_avg_expr… scaled_lfc_liga… scaled_lfc_rece… +## +## 1 NK CD8.T Ptprc Dpp4 0.824 0.901 0.843 0.887 1.00 1.00 0.779 0.831 +## 2 Mono CD8.T Cxcl10 Dpp4 0.997 0.901 0.957 0.887 1.00 1.00 0.990 0.831 +## 3 Mono CD8.T Ebi3 Il27ra 0.817 0.831 0.941 0.859 1.00 0.859 0.538 0.0986 +## 4 Mono CD8.T Cxcl9 Dpp4 0.994 0.901 0.975 0.887 1.00 1.00 0.894 0.831 +## 5 B CD8.T H2-M3 Cd8a 0.777 1 0.768 1 1.00 1.00 0.846 0.0845 +## 6 Mono CD8.T Ptprc Dpp4 0.808 0.901 0.827 0.887 0.867 1.00 0.779 0.831 +## 7 DC CD8.T Ccl22 Dpp4 0.974 0.901 0.996 0.887 1.00 1.00 0.490 0.831 +## 8 DC CD8.T H2-M2 Cd8a 0.989 1 1 1 1.00 1.00 0.308 0.0845 +## 9 DC CD8.T H2-D1 Cd8a 0.925 1 0.853 1 1.00 1.00 0.885 0.0845 +## 10 Mono CD8.T Cxcl11 Dpp4 0.963 0.901 0.977 0.887 1.00 1.00 0.673 0.831 +## # … with 848 more rows, and 1 more variable: scaled_activity +``` + +Cxcl10 now went up in the rankings due to both the high expression of +its potential receptor Dpp4 and its high celltype specificity +(`scaled_lfc_ligand`). You can also see this in the dotplot and heatmap +below. + +``` r +best_upstream_ligands = ligand_activities %>% top_n(20, aupr_corrected) %>% arrange(desc(aupr_corrected)) %>% pull(test_ligand) %>% unique() + +# DE analysis for each sender cell type +DE_table_all = Idents(seuratObj) %>% levels() %>% intersect(sender_celltypes) %>% + lapply(get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = condition_oi, condition_reference = condition_reference, + expression_pct = 0.10, celltype_col = NULL) %>% reduce(full_join) +DE_table_all[is.na(DE_table_all)] = 0 + +order_ligands <- intersect(make.names(best_upstream_ligands), colnames(active_ligand_target_links)) %>% rev() + +# ligand activity heatmap +ligand_aupr_matrix <- ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) %>% + `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) + +vis_ligand_aupr <- as.matrix(ligand_aupr_matrix[order_ligands, ], ncol=1) %>% magrittr::set_colnames("AUPR") +p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, "Prioritized ligands","Ligand activity", + color = "darkorange",legend_position = "top", x_axis_position = "top", + legend_title = "AUPR\ntarget gene prediction ability)") + + theme(legend.text = element_text(size = 9)) + + +# LFC heatmap +# First combine ligand activities with DE information and make +ligand_activities_de <- ligand_activities %>% select(test_ligand, aupr_corrected) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene)) +ligand_activities_de[is.na(ligand_activities_de)] <- 0 +lfc_matrix <- ligand_activities_de %>% select(-ligand, -aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) %>% + `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) +vis_ligand_lfc <- lfc_matrix[order_ligands,] + +p_ligand_lfc <- make_threecolor_heatmap_ggplot(vis_ligand_lfc, "Prioritized ligands","LFC in Sender", + low_color = "midnightblue", mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red", + legend_position = "top", x_axis_position = "top", legend_title = "LFC") + + theme(axis.text.y = element_text(face = "italic")) + + +# ligand expression Seurat dotplot +order_ligands_adapted <- str_replace_all(order_ligands, "\\.", "-") +rotated_dotplot <- DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + + # flip of coordinates necessary because we want to show ligands in the rows when combining all plots + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) + +# Combine figures and legend separately +figures_without_legend <- cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), + rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), + axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), + p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), + align = "hv", + nrow = 1, + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8)) + +legends <- cowplot::plot_grid( + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), + ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), + nrow = 1, + align = "h", rel_widths = c(1.5, 1, 1)) + +combined_plot <- cowplot::plot_grid(figures_without_legend, legends, nrow = 2, align = "hv") +print(combined_plot) +``` + +![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-35-1.png) + +### Extra visualization of ligand-receptor pairs + +We provide the function `make_mushroom_plot` which allows you to display +expression of ligand-receptor pairs in semicircles. By default, the fill +gradient shows the LFC between cell types, while the size of the +semicircle corresponds to the scaled mean expression. + +``` r +make_mushroom_plot(prior_table, top_n = 30) +``` + +![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-36-1.png) + +We provide multiple ways to customize this plot, including changing the +“size” and “fill” values to certain columns from the prioritization +table (but without the `_ligand` or `_receptor` suffix). In addition, +you can also choose to show the rankings of each ligand-receptor-sender +pair, as well as show all data points for context. + +``` r +print(paste0("Column names that you can use are: ", paste0(prior_table %>% select(ends_with(c("_ligand", "_receptor", "_sender", "_receiver"))) %>% colnames() %>% + str_remove("_ligand|_receptor|_sender|_receiver") %>% unique, collapse = ", "))) +## [1] "Column names that you can use are: lfc, p_val, p_adj, avg, lfc_pval, scaled_lfc, scaled_p_val, scaled_lfc_pval, scaled_avg_exprs, pct_expressed" + +# Change size and color columns +make_mushroom_plot(prior_table, top_n = 30, size = "pct_expressed", color = "scaled_avg_exprs") +``` + +![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-1.png) + +``` r + +# Show rankings and other datapoints +make_mushroom_plot(prior_table, top_n = 30, show_rankings = TRUE, show_all_datapoints = TRUE) +``` + +![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-2.png) + +``` r + +# Show true limits instead of having it from 0 to 1 +make_mushroom_plot(prior_table, top_n = 30, true_color_range = TRUE) +``` + +![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-3.png) + +### References + +
+ +
+ +Medaglia, Chiara, Amir Giladi, Liat Stoler-Barak, Marco De Giovanni, +Tomer Meir Salame, Adi Biram, Eyal David, et al. 2017. “Spatial +Reconstruction of Immune Niches by Combining Photoactivatable Reporters +and scRNA-Seq.” *Science*, December, +eaao4277. . + +
+ +
diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-35-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-35-1.png new file mode 100644 index 0000000..418d784 Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-35-1.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-36-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-36-1.png new file mode 100644 index 0000000..7026efa Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-36-1.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-1.png new file mode 100644 index 0000000..32b300e Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-1.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-2.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-2.png new file mode 100644 index 0000000..c0b250d Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-2.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-3.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-3.png new file mode 100644 index 0000000..179e10f Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-37-3.png differ diff --git a/vignettes/seurat_wrapper.Rmd b/vignettes/seurat_wrapper.Rmd index 3eadba3..aa8cd72 100644 --- a/vignettes/seurat_wrapper.Rmd +++ b/vignettes/seurat_wrapper.Rmd @@ -23,18 +23,18 @@ knitr::opts_chunk$set( ) ``` -In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat v3 object. Such a NicheNet analysis can help you to generate hypotheses about an intercellular communication process of interest for which you have single-cell gene expression data as a Seurat object. Specifically, NicheNet can predict 1) which ligands from one or more cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. +In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat v3/v4 object. Such a NicheNet analysis can help you to generate hypotheses about an intercellular communication process of interest for which you have single-cell gene expression data as a Seurat object. Specifically, NicheNet can predict 1) which ligands from one or more cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. Because NicheNet studies how ligands affect gene expression in putatively neighboring/interacting cells, you need to have data about this effect in gene expression you want to study. So, there need to be 'some kind of' differential expression in a receiver cell population, caused by ligands from one of more interacting sender cell populations. In this vignette, we demonstrate the use of NicheNet on a Seurat Object. The wrapper function we will show consists of the same different steps that are discussed in detail in the main, basis, NicheNet vignette [NicheNet's ligand activity analysis on a gene set of interest: predict active ligands and their target genes](ligand_activity_geneset.md):`vignette("ligand_activity_geneset", package="nichenetr")`. Make sure you understand the different steps in a NicheNet analysis that are described in that vignette before proceeding with this vignette and performing a real NicheNet analysis on your data. In another vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")`, we also show the execution of these steps one for one, but in contrast to the main vignette now specifically for a Seurat Object. This allows users to adapt specific steps of the pipeline to make them more appropriate for their data (recommended). -As example expression data of interacting cells, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [See @medaglia_spatial_2017]. We will NicheNet to explore immune cell crosstalk in response to this LCMV infection. +As example expression data of interacting cells, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. We will NicheNet to explore immune cell crosstalk in response to this LCMV infection. In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. -The used NicheNet networks, ligand-target matrix and example expression data of interacting cells can be downloaded from Zenodo. The NicheNet networks and ligand-target matrix at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) and the Seurat object of the processed NICHE-seq single-cell data at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3531889.svg)](https://doi.org/10.5281/zenodo.3531889). +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. # Prepare NicheNet analysis @@ -55,23 +55,36 @@ If you would use and load other packages, we recommend to load these 3 packages ### Read in NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks: ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns +options(timeout=600) +organism = "mouse" + +if(organism == "human"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) +} else if(organism == "mouse"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +} + +lr_network = lr_network %>% distinct(from, to) head(lr_network) +ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network head(weighted_networks$gr) # interactions and their weights in the gene regulatory network ``` ### Read in the expression data of interacting cells: -The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a Seurat v3 object and that gene should be named by their official mouse/human gene symbol. +The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a Seurat v3/v4 object and that gene should be named by their official mouse/human gene symbol. If your expression data has the older gene symbols, you may want to use our alias conversion function to avoid the loss of gene names. ```{r} seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") # convert gene names seuratObj@meta.data %>% head() ``` @@ -110,7 +123,7 @@ In this case study, the receiver cell population is the 'CD8 T' cell population, The gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus 'LCMV', whereas the reference/steady-state condition is 'SS'. The notion of conditions can be extracted from the metadata column 'aggregate', the method to calculate the differential expression is the standard Seurat Wilcoxon test. -The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is 20 by default. +The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is 30 by default. To perform the NicheNet analysis with these specifications, run the following: @@ -123,7 +136,9 @@ nichenet_output = nichenet_seuratobj_aggregate( receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks) ``` ### Interpret the NicheNet analysis output @@ -136,11 +151,9 @@ A first thing NicheNet does, is prioritizing ligands based on predicted ligand a nichenet_output$ligand_activities ``` -The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of expressed genes. In our validation study, we showed that the pearson correlation coefficient between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, NicheNet ranks the ligands based on their pearson correlation coefficient. This allows us to prioritize ligands inducing the antiviral response in CD8 T cells. +The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of expressed genes. In our validation study, we showed that the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity (this was the Pearson correlation for v1). Therefore, NicheNet ranks the ligands based on their AUPR. This allows us to prioritize ligands inducing the antiviral response in CD8 T cells. -The column 'bona_fide_ligand' indicates whether the ligand is part of ligand-receptor interactions that are documented in public databases ('bona_fide_ligand = TRUE') and not of ligand-receptor interactions that we predicted based on annotation as ligand/receptor and protein-protein interaction databases ('bona_fide_ligand = FALSE'). - -To get a list of the 20 top-ranked ligands: run the following command +To get a list of the 30 top-ranked ligands: run the following command ```{r} nichenet_output$top_ligands @@ -187,7 +200,7 @@ nichenet_output$ligand_target_matrix %>% .[1:10,1:6] nichenet_output$ligand_target_df # weight column = regulatory potential ``` -To get a list of the top-predicted target genes of the 20 top-ranked ligands: run the following command +To get a list of the top-predicted target genes of the 30 top-ranked ligands: run the following command ```{r} nichenet_output$top_targets @@ -241,13 +254,6 @@ You can visualize the expression of these as well. Because we only focus on CD8 DotPlot(seuratObj %>% subset(idents = "CD8 T"), features = nichenet_output$top_receptors %>% rev(), split.by = "aggregate") + RotatedAxis() ``` -You also can just show 'bona fide' ligand-receptor links that are described in the literature and not predicted based on protein-protein interactions: - -```{r} -nichenet_output$ligand_receptor_heatmap_bonafide -nichenet_output$ligand_receptor_matrix_bonafide -nichenet_output$ligand_receptor_df_bonafide -``` If you are interested in checking which geneset (and background set of genes) was used during the ligand activity analysis: ```{r} @@ -260,7 +266,7 @@ nichenet_output$background_expressed_genes %>% length() Instead of focusing on multiple sender cell types, it is possible that you are only interested in doing the analyis for one sender cell type, such as dendritic cells in this case. ```{r, fig.width=14} -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "DC", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "DC", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) nichenet_output$ligand_activity_target_heatmap ``` @@ -268,7 +274,7 @@ nichenet_output$ligand_activity_target_heatmap Instead of focusing on one or multiple predefined sender cell types, it is also possible that you want to consider all cell types present as possible sender cell. This also includes the receiver cell type, making that you can look at autocrine signaling as well. ```{r, fig.width=14} -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "all", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "all", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) nichenet_output$ligand_activity_target_heatmap ``` @@ -276,7 +282,7 @@ nichenet_output$ligand_activity_target_heatmap In some cases, it could be possible that you don't have data of potential sender cells. If you still want to predict possible upstream ligands that could have been responsible for the observed differential expression in your cell type, you can do this by following command. This will consider all possible ligands in the NicheNet databases for which a receptor is expressed by the receiver cell of interest. ```{r, fig.width=8} -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "undefined", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "undefined", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) nichenet_output$ligand_activity_target_heatmap @@ -292,7 +298,7 @@ In some cases, you might be interested in multiple target/receiver cell populati receiver_celltypes_oi = c("CD4 T", "CD8 T") # receiver_celltypes_oi = seuratObj %>% Idents() %>% unique() # for all celltypes in the dataset: use only when this would make sense biologically -nichenet_output = receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) names(nichenet_output) = receiver_celltypes_oi @@ -320,7 +326,7 @@ print(cd8_ligands) Previously, we demonstrated the use of a wrapper function for applying NicheNet to explain differential expression between two conditions in one cell type. However, also differential expression between two cell populations might sometimes be (partially) caused by communication with cells in the neighborhood. For example, differentiation from a progenitor cell to the differentiated cell might be induced by niche cells. A concrete example is discussed in this paper: [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). -Therefore, we will now also demonstrate the use of another Seurat wrapper function that can be used in the case of explaining differential expression between cell populations. But keep in mind that the comparison that you make should be biologically relevant. It is possible to use NicheNet to explain differential expression beween any two cell populations in your dataset, but in most cases, differential expression between cell populations will be a result of cell-intrinisc properties (i.e. different cell types have a different gene expression profile) and not of intercellular communication processes. In such a case, it does not make any sense to use NicheNet. +Therefore, we will now also demonstrate the use of another Seurat wrapper function that can be used in the case of explaining differential expression between cell populations. But keep in mind that the comparison that you make should be biologically relevant. It is possible to use NicheNet to explain differential expression between any two cell populations in your dataset, but in most cases, differential expression between cell populations will be a result of cell-intrinsic properties (i.e. different cell types have a different gene expression profile) and not of intercellular communication processes. In such a case, it does not make any sense to use NicheNet. For demonstration purposes, we will here first change the seuratObject of the data described above, such that it can be used in this setting. @@ -339,7 +345,7 @@ nichenet_output = nichenet_seuratobj_cluster_de( seurat_obj = seuratObj, receiver_reference = "CD8 T_SS", receiver_affected = "CD8 T_LCMV", sender = c("DC_LCMV","Mono_LCMV"), - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") + ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) ``` diff --git a/vignettes/seurat_wrapper.md b/vignettes/seurat_wrapper.md index 1e2b6a3..18ce343 100644 --- a/vignettes/seurat_wrapper.md +++ b/vignettes/seurat_wrapper.md @@ -8,14 +8,14 @@ rmarkdown::render("vignettes/seurat_wrapper.Rmd", output_format = "github_docume --> In this vignette, you can learn how to perform a basic NicheNet analysis -on a Seurat v3 object. Such a NicheNet analysis can help you to generate -hypotheses about an intercellular communication process of interest for -which you have single-cell gene expression data as a Seurat object. -Specifically, NicheNet can predict 1) which ligands from one or more -cell population(s) (“sender/niche”) are most likely to affect target -gene expression in an interacting cell population (“receiver/target”) -and 2) which specific target genes are affected by which of these -predicted ligands. +on a Seurat v3/v4 object. Such a NicheNet analysis can help you to +generate hypotheses about an intercellular communication process of +interest for which you have single-cell gene expression data as a Seurat +object. Specifically, NicheNet can predict 1) which ligands from one or +more cell population(s) (“sender/niche”) are most likely to affect +target gene expression in an interacting cell population +(“receiver/target”) and 2) which specific target genes are affected by +which of these predicted ligands. Because NicheNet studies how ligands affect gene expression in putatively neighboring/interacting cells, you need to have data about @@ -42,9 +42,9 @@ appropriate for their data (recommended). As example expression data of interacting cells, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and -72 hours after lymphocytic choriomeningitis virus (LCMV) infection (See -Medaglia et al. 2017). We will NicheNet to explore immune cell crosstalk -in response to this LCMV infection. +72 hours after lymphocytic choriomeningitis virus (LCMV) infection +(Medaglia et al. 2017). We will NicheNet to explore immune cell +crosstalk in response to this LCMV infection. In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be @@ -54,12 +54,10 @@ regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. -The used NicheNet networks, ligand-target matrix and example expression -data of interacting cells can be downloaded from Zenodo. The NicheNet -networks and ligand-target matrix at -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) -and the Seurat object of the processed NICHE-seq single-cell data at -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3531889.svg)](https://doi.org/10.5281/zenodo.3531889). +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) +and the [Seurat object of the processed NICHE-seq single-cell +data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from +Zenodo. # Prepare NicheNet analysis @@ -88,48 +86,60 @@ packages after the others. ### Read in NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks: ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 - -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +options(timeout=600) +organism = "mouse" + +if(organism == "human"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) +} else if(organism == "mouse"){ + lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) + +} + +lr_network = lr_network %>% distinct(from, to) head(lr_network) -## # A tibble: 6 x 4 -## from to source database -## -## 1 CXCL1 CXCR2 kegg_cytokines kegg -## 2 CXCL2 CXCR2 kegg_cytokines kegg -## 3 CXCL3 CXCR2 kegg_cytokines kegg -## 4 CXCL5 CXCR2 kegg_cytokines kegg -## 5 PPBP CXCR2 kegg_cytokines kegg -## 6 CXCL6 CXCR2 kegg_cytokines kegg - -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +## # A tibble: 6 × 2 +## from to +## +## 1 2300002M23Rik Ddr1 +## 2 2610528A11Rik Gpr15 +## 3 9530003J23Rik Itgal +## 4 a Atrn +## 5 a F11r +## 6 a Mc1r +ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns +## 2300002M23Rik 2610528A11Rik 9530003J23Rik a A2m +## 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 1.390053e-05 +## 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 1.345536e-05 +## 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 9.802264e-05 +## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 +## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 + +weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network -## # A tibble: 6 x 3 -## from to weight -## -## 1 A1BG ABCC6 0.422 -## 2 A1BG ACE2 0.101 -## 3 A1BG ADAM10 0.0970 -## 4 A1BG AGO1 0.0525 -## 5 A1BG AKT1 0.0855 -## 6 A1BG ANXA7 0.457 +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010F05Rik App 0.110 +## 2 0610010F05Rik Cat 0.0673 +## 3 0610010F05Rik H1f2 0.0660 +## 4 0610010F05Rik Lrrc49 0.0829 +## 5 0610010F05Rik Nicn1 0.0864 +## 6 0610010F05Rik Srpk1 0.123 head(weighted_networks$gr) # interactions and their weights in the gene regulatory network -## # A tibble: 6 x 3 -## from to weight -## -## 1 A1BG A2M 0.0294 -## 2 AAAS GFAP 0.0290 -## 3 AADAC CYP3A4 0.0422 -## 4 AADAC IRF8 0.0275 -## 5 AATF ATM 0.0330 -## 6 AATF ATR 0.0355 +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010K14Rik 0610010K14Rik 0.121 +## 2 0610010K14Rik 2510039O18Rik 0.121 +## 3 0610010K14Rik 2610021A01Rik 0.0256 +## 4 0610010K14Rik 9130401M01Rik 0.0263 +## 5 0610010K14Rik Alg1 0.127 +## 6 0610010K14Rik Alox12 0.128 ``` ### Read in the expression data of interacting cells: @@ -138,11 +148,14 @@ The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a -Seurat v3 object and that gene should be named by their official -mouse/human gene symbol. +Seurat v3/v4 object and that gene should be named by their official +mouse/human gene symbol. If your expression data has the older gene +symbols, you may want to use our alias conversion function to avoid the +loss of gene names. ``` r seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") # convert gene names seuratObj@meta.data %>% head() ## nGene nUMI orig.ident aggregate res.0.6 celltype nCount_RNA nFeature_RNA ## W380370 880 1611 LN_SS SS 1 CD8 T 1607 876 @@ -169,7 +182,7 @@ DimPlot(seuratObj, reduction = "tsne") Visualize the data to see to which condition cells belong. The metadata dataframe column that denotes the condition (steady-state or after LCMV -infection) is here called ‘aggregate.’ +infection) is here called ‘aggregate’. ``` r seuratObj@meta.data$aggregate %>% table() @@ -191,26 +204,25 @@ after LCMV infection. As described in the main vignette, the pipeline of a basic NicheNet analysis consist of the following steps: -- 1. Define a “sender/niche” cell population and a “receiver/target” - cell population present in your expression data and determine - which genes are expressed in both populations +- 1. Define a “sender/niche” cell population and a “receiver/target” + cell population present in your expression data and determine + which genes are expressed in both populations -- 2. Define a gene set of interest: these are the genes in the - “receiver/target” cell population that are potentially affected - by ligands expressed by interacting cells (e.g. genes - differentially expressed upon cell-cell interaction) +- 2. Define a gene set of interest: these are the genes in the + “receiver/target” cell population that are potentially affected by + ligands expressed by interacting cells (e.g. genes differentially + expressed upon cell-cell interaction) -- 3. Define a set of potential ligands: these are ligands that are - expressed by the “sender/niche” cell population and bind a - (putative) receptor expressed by the “receiver/target” - population +- 3. Define a set of potential ligands: these are ligands that are + expressed by the “sender/niche” cell population and bind a + (putative) receptor expressed by the “receiver/target” population -- 4. Perform NicheNet ligand activity analysis: rank the potential - ligands based on the presence of their target genes in the gene - set of interest (compared to the background set of genes) +- 4) Perform NicheNet ligand activity analysis: rank the potential + ligands based on the presence of their target genes in the gene + set of interest (compared to the background set of genes) -- 5. Infer receptors and top-predicted target genes of ligands that - are top-ranked in the ligand activity analysis +- 5) Infer receptors and top-predicted target genes of ligands that are + top-ranked in the ligand activity analysis All these steps are contained in one of three following similar single functions: `nichenet_seuratobj_aggregate`, @@ -226,20 +238,20 @@ prioritization and ranking of the ligands! ## NicheNet analysis on Seurat object: explain differential expression between two conditions In this case study, the receiver cell population is the ‘CD8 T’ cell -population, whereas the sender cell populations are ‘CD4 T,’ ‘Treg,’ -‘Mono,’ ‘NK,’ ‘B’ and ‘DC.’ The above described functions will consider +population, whereas the sender cell populations are ‘CD4 T’, ‘Treg’, +‘Mono’, ‘NK’, ‘B’ and ‘DC’. The above described functions will consider a gene to be expressed when it is expressed in at least a predefined fraction of cells in one cluster (default: 10%). The gene set of interest are the genes differentially expressed in CD8 T -cells after LCMV infection. The condition of interest is thus ‘LCMV,’ -whereas the reference/steady-state condition is ‘SS.’ The notion of -conditions can be extracted from the metadata column ‘aggregate,’ the +cells after LCMV infection. The condition of interest is thus ‘LCMV’, +whereas the reference/steady-state condition is ‘SS’. The notion of +conditions can be extracted from the metadata column ‘aggregate’, the method to calculate the differential expression is the standard Seurat Wilcoxon test. The number of top-ranked ligands that are further used to predict active -target genes and construct an active ligand-receptor network is 20 by +target genes and construct an active ligand-receptor network is 30 by default. To perform the NicheNet analysis with these specifications, run the @@ -254,7 +266,9 @@ nichenet_output = nichenet_seuratobj_aggregate( receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" @@ -274,43 +288,39 @@ command: ``` r nichenet_output$ligand_activities -## # A tibble: 44 x 6 -## test_ligand auroc aupr pearson rank bona_fide_ligand -## -## 1 Ebi3 0.638 0.234 0.197 1 FALSE -## 2 Il15 0.582 0.163 0.0961 2 TRUE -## 3 Crlf2 0.549 0.163 0.0758 3 FALSE -## 4 App 0.499 0.141 0.0655 4 TRUE -## 5 Tgfb1 0.494 0.140 0.0558 5 TRUE -## 6 Ptprc 0.536 0.149 0.0554 6 TRUE -## 7 H2-M3 0.525 0.157 0.0528 7 TRUE -## 8 Icam1 0.543 0.142 0.0486 8 TRUE -## 9 Cxcl10 0.531 0.141 0.0408 9 TRUE -## 10 Adam17 0.517 0.137 0.0359 10 TRUE -## # ... with 34 more rows +## # A tibble: 73 × 6 +## test_ligand auroc aupr aupr_corrected pearson rank +## +## 1 Ebi3 0.663 0.390 0.244 0.301 1 +## 2 Ptprc 0.642 0.310 0.165 0.167 2 +## 3 H2-M3 0.608 0.292 0.146 0.179 3 +## 4 H2-M2 0.611 0.279 0.133 0.153 5 +## 5 H2-T10 0.611 0.279 0.133 0.153 5 +## 6 H2-T22 0.611 0.279 0.133 0.153 5 +## 7 H2-T23 0.611 0.278 0.132 0.153 7 +## 8 H2-K1 0.605 0.268 0.122 0.142 8 +## 9 H2-Q4 0.605 0.268 0.122 0.141 10 +## 10 H2-Q6 0.605 0.268 0.122 0.141 10 +## # … with 63 more rows ``` The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of -expressed genes. In our validation study, we showed that the pearson -correlation coefficient between a ligand’s target predictions and the -observed transcriptional response was the most informative measure to -define ligand activity. Therefore, NicheNet ranks the ligands based on -their pearson correlation coefficient. This allows us to prioritize -ligands inducing the antiviral response in CD8 T cells. - -The column ‘bona\_fide\_ligand’ indicates whether the ligand is part of -ligand-receptor interactions that are documented in public databases -(‘bona\_fide\_ligand = TRUE’) and not of ligand-receptor interactions -that we predicted based on annotation as ligand/receptor and -protein-protein interaction databases (‘bona\_fide\_ligand = FALSE’). +expressed genes. In our validation study, we showed that the area under +the precision-recall curve (AUPR) between a ligand’s target predictions +and the observed transcriptional response was the most informative +measure to define ligand activity (this was the Pearson correlation for +v1). Therefore, NicheNet ranks the ligands based on their AUPR. This +allows us to prioritize ligands inducing the antiviral response in CD8 T +cells. -To get a list of the 20 top-ranked ligands: run the following command +To get a list of the 30 top-ranked ligands: run the following command ``` r nichenet_output$top_ligands -## [1] "Ebi3" "Il15" "Crlf2" "App" "Tgfb1" "Ptprc" "H2-M3" "Icam1" "Cxcl10" "Adam17" "Cxcl11" "Cxcl9" "H2-T23" "Sema4d" "Ccl5" "C3" "Cxcl16" "Itgb1" "Anxa1" "Sell" +## [1] "Ebi3" "Ptprc" "H2-M3" "H2-M2" "H2-T10" "H2-T22" "H2-T23" "H2-K1" "H2-Q4" "H2-Q6" "H2-Q7" "H2-D1" "Sirpa" "Cd48" "Tgfb1" "Ccl22" "App" "Selplg" "Cxcl10" "Btla" "Adam17" "Icam1" "Cxcl11" +## [24] "Tgm2" "B2m" "Cxcl9" "Cd72" "Hp" "Itgb2" "Vcan" ``` These ligands are expressed by one or more of the input sender cells. To @@ -372,44 +382,46 @@ visualization in other ways or output to a csv file). ``` r nichenet_output$ligand_target_matrix %>% .[1:10,1:6] -## Cd274 Cd53 Ddit4 Id3 Ifit3 Irf1 -## Sell 0.000000000 0 0.001290863 0.001222706 0 0.001095100 -## Itgb1 0.000000000 0 0.001162142 0.001214922 0 0.001069406 -## C3 0.000000000 0 0.001105490 0.000000000 0 0.000000000 -## Ccl5 0.000000000 0 0.001281096 0.001228147 0 0.001155790 -## Sema4d 0.000000000 0 0.001103465 0.001179496 0 0.000000000 -## H2.T23 0.000000000 0 0.001112018 0.001110184 0 0.000000000 -## Adam17 0.002280965 0 0.001760241 0.001546186 0 0.001637201 -## Cxcl10 0.000000000 0 0.001354334 0.001372142 0 0.001393116 -## Icam1 0.000000000 0 0.001325195 0.001314746 0 0.001375860 -## H2.M3 0.000000000 0 0.001436893 0.001506164 0 0.001329158 +## Bst2 Cd274 Cxcl10 Cxcr4 Ddit4 Ddx58 +## Vcan 0.000000000 0.00000000 0.00000000 0.007730215 0.008496498 0.00000000 +## Itgb2 0.000000000 0.00000000 0.00000000 0.009843522 0.009705963 0.00000000 +## Hp 0.000000000 0.00000000 0.00000000 0.008886796 0.010263817 0.00000000 +## Cd72 0.000000000 0.00000000 0.00000000 0.008311072 0.009318998 0.00000000 +## B2m 0.000000000 0.00000000 0.00000000 0.009044523 0.010623390 0.00000000 +## Tgm2 0.010030030 0.00000000 0.04939643 0.014778849 0.015946489 0.04583594 +## Cxcl11 0.000000000 0.00000000 0.00000000 0.007855882 0.000000000 0.00000000 +## Icam1 0.008581207 0.00000000 0.01196470 0.012707198 0.014780600 0.00000000 +## Adam17 0.008167279 0.06549177 0.01109420 0.071842451 0.014236968 0.00000000 +## Btla 0.000000000 0.00000000 0.00000000 0.000000000 0.008851392 0.00000000 ``` ``` r nichenet_output$ligand_target_df # weight column = regulatory potential -## # A tibble: 155 x 3 +## # A tibble: 510 × 3 ## ligand target weight ## -## 1 Ebi3 Cd274 0.00325 -## 2 Ebi3 Cd53 0.00321 -## 3 Ebi3 Ddit4 0.00335 -## 4 Ebi3 Id3 0.00373 -## 5 Ebi3 Ifit3 0.00320 -## 6 Ebi3 Irf1 0.00692 -## 7 Ebi3 Irf7 0.00312 -## 8 Ebi3 Irf9 0.00543 -## 9 Ebi3 Parp14 0.00336 -## 10 Ebi3 Pdcd4 0.00335 -## # ... with 145 more rows -``` - -To get a list of the top-predicted target genes of the 20 top-ranked +## 1 Ebi3 Bst2 0.0500 +## 2 Ebi3 Cd274 0.0504 +## 3 Ebi3 Cxcl10 0.0570 +## 4 Ebi3 Cxcr4 0.0430 +## 5 Ebi3 Ddit4 0.0485 +## 6 Ebi3 Ddx58 0.0402 +## 7 Ebi3 Ddx60 0.0488 +## 8 Ebi3 Dhx58 0.0406 +## 9 Ebi3 Dtx3l 0.0405 +## 10 Ebi3 Eif2ak2 0.0400 +## # … with 500 more rows +``` + +To get a list of the top-predicted target genes of the 30 top-ranked ligands: run the following command ``` r nichenet_output$top_targets -## [1] "Cd274" "Cd53" "Ddit4" "Id3" "Ifit3" "Irf1" "Irf7" "Irf9" "Parp14" "Pdcd4" "Pml" "Psmb9" "Rnf213" "Stat1" "Stat2" "Tap1" "Ubc" "Zbp1" "Cd69" "Gbp4" "Basp1" "Casp8" -## [23] "Cxcl10" "Nlrc5" "Vim" "Actb" "Ifih1" "Myh9" "B2m" "H2-T23" "Rpl13a" "Cxcr4" +## [1] "Bst2" "Cd274" "Cxcl10" "Cxcr4" "Ddit4" "Ddx58" "Ddx60" "Dhx58" "Dtx3l" "Eif2ak2" "Gbp7" "H2-D1" "H2-K1" "H2-M3" "H2-Q6" "H2-Q7" "Ifi35" "Ifit1bl1" +## [19] "Ifit3" "Ifit3b" "Irf1" "Irf7" "Irf9" "Isg15" "Lcp1" "Lgals3bp" "Mx1" "Mx2" "Nampt" "Nmi" "Oas1a" "Oas2" "Oas3" "Parp14" "Parp9" "Pml" +## [37] "Psmb9" "Rsad2" "Stat1" "Stat2" "Tap1" "Xaf1" "Zbp1" "Cd69" "H3f3b" "Id3" "Ifi44" "Ifih1" "H2-T10" "H2-T22" "H2-T23" "Vim" "Ifit2" "Isg20" +## [55] "Gbp3" "Hspa5" "Ifit1" "Nt5c3" "Igfbp4" "Gbp2" "Ifi203" "Ifi206" "Ifi208" "Ifi209" "Ifi213" "Mndal" "Ube2l6" ``` You can visualize the expression of these as well. Because we only focus @@ -475,35 +487,35 @@ visualization in other ways or output to a csv file). ``` r nichenet_output$ligand_receptor_matrix %>% .[1:10,1:6] -## Cxcl9 Cxcl16 Cxcl11 Ccl5 Cxcl10 App -## Cxcr6 0.3629049 0.6598705 0.2255185 0.2627207 0.4001071 0.2255185 -## Ccr7 0.2217117 0.2217117 0.3567789 0.3933531 0.2582858 0.2217117 -## Ccr9 0.1357118 0.2374693 0.2374693 0.1357118 0.1357118 0.1357118 -## Gpr18 0.1374828 0.1374828 0.1374828 0.1374828 0.1374828 0.0000000 -## S1pr1 0.1263826 0.1263826 0.1263826 0.1263826 0.1263826 0.0000000 -## Itga4 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 -## Cd47 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 -## Ptk2b 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 -## Il2rb 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 -## Il2rg 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 +## H2.T23 H2.T22 H2.T10 H2.Q7 H2.Q6 H2.Q4 +## Itgb2 0 0 0 0 0 0 +## Spn 0 0 0 0 0 0 +## Msn 0 0 0 0 0 0 +## Itgal 0 0 0 0 0 0 +## Ezr 0 0 0 0 0 0 +## Il2rg 0 0 0 0 0 0 +## Sell 0 0 0 0 0 0 +## Itga4 0 0 0 0 0 0 +## Selplg 0 0 0 0 0 0 +## Tap1 0 0 0 0 0 0 ``` ``` r nichenet_output$ligand_receptor_df # weight column accords to number of data sources that document this interaction -## # A tibble: 61 x 3 +## # A tibble: 53 × 3 ## ligand receptor weight ## -## 1 Adam17 Notch1 0.482 -## 2 Anxa1 Ccr7 0.222 -## 3 Anxa1 Ccr9 0.237 -## 4 Anxa1 Cxcr6 0.226 -## 5 Anxa1 Itga4 0.201 -## 6 App Ccr7 0.222 -## 7 App Ccr9 0.136 -## 8 App Cxcr6 0.226 -## 9 App Notch1 0.354 -## 10 App Tgfbr2 0.441 -## # ... with 51 more rows +## 1 Adam17 Notch1 1.05 +## 2 App Cd74 0.670 +## 3 B2m Klrd1 0.733 +## 4 B2m Tap1 0.782 +## 5 B2m Tap2 0.834 +## 6 Btla Cd247 0.333 +## 7 Ccl22 Ccr7 0.679 +## 8 Ccl22 Dpp4 0.717 +## 9 Cd48 Cd2 0.964 +## 10 Cd72 Cd5 0.786 +## # … with 43 more rows ``` To get a list of the receptors of the 20 top-ranked ligands: run the @@ -511,8 +523,8 @@ following command ``` r nichenet_output$top_receptors -## [1] "Notch1" "Ccr7" "Ccr9" "Cxcr6" "Itga4" "Tgfbr2" "Itgb2" "Gpr18" "S1pr1" "Il7r" "Il27ra" "Cd8a" "Klrd1" "Il2rg" "Itgal" "Spn" "Il2rb" "Cd47" "Ptk2b" "Cd2" "Cd28" "Selplg" -## [23] "Ptprc" +## [1] "Notch1" "Cd74" "Klrd1" "Tap1" "Tap2" "Cd247" "Ccr7" "Dpp4" "Cd2" "Cd5" "Il27ra" "Cd8a" "Itgb2" "Ezr" "Il2rg" "Itgal" "Msn" "Spn" "Cd82" "Thy1" "Sell" "Cd47" "Cd69" +## [24] "Tgfbr2" "Itga4" "Selplg" ``` You can visualize the expression of these as well. Because we only focus @@ -525,64 +537,34 @@ DotPlot(seuratObj %>% subset(idents = "CD8 T"), features = nichenet_output$top_r ![](seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png) -You also can just show ‘bona fide’ ligand-receptor links that are -described in the literature and not predicted based on protein-protein -interactions: - -``` r -nichenet_output$ligand_receptor_heatmap_bonafide -``` - -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-24-1.png) - -``` r -nichenet_output$ligand_receptor_matrix_bonafide -## H2.M3 H2.T23 C3 Icam1 Tgfb1 Cxcl16 Il15 -## Il2rb 0.0000000 0.00000000 0.0000000 0.00000000 0.0000000 0.0000000 0.8714269 -## Il2rg 0.0000000 0.00000000 0.0000000 0.00000000 0.0000000 0.0000000 0.8587859 -## Itgal 0.0000000 0.00000000 0.0000000 0.06542904 0.0000000 0.0000000 0.0000000 -## Itgb2 0.0000000 0.00000000 0.2916032 0.06113009 0.0000000 0.0000000 0.0000000 -## Tgfbr2 0.0000000 0.00000000 0.0000000 0.00000000 0.7665905 0.0000000 0.0000000 -## Cxcr6 0.0000000 0.00000000 0.0000000 0.00000000 0.0000000 0.6598705 0.0000000 -## Klrd1 0.8334165 0.05478448 0.0000000 0.00000000 0.0000000 0.0000000 0.0000000 -nichenet_output$ligand_receptor_df_bonafide -## # A tibble: 9 x 3 -## ligand receptor weight -## -## 1 C3 Itgb2 0.292 -## 2 Cxcl16 Cxcr6 0.660 -## 3 H2-T23 Klrd1 0.0548 -## 4 H2-M3 Klrd1 0.833 -## 5 Icam1 Itgal 0.0654 -## 6 Icam1 Itgb2 0.0611 -## 7 Il15 Il2rb 0.871 -## 8 Il15 Il2rg 0.859 -## 9 Tgfb1 Tgfbr2 0.767 -``` - If you are interested in checking which geneset (and background set of genes) was used during the ligand activity analysis: ``` r nichenet_output$geneset_oi -## [1] "Irf7" "Stat1" "Ifit3" "Ifit1" "Bst2" "B2m" "Rnf213" "Plac8" "Isg15" "Shisa5" "Zbp1" "Isg20" -## [13] "Samhd1" "Usp18" "H2-T23" "Gbp2" "Ifi203" "Tmsb4x" "Rsad2" "Ly6e" "Rtp4" "Ifit2" "Xaf1" "Smchd1" -## [25] "Daxx" "Alb" "Samd9l" "Actb" "Parp9" "Gbp4" "Lgals3bp" "Mx1" "Gbp7" "Cmpk2" "Dtx3l" "Slfn5" -## [37] "Oasl1" "Herc6" "Ifih1" "Rpsa" "P2ry13" "Irgm2" "Tapbp" "Rps8" "Stat2" "Ifi44" "Rpl8" "Psmb8" -## [49] "Igfbp4" "Ddx58" "Rac2" "Trafd1" "Pml" "Oas2" "Psme1" "Apoe" "Basp1" "Rps27a" "Znfx1" "Rpl13" -## [61] "Oas3" "Nt5c3" "Rnf114" "Tap1" "Rps28" "Rplp0" "Ddx60" "Vim" "Ifi35" "Itm2b" "Ctss" "Pabpc1" -## [73] "Parp14" "Hspa8" "Tor3a" "Rpl23" "Tmbim6" "Thy1" "Ncoa7" "Dhx58" "Rps10" "Rps19" "Psmb9" "Il2rg" -## [85] "Etnk1" "Irf9" "1600014C10Rik" "Parp12" "Eif2ak2" "Eef1b2" "Eef2" "Npc2" "Rps2" "Rps3" "Sp110" "Ube2l6" -## [97] "Nmi" "Uba7" "Psmb10" "Cxcl10" "Rpl13a" "Nhp2" "Tbrg1" "Usp25" "Tor1aip2" "Adar" "Gzma" "Cd53" -## [109] "Hspa5" "Cfl1" "Crip1" "Slco3a1" "Tlr7" "Trim21" "Rpl10" "Mycbp2" "Rps16" "Nlrc5" "Rplp2" "Acadl" -## [121] "Trim12c" "Rps4x" "Irf1" "Psma2" "Nme2" "Zcchc11" "Snord12" "Phip" "Ifitm3" "Sp140" "Dusp2" "Mrpl30" -## [133] "H2-M3" "Gbp3" "Dtx1" "Eef1g" "Rbl1" "Xpo1" "Gm9844" "Rpl35" "Rps26" "Cxcr4" "Eif3m" "Treml2" -## [145] "Rpl35a" "Pdcd4" "Arrb2" "Ubc" "Clic4" "Rpl10a" "Lcp1" "Cd274" "Ddit4" "Cnn2" "Nampt" "Ascc3" -## [157] "Cd47" "Snord49b" "D17Wsu92e" "Fam26f" "Hcst" "Myh9" "Rps27" "Mov10" "Arf4" "Arhgdib" "Ppib" "Trim25" -## [169] "Tspo" "Id3" "Snord35a" "Rnf8" "Casp8" "Ptpn7" "Itk" "Cd69" "Nop10" "Anxa6" "Hk1" "Prkcb" -## [181] "Iqgap1" "Keap1" "Rpl7" "Parp10" +## [1] "Ifi27l2b" "Irf7" "Ly6a" "Stat1" "Ly6c2" "Ifit3" "Ifit1" "Ly6c1" "Bst2" "B2m" "Rnf213" "Ifit1bl1" +## [13] "Plac8" "Slfn1" "Ifi209" "Isg15" "Igtp" "Ifi206" "Shisa5" "Ms4a4c" "H2-K1" "Zbp1" "Oasl2" "Isg20" +## [25] "Samhd1" "Ifi208" "Ms4a6b" "Trim30a" "Usp18" "Mndal" "H2-T23" "Slfn8" "Gbp2" "Ifi203" "Iigp1" "Tmsb4x" +## [37] "H2-T22" "Rsad2" "Ly6e" "Rtp4" "Ifit3b" "Zfas1" "Ifit2" "Phf11b" "Xaf1" "Smchd1" "Daxx" "Alb" +## [49] "Samd9l" "Actb" "Parp9" "Gbp4" "Lgals3bp" "Mx1" "Ifi213" "Irgm1" "2410006H16Rik" "Gbp7" "Cmpk2" "Dtx3l" +## [61] "Slfn5" "H2-D1" "Oasl1" "Herc6" "Ifih1" "Rpsa" "P2ry13" "Irgm2" "Tapbp" "Rps8" "Stat2" "Ifi44" +## [73] "Phf11c" "Rpl8" "Psmb8" "Gm12250" "Igfbp4" "Rplp2-ps1" "Ddx58" "Rac2" "Trafd1" "Sp100" "Gbp9" "Pml" +## [85] "Oas2" "Slfn2" "Psme1" "Apoe" "Gas5" "H2-Q7" "Basp1" "Ms4a4b" "Rps27a" "Cd52" "Znfx1" "Rpl13" +## [97] "Oas3" "Nt5c3" "Rnf114" "Tap1" "Rps28" "Oas1a" "Rplp0" "Ddx60" "Vim" "Gbp6" "Ifi35" "Itm2b" +## [109] "Ctss" "Tgtp1" "Pabpc1" "H2-Q6" "Parp14" "Hspa8" "Tor3a" "Rpl23" "Mx2" "Tmbim6" "Thy1" "Ncoa7" +## [121] "Dhx58" "Rps10" "Rps19" "Psmb9" "Il2rg" "Etnk1" "Irf9" "Rps3a1" "Gbp10" "1600014C10Rik" "Parp12" "Trim30d" +## [133] "Eif2ak2" "Eef1b2" "Eef2" "Npc2" "Rps2" "Rps3" "Sp110" "Ube2l6" "Nmi" "Uba7" "Psmb10" "Cxcl10" +## [145] "Rpl13a" "Trim30c" "Nhp2" "Tbrg1" "Jaml" "Usp25" "Tor1aip2" "Adar" "Gzma" "Gm2000" "Rps18-ps5" "Cd53" +## [157] "Phf11" "Hspa5" "Cfl1" "Crip1" "Slco3a1" "Tlr7" "Trim21" "Gbp8" "Rpl10" "Mycbp2" "Rps16" "Nlrc5" +## [169] "Rplp2" "Acadl" "Trim12c" "Rps4x" "Irf1" "Psma2" "Nme2" "Tut4" "Apobec3" "Snord12" "Phip" "Ifitm3" +## [181] "Sp140" "Dusp2" "Mrpl30" "Malat1" "H2-M3" "Gbp3" "Tmsb10" "Dtx1" "Eef1g" "Rbl1" "Epb41l4aos" "Xpo1" +## [193] "Rgcc" "Gm9844" "Rpl35" "Rps26" "Cxcr4" "Eif3m" "Treml2" "Rpl35a" "Pdcd4" "Arrb2" "Ubc" "Clic4" +## [205] "H2-T10" "Rpl10a" "Lcp1" "Cd274" "Ddit4" "Cnn2" "Nampt" "Ascc3" "Cd47" "Snord49b" "Ilrun" "Calhm6" +## [217] "Psme2b" "Hcst" "Myh9" "Rps27" "Mov10" "Gm15772" "Arf4" "Arhgdib" "Ppib" "Ubb" "Trim25" "Tspo" +## [229] "Id3" "Snord35a" "Rnf8" "Casp8" "Ptpn7" "Itk" "Rps27rt" "Cd69" "H3f3b" "Nop10" "Anxa6" "Hk1" +## [241] "Prkcb" "Iqgap1" "Keap1" "Rpl7" "Parp10" nichenet_output$background_expressed_genes %>% length() -## [1] 1487 +## [1] 1662 ``` ### Rerun the NicheNet analysis with different sender cell definition @@ -592,7 +574,7 @@ you are only interested in doing the analyis for one sender cell type, such as dendritic cells in this case. ``` r -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "DC", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "DC", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" @@ -604,7 +586,7 @@ nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver nichenet_output$ligand_activity_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png) +![](seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png) Instead of focusing on one or multiple predefined sender cell types, it is also possible that you want to consider all cell types present as @@ -612,7 +594,7 @@ possible sender cell. This also includes the receiver cell type, making that you can look at autocrine signaling as well. ``` r -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "all", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "all", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" @@ -624,7 +606,7 @@ nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver nichenet_output$ligand_activity_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png) +![](seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png) In some cases, it could be possible that you don’t have data of potential sender cells. If you still want to predict possible upstream @@ -634,7 +616,7 @@ will consider all possible ligands in the NicheNet databases for which a receptor is expressed by the receiver cell of interest. ``` r -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "undefined", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "undefined", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" @@ -645,7 +627,7 @@ nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver nichenet_output$ligand_activity_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-28-1.png) +![](seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png) As you can see in this analysis result, many genes DE in CD8 T cells after LCMV infection are strongly predicted type I interferon targets. @@ -669,7 +651,7 @@ steady-state and LCMV infection in both CD8 and CD4 T cells. receiver_celltypes_oi = c("CD4 T", "CD8 T") # receiver_celltypes_oi = seuratObj %>% Idents() %>% unique() # for all celltypes in the dataset: use only when this would make sense biologically -nichenet_output = receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") +nichenet_output = receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" @@ -696,7 +678,8 @@ common_ligands = intersect(nichenet_output$`CD4 T`$top_ligands, nichenet_output$ print("common ligands are: ") ## [1] "common ligands are: " print(common_ligands) -## [1] "Ebi3" "Il15" "Crlf2" "H2-M3" "App" "Ptprc" "Icam1" "Ccl5" "Cxcl10" "Tgfb1" "Cxcl11" "Sema4d" "Cxcl9" "H2-T23" "Cxcl16" "C3" "Itgb1" +## [1] "Ebi3" "Ptprc" "H2-M3" "H2-M2" "H2-T10" "H2-T22" "H2-T23" "Sirpa" "H2-K1" "H2-Q4" "H2-Q6" "H2-Q7" "H2-D1" "Ccl22" "Cd48" "App" "Tgfb1" "Selplg" "Icam1" "Btla" "Cd72" "B2m" "Hp" +## [24] "Itgb2" cd4_ligands = nichenet_output$`CD4 T`$top_ligands %>% setdiff(nichenet_output$`CD8 T`$top_ligands) cd8_ligands = nichenet_output$`CD8 T`$top_ligands %>% setdiff(nichenet_output$`CD4 T`$top_ligands) @@ -704,12 +687,12 @@ cd8_ligands = nichenet_output$`CD8 T`$top_ligands %>% setdiff(nichenet_output$`C print("Ligands specifically regulating DE in CD4T: ") ## [1] "Ligands specifically regulating DE in CD4T: " print(cd4_ligands) -## [1] "Cd274" "Hmgb1" "Cd28" +## [1] "H2-Eb1" "H2-Oa" "Il16" "Fn1" "H2-DMb1" "H2-DMb2" print("Ligands specifically regulating DE in CD8T: ") ## [1] "Ligands specifically regulating DE in CD8T: " print(cd8_ligands) -## [1] "Adam17" "Anxa1" "Sell" +## [1] "Cxcl10" "Adam17" "Cxcl11" "Tgm2" "Cxcl9" "Vcan" ``` ## NicheNet analysis on Seurat object: explain differential expression between two cell populations @@ -729,9 +712,9 @@ Therefore, we will now also demonstrate the use of another Seurat wrapper function that can be used in the case of explaining differential expression between cell populations. But keep in mind that the comparison that you make should be biologically relevant. It is possible -to use NicheNet to explain differential expression beween any two cell +to use NicheNet to explain differential expression between any two cell populations in your dataset, but in most cases, differential expression -between cell populations will be a result of cell-intrinisc properties +between cell populations will be a result of cell-intrinsic properties (i.e. different cell types have a different gene expression profile) and not of intercellular communication processes. In such a case, it does not make any sense to use NicheNet. @@ -761,7 +744,7 @@ nichenet_output = nichenet_seuratobj_cluster_de( seurat_obj = seuratObj, receiver_reference = "CD8 T_SS", receiver_affected = "CD8 T_LCMV", sender = c("DC_LCMV","Mono_LCMV"), - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") + ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis between two receiver cell clusters" @@ -776,7 +759,7 @@ Check the top-ranked ligands and their target genes nichenet_output$ligand_activity_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png) +![](seurat_wrapper_files/figure-gfm/unnamed-chunk-32-1.png) Check the expression of the top-ranked ligands @@ -784,7 +767,7 @@ Check the expression of the top-ranked ligands DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-34-1.png) +![](seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png) It could be interested to check which top-ranked ligands are differentially expressed in monocytes after LCMV infection @@ -795,7 +778,7 @@ Mono_upregulated_ligands = FindMarkers(seuratObj, ident.1 = "Mono_LCMV", ident.2 print("Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-StSt and CD8T-LCMV are: ") ## [1] "Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-StSt and CD8T-LCMV are: " print(Mono_upregulated_ligands) -## [1] "Cxcl10" +## [1] "B2m" "H2-D1" "Cxcl10" ``` # Remarks diff --git a/vignettes/seurat_wrapper_circos.Rmd b/vignettes/seurat_wrapper_circos.Rmd index adb1431..125cbb1 100644 --- a/vignettes/seurat_wrapper_circos.Rmd +++ b/vignettes/seurat_wrapper_circos.Rmd @@ -2,7 +2,12 @@ title: "Seurat Wrapper + Circos visualization" author: "Robin Browaeys" date: "18-1-2021" -output: html_document +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Seurat Wrapper + Circos visualization} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +bibliography: library.bib --- @@ -26,7 +31,7 @@ As example expression data of interacting cells, we will use mouse NICHE-seq dat In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. -The used NicheNet networks, ligand-target matrix and example expression data of interacting cells can be downloaded from Zenodo. The NicheNet networks and ligand-target matrix at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) and the Seurat object of the processed NICHE-seq single-cell data at [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3531889.svg)](https://doi.org/10.5281/zenodo.3531889). +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. # Prepare NicheNet analysis @@ -47,18 +52,18 @@ If you would use and load other packages, we recommend to load these 3 packages ### Read in NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks: ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) head(lr_network) -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network head(weighted_networks$gr) # interactions and their weights in the gene regulatory network ``` -### Read in the expression data of interacting cells: +### Read in the expression data of interacting cells ```{r} @@ -86,7 +91,7 @@ In this case study, the receiver cell population is the 'CD8 T' cell population, The gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus 'LCMV', whereas the reference/steady-state condition is 'SS'. The notion of conditions can be extracted from the metadata column 'aggregate', the method to calculate the differential expression is the standard Seurat Wilcoxon test. -The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is 20 by default. +The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is 30 by default, but we will only choose the top 20 to not overcrowd the circos plot. To perform the NicheNet analysis with these specifications, run the following: @@ -100,7 +105,8 @@ nichenet_output = nichenet_seuratobj_aggregate( receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = sender_celltypes, - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") + ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, + top_n_ligands = 20) ``` ### Interpret the NicheNet analysis output @@ -421,3 +427,5 @@ circos.track(track.index = 1, panel.fun = function(x, y) { circos.clear() dev.off() ``` + +### References diff --git a/vignettes/seurat_wrapper_circos.md b/vignettes/seurat_wrapper_circos.md index 5329f5d..9f91351 100644 --- a/vignettes/seurat_wrapper_circos.md +++ b/vignettes/seurat_wrapper_circos.md @@ -29,9 +29,9 @@ could lead to wrong interpretation of the results. As example expression data of interacting cells, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and -72 hours after lymphocytic choriomeningitis virus (LCMV) infection \[See -@medaglia\_spatial\_2017\]. We will NicheNet to explore immune cell -crosstalk in response to this LCMV infection. +72 hours after lymphocytic choriomeningitis virus (LCMV) infection (See +Medaglia et al. 2017). We will NicheNet to explore immune cell crosstalk +in response to this LCMV infection. In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be @@ -41,12 +41,10 @@ regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. -The used NicheNet networks, ligand-target matrix and example expression -data of interacting cells can be downloaded from Zenodo. The NicheNet -networks and ligand-target matrix at -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) -and the Seurat object of the processed NICHE-seq single-cell data at -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3531889.svg)](https://doi.org/10.5281/zenodo.3531889). +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) +and the [Seurat object of the processed NICHE-seq single-cell +data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from +Zenodo. # Prepare NicheNet analysis @@ -67,51 +65,51 @@ packages after the others. ### Read in NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks: ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 - -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +## 2300002M23Rik 2610528A11Rik 9530003J23Rik a A2m +## 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 1.390053e-05 +## 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 1.345536e-05 +## 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 9.802264e-05 +## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 +## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 + +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) head(lr_network) -## # A tibble: 6 x 4 -## from to source database -## -## 1 CXCL1 CXCR2 kegg_cytokines kegg -## 2 CXCL2 CXCR2 kegg_cytokines kegg -## 3 CXCL3 CXCR2 kegg_cytokines kegg -## 4 CXCL5 CXCR2 kegg_cytokines kegg -## 5 PPBP CXCR2 kegg_cytokines kegg -## 6 CXCL6 CXCR2 kegg_cytokines kegg - -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) +## # A tibble: 6 × 4 +## from to database source +## +## 1 2300002M23Rik Ddr1 omnipath omnipath +## 2 2610528A11Rik Gpr15 omnipath omnipath +## 3 9530003J23Rik Itgal omnipath omnipath +## 4 a Atrn omnipath omnipath +## 5 a F11r omnipath omnipath +## 6 a Mc1r omnipath omnipath + +weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network -## # A tibble: 6 x 3 -## from to weight -## -## 1 A1BG ABCC6 0.422 -## 2 A1BG ACE2 0.101 -## 3 A1BG ADAM10 0.0970 -## 4 A1BG AGO1 0.0525 -## 5 A1BG AKT1 0.0855 -## 6 A1BG ANXA7 0.457 +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010F05Rik App 0.110 +## 2 0610010F05Rik Cat 0.0673 +## 3 0610010F05Rik H1f2 0.0660 +## 4 0610010F05Rik Lrrc49 0.0829 +## 5 0610010F05Rik Nicn1 0.0864 +## 6 0610010F05Rik Srpk1 0.123 head(weighted_networks$gr) # interactions and their weights in the gene regulatory network -## # A tibble: 6 x 3 -## from to weight -## -## 1 A1BG A2M 0.0294 -## 2 AAAS GFAP 0.0290 -## 3 AADAC CYP3A4 0.0422 -## 4 AADAC IRF8 0.0275 -## 5 AATF ATM 0.0330 -## 6 AATF ATR 0.0355 +## # A tibble: 6 × 3 +## from to weight +## +## 1 0610010K14Rik 0610010K14Rik 0.121 +## 2 0610010K14Rik 2510039O18Rik 0.121 +## 3 0610010K14Rik 2610021A01Rik 0.0256 +## 4 0610010K14Rik 9130401M01Rik 0.0263 +## 5 0610010K14Rik Alg1 0.127 +## 6 0610010K14Rik Alox12 0.128 ``` -### Read in the expression data of interacting cells: +### Read in the expression data of interacting cells ``` r seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) @@ -167,8 +165,9 @@ method to calculate the differential expression is the standard Seurat Wilcoxon test. The number of top-ranked ligands that are further used to predict active -target genes and construct an active ligand-receptor network is 20 by -default. +target genes and construct an active ligand-receptor network is 30 by +default, but we will only choose the top 20 to not overcrowd the circos +plot. To perform the NicheNet analysis with these specifications, run the following: @@ -183,13 +182,15 @@ nichenet_output = nichenet_seuratobj_aggregate( receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = sender_celltypes, - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, organism = "mouse") + ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, + top_n_ligands = 20) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" ## [1] "Perform NicheNet ligand activity analysis" ## [1] "Infer active target genes of the prioritized ligands" ## [1] "Infer receptors of the prioritized ligands" +## [1] "Perform DE analysis in sender cells" ``` ### Interpret the NicheNet analysis output @@ -202,20 +203,20 @@ command: ``` r nichenet_output$ligand_activities -## # A tibble: 44 x 6 -## test_ligand auroc aupr pearson rank bona_fide_ligand -## -## 1 Ebi3 0.638 0.234 0.197 1 FALSE -## 2 Il15 0.582 0.163 0.0961 2 TRUE -## 3 Crlf2 0.549 0.163 0.0758 3 FALSE -## 4 App 0.499 0.141 0.0655 4 TRUE -## 5 Tgfb1 0.494 0.140 0.0558 5 TRUE -## 6 Ptprc 0.536 0.149 0.0554 6 TRUE -## 7 H2-M3 0.525 0.157 0.0528 7 TRUE -## 8 Icam1 0.543 0.142 0.0486 8 TRUE -## 9 Cxcl10 0.531 0.141 0.0408 9 TRUE -## 10 Adam17 0.517 0.137 0.0359 10 TRUE -## # ... with 34 more rows +## # A tibble: 70 × 6 +## test_ligand auroc aupr aupr_corrected pearson rank +## +## 1 Ebi3 0.658 0.381 0.235 0.293 1 +## 2 Ptprc 0.642 0.305 0.159 0.161 2 +## 3 H2-M3 0.610 0.287 0.142 0.181 3 +## 4 H2-M2 0.614 0.272 0.126 0.147 5 +## 5 H2-T10 0.614 0.272 0.126 0.147 5 +## 6 H2-T22 0.614 0.272 0.126 0.147 5 +## 7 H2-T23 0.614 0.271 0.126 0.147 7 +## 8 H2-K1 0.607 0.258 0.113 0.132 8 +## 9 H2-Q4 0.606 0.258 0.112 0.131 10 +## 10 H2-Q6 0.606 0.258 0.112 0.131 10 +## # … with 60 more rows ``` These ligands are expressed by one or more of the input sender cells. To @@ -300,14 +301,15 @@ names(sender_ligand_assignment) The top ligands seem to be most strongly expressed by B cells, NK cells, monocytes and DCs. We will know also look at which ligands are common -across multiple cell types (= those that are specific to > 1 cell -type, or those that were not assigned to a cell type in the previous -block of code) +across multiple cell types (= those that are specific to \> 1 cell type, +or those that were not assigned to a cell type in the previous block of +code) Determine now which prioritized ligands are expressed by CAFs and or endothelial cells ``` r + all_assigned_ligands = sender_ligand_assignment %>% lapply(function(x){names(x)}) %>% unlist() unique_ligands = all_assigned_ligands %>% table() %>% .[. == 1] %>% names() general_ligands = nichenet_output$top_ligands %>% setdiff(unique_ligands) @@ -598,3 +600,18 @@ dev.off() ## png ## 2 ``` +### References + +
+ +
+ +Medaglia, Chiara, Amir Giladi, Liat Stoler-Barak, Marco De Giovanni, +Tomer Meir Salame, Adi Biram, Eyal David, et al. 2017. “Spatial +Reconstruction of Immune Niches by Combining Photoactivatable Reporters +and scRNA-Seq.” *Science*, December, +eaao4277. . + +
+ +
diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png index 4830699..e3f743f 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png index be1b60b..93b1dd9 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png index d8ec3cb..6d776ab 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-104-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-104-1.png deleted file mode 100644 index f21a1e3..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-104-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-105-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-105-1.png deleted file mode 100644 index ad0a5d3..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-105-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png index 5cdda36..4f3e89c 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png index c358d8c..a3ef603 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png index cf44500..5c17c48 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png index eb8682e..ed9b2ed 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png index 754601e..6c35b13 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-4-1.png index f21a1e3..b5db526 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-4-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-4-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-5-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-5-1.png index ad0a5d3..12ca0a9 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-5-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-5-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png index 1409bef..0d7cf9c 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png index 6357be7..e14ecfa 100644 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png index f82b1f7..76a806d 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png index 5cdda36..a4c9b67 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-2.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-2.png deleted file mode 100644 index be1b60b..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-2.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-3.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-3.png deleted file mode 100644 index d8ec3cb..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-3.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png index d36d0fc..3365c86 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-13-1.png deleted file mode 100644 index d36d0fc..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-13-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png index 6a4e684..1148682 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png index 44486e3..f1ce0c7 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png index e0f6862..8e029cf 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png index 0a9bc5b..bc2cc82 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png index 024b40d..5a5d770 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-2.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-2.png deleted file mode 100644 index e0f6862..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-2.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-3.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-3.png deleted file mode 100644 index 0a9bc5b..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-3.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png index 464b42e..fcd8523 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-20-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-20-1.png deleted file mode 100644 index 464b42e..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-20-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png index 0b5f187..1a3fb2f 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-24-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-24-1.png deleted file mode 100644 index d30faa0..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-24-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png index d30faa0..cd12af2 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png index 83498e3..7cb48f8 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-2.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-2.png deleted file mode 100644 index a9e01e3..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-2.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png index 0a37bd9..bb6e0ee 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-2.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-2.png deleted file mode 100644 index f863edd..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-2.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-28-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-28-1.png deleted file mode 100644 index 21330dc..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-28-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-28-2.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-28-2.png deleted file mode 100644 index b8b39a6..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-28-2.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-29-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-29-1.png deleted file mode 100644 index 21330dc..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-29-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-32-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-32-1.png new file mode 100644 index 0000000..0de9ee9 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-32-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png index bef1bab..28fde41 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-34-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-34-1.png deleted file mode 100644 index b4c3166..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-34-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-35-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-35-1.png deleted file mode 100644 index b4c3166..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-35-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png index f21a1e3..b5db526 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png index ad0a5d3..12ca0a9 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png index 0b3dd58..e926cfe 100644 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png and b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png differ diff --git a/vignettes/target_prediction_evaluation_geneset.Rmd b/vignettes/target_prediction_evaluation_geneset.Rmd index dc332ca..4f71b40 100644 --- a/vignettes/target_prediction_evaluation_geneset.Rmd +++ b/vignettes/target_prediction_evaluation_geneset.Rmd @@ -34,7 +34,7 @@ In this example, we will use data from Puram et al. to explore intercellular com For this analysis, we will first assess the ligand activity of each ligand, or in other words, we will assess how well each CAF-ligand can predict the p-EMT gene set compared to the background of expressed genes. This allows us to prioritize p-EMT-regulating ligands. Then, we will assess how well the prioritized ligands together can predict whether genes belong to the gene set of interest or not. -The used ligand-target matrix and example expression data of interacting cells can be downloaded from Zenodo. [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and example [expression data](https://doi.org/10.5281/zenodo.3260758) of interacting cells can be downloaded from Zenodo. ### Load packages required for this vignette @@ -68,7 +68,7 @@ expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){1 ### Load the ligand-target model we want to use ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ``` @@ -92,7 +92,7 @@ head(background_expressed_genes) In a first step, we will define a set of potentially active ligands. As potentially active ligands, we will use ligands that are 1) expressed by CAFs and 2) can bind a (putative) receptor expressed by malignant cells. Putative ligand-receptor links were gathered from NicheNet's ligand-receptor data sources. ```{r} -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) ligands = lr_network %>% pull(from) %>% unique() expressed_ligands = intersect(ligands,expressed_genes_CAFs) @@ -110,29 +110,29 @@ Now perform the ligand activity analysis: infer how well NicheNet's ligand-targe ligand_activities = predict_ligand_activities(geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) ``` -Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the pearson correlation between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, we will rank the ligands based on their pearson correlation coefficient. +Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the AUPR between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, we will rank the ligands based on their AUPR. ```{r} -ligand_activities %>% arrange(-pearson) -best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) +ligand_activities %>% arrange(-aupr_corrected) +best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) head(best_upstream_ligands) ``` -For the top 20 ligands, we will now build a multi-ligand model that uses all top-ranked ligands to predict whether a gene belongs to the p-EMT program of not. This classification model will be trained via cross-validation and returns a probability for every gene. +For the top 30 ligands, we will now build a multi-ligand model that uses all top-ranked ligands to predict whether a gene belongs to the p-EMT program of not. This classification model will be trained via cross-validation and returns a probability for every gene. ```{r} # change rounds and folds here, to two rounds to reduce time: normally: do multiple rounds k = 3 # 3-fold n = 2 # 2 rounds -pemt_gene_predictions_top20_list = seq(n) %>% lapply(assess_rf_class_probabilities, folds = k, geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligands_oi = best_upstream_ligands, ligand_target_matrix = ligand_target_matrix) +pemt_gene_predictions_top30_list = seq(n) %>% lapply(assess_rf_class_probabilities, folds = k, geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligands_oi = best_upstream_ligands, ligand_target_matrix = ligand_target_matrix) ``` Evaluate now how well the target gene probabilies accord to the gene set assignments ```{r} # get performance: auroc-aupr-pearson -target_prediction_performances_cv = pemt_gene_predictions_top20_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% bind_rows() %>% mutate(round=seq(1:nrow(.))) +target_prediction_performances_cv = pemt_gene_predictions_top30_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% bind_rows() %>% mutate(round=seq(1:nrow(.))) ``` What is the AUROC, AUPR and PCC of this model (averaged over cross-validation rounds)? @@ -147,7 +147,7 @@ Evaluate now whether genes belonging to the gene set are more likely to be top-p ```{r} # get performance: how many p-EMT genes and non-p-EMT-genes among top 5% predicted targets -target_prediction_performances_discrete_cv = pemt_gene_predictions_top20_list %>% lapply(calculate_fraction_top_predicted, quantile_cutoff = 0.95) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(pemt_gene_predictions_top20_list), each = 2)) +target_prediction_performances_discrete_cv = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted, quantile_cutoff = 0.95) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(pemt_gene_predictions_top30_list), each = 2)) ``` What is the fraction of p-EMT genes that belongs to the top 5% predicted targets? @@ -163,7 +163,7 @@ target_prediction_performances_discrete_cv %>% filter(!true_target) %>% .$fracti We see that the p-EMT genes are enriched in the top-predicted target genes. To test this, we will now apply a Fisher's exact test for every cross-validation round and report the average p-value. ```{r} -target_prediction_performances_discrete_fisher = pemt_gene_predictions_top20_list %>% lapply(calculate_fraction_top_predicted_fisher, quantile_cutoff = 0.95) +target_prediction_performances_discrete_fisher = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted_fisher, quantile_cutoff = 0.95) target_prediction_performances_discrete_fisher %>% unlist() %>% mean() ``` @@ -171,7 +171,7 @@ Finally, we will look at which p-EMT genes are well-predicted in every cross-val ```{r} # get top predicted genes -top_predicted_genes = seq(length(pemt_gene_predictions_top20_list)) %>% lapply(get_top_predicted_genes,pemt_gene_predictions_top20_list) %>% reduce(full_join, by = c("gene","true_target")) +top_predicted_genes = seq(length(pemt_gene_predictions_top30_list)) %>% lapply(get_top_predicted_genes,pemt_gene_predictions_top30_list) %>% reduce(full_join, by = c("gene","true_target")) top_predicted_genes %>% filter(true_target) ``` diff --git a/vignettes/target_prediction_evaluation_geneset.md b/vignettes/target_prediction_evaluation_geneset.md index 6d4c4c5..a9e58ea 100644 --- a/vignettes/target_prediction_evaluation_geneset.md +++ b/vignettes/target_prediction_evaluation_geneset.md @@ -11,11 +11,11 @@ This vignette shows how NicheNet can be used to to predict which ligands might regulate a given set of genes and how well they do this prediction. For this analysis, you need to define: - - a set of genes of which expression in a “receiver cell” is possibly - affected by extracellular protein signals (ligands) (e.g. genes - differentially expressed upon cell-cell interaction ) - - a set of potentially active ligands (e.g. ligands expressed by - interacting “sender cells”) +- a set of genes of which expression in a “receiver cell” is possibly + affected by extracellular protein signals (ligands) (e.g. genes + differentially expressed upon cell-cell interaction ) +- a set of potentially active ligands (e.g. ligands expressed by + interacting “sender cells”) Therefore, you often first need to process expression data of interacting cells to define both. @@ -36,9 +36,9 @@ genes. This allows us to prioritize p-EMT-regulating ligands. Then, we will assess how well the prioritized ligands together can predict whether genes belong to the gene set of interest or not. -The used ligand-target matrix and example expression data of interacting -cells can be downloaded from Zenodo. -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3260758.svg)](https://doi.org/10.5281/zenodo.3260758) +The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) +and example [expression data](https://doi.org/10.5281/zenodo.3260758) of +interacting cells can be downloaded from Zenodo. ### Load packages required for this vignette @@ -50,8 +50,7 @@ library(tidyverse) ### Read in expression data of interacting cells First, we will read in the publicly available single-cell data from CAF -and malignant cells from HNSCC -tumors. +and malignant cells from HNSCC tumors. ``` r hnscc_expression = readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) @@ -63,8 +62,7 @@ Secondly, we will determine which genes are expressed in CAFs and malignant cells from high quality primary tumors. Therefore, we wil not consider cells from tumor samples of less quality or from lymph node metastases. To determine expressed genes, we use the definition used by -of Puram et -al. +of Puram et al. ``` r tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") @@ -79,14 +77,14 @@ expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){1 ### Load the ligand-target model we want to use ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) +ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## CXCL1 CXCL2 CXCL3 CXCL5 PPBP -## A1BG 3.534343e-04 4.041324e-04 3.729920e-04 3.080640e-04 2.628388e-04 -## A1BG-AS1 1.650894e-04 1.509213e-04 1.583594e-04 1.317253e-04 1.231819e-04 -## A1CF 5.787175e-04 4.596295e-04 3.895907e-04 3.293275e-04 3.211944e-04 -## A2M 6.027058e-04 5.996617e-04 5.164365e-04 4.517236e-04 4.590521e-04 -## A2M-AS1 8.898724e-05 8.243341e-05 7.484018e-05 4.912514e-05 5.120439e-05 +## A2M AANAT ABCA1 ACE ACE2 +## A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 +## A1BG 0.0018503922 0.0011108718 0.0014225077 0.0028594037 0.001139013 +## A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 +## A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 +## A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 ``` ### Load the gene set of interest and background of genes @@ -97,8 +95,7 @@ is possibly affected due to communication with other cells. Because we here want to investigate how CAF regulate the expression of p-EMT genes in malignant cells, we will use the p-EMT gene set defined by Puram et al. as gene set of interset and use all genes expressed in -malignant cells as background of -genes. +malignant cells as background of genes. ``` r pemt_geneset = readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), col_names = "gene") %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] # only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. @@ -115,11 +112,10 @@ In a first step, we will define a set of potentially active ligands. As potentially active ligands, we will use ligands that are 1) expressed by CAFs and 2) can bind a (putative) receptor expressed by malignant cells. Putative ligand-receptor links were gathered from NicheNet’s -ligand-receptor data -sources. +ligand-receptor data sources. ``` r -lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) +lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) ligands = lr_network %>% pull(from) %>% unique() expressed_ligands = intersect(ligands,expressed_genes_CAFs) @@ -129,57 +125,55 @@ expressed_receptors = intersect(receptors,expressed_genes_malignant) potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() head(potential_ligands) -## [1] "HGF" "TNFSF10" "TGFB2" "TGFB3" "INHBA" "CD99" +## [1] "A2M" "ADAM10" "ADAM12" "ADAM15" "ADAM17" "ADAM9" ``` Now perform the ligand activity analysis: infer how well NicheNet’s ligand-target potential scores can predict whether a gene belongs to the -p-EMT program or -not. +p-EMT program or not. ``` r ligand_activities = predict_ligand_activities(geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) ``` Now, we want to rank the ligands based on their ligand activity. In our -validation study, we showed that the pearson correlation between a -ligand’s target predictions and the observed transcriptional response -was the most informative measure to define ligand activity. Therefore, -we will rank the ligands based on their pearson correlation coefficient. +validation study, we showed that the AUPR between a ligand’s target +predictions and the observed transcriptional response was the most +informative measure to define ligand activity. Therefore, we will rank +the ligands based on their AUPR. ``` r -ligand_activities %>% arrange(-pearson) -## # A tibble: 131 x 4 -## test_ligand auroc aupr pearson -## -## 1 PTHLH 0.667 0.0720 0.128 -## 2 CXCL12 0.680 0.0507 0.123 -## 3 AGT 0.676 0.0581 0.120 -## 4 TGFB3 0.689 0.0454 0.117 -## 5 IL6 0.693 0.0510 0.115 -## 6 INHBA 0.695 0.0502 0.113 -## 7 ADAM17 0.672 0.0526 0.113 -## 8 TNC 0.700 0.0444 0.109 -## 9 CTGF 0.680 0.0473 0.108 -## 10 FN1 0.679 0.0505 0.108 -## # ... with 121 more rows -best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand) +ligand_activities %>% arrange(-aupr_corrected) +## # A tibble: 203 × 5 +## test_ligand auroc aupr aupr_corrected pearson +## +## 1 TGFB2 0.768 0.123 0.107 0.199 +## 2 CXCL12 0.708 0.0884 0.0721 0.144 +## 3 BMP8A 0.770 0.0880 0.0718 0.177 +## 4 INHBA 0.773 0.0866 0.0703 0.124 +## 5 LTBP1 0.722 0.0785 0.0622 0.163 +## 6 TNXB 0.713 0.0737 0.0574 0.158 +## 7 ENG 0.759 0.0732 0.0569 0.157 +## 8 BMP5 0.745 0.0715 0.0552 0.150 +## 9 VCAN 0.715 0.0711 0.0548 0.142 +## 10 HGF 0.712 0.0711 0.0548 0.138 +## # … with 193 more rows +best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) head(best_upstream_ligands) -## [1] "PTHLH" "CXCL12" "AGT" "TGFB3" "IL6" "INHBA" +## [1] "TGFB2" "CXCL12" "BMP8A" "INHBA" "LTBP1" "TNXB" ``` -For the top 20 ligands, we will now build a multi-ligand model that uses +For the top 30 ligands, we will now build a multi-ligand model that uses all top-ranked ligands to predict whether a gene belongs to the p-EMT program of not. This classification model will be trained via -cross-validation and returns a probability for every -gene. +cross-validation and returns a probability for every gene. ``` r # change rounds and folds here, to two rounds to reduce time: normally: do multiple rounds k = 3 # 3-fold n = 2 # 2 rounds -pemt_gene_predictions_top20_list = seq(n) %>% lapply(assess_rf_class_probabilities, folds = k, geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligands_oi = best_upstream_ligands, ligand_target_matrix = ligand_target_matrix) +pemt_gene_predictions_top30_list = seq(n) %>% lapply(assess_rf_class_probabilities, folds = k, geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligands_oi = best_upstream_ligands, ligand_target_matrix = ligand_target_matrix) ``` Evaluate now how well the target gene probabilies accord to the gene set @@ -187,7 +181,7 @@ assignments ``` r # get performance: auroc-aupr-pearson -target_prediction_performances_cv = pemt_gene_predictions_top20_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% bind_rows() %>% mutate(round=seq(1:nrow(.))) +target_prediction_performances_cv = pemt_gene_predictions_top30_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% bind_rows() %>% mutate(round=seq(1:nrow(.))) ``` What is the AUROC, AUPR and PCC of this model (averaged over @@ -195,20 +189,19 @@ cross-validation rounds)? ``` r target_prediction_performances_cv$auroc %>% mean() -## [1] 0.7295863 +## [1] 0.7606117 target_prediction_performances_cv$aupr %>% mean() -## [1] 0.07603073 +## [1] 0.09281456 target_prediction_performances_cv$pearson %>% mean() -## [1] 0.1660327 +## [1] 0.1911942 ``` Evaluate now whether genes belonging to the gene set are more likely to -be top-predicted. We will look at the top 5% of predicted targets -here. +be top-predicted. We will look at the top 5% of predicted targets here. ``` r # get performance: how many p-EMT genes and non-p-EMT-genes among top 5% predicted targets -target_prediction_performances_discrete_cv = pemt_gene_predictions_top20_list %>% lapply(calculate_fraction_top_predicted, quantile_cutoff = 0.95) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(pemt_gene_predictions_top20_list), each = 2)) +target_prediction_performances_discrete_cv = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted, quantile_cutoff = 0.95) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(pemt_gene_predictions_top30_list), each = 2)) ``` What is the fraction of p-EMT genes that belongs to the top 5% predicted @@ -216,27 +209,25 @@ targets? ``` r target_prediction_performances_discrete_cv %>% filter(true_target) %>% .$fraction_positive_predicted %>% mean() -## [1] 0.25 +## [1] 0.3489583 ``` What is the fraction of non-p-EMT genes that belongs to the top 5% -predicted -targets? +predicted targets? ``` r target_prediction_performances_discrete_cv %>% filter(!true_target) %>% .$fraction_positive_predicted %>% mean() -## [1] 0.04769076 +## [1] 0.04529767 ``` We see that the p-EMT genes are enriched in the top-predicted target genes. To test this, we will now apply a Fisher’s exact test for every -cross-validation round and report the average -p-value. +cross-validation round and report the average p-value. ``` r -target_prediction_performances_discrete_fisher = pemt_gene_predictions_top20_list %>% lapply(calculate_fraction_top_predicted_fisher, quantile_cutoff = 0.95) +target_prediction_performances_discrete_fisher = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted_fisher, quantile_cutoff = 0.95) target_prediction_performances_discrete_fisher %>% unlist() %>% mean() -## [1] 5.647773e-10 +## [1] 4.529691e-18 ``` Finally, we will look at which p-EMT genes are well-predicted in every @@ -244,29 +235,29 @@ cross-validation round. ``` r # get top predicted genes -top_predicted_genes = seq(length(pemt_gene_predictions_top20_list)) %>% lapply(get_top_predicted_genes,pemt_gene_predictions_top20_list) %>% reduce(full_join, by = c("gene","true_target")) +top_predicted_genes = seq(length(pemt_gene_predictions_top30_list)) %>% lapply(get_top_predicted_genes,pemt_gene_predictions_top30_list) %>% reduce(full_join, by = c("gene","true_target")) top_predicted_genes %>% filter(true_target) -## # A tibble: 28 x 4 -## gene true_target predicted_top_target_roun~ predicted_top_target_rou~ -## -## 1 COL1A1 TRUE TRUE TRUE -## 2 MMP2 TRUE TRUE TRUE -## 3 MMP1 TRUE TRUE TRUE -## 4 PLAU TRUE TRUE TRUE -## 5 TIMP3 TRUE TRUE TRUE -## 6 MT2A TRUE TRUE TRUE -## 7 INHBA TRUE TRUE TRUE -## 8 COL4A2 TRUE TRUE TRUE -## 9 MMP10 TRUE TRUE TRUE -## 10 COL17A1 TRUE TRUE TRUE -## # ... with 18 more rows +## # A tibble: 41 × 4 +## gene true_target predicted_top_target_round1 predicted_top_target_round2 +## +## 1 SERPINE1 TRUE TRUE TRUE +## 2 MMP1 TRUE TRUE TRUE +## 3 TAGLN TRUE TRUE TRUE +## 4 COL1A1 TRUE TRUE TRUE +## 5 FSTL3 TRUE TRUE TRUE +## 6 MT2A TRUE TRUE TRUE +## 7 TNC TRUE TRUE TRUE +## 8 SEMA3C TRUE TRUE TRUE +## 9 THBS1 TRUE TRUE TRUE +## 10 LAMC2 TRUE TRUE TRUE +## # … with 31 more rows ``` ### References -
+
-
+
Puram, Sidharth V., Itay Tirosh, Anuraag S. Parikh, Anoop P. Patel, Keren Yizhak, Shawn Gillespie, Christopher Rodman, et al. 2017. diff --git a/vignettes/tgfb3_targets_signaling_path.jpg b/vignettes/tgfb3_targets_signaling_path.jpg index 4f3e61a..e34ee9a 100644 Binary files a/vignettes/tgfb3_targets_signaling_path.jpg and b/vignettes/tgfb3_targets_signaling_path.jpg differ diff --git a/vignettes/tgfb3_targets_signaling_path.png b/vignettes/tgfb3_targets_signaling_path.png index 8903531..1e40cd8 100644 Binary files a/vignettes/tgfb3_targets_signaling_path.png and b/vignettes/tgfb3_targets_signaling_path.png differ