diff --git a/.Rbuildignore b/.Rbuildignore index 5c0eec310..daf3fbc13 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,6 +2,7 @@ ^renv\.lock$ ^Giotto\.Rproj$ ^\.Rproj\.user$ +^.lintr ^LICENSE\.md$ ^README\.Rmd$ diff --git a/.lintr b/.lintr new file mode 100644 index 000000000..44f4939bc --- /dev/null +++ b/.lintr @@ -0,0 +1,5 @@ +linters: linters_with_defaults( + indentation_linter = NULL, + object_name_linter = NULL, + object_length_linter = NULL) + diff --git a/DESCRIPTION b/DESCRIPTION index 945224e9a..63bbf99a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Giotto Title: Spatial Single-Cell Transcriptomics Toolbox -Version: 4.1.0 +Version: 4.1.3 Authors@R: c( person("Ruben", "Dries", email = "rubendries@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7650-7754")), @@ -22,36 +22,27 @@ Authors@R: c( Description: Toolbox to process, analyze and visualize spatial single-cell expression data. License: MIT + file LICENSE Encoding: UTF-8 -LazyData: true URL: https://drieslab.github.io/Giotto/, https://github.com/drieslab/Giotto BugReports: https://github.com/drieslab/Giotto/issues RoxygenNote: 7.3.2 Depends: - base (>= 4.1.0), - utils (>= 4.1.0), - R (>= 4.1.0), + R (>= 4.4.1), methods, GiottoClass (>= 0.3.3) Imports: BiocParallel, BiocSingular, checkmate, - cowplot (>= 0.9.4), data.table (>= 1.12.2), dbscan (>= 1.1-3), + ggraph, ggplot2 (>= 3.1.1), - GiottoUtils (>= 0.1.9), - GiottoVisuals (>= 0.2.4), + GiottoUtils (>= 0.1.12), + GiottoVisuals (>= 0.2.5), igraph (>= 1.2.4.1), - jsonlite, - limma, Matrix (>= 1.6-2), MatrixGenerics, - plotly, - progressr, - reshape2, reticulate (>= 1.25), - Rtsne (>= 0.15), scales (>= 1.0.0), sparseMatrixStats, stats, @@ -78,19 +69,19 @@ Suggests: ggalluvial, ggdendro, ggforce, - ggraph, ggspavis, graphcoloring, HDF5Array (>= 1.18.1), hdf5r, - jackstraw, kableExtra, knitr, + limma, MAST, miniUI, multinet (>= 3.0.2), networkD3, pheatmap, + plotly, quadprog, harmony, R.utils, @@ -99,6 +90,7 @@ Suggests: rlang, rhdf5, RTriangle (>= 1.6-0.10), + Rtsne (>= 0.15), Rvision, scater, scran (>= 1.10.1), @@ -134,6 +126,7 @@ Collate: 'differential_expression.R' 'dimension_reduction.R' 'feature_set_enrichment.R' + 'filter.R' 'general_help.R' 'giotto_viewer.R' 'globals.R' @@ -142,6 +135,7 @@ Collate: 'image_registration.R' 'interactivity.R' 'kriging.R' + 'normalize.R' 'package_imports.R' 'poly_influence.R' 'python_hmrf.R' diff --git a/NAMESPACE b/NAMESPACE index f9eddf16a..8aca099c2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,6 @@ export("prov<-") export("sankeyLabel<-") export("sankeyRelate<-") export("spatUnit<-") -export(PAGEEnrich) export(activeFeatType) export(activeSpatUnit) export(addCellIntMetadata) @@ -98,7 +97,6 @@ export(combineICG) export(combineInteractionChangedFeats) export(combineInteractionChangedGenes) export(combineMetadata) -export(combineSpatialCellFeatureInfo) export(combineSpatialCellMetadataInfo) export(combineToMultiPolygon) export(compareCellAbundance) @@ -127,6 +125,7 @@ export(createGiottoPolygon) export(createGiottoPolygonsFromDfr) export(createGiottoPolygonsFromGeoJSON) export(createGiottoPolygonsFromMask) +export(createGiottoVisiumHDObject) export(createGiottoVisiumObject) export(createGiottoXeniumObject) export(createMerscopeLargeImage) @@ -138,14 +137,12 @@ export(createSpatLocsObj) export(createSpatNetObj) export(createSpatialDefaultGrid) export(createSpatialDelaunayNetwork) -export(createSpatialEnrich) export(createSpatialFeaturesKNNnetwork) export(createSpatialGenomicsObject) 
export(createSpatialGrid) export(createSpatialKNNnetwork) export(createSpatialNetwork) export(createSpatialWeightMatrix) -export(create_jackstrawplot) export(create_screeplot) export(crop) export(cropGiottoLargeImage) @@ -242,7 +239,6 @@ export(getGiottoImage) export(getMultiomics) export(getNearestNetwork) export(getONTraCv1Input) -export(getONTraCv2Input) export(getPolygonInfo) export(getRainbowColors) export(getSpatialEnrichment) @@ -257,15 +253,14 @@ export(giottoPoints) export(giottoPolygon) export(giottoSankeyPlan) export(giottoToAnnData) +export(giottoToAnndataZarr) export(giottoToSeurat) export(giottoToSeuratV4) export(giottoToSeuratV5) export(giottoToSpatialExperiment) export(heatmSpatialCorFeats) -export(heatmSpatialCorGenes) export(hexVertices) export(hist) -export(hyperGeometricEnrich) export(identifyTMAcores) export(importCosMx) export(importVisiumHD) @@ -274,6 +269,7 @@ export(initHMRF_V2) export(insertCrossSectionFeatPlot3D) export(insertCrossSectionSpatPlot3D) export(installGiottoEnvironment) +export(installGiottoONTraCEnvironment) export(instructions) export(interactiveLandmarkSelection) export(jackstrawPlot) @@ -301,7 +297,6 @@ export(plotCCcomDotplot) export(plotCCcomHeatmap) export(plotCPF) export(plotCTCompositionInNicheCluster) -export(plotCTCompositionInProbCluster) export(plotCellProximityFeatSpot) export(plotCellProximityFeats) export(plotCellTypeNTScore) @@ -326,6 +321,8 @@ export(plotPCA_3D) export(plotPolygons) export(plotRankSpatvsExpr) export(plotRecovery) +export(plotSpatNicheClusterBin) +export(plotSpatNicheClusterProb) export(plotStatDelaunayNetwork) export(plotTSNE) export(plotTSNE_2D) @@ -339,7 +336,6 @@ export(print.combIcfObject) export(print.icfObject) export(processGiotto) export(prov) -export(rankEnrich) export(rankSpatialCorGroups) export(read10xAffineImage) export(readCellMetadata) @@ -360,6 +356,7 @@ export(readSpatLocsData) export(readSpatNetData) export(reconnectGiottoImage) export(rectVertices) +export(reduceDims) export(registerGiottoObjectList) export(registerGiottoObjectListFiji) export(registerGiottoObjectListRvision) @@ -374,8 +371,9 @@ export(runDWLSDeconv) export(runGiottoHarmony) export(runHyperGeometricEnrich) export(runIntegratedUMAP) +export(runNMF) +export(runONTraCV1) export(runPAGEEnrich) -export(runPAGEEnrich_OLD) export(runPCA) export(runPCAprojection) export(runPCAprojectionBatch) @@ -514,6 +512,7 @@ export(writeHMRFresults) exportMethods("$") exportMethods("$<-") exportMethods(interpolateFeature) +exportMethods(labelTransfer) import(GiottoClass) import(GiottoUtils) import(GiottoVisuals) @@ -575,7 +574,6 @@ importFrom(GiottoClass,combineCellData) importFrom(GiottoClass,combineFeatureData) importFrom(GiottoClass,combineFeatureOverlapData) importFrom(GiottoClass,combineMetadata) -importFrom(GiottoClass,combineSpatialCellFeatureInfo) importFrom(GiottoClass,combineSpatialCellMetadataInfo) importFrom(GiottoClass,combineToMultiPolygon) importFrom(GiottoClass,convertGiottoLargeImageToMG) diff --git a/NEWS.md b/NEWS.md index 9d8438301..a1bf83a28 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,51 @@ +# Giotto 4.1.3 (2024/10/27) + +## New +* Add `giottoToAnndataZarr()` to create a local anndata zarr folder and interact with the vitessceR package. +* `reduceDims()` API function for dimension reductions +* `runNMF()` implementation that works via RcppML + +## Changes +* `runWNN()` and `runIntegratedUMAP()` arguments were updated to make the function flexible to handle any number of modalities. 
+* update `jackstrawPlot()` to make it more flexible and efficient. Changed default params for `scaling`, `centering`, and `feats_to_use` to match `runPCA()` +* change the warning when reduction "feats" is selected in `runtSNE()` into an error to avoid accidentally wiping the `giotto` object. + + +# Giotto 4.1.2 + +## Breaking changes +* remove deprecated `PAGEEnrich()`. Use `runPAGEEnrich()` instead +* remove deprecated `rankEnrich()`. Use `runRankEnrich()` instead +* remove deprecated `hyperGeometricEnrich()`. Use `runHyperGeometricEnrich()` instead +* remove deprecated `createSpatialEnrich()`. Use `runSpatialEnrich()` instead +* remove deprecated `heatmSpatialCorGenes()`. Use `heatmSpatialCorFeats()` instead +* remove deprecated `runPAGEEnrich_OLD()`. Use `runPAGEEnrich()` instead +* remove `do_pca`, `expression_values`, `feats_to_use` args from `runGiottoHarmony()`. Running PCA during the `harmony::RunHarmony()` call is deprecated. + +## Enhancements +* add `'quantile'` normalization method to `normalizeGiotto()` + +## Changes +* `limma`, `plotly`, and `Rtsne` moved to Suggests +* move `progressr` and `jsonlite` dependencies to GiottoUtils v0.1.12 +* remove `reshape2` dependency. + +## Bug fixes +* `processGiotto()` can now skip the adjust step by default + +## New +* `identifyTMAcores()` for assigning IDs to tissue microarray spatial data. +* `labelTransfer()` for transferring labels between giotto objects or subsets thereof. Supersedes `doClusterProjection()` + +# Giotto 4.1.1 + +## Bug fixes +* Allow passing `giottoInstructions` to Xenium convenience functions + +## Changes +* Deprecate the `screePlot()` `name` argument in favor of the `dim_reduction_name` param + + # Giotto 4.1.0 (2024/07/31) ## Breaking changes diff --git a/R/ONTraC_wrapper.R b/R/ONTraC_wrapper.R index e33abeac7..a06c6a778 100644 --- a/R/ONTraC_wrapper.R +++ b/R/ONTraC_wrapper.R @@ -1,3 +1,81 @@ +#' @title installGiottoONTraCEnvironment +#' @description Installs a conda environment containing ONTraC. This +#' includes a miniconda installation and also a set of python packages that +#' Giotto may often use. See details for further information. +#' @param packages_to_install python modules (packages) to install for Giotto. +#' @param python_version python version to use within the giotto conda +#' environment. Default is v3.11.9 +#' @param ontrac_version ONTraC version to install. Default is "latest" +#' @param mini_install_path (optional) desired miniconda installation location. +#' Default is chosen by `reticulate::install_miniconda()` +#' @param confirm whether to pause for confirmation of conda environment +#' install location (default = TRUE) +#' @param envname name to assign environment. Default = "giotto_ontrac_env" +#' @param conda either "auto" (default) to allow reticulate to handle it, or +#' the full filepath to the conda executable. You can also set the option +#' "reticulate.conda_binary" or `Sys.setenv()` "RETICULATE_CONDA" to tell +#' reticulate where to look. +#' @param force_miniconda force reinstallation of miniconda +#' @param force_environment force reinstallation of the giotto environment +#' @param verbose be verbose +#' @returns installs a giotto environment using the reticulate miniconda system +#' @details This function will install a local conda environment using +#' the miniconda system as implemented by \pkg{reticulate}, which contains +#' ONTraC and a set of python packages that Giotto may often use.
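Since the new ONTraC wrappers in this file (environment setup, input generation, the command-line run, and result loading) are designed to be chained, a minimal end-to-end sketch may help orient readers. This is an illustrative sketch assembled only from the signatures added in this diff and not executed here; the directory names and the "custom_leiden" metadata column are placeholders borrowed from the examples in this file.

# Illustrative only -- assembled from the signatures added in this diff.
library(Giotto)
g <- GiottoData::loadGiottoMini("visium")

# one-time conda environment setup (miniconda + ONTraC via reticulate)
installGiottoONTraCEnvironment()

# writes ONTraC_dataset_input.csv to getwd() and returns the data.table
ontrac_input <- getONTraCv1Input(gobject = g, cell_type = "custom_leiden")

# run the ONTraC command-line tool from the conda environment
# (the ONTraC_input formal of runONTraCV1() currently appears unused)
runONTraCV1(
    dataset = "ONTraC_dataset_input.csv",
    preprocessing_dir = "preprocessing_dir",
    GNN_dir = "GNN_dir",
    NTScore_dir = "NTScore_dir",
    python_path = "giotto_ontrac_env"
)

# pull NT scores, niche-cluster probabilities and connectivity back into g
g <- loadOntraCResults(gobject = g, ontrac_results_dir = getwd())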
+#' +#' @examples +#' installGiottoONTraCEnvironment() +#' +#' @export +installGiottoONTraCEnvironment <- function( + python_version = "3.11.9", + ontrac_version = "latest", + mini_install_path = NULL, + confirm = TRUE, + envname = "giotto_ontrac_env", + conda = "auto", + force_miniconda = FALSE, + force_environment = FALSE, + verbose = NULL) { + # handle ontrac version + if (ontrac_version == "latest") { + ontrac <- "ONTraC" + } else { + ontrac <- paste0("ONTraC==", ontrac_version) + } + + # install conda env + installGiottoEnvironment( + packages_to_install = c( + "pandas==2.2.1", + "networkx==2.8.8", + "python-igraph==0.10.2", + "leidenalg==0.9.0", + "python-louvain==0.16", + "python.app==1.4", + "scikit-learn==1.1.3", + "smfishhmrf", + "session-info", + ontrac + ), + pip_packages = c( + "python-louvain", + "smfishhmrf", + "session-info", + "ONTraC" + ), + python_version = python_version, + mini_install_path = mini_install_path, + confirm = confirm, + envname = envname, + conda = "auto", + force_miniconda = force_miniconda, + force_environment = force_environment, + verbose = verbose + ) +} + + #' @title getONTraCv1Input #' @name getONTraCv1Input #' @description generate the input data for ONTraC v1 @@ -11,169 +89,181 @@ #' g <- GiottoData::loadGiottoMini("visium") #' #' getONTraCv1Input( -#' gobject = g, -#' cell_type = "custom_leiden" +#' gobject = g, +#' cell_type = "custom_leiden" #' ) #' @export -getONTraCv1Input <- function(gobject, # nolint: object_name_linter. - cell_type, - output_path = getwd(), - spat_unit = NULL, - feat_type = NULL, - verbose = TRUE) { - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - pos_df <- getSpatialLocations( - gobject = gobject, - spat_unit = spat_unit, - output = "data.table" - ) - meta_df <- pDataDT( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - output_df <- merge(x = pos_df, y = meta_df, by = "cell_ID") - - # check if the cell_type column exits - if (!cell_type %in% colnames(output_df)) { - vmsg(.v = verbose, paste( - "Given", - cell_type, - "do not exist in giotto object's metadata!" 
- )) - return(NULL) - } - - # add default sample name for one sample obj - if (!"list_ID" %in% colnames(output_df)) { - output_df$list_ID <- "ONTraC" - } - - output_df <- output_df[, .SD, .SDcols = c( - "cell_ID", - "list_ID", - "sdimx", - "sdimy", - cell_type - )] - colnames(output_df) <- c("Cell_ID", "Sample", "x", "y", "Cell_Type") - file_path <- file.path(output_path, "ONTraC_dataset_input.csv") - write.csv(output_df, file = file_path, quote = FALSE, row.names = FALSE) - vmsg(.v = verbose, paste("ONTraC input file was saved as", file_path)) - - return(output_df) +getONTraCv1Input <- function(gobject, + cell_type, + output_path = getwd(), + spat_unit = NULL, + feat_type = NULL, + verbose = TRUE) { + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + pos_df <- getSpatialLocations( + gobject = gobject, + spat_unit = spat_unit, + output = "data.table" + ) + meta_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + output_df <- merge(x = pos_df, y = meta_df, by = "cell_ID") + + # check if the cell_type column exits + if (!cell_type %in% colnames(output_df)) { + vmsg(.v = verbose, paste( + "Given", + cell_type, + "do not exist in giotto object's metadata!" + )) + return(NULL) + } + + # add default sample name for one sample obj + if (!"list_ID" %in% colnames(output_df)) { + output_df$list_ID <- "ONTraC" + } + + output_df <- output_df[, .SD, .SDcols = c( + "cell_ID", + "list_ID", + "sdimx", + "sdimy", + cell_type + )] + colnames(output_df) <- c("Cell_ID", "Sample", "x", "y", "Cell_Type") + file_path <- file.path(output_path, "ONTraC_dataset_input.csv") + write.csv(output_df, file = file_path, quote = FALSE, row.names = FALSE) + vmsg(.v = verbose, paste("ONTraC input file was saved as", file_path)) + + return(output_df) } -#' @title getONTraCv2Input -#' @name getONTraCv2Input -#' @description generate the input data for ONTraC v2 -#' @inheritParams data_access_params -#' @inheritParams read_data_params -#' @param output_path the path to save the output file -#' @param cell_type the cell type column name in the metadata -#' @returns data.table with columns: Cell_ID, Sample, x, y, Cell_Type -#' @details This function generate the input data for ONTraC v2 +#' @title runONTraCV1 +#' @name runONTraCV1 +#' @description run ONTraC +#' @param dataset the path to the input data file +#' @param preprocessing_dir the directory to save the preprocessing results +#' @param GNN_dir the directory to save the GNN results +#' @param NTScore_dir the directory to save the NTScore results +#' @param n_cpu the number of CPUs used for niche network constructing. Default +#' is 4L +#' @param n_neighbors the number of neighbors used for ONTraC in niche network +#' construction. Default is 50L +#' @param n_local the index of local neighbor used for ONTraC in niche network +#' construction for normalization. Default is 20L +#' @param device the device used for ONTraC running GNN model. Default is "cpu" +#' @param epochs the maximum number of epochs for model training. Default is +#' 1000L +#' @param patience the number of epochs wait for better result. Default is 100L +#' @param min_delta the minimum change of loss to be considered as improvement. +#' Default is 0.001 +#' @param min_epochs the minimum number of epochs to train. Default is 50L +#' @param batch_size the batch size for training. 
Default is 0L for whole +#' dataset +#' @param seed the random seed for reproducibility. Default is 42L +#' @param lr the learning rate for training. Default is 0.03 +#' @param hidden_feats the number of hidden features for GNN model. Default is +#' 4L +#' @param k the number of neighbors for GNN model. Default is 6L +#' @param modularity_loss_weight the weight of modularity loss. Default is 0.3 +#' @param purity_loss_weight the weight of purity loss. Default is 300.0 +#' @param regularization_loss_weight the weight of regularization loss. Default +#' is 0.1 +#' @param beta the weight of entropy loss. Default is 0.03 +#' @param python_path, path to python executable within a conda/miniconda +#' environment. Default is "giotto_ontrac_env" +#' @returns none +#' @details This function runs ONTraC #' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' getONTraCv2Input( -#' gobject = g, -#' cell_type = "custom_leiden" +#' runONTraCV1( +#' dataset = "ONTraC_dataset_input.csv", +#' preprocessing_dir = "preprocessing_dir", +#' GNN_dir = "GNN_dir", +#' NTScore_dir = "NTScore_dir", +#' envname = "giotto_ontrac_env" #' ) #' @export -getONTraCv2Input <- function(gobject, # nolint: object_name_linter. - cell_type, - output_path = getwd(), - spat_unit = NULL, - feat_type = NULL, - verbose = TRUE) { - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - pos_df <- getSpatialLocations( - gobject = gobject, - spat_unit = spat_unit, - output = "data.table" - ) - meta_df <- pDataDT( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - output_df <- merge(x = pos_df, y = meta_df, by = "cell_ID") - - # check if the cell_type column exits - if (!cell_type %in% colnames(output_df)) { - vmsg(.v = verbose, paste( - "Given", - cell_type, - "do not exist in giotto object's metadata!" - )) - return(NULL) - } - - # add default sample name for one sample obj - if (!"list_ID" %in% colnames(output_df)) { - output_df$list_ID <- "ONTraC" - } - - output_df <- output_df[, .SD, .SDcols = c( - "cell_ID", - "list_ID", - "sdimx", - "sdimy", - cell_type - )] - colnames(output_df) <- c("Cell_ID", "Sample", "x", "y", "Cell_Type") - file_path <- file.path(output_path, "ONTraC_meta_data_input.csv") - write.csv(output_df, file = file_path, quote = FALSE, row.names = FALSE) - vmsg(.v = verbose, paste("ONTraC input file was saved as", file_path)) - - return(output_df) -} - - -#' @title load_cell_bin_niche_cluster -#' @name load_cell_bin_niche_cluster -#' @description load cell-level binarized niche cluster -#' @inheritParams data_access_params -#' @inheritParams read_data_params -#' @param ontrac_results_dir the directory where the ONTraC results are saved -#' @returns gobject with cell-level binarized niche cluster -#' @details This function loads the ONTraC outputed cell-level binarized niche -#' cluster into the giotto object. 
-load_cell_bin_niche_cluster <- function(gobject, - ontrac_results_dir = getwd()) { - bin_niche_cluster_df <- read.csv(file = file.path( - ontrac_results_dir, - "GNN_dir", "cell_level_max_niche_cluster.csv.gz" - )) - colnames(bin_niche_cluster_df) <- c("cell_ID", "NicheCluster") - gobject <- GiottoClass::addCellMetadata(gobject, - new_metadata = bin_niche_cluster_df, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - return(gobject) +runONTraCV1 <- function( + ONTraC_input, + dataset, + preprocessing_dir, + GNN_dir, + NTScore_dir, + n_cpu = 4L, + n_neighbors = 50L, + n_local = 20L, + device = c("cpu", "cuda"), + epochs = 1000L, + patience = 100L, + min_delta = 0.001, + min_epochs = 50L, + batch_size = 0L, + seed = 42L, + lr = 0.03, + hidden_feats = 4L, + k = 6L, + modularity_loss_weight = 0.3, + purity_loss_weight = 300.0, + regularization_loss_weight = 0.1, + beta = 0.03, + python_path = "giotto_ontrac_env") { + # parameters check + device <- match.arg(device) + + # 1. identify operating system + my_os <- get_os() + + # handle conda env + set_giotto_python_path(python_path) + python_path <- reticulate::conda_python(envname = python_path) + if (my_os == "windows") { + ONTraC_path <- file.path(dirname(python_path), "Scripts", "ONTraC") + } else { + ONTraC_path <- file.path(dirname(python_path), "ONTraC") + } + + # run ONTraC + command <- paste( + ONTraC_path, + "-d", dataset, + "--preprocessing-dir", preprocessing_dir, + "--GNN-dir", GNN_dir, + "--NTScore-dir", NTScore_dir, + "--n-cpu", n_cpu, + "--n-neighbors", n_neighbors, + "--n-local", n_local, + "--device", device, + "--epochs", epochs, + "--patience", patience, + "--min-delta", min_delta, + "--min-epochs", min_epochs, + "--batch-size", batch_size, + "--seed", seed, + "--lr", lr, + "--hidden-feats", hidden_feats, + "--k", k, + "--modularity-loss-weight", modularity_loss_weight, + "--purity-loss-weight", purity_loss_weight, + "--regularization-loss-weight", regularization_loss_weight, + "--beta", beta + ) + wrap_msg(paste0("+", command)) + system(command) } @@ -182,23 +272,34 @@ load_cell_bin_niche_cluster <- function(gobject, #' @description load cell-level NT score #' @inheritParams data_access_params #' @inheritParams read_data_params -#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @param ontrac_results_dir the directory where the ONTraC results are saved. +#' Default is getwd() +#' @param NTScore_dir the directory to save the NTScore results. Default is +#' file.path(ontrac_results_dir, "NTScore_dir") +#' @param NTScore_reverse whether to reverse the NTScore. Default is FALSE #' @returns gobject with cell-level NT score #' @details This function loads the ONTraC outputed cell-level NT score -load_cell_NT_score <- function(gobject, # nolint: object_name_linter. - ontrac_results_dir = getwd()) { - NT_score_df <- read.csv(file = file.path( # nolint: object_name_linter. - ontrac_results_dir, - "NTScore_dir", "NTScore.csv.gz" - ))[c("Cell_ID", "Cell_NTScore")] - colnames(NT_score_df) <- c("cell_ID", "NTScore") # nolint: object_name_linter. - gobject <- addCellMetadata(gobject, - new_metadata = NT_score_df, # nolint: object_name_linter. 
- by_column = TRUE, - column_cell_ID = "cell_ID" - ) - - return(gobject) +load_cell_NT_score <- function(gobject, + ontrac_results_dir = getwd(), + NTScore_dir = file.path( + ontrac_results_dir, + "NTScore_dir" + ), + NTScore_reverse = FALSE) { + NT_score_df <- read.csv(file = file.path( + NTScore_dir, "NTScore.csv.gz" + ))[c("Cell_ID", "Cell_NTScore")] + colnames(NT_score_df) <- c("cell_ID", "NTScore") + if (NTScore_reverse) { + NT_score_df$NTScore <- 1 - NT_score_df$NTScore + } + gobject <- addCellMetadata(gobject, + new_metadata = NT_score_df, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + + return(gobject) } @@ -207,35 +308,41 @@ load_cell_NT_score <- function(gobject, # nolint: object_name_linter. #' @description load cell-niche cluster probability #' @inheritParams data_access_params #' @inheritParams read_data_params -#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @param ontrac_results_dir the directory where the ONTraC results are saved. +#' Default is getwd() +#' @param GNN_dir the directory to save the GNN results. Default is +#' file.path(ontrac_results_dir, "GNN_dir") #' @param name name for the probability matrix #' @returns gobject with cell-niche cluster probability matrix #' @details This function loads the ONTraC outputed cell-niche cluster #' probability as an exprObj into the giotto object. load_cell_niche_cluster_prob <- function(gobject, - ontrac_results_dir = getwd(), - spat_unit = "cell", - feat_type = "niche cluster", - name = "prob") { - niche_cluster_prob_df <- read.csv(file = file.path( - ontrac_results_dir, - "GNN_dir", "cell_level_niche_cluster.csv.gz" - )) - rownames(niche_cluster_prob_df) <- niche_cluster_prob_df$Cell_ID - niche_cluster_prob_df$Cell_ID <- NULL - expobj <- createExprObj(t(niche_cluster_prob_df), - spat_unit = spat_unit, - feat_type = feat_type - ) - gobject <- GiottoClass::setExpression( - gobject = gobject, - x = expobj, - spat_unit = spat_unit, - feat_type = feat_type, - name = name - ) - - return(gobject) + ontrac_results_dir = getwd(), + GNN_dir = file.path( + ontrac_results_dir, + "GNN_dir" + ), + spat_unit = "cell", + feat_type = "niche cluster", + name = "prob") { + niche_cluster_prob_df <- read.csv(file = file.path( + GNN_dir, "cell_level_niche_cluster.csv.gz" + )) + rownames(niche_cluster_prob_df) <- niche_cluster_prob_df$Cell_ID + niche_cluster_prob_df$Cell_ID <- NULL + expobj <- createExprObj(t(niche_cluster_prob_df), + spat_unit = spat_unit, + feat_type = feat_type + ) + gobject <- GiottoClass::setExpression( + gobject = gobject, + x = expobj, + spat_unit = spat_unit, + feat_type = feat_type, + name = name + ) + + return(gobject) } @@ -244,41 +351,148 @@ load_cell_niche_cluster_prob <- function(gobject, #' @description load niche cluster connectivity #' @inheritParams data_access_params #' @inheritParams read_data_params -#' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @param ontrac_results_dir the directory where the ONTraC results are saved. +#' Default is getwd() +#' @param GNN_dir the directory to save the GNN results. Default is +#' file.path(ontrac_results_dir, "GNN_dir") #' @param name name for the connectivity matrix #' @returns gobject with niche cluster connectivity matrix #' @details This function loads the ONTraC outputed niche cluster connectivity #' matrix as an exprObj into the giotto object. 
load_nc_connectivity <- function(gobject, - ontrac_results_dir = getwd(), - spat_unit = "niche cluster", - feat_type = "connectivity", - name = "normalized") { - connectivity_df <- read.csv(file = file.path( - ontrac_results_dir, - "GNN_dir", "consolidate_out_adj.csv.gz" - ), header = FALSE) - rownames(connectivity_df) <- paste0( - "NicheCluster_", - seq_len(dim(connectivity_df)[1]) - 1 - ) - colnames(connectivity_df) <- paste0( - "NicheCluster_", - seq_len(dim(connectivity_df)[2]) - 1 - ) - expobj <- createExprObj(t(connectivity_df), - spat_unit = spat_unit, - feat_type = feat_type - ) - gobject <- GiottoClass::setExpression( - gobject = gobject, - x = expobj, - spat_unit = spat_unit, - feat_type = feat_type, - name = name - ) - - return(gobject) + ontrac_results_dir = getwd(), + GNN_dir = file.path( + ontrac_results_dir, + "GNN_dir" + ), + spat_unit = "niche cluster", + feat_type = "connectivity", + name = "normalized") { + connectivity_df <- read.csv(file = file.path( + GNN_dir, "consolidate_out_adj.csv.gz" + ), header = FALSE) + rownames(connectivity_df) <- paste0( + "NicheCluster_", + seq_len(dim(connectivity_df)[1]) - 1 + ) + colnames(connectivity_df) <- paste0( + "NicheCluster_", + seq_len(dim(connectivity_df)[2]) - 1 + ) + expobj <- createExprObj(t(connectivity_df), + spat_unit = spat_unit, + feat_type = feat_type + ) + gobject <- GiottoClass::setExpression( + gobject = gobject, + x = expobj, + spat_unit = spat_unit, + feat_type = feat_type, + name = name + ) + + return(gobject) +} + + +#' @title load_niche_cluster_nt_score +#' @name load_niche_cluster_nt_score +#' @description load niche cluster NT score +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @param ontrac_results_dir the directory where the ONTraC results are saved. +#' Default is getwd() +#' @param NTScore_dir the directory to save the NTScore results. Default is +#' file.path(ontrac_results_dir, "NTScore_dir") +#' @param NTScore_reverse whether to reverse the NTScore. Default is FALSE +#' @returns gobject with niche cluster NT score +#' @details This function loads the ONTraC outputed niche cluster NT score +#' into the giotto object. 
+load_niche_cluster_nt_score <- function(gobject, + ontrac_results_dir = getwd(), + NTScore_dir = file.path( + ontrac_results_dir, + "NTScore_dir" + ), + NTScore_reverse = FALSE) { + niche_cluster_df <- read.csv(file = file.path( + NTScore_dir, "niche_cluster_score.csv.gz" + ), header = FALSE) + colnames(niche_cluster_df) <- c("NTScore") + niche_cluster_df$feat_ID <- paste0( + "NicheCluster_", + seq_len(dim(niche_cluster_df)[1]) - 1 + ) + if (NTScore_reverse) { + niche_cluster_df$NTScore <- 1 - niche_cluster_df$NTScore + } + gobject <- GiottoClass::addCellMetadata( + gobject = gobject, + spat_unit = "niche cluster", + feat_type = "connectivity", + new_metadata = niche_cluster_df, + by_column = TRUE, + column_cell_ID = "feat_ID" + ) + niche_cluster_meta_obj <- GiottoClass::createFeatMetaObj(niche_cluster_df) + gobject <- GiottoClass::setFeatureMetadata( + gobject = gobject, + x = niche_cluster_meta_obj, + spat_unit = "cell", + feat_type = "niche cluster" + ) + + return(gobject) +} + + +#' @title cal_cell_niche_cluster_bin +#' @name cal_cell_niche_cluster_bin +#' @description calculate binarized cell-level niche cluster assignment +#' @inheritParams data_access_params +#' @inheritParams read_data_params +#' @returns gobject with binarized cell-level niche cluster assignment +cal_cell_niche_cluster_bin <- function( + gobject, + spat_unit = "cell", + feat_type = "niche cluster") { + # calculate the binarized cell-level niche cluster assignment + expr_values <- getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = "matrix" + ) + niche_cluster_bin <- rownames(expr_values)[apply( + expr_values, + 2, + function(x) which.max(x) + )] + ori_meta_df <- pDataDT(gobject) + new_meta_df <- data.frame( + cell_ID = ori_meta_df$cell_ID, + new_feat = niche_cluster_bin + ) + colnames(new_meta_df) <- c("cell_ID", feat_type) + # get NTScore for each niche cluster + nc_meta_df <- fDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + # order the niche cluster by NT score + sorted_nc <- nc_meta_df$feat_ID[order(nc_meta_df$NTScore)] + new_meta_df[[feat_type]] <- factor( + new_meta_df[[feat_type]], + levels = sorted_nc + ) + # add the new metadata to the giotto object + gobject <- addCellMetadata(gobject, + new_metadata = new_meta_df, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + return(gobject) } @@ -288,28 +502,151 @@ load_nc_connectivity <- function(gobject, #' @inheritParams data_access_params #' @inheritParams read_data_params #' @param ontrac_results_dir the directory where the ONTraC results are saved +#' @param preprocessing_dir the directory to save the preprocessing results. +#' Default is file.path(ontrac_results_dir, "preprocessing_dir") +#' @param GNN_dir the directory to save the GNN results. Default is +#' file.path(ontrac_results_dir, "GNN_dir") +#' @param NTScore_dir the directory to save the NTScore results. Default is +#' file.path(ontrac_results_dir, "NTScore_dir") +#' @param NTScore_reverse whether to reverse the NTScore. Default is FALSE #' @returns gobject with ONTraC results #' @details This function loads the ONTraC results into the giotto object. #' @export -loadOntraCResults <- function(gobject, # nolint: object_name_linter. 
- ontrac_results_dir = getwd()) { - gobject <- load_cell_bin_niche_cluster(gobject, ontrac_results_dir) - gobject <- load_cell_NT_score(gobject, ontrac_results_dir) - gobject <- load_cell_niche_cluster_prob(gobject, ontrac_results_dir) - gobject <- GiottoClass::addCellMetadata( - gobject = gobject, - spat_unit = "cell", - feat_type = "niche cluster", - new_metadata = pDataDT(gobject), - by_column = TRUE, - column_cell_ID = "cell_ID" - ) +loadOntraCResults <- function(gobject, + ontrac_results_dir = getwd(), + preprocessing_dir = file.path( + ontrac_results_dir, + "preprocessing_dir" + ), + GNN_dir = file.path( + ontrac_results_dir, + "GNN_dir" + ), + NTScore_dir = file.path( + ontrac_results_dir, + "NTScore_dir" + ), + NTScore_reverse = FALSE) { + gobject <- load_cell_NT_score( + gobject = gobject, + ontrac_results_dir = ontrac_results_dir, + NTScore_dir = NTScore_dir, + NTScore_reverse = NTScore_reverse + ) + gobject <- load_cell_niche_cluster_prob( + gobject = gobject, + ontrac_results_dir = ontrac_results_dir, + GNN_dir = GNN_dir + ) + gobject <- GiottoClass::addCellMetadata( + gobject = gobject, + spat_unit = "cell", + feat_type = "niche cluster", + new_metadata = pDataDT(gobject), + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + gobject <- load_nc_connectivity( + gobject = gobject, + ontrac_results_dir = ontrac_results_dir, + GNN_dir = GNN_dir + ) + gobject <- load_niche_cluster_nt_score( + gobject = gobject, + ontrac_results_dir = ontrac_results_dir, + NTScore_dir = NTScore_dir, + NTScore_reverse = NTScore_reverse + ) + gobject <- cal_cell_niche_cluster_bin( + gobject = gobject + ) + + return(gobject) +} + - gobject <- load_nc_connectivity(gobject, ontrac_results_dir) - return(gobject) +#' @title plotSpatNicheClusterProb +#' @name plotSpatNicheClusterProb +#' @description plot spatial niche cluster probability +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @param spat_unit name of spatial unit niche stored cluster features +#' @param feat_type name of the feature type stored probability matrix +#' @param expression_values name of the expression matrix stored probability +#' values +#' @param ... additional arguments to be passed to the spatFeatPlot2D function +#' @details This function plots the spatial niche cluster probability +#' @returns ggplot +#' @export +plotSpatNicheClusterProb <- function( + gobject, + spat_unit = "cell", + feat_type = "niche cluster", + expression_values = "prob", + ..., + default_save_name = "spatNicheClusterProb") { + nc_meta_df <- fDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + sorted_nc <- nc_meta_df$feat_ID[order(nc_meta_df$NTScore)] + + spatFeatPlot2D( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + expression_values = expression_values, + feats = sorted_nc, + ... + ) } + +#' @title plotSpatNicheClusterBin +#' @name plotSpatNicheClusterBin +#' @description plot spatial niche cluster binarized +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @param spat_unit name of spatial unit niche stored cluster features +#' @param feat_type name of the feature type stored binarized niche cluster +#' @param niche_cluster_label name of the niche cluster label +#' @param ... 
additional arguments to be passed to the spatFeatPlot2D function +#' @details This function plots the spatial niche cluster binarized +#' @returns ggplot +#' @export +plotSpatNicheClusterBin <- function( + gobject, + spat_unit = "cell", + feat_type = "niche cluster", + ..., + default_save_name = "spatNicheClusterBin") { + # determine the color code + nc_meta_df <- fDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + # order the niche cluster by NT score + sorted_nc <- nc_meta_df$feat_ID[order(nc_meta_df$NTScore)] + nc_num <- length(sorted_nc) + # cell color code is a named vector with the niche cluster label + cell_color_code <- setNames( + viridis::turbo(n = nc_num + 2)[1:nc_num + 1], + sorted_nc + ) + + spatPlot2D( + gobject = gobject, + spat_unit = spat_unit, + cell_color = feat_type, + cell_color_code = cell_color_code, + ... + ) +} + + #' @title plotNicheClusterConnectivity #' @name plotNicheClusterConnectivity #' @description plot niche cluster connectivity @@ -319,121 +656,137 @@ loadOntraCResults <- function(gobject, # nolint: object_name_linter. #' @param feat_type name of the feature type stored niche cluster connectivities #' @param values name of the expression matrix stored connectivity values #' @details This function plots the niche cluster connectivity matrix +#' @returns ggplot #' @export -plotNicheClusterConnectivity <- function( # nolint: object_name_linter. - gobject, - spat_unit = "niche cluster", - feat_type = "connectivity", - values = "normalized", - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - theme_param = list(), - default_save_name = "NicheClusterConnectivity") { - # load `guide_edge_colourbar` function in ggraph, - # otherwise it will raise an error when using `scale_edge_colour_gradientn` - library(ggraph) - - # get the niche cluster connectivity matrix - niche_cluster_connectivites <- getExpression( - gobject = gobject, - values = "normalized", - spat_unit = "niche cluster", - feat_type = "connectivity", - output = "matrix" - ) - - # transform the matrix to data.frame for constructing igraph object - niche_cluster_connectivites <- cbind( - expand.grid(dimnames(niche_cluster_connectivites)), - value = as.vector(as.matrix( - niche_cluster_connectivites +plotNicheClusterConnectivity <- function( + gobject, + spat_unit = "niche cluster", + feat_type = "connectivity", + values = "normalized", + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "NicheClusterConnectivity") { + # load `guide_edge_colourbar` function in ggraph, + # otherwise it will raise an error when using `scale_edge_colour_gradientn` + library(ggraph) + + # get the niche cluster connectivity matrix + niche_cluster_connectivites <- getExpression( + gobject = gobject, + values = values, + spat_unit = spat_unit, + feat_type = feat_type, + output = "matrix" + ) + nc_num <- dim(niche_cluster_connectivites)[1] + + # transform the matrix to data.frame for constructing igraph object + niche_cluster_connectivites <- cbind( + expand.grid(dimnames(niche_cluster_connectivites)), + value = as.vector(as.matrix( + niche_cluster_connectivites + )) + ) + colnames(niche_cluster_connectivites) <- c("from", "to", "connectivites") + + # construct igraph object + igd <- igraph::graph_from_data_frame( + d = niche_cluster_connectivites[, c("from", "to", "connectivites")], + directed = FALSE + ) + igd <- igraph::simplify( + graph = igd, + remove.loops = TRUE, + remove.multiple = FALSE + ) + # set 
edge attributes + edges_sizes <- igraph::edge_attr(igd, "connectivites") + edges_colors <- edges_sizes + igd <- igraph::set_edge_attr( + graph = igd, + index = igraph::E(igd), + name = "color", + value = edges_colors + ) + igd <- igraph::set_edge_attr( + graph = igd, + index = igraph::E(igd), + name = "size", + value = edges_sizes + ) + # set node attributes + nc_meta_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + igraph::V(igd)$NTScore <- nc_meta_df$NTScore[ + match( + igraph::V(igd)$name, + nc_meta_df$cell_ID + ) + ] + + # plot + ## layout + coords <- igraph::layout_with_drl( + graph = igd, + weights = edges_sizes, + use.seed = TRUE + ) + gpl <- ggraph::ggraph(graph = igd, layout = coords) + + ## edges + gpl <- gpl + ggraph::geom_edge_link( + ggplot2::aes( + colour = edges_sizes, + edge_width = 5, + edge_alpha = size # nolint: object_usage_linter. + ), + show.legend = FALSE + ) + gpl <- gpl + ggraph::scale_edge_alpha(range = c(0.1, 1)) + gpl <- gpl + ggraph::scale_edge_colour_gradientn( + colours = getColors("Reds", 9, src = "RColorBrewer"), + name = "Value" + ) + + ## node + gpl <- gpl + ggraph::geom_node_point( + ggplot2::aes(colour = NTScore), # nolint: object_usage_linter. + size = 10 + ) + gpl <- gpl + ggplot2::scale_colour_gradientn( + colours = viridis::turbo(n = nc_num + 2)[1:nc_num + 1] + ) + gpl <- gpl + ggraph::geom_node_text( + ggplot2::aes(label = name), # nolint: object_usage_linter. + repel = TRUE + ) + + ## theme + gpl <- gpl + ggplot2::theme_bw() + ggplot2::theme( + panel.grid = ggplot2::element_blank(), + panel.border = ggplot2::element_blank(), + axis.title = ggplot2::element_blank(), + axis.text = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank() + ) + gpl + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = gpl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL )) - ) - colnames(niche_cluster_connectivites) <- c("from", "to", "connectivites") - - # construct igraph object - igd <- igraph::graph_from_data_frame( - d = niche_cluster_connectivites[, c("from", "to", "connectivites")], - directed = FALSE - ) - igd <- igraph::simplify( - graph = igd, - remove.loops = TRUE, - remove.multiple = FALSE - ) - edges_sizes <- igraph::edge_attr(igd, "connectivites") - edges_colors <- edges_sizes - igd <- igraph::set.edge.attribute( - graph = igd, - index = igraph::E(igd), - name = "color", - value = edges_colors - ) - igd <- igraph::set.edge.attribute( - graph = igd, - index = igraph::E(igd), - name = "size", - value = edges_sizes - ) - - # plot - ## layout - coords <- igraph::layout_with_drl( - graph = igd, - weights = edges_sizes, - use.seed = TRUE - ) - gpl <- ggraph::ggraph(graph = igd, layout = coords) - - ## edges - gpl <- gpl + ggraph::geom_edge_link( - ggplot2::aes( - colour = edges_sizes, - edge_width = 5, - edge_alpha = size # nolint: object_usage_linter. - ), - show.legend = FALSE - ) - gpl <- gpl + ggraph::scale_edge_alpha(range = c(0.1, 1)) - gpl <- gpl + ggraph::scale_edge_colour_gradientn( - colours = RColorBrewer::brewer.pal(9, "Reds"), - name = "Value" - ) - - ## node - gpl <- gpl + ggraph::geom_node_point( - ggplot2::aes(colour = name), # nolint: object_usage_linter. 
- size = 10 - ) - gpl <- gpl + ggplot2::scale_fill_gradientn(colours = viridis::turbo(100)) - gpl <- gpl + ggraph::geom_node_text( - ggplot2::aes(label = name), # nolint: object_usage_linter. - repel = TRUE - ) - - ## theme - gpl <- gpl + ggplot2::theme_bw() + ggplot2::theme( - panel.grid = ggplot2::element_blank(), - panel.border = ggplot2::element_blank(), - axis.title = ggplot2::element_blank(), - axis.text = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank() - ) - gpl - - # return or save - return(GiottoVisuals::plot_output_handler( - gobject = gobject, - plot_object = gpl, - save_plot = save_plot, - return_plot = return_plot, - show_plot = show_plot, - default_save_name = default_save_name, - save_param = save_param, - else_return = NULL - )) } #' @title plotCTCompositionInNicheCluster @@ -444,324 +797,197 @@ plotNicheClusterConnectivity <- function( # nolint: object_name_linter. #' @inheritParams plot_output_params #' @param spat_unit name of spatial unit niche stored cluster features #' @param feat_type name of the feature type stored probability matrix -#' @param values name of the expression matrix stored probability of each cell assigned to each niche cluster -#' @details This function plots the cell type composition within each niche cluster +#' @param normalization normalization method for the cell type composition +#' @param values name of the expression matrix stored probability of each cell +#' assigned to each niche cluster +#' @details This function plots the cell type composition within each niche +#' cluster +#' @returns ggplot #' @export -plotCTCompositionInNicheCluster <- function( # nolint: object_name_linter. - gobject, - cell_type, - values = "prob", - spat_unit = "cell", - feat_type = "niche cluster", - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - theme_param = list(), - default_save_name = "CellTypeCompositionInNicheCluster") { - # Get the cell type composition within each niche cluster - ## extract the cell-level niche cluster probability matrix - exp <- getExpression( - gobject = gobject, - values = values, - spat_unit = spat_unit, - feat_type = feat_type, - output = "exprObj" - ) - prob_df <- as.data.frame(t(as.matrix(exp@exprMat))) - prob_df$cell_ID <- rownames(prob_df) - ## combine the cell type and niche cluster probability matrix - combined_df <- merge( - as.data.frame(pDataDT(gobject, feat_type = feat_type))[, c( - "cell_ID", - cell_type - )], - prob_df, - by = "cell_ID" - ) - - # Calculate the normalized cell type composition within each niche cluster - cell_type_counts_df <- combined_df %>% - tidyr::pivot_longer( - cols = dplyr::starts_with("NicheCluster_"), - names_to = "Cluster", - values_to = "Probability" - ) %>% - dplyr::group_by( - !!rlang::sym(cell_type), - Cluster # nolint: object_usage_linter. - ) %>% - dplyr::summarise(Sum = sum(Probability, # nolint: object_usage_linter. - na.rm = TRUE - )) %>% - tidyr::spread(key = "Cluster", value = "Sum", fill = 0) - cell_type_counts_df <- as.data.frame(cell_type_counts_df) - rownames(cell_type_counts_df) <- cell_type_counts_df[[cell_type]] - cell_type_counts_df[[cell_type]] <- NULL - normalized_df <- as.data.frame(t( - t(cell_type_counts_df) / colSums(cell_type_counts_df) - )) - - - # Reshape the data frame into long format - normalized_df[[cell_type]] <- rownames(normalized_df) - df_long <- normalized_df %>% - tidyr::pivot_longer( - cols = -!!rlang::sym(cell_type), # nolint: object_usage_linter. 
- names_to = "Cluster", - values_to = "Composition" - ) - - # Create the heatmap using ggplot2 - pl <- ggplot(df_long, aes( - x = !!rlang::sym(cell_type), # nolint: object_usage_linter. - y = Cluster, # nolint: object_usage_linter. - fill = Composition # nolint: object_usage_linter. - )) + - geom_tile() + - viridis::scale_fill_viridis(option = "inferno", limits = c(0, 1)) + - theme_minimal() + - labs( - title = "Normalized cell type compositions within each niche cluster", - x = "Cell_Type", - y = "Cluster" - ) + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) - - # return or save - return(GiottoVisuals::plot_output_handler( - gobject = gobject, - plot_object = pl, - save_plot = save_plot, - return_plot = return_plot, - show_plot = show_plot, - default_save_name = default_save_name, - save_param = save_param, - else_return = NULL - )) +plotCTCompositionInNicheCluster <- function( + gobject, + cell_type, + values = "prob", + spat_unit = "cell", + feat_type = "niche cluster", + normalization = c("by_niche_cluster", "by_cell_type", NULL), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "CellTypeCompositionInNicheCluster") { + normalization <- match.arg(normalization) + + # Get the cell type composition within each niche cluster + ## extract the cell-level niche cluster probability matrix + exp <- getExpression( + gobject = gobject, + values = values, + spat_unit = spat_unit, + feat_type = feat_type, + output = "matrix" + ) + prob_df <- as.data.frame(t(as.matrix(exp))) + prob_df$cell_ID <- rownames(prob_df) + ## combine the cell type and niche cluster probability matrix + combined_df <- merge( + as.data.frame(pDataDT(gobject))[, c( + "cell_ID", + cell_type + )], + prob_df, + by = "cell_ID" + ) + + # Calculate the normalized cell type composition within each niche cluster + cell_type_counts_df <- combined_df %>% + tidyr::pivot_longer( + cols = dplyr::starts_with("NicheCluster_"), + names_to = "Cluster", + values_to = "Probability" + ) %>% + dplyr::group_by( + !!rlang::sym(cell_type), + Cluster # nolint: object_usage_linter. + ) %>% + dplyr::summarise(Sum = sum(Probability, # nolint: object_usage_linter. + na.rm = TRUE + )) %>% + tidyr::spread(key = "Cluster", value = "Sum", fill = 0) + cell_type_counts_df <- as.data.frame(cell_type_counts_df) + rownames(cell_type_counts_df) <- cell_type_counts_df[[cell_type]] + cell_type_counts_df[[cell_type]] <- NULL + + if (normalization == "by_cell_type") { + normalized_df <- as.data.frame( + cell_type_counts_df / rowSums(cell_type_counts_df) + ) + } else if (normalization == "by_niche_cluster") { + normalized_df <- as.data.frame(t( + t(cell_type_counts_df) / colSums(cell_type_counts_df) + )) + } else if (normalization == NULL) { + normalized_df <- cell_type_counts_df + } + + # Reshape the data frame into long format + normalized_df[[cell_type]] <- rownames(normalized_df) + df_long <- normalized_df %>% + tidyr::pivot_longer( + cols = -!!rlang::sym(cell_type), # nolint: object_usage_linter. 
+ names_to = "Cluster", + values_to = "Composition" + ) + + # Order the niche clusters by NTscore + nc_meta_df <- fDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + df_long$Cluster <- factor(df_long$Cluster, + levels = nc_meta_df$feat_ID[order(nc_meta_df$NTScore)] + ) + + # Order the cell types by the average NTScore + data_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + avg_scores <- data_df %>% + dplyr::group_by(!!rlang::sym(cell_type)) %>% + dplyr::summarise(Avg_NTScore = mean(NTScore)) + # nolint: object_usage_linter. + df_long[[cell_type]] <- factor(df_long[[cell_type]], + levels = avg_scores[[cell_type]][order(avg_scores$Avg_NTScore)] + ) + + # Create the heatmap using ggplot2 + pl <- ggplot(df_long, aes( + x = !!rlang::sym(cell_type), # nolint: object_usage_linter. + y = Cluster, # nolint: object_usage_linter. + fill = Composition # nolint: object_usage_linter. + )) + + geom_tile() + + viridis::scale_fill_viridis(option = "inferno", limits = c(0, 1)) + + theme_minimal() + + labs( + title = "Normalized cell type compositions within each niche + cluster", + x = "Cell_Type", + y = "Cluster" + ) + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) } -#' @title plotCTCompositionInProbCluster -#' @name plotCTCompositionInProbCluster -#' @description plot cell type composition within each probabilistic cluster +#' @title plotCellTypeNTScore +#' @name plotCellTypeNTScore +#' @description plot NTScore by cell type #' @param cell_type the cell type column name in the metadata #' @inheritParams data_access_params #' @inheritParams plot_output_params -#' @param spat_unit name of spatial unit niche stored cluster features -#' @param feat_type name of the feature type stored niche cluster connectivities -#' @param values name of the expression matrix stored probability of each cell assigned to each probabilistic cluster -#' @details This function plots the cell type composition within each probabilistic cluster +#' @returns ggplot #' @export -plotCTCompositionInProbCluster <- function( # nolint: object_name_linter. 
- gobject, +plotCellTypeNTScore <- function(gobject, cell_type, - values = "prob", + values = "NTScore", spat_unit = "cell", feat_type = "niche cluster", show_plot = NULL, return_plot = NULL, save_plot = NULL, save_param = list(), - theme_param = list(), - default_save_name = "plotCTCompositionInProbCluster") { - # Get the cell type composition within each niche cluster - ## extract the cell-level niche cluster probability matrix - exp <- getExpression( - gobject = gobject, - values = values, - spat_unit = spat_unit, - feat_type = feat_type, - output = "exprObj" - ) - prob_df <- as.data.frame(t(as.matrix(exp@exprMat))) - prob_df$cell_ID <- rownames(prob_df) - ## combine the cell type and niche cluster probability matrix - combined_df <- merge( - as.data.frame(pDataDT(gobject, feat_type = feat_type))[, c( - "cell_ID", - cell_type - )], - prob_df, - by = "cell_ID" - ) - - # Calculate the normalized cell type composition within each niche cluster - cell_type_counts_df <- combined_df %>% - tidyr::pivot_longer( - cols = dplyr::starts_with("NicheCluster_"), - names_to = "Cluster", - values_to = "Probability" - ) %>% - dplyr::group_by( - !!rlang::sym(cell_type), - Cluster # nolint: object_usage_linter. - ) %>% - dplyr::summarise(Sum = sum(Probability, # nolint: object_usage_linter. - na.rm = TRUE - )) %>% - tidyr::spread(key = "Cluster", value = "Sum", fill = 0) - cell_type_counts_df <- as.data.frame(cell_type_counts_df) - rownames(cell_type_counts_df) <- cell_type_counts_df[[cell_type]] - cell_type_counts_df[[cell_type]] <- NULL - normalized_df <- as.data.frame(t( - t(cell_type_counts_df) / colSums(cell_type_counts_df) - )) - - - # Reshape the data frame into long format - normalized_df[[cell_type]] <- rownames(normalized_df) - df_long <- normalized_df %>% - tidyr::pivot_longer( - cols = -!!rlang::sym(cell_type), # nolint: object_usage_linter. - names_to = "Cluster", - values_to = "Composition" - ) - - # Create the heatmap using ggplot2 - pl <- ggplot(df_long, aes( - x = !!rlang::sym(cell_type), # nolint: object_usage_linter. - y = Cluster, # nolint: object_usage_linter. - fill = Composition # nolint: object_usage_linter. - )) + - geom_tile() + - viridis::scale_fill_viridis(option = "inferno", limits = c(0, 1)) + - theme_minimal() + - labs( - title = "Normalized cell type compositions within each niche cluster", - x = "Cell_Type", - y = "Cluster" - ) + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) - - # return or save - return(GiottoVisuals::plot_output_handler( - gobject = gobject, - plot_object = pl, - save_plot = save_plot, - return_plot = return_plot, - show_plot = show_plot, - default_save_name = default_save_name, - save_param = save_param, - else_return = NULL - )) -} - + default_save_name = "CellTypeNTScore") { + # Get the cell type composition within each niche cluster + data_df <- pDataDT( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + avg_scores <- data_df %>% + dplyr::group_by(!!rlang::sym(cell_type)) %>% + dplyr::summarise(Avg_NTScore = mean(!!rlang::sym(values))) + data_df[[cell_type]] <- factor(data_df[[cell_type]], + levels = avg_scores[[cell_type]][order(avg_scores$Avg_NTScore)] + ) -#' @title plotCellTypeNTScore -#' @name plotCellTypeNTScore -#' @description plot NTScore by cell type -#' @param cell_type the cell type column name in the metadata -#' @inheritParams data_access_params -#' @inheritParams plot_output_params -#' @export -plotCellTypeNTScore <- function(gobject, # nolint: object_name_linter. 
- cell_type, - values = "NTScore", - spat_unit = "cell", - feat_type = "niche cluster", - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - theme_param = list(), - default_save_name = "CellTypeNTScore") { - # Get the cell type composition within each niche cluster - data_df <- pDataDT( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - avg_scores <- data_df %>% - dplyr::group_by(!!rlang::sym(cell_type)) %>% # nolint: object_usage_linter. - dplyr::summarise(Avg_NTScore = mean(NTScore)) # nolint: object_usage_linter. - data_df[[cell_type]] <- factor(data_df[[cell_type]], - levels = avg_scores[[cell_type]][order(avg_scores$Avg_NTScore)] - ) - - pl <- ggplot(data_df, aes( - x = NTScore, # nolint: object_usage_linter. - y = !!rlang::sym(cell_type), - fill = !!rlang::sym(cell_type) - )) + - geom_violin() + - theme_minimal() + - labs( - title = "Violin Plot of NTScore by Cell Type", - x = "NTScore", - y = "Cell Type" - ) + - ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1)) - - # return or save - return(GiottoVisuals::plot_output_handler( - gobject = gobject, - plot_object = pl, - save_plot = save_plot, - return_plot = return_plot, - show_plot = show_plot, - default_save_name = default_save_name, - save_param = save_param, - else_return = NULL - )) + pl <- ggplot(data_df, aes( + x = !!rlang::sym(values), + y = !!rlang::sym(cell_type), + fill = !!rlang::sym(cell_type) + )) + + geom_violin() + + theme_minimal() + + labs( + title = "Violin Plot of NTScore by Cell Type", + x = values, + y = "Cell Type" + ) + + ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + # return or save + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) } - - -#' @title plotDiscreteAlongContinuous -#' @name plotDiscreteAlongContinuous -#' @description plot density of a discrete annotation along a continuou values -#' @param cell_type the column name of discrete annotation in cell metadata -#' @param values the column name of continuous values in cell metadata -#' @inheritParams data_access_params -#' @inheritParams plot_output_params -#' @export -plotCellTypeNTScore <- function(gobject, # nolint: object_name_linter. - cell_type, - values = "NTScore", - spat_unit = "cell", - feat_type = "niche cluster", - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - theme_param = list(), - default_save_name = "discreteAlongContinuous") { - # Get the cell type composition within each niche cluster - data_df <- pDataDT( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - avg_scores <- data_df %>% - dplyr::group_by(!!rlang::sym(cell_type)) %>% # nolint: object_usage_linter. - dplyr::summarise(Avg_NTScore = mean(NTScore)) # nolint: object_usage_linter. - data_df[[cell_type]] <- factor(data_df[[cell_type]], - levels = avg_scores[[cell_type]][order(avg_scores$Avg_NTScore)] - ) - - pl <- ggplot(data_df, aes( - x = NTScore, # nolint: object_usage_linter. 
- y = !!rlang::sym(cell_type), - fill = !!rlang::sym(cell_type) - )) + - geom_violin() + - theme_minimal() + - labs( - title = "Violin Plot of NTScore by Cell Type", - x = "NTScore", - y = "Cell Type" - ) + - ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1)) - - # return or save - return(GiottoVisuals::plot_output_handler( - gobject = gobject, - plot_object = pl, - save_plot = save_plot, - return_plot = return_plot, - show_plot = show_plot, - default_save_name = default_save_name, - save_param = save_param, - else_return = NULL - )) -} \ No newline at end of file diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index de16b2c08..84700ca94 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -3,1262 +3,6 @@ -### matrix processing #### - -#' @title Mean expression detected test -#' @param mymatrix matrix of expression info -#' @param detection_threshold detection threshold. Defaults to 1 count. -#' @returns numeric -#' @keywords internal -.mean_expr_det_test <- function(mymatrix, detection_threshold = 1) { - unlist(apply(X = mymatrix, MARGIN = 1, FUN = function(x) { - detected_x <- x[x > detection_threshold] - mean(detected_x) - })) -} - -#' @title Normalize expression matrix for library size -#' @param mymatrix matrix object -#' @param scalefactor scalefactor -#' @returns matrix -#' @keywords internal -.lib_norm_giotto <- function(mymatrix, scalefactor) { - libsizes <- colSums_flex(mymatrix) - - if (any(libsizes == 0)) { - warning(wrap_txt("Total library size or counts for individual spat - units are 0. - This will likely result in normalization problems. - filter (filterGiotto) or impute (imputeGiotto) spatial - units.")) - } - - norm_expr <- t_flex(t_flex(mymatrix) / libsizes) * scalefactor - return(norm_expr) -} - -#' @title Log normalize expression matrix -#' @returns matrix -#' @keywords internal -.log_norm_giotto <- function(mymatrix, base, offset) { - if (methods::is(mymatrix, "DelayedArray")) { - mymatrix <- log(mymatrix + offset) / log(base) - # } else if(methods::is(mymatrix, 'DelayedMatrix')) { - # mymatrix = log(mymatrix + offset)/log(base) - } else if (methods::is(mymatrix, "dgCMatrix")) { - mymatrix@x <- log(mymatrix@x + offset) / log(base) - # replace with sparseMatrixStats - } else if (methods::is(mymatrix, "Matrix")) { - mymatrix@x <- log(mymatrix@x + offset) / log(base) - } else if(methods::is(mymatrix, 'dbMatrix')) { - mymatrix[] <- dplyr::mutate(mymatrix[], x = x + offset) # workaround for lack of @x slot - mymatrix <- log(mymatrix)/log(base) - } else { - mymatrix <- log(as.matrix(mymatrix) + offset) / log(base) - } - - return(mymatrix) -} - - - - - - - - - - - - -### Filter Values #### - - - - -#' @title filterDistributions -#' @name filterDistributions -#' @description show gene or cell distribution after filtering on expression -#' threshold -#' @param gobject giotto object -#' @param feat_type feature type -#' @param spat_unit spatial unit -#' @param expression_values expression values to use -#' @param method method to create distribution (see details) -#' @param expression_threshold threshold to consider a gene expressed -#' @param detection consider features (e.g. genes) or cells -#' @param plot_type type of plot -#' @param scale_y scale y-axis (e.g. "log"), NULL = no scaling -#' @param nr_bins number of bins for histogram plot -#' @param fill_color fill color for plots -#' @param scale_axis ggplot transformation for axis (e.g. 
log2) -#' @param axis_offset offset to be used together with the scaling -#' transformation -#' @param show_plot logical. show plot -#' @param return_plot logical. return ggplot object -#' @param save_plot logical. directly save the plot -#' @param save_param list of saving parameters from -#' [GiottoVisuals::all_plots_save_function] -#' @param default_save_name default save name for saving, don't change, -#' change save_name in save_param -#' @returns ggplot object -#' @details -#' There are 3 ways to create a distribution profile and summarize it for -#' either the features or the cells (spatial units) \cr -#' \itemize{ -#' \item{1. threshold: calculate features that cross a thresold (default)} -#' \item{2. sum: summarize the features, i.e. total of a feature} -#' \item{3. mean: calculate mean of the features, i.e. average expression} -#' } -#' @md -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' filterDistributions(g) -#' @export -filterDistributions <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - method = c("threshold", "sum", "mean"), - expression_threshold = 1, - detection = c("feats", "cells"), - plot_type = c("histogram", "violin"), - scale_y = NULL, - nr_bins = 30, - fill_color = "lightblue", - scale_axis = "identity", - axis_offset = 0, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "filterDistributions") { - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - # expression values to be used - values <- match.arg( - expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values)) - ) - expr_values <- getExpression( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - values = values, - output = "matrix" - ) - - # plot distribution for feats or cells - detection <- match.arg(detection, c("feats", "cells")) - - # method to calculate distribution - method <- match.arg(method, c("threshold", "sum", "mean")) - - # plot type - plot_type <- match.arg(plot_type, c("histogram", "violin")) - - # variables - V1 <- NULL - - # for genes - if (detection == "feats") { - if (method == "threshold") { - feat_detection_levels <- data.table::as.data.table( - rowSums_flex(expr_values >= expression_threshold) - ) - mytitle <- "feat detected in # of cells" - } else if (method == "sum") { - feat_detection_levels <- data.table::as.data.table( - rowSums_flex(expr_values) - ) - mytitle <- "total sum of feature detected in all cells" - } else if (method == "mean") { - feat_detection_levels <- data.table::as.data.table( - rowMeans_flex(expr_values) - ) - mytitle <- "average of feature detected in all cells" - } - - y_title <- "count" - if (!is.null(scale_y)) { - feat_detection_levels[, V1 := do.call(what = scale_y, list(V1))] - y_title <- paste(scale_y, y_title) - } - - - - if (plot_type == "violin") { - pl <- ggplot2::ggplot() - pl <- pl + ggplot2::theme_classic() - pl <- pl + ggplot2::geom_violin( - data = feat_detection_levels, - ggplot2::aes(x = "feats", y = V1 + axis_offset), - fill = fill_color - ) - pl <- pl + ggplot2::scale_y_continuous(trans = scale_axis) - pl <- pl + ggplot2::labs(y = mytitle, x = "") - } else if (plot_type == "histogram") { - pl <- ggplot2::ggplot() - pl <- pl + ggplot2::theme_classic() - pl <- pl + 
ggplot2::geom_histogram( - data = feat_detection_levels, - ggplot2::aes(x = V1 + axis_offset), - color = "white", bins = nr_bins, fill = fill_color - ) - pl <- pl + ggplot2::scale_x_continuous(trans = scale_axis) - pl <- pl + ggplot2::labs(x = mytitle, y = y_title) - } - - # for cells - } else if (detection == "cells") { - if (method == "threshold") { - cell_detection_levels <- data.table::as.data.table( - colSums_flex(expr_values >= expression_threshold) - ) - mytitle <- "feats detected per cell" - } else if (method == "sum") { - cell_detection_levels <- data.table::as.data.table( - colSums_flex(expr_values) - ) - mytitle <- "total features per cell" - } else if (method == "mean") { - cell_detection_levels <- data.table::as.data.table( - colMeans_flex(expr_values) - ) - mytitle <- "average number of features per cell" - } - - y_title <- "count" - if (!is.null(scale_y)) { - cell_detection_levels[, V1 := do.call(what = scale_y, list(V1))] - y_title <- paste(scale_y, y_title) - } - - - - if (plot_type == "violin") { - pl <- ggplot2::ggplot() - pl <- pl + ggplot2::theme_classic() - pl <- pl + ggplot2::geom_violin( - data = cell_detection_levels, - ggplot2::aes(x = "cells", y = V1 + axis_offset), - fill = fill_color - ) - pl <- pl + ggplot2::scale_y_continuous(trans = scale_axis) - pl <- pl + ggplot2::labs(y = mytitle, x = "") - } else if (plot_type == "histogram") { - pl <- ggplot2::ggplot() - pl <- pl + ggplot2::theme_classic() - pl <- pl + ggplot2::geom_histogram( - data = cell_detection_levels, - ggplot2::aes(x = V1 + axis_offset), - color = "white", bins = nr_bins, fill = fill_color - ) - pl <- pl + ggplot2::scale_x_continuous(trans = scale_axis) - pl <- pl + ggplot2::labs(x = mytitle, y = y_title) - } - } - - return(GiottoVisuals::plot_output_handler( - gobject = gobject, - plot_object = pl, - save_plot = save_plot, - return_plot = return_plot, - show_plot = show_plot, - default_save_name = default_save_name, - save_param = save_param, - else_return = NULL - )) -} - - - -#' @title filterCombinations -#' @name filterCombinations -#' @description Shows how many genes and cells are lost with combinations of -#' thresholds. -#' @inheritParams data_access_params -#' @inheritParams plot_output_params -#' @param expression_values expression values to use -#' @param expression_thresholds all thresholds to consider a gene expressed -#' @param feat_det_in_min_cells minimum # of cells that need to express a -#' feature -#' @param min_det_feats_per_cell minimum # of features that need to be -#' detected in a cell -#' @param scale_x_axis ggplot transformation for x-axis (e.g. log2) -#' @param x_axis_offset x-axis offset to be used together with the scaling -#' transformation -#' @param scale_y_axis ggplot transformation for y-axis (e.g. log2) -#' @param y_axis_offset y-axis offset to be used together with the scaling -#' transformation -#' @returns list of data.table and ggplot object -#' @details Creates a scatterplot that visualizes the number of genes and -#' cells that are lost with a specific combination of a gene and cell -#' threshold given an arbitrary cutoff to call a gene expressed. This function -#' can be used to make an informed decision at the filtering step with -#' filterGiotto. 
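# A minimal usage sketch of the threshold-exploration workflow described above,
# assuming the GiottoData mini visium object used throughout these examples;
# the threshold values shown are illustrative, not recommendations.
g <- GiottoData::loadGiottoMini("visium")
combo_res <- filterCombinations(
    g,
    expression_thresholds = c(1, 2),
    feat_det_in_min_cells = c(5, 50),
    min_det_feats_per_cell = c(200, 400),
    show_plot = FALSE
)
# per the documented return, a list with a results data.table and a ggplot
combo_res$results
# feed an acceptable combination into filterGiotto()
g <- filterGiotto(
    g,
    expression_threshold = 1,
    feat_det_in_min_cells = 50,
    min_det_feats_per_cell = 200
)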
-#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' filterCombinations(g) -#' @export -filterCombinations <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - expression_thresholds = c(1, 2), - feat_det_in_min_cells = c(5, 50), - min_det_feats_per_cell = c(200, 400), - scale_x_axis = "identity", - x_axis_offset = 0, - scale_y_axis = "identity", - y_axis_offset = 0, - show_plot = TRUE, - return_plot = FALSE, - save_plot = NULL, - save_param = list(), - default_save_name = "filterCombinations") { - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - - # expression values to be used - values <- match.arg( - expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values)) - ) - expr_values <- getExpression( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - values = values - )[] - - # feat and cell minimums need to have the same length - if (length(feat_det_in_min_cells) != length(min_det_feats_per_cell)) { - stop("\n feat_det_in_min_cells and min_det_feats_per_cell need to be - the same size \n") - } - - # compute the number of removed feats and cells - result_list <- list() - for (thresh_i in seq_along(expression_thresholds)) { - threshold <- expression_thresholds[thresh_i] - - det_feats_res <- list() - det_cells_res <- list() - for (combn_i in seq_along(feat_det_in_min_cells)) { - min_cells_for_feat <- feat_det_in_min_cells[combn_i] - min_feats_per_cell <- min_det_feats_per_cell[combn_i] - - - # first remove feats - filter_index_feats <- rowSums_flex( - expr_values >= threshold - ) >= min_cells_for_feat - removed_feats <- length(filter_index_feats[ - filter_index_feats == FALSE - ]) - det_cells_res[[combn_i]] <- removed_feats - - # then remove cells - filter_index_cells <- colSums_flex(expr_values[ - filter_index_feats, - ] >= threshold) >= min_feats_per_cell - removed_cells <- length(filter_index_cells[ - filter_index_cells == FALSE - ]) - det_feats_res[[combn_i]] <- removed_cells - } - - temp_dt <- data.table::data.table( - "threshold" = threshold, - removed_feats = unlist(det_cells_res), - removed_cells = unlist(det_feats_res) - ) - - result_list[[thresh_i]] <- temp_dt - } - - result_DT <- do.call("rbind", result_list) - - # data.table variables - feat_detected_in_min_cells <- min_detected_feats_per_cell <- - combination <- NULL - - result_DT[["feat_detected_in_min_cells"]] <- feat_det_in_min_cells - result_DT[["min_detected_feats_per_cell"]] <- min_det_feats_per_cell - result_DT[["combination"]] <- paste0( - result_DT$feat_detected_in_min_cells, "-", - result_DT$min_detected_feats_per_cell - ) - - result_DT <- result_DT[, .( - threshold, - feat_detected_in_min_cells, - min_detected_feats_per_cell, - combination, - removed_feats, - removed_cells - )] - - maximum_x_value <- max(result_DT[["removed_cells"]], na.rm = TRUE) - maximum_y_value <- max(result_DT[["removed_feats"]], na.rm = TRUE) - - pl <- ggplot2::ggplot() - pl <- pl + ggplot2::theme_classic() - pl <- pl + ggplot2::geom_line(data = result_DT, aes( - x = removed_cells + x_axis_offset, - y = removed_feats + y_axis_offset, - group = as.factor(threshold) - ), linetype = 2) - pl <- pl + ggplot2::geom_point(data = result_DT, aes( - x = removed_cells + x_axis_offset, - y = removed_feats + y_axis_offset, - color = 
as.factor(threshold) - )) - pl <- pl + scale_color_discrete( - guide = guide_legend(title = "threshold(s)") - ) - pl <- pl + geom_text_repel(data = result_DT, aes( - x = removed_cells + x_axis_offset, - y = removed_feats + y_axis_offset, - label = combination - )) - pl <- pl + ggplot2::scale_x_continuous( - trans = scale_x_axis, limits = c(0, maximum_x_value) - ) - pl <- pl + ggplot2::scale_y_continuous( - trans = scale_y_axis, limits = c(0, maximum_y_value) - ) - pl <- pl + ggplot2::labs( - x = "number of removed cells", y = "number of removed feats" - ) - - - return(plot_output_handler( - gobject = gobject, - plot_object = pl, - save_plot = save_plot, - return_plot = return_plot, - show_plot = show_plot, - default_save_name = default_save_name, - save_param = save_param, - else_return = list(results = result_DT, ggplot = pl) - )) -} - - -#' @title filterGiotto -#' @name filterGiotto -#' @description filter Giotto object based on expression threshold -#' @param gobject giotto object -#' @param spat_unit character. spatial unit. If more than one is provided then -#' the first will be filtered, the filtering results will be applied across the -#' other spat_units provided -#' @param feat_type character. feature type. If more than one is provided then -#' the first will be filtered, the filtering results will be applied across the -#' other feat_types provided. -#' @param expression_values expression values to use -#' @param expression_threshold threshold to consider a gene expressed -#' @param feat_det_in_min_cells minimum # of cells that need to express a -#' feature -#' @param min_det_feats_per_cell minimum # of features that need to be detected -#' in a cell -#' @param all_spat_units deprecated. Use spat_unit_fsub = ":all:" -#' @param all_feat_types deprecated. Use feat_type_ssub = ":all:" -#' @param spat_unit_fsub character vector. (default = ':all:') limit features -#' to remove results to selected spat_units -#' @param feat_type_ssub character vector. (default = ':all:') limit cells to -#' remove results to selected feat_types -#' @param poly_info polygon information to use -#' @param tag_cells tag filtered cells in metadata vs. remove cells -#' @param tag_cell_name column name for tagged cells in metadata -#' @param tag_feats tag features in metadata vs. remove features -#' @param tag_feats_name column name for tagged features in metadata -#' @param verbose verbose -#' -#' @returns giotto object -#' @details The function \code{\link{filterCombinations}} can be used to -#' explore the effect of different parameter values. -#' Please note that this function filters data in a predefined order, features, -#' then cells. -#' After filtering in this order, certain features may be left over in the -#' metadata with a corresponding number of cells which is less than that of -#' the threshold value of cells, -#' feat_det_in_min_cells. 
This behavior is explained in detail here: -#' \url{https://github.com/drieslab/Giotto/issues/500#issuecomment-1396083446} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' filterGiotto(g) -#' @export -filterGiotto <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("raw", "normalized", "scaled", "custom"), - expression_threshold = 1, - feat_det_in_min_cells = 100, - min_det_feats_per_cell = 100, - spat_unit_fsub = ":all:", - feat_type_ssub = ":all:", - all_spat_units = NULL, - all_feat_types = NULL, - poly_info = NULL, - tag_cells = FALSE, - tag_cell_name = "tag", - tag_feats = FALSE, - tag_feats_name = "tag", - verbose = TRUE) { - # data.table vars - cell_ID <- feat_ID <- NULL - - # handle deprecations - if (!is.null(all_spat_units)) { - if (all_spat_units) { - spat_unit_fsub <- ":all:" - } else { - spat_unit_fsub <- spat_unit - } - - warning(wrap_txt( - 'filterGiotto: - all_spat_units param is deprecated. - Please use spat_unit_fsub = \":all:\" instead. (this is the default)' - )) - } - if (!is.null(all_feat_types)) { - if (all_feat_types) { - feat_type_ssub <- ":all:" - } else { - feat_type_ssub <- feat_type - } - - warning(wrap_txt( - 'filterGiotto: all_feat_types param is deprecated. - Please use feat_type_ssub = \":all:\" instead. - (this is the default)' - )) - } - - - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - # set poly_info - if (is.null(poly_info)) { - poly_info <- spat_unit - } - - if (verbose && length(spat_unit) > 1L) { - wrap_msg( - "More than one spat_unit provided.\n", - paste0("[", spat_unit[[1L]], "]"), - "filtering results will be applied across spat_units:", spat_unit - ) - } - if (verbose && length(feat_type) > 1L) { - wrap_msg( - "More than one feat_type provided.\n", - paste0("[", feat_type[[1L]], "]"), - "filtering results will be applied across spat_units:", feat_type - ) - } - - - # expression values to be used - values <- match.arg( - expression_values, - unique(c("raw", "normalized", "scaled", "custom", expression_values)) - ) - - # get expression values to perform filtering on - # Only the first spat_unit and feat_type provided are filtered. - # IF there are additional spat_units and feat_types provided, then the - # filtering - # results from this round will be applied to the other provided spat_units - # and feat_types as well. - expr_values <- getExpression( - gobject = gobject, - spat_unit = spat_unit[[1L]], - feat_type = feat_type[[1L]], - values = values, - output = "matrix" - ) - - # approach: - # 1. first remove genes that are not frequently detected - # 2. 
then remove cells that do not have sufficient detected genes - - ## filter features - filter_index_feats <- rowSums_flex( - expr_values >= expression_threshold - ) >= feat_det_in_min_cells - selected_feat_ids <- names(filter_index_feats[filter_index_feats == TRUE]) - - - - ## filter cells - filter_index_cells <- colSums_flex(expr_values[ - filter_index_feats, - ] >= expression_threshold) >= min_det_feats_per_cell - selected_cell_ids <- names(filter_index_cells[filter_index_cells == TRUE]) - - - - # update cell metadata - if (isTRUE(tag_cells)) { - cell_meta <- getCellMetadata(gobject = gobject, copy_obj = TRUE) - cell_meta[][, c(tag_cell_name) := ifelse( - cell_ID %in% selected_cell_ids, 0, 1 - )] - gobject <- setCellMetadata( - gobject = gobject, x = cell_meta, initialize = FALSE - ) - - # set selected cells back to all cells - selected_cell_ids <- names(filter_index_cells) - } - - if (isTRUE(tag_feats)) { - feat_meta <- getFeatureMetadata(gobject = gobject, copy_obj = TRUE) - feat_meta[][, c(tag_feats_name) := ifelse( - feat_ID %in% selected_feat_ids, 0, 1 - )] - gobject <- setFeatureMetadata( - gobject = gobject, x = feat_meta, initialize = FALSE - ) - - # set selected feats back to all feats - selected_feat_ids <- names(filter_index_feats) - } - - - - # update feature metadata - newGiottoObject <- subsetGiotto( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - cell_ids = selected_cell_ids, - feat_ids = selected_feat_ids, - spat_unit_fsub = spat_unit_fsub, - feat_type_ssub = feat_type_ssub, - poly_info = poly_info, - verbose = verbose - ) - - ## print output ## - removed_feats <- length(filter_index_feats[filter_index_feats == FALSE]) - total_feats <- length(filter_index_feats) - - removed_cells <- length(filter_index_cells[filter_index_cells == FALSE]) - total_cells <- length(filter_index_cells) - - if (isTRUE(verbose)) { - cat("\n") - cat("Feature type: ", feat_type, "\n") - - if (isTRUE(tag_cells)) { - cat( - "Number of cells tagged: ", removed_cells, " out of ", - total_cells, "\n" - ) - } else { - cat( - "Number of cells removed: ", removed_cells, " out of ", - total_cells, "\n" - ) - } - - if (isTRUE(tag_feats)) { - cat( - "Number of feats tagged: ", removed_feats, " out of ", - total_feats, "\n" - ) - } else { - cat( - "Number of feats removed: ", removed_feats, " out of ", - total_feats, "\n" - ) - } - } - - - ## update parameters used ## - - # Do not update downstream of processGiotto - # Parameters will be updated within processGiotto - try( - { - upstream_func <- sys.call(-2) - fname <- as.character(upstream_func[[1]]) - if (fname == "processGiotto") { - return(newGiottoObject) - } - }, - silent = TRUE - ) - - - # If this function call is not downstream of processGiotto, update normally - newGiottoObject <- update_giotto_params( - newGiottoObject, - description = "_filter" - ) - - return(newGiottoObject) - - -} - - - - -### normalization #### - -#' @title compute_dbMatrix -#' @description saves dbMatrix to db if global option is set -#' @details -#' Set \code{options(giotto.dbmatrix_compute = FALSE)} if saving dbMatrix -#' after each step of normalization workflow is not desired. 
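# As the details above note, the on-disk compute after each normalization step
# can be switched off globally via the documented option; a one-line sketch:
options(giotto.dbmatrix_compute = FALSE) # skip writing the dbMatrix back to the database between steps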
-#' @keywords internal -.compute_dbMatrix <- function(dbMatrix, name, verbose = TRUE) { - # input validation - if(!inherits(dbMatrix, 'dbMatrix')) { - stop('dbMatrix must be of class dbMatrix') - } - - if(!is.character(name)) { - stop('name must be a character') - } - - # TODO: update with dbData generic - con = dbMatrix:::get_con(dbMatrix) - - # overwrite table by default - if(name %in% DBI::dbListTables(con)) { - DBI::dbRemoveTable(con, name) - } - - if(verbose){ - msg <- glue::glue('Computing {name} expression matrix on disk...') - cat(msg) - } - - dbMatrix[] |> - dplyr::compute(temporary=F, name = name) - - # TODO: update below with proper setters from dbMatrix - dbMatrix[] <- dplyr::tbl(con, name) # reassign to computed mat - dbMatrix@name <- name - - if(verbose) cat('done \n') - - return(dbMatrix) -} - -#' @title RNA standard normalization -#' @name .rna_standard_normalization -#' @description standard function for RNA normalization -#' @returns giotto object -#' @keywords internal -.rna_standard_normalization <- function( - gobject, - raw_expr, - feat_type, - spat_unit, - library_size_norm = TRUE, - scalefactor = 6e3, - log_norm = TRUE, - log_offset = 1, - logbase = 2, - scale_feats = TRUE, - scale_cells = TRUE, - scale_order = c("first_feats", "first_cells"), - verbose = TRUE) { - # check feature type compatibility - if (!feat_type %in% c("rna", "RNA")) { - warning("Caution: Standard normalization was developed for RNA data \n") - } - - # evaluate provenance before modifying raw_expr in case h5_file exists - if (isS4(raw_expr)) { - provenance <- raw_expr@provenance - } else { - provenance <- NULL - } - - - feat_names <- rownames(raw_expr[]) - col_names <- colnames(raw_expr[]) - - ## 1. library size normalize - if (library_size_norm == TRUE) { - norm_expr <- .lib_norm_giotto( - mymatrix = raw_expr[], - scalefactor = scalefactor - ) - } else { - norm_expr <- raw_expr[] - } - - ## 2. lognormalize - if (log_norm == TRUE) { - norm_expr <- .log_norm_giotto( - mymatrix = norm_expr, - base = logbase, - offset = log_offset - ) - } - - ## 3. scale - if (scale_feats == TRUE & scale_cells == TRUE) { - scale_order <- match.arg( - arg = scale_order, choices = c("first_feats", "first_cells") - ) - - if (scale_order == "first_feats") { - if (isTRUE(verbose)) { - wrap_msg("\n first scale feats and then cells \n") - } - - norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_expr), center = TRUE, scale = TRUE - )) - norm_scaled_expr <- standardise_flex( - x = norm_scaled_expr, center = TRUE, scale = TRUE - ) - } else if (scale_order == "first_cells") { - if (isTRUE(verbose)) { - wrap_msg("\n first scale cells and then feats \n") - } - - norm_scaled_expr <- standardise_flex( - x = norm_expr, center = TRUE, scale = TRUE - ) - norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_scaled_expr), center = TRUE, scale = TRUE - )) - } else { - stop("\n scale order must be given \n") - } - } else if (scale_feats == TRUE) { - norm_scaled_expr <- t_flex(standardise_flex( - x = t_flex(norm_expr), center = TRUE, scale = TRUE - )) - } else if (scale_cells == TRUE) { - norm_scaled_expr <- standardise_flex( - x = norm_expr, center = TRUE, scale = TRUE - ) - } else { - norm_scaled_expr <- NULL - } - - - ## 4. add cell and gene names back - if (!is.null(norm_expr)) { - rownames(norm_expr) <- feat_names - colnames(norm_expr) <- col_names - } - if (!is.null(norm_scaled_expr)) { - rownames(norm_scaled_expr) <- feat_names - colnames(norm_scaled_expr) <- col_names - } - - ## 5. 
create and set exprObj - # Save dbMatrix to db - compute_mat <- getOption("giotto.dbmatrix_compute", default = FALSE) - if(compute_mat && !is.null(norm_expr)){ - norm_expr <- .compute_dbMatrix( - dbMatrix = norm_expr, - name = 'normalized', - verbose = verbose - ) - } - - norm_expr <- create_expr_obj( - name = "normalized", - exprMat = norm_expr, - spat_unit = spat_unit, - feat_type = feat_type, - provenance = provenance, - misc = NULL - ) - - # Save dbMatrix to db - if(compute_mat && !is.null(norm_scaled_expr)){ - norm_scaled_expr = .compute_dbMatrix( - dbMatrix = norm_scaled_expr, - name = 'scaled', - verbose = verbose - ) - } - - norm_scaled_expr <- create_expr_obj( - name = "scaled", - exprMat = norm_scaled_expr, - spat_unit = spat_unit, - feat_type = feat_type, - provenance = provenance, - misc = NULL - ) - - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_expression_values( - gobject = gobject, - values = norm_expr - ) - - gobject <- set_expression_values( - gobject = gobject, - values = norm_scaled_expr - ) - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - - ## 6. return Giotto object - return(gobject) -} - - - -#' @title RNA osmfish normalization -#' @name .rna_osmfish_normalization -#' @description function for RNA normalization according to osmFISH paper -#' @returns giotto object -#' @keywords internal -.rna_osmfish_normalization <- function( - gobject, - raw_expr, - feat_type, - spat_unit, - name = "custom", - verbose = TRUE) { - # check feature type compatibility - if (!feat_type %in% c("rna", "RNA")) { - warning("Caution: osmFISH normalization was developed for RNA in situ - data \n") - } - - # 1. normalize per gene with scale-factor equal to number of genes - norm_feats <- (raw_expr[] / rowSums_flex(raw_expr[])) * nrow(raw_expr[]) - # 2. normalize per cells with scale-factor equal to number of cells - norm_feats_cells <- t_flex((t_flex(norm_feats) / - colSums_flex(norm_feats)) * ncol(raw_expr[])) - - # return results to Giotto object - if (verbose == TRUE) { - message( - "\n osmFISH-like normalized data will be returned to the", - name, "Giotto slot \n" - ) - } - - norm_feats_cells <- create_expr_obj( - name = name, - exprMat = norm_feats_cells, - spat_unit = spat_unit, - feat_type = feat_type, - provenance = raw_expr@provenance - ) - - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_expression_values( - gobject = gobject, - values = norm_feats_cells - ) - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - - - return(gobject) -} - - -#' @title RNA pearson residuals normalization -#' @name .rna_pears_resid_normalization -#' @description function for RNA normalization according to Lause/Kobak et al -#' paper -#' Adapted from https://gist.github.com/hypercompetent/51a3c428745e1c06d826d76c3671797c#file-pearson_residuals-r -#' @returns giotto object -#' @keywords internal -.rna_pears_resid_normalization <- function( - gobject, - raw_expr, - feat_type, - spat_unit, - theta = 100, - name = "scaled", - verbose = TRUE) { - # print message with information # - if (verbose) { - message("using 'Lause/Kobak' method to normalize count matrix If used in - published research, please cite: - Jan Lause, Philipp Berens, Dmitry Kobak (2020). 
- 'Analytic Pearson residuals for normalization of single-cell RNA-seq UMI - data' ") - } - - - # check feature type compatibility - if (!feat_type %in% c("rna", "RNA")) { - warning("Caution: pearson residual normalization was developed for RNA - count normalization \n") - } - - if (methods::is(raw_expr[], "HDF5Matrix")) { - counts_sum0 <- methods::as(matrix( - MatrixGenerics::colSums2(raw_expr[]), - nrow = 1 - ), "HDF5Matrix") - counts_sum1 <- methods::as(matrix( - MatrixGenerics::rowSums2(raw_expr[]), - ncol = 1 - ), "HDF5Matrix") - counts_sum <- sum(raw_expr[]) - - # get residuals - mu <- (counts_sum1 %*% counts_sum0) / counts_sum - z <- (raw_expr[] - mu) / sqrt(mu + mu^2 / theta) - - # clip to sqrt(n) - n <- ncol(raw_expr[]) - z[z > sqrt(n)] <- sqrt(n) - z[z < -sqrt(n)] <- -sqrt(n) - } else { - counts_sum0 <- methods::as(matrix(Matrix::colSums( - raw_expr[] - ), nrow = 1), "dgCMatrix") - counts_sum1 <- methods::as(matrix(Matrix::rowSums( - raw_expr[] - ), ncol = 1), "dgCMatrix") - counts_sum <- sum(raw_expr[]) - - # get residuals - mu <- (counts_sum1 %*% counts_sum0) / counts_sum - z <- (raw_expr[] - mu) / sqrt(mu + mu^2 / theta) - - # clip to sqrt(n) - n <- ncol(raw_expr[]) - z[z > sqrt(n)] <- sqrt(n) - z[z < -sqrt(n)] <- -sqrt(n) - } - - # return results to Giotto object - if (verbose == TRUE) { - message( - "\n Pearson residual normalized data will be returned to the ", - name, " Giotto slot \n" - ) - } - - z <- create_expr_obj( - name = name, - exprMat = z, - spat_unit = spat_unit, - feat_type = feat_type, - provenance = raw_expr@provenance - ) - - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_expression_values( - gobject = gobject, - values = z - ) - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - - return(gobject) -} - - - - -#' @title normalizeGiotto -#' @name normalizeGiotto -#' @description fast normalize and/or scale expresion values of Giotto object -#' @param gobject giotto object -#' @param spat_unit spatial unit -#' @param feat_type feature type -#' @param expression_values expression values to use -#' @param norm_methods normalization method to use -#' @param library_size_norm normalize cells by library size -#' @param scalefactor scale factor to use after library size normalization -#' @param log_norm transform values to log-scale -#' @param log_offset offset value to add to expression matrix, default = 1 -#' @param logbase log base to use to log normalize expression values -#' @param scale_feats z-score genes over all cells -#' @param scale_genes deprecated, use scale_feats -#' @param scale_cells z-score cells over all genes -#' @param scale_order order to scale feats and cells -#' @param theta theta parameter for the pearson residual normalization step -#' @param update_slot slot or name to use for the results from osmFISH and -#' pearson residual normalization -#' @param verbose be verbose -#' @returns giotto object -#' @details Currently there are two 'methods' to normalize your raw counts data. -#' -#' A. The standard method follows the standard protocol which can be adjusted -#' using the provided parameters and follows the following order: \cr -#' \itemize{ -#' \item{1. Data normalization for total library size and scaling by a custom scale-factor.} -#' \item{2. Log transformation of data.} -#' \item{3. Z-scoring of data by genes and/or cells.} -#' } -#' B. The normalization method as provided by the osmFISH paper is also implemented: \cr -#' \itemize{ -#' \item{1. 
First normalize genes, for each gene divide the counts by the total gene count and -#' multiply by the total number of genes.} -#' \item{2. Next normalize cells, for each cell divide the normalized gene counts by the total -#' counts per cell and multiply by the total number of cells.} -#' } -#' C. The normalization method as provided by Lause/Kobak et al is also implemented: \cr -#' \itemize{ -#' \item{1. First calculate expected values based on Pearson correlations.} -#' \item{2. Next calculate z-scores based on observed and expected values.} -#' } -#' By default the latter two results will be saved in the Giotto slot for -#' scaled expression, this can be changed by changing the update_slot parameters -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' -#' normalizeGiotto(g) -#' @export -normalizeGiotto <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = "raw", - norm_methods = c("standard", "pearson_resid", "osmFISH"), - library_size_norm = TRUE, - scalefactor = 6e3, - log_norm = TRUE, - log_offset = 1, - logbase = 2, - scale_feats = TRUE, - scale_genes = NULL, - scale_cells = TRUE, - scale_order = c("first_feats", "first_cells"), - theta = 100, - update_slot = "scaled", - verbose = TRUE) { - ## deprecated arguments - if (!is.null(scale_genes)) { - scale_feats <- scale_genes - warning("scale_genes is deprecated, use scale_feats in the future \n") - } - - # Set feat_type and spat_unit - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - ## default is to start from raw data - values <- match.arg(expression_values, unique(c("raw", expression_values))) - raw_expr <- getExpression( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - values = values, - output = "exprObj" - ) - - norm_methods <- match.arg( - arg = norm_methods, choices = c("standard", "pearson_resid", "osmFISH") - ) - - # normalization according to standard methods - if (norm_methods == "standard") { - gobject <- .rna_standard_normalization( - gobject = gobject, - raw_expr = raw_expr, - feat_type = feat_type, - spat_unit = spat_unit, - library_size_norm = library_size_norm, - scalefactor = scalefactor, - log_norm = log_norm, - log_offset = log_offset, - logbase = logbase, - scale_feats = scale_feats, - scale_cells = scale_cells, - scale_order = scale_order, - verbose = verbose - ) - } else if (norm_methods == "osmFISH") { - gobject <- .rna_osmfish_normalization( - gobject = gobject, - raw_expr = raw_expr, - feat_type = feat_type, - spat_unit = spat_unit, - name = update_slot, - verbose = verbose - ) - } else if (norm_methods == "pearson_resid") { - gobject <- .rna_pears_resid_normalization( - gobject = gobject, - raw_expr = raw_expr, - feat_type = feat_type, - spat_unit = spat_unit, - theta = theta, - name = update_slot, - verbose = verbose - ) - } - - ## update parameters used ## - - # Do not update downstream of processGiotto - # Parameters will be updated within processGiotto - try( - { - upstream_func <- sys.call(-2) - fname <- as.character(upstream_func[[1]]) - if (fname == "processGiotto") { - return(gobject) - } - }, - silent = TRUE - ) - - - # If this function call is not downstream of processGiotto, update normally - gobject <- update_giotto_params(gobject, description = "_normalize") - - return(gobject) -} - #' @title Adjust expression values @@ -1297,6 +41,8 @@ adjustGiottoMatrix <- function( 
provided.") } + package_check("limma") + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1389,10 +135,7 @@ adjustGiottoMatrix <- function( ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_expression_values( - gobject = gobject, - values = adjusted_matrix - ) + gobject <- setGiotto(gobject, adjusted_matrix) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## @@ -1454,14 +197,14 @@ processGiotto <- function( adjust_params = list(), verbose = TRUE) { # filter Giotto - if (verbose == TRUE) message("1. start filter step") + vmsg(.v = verbose, "1. start filter step") if (!inherits(filter_params, "list")) { stop("filter_params need to be a list of parameters for filterGiotto") } gobject <- do.call("filterGiotto", c(gobject = gobject, filter_params)) # normalize Giotto - if (verbose == TRUE) message("2. start normalization step") + vmsg(.v = verbose, "2. start normalization step") if (!inherits(norm_params, "list")) { stop("norm_params need to be a list of parameters for normalizeGiotto") } @@ -1476,7 +219,7 @@ processGiotto <- function( gobject <- do.call("addStatistics", c(gobject = gobject, stat_params)) # adjust Giotto, if applicable - if (!is.null(adjust_params)) { + if (length(adjust_params) > 0L) { if (verbose == TRUE) message("4. start adjusted matrix step") if (!inherits(adjust_params, "list")) { stop("adjust_params need to be a list of parameters for @@ -1621,10 +364,7 @@ addFeatStatistics <- function( "mean_expr_det" ) := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_feature_metadata(gobject, - metadata = feat_metadata, - verbose = FALSE - ) + gobject <- setGiotto(gobject, feat_metadata, verbose = FALSE) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### } @@ -1781,10 +521,7 @@ addCellStatistics <- function( ) cell_metadata[][, c("nr_feats", "perc_feats", "total_expr") := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_cell_metadata(gobject, - metadata = cell_metadata, - verbose = FALSE - ) + gobject <- setGiotto(gobject, cell_metadata, verbose = FALSE) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### } @@ -2092,3 +829,21 @@ findNetworkNeighbors <- function( return(nb_annot) } + + +# internals #### + + + +#' @title Mean expression detected test +#' @param mymatrix matrix of expression info +#' @param detection_threshold detection threshold. Defaults to 1 count. +#' @returns numeric +#' @keywords internal +#' @noRd +.mean_expr_det_test <- function(mymatrix, detection_threshold = 1) { + unlist(apply(X = mymatrix, MARGIN = 1, FUN = function(x) { + detected_x <- x[x > detection_threshold] + mean(detected_x) + })) +} diff --git a/R/cell_segmentation.R b/R/cell_segmentation.R index 8090cd603..b9b9b1b4c 100644 --- a/R/cell_segmentation.R +++ b/R/cell_segmentation.R @@ -16,12 +16,11 @@ #' of the tile: sx (start x), ex (end x), sy, and ey. 
#' #' @export -doCellSegmentation <- function( - raster_img, - folder_path, - reduce_resolution = 4, - overlapping_pixels = 50, - python_path = NULL) { +doCellSegmentation <- function(raster_img, + folder_path, + reduce_resolution = 4, + overlapping_pixels = 50, + python_path = NULL) { package_check("deepcell", repository = "pip") package_check("PIL", repository = "pip") @@ -93,16 +92,23 @@ doCellSegmentation <- function( #' @title perform cellpose segmentation #' @description #' -#' perform the Giotto Wrapper of cellpose segmentation. This is for a model inference to generate segmentation mask file from input image. +#' perform the Giotto wrapper of cellpose segmentation. This runs model +#' inference to generate a segmentation mask file from an input image. #' main parameters needed #' @name doCellposeSegmentation -#' @param image_dir character, required. Provide a path to a gray scale or a three channel image. -#' @param python_path python environment with cellpose installed. default = "giotto_cellpose". +#' @param image_dir character, required. Provide a path to a gray scale or a +#' three channel image. +#' @param python_path python environment with cellpose installed. +#' default = "giotto_cellpose". #' @param mask_output required. Provide a path to the output mask file. #' @param channel_1 channel number for cytoplasm, default to 0(gray scale) #' @param channel_2 channel number for Nuclei, default to 0(gray scale) -#' @param model_name Name of the model to run inference. Default to 'cyto3', if you want to run cutomized trained model, place your model file in ~/.cellpose/models and specify your model name. -#' @param batch_size Cellpose Parameter, Number of 224x224 patches to run simultaneously on the GPU. Can make smaller or bigger depending on GPU memory usage. Defaults to 8. +#' @param model_name Name of the model to run inference. Defaults to 'cyto3'. +#' If you want to run a customized trained model, place your model file in +#' ~/.cellpose/models and specify your model name. +#' @param batch_size Cellpose Parameter. Number of 224x224 patches to run +#' simultaneously on the GPU. Can be made smaller or bigger depending on GPU +#' memory usage. Defaults to 8. #' @param resample Cellpose Parameter #' @param channel_axis Cellpose Parameter #' @param z_axis Cellpose Parameter @@ -124,97 +130,102 @@ doCellSegmentation <- function( #' @param interp Cellpose Parameter #' @param compute_masks Cellpose Parameter #' @param progress Cellpose Parameter -#' @returns No return variable, as this will write directly to output path provided. +#' @returns No return variable, as this will write directly to output path +#' provided. 

#' @examples #' # example code -#' doCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) +#' doCellposeSegmentation(image_dir = input_image, +#' mask_output = output, channel_1 = 2, +#' channel_2 = 1, model_name = "cyto3", batch_size = 4) #' @export -doCellposeSegmentation <- function(python_env = 'giotto_cellpose', - image_dir, - mask_output, - channel_1 = 0, - channel_2 = 0, - model_name = 'cyto3', - batch_size=8, - resample=TRUE, - channel_axis=NULL, - z_axis=NULL, - normalize=TRUE, - invert=FALSE, - rescale=NULL, - diameter=NULL, - flow_threshold=0.4, - cellprob_threshold=0.0, - do_3D=FALSE, - anisotropy=NULL, - stitch_threshold=0.0, - min_size=15, - niter=NULL, - augment=FALSE, - tile=TRUE, - tile_overlap=0.1, - bsize=224, - interp=TRUE, - compute_masks=TRUE, - progress=NULL, - verbose = TRUE,...){ - - - #Check Input arguments - model_name <- match.arg(model_name, unique(c('cyto3', 'cyto2', 'cyto','nuclei', model_name))) +doCellposeSegmentation <- function(python_env = "giotto_cellpose", + image_dir, + mask_output, + channel_1 = 0, + channel_2 = 0, + model_name = "cyto3", + batch_size = 8, + resample = TRUE, + channel_axis = NULL, + z_axis = NULL, + normalize = TRUE, + invert = FALSE, + rescale = NULL, + diameter = NULL, + flow_threshold = 0.4, + cellprob_threshold = 0.0, + do_3D = FALSE, + anisotropy = NULL, + stitch_threshold = 0.0, + min_size = 15, + niter = NULL, + augment = FALSE, + tile = TRUE, + tile_overlap = 0.1, + bsize = 224, + interp = TRUE, + compute_masks = TRUE, + progress = NULL, + verbose = TRUE, ...) { + # Check Input arguments + model_name <- match.arg( + model_name, unique(c("cyto3", "cyto2", "cyto", "nuclei", model_name))) ## Load required python libraries GiottoClass::set_giotto_python_path(python_env) - GiottoUtils::package_check('cellpose',repository = 'pip') + GiottoUtils::package_check("cellpose", repository = "pip") cellpose <- reticulate::import("cellpose") np <- reticulate::import("numpy") cv2 <- reticulate::import("cv2") torch <- reticulate::import("torch") - message('successfully loaded giotto environment with cellpose.') + message("successfully loaded giotto environment with cellpose.") - if (!(torch$cuda$is_available())){ - warning('GPU is not available for this session, inference may be slow.\n ') + if (!(torch$cuda$is_available())) { + warning("GPU is not available for this session, inference may be slow.") } - GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Loading Image from ',image_dir) + GiottoUtils::vmsg(.v = verbose, .is_debug = FALSE, "Loading Image from ", + image_dir) img <- cellpose$io$imread(image_dir) - GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Loading Model...') + GiottoUtils::vmsg(.v = verbose, .is_debug = FALSE, "Loading Model...") - model_to_seg <- cellpose$models$Cellpose(model_type=model_name,gpu = torch$cuda$is_available()) - channel_to_seg <- as.integer(c(channel_1,channel_2)) + model_to_seg <- cellpose$models$Cellpose(model_type = model_name, + gpu = torch$cuda$is_available()) + channel_to_seg <- as.integer(c(channel_1, channel_2)) - GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Segmenting Image...') + GiottoUtils::vmsg(.v = verbose, .is_debug = FALSE, "Segmenting Image...") segmentation <- model_to_seg$eval result <- segmentation(img, - diameter=diameter, - channels=channel_to_seg, - batch_size = batch_size, - resample=resample, - channel_axis=channel_axis, - z_axis=z_axis, - normalize=normalize, - invert=invert, - rescale=rescale, - 
flow_threshold=flow_threshold, - cellprob_threshold=cellprob_threshold, - do_3D=do_3D, - anisotropy=anisotropy, - stitch_threshold=stitch_threshold, - min_size=min_size, - niter=niter, - augment=augment, - tile=tile, - tile_overlap=tile_overlap, - bsize=bsize, - interp=interp, - compute_masks=compute_masks, - progress=progress) + diameter = diameter, + channels = channel_to_seg, + batch_size = batch_size, + resample = resample, + channel_axis = channel_axis, + z_axis = z_axis, + normalize = normalize, + invert = invert, + rescale = rescale, + flow_threshold = flow_threshold, + cellprob_threshold = cellprob_threshold, + do_3D = do_3D, + anisotropy = anisotropy, + stitch_threshold = stitch_threshold, + min_size = min_size, + niter = niter, + augment = augment, + tile = tile, + tile_overlap = tile_overlap, + bsize = bsize, + interp = interp, + compute_masks = compute_masks, + progress = progress + ) masks <- result[[1]] - GiottoUtils::vmsg(.v = verbose, .is_debug = F,'Segmentation finished... Saving mask file...') - GiottoUtils::package_check('terra') - rast = terra::rast(masks) - terra::writeRaster(rast, mask_output,overwrite=TRUE) + GiottoUtils::vmsg(.v = verbose, .is_debug = FALSE, + "Segmentation finished... Saving mask file...") + GiottoUtils::package_check("terra") + rast <- terra::rast(masks) + terra::writeRaster(rast, mask_output, overwrite = TRUE) } - diff --git a/R/clustering.R b/R/clustering.R index 65c36846c..fe858c0f4 100644 --- a/R/clustering.R +++ b/R/clustering.R @@ -47,25 +47,24 @@ #' #' doLeidenCluster(g) #' @export -doLeidenCluster <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - name = "leiden_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = "weight", - partition_type = c( - "RBConfigurationVertexPartition", - "ModularityVertexPartition" - ), - init_membership = NULL, - n_iterations = 1000, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doLeidenCluster <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + name = "leiden_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = "weight", + partition_type = c( + "RBConfigurationVertexPartition", + "ModularityVertexPartition" + ), + init_membership = NULL, + n_iterations = 1000, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -204,7 +203,8 @@ doLeidenCluster <- function( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - new_metadata = ident_clusters_DT[, c("cell_ID", name), with = FALSE], + new_metadata = ident_clusters_DT[ + , c("cell_ID", name), with = FALSE], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -253,7 +253,8 @@ doLeidenCluster <- function( #' as long as they can fit in memory. See \code{\link[igraph]{cluster_leiden}} #' for more information. #' -#' Set \emph{weights = NULL} to use the vertices weights associated with the igraph network. +#' Set \emph{weights = NULL} to use the vertices weights associated with the +#' igraph network. 
#' Set \emph{weights = NA} if you don't want to use vertices weights #' #' @examples @@ -261,23 +262,22 @@ doLeidenCluster <- function( #' #' doLeidenClusterIgraph(g) #' @export -doLeidenClusterIgraph <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - name = "leiden_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - objective_function = c("modularity", "CPM"), - weights = NULL, - resolution_parameter = 1, - beta = 0.01, - initial_membership = NULL, - n_iterations = 1000, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234, - ...) { +doLeidenClusterIgraph <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + name = "leiden_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + objective_function = c("modularity", "CPM"), + weights = NULL, + resolution_parameter = 1, + beta = 0.01, + initial_membership = NULL, + n_iterations = 1000, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -369,7 +369,8 @@ doLeidenClusterIgraph <- function( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - new_metadata = ident_clusters_DT[, c("cell_ID", name), with = FALSE], + new_metadata = ident_clusters_DT[ + , c("cell_ID", name), with = FALSE], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -426,18 +427,17 @@ doLeidenClusterIgraph <- function( #' show_plot = FALSE, save_plot = FALSE #' ) #' @export -doGiottoClustree <- function( - gobject, - res_vector = NULL, - res_seq = NULL, - return_gobject = FALSE, - show_plot = NULL, - save_plot = NULL, - return_plot = NULL, - save_param = list(), - default_save_name = "clustree", - verbose = TRUE, - ...) { +doGiottoClustree <- function(gobject, + res_vector = NULL, + res_seq = NULL, + return_gobject = FALSE, + show_plot = NULL, + save_plot = NULL, + return_plot = NULL, + save_param = list(), + default_save_name = "clustree", + verbose = TRUE, + ...) { package_check(pkg_name = "clustree", repository = "CRAN") ## setting resolutions to use if (is.null(res_vector)) { @@ -510,21 +510,20 @@ doGiottoClustree <- function( #' Set \emph{weight_col = NULL} to give equal weight (=1) to each edge. #' @md #' @keywords internal -.doLouvainCluster_community <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = NULL, - louv_random = FALSE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234, - ...) { +.doLouvainCluster_community <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = NULL, + louv_random = FALSE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -656,7 +655,8 @@ doGiottoClustree <- function( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - new_metadata = ident_clusters_DT[, c("cell_ID", name), with = FALSE], + new_metadata = ident_clusters_DT[ + , c("cell_ID", name), with = FALSE], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -713,18 +713,17 @@ doGiottoClustree <- function( #' in R for more information. 
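# A hedged sketch of reaching the multinet-based Louvain backend documented
# above through the exported doLouvainCluster() wrapper; the gamma/omega values
# shown are simply the defaults from the wrapper's signature.
g <- GiottoData::loadGiottoMini("visium")
g <- doLouvainCluster(
    g,
    version = "multinet",
    nn_network_to_use = "sNN",
    network_name = "sNN.pca",
    gamma = 1,
    omega = 1,
    name = "louvain_multinet_clus"
)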
#' #' @keywords internal -.doLouvainCluster_multinet <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - gamma = 1, - omega = 1, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +.doLouvainCluster_multinet <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + gamma = 1, + omega = 1, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { if ("multinet" %in% rownames(installed.packages()) == FALSE) { stop( "package 'multinet' is not yet installed \n", @@ -823,7 +822,8 @@ doGiottoClustree <- function( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - new_metadata = ident_clusters_DT[, c("cell_ID", name), with = FALSE], + new_metadata = ident_clusters_DT[ + , c("cell_ID", name), with = FALSE], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -893,24 +893,23 @@ doGiottoClustree <- function( #' #' doLouvainCluster(g) #' @export -doLouvainCluster <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - version = c("community", "multinet"), - name = "louvain_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - python_path = NULL, - resolution = 1, - weight_col = NULL, - gamma = 1, - omega = 1, - louv_random = FALSE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234, - ...) { +doLouvainCluster <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + version = c("community", "multinet"), + name = "louvain_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + python_path = NULL, + resolution = 1, + weight_col = NULL, + gamma = 1, + omega = 1, + louv_random = FALSE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234, + ...) 
{ # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -993,17 +992,16 @@ doLouvainCluster <- function( #' g <- doRandomWalkCluster(g) #' pDataDT(g) #' @export -doRandomWalkCluster <- function( - gobject, - name = "random_walk_clus", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - walk_steps = 4, - walk_clusters = 10, - walk_weights = NA, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +doRandomWalkCluster <- function(gobject, + name = "random_walk_clus", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + walk_steps = 4, + walk_clusters = 10, + walk_weights = NA, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { ## get cell IDs ## cell_ID_vec <- gobject@cell_ID @@ -1090,18 +1088,17 @@ doRandomWalkCluster <- function( #' #' doSNNCluster(g) #' @export -doSNNCluster <- function( - gobject, - name = "sNN_clus", - nn_network_to_use = "kNN", - network_name = "kNN.pca", - k = 20, - eps = 4, - minPts = 16, - borderPoints = TRUE, - return_gobject = TRUE, - set_seed = FALSE, - seed_number = 1234) { +doSNNCluster <- function(gobject, + name = "sNN_clus", + nn_network_to_use = "kNN", + network_name = "kNN.pca", + k = 20, + eps = 4, + minPts = 16, + borderPoints = TRUE, + return_gobject = TRUE, + set_seed = FALSE, + seed_number = 1234) { ## get cell IDs ## cell_ID_vec <- gobject@cell_ID @@ -1222,39 +1219,41 @@ doSNNCluster <- function( #' @param return_gobject boolean: return giotto object (default = TRUE) #' @param set_seed set seed (default = TRUE) #' @param seed_number number for seed -#' @returns if return_gobject = TRUE: giotto object with new clusters appended to cell metadata -#' @details The default settings will use dimension reduction results as input. -#' Set dim_reduction_to_use = NULL if you want to directly use expression values as input. -#' By providing a feature vector to feats_to_use you can subset the expression matrix. +#' @returns if return_gobject = TRUE: giotto object with new clusters appended +#' to cell metadata +#' @details The default settings will use dimension reduction results as input. +#' Set dim_reduction_to_use = NULL if you want to directly use expression +#' values as input. +#' By providing a feature vector to feats_to_use you can subset the expression +#' matrix. 
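# A hedged sketch of the two input routes described above: an existing dimension
# reduction versus (optionally subset) expression values. featIDs() is assumed
# to be available from GiottoClass; any character vector of feature IDs works.
g <- GiottoData::loadGiottoMini("visium")

# route 1: cluster on PCA dimensions
g <- doKmeans(
    g,
    dim_reduction_to_use = "pca",
    dim_reduction_name = "pca",
    dimensions_to_use = 1:10,
    centers = 8,
    name = "kmeans_pca"
)

# route 2: cluster directly on normalized expression for a feature subset
my_feats <- head(featIDs(g), 100)
g <- doKmeans(
    g,
    dim_reduction_to_use = NULL,
    expression_values = "normalized",
    feats_to_use = my_feats,
    centers = 8,
    name = "kmeans_expr"
)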
#' @seealso \code{\link[stats]{kmeans}} #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' #' doKmeans(g) #' @export -doKmeans <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "original", "pearson", "spearman", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - centers = 10, - iter_max = 100, - nstart = 1000, - algorithm = "Hartigan-Wong", - name = "kmeans", - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { - +doKmeans <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "original", "pearson", "spearman", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + centers = 10, + iter_max = 100, + nstart = 1000, + algorithm = "Hartigan-Wong", + name = "kmeans", + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1266,58 +1265,59 @@ doKmeans <- function( feat_type = feat_type ) - distance_method <- match.arg(distance_method, choices = c( "original", "pearson", "spearman", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski" )) - + dim_reduction_to_use <- match.arg( + dim_reduction_to_use, c("cells", "pca", "umap", "tsne") + ) + expression_values <- match.arg( + expression_values, c("normalized", "scaled", "custom") + ) ## using dimension reduction ## - if(!is.null(dim_reduction_to_use)) { - - # use only available dimensions if dimensions < dimensions_to_use - dim_coord <- get_dimReduction( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - reduction = "cells", - reduction_method = dim_reduction_to_use, - name = dim_reduction_name, - output = "dimObj" - ) - - dimensions_to_use <- dimensions_to_use[ - dimensions_to_use %in% seq_len(ncol(dim_coord[])) - ] - matrix_to_use <- dim_coord[][, dimensions_to_use] - - } else { - - ## using original matrix ## - expr_values <- getExpression( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - values = expression_values, - output = "exprObj" - ) - - # subset expression matrix - if (!is.null(feats_to_use)) { - expr_values[] <- expr_values[][ - rownames(expr_values[]) %in% feats_to_use, + if (dim_reduction_to_use != "cells" && !is.null(dim_reduction_to_use)) { + # use only available dimensions if dimensions < dimensions_to_use + dim_coord <- getDimReduction( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction = "cells", + reduction_method = dim_reduction_to_use, + name = dim_reduction_name, + output = "dimObj" + ) + + dimensions_to_use <- dimensions_to_use[ + dimensions_to_use %in% seq_len(ncol(dim_coord[])) ] - } - - # features as columns - # cells as rows - matrix_to_use <- t_flex(expr_values[]) - - } - - + matrix_to_use <- dim_coord[][, dimensions_to_use] + } else { + vmsg(.is_debug = TRUE, "clustering from expression values") + ## using original matrix ## + expr_values <- getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = expression_values, + output = "exprObj" + ) + + 
# subset expression matrix + if (!is.null(feats_to_use)) { + expr_values[] <- expr_values[][ + rownames(expr_values[]) %in% feats_to_use, + ] + } + + # features as columns + # cells as rows + matrix_to_use <- t_flex(expr_values[]) + } + + ## distance if (distance_method == "original") { celldist <- matrix_to_use @@ -1335,7 +1335,7 @@ doKmeans <- function( ## kmeans clustering # start seed if (isTRUE(set_seed)) { - set.seed(seed = as.integer(seed_number)) + GiottoUtils::local_seed(seed_number) } # start clustering @@ -1347,13 +1347,8 @@ doKmeans <- function( algorithm = algorithm ) - # exit seed - if (isTRUE(set_seed)) { - set.seed(seed = Sys.time()) - } - ident_clusters_DT <- data.table::data.table( - cell_ID = names(kclusters[["cluster"]]), + "cell_ID" = names(kclusters[["cluster"]]), "name" = kclusters[["cluster"]] ) data.table::setnames(ident_clusters_DT, "name", name) @@ -1380,11 +1375,9 @@ doKmeans <- function( cell_metadata[][, eval(name) := NULL] ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- setCellMetadata( - gobject = gobject, - x = cell_metadata, - verbose = FALSE, - initialize = FALSE + gobject <- setGiotto( + gobject, cell_metadata, + verbose = FALSE, initialize = FALSE ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### } @@ -1393,7 +1386,8 @@ doKmeans <- function( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - new_metadata = ident_clusters_DT[, c("cell_ID", name), with = FALSE], + new_metadata = ident_clusters_DT[ + , c("cell_ID", name), with = FALSE], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -1439,31 +1433,30 @@ doKmeans <- function( #' #' doHclust(g) #' @export -doHclust <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "pearson", "spearman", "original", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - agglomeration_method = c( - "ward.D2", "ward.D", "single", - "complete", "average", "mcquitty", - "median", "centroid" - ), - k = 10, - h = NULL, - name = "hclust", - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +doHclust <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "pearson", "spearman", "original", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + agglomeration_method = c( + "ward.D2", "ward.D", "single", + "complete", "average", "mcquitty", + "median", "centroid" + ), + k = 10, + h = NULL, + name = "hclust", + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1493,11 +1486,13 @@ doHclust <- function( ) ) values <- match.arg(expression_values, c("normalized", "scaled", "custom")) + dim_reduction_to_use <- match.arg( + dim_reduction_to_use, c("cells", "pca", "umap", "tsne") + ) ## using dimension reduction ## if (dim_reduction_to_use != "cells" && !is.null(dim_reduction_to_use)) { - ## TODO: check if reduction exists # use only available dimensions if dimensions < dimensions_to_use dim_coord <- getDimReduction( @@ -1515,6 +1510,7 @@ 
doHclust <- function( ] matrix_to_use <- dim_coord[, dimensions_to_use] } else { + vmsg(.is_debug = TRUE, "clustering from expression values") ## using original matrix ## expr_values <- getExpression( gobject = gobject, @@ -1604,7 +1600,8 @@ doHclust <- function( gobject = gobject, feat_type = feat_type, spat_unit = spat_unit, - new_metadata = ident_clusters_DT[, c("cell_ID", name), with = FALSE], + new_metadata = ident_clusters_DT[ + , c("cell_ID", name), with = FALSE], by_column = TRUE, column_cell_ID = "cell_ID" ) @@ -1680,62 +1677,61 @@ doHclust <- function( #' #' clusterCells(g) #' @export -clusterCells <- function( - gobject, - cluster_method = c( - "leiden", - "louvain_community", "louvain_multinet", - "randomwalk", "sNNclust", - "kmeans", "hierarchical" - ), - name = "cluster_name", - nn_network_to_use = "sNN", - network_name = "sNN.pca", - pyth_leid_resolution = 1, - pyth_leid_weight_col = "weight", - pyth_leid_part_type = c( - "RBConfigurationVertexPartition", - "ModularityVertexPartition" - ), - pyth_leid_init_memb = NULL, - pyth_leid_iterations = 1000, - pyth_louv_resolution = 1, - pyth_louv_weight_col = NULL, - python_louv_random = FALSE, - python_path = NULL, - louvain_gamma = 1, - louvain_omega = 1, - walk_steps = 4, - walk_clusters = 10, - walk_weights = NA, - sNNclust_k = 20, - sNNclust_eps = 4, - sNNclust_minPts = 16, - borderPoints = TRUE, - expression_values = c("normalized", "scaled", "custom"), - feats_to_use = NULL, - dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - distance_method = c( - "original", "pearson", "spearman", - "euclidean", "maximum", "manhattan", - "canberra", "binary", "minkowski" - ), - km_centers = 10, - km_iter_max = 100, - km_nstart = 1000, - km_algorithm = "Hartigan-Wong", - hc_agglomeration_method = c( - "ward.D2", "ward.D", "single", - "complete", "average", "mcquitty", - "median", "centroid" - ), - hc_k = 10, - hc_h = NULL, - return_gobject = TRUE, - set_seed = TRUE, - seed_number = 1234) { +clusterCells <- function(gobject, + cluster_method = c( + "leiden", + "louvain_community", "louvain_multinet", + "randomwalk", "sNNclust", + "kmeans", "hierarchical" + ), + name = "cluster_name", + nn_network_to_use = "sNN", + network_name = "sNN.pca", + pyth_leid_resolution = 1, + pyth_leid_weight_col = "weight", + pyth_leid_part_type = c( + "RBConfigurationVertexPartition", + "ModularityVertexPartition" + ), + pyth_leid_init_memb = NULL, + pyth_leid_iterations = 1000, + pyth_louv_resolution = 1, + pyth_louv_weight_col = NULL, + python_louv_random = FALSE, + python_path = NULL, + louvain_gamma = 1, + louvain_omega = 1, + walk_steps = 4, + walk_clusters = 10, + walk_weights = NA, + sNNclust_k = 20, + sNNclust_eps = 4, + sNNclust_minPts = 16, + borderPoints = TRUE, + expression_values = c("normalized", "scaled", "custom"), + feats_to_use = NULL, + dim_reduction_to_use = c("cells", "pca", "umap", "tsne"), + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + distance_method = c( + "original", "pearson", "spearman", + "euclidean", "maximum", "manhattan", + "canberra", "binary", "minkowski" + ), + km_centers = 10, + km_iter_max = 100, + km_nstart = 1000, + km_algorithm = "Hartigan-Wong", + hc_agglomeration_method = c( + "ward.D2", "ward.D", "single", + "complete", "average", "mcquitty", + "median", "centroid" + ), + hc_k = 10, + hc_h = NULL, + return_gobject = TRUE, + set_seed = TRUE, + seed_number = 1234) { ## select cluster method cluster_method <- match.arg( arg = cluster_method, @@ 
-1941,40 +1937,41 @@ NULL #' @rdname subClusterCells #' @export -subClusterCells <- function(gobject, - name = "sub_clus", - cluster_method = c( - "leiden", - "louvain_community", - "louvain_multinet" - ), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = deprecated(), - hvf_param = list( - reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized" - ), - hvg_min_perc_cells = deprecated(), - hvf_min_perc_cells = 5, - hvg_mean_expr_det = deprecated(), - hvf_mean_expr_det = 1, - use_all_genes_as_hvg = deprecated(), - use_all_feats_as_hvf = FALSE, - min_nr_of_hvg = deprecated(), - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 1, - n_iterations = 1000, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +subClusterCells <- function( + gobject, + name = "sub_clus", + cluster_method = c( + "leiden", + "louvain_community", + "louvain_multinet" + ), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 1, + n_iterations = 1000, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { ## select cluster method cluster_method <- match.arg(arg = cluster_method, choices = c( "leiden", @@ -1993,7 +1990,8 @@ subClusterCells <- function(gobject, hvf_param <- .dep_param(hvg_param, hvf_param) hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) - use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, + use_all_feats_as_hvf) min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) # gather common args @@ -2060,36 +2058,35 @@ subClusterCells <- function(gobject, #' @param toplevel do not use #' @param feat_type feature type #' @export -doLeidenSubCluster <- function( - gobject, - feat_type = NULL, - name = "sub_leiden_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvf_param = list( - reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized" - ), - hvg_param = deprecated(), - hvf_min_perc_cells = 5, - hvg_min_perc_cells = deprecated(), - hvf_mean_expr_det = 1, - hvg_mean_expr_det = deprecated(), - use_all_feats_as_hvf = FALSE, - use_all_genes_as_hvg = deprecated(), - min_nr_of_hvf = 5, - min_nr_of_hvg = deprecated(), - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - n_iterations = 500, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - toplevel = 2, - verbose = TRUE) { +doLeidenSubCluster <- function(gobject, + feat_type = NULL, + name = "sub_leiden_clus", 
+ cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_param = deprecated(), + hvf_min_perc_cells = 5, + hvg_min_perc_cells = deprecated(), + hvf_mean_expr_det = 1, + hvg_mean_expr_det = deprecated(), + use_all_feats_as_hvf = FALSE, + use_all_genes_as_hvg = deprecated(), + min_nr_of_hvf = 5, + min_nr_of_hvg = deprecated(), + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + n_iterations = 500, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + toplevel = 2, + verbose = TRUE) { # specify feat_type if (is.null(feat_type)) { feat_type <- gobject@expression_feat[[1]] @@ -2106,7 +2103,8 @@ doLeidenSubCluster <- function( hvf_param <- .dep_param(hvg_param, hvf_param) hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) - use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, + use_all_feats_as_hvf) min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) @@ -2262,29 +2260,28 @@ doLeidenSubCluster <- function( # subcluster cells using a NN-network and the Louvain community # detection algorithm -.doLouvainSubCluster_community <- function( - gobject, - name = "sub_louvain_comm_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvf_param = list( - reverse_log_scale = TRUE, - difference_in_cov = 1, - expression_values = "normalized" - ), - hvf_min_perc_cells = 5, - hvf_mean_expr_det = 1, - use_all_feats_as_hvf = FALSE, - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +.doLouvainSubCluster_community <- function(gobject, + name = "sub_louvain_comm_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, + difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { iter_list <- list() cell_metadata <- pDataDT(gobject) @@ -2447,28 +2444,27 @@ doLeidenSubCluster <- function( # subcluster cells using a NN-network and the Louvain multinet # detection algorithm -.doLouvainSubCluster_multinet <- function( - gobject, - name = "sub_louvain_mult_clus", - cluster_column = NULL, - selected_clusters = NULL, - hvf_param = list( - reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized" - ), - hvf_min_perc_cells = 5, - hvf_mean_expr_det = 1, - use_all_feats_as_hvf = FALSE, - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - gamma = 1, - omega = 1, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { 
+.doLouvainSubCluster_multinet <- function(gobject, + name = "sub_louvain_mult_clus", + cluster_column = NULL, + selected_clusters = NULL, + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvf_min_perc_cells = 5, + hvf_mean_expr_det = 1, + use_all_feats_as_hvf = FALSE, + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + gamma = 1, + omega = 1, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { if ("multinet" %in% rownames(installed.packages()) == FALSE) { stop( "package 'multinet' is not yet installed \n", @@ -2639,36 +2635,35 @@ doLeidenSubCluster <- function( #' @param version version of Louvain algorithm to use. One of "community" or #' "multinet", with the default being "community" #' @export -doLouvainSubCluster <- function( - gobject, - name = "sub_louvain_clus", - version = c("community", "multinet"), - cluster_column = NULL, - selected_clusters = NULL, - hvg_param = deprecated(), - hvf_param = list( - reverse_log_scale = TRUE, difference_in_cov = 1, - expression_values = "normalized" - ), - hvg_min_perc_cells = deprecated(), - hvf_min_perc_cells = 5, - hvg_mean_expr_det = deprecated(), - hvf_mean_expr_det = 1, - use_all_genes_as_hvg = deprecated(), - use_all_feats_as_hvf = FALSE, - min_nr_of_hvg = deprecated(), - min_nr_of_hvf = 5, - pca_param = list(expression_values = "normalized", scale_unit = TRUE), - nn_param = list(dimensions_to_use = 1:20), - k_neighbors = 10, - resolution = 0.5, - gamma = 1, - omega = 1, - python_path = NULL, - nn_network_to_use = "sNN", - network_name = "sNN.pca", - return_gobject = TRUE, - verbose = TRUE) { +doLouvainSubCluster <- function(gobject, + name = "sub_louvain_clus", + version = c("community", "multinet"), + cluster_column = NULL, + selected_clusters = NULL, + hvg_param = deprecated(), + hvf_param = list( + reverse_log_scale = TRUE, difference_in_cov = 1, + expression_values = "normalized" + ), + hvg_min_perc_cells = deprecated(), + hvf_min_perc_cells = 5, + hvg_mean_expr_det = deprecated(), + hvf_mean_expr_det = 1, + use_all_genes_as_hvg = deprecated(), + use_all_feats_as_hvf = FALSE, + min_nr_of_hvg = deprecated(), + min_nr_of_hvf = 5, + pca_param = list(expression_values = "normalized", scale_unit = TRUE), + nn_param = list(dimensions_to_use = 1:20), + k_neighbors = 10, + resolution = 0.5, + gamma = 1, + omega = 1, + python_path = NULL, + nn_network_to_use = "sNN", + network_name = "sNN.pca", + return_gobject = TRUE, + verbose = TRUE) { ## louvain clustering version to use version <- match.arg(version, c("community", "multinet")) @@ -2683,7 +2678,8 @@ doLouvainSubCluster <- function( hvf_param <- .dep_param(hvg_param, hvf_param) hvf_min_perc_cells <- .dep_param(hvg_min_perc_cells, hvf_min_perc_cells) hvf_mean_expr_det <- .dep_param(hvg_mean_expr_det, hvf_mean_expr_det) - use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, use_all_feats_as_hvf) + use_all_feats_as_hvf <- .dep_param(use_all_genes_as_hvg, + use_all_feats_as_hvf) min_nr_of_hvf <- .dep_param(min_nr_of_hvg, min_nr_of_hvf) # get common args @@ -2760,13 +2756,12 @@ doLouvainSubCluster <- function( #' #' getClusterSimilarity(g, cluster_column = "leiden_clus") #' @export -getClusterSimilarity <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", 
"spearman")) { +getClusterSimilarity <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman")) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2817,7 +2812,7 @@ getClusterSimilarity <- function( # correlation matrix cormatrix <- cor_flex(x = testmatrix, method = cor) - cor_table <- data.table::as.data.table(reshape2::melt(cormatrix)) + cor_table <- melt_matrix(cormatrix) data.table::setnames( cor_table, old = c("Var1", "Var2"), c("group1", "group2") @@ -2885,20 +2880,19 @@ getClusterSimilarity <- function( #' #' mergeClusters(g, cluster_column = "leiden_clus") #' @export -mergeClusters <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman"), - new_cluster_name = "merged_cluster", - min_cor_score = 0.8, - max_group_size = 20, - force_min_group_size = 10, - max_sim_clusters = 10, - return_gobject = TRUE, - verbose = TRUE) { +mergeClusters <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman"), + new_cluster_name = "merged_cluster", + min_cor_score = 0.8, + max_group_size = 20, + force_min_group_size = 10, + max_sim_clusters = 10, + return_gobject = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -3176,18 +3170,17 @@ mergeClusters <- function( #' #' getDendrogramSplits(g, cluster_column = "leiden_clus") #' @export -getDendrogramSplits <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - cor = c("pearson", "spearman"), - distance = "ward.D", - h = NULL, - h_color = "red", - show_dend = TRUE, - verbose = TRUE) { +getDendrogramSplits <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + cor = c("pearson", "spearman"), + distance = "ward.D", + h = NULL, + h_color = "red", + show_dend = TRUE, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -3235,11 +3228,11 @@ getDendrogramSplits <- function( if (show_dend == TRUE) { # plot dendrogram - graphics::plot(cordend) + plot(cordend) # add horizontal lines if (!is.null(h)) { - graphics::abline(h = h, col = h_color) + abline(h = h, col = h_color) } } @@ -3262,6 +3255,330 @@ getDendrogramSplits <- function( # projection #### + +# * labelTransfer #### + +#' @name labelTransfer +#' @title Transfer labels/annotations between sets of data via similarity +#' voting +#' @description +#' When two sets of data share an embedding space, transfer the labels from +#' one of the sets to the other based on KNN similarity voting in that space. +#' @param x target object +#' @param y source object +#' @param source_cell_ids cell/spatial IDs with the source labels to transfer +#' @param target_cell_ids cell/spatial IDs to transfer the labels to. +#' IDs from `source_cell_ids` are always included as well. 
+#' @param labels metadata column in source with labels to transfer
+#' @param k number of nearest neighbors used when training the KNN classifier
+#' @param name metadata column in target to apply the full set of labels to
+#' @param prob whether to output KNN probabilities along with the label
+#' predictions
+#' @param reduction reduction on cells or features (default = "cells")
+#' @param reduction_method shared reduction method (default = "pca" space)
+#' @param reduction_name name of shared reduction space (default name = "pca")
+#' @param dimensions_to_use dimensions to use in shared reduction space
+#' (default = 1:10)
+#' @returns object `x` with new transferred labels added to metadata
+#' @inheritDotParams FNN::knn -train -test -cl -k -prob
+#' @details
+#' This function trains a KNN classifier with [FNN::knn()].
+#' The training data comes from object `y` (or from the `source_cell_ids`
+#' subset of `x`) and uses the existing annotations in the cell metadata.
+#' Cells in `x` without annotations/labels (or those in the `target_cell_ids`
+#' subset of `x`) receive predicted labels (and, when `prob = TRUE`, the
+#' associated probabilities).
+#'
+#' **IMPORTANT** This projection assumes that you're using the same dimension
+#' reduction space (e.g. PCA) and number of dimensions (e.g. first 10 PCs) to
+#' train the KNN classifier as you used to create the initial
+#' annotations/labels in the source Giotto object.
+#'
+#' This function also makes it possible to work with very large datasets:
+#' predict cell labels on a smaller, subsetted Giotto object and then project
+#' those labels onto the remaining cells in the target Giotto object. It can
+#' also be used to transfer labels from one set of annotated data to another
+#' dataset based on expression similarity after joining and integrating.
+#'
+#' @examples
+#' g <- GiottoData::loadGiottoMini("visium")
+#' id_subset <- sample(spatIDs(g), 300)
+#' n_pred <- nrow(pDataDT(g)) - 300
+#'
+#' # transfer labels from one object to another ###################
+#' g_small <- g[, id_subset]
+#' # additional steps to get labels to transfer on smaller object...
+#' g <- labelTransfer(g, g_small, labels = "leiden_clus")
+#'
+#' # transfer labels between subsets of a single object ###########
+#' g <- labelTransfer(g,
+#'     labels = "leiden_clus", source_cell_ids = id_subset, name = "knn_leiden2"
+#' )
+#' @md
+NULL
+
+setGeneric("labelTransfer",
+    function(x, y, ...) standardGeneric("labelTransfer"))
+
+#' @rdname labelTransfer
+#' @export
+setMethod("labelTransfer", signature(x = "giotto", y = "giotto"), function(x, y,
+    spat_unit = NULL,
+    feat_type = NULL,
+    labels,
+    k = 10,
+    name = paste0("trnsfr_", labels),
+    prob = TRUE,
+    reduction = "cells",
+    reduction_method = "pca",
+    reduction_name = "pca",
+    dimensions_to_use = 1:10,
+    return_gobject = TRUE,
+    ...)
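    # giotto,giotto method: `y` supplies the already-labelled cells. A KNN
    # classifier (FNN::knn) is trained on the rows of the shared reduction in
    # `x` whose IDs also carry labels in `y`'s cell metadata; every remaining
    # row is treated as the test set and receives a predicted label.
    # Usage sketch (placeholder objects `big` and `small`, both carrying the
    # same PCA space):
    #   big <- labelTransfer(big, small, labels = "leiden_clus",
    #       reduction_name = "pca", dimensions_to_use = 1:10)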
{ + # NSE vars + temp_name <- cell_ID <- temp_name_prob <- NULL + + package_check(pkg_name = "FNN", repository = "CRAN") + spat_unit <- set_default_spat_unit(x, spat_unit = spat_unit) + feat_type <- set_default_feat_type(x, + spat_unit = spat_unit, feat_type = feat_type + ) + + # get data + cx_src <- getCellMetadata(y, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table" + ) + cx_tgt <- getCellMetadata(x, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table" + ) + dim_coord <- getDimReduction(x, + spat_unit = spat_unit, + feat_type = feat_type, + reduction = reduction, + reduction_method = reduction_method, + name = reduction_name, + output = "matrix" + ) + + # source annotation vector # + # names : cell_ID + # values: label + source_annot_vec <- cx_src[[labels]] + names(source_annot_vec) <- cx_src[["cell_ID"]] + + # create the matrix from the target object that you want to use for the + # kNN classifier + # the matrix should be the same for the source and target objects + # (e.g. same PCA space) + dimensions_to_use <- dimensions_to_use[ + # ensure dims to use exist + dimensions_to_use %in% seq_len(ncol(dim_coord)) + ] + matrix_to_use <- dim_coord[, dimensions_to_use] + + ## create the training and testset from the matrix + + # the training set is those spatial IDs that are in the source + # (w/ labels) AND target giotto object + in_common <- rownames(matrix_to_use) %in% names(source_annot_vec) + train <- matrix_to_use[in_common, ] + train <- train[match(names(source_annot_vec), rownames(train)), ] + + # the test set are the remaining cell_IDs that need a label + test <- matrix_to_use[!in_common, ] + + # make prediction + knnprediction <- FNN::knn( + train = train, + test = test, + cl = source_annot_vec, + k = k, + prob = prob, + ... + ) + + # get prediction results + knnprediction_vec <- as.vector(knnprediction) + names(knnprediction_vec) <- rownames(test) + + # add probability information + if (isTRUE(prob)) { + probs <- attr(knnprediction, "prob") + names(probs) <- rownames(test) + } + + # create annotation vector for all cell IDs (from source and predicted) + all_vec <- c(source_annot_vec, knnprediction_vec) + cx_tgt[, temp_name := all_vec[cell_ID]] + + if (isTRUE(prob)) { + cx_tgt[, temp_name_prob := probs[cell_ID]] + cx_tgt <- cx_tgt[, .(cell_ID, temp_name, temp_name_prob)] + cx_tgt[, temp_name_prob := ifelse( + is.na(temp_name_prob), 1, temp_name_prob + )] + + data.table::setnames(cx_tgt, + old = c("temp_name", "temp_name_prob"), + new = c(name, paste0(name, "_prob")) + ) + } else { + cx_tgt <- cx_tgt[, .(cell_ID, temp_name)] + data.table::setnames(cx_tgt, old = "temp_name", new = name) + } + + + if (return_gobject) { + x <- addCellMetadata(x, + spat_unit = spat_unit, + feat_type = feat_type, + new_metadata = cx_tgt, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + return(x) + } else { + return(cx_tgt) + } +}) + +#' @rdname labelTransfer +#' @export +setMethod("labelTransfer", signature(x = "giotto", y = "missing"), function(x, + spat_unit = NULL, + feat_type = NULL, + source_cell_ids, + target_cell_ids, + labels, + k = 10, + name = paste0("trnsfr_", labels), + prob = TRUE, + reduction = "cells", + reduction_method = "pca", + reduction_name = "pca", + dimensions_to_use = 1:10, + return_gobject = TRUE, + ...) 
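    # giotto,missing method: transfers labels within a single object. Cells
    # named in `source_cell_ids` form the labelled training set, while
    # `target_cell_ids` (by default, every other cell) form the test set that
    # receives predictions.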
{ + # NSE vars + temp_name <- cell_ID <- temp_name_prob <- NULL + + package_check(pkg_name = "FNN", repository = "CRAN") + spat_unit <- set_default_spat_unit(x, spat_unit = spat_unit) + feat_type <- set_default_feat_type(x, + spat_unit = spat_unit, feat_type = feat_type + ) + + # get data + cx <- getCellMetadata(x, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table" + ) + dim_coord <- getDimReduction(x, + spat_unit = spat_unit, + feat_type = feat_type, + reduction = reduction, + reduction_method = reduction_method, + name = reduction_name, + output = "matrix" + ) + + # source annotation vector # + # names : cell_ID + # values: label + source_annot_vec <- cx[[labels]] + names(source_annot_vec) <- cx[["cell_ID"]] + source_annot_vec <- source_annot_vec[source_cell_ids] + + # target cell IDs (if not provided) are everything not in the source cell + # IDs + if (missing(target_cell_ids)) { + sids <- cx[["cell_ID"]] + target_cell_ids <- sids[!sids %in% source_cell_ids] + } + + # create the matrix from the target object that you want to use for the + # kNN classifier + # the matrix should be the same for the source and target objects + # (e.g. same PCA space) + dimensions_to_use <- dimensions_to_use[ + # ensure dims to use exist + dimensions_to_use %in% seq_len(ncol(dim_coord)) + ] + matrix_to_use <- dim_coord[, dimensions_to_use] + + ## create the training and testset from the matrix + + # the training set is those spatial IDs that are in the source + # (w/ labels) AND target giotto object + train <- matrix_to_use[source_cell_ids, ] + train <- train[match(names(source_annot_vec), rownames(train)), ] + + # the test set are the remaining cell_IDs that need a label + test <- matrix_to_use[target_cell_ids, ] + + # make prediction + knnprediction <- FNN::knn( + train = train, + test = test, + cl = source_annot_vec, + k = k, + prob = prob, + ... 
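        # any further arguments are passed through to FNN::knn(); the
        # -train/-test/-cl/-k/-prob exclusions in @inheritDotParams above
        # document which ones remain available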
+ ) + + # get prediction results + knnprediction_vec <- as.vector(knnprediction) + names(knnprediction_vec) <- rownames(test) + + # add probability information + if (isTRUE(prob)) { + probs <- attr(knnprediction, "prob") + names(probs) <- rownames(test) + } + + # create annotation vector for all cell IDs (from source and predicted) + all_vec <- c(source_annot_vec, knnprediction_vec) + cx[, temp_name := all_vec[cell_ID]] + + if (isTRUE(prob)) { + cx[, temp_name_prob := probs[cell_ID]] + cx <- cx[, .(cell_ID, temp_name, temp_name_prob)] + cx[, temp_name_prob := ifelse( + is.na(temp_name_prob), 1, temp_name_prob + )] + + data.table::setnames(cx, + old = c("temp_name", "temp_name_prob"), + new = c(name, paste0(name, "_prob")) + ) + } else { + cx <- cx[, .(cell_ID, temp_name)] + data.table::setnames(cx, old = "temp_name", new = name) + } + + + if (return_gobject) { + x <- addCellMetadata(x, + spat_unit = spat_unit, + feat_type = feat_type, + new_metadata = cx, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + return(x) + } else { + return(cx) + } +}) + + + + + #' @title Projection of cluster labels #' @name doClusterProjection #' @description Use a fast KNN classifier to predict labels from a smaller @@ -3307,28 +3624,32 @@ getDendrogramSplits <- function( #' source_cluster_labels = "leiden_clus" #' ) #' @export -doClusterProjection <- function( - target_gobject, - target_cluster_label_name = "knn_labels", - spat_unit = NULL, - feat_type = NULL, - source_gobject, - source_cluster_labels = NULL, - reduction = "cells", - reduction_method = "pca", - reduction_name = "pca", - dimensions_to_use = 1:10, - knn_k = 10, - prob = FALSE, - algorithm = c( - "kd_tree", - "cover_tree", "brute" - ), - return_gobject = TRUE) { +doClusterProjection <- function(target_gobject, + target_cluster_label_name = "knn_labels", + spat_unit = NULL, + feat_type = NULL, + source_gobject, + source_cluster_labels = NULL, + reduction = "cells", + reduction_method = "pca", + reduction_name = "pca", + dimensions_to_use = 1:10, + knn_k = 10, + prob = FALSE, + algorithm = c( + "kd_tree", + "cover_tree", "brute" + ), + return_gobject = TRUE) { + deprecate_warn( + when = "4.1.2", + what = "doClusterProjection()", + with = "labelTransfer()" + ) + # NSE vars cell_ID <- temp_name_prob <- NULL - # package check for dendextend package_check(pkg_name = "FNN", repository = "CRAN") spat_unit <- set_default_spat_unit( @@ -3351,8 +3672,10 @@ doClusterProjection <- function( source_annot_vec <- cell_meta_source[[source_cluster_labels]] names(source_annot_vec) <- cell_meta_source[["cell_ID"]] - # create the matrix from the target object that you want to use for the kNN classifier - # the matrix should be the same for the source and target objects (e.g. same PCA space) + # create the matrix from the target object that you want to use for the + # kNN classifier + # the matrix should be the same for the source and target objects + # (e.g. same PCA space) dim_obj <- getDimReduction( gobject = target_gobject, spat_unit = spat_unit, diff --git a/R/convenience_cosmx.R b/R/convenience_cosmx.R index 4a107002f..de4c8be65 100644 --- a/R/convenience_cosmx.R +++ b/R/convenience_cosmx.R @@ -1,5 +1,3 @@ - - # CLASS #### @@ -79,7 +77,8 @@ setMethod( plot(y ~ x, data = dat, asp = 1L, type = "n", ...) text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) 
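        # the empty plot() call only sets up the coordinate system; text()
        # then writes each FOV number at its position to show the slide layout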
- }) + } +) @@ -134,9 +133,8 @@ setMethod( #' force(g) #' } #' @export -importCosMx <- function( - cosmx_dir = NULL, slide = 1, fovs = NULL, micron = FALSE, px2mm = 0.12028 -) { +importCosMx <- function(cosmx_dir = NULL, slide = 1, fovs = NULL, + micron = FALSE, px2mm = 0.12028) { # get params a <- list(Class = "CosmxReader") if (!is.null(cosmx_dir)) { @@ -153,9 +151,8 @@ importCosMx <- function( } # * init #### -setMethod("initialize", signature("CosmxReader"), function( - .Object, cosmx_dir, slide, fovs, micron, px2mm -) { +setMethod("initialize", signature("CosmxReader"), + function(.Object, cosmx_dir, slide, fovs, micron, px2mm) { # provided params (if any) if (!missing(cosmx_dir)) { checkmate::assert_directory_exists(cosmx_dir) @@ -224,12 +221,12 @@ setMethod("initialize", signature("CosmxReader"), function( tx_dt = data.table::fread(tx_path), flip_loc_y = TRUE ) - } - else { + } else { pos <- data.table::data.table() warning(wrap_txt( "NO FOV SHIFTS. - fov_positions_file, tx_file, and metadata_file not auto detected. + fov_positions_file, tx_file, + and metadata_file not auto detected. One of these must be provided to infer FOV shifts.\n Alternatively, directly supply a data.table with: fov(int), x(numeric), y(numeric) in px scaling to `$offsets`" @@ -242,18 +239,16 @@ setMethod("initialize", signature("CosmxReader"), function( # transcripts load call - tx_fun <- function( - path = tx_path, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - dropcols = c( - "x_local_px", - "y_local_px", - "cell_ID", - "cell" - ), - verbose = NULL - ) { + tx_fun <- function(path = tx_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" + ), + verbose = NULL) { .cosmx_transcript( path = path, fovs = .Object@fovs %none% NULL, @@ -271,16 +266,14 @@ setMethod("initialize", signature("CosmxReader"), function( # mask load call - mask_fun <- function( - path = mask_dir, - # VERTICAL FLIP + NO VERTICAL SHIFT - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = TRUE, - verbose = NULL - ) { + mask_fun <- function(path = mask_dir, + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, + verbose = NULL) { .cosmx_poly( path = path, fovs = .Object@fovs %none% NULL, @@ -299,11 +292,9 @@ setMethod("initialize", signature("CosmxReader"), function( # expression load call - expr_fun <- function( - path = expr_path, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb") - ) { + expr_fun <- function(path = expr_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb")) { .cosmx_expression( path = path, fovs = .Object@fovs %none% NULL, @@ -315,15 +306,13 @@ setMethod("initialize", signature("CosmxReader"), function( # images load call - img_fun <- function( - path = composite_img_dir, - img_type = "composite", - img_name_fmt = paste0(img_type, "_fov%03d"), - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - verbose = NULL - ) { + img_fun <- function(path = composite_img_dir, + img_type = "composite", + img_name_fmt = paste0(img_type, "_fov%03d"), + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL) { .cosmx_image( path = path, fovs = .Object@fovs %none% NULL, @@ -342,17 +331,15 @@ 
setMethod("initialize", signature("CosmxReader"), function( # meta load call - meta_fun <- function( - path = meta_path, - dropcols = c( - "CenterX_local_px", - "CenterY_local_px", - "CenterX_global_px", - "CenterY_global_px", - "cell_id" - ), - verbose = NULL - ) { + meta_fun <- function(path = meta_path, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px", + "cell_id" + ), + verbose = NULL) { .cosmx_cellmeta( path = path, fovs = .Object@fovs %none% NULL, @@ -365,30 +352,29 @@ setMethod("initialize", signature("CosmxReader"), function( # build gobject call - gobject_fun <- function( - transcript_path = tx_path, - cell_labels_dir = mask_dir, - expression_path = expr_path, - metadata_path = meta_path, - feat_type = c("rna", "negprobes"), - split_keyword = list( - "NegPrb" - ), - load_images = list( - composite = "composite", - overlay = "overlay" - ), - load_expression = FALSE, - load_cellmeta = FALSE, - instructions = NULL - ) { + gobject_fun <- function(transcript_path = tx_path, + cell_labels_dir = mask_dir, + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c("rna", "negprobes"), + split_keyword = list( + "NegPrb" + ), + load_images = list( + composite = "composite", + overlay = "overlay" + ), + load_expression = FALSE, + load_cellmeta = FALSE, + instructions = NULL) { load_expression <- as.logical(load_expression) load_cellmeta <- as.logical(load_cellmeta) if (!is.null(load_images)) { checkmate::assert_list(load_images) if (is.null(names(load_images))) { - stop("Images directories provided to 'load_images' must be named") + stop("Images directories provided to + 'load_images' must be named") } } @@ -459,7 +445,7 @@ setMethod("initialize", signature("CosmxReader"), function( path = metadata_path ) - cx[] <- cx[][cell_ID %in% allowed_ids,] + cx[] <- cx[][cell_ID %in% allowed_ids, ] g <- setGiotto(g, cx) } @@ -479,7 +465,9 @@ setMethod("initialize", signature("CosmxReader"), function( #' @export setMethod("$", signature("CosmxReader"), function(x, name) { basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") - if (name %in% basic_info) return(methods::slot(x, name)) + if (name %in% basic_info) { + return(methods::slot(x, name)) + } return(x@calls[[name]]) }) @@ -497,8 +485,10 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { return(initialize(x)) } - stop(sprintf("Only items in '%s' can be set", - paste0(basic_info, collapse = "', '"))) + stop(sprintf( + "Only items in '%s' can be set", + paste0(basic_info, collapse = "', '") + )) }) #' @export @@ -516,23 +506,20 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # MODULAR #### -.cosmx_transcript <- function( - path, - fovs = NULL, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - dropcols = c( - "x_local_px", - "y_local_px", - "cell_ID", - "cell" - ), - micron = FALSE, - px2mm = 0.12028, - cores = determine_cores(), - verbose = NULL -) { - +.cosmx_transcript <- function(path, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" + ), + micron = FALSE, + px2mm = 0.12028, + cores = determine_cores(), + verbose = NULL) { if (missing(path)) { stop(wrap_txt( "No path to tx file provided or auto-detected" @@ -547,7 +534,7 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) if 
(!is.null(fovs)) { # subset to only needed FOVs - tx <- tx[fov %in% as.numeric(fovs),] + tx <- tx[fov %in% as.numeric(fovs), ] } # micron scaling if desired @@ -605,15 +592,13 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' When the variance is higher than 0.001, the function is re-run with the #' opposite `flip_loc_y` value. #' @keywords internal -.cosmx_infer_fov_shifts <- function( - tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L -) { +.cosmx_infer_fov_shifts <- function(tx_dt, meta_dt, + flip_loc_y = TRUE, navg = 100L) { fov <- NULL # NSE vars if (!missing(tx_dt)) { tx_head <- tx_dt[, head(.SD, navg), by = fov] x <- tx_head[, mean(x_global_px - x_local_px), by = fov] if (flip_loc_y) { - # test if flip is needed # Usual yshift variance / fov expected when correct is 0 to 1e-22 # if var is too high for any fov, swap `flip_loc_y` value @@ -633,11 +618,11 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { meta_head <- meta_dt[, head(.SD, navg), by = fov] x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] if (flip_loc_y) { - # test if flip is needed # Usual yshift variance / fov expected when correct is 0 to 1e-22 # if var is too high for any fov, swap `flip_loc_y` value - y <- meta_head[, var(CenterY_global_px + CenterY_local_px), by = fov] + y <- meta_head[ + , var(CenterY_global_px + CenterY_local_px), by = fov] if (y[, any(V1 > 0.001)]) { return(.cosmx_infer_fov_shifts( meta_dt = meta_dt, flip_loc_y = FALSE, navg = navg @@ -646,10 +631,12 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # use +y if local y values are flipped y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), - by = fov] + by = fov + ] } else { y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), - by = fov] + by = fov + ] } } else { stop("One of tx_dt or meta_dt must be provided\n") @@ -661,9 +648,7 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { return(res) } -.cosmx_imgname_fovparser <- function( - path -) { +.cosmx_imgname_fovparser <- function(path) { im_names <- list.files(path) fovs <- as.numeric(sub(".*F(\\d+)\\..*", "\\1", im_names)) if (any(is.na(fovs))) { @@ -675,22 +660,20 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { return(fovs) } -.cosmx_poly <- function( - path, - slide = 1, - fovs = NULL, - name = "cell", - # VERTICAL FLIP + NO SHIFTS - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = TRUE, - micron = FALSE, - px2mm = 0.12028, - offsets, - verbose = NULL -) { +.cosmx_poly <- function(path, + slide = 1, + fovs = NULL, + name = "cell", + # VERTICAL FLIP + NO SHIFTS + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, + micron = FALSE, + px2mm = 0.12028, + offsets, + verbose = NULL) { # NSE params f <- x <- y <- NULL @@ -723,13 +706,13 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { ) fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL - progressr::with_progress({ - p <- progressr::progressor(along = fovs) + with_pbar({ + p <- pbar(along = fovs) gpolys <- lapply(fovs, function(f) { segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID - mask_params$ID_fmt = paste0( + mask_params$ID_fmt <- paste0( sprintf("c_%d_%d_", slide, f), "%d" ) @@ -745,7 +728,8 @@ 
setMethod("$<-", signature("CosmxReader"), function(x, name, value) { if (micron) { px2micron <- px2mm / 1000 gpoly <- rescale( - gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + gpoly, + fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 ) xshift <- xshift * px2micron yshift <- yshift * px2micron @@ -765,21 +749,18 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { return(gpolys) } -.cosmx_cellmeta <- function( - path, - slide = 1, - fovs = NULL, - dropcols = c( - "CenterX_local_px", - "CenterY_local_px", - "CenterX_global_px", - "CenterY_global_px", - "cell_id" - ), - cores = determine_cores(), - verbose = NULL -) { - +.cosmx_cellmeta <- function(path, + slide = 1, + fovs = NULL, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px", + "cell_id" + ), + cores = determine_cores(), + verbose = NULL) { if (missing(path)) { stop(wrap_txt( "No path to metadata file provided or auto-detected" @@ -800,7 +781,7 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # subset to needed fovs if (!is.null(fovs)) { fovs <- as.integer(fovs) - meta_dt <- meta_dt[fov %in% fovs,] + meta_dt <- meta_dt[fov %in% fovs, ] } # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` @@ -827,16 +808,13 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { return(cx) } -.cosmx_expression <- function( - path, - slide = 1, - fovs = NULL, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - cores = determine_cores(), - verbose = NULL -) { - +.cosmx_expression <- function(path, + slide = 1, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + cores = determine_cores(), + verbose = NULL) { if (missing(path)) { stop(wrap_txt( "No path to exprMat file provided or auto-detected" @@ -851,11 +829,11 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # subset to needed fovs if (!is.null(fovs)) { fovs <- as.integer(fovs) - expr_dt <- expr_dt[fov %in% fovs,] + expr_dt <- expr_dt[fov %in% fovs, ] } # remove background values (cell 0) - expr_dt <- expr_dt[cell_ID != 0L,] + expr_dt <- expr_dt[cell_ID != 0L, ] # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` expr_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] @@ -876,10 +854,10 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { feat_ids <- rownames(expr_mat) bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) # subset and store split matrix - sub_mat <- expr_mat[bool,] + sub_mat <- expr_mat[bool, ] expr_list[[key_i + 1L]] <- sub_mat # remaining matrix - expr_mat <- expr_mat[!bool,] + expr_mat <- expr_mat[!bool, ] } # assign the main expr expr_list[[1L]] <- expr_mat @@ -889,30 +867,29 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { } expr_list <- lapply(seq_along(expr_list), function(expr_i) { - createExprObj(expression_data = expr_list[[expr_i]], - spat_unit = "cell", - feat_type = names(expr_list)[[expr_i]], - name = "raw", - provenance = "cell") + createExprObj( + expression_data = expr_list[[expr_i]], + spat_unit = "cell", + feat_type = names(expr_list)[[expr_i]], + name = "raw", + provenance = "cell" + ) }) return(expr_list) } -.cosmx_image <- function( - path, - fovs = NULL, - img_type = "composite", - img_name_fmt = paste(img_type, "_fov%03d"), - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - micron = FALSE, - px2mm = 0.12028, - offsets, - verbose = NULL -) { - +.cosmx_image <- function(path, + 
fovs = NULL, + img_type = "composite", + img_name_fmt = paste(img_type, "_fov%03d"), + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + micron = FALSE, + px2mm = 0.12028, + offsets, + verbose = NULL) { if (missing(path)) { stop(wrap_txt( "No path to image subdirectory to load provided or auto-detected" @@ -925,8 +902,8 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL verbose <- verbose %null% TRUE - progressr::with_progress({ - p <- progressr::progressor(along = fovs) + with_pbar({ + p <- pbar(along = fovs) gimg_list <- lapply(fovs, function(f) { imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) @@ -947,7 +924,8 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { if (micron) { px2micron <- px2mm / 1000 gimg <- rescale( - gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + gimg, + fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 ) xshift <- xshift * px2micron yshift <- yshift * px2micron @@ -974,9 +952,9 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' @returns list #' @keywords internal .load_cosmx_folder_subcellular <- function(dir_items, - FOVs = NULL, - cores, - verbose = TRUE) { + FOVs = NULL, + cores, + verbose = TRUE) { vmsg(.v = verbose, "Loading subcellular information...") # subcellular checks @@ -990,7 +968,8 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # FOVs to load vmsg(.v = verbose, "Loading FOV offsets...") fov_offset_file <- fread( - input = dir_items$`fov positions file`, nThread = cores) + input = dir_items$`fov positions file`, nThread = cores + ) if (is.null(FOVs)) FOVs <- fov_offset_file$fov # default to ALL FOVs FOV_ID <- as.list(sprintf("%03d", FOVs)) @@ -998,7 +977,8 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { vmsg(.v = verbose, "Loading transcript level info...") tx_coord_all <- fread( - input = dir_items$`transcript locations file`, nThread = cores) + input = dir_items$`transcript locations file`, nThread = cores + ) vmsg(.v = verbose, "Subcellular load done") data_list <- list( @@ -1018,8 +998,8 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' @returns list #' @keywords internal .load_cosmx_folder_aggregate <- function(dir_items, - cores, - verbose = TRUE) { + cores, + verbose = TRUE) { # data.table vars fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- CenterY_global_px <- CenterX_local_px <- @@ -1029,15 +1009,18 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { vmsg(.v = verbose, "Loading provided aggregated information...") # aggregate checks - if (!file.exists(dir_items$`expression matrix file`)) + if (!file.exists(dir_items$`expression matrix file`)) { stop(wrap_txt("No expression matrix file (.csv) detected")) - if (!file.exists(dir_items$`metadata file`)) + } + if (!file.exists(dir_items$`metadata file`)) { stop(wrap_txt("No metadata file (.csv) detected. 
Needed for cell spatial locations.")) + } # read in aggregate data expr_mat <- fread( - input = dir_items$`expression matrix file`, nThread = cores) + input = dir_items$`expression matrix file`, nThread = cores + ) metadata <- fread(input = dir_items$`metadata file`, nThread = cores) # setorder expression and spatlocs @@ -1047,12 +1030,14 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # generate unique cell IDs expr_mat[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID + )] expr_mat <- expr_mat[, fov := NULL] metadata[, fov_cell_ID := cell_ID] metadata[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID + )] # reorder data.table::setcolorder(x = metadata, c("cell_ID", "fov", "fov_cell_ID")) @@ -1071,13 +1056,18 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { spatloc_oldnames <- c("CenterX_global_px", "CenterY_global_px", "cell_ID") spatloc_oldnames_fov <- c("CenterX_local_px", "CenterY_local_px", "cell_ID") spatloc_newnames <- c("sdimx", "sdimy", "cell_ID") - data.table::setnames(spatlocs, old = spatloc_oldnames, new = spatloc_newnames) data.table::setnames( - spatlocs_fov, old = spatloc_oldnames_fov, new = spatloc_newnames) + spatlocs, old = spatloc_oldnames, new = spatloc_newnames) + data.table::setnames( + spatlocs_fov, + old = spatloc_oldnames_fov, new = spatloc_newnames + ) # cleanup metadata and spatlocs - metadata <- metadata[, c("CenterX_global_px", "CenterY_global_px", - "CenterX_local_px", "CenterY_local_px") := NULL] + metadata <- metadata[, c( + "CenterX_global_px", "CenterY_global_px", + "CenterX_local_px", "CenterY_local_px" + ) := NULL] # find unique cell_IDs present in both expression and metadata giotto_cell_ID <- unique(intersect(expr_mat$cell_ID, metadata$cell_ID)) @@ -1087,27 +1077,36 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # convert protein metadata to expr mat - # take all mean intensity protein information except for MembraneStain and DAPI + # take all mean intensity protein information except for + # MembraneStain and DAPI protein_meta_cols <- colnames(metadata) protein_meta_cols <- protein_meta_cols[ - grepl(pattern = "Mean.*", x = protein_meta_cols)] + grepl(pattern = "Mean.*", x = protein_meta_cols) + ] protein_meta_cols <- protein_meta_cols[ - !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI")] + !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI") + ] protein_meta_cols <- c("cell_ID", protein_meta_cols) prot_expr <- metadata[, protein_meta_cols, with = FALSE] prot_cell_ID <- metadata[, cell_ID] protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), - dimnames = list(prot_expr[[1]], - colnames(prot_expr[, -1])), - sparse = FALSE) + dimnames = list( + prot_expr[[1]], + colnames(prot_expr[, -1]) + ), + sparse = FALSE + ) protM <- t_flex(protM) # convert expression to sparse matrix spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), - dimnames = list(expr_mat[[1]], - colnames(expr_mat[, -1])), - sparse = TRUE) + dimnames = list( + expr_mat[[1]], + colnames(expr_mat[, -1]) + ), + sparse = TRUE + ) spM <- t_flex(spM) ## Ready for downstream aggregate gobject creation or appending into @@ -1168,7 +1167,8 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' \item{experimentname_\strong{tx_file}.csv (file)} #' } #' -#' [\strong{Workflows}] Workflow to use is accessed through the data_to_use param +#' 
[\strong{Workflows}] Workflow to use is accessed through the data_to_use +#' param #' \itemize{ #' \item{'all' - loads and requires subcellular information from tx_file and #' fov_positions_file @@ -1191,23 +1191,22 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' these image objects more responsive when accessing them from a server. #' \code{\link{showGiottoImageNames}} can be used to see the available images. #' @export -createGiottoCosMxObject <- function( - cosmx_dir = NULL, - data_to_use = c("all", "subcellular", "aggregate"), - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - FOVs = NULL, - instructions = NULL, - cores = determine_cores(), - verbose = TRUE -) { +createGiottoCosMxObject <- function(cosmx_dir = NULL, + data_to_use = c("all", "subcellular", "aggregate"), + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + FOVs = NULL, + instructions = NULL, + cores = determine_cores(), + verbose = TRUE) { # 0. setup cosmx_dir <- path.expand(cosmx_dir) # determine data to use data_to_use <- match.arg( - arg = data_to_use, choices = c("all", "subcellular", "aggregate")) + arg = data_to_use, choices = c("all", "subcellular", "aggregate") + ) if (data_to_use %in% c("all", "aggregate")) { stop(wrap_txt('Convenience workflows "all" and "aggregate" are not available yet')) @@ -1276,15 +1275,14 @@ createGiottoCosMxObject <- function( #' @inheritParams createGiottoCosMxObject #' @returns giotto object #' @keywords internal -.createGiottoCosMxObject_subcellular <- function( - dir_items, - FOVs = NULL, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL) { +.createGiottoCosMxObject_subcellular <- function(dir_items, + FOVs = NULL, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL) { target <- fov <- NULL # load tx detections and FOV offsets ------------------------------------- # @@ -1304,7 +1302,8 @@ createGiottoCosMxObject <- function( tx_coord_all[, c("x_global_px", "y_global_px", "cell_ID") := NULL] data.table::setcolorder( - tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov")) + tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov") + ) # feature detection type splitting --------------------------------------- # @@ -1330,13 +1329,17 @@ createGiottoCosMxObject <- function( if (isTRUE(verbose)) message("Loading image information...") composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("*", x, "*"))) + dir_items$`CellComposite folder`, paste0("*", x, "*") + )) cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("*", x, "*"))) + dir_items$`CellLabels folder`, paste0("*", x, "*") + )) compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("*", x, "*"))) + dir_items$`CompartmentLabels folder`, paste0("*", x, "*") + )) cellOverlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("*", x, "*"))) + dir_items$`CellOverlay folder`, paste0("*", x, "*") + )) # Missing warnings if (length(composite_dir) == 0) { @@ -1371,11 +1374,15 @@ createGiottoCosMxObject <- function( feat_coord <- feat_coords_all[fov == as.numeric(x)] data.table::setnames( - feat_coord, old = coord_oldnames, new = coord_newnames) + feat_coord, + old 
= coord_oldnames, new = coord_newnames + ) # neg probe info neg_coord <- neg_coords_all[fov == as.numeric(x)] data.table::setnames( - neg_coord, old = coord_oldnames, new = coord_newnames) + neg_coord, + old = coord_oldnames, new = coord_newnames + ) # build giotto object -------------------------------------- # @@ -1401,8 +1408,9 @@ createGiottoCosMxObject <- function( # find centroids as spatial locations ---------------------- # - if (isTRUE(verbose)) + if (isTRUE(verbose)) { message("Finding polygon centroids as cell spatial locations...") + } fov_subset <- addSpatialCentroidLocations( fov_subset, poly_info = "cell", @@ -1497,9 +1505,9 @@ createGiottoCosMxObject <- function( #' @returns giotto object #' @keywords internal .createGiottoCosMxObject_aggregate <- function(dir_items, - cores, - verbose = TRUE, - instructions = NULL) { + cores, + verbose = TRUE, + instructions = NULL) { data_to_use <- fov <- NULL data_list <- .load_cosmx_folder_aggregate( @@ -1536,19 +1544,25 @@ createGiottoCosMxObject <- function( # load in images img_ID <- data.table::data.table( fov = fov_shifts[, fov], - img_name = paste0("fov", - sprintf("%03d", fov_shifts[, fov]), "-image") + img_name = paste0( + "fov", + sprintf("%03d", fov_shifts[, fov]), "-image" + ) ) if (isTRUE(verbose)) message("Attaching image files...") composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("/*"))) + dir_items$`CellComposite folder`, paste0("/*") + )) cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("/*"))) + dir_items$`CellLabels folder`, paste0("/*") + )) compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("/*"))) + dir_items$`CompartmentLabels folder`, paste0("/*") + )) overlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("/*"))) + dir_items$`CellOverlay folder`, paste0("/*") + )) if (length(cellLabel_imgList) > 0) { cellLabel_imgList <- lapply(cellLabel_dir, function(x) { @@ -1563,12 +1577,17 @@ createGiottoCosMxObject <- function( if (length(compartmentLabel_dir) > 0) { compartmentLabel_imgList <- lapply( compartmentLabel_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) + createGiottoLargeImage(x, + name = "composite", + negative_y = TRUE) + } + ) } if (length(overlay_dir) > 0) { overlay_imgList <- lapply(overlay_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + createGiottoLargeImage(x, + name = "composite", + negative_y = TRUE) }) } } @@ -1587,20 +1606,20 @@ createGiottoCosMxObject <- function( #' (subellular transcript detection information) and #' \emph{aggregate} (aggregated detection count matrices by cell polygon from #' NanoString) -#' data will be loaded in. The two will be separated into 'cell' and 'cell_agg' +#' data will be loaded in. The two will be separated into "cell" and "cell_agg" #' spatial units in order to denote the difference in origin of the two. #' @seealso createGiottoCosMxObject .createGiottoCosMxObject_aggregate #' .createGiottoCosMxObject_subcellular #' @keywords internal .createGiottoCosMxObject_all <- function(dir_items, - FOVs, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL, - ...) { + FOVs, + remove_background_polygon = TRUE, + background_algo = "range", + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL, + ...) { # 1. 
create subcellular giotto as spat_unit 'cell' cosmx_gobject <- .createGiottoCosMxObject_subcellular( dir_items = dir_items, @@ -1631,11 +1650,14 @@ createGiottoCosMxObject <- function( # workflow # Add aggregate expression information - if (isTRUE(verbose)) wrap_msg( - 'Appending provided aggregate expression data as... + if (isTRUE(verbose)) { + wrap_msg( + 'Appending provided aggregate expression data as... spat_unit: "cell_agg" feat_type: "rna" - name: "raw"') + name: "raw"' + ) + } # add expression data to expression slot s4_expr <- createExprObj( name = "raw", @@ -1645,16 +1667,22 @@ createGiottoCosMxObject <- function( provenance = "cell_agg" ) - cosmx_gobject <- set_expression_values(cosmx_gobject, values = s4_expr) + cosmx_gobject <- setGiotto(cosmx_gobject, s4_expr) # Add spatial locations - if (isTRUE(verbose)) wrap_msg( - 'Appending metadata provided spatial locations data as... + if (isTRUE(verbose)) { + wrap_msg( + 'Appending metadata provided spatial locations data as... --> spat_unit: "cell_agg" name: "raw" - --> spat_unit: "cell" name: "raw_fov"') - if (isTRUE(verbose)) wrap_msg( - 'Polygon centroid derived spatial locations assigned as... - --> spat_unit: "cell" name: "raw" (default)') + --> spat_unit: "cell" name: "raw_fov"' + ) + } + if (isTRUE(verbose)) { + wrap_msg( + 'Polygon centroid derived spatial locations assigned as... + --> spat_unit: "cell" name: "raw" (default)' + ) + } locsObj <- create_spat_locs_obj( name = "raw", @@ -1669,9 +1697,8 @@ createGiottoCosMxObject <- function( provenance = "cell_agg" ) - cosmx_gobject <- set_spatial_locations(cosmx_gobject, spatlocs = locsObj) - cosmx_gobject <- set_spatial_locations(cosmx_gobject, - spatlocs = locsObj_fov) + cosmx_gobject <- setGiotto(cosmx_gobject, locsObj, initialize = FALSE) + cosmx_gobject <- setGiotto(cosmx_gobject, locsObj_fov, initialize = FALSE) # initialize cell and feat IDs and metadata slots for 'cell_agg' spat_unit agg_cell_ID <- colnames(s4_expr[]) @@ -1685,18 +1712,18 @@ createGiottoCosMxObject <- function( # Add metadata to both the given and the poly spat_units if (isTRUE(verbose)) message("Appending provided cell metadata...") cosmx_gobject <- addCellMetadata(cosmx_gobject, - spat_unit = "cell", - feat_type = "rna", - new_metadata = metadata, - by_column = TRUE, - column_cell_ID = "cell_ID" + spat_unit = "cell", + feat_type = "rna", + new_metadata = metadata, + by_column = TRUE, + column_cell_ID = "cell_ID" ) cosmx_gobject <- addCellMetadata(cosmx_gobject, - spat_unit = "cell_agg", - feat_type = "rna", - new_metadata = metadata, - by_column = TRUE, - column_cell_ID = "cell_ID" + spat_unit = "cell_agg", + feat_type = "rna", + new_metadata = metadata, + by_column = TRUE, + column_cell_ID = "cell_ID" ) initialize(cosmx_gobject) @@ -1712,11 +1739,12 @@ createGiottoCosMxObject <- function( #' values denote missing items #' @keywords internal .read_cosmx_folder <- function(cosmx_dir, - verbose = TRUE) { + verbose = TRUE) { ch <- box_chars() - if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) + if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) { stop("The full path to a cosmx directory must be given.") + } vmsg("A structured CosMx directory will be used\n", .v = verbose) # find directories (length = 1 if present, length = 0 if missing) @@ -1731,7 +1759,8 @@ createGiottoCosMxObject <- function( `metadata file` = "*metadata_file*" ) dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x))) + dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x)) + ) 
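    # Illustrative note (hypothetical paths, not from a real run): after the
    # Sys.glob() call above, each entry of `dir_items` holds the matched path,
    # or character(0) when that item is absent from `cosmx_dir`, e.g.
    #   dir_items$`CellLabels folder`
    #   #> "/data/cosmx_run/CellLabels"
    #   dir_items$`metadata file`
    #   #> character(0)
    # `lengths(dir_items)` below is therefore 1 for detected items and 0 for
    # missing ones, which the verbose reporting and missing-item checks that
    # follow rely on.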
dir_items_lengths <- lengths(dir_items) if (isTRUE(verbose)) { @@ -1761,8 +1790,3 @@ createGiottoCosMxObject <- function( return(dir_items) } - - - - - diff --git a/R/convenience_general.R b/R/convenience_general.R index 1d626c5bf..86cc63493 100644 --- a/R/convenience_general.R +++ b/R/convenience_general.R @@ -30,9 +30,9 @@ #' \itemize{ #' \item{1. detection of items within \code{data_dir} by looking for keywords #' assigned through \code{dir_items}} -#' \item{2. check of detected items to see if everything needed has been found. -#' Dictionary of necessary vs optional items for each \code{data_to_use} -#' *workflow* is provided through \code{require_data_DT}} +#' \item{2. check of detected items to see if everything needed has been +#' found. Dictionary of necessary vs optional items for each +#' \code{data_to_use} *workflow* is provided through \code{require_data_DT}} #' \item{3. if multiple filepaths are found to be matching then select the #' first one. This function is only intended to find the first level #' subdirectories and files.} @@ -82,27 +82,32 @@ NULL #' @describeIn read_data_folder Should not be used directly #' @keywords internal -.read_data_folder <- function(spat_method = NULL, - data_dir = NULL, - dir_items, - data_to_use, - load_format = NULL, - require_data_DT, - cores = NA, - verbose = NULL, - toplevel = 2L) { +.read_data_folder <- function( + spat_method = NULL, + data_dir = NULL, + dir_items, + data_to_use, + load_format = NULL, + require_data_DT, + cores = NA, + verbose = NULL, + toplevel = 2L) { ch <- box_chars() # 0. check params if (is.null(data_dir) || !dir.exists(data_dir)) { - .gstop(.n = toplevel, "The full path to a", spat_method, - "directory must be given.") + .gstop( + .n = toplevel, "The full path to a", spat_method, + "directory must be given." + ) } vmsg(.v = verbose, "A structured", spat_method, "directory will be used") if (!data_to_use %in% require_data_DT$workflow) { - .gstop(.n = toplevel, - "Data requirements for data_to_use not found in require_data_DT") + .gstop( + .n = toplevel, + "Data requirements for data_to_use not found in require_data_DT" + ) } # 1. 
detect items @@ -128,8 +133,10 @@ NULL ) for (item_i in seq_along(dir_items[[item]])) { # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) + subItem <- gsub( + pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]] + ) vmsg( .v = verbose, .is_debug = TRUE, .initial = paste0(ch$s, ch$s, ch$l, ch$h, ch$h), @@ -147,13 +154,16 @@ NULL require_data_DT <- require_data_DT[workflow == data_to_use, ] - if (!is.null(load_format)) + if (!is.null(load_format)) { require_data_DT <- require_data_DT[filetype == load_format, ] + } - if (item %in% require_data_DT[needed == TRUE, item]) + if (item %in% require_data_DT[needed == TRUE, item]) { stop(item, " is missing") - if (item %in% require_data_DT[needed == FALSE, item]) + } + if (item %in% require_data_DT[needed == FALSE, item]) { warning(item, "is missing (optional)") + } } } @@ -203,30 +213,38 @@ abbrev_path <- function(path, head = 15, tail = 35L) { nftype <- length(x@filetype) datatype <- format(names(x@filetype)) pre_ftypes <- format(c(pre, rep("", nftype - 1L))) - cat(sprintf("%s %s -- %s\n", - pre_ftypes, - datatype, - x@filetype), - sep = "") + cat( + sprintf( + "%s %s -- %s\n", + pre_ftypes, + datatype, + x@filetype + ), + sep = "" + ) } # pattern - list.files pattern to use to search for specific files/dirs # warn - whether to warn when a pattern does not find any files # first - whether to only return the first match -.detect_in_dir <- function( - path, pattern, recursive = FALSE, platform, warn = TRUE, first = TRUE -) { - f <- list.files(path, pattern = pattern, recursive = recursive, full.names = TRUE) +.detect_in_dir <- function(path, pattern, recursive = FALSE, + platform, warn = TRUE, first = TRUE) { + f <- list.files(path, pattern = pattern, recursive = recursive, + full.names = TRUE) lenf <- length(f) - if (lenf == 1L) return(f) # one match + if (lenf == 1L) { + return(f) + } # one match else if (lenf == 0L) { # no matches if (warn) { - warning(sprintf( - "%s not detected in %s directory", - pattern, - platform - ), - call. = FALSE) + warning( + sprintf( + "%s not detected in %s directory", + pattern, + platform + ), + call. = FALSE + ) } return(NULL) } @@ -262,7 +280,7 @@ abbrev_path <- function(path, head = 15, tail = 35L) { #' @param h5_gene_ids gene names as symbols (default) or ensemble gene ids #' @param h5_tissue_positions_path path to tissue locations (.csv file) #' @param h5_image_png_path path to tissue .png file (optional). Image -#' autoscaling looks for matches in the filename for either 'hires' or 'lowres' +#' autoscaling looks for matches in the filename for either "hires" or "lowres" #' @param h5_json_scalefactors_path path to .json scalefactors (optional) #' @param png_name select name of png to use (see details) #' @param do_manual_adj deprecated @@ -275,7 +293,7 @@ abbrev_path <- function(path, head = 15, tail = 35L) { #' @param cores how many cores or threads to use to read data if paths are #' provided #' @param expression_matrix_class class of expression matrix to use -#' (e.g. 'dgCMatrix', 'DelayedArray') +#' (e.g. 
"dgCMatrix", "DelayedArray") #' @param h5_file optional path to create an on-disk h5 file #' @param verbose be verbose #' @@ -283,40 +301,48 @@ abbrev_path <- function(path, head = 15, tail = 35L) { #' @details #' If starting from a Visium 10X directory: #' \itemize{ -#' \item{expr_data: raw will take expression data from raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} -#' \item{gene_column_index: which gene identifiers (names) to use if there are multiple columns (e.g. ensemble and gene symbol)} -#' \item{png_name: by default the first png will be selected, provide the png name to override this (e.g. myimage.png)} -#' \item{the file scalefactors_json.json will be detected automatically and used to attempt to align the data} +#' \item{expr_data: raw will take expression data from +#' raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} +#' \item{gene_column_index: which gene identifiers (names) to use if there +#' are multiple columns (e.g. ensemble and gene symbol)} +#' \item{png_name: by default the first png will be selected, provide the png +#' name to override this (e.g. myimage.png)} +#' \item{the file scalefactors_json.json will be detected automatically and +#' used to attempt to align the data} #' } #' #' If starting from a Visium 10X .h5 file #' \itemize{ #' \item{h5_visium_path: full path to .h5 file: /your/path/to/visium_file.h5} -#' \item{h5_tissue_positions_path: full path to spatial locations file: /you/path/to/tissue_positions_list.csv} -#' \item{h5_image_png_path: full path to png: /your/path/to/images/tissue_lowres_image.png} -#' \item{h5_json_scalefactors_path: full path to .json file: /your/path/to/scalefactors_json.json} +#' \item{h5_tissue_positions_path: full path to spatial locations file: +#' /you/path/to/tissue_positions_list.csv} +#' \item{h5_image_png_path: full path to png: +#' /your/path/to/images/tissue_lowres_image.png} +#' \item{h5_json_scalefactors_path: full path to .json file: +#' /your/path/to/scalefactors_json.json} #' } #' #' @export -createGiottoVisiumObject <- function(visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - h5_visium_path = NULL, - h5_gene_ids = c("symbols", "ensembl"), - h5_tissue_positions_path = NULL, - h5_image_png_path = NULL, - h5_json_scalefactors_path = NULL, - png_name = NULL, - do_manual_adj = FALSE, # deprecated - xmax_adj = 0, # deprecated - xmin_adj = 0, # deprecated - ymax_adj = 0, # deprecated - ymin_adj = 0, # deprecated - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - cores = NA, - verbose = NULL) { +createGiottoVisiumObject <- function( + visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + h5_visium_path = NULL, + h5_gene_ids = c("symbols", "ensembl"), + h5_tissue_positions_path = NULL, + h5_image_png_path = NULL, + h5_json_scalefactors_path = NULL, + png_name = NULL, + do_manual_adj = FALSE, # deprecated + xmax_adj = 0, # deprecated + xmin_adj = 0, # deprecated + ymax_adj = 0, # deprecated + ymin_adj = 0, # deprecated + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + cores = NA, + verbose = NULL) { # NSE vars barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL @@ -375,18 +401,17 @@ createGiottoVisiumObject <- function(visium_dir = NULL, -.visium_create <- function( - expr_counts_path, - h5_gene_ids = NULL, # h5 - gene_column_index = NULL, # folder - tissue_positions_path, - image_path = NULL, - 
scale_json_path = NULL, - png_name = NULL, - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - verbose = NULL) { +.visium_create <- function(expr_counts_path, + h5_gene_ids = NULL, # h5 + gene_column_index = NULL, # folder + tissue_positions_path, + image_path = NULL, + scale_json_path = NULL, + png_name = NULL, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + verbose = NULL) { # NSE vars barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL @@ -407,12 +432,16 @@ createGiottoVisiumObject <- function(visium_dir = NULL, } # if expr_results is not a list, make it a list compatible with downstream - if (!is.list(expr_results)) expr_results <- list( - "Gene Expression" = expr_results) + if (!is.list(expr_results)) { + expr_results <- list( + "Gene Expression" = expr_results + ) + } # format expected data into list to be used with readExprData() raw_matrix_list <- list("cell" = list("rna" = list( - "raw" = expr_results[["Gene Expression"]]))) + "raw" = expr_results[["Gene Expression"]] + ))) # add protein expression data to list if it exists if ("Antibody Capture" %in% names(expr_results)) { @@ -422,10 +451,13 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # 2. spatial locations spatial_results <- data.table::fread(tissue_positions_path) - colnames(spatial_results) <- c("barcode", "in_tissue", "array_row", - "array_col", "col_pxl", "row_pxl") + colnames(spatial_results) <- c( + "barcode", "in_tissue", "array_row", + "array_col", "col_pxl", "row_pxl" + ) spatial_results <- spatial_results[match(colnames( - raw_matrix_list$cell[[1]]$raw), barcode)] + raw_matrix_list$cell[[1]]$raw + ), barcode)] data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] # flip x and y @@ -447,7 +479,8 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # 5. metadata meta_results <- spatial_results[ - , .(cell_ID, in_tissue, array_row, array_col)] + , .(cell_ID, in_tissue, array_row, array_col) + ] expr_types <- names(raw_matrix_list$cell) meta_list <- list() for (etype in expr_types) { @@ -486,17 +519,17 @@ createGiottoVisiumObject <- function(visium_dir = NULL, # Find and check the filepaths within a structured visium directory -.visium_read_folder <- function( - visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - png_name = NULL, - verbose = NULL) { +.visium_read_folder <- function(visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = NULL) { vmsg(.v = verbose, "A structured visium directory will be used") ## check arguments - if (is.null(visium_dir)) + if (is.null(visium_dir)) { .gstop("visium_dir needs to be a path to a visium directory") + } visium_dir <- path.expand(visium_dir) if (!dir.exists(visium_dir)) .gstop(visium_dir, " does not exist!") expr_data <- match.arg(expr_data, choices = c("raw", "filter")) @@ -507,14 +540,16 @@ createGiottoVisiumObject <- function(visium_dir = NULL, "raw" = paste0(visium_dir, "/", "raw_feature_bc_matrix/"), "filter" = paste0(visium_dir, "/", "filtered_feature_bc_matrix/") ) - if (!file.exists(expr_counts_path)) + if (!file.exists(expr_counts_path)) { .gstop(expr_counts_path, "does not exist!") + } ## 2. 
check spatial locations spatial_dir <- paste0(visium_dir, "/", "spatial/") tissue_positions_path <- Sys.glob( - paths = file.path(spatial_dir, "tissue_positions*")) + paths = file.path(spatial_dir, "tissue_positions*") + ) ## 3. check spatial image @@ -528,8 +563,9 @@ createGiottoVisiumObject <- function(visium_dir = NULL, ## 4. check scalefactors scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") - if (!file.exists(scalefactors_path)) + if (!file.exists(scalefactors_path)) { .gstop(scalefactors_path, "does not exist!") + } list( @@ -543,29 +579,37 @@ createGiottoVisiumObject <- function(visium_dir = NULL, -.visium_read_h5 <- function( - h5_visium_path = h5_visium_path, # expression matrix - h5_gene_ids = h5_gene_ids, - h5_tissue_positions_path = h5_tissue_positions_path, - h5_image_png_path = h5_image_png_path, - h5_json_scalefactors_path = h5_json_scalefactors_path, - verbose = NULL) { +.visium_read_h5 <- function(h5_visium_path = h5_visium_path, # expression matrix + h5_gene_ids = h5_gene_ids, + h5_tissue_positions_path = h5_tissue_positions_path, + h5_image_png_path = h5_image_png_path, + h5_json_scalefactors_path = h5_json_scalefactors_path, + verbose = NULL) { # 1. filepaths - vmsg(.v = verbose, - "A path to an .h5 10X file was provided and will be used") - if (!file.exists(h5_visium_path)) + vmsg( + .v = verbose, + "A path to an .h5 10X file was provided and will be used" + ) + if (!file.exists(h5_visium_path)) { .gstop("The provided path ", h5_visium_path, " does not exist") - if (is.null(h5_tissue_positions_path)) + } + if (is.null(h5_tissue_positions_path)) { .gstop("A path to the tissue positions (.csv) needs to be provided to h5_tissue_positions_path") - if (!file.exists(h5_tissue_positions_path)) - .gstop("The provided path ", h5_tissue_positions_path, - " does not exist") + } + if (!file.exists(h5_tissue_positions_path)) { + .gstop( + "The provided path ", h5_tissue_positions_path, + " does not exist" + ) + } if (!is.null(h5_image_png_path)) { if (!file.exists(h5_image_png_path)) { - .gstop("The provided h5 image path ", h5_image_png_path, - "does not exist. - Set to NULL to exclude or provide the correct path.") + .gstop( + "The provided h5 image path ", h5_image_png_path, + "does not exist. + Set to NULL to exclude or provide the correct path." + ) } } if (!is.null(h5_json_scalefactors_path)) { @@ -608,8 +652,9 @@ createGiottoVisiumObject <- function(visium_dir = NULL, #' Adds circular giottoPolygons to the spatial_info slot of a Giotto Object #' for the "cell" spatial unit. #' @export -addVisiumPolygons <- function(gobject, - scalefactor_path = NULL) { +addVisiumPolygons <- function( + gobject, + scalefactor_path = NULL) { assert_giotto(gobject) visium_spat_locs <- getSpatialLocations( @@ -651,12 +696,14 @@ addVisiumPolygons <- function(gobject, if (!checkmate::test_file_exists(json_path)) { if (!is.null(json_path)) { warning("scalefactors not discovered at: \n", - json_path, call. = FALSE) + json_path, + call. = FALSE + ) } return(NULL) } - json_scalefactors <- jsonlite::read_json(json_path) + json_scalefactors <- read_json(json_path) # Intial assertion that json dimensions are appropriate checkmate::assert_list( @@ -729,8 +776,9 @@ addVisiumPolygons <- function(gobject, #' Visium spots. 
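#'
#' An illustrative sketch (hypothetical paths) of the exported
#' `addVisiumPolygons()` entry point, which attaches circular spot polygons
#' to the "cell" spatial unit:
#'
#' ```r
#' g <- createGiottoVisiumObject(visium_dir = "/path/to/visium_outs")
#' g <- addVisiumPolygons(
#'     gobject = g,
#'     scalefactor_path = file.path(
#'         "/path/to/visium_outs", "spatial", "scalefactors_json.json"
#'     )
#' )
#' ```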
#' @keywords internal #' @md -.visium_spot_poly <- function(spatlocs = NULL, - json_scalefactors) { +.visium_spot_poly <- function( + spatlocs = NULL, + json_scalefactors) { if (inherits(spatlocs, "spatLocsObj")) { spatlocs <- spatlocs[] } @@ -758,11 +806,10 @@ addVisiumPolygons <- function(gobject, # json_info expects the list read output from .visium_read_scalefactors # image_path should be expected to be full filepath # should only be used when do_manual_adj (deprecated) is FALSE -.visium_image <- function( - image_path, - json_info = NULL, - micron_scale = FALSE, - verbose = NULL) { +.visium_image <- function(image_path, + json_info = NULL, + micron_scale = FALSE, + verbose = NULL) { # assume image already checked vmsg(.v = verbose, .initial = " - ", "found image") @@ -852,9 +899,10 @@ addVisiumPolygons <- function(gobject, #' if image_file is a list. #' @returns giottoLargeImage #' @export -createMerscopeLargeImage <- function(image_file, - transforms_file, - name = "image") { +createMerscopeLargeImage <- function( + image_file, + transforms_file, + name = "image") { checkmate::assert_character(transforms_file) tfsDT <- data.table::fread(transforms_file) if (inherits(image_file, "character")) { @@ -916,27 +964,29 @@ createMerscopeLargeImage <- function(image_file, #' function matches against: #' \itemize{ #' \item{\strong{cell_boundaries} (folder .hdf5 files)} -#' \item{\strong{images} (folder of .tif images and a scalefactor/transfrom table)} +#' \item{\strong{images} (folder of .tif images and a +#' scalefactor/transfrom table)} #' \item{\strong{cell_by_gene}.csv (file)} #' \item{cell_metadata\strong{fov_positions_file}.csv (file)} #' \item{detected_transcripts\strong{metadata_file}.csv (file)} #' } #' @export -createGiottoMerscopeObject <- function(merscope_dir, - data_to_use = c("subcellular", "aggregate"), - FOVs = NULL, - poly_z_indices = 1:7, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - instructions = NULL, - cores = NA, - verbose = TRUE) { +createGiottoMerscopeObject <- function( + merscope_dir, + data_to_use = c("subcellular", "aggregate"), + FOVs = NULL, + poly_z_indices = seq(from = 1, to = 7), + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + instructions = NULL, + cores = NA, + verbose = TRUE) { fovs <- NULL # 0. setup @@ -945,14 +995,16 @@ createGiottoMerscopeObject <- function(merscope_dir, poly_z_indices <- as.integer(poly_z_indices) if (any(poly_z_indices < 1)) { stop(wrap_txt( - "poly_z_indices is a vector of one or more integers starting from 1.", + "poly_z_indices is a vector of one or more integers starting + from 1.", errWidth = TRUE )) } # determine data to use data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) + arg = data_to_use, choices = c("subcellular", "aggregate") + ) # 1. 
test if folder structure exists and is as expected dir_items <- .read_merscope_folder( @@ -991,7 +1043,9 @@ createGiottoMerscopeObject <- function(merscope_dir, ) } else { stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) + '" not implemented', + sep = "" + )) } return(merscope_gobject) @@ -1004,17 +1058,18 @@ createGiottoMerscopeObject <- function(merscope_dir, #' 'subcellular' workflow #' @param data_list list of loaded data from \code{\link{load_merscope_folder}} #' @keywords internal -.createGiottoMerscopeObject_subcellular <- function(data_list, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - cores = NA, - verbose = TRUE) { +.createGiottoMerscopeObject_subcellular <- function( + data_list, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + cores = NA, + verbose = TRUE) { feat_coord <- neg_coord <- cellLabel_dir <- instructions <- NULL # unpack data_list @@ -1037,9 +1092,11 @@ createGiottoMerscopeObject <- function(merscope_dir, # extract transcript_id col and store as feature meta feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) + with = FALSE + ]) blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) + with = FALSE + ]) feat_dt[, c("transcript_id", "barcode_id") := NULL] blank_dt[, c("transcript_id", "barcode_id") := NULL] @@ -1074,9 +1131,10 @@ createGiottoMerscopeObject <- function(merscope_dir, #' workflow #' @param data_list list of loaded data from \code{\link{load_merscope_folder}} #' @keywords internal -.createGiottoMerscopeObject_aggregate <- function(data_list, - cores = NA, - verbose = TRUE) { +.createGiottoMerscopeObject_aggregate <- function( + data_list, + cores = NA, + verbose = TRUE) { # unpack data_list micronToPixelScale <- data_list$micronToPixelScale expr_dt <- data_list$expr_dt @@ -1102,8 +1160,9 @@ createGiottoMerscopeObject <- function(merscope_dir, #' @description Given the path to a Spatial Genomics data directory, creates a #' Giotto object. #' @export -createSpatialGenomicsObject <- function(sg_dir = NULL, - instructions = NULL) { +createSpatialGenomicsObject <- function( + sg_dir = NULL, + instructions = NULL) { # Find files in Spatial Genomics directory dapi <- list.files(sg_dir, full.names = TRUE, pattern = "DAPI") mask <- list.files(sg_dir, full.names = TRUE, pattern = "mask") @@ -1151,10 +1210,11 @@ createSpatialGenomicsObject <- function(sg_dir = NULL, #' @describeIn read_data_folder Read a structured MERSCOPE folder #' @keywords internal -.read_merscope_folder <- function(merscope_dir, - data_to_use, - cores = NA, - verbose = NULL) { +.read_merscope_folder <- function( + merscope_dir, + data_to_use, + cores = NA, + verbose = NULL) { # prepare dir_items list dir_items <- list( `boundary info` = "*cell_boundaries*", @@ -1229,12 +1289,13 @@ NULL #' @rdname load_merscope_folder #' @keywords internal -.load_merscope_folder <- function(dir_items, - data_to_use, - fovs = NULL, - poly_z_indices = 1L:7L, - cores = NA, - verbose = TRUE) { +.load_merscope_folder <- function( + dir_items, + data_to_use, + fovs = NULL, + poly_z_indices = seq(from = 1, to = 7), + cores = NA, + verbose = TRUE) { # 1. 
load data_to_use-specific if (data_to_use == "subcellular") { data_list <- .load_merscope_folder_subcellular( @@ -1254,16 +1315,21 @@ NULL ) } else { stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) + '" not implemented', + sep = "" + )) } # 2. Load images if available if (!is.null(dir_items$`image info`)) { ## micron to px scaling factor micronToPixelScale <- Sys.glob(paths = file.path( - dir_items$`image info`, "*micron_to_mosaic_pixel_transform*"))[[1]] + dir_items$`image info`, "*micron_to_mosaic_pixel_transform*" + ))[[1]] micronToPixelScale <- data.table::fread( - micronToPixelScale, nThread = cores) + micronToPixelScale, + nThread = cores + ) # add to data_list data_list$micronToPixelScale <- micronToPixelScale @@ -1271,14 +1337,17 @@ NULL ## determine types of stains images_filenames <- list.files(dir_items$`image info`) bound_stains_filenames <- images_filenames[ - grep(pattern = ".tif", images_filenames)] + grep(pattern = ".tif", images_filenames) + ] bound_stains_types <- sapply(strsplit( - bound_stains_filenames, "_"), `[`, 2) + bound_stains_filenames, "_" + ), `[`, 2) bound_stains_types <- unique(bound_stains_types) img_list <- lapply_flex(bound_stains_types, function(stype) { img_paths <- Sys.glob(paths = file.path( - dir_items$`image info`, paste0("*", stype, "*"))) + dir_items$`image info`, paste0("*", stype, "*") + )) lapply_flex(img_paths, function(img) { createGiottoLargeImage(raster_object = img) @@ -1297,16 +1366,19 @@ NULL #' @describeIn load_merscope_folder Load items for 'subcellular' workflow #' @keywords internal -.load_merscope_folder_subcellular <- function(dir_items, - data_to_use, - cores = NA, - poly_z_indices = 1L:7L, - verbose = TRUE, - fovs = NULL) { +.load_merscope_folder_subcellular <- function( + dir_items, + data_to_use, + cores = NA, + poly_z_indices = 1L:7L, + verbose = TRUE, + fovs = NULL) { if (isTRUE(verbose)) message("Loading transcript level info...") if (is.null(fovs)) { tx_dt <- data.table::fread( - dir_items$`raw transcript info`, nThread = cores) + dir_items$`raw transcript info`, + nThread = cores + ) } else { message("Selecting FOV subset transcripts") tx_dt <- fread_colmatch( @@ -1319,7 +1391,8 @@ NULL } tx_dt[, c("x", "y") := NULL] # remove unneeded cols data.table::setcolorder( - tx_dt, c("gene", "global_x", "global_y", "global_z")) + tx_dt, c("gene", "global_x", "global_y", "global_z") + ) if (isTRUE(verbose)) message("Loading polygon info...") poly_info <- readPolygonFilesVizgenHDF5( @@ -1343,18 +1416,23 @@ NULL #' @describeIn load_merscope_folder Load items for 'aggregate' workflow #' @keywords internal -.load_merscope_folder_aggregate <- function(dir_items, - data_to_use, - cores = NA, - verbose = TRUE) { +.load_merscope_folder_aggregate <- function( + dir_items, + data_to_use, + cores = NA, + verbose = TRUE) { # metadata is polygon-related measurements vmsg("Loading cell metadata...", .v = verbose) cell_metadata_file <- data.table::fread( - dir_items$`cell metadata`, nThread = cores) + dir_items$`cell metadata`, + nThread = cores + ) vmsg("Loading expression matrix", .v = verbose) expr_dt <- data.table::fread( - dir_items$`cell feature matrix`, nThread = cores) + dir_items$`cell feature matrix`, + nThread = cores + ) data_list <- list( @@ -1392,7 +1470,8 @@ NULL #' These files can be in one of the following formats: (i) scATAC tabix files, #' (ii) fragment files, or (iii) bam files. #' @param genome A string indicating the default genome to be used for all ArchR -#' functions. 
Currently supported values include "hg19","hg38","mm9", and "mm10". +#' functions. Currently supported values include "hg19","hg38","mm9", and +#' "mm10". #' This value is stored as a global environment variable, not part of the #' ArchRProject. #' This can be overwritten on a per-function basis using the given function's @@ -1411,27 +1490,28 @@ NULL #' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and #' TileMatrix-based LSI #' @export -createArchRProj <- function(fragmentsPath, - genome = c("hg19", "hg38", "mm9", "mm10"), - createArrowFiles_params = list( - sampleNames = "sample1", - minTSS = 0, - minFrags = 0, - maxFrags = 1e+07, - minFragSize = 10, - maxFragSize = 2000, - offsetPlus = 0, - offsetMinus = 0, - TileMatParams = list(tileSize = 5000) - ), - ArchRProject_params = list( - outputDirectory = getwd(), - copyArrows = FALSE - ), - addIterativeLSI_params = list(), - threads = ArchR::getArchRThreads(), - force = FALSE, - verbose = TRUE) { +createArchRProj <- function( + fragmentsPath, + genome = c("hg19", "hg38", "mm9", "mm10"), + createArrowFiles_params = list( + sampleNames = "sample1", + minTSS = 0, + minFrags = 0, + maxFrags = 1e+07, + minFragSize = 10, + maxFragSize = 2000, + offsetPlus = 0, + offsetMinus = 0, + TileMatParams = list(tileSize = 5000) + ), + ArchRProject_params = list( + outputDirectory = getwd(), + copyArrows = FALSE + ), + addIterativeLSI_params = list(), + threads = ArchR::getArchRThreads(), + force = FALSE, + verbose = TRUE) { if (!requireNamespace("ArchR")) { message('ArchR is needed. Install the package using remotes::install_github("GreenleafLab/ArchR")') @@ -1492,18 +1572,23 @@ createArchRProj <- function(fragmentsPath, #' @returns A Giotto object with at least an atac or epigenetic modality #' #' @export -createGiottoObjectfromArchR <- function(archRproj, - expression = NULL, - expression_feat = "atac", - spatial_locs = NULL, - sampleNames = "sample1", - ...) { +createGiottoObjectfromArchR <- function( + archRproj, + expression = NULL, + expression_feat = "atac", + spatial_locs = NULL, + sampleNames = "sample1", + ...) 
{ # extract GeneScoreMatrix GeneScoreMatrix_summarizedExperiment <- ArchR::getMatrixFromProject( - archRproj) - GeneScoreMatrix <- slot(slot( - GeneScoreMatrix_summarizedExperiment, "assays"), - "data")[["GeneScoreMatrix"]] + archRproj + ) + GeneScoreMatrix <- slot( + slot( + GeneScoreMatrix_summarizedExperiment, "assays" + ), + "data" + )[["GeneScoreMatrix"]] ## get cell names cell_names <- colnames(GeneScoreMatrix) @@ -1511,8 +1596,10 @@ createGiottoObjectfromArchR <- function(archRproj, cell_names <- gsub("-1", "", cell_names) ## get gene names - gene_names <- slot(GeneScoreMatrix_summarizedExperiment, - "elementMetadata")[["name"]] + gene_names <- slot( + GeneScoreMatrix_summarizedExperiment, + "elementMetadata" + )[["name"]] ## replace colnames with cell names colnames(GeneScoreMatrix) <- cell_names diff --git a/R/convenience_visiumHD.R b/R/convenience_visiumHD.R index a0c3ca5fe..5c8bbc3b2 100644 --- a/R/convenience_visiumHD.R +++ b/R/convenience_visiumHD.R @@ -16,7 +16,7 @@ setClass( calls = "list" ), prototype = list( - expression_source = 'raw', + expression_source = "raw", gene_column_index = 2, barcodes = NULL, array_subset_row = NULL, @@ -36,9 +36,7 @@ setMethod("show", signature("VisiumHDReader"), function(object) { "barcodes", "array_subset_row", "array_subset_col", "pxl_subset_row", "pxl_subset_col", "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) + pre <- sprintf("%s :", format(print_slots)) names(pre) <- print_slots # dir @@ -64,11 +62,13 @@ setMethod("show", signature("VisiumHDReader"), function(object) { cat(pre["barcodes"], barcodes, "\n") # array_subset_row - array_subset_row <- ifelse(!is.null(object@array_subset_row), "found", "none") + array_subset_row <- ifelse(!is.null(object@array_subset_row), + "found", "none") cat(pre["array_subset_row"], array_subset_row, "\n") # array_subset_col - array_subset_col <- ifelse(!is.null(object@array_subset_col), "found", "none") + array_subset_col <- ifelse(!is.null(object@array_subset_col), + "found", "none") cat(pre["array_subset_col"], array_subset_col, "\n") # pxl_subset_row @@ -92,27 +92,28 @@ setMethod("print", signature("VisiumHDReader"), function(x, ...) show(x)) #' @name importVisiumHD #' @description #' Giotto import functionalities for Visium HD datasets. This function generates -#' a `VisiumHDReader` instance that has convenient reader functions for converting -#' individual pieces of Visium HD data into Giotto-compatible representations when -#' the param `visiumHD_dir` is provided. +#' a `VisiumHDReader` instance that has convenient reader functions for +#' converting individual pieces of Visium HD data into Giotto-compatible +#' representations when the param `visiumHD_dir` is provided. #' A function that creates the full `giotto` object is also available. #' These functions should have all param values provided as defaults, but #' can be flexibly modified to do things such as look in alternative #' directories or paths. #' @param visiumHD_dir Visium HD output directory (e.g. square_016um) -#' @param expression_source character. Raw or filter expression data. Defaults to 'raw' +#' @param expression_source character. Raw or filter expression data. Defaults +#' to "raw" #' @param gene_column_index numeric. Expression column to use for gene names #' 1 = Ensembl and 2 = gene symbols #' @param barcodes character vector. (optional) Use if you only want to load #' a subset of the pixel barcodes -#' @param array_subset_row numeric vector. 
(optional) Vector with min and max values -#' to subset based on array rows -#' @param array_subset_col numeric vector. (optional) Vector with min and max values -#' to subset based on array columns -#' @param pxl_subset_row numeric vector. (optional) Vector with min and max values -#' to subset based on row pixels -#' @param pxl_subset_col numeric vector. (optional) Vector with min and max values -#' to subset based on column pixels +#' @param array_subset_row numeric vector. (optional) Vector with min and max +#' values to subset based on array rows +#' @param array_subset_col numeric vector. (optional) Vector with min and max +#' values to subset based on array columns +#' @param pxl_subset_row numeric vector. (optional) Vector with min and max +#' values to subset based on row pixels +#' @param pxl_subset_col numeric vector. (optional) Vector with min and max +#' values to subset based on column pixels #' @details #' Loading functions are generated after the `visiumHD_dir` is added. #' @returns VisiumHDReader object @@ -126,16 +127,17 @@ setMethod("print", signature("VisiumHDReader"), function(x, ...) show(x)) #' readerHD$visiumHD_dir <- visiumHD_dir #' #' # Load tissue positions or create cell metadata -#' tissue_pos = readerHD$load_tissue_position() +#' tissue_pos <- readerHD$load_tissue_position() #' metadata <- readerHD$load_metadata() #' #' Load matrix or create expression object #' matrix <- readerHD$load_matrix() -#' expression_obj = readerHD$load_expression() +#' expression_obj <- readerHD$load_expression() #' -#' Load transcript data (cell metadata, expression object, and transcripts per pixel) -#' my_transcripts = readerHD$load_transcripts(array_subset_row = c(500, 1000), -#' array_subset_col = c(500, 1000)) +#' Load transcript data (cell metadata, expression object, and transcripts per +#' pixel) +#' my_transcripts <- readerHD$load_transcripts(array_subset_row = c(500, 1000), +#' array_subset_col = c(500, 1000)) #' #' # Create a `giotto` object and add the loaded data #' # TODO @@ -143,13 +145,15 @@ setMethod("print", signature("VisiumHDReader"), function(x, ...) 
show(x)) #' @export importVisiumHD <- function( visiumHD_dir = NULL, - expression_source = 'raw', + expression_source = "raw", gene_column_index = 2, barcodes = NULL, array_subset_row = NULL, array_subset_col = NULL, pxl_subset_row = NULL, - pxl_subset_col = NULL) { + pxl_subset_col = NULL, + shape = "hexagon", + shape_size = 400) { # get params a <- list(Class = "VisiumHDReader") @@ -242,20 +246,23 @@ setMethod("initialize", signature("VisiumHDReader"), function( .visiumHD_detect <- function(pattern, path = p, recursive = FALSE) { - .detect_in_dir(pattern = pattern, path = path, recursive = recursive, platform = "visiumHD") + .detect_in_dir(pattern = pattern, path = path, + recursive = recursive, platform = "visiumHD") } - filter_expr_dir <- .visiumHD_detect(pattern = "filtered_feature_bc_matrix", path = p) - raw_expr_dir <- .visiumHD_detect(pattern = "raw_feature_bc_matrix", path = p) + filter_expr_dir <- .visiumHD_detect(pattern = "filtered_feature_bc_matrix", + path = p) + raw_expr_dir <- .visiumHD_detect(pattern = "raw_feature_bc_matrix", + path = p) s <- .Object@expression_source - if(s == 'raw') { - expr_dir = raw_expr_dir - } else if(s == 'filter') { - expr_dir = filter_expr_dir + if(s == "raw") { + expr_dir <- raw_expr_dir + } else if(s == "filter") { + expr_dir <- filter_expr_dir } else { - stop('expression source for visiumHD can only be raw or filter') + stop("expression source for visiumHD can only be raw or filter") } spatial_dir <- .visiumHD_detect(pattern = "spatial", path = p) @@ -263,7 +270,7 @@ setMethod("initialize", signature("VisiumHDReader"), function( c_index <- .Object@gene_column_index if(!c_index %in% c(1, 2)) { - stop('gene column index can only be 1 (Ensembl) or 2 (gene symbols)') + stop("gene column index can only be 1 (Ensembl) or 2 (gene symbols)") } read_folder_fun <- function( @@ -364,17 +371,13 @@ setMethod("initialize", signature("VisiumHDReader"), function( load_poly_fun <- function( path = expr_dir, - gpoints, - tissue_positions_path = spatial_dir, - shape = 'hexagon', + shape = "hexagon", shape_size = 400, - name = 'hex400', + name = "hex400", verbose = NULL ) { .visiumHD_poly( path = path, - gpoints = gpoints, - tissue_positions_path = tissue_positions_path, shape = shape, shape_size = shape_size, name = name, @@ -424,6 +427,8 @@ setMethod("initialize", signature("VisiumHDReader"), function( tissue_positions_path = spatial_dir, expression_path = expr_path, metadata_path = spatial_dir, + shape = "hexagon", + shape_size = 400, load_expression = TRUE, load_metadata = TRUE, instructions = NULL, @@ -456,16 +461,21 @@ setMethod("initialize", signature("VisiumHDReader"), function( polys <- funs$load_polygon( path = expr_dir, - gpoints, - tissue_positions_path = spatial_dir, - shape = 'hexagon', - shape_size = 400, - name = 'hex400', + shape = shape, + shape_size = shape_size, + name = paste0(shape, shape_size), verbose = NULL ) g <- setGiotto(g, polys) g <- addSpatialCentroidLocations(gobject = g, - poly_info = "hex400") + poly_info = paste0(shape, shape_size)) + g <- calculateOverlap(g, + spatial_info = paste0(shape, shape_size), + feat_info = "rna") + g <- overlapToMatrix(g, + poly_info = paste0(shape, shape_size), + feat_info = "rna", + name = "raw") # images images <- funs$load_image( @@ -521,8 +531,8 @@ setMethod("initialize", signature("VisiumHDReader"), function( #' @export setMethod("$", signature("VisiumHDReader"), function(x, name) { - basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", - "array_subset_row", 
"array_subset_col", + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", + "barcodes", "array_subset_row", "array_subset_col", "pxl_subset_row", "pxl_subset_col") if (name %in% basic_info) return(methods::slot(x, name)) @@ -531,8 +541,8 @@ setMethod("$", signature("VisiumHDReader"), function(x, name) { #' @export setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { - basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", - "array_subset_row", "array_subset_col", + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", + "barcodes", "array_subset_row", "array_subset_col", "pxl_subset_row", "pxl_subset_col") if (name %in% basic_info) { methods::slot(x, name) <- value @@ -545,8 +555,8 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { #' @export `.DollarNames.VisiumHDReader` <- function(x, pattern) { - dn <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", - "array_subset_row", "array_subset_col", + dn <- c("visiumHD_dir", "expression_source", "gene_column_index", + "barcodes", "array_subset_row", "array_subset_col", "pxl_subset_row", "pxl_subset_col") if (length(methods::slot(x, "calls")) > 0) { dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) @@ -556,13 +566,63 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { +.visiumHD_read_folder <- function( + path, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = NULL) { + vmsg(.v = verbose, "A structured visium directory will be used") + if (is.null(path)) + .gstop("path needs to be a path to a visium directory") + path <- path.expand(path) + path <- dirname(path) + if (!dir.exists(path)) .gstop(path, " does not exist!") + expr_data <- match.arg(expr_data, choices = c("raw", "filter")) -.visiumHD_matrix = function(path, - gene_column_index = 2, - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = TRUE) { + ## 1. check expression + expr_counts_path <- switch( + expr_data, + "raw" = paste0(path, "/", "raw_feature_bc_matrix/"), + "filter" = paste0(path, "/", "filtered_feature_bc_matrix/") + ) + if (!file.exists(expr_counts_path)) + .gstop(expr_counts_path, "does not exist!") + + ## 2. check spatial locations + spatial_dir <- paste0(path, "/", "spatial") + tissue_positions_path <- Sys.glob(paths = file.path(spatial_dir, + "tissue_positions*")) + + ## 3. check spatial image + if(is.null(png_name)) { + png_list <- list.files(spatial_dir, pattern = "*.png") + png_name <- png_list[1] + } + png_path <- paste0(spatial_dir,"/",png_name) + if(!file.exists(png_path)) .gstop(png_path, " does not exist!") + + ## 4. 
check scalefactors + scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") + if (!file.exists(scalefactors_path)) + .gstop(scalefactors_path, "does not exist!") + + + list( + expr_counts_path = expr_counts_path, + gene_column_index = gene_column_index, + tissue_positions_path = tissue_positions_path, + image_path = png_path, + scale_json_path = scalefactors_path + ) +} + +.visiumHD_matrix <- function(path = path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = TRUE) { # check if path is provided if (missing(path)) { @@ -591,11 +651,11 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { -.visiumHD_expression = function(path, - gene_column_index = 2, - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = TRUE) { +.visiumHD_expression <- function(path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = TRUE) { # check if path is provided if (missing(path)) { @@ -617,67 +677,19 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { split_by_type = split_by_type) - exprObj = createExprObj(expression_data = matrix_results, - spat_unit = "pixel", - feat_type = 'rna', - name = "raw", - provenance = "pixel") + exprObj <- createExprObj(expression_data = matrix_results, + spat_unit = "pixel", + feat_type = "rna", + name = "raw", + provenance = "pixel") - return(list('rna' = exprObj)) + return(list("rna" = exprObj)) } -.visiumHD_read_folder <- function( - path, - expr_data = c("raw", "filter"), - gene_column_index = 1, - png_name = NULL, - verbose = NULL) { - vmsg(.v = verbose, "A structured visium directory will be used") - if (is.null(path)) - .gstop("path needs to be a path to a visium directory") - path <- path.expand(path) - path <- dirname(path) - if (!dir.exists(path)) .gstop(path, " does not exist!") - expr_data <- match.arg(expr_data, choices = c("raw", "filter")) - - ## 1. check expression - expr_counts_path <- switch( - expr_data, - "raw" = paste0(path, '/', 'raw_feature_bc_matrix/'), - "filter" = paste0(path, '/', 'filtered_feature_bc_matrix/') - ) - if (!file.exists(expr_counts_path)) .gstop(expr_counts_path, "does not exist!") - - ## 2. check spatial locations - spatial_dir <- paste0(path, "/", "spatial/") - tissue_positions_path = Sys.glob(paths = file.path(spatial_dir, 'tissue_positions*')) - - ## 3. check spatial image - if(is.null(png_name)) { - png_list = list.files(spatial_dir, pattern = "*.png") - png_name = png_list[1] - } - png_path = paste0(spatial_dir,'/',png_name) - if(!file.exists(png_path)) .gstop(png_path, ' does not exist!') - - ## 4. 
check scalefactors - scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") - if (!file.exists(scalefactors_path)) - .gstop(scalefactors_path, "does not exist!") - - - list( - expr_counts_path = expr_counts_path, - gene_column_index = gene_column_index, - tissue_positions_path = tissue_positions_path, - image_path = png_path, - scale_json_path = scalefactors_path - ) -} -.visiumHD_tissue_positions = function(path, - verbose = TRUE) { +.visiumHD_tissue_positions <- function(path, + verbose = TRUE) { # check if path is provided if (missing(path)) { @@ -693,11 +705,13 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { vmsg(.v = verbose, .is_debug = TRUE, path) # check existence and access rights of files - tissue_positions_path = file.path(path, 'tissue_positions.parquet') + tissue_positions_path <- file.path(path, "tissue_positions.parquet") checkmate::assert_file_exists(tissue_positions_path) # read with parquet and data.table - tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + # tissue_positions = data.table::as.data.table(x = arrow::read_parquet( + # tissue_positions_path)) + tissue_positions <- arrow::read_parquet(tissue_positions_path) return(tissue_positions) @@ -743,10 +757,10 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { vmsg(.v = verbose, .is_debug = TRUE, path) # check existence and access rights of files - json_path = file.path(path, 'scalefactors_json.json') + json_path <- file.path(path, "scalefactors_json.json") checkmate::assert_file_exists(json_path) - json_scalefactors <- jsonlite::read_json(json_path) + json_scalefactors <- read_json(json_path) expected_json_names <- c( "regist_target_img_scalef", # NEW as of 2023 @@ -756,7 +770,8 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { "tissue_lowres_scalef" ) new_format_2023 <- .check_new_format(json_scalefactors) - expected_json_names <- .adjust_expected_names(new_format_2023, expected_json_names) + expected_json_names <- .adjust_expected_names(new_format_2023, + expected_json_names) .validate_json_names(json_scalefactors, expected_json_names) return(json_scalefactors) @@ -784,12 +799,14 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { return(img_type) } } - stop("image_path filename did not match either 'lowres' or 'hires'. Ensure the image is named accordingly.") + stop("image_path filename did not match either 'lowres' or 'hires'. + Ensure the image is named accordingly.") } .get_scale_factor <- function(visiumHD_img_type, json_info) { if (is.null(json_info)) { - warning("No scalefactors json info provided. VisiumHD image scale_factor defaulting to 1.") + warning("No scalefactors json info provided. VisiumHD image + scale_factor defaulting to 1.") return(1) } @@ -801,7 +818,8 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { stop("Unexpected image type: ", visiumHD_img_type)) if (is.null(scale_factor)) { - stop("Scale factor for ", visiumHD_img_type, " image not found in json_info.") + stop("Scale factor for ", visiumHD_img_type, + " image not found in json_info.") } return(scale_factor) @@ -830,7 +848,7 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { # 1. 
determine image scalefactor to use ---------------------------------- # image_path <- .visiumHD_read_folder(path = image_path) - image_path <- image_path[[4]] + image_path <- image_path[[4]] json_info <- .visiumHD_read_scalefactors(dirname(image_path)) if (!is.null(json_info)) checkmate::assert_list(json_info) png_name <- basename(image_path) # used for name pattern matching only @@ -862,38 +880,49 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { return(visiumHD_img_list) } -.visiumHD_poly = function(path, - gpoints, - tissue_positions_path, - shape = 'hexagon', - shape_size = 400, - name = 'hex400', - verbose = TRUE){ - - transcripts <- .visiumHD_transcript(expr_path = path, - gene_column_index = 2, - remove_zero_rows = TRUE, - split_by_type = TRUE, - tissue_positions_path = tissue_positions_path, - barcodes = NULL, - array_subset_row =c(500, 1000), - array_subset_col = c(500, 1000), - pxl_subset_row = NULL, - pxl_subset_col = NULL, - verbose = TRUE) - - gpoints = transcripts[[3]] - original_feat_ext = ext(gpoints$rna@spatVector) - polygons = tessellate(extent = original_feat_ext, - shape = shape, - shape_size = shape_size, - name = name) + +.visiumHD_poly <- function(path, + shape = "hexagon", + shape_size = 400, + name = "hex400", + verbose = TRUE){ + + if (!shape %in% c("hexagon", "square", "circle")) { + stop("Invalid shape. Please choose either + 'hexagon', 'square', or 'circle'.") + } + + if (shape_size <= 0) { + stop("Size must be a positive number.") + } + + tp <- arrow::read_parquet( + file = .visiumHD_read_folder( + path, verbose = FALSE)[3]$tissue_positions_path, + as_data_frame = FALSE) + + + original_feat_ext <- tp %>% + dplyr::summarise( + xmin = min(pxl_row_in_fullres, na.rm = TRUE), + xmax = max(pxl_row_in_fullres, na.rm = TRUE), + ymin = min(pxl_col_in_fullres, na.rm = TRUE), + ymax = max(pxl_col_in_fullres, na.rm = TRUE)) %>% + dplyr::collect() %>% + { ext(.$xmin, .$xmax, .$ymin, .$ymax) } + + #ext(gpoints$rna@spatVector) + message(paste("Creating a", shape, "polygon with size", shape_size)) + polygons <- tessellate(extent = original_feat_ext, + shape = shape, + shape_size = shape_size, + name = name) return(polygons) } -.visiumHD_meta = function( +.visiumHD_meta <- function( path, verbose = TRUE) { @@ -911,15 +940,16 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { vmsg(.v = verbose, .is_debug = TRUE, path) # check existence and access rights of files - tissue_positions_path = file.path(path, 'tissue_positions.parquet') + tissue_positions_path <- file.path(path, "tissue_positions.parquet") checkmate::assert_file_exists(tissue_positions_path) # read with parquet and data.table - tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + tissue_positions <- data.table::as.data.table( + x = arrow::read_parquet(tissue_positions_path)) vmsg(.v = verbose, "creating metadata ...") - data.table::setnames(tissue_positions, 'barcode', 'cell_ID') + data.table::setnames(tissue_positions, "barcode", "cell_ID") cx <- createCellMetaObj( metadata = tissue_positions, @@ -934,21 +964,21 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { -.visiumHD_transcript = function(expr_path, - gene_column_index = 2, - remove_zero_rows = TRUE, - split_by_type = TRUE, - tissue_positions_path, - barcodes = NULL, - array_subset_row = c(500, 1000), - array_subset_col = c(500, 1000), - pxl_subset_row = NULL, - pxl_subset_col = NULL, - verbose = TRUE) { +.visiumHD_transcript <- 
function(expr_path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + tissue_positions_path, + barcodes = NULL, + array_subset_row = c(500, 1000), + array_subset_col = c(500, 1000), + pxl_subset_row = NULL, + pxl_subset_col = NULL, + verbose = TRUE) { # function to create expression matrix - matrix = .visiumHD_matrix( + matrix <- .visiumHD_matrix( path = expr_path, gene_column_index = gene_column_index, remove_zero_rows = remove_zero_rows, @@ -958,56 +988,69 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { # function to create tissue position data.table - tissue_positions = .visiumHD_tissue_positions( + tissue_positions <- data.table::as.data.table(.visiumHD_tissue_positions( path = tissue_positions_path, verbose = verbose - ) + )) - vmsg(.v = verbose, "creating visiumHD tissue position x expression data file ...") + vmsg(.v = verbose, + "creating visiumHD tissue position x expression data file ...") # subset data if(!is.null(barcodes)) { vmsg(.v = verbose, "subsetting visiumHD on barcodes") - tissue_positions = tissue_positions[barcode %in% barcodes] + tissue_positions <- tissue_positions[barcode %in% barcodes] } if(!is.null(array_subset_row)) { if(is.vector(array_subset_row) & length(array_subset_row) == 2) { vmsg(.v = verbose, "subsetting visiumHD on array rows") - tissue_positions = tissue_positions[array_row > array_subset_row[1] & array_row < array_subset_row[2]] + tissue_positions <- tissue_positions[ + array_row > array_subset_row[1] & + array_row < array_subset_row[2]] } else { - stop('array_subset_row was provided but is not a vector with length 2') + stop("array_subset_row was provided but is not a vector with + length 2") } } if(!is.null(array_subset_col)) { if(is.vector(array_subset_col) & length(array_subset_col) == 2) { vmsg(.v = verbose, "subsetting visiumHD on array columns") - tissue_positions = tissue_positions[array_col > array_subset_col[1] & array_col < array_subset_col[2]] + tissue_positions <- tissue_positions[ + array_col > array_subset_col[1] & + array_col < array_subset_col[2]] } else { - stop('array_subset_col was provided but is not a vector with length 2') + stop("array_subset_col was provided but is not a vector with + length 2") } } if(!is.null(pxl_subset_row)) { if(is.vector(pxl_subset_row) & length(pxl_subset_row) == 2) { vmsg(.v = verbose, "subsetting visiumHD on row pixels") - tissue_positions = tissue_positions[pxl_row_in_fullres > pxl_subset_row[1] & pxl_row_in_fullres < pxl_subset_row[2]] + tissue_positions <- tissue_positions[ + pxl_row_in_fullres > pxl_subset_row[1] & + pxl_row_in_fullres < pxl_subset_row[2]] } else { - cat('pxl_subset_row is ', pxl_subset_row) - stop('pxl_subset_row was provided but is not a vector with length 2') + cat("pxl_subset_row is ", pxl_subset_row) + stop("pxl_subset_row was provided but is not a vector with + length 2") } } if(!is.null(pxl_subset_col)) { if(is.vector(pxl_subset_col) & length(pxl_subset_col) == 2) { vmsg(.v = verbose, "subsetting visiumHD on column pixels") - tissue_positions = tissue_positions[pxl_col_in_fullres > pxl_subset_col[1] & pxl_col_in_fullres < pxl_subset_col[2]] + tissue_positions <- tissue_positions[ + pxl_col_in_fullres > pxl_subset_col[1] & + pxl_col_in_fullres < pxl_subset_col[2]] } else { cat(pxl_subset_col) - stop('pxl_subset_col was provided but is not a vector with length 2') + stop("pxl_subset_col was provided but is not a vector with + length 2") } } @@ -1016,23 +1059,26 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, 
name, value) { array_subset_row, array_subset_col, pxl_subset_row, pxl_subset_col)))) { vmsg(.v = verbose, "subsetting visiumHD on expression matrix") - matrix = matrix[, colnames(matrix) %in% tissue_positions$barcode] + matrix <- matrix[, colnames(matrix) %in% tissue_positions$barcode] } # convert expression matrix to minimal data.table object - matrix_tile_dt = data.table::as.data.table(Matrix::summary(matrix)) - genes = matrix@Dimnames[[1]] - samples = matrix@Dimnames[[2]] + matrix_tile_dt <- data.table::as.data.table(Matrix::summary(matrix)) + genes <- matrix@Dimnames[[1]] + samples <- matrix@Dimnames[[2]] matrix_tile_dt[, gene := genes[i]] matrix_tile_dt[, pixel := samples[j]] - # merge data.table matrix and spatial coordinates to create input for Giotto Polygons - gpoints = data.table::merge.data.table(matrix_tile_dt, tissue_positions, by.x = 'pixel', by.y = 'barcode') - gpoints = gpoints[,.(pixel, pxl_row_in_fullres, pxl_col_in_fullres, gene, x)] - colnames(gpoints) = c('pixel', 'x', 'y', 'gene', 'counts') + # merge data.table matrix and spatial coordinates to create input for + # Giotto Polygons + gpoints <- data.table::merge.data.table( + matrix_tile_dt, tissue_positions, by.x = "pixel", by.y = "barcode") + gpoints <- gpoints[ + ,.(pixel, pxl_row_in_fullres, pxl_col_in_fullres, gene, x)] + colnames(gpoints) <- c("pixel", "x", "y", "gene", "counts") - gpoints = createGiottoPoints(x = gpoints[,.(x, y, gene, pixel, counts)]) + gpoints <- createGiottoPoints(x = gpoints[,.(x, y, gene, pixel, counts)]) # ensure output is always a list if (!is.list(gpoints)) { @@ -1040,24 +1086,48 @@ setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { names(gpoints) <- objName(gpoints[[1L]]) } - return(list('matrix' = matrix, 'tissue_positions' = tissue_positions, 'gpoints' = gpoints)) + return(list("matrix" = matrix, + "tissue_positions" = tissue_positions, + "gpoints" = gpoints)) } - -createGiottoVisiumHDObject = function(visiumHD_dir = NULL, - expr_data = c('raw', 'filter'), - gene_column_index = 1, - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - cores = NA, - verbose = NULL){ +#' @title Create 10x VisiumHD Giotto Object +#' @name createGiottoVisiumHDObject +#' @description Given the path to a VisiumHD output folder, creates a +#' Giotto object +#' @param VisiumHD_dir full path to the exported visiumHD directory +#' @param expr_data raw or filtered data (see details) +#' @param gene_column_index which column index to select (see details) +#' @param expression_matrix_class class of expression matrix to use +#' (e.g. "dgCMatrix", "DelayedArray") +#' @param cores how many cores or threads to use to read data if paths are +#' provided +#' @param verbose be verbose +#' @inheritParams get10Xmatrix +#' @returns giotto object +#' @details +#' \itemize{ +#' \item{expr_data: raw will take expression data from raw_feature_bc_matrix +#' and filter from filtered_feature_bc_matrix} +#' \item{gene_column_index: which gene identifiers (names) to use if there +#' are multiple columns (e.g. 
ensemble and gene symbol)} +#' } +#' @export +createGiottoVisiumHDObject <- function( + visiumHD_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + cores = NA, + verbose = FALSE){ # NSE vars - barcode = row_pxl = col_pxl = in_tissue = array_row = array_col = NULL + barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL # set number of cores automatically, but with limit of 10 - cores = determine_cores(cores) + cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) readerHD <- importVisiumHD() @@ -1093,27 +1163,30 @@ createGiottoVisiumHDObject = function(visiumHD_dir = NULL, stop("readerHD is not provided") } - expr_counts_path = readerHD$read_folder()[[1]] + expr_counts_path <- readerHD$read_folder()[[1]] # 1. expression expr_results <- get10Xmatrix(path_to_data = expr_counts_path, gene_column_index = gene_column_index) # if expr_results is not a list, make it a list compatible with downstream - if (!is.list(expr_results)) expr_results = list("Gene Expression" = expr_results) + if (!is.list(expr_results)) + expr_results <- list("Gene Expression" = expr_results) # format expected data into list to be used with readExprData() - raw_matrix_list <- list("cell" = list("rna" = list("raw" = expr_results[["Gene Expression"]]))) + raw_matrix_list <- list("cell" = list("rna" = list( + "raw" = expr_results[["Gene Expression"]]))) # add protein expression data to list if it exists - if ('Antibody Capture' %in% names(expr_results)) { + if ("Antibody Capture" %in% names(expr_results)) { raw_matrix_list$cell$protein$raw <- expr_results[["Antibody Capture"]] } # 2. spatial locations - tissue_positions_path = readerHD$read_folder()[[2]] + tissue_positions_path <- readerHD$read_folder()[[2]] spatial_results <- readerHD$load_tissue_position() data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") - spatial_locs <- spatial_results[,.(cell_ID, pxl_row_in_fullres,-pxl_col_in_fullres)] # flip x and y + spatial_locs <- spatial_results[ + ,.(cell_ID, pxl_row_in_fullres,-pxl_col_in_fullres)] # flip x and y colnames(spatial_locs) <- c("cell_ID", 'sdimx', 'sdimy') # 3. scalefactors (optional) @@ -1136,12 +1209,12 @@ createGiottoVisiumHDObject = function(visiumHD_dir = NULL, spatial_locs = spatial_locs, instructions = instructions, cell_metadata = meta_list, - largeImages = visium_png_list + images = visium_png_list ) # 7. polygon information - visium_polygons = readerHD$load_polygon() - giotto_object = setPolygonInfo( + visium_polygons <- readerHD$load_polygon() + giotto_object <- setPolygonInfo( gobject = giotto_object, x = visium_polygons, centroids_to_spatlocs = FALSE, diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index ef7f9d836..fa305be0b 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -1,4 +1,3 @@ - # modular reader functions layout # # # # - initialize method for reader object @@ -83,13 +82,11 @@ setMethod("print", signature("XeniumReader"), function(x, ...) 
show(x)) # * init #### setMethod( "initialize", signature("XeniumReader"), - function( - .Object, - xenium_dir, - filetype, - qv_cutoff, - micron - ) { + function(.Object, + xenium_dir, + filetype, + qv_cutoff, + micron) { obj <- callNextMethod(.Object) # provided params (if any) @@ -111,32 +108,50 @@ setMethod( # check filetype ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") if (!all(ftype_data %in% names(obj@filetype))) { - stop(wrap_txt("`$filetype` must have entries for each of:\n", - paste(ftype_data, collapse = ", "))) + stop(wrap_txt( + "`$filetype` must have entries for each of:\n", + paste(ftype_data, collapse = ", ") + )) } ftype <- obj@filetype ft_tab <- c("csv", "parquet") ft_exp <- c("h5", "mtx") # zarr not yet supported if (!ftype$transcripts %in% ft_tab) { - stop(wrap_txt("`$filetype$transcripts` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) + stop( + wrap_txt( + "`$filetype$transcripts` must be one of", + paste(ft_tab, collapse = ", ") + ), + call. = FALSE + ) } if (!ftype$boundaries %in% ft_tab) { - stop(wrap_txt("`$filetype$boundaries` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) + stop( + wrap_txt( + "`$filetype$boundaries` must be one of", + paste(ft_tab, collapse = ", ") + ), + call. = FALSE + ) } if (!ftype$cell_meta %in% ft_tab) { - stop(wrap_txt("`$filetype$cell_meta` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) + stop( + wrap_txt( + "`$filetype$cell_meta` must be one of", + paste(ft_tab, collapse = ", ") + ), + call. = FALSE + ) } if (!ftype$expression %in% ft_exp) { - stop(wrap_txt("`$filetype$expression` must be one of", - paste(tf_exp, collapse = ", ")), - call. = FALSE) + stop( + wrap_txt( + "`$filetype$expression` must be one of", + paste(ft_exp, collapse = ", ") + ), + call. = FALSE + ) } @@ -183,7 +198,8 @@ setMethod( # for mtx, check if directory instead if (ftype$expression == "mtx") { is_dir <- vapply( - expr_path, checkmate::test_directory, FUN.VALUE = logical(1L) + expr_path, checkmate::test_directory, + FUN.VALUE = logical(1L) ) expr_path <- expr_path[is_dir] } else { @@ -193,8 +209,9 @@ setMethod( # decide micron scaling if (length(obj@micron) == 0) { # if no value already set if (!is.null(experiment_info_path)) { - obj@micron <- jsonlite::fromJSON( - experiment_info_path)$pixel_size + obj@micron <- fromJSON( + experiment_info_path + )$pixel_size } else { warning(wrap_txt("No .xenium file found.
Guessing 0.2125 as micron scaling")) @@ -203,25 +220,23 @@ setMethod( } # transcripts load call - tx_fun <- function( - path = tx_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - flip_vertical = TRUE, - dropcols = c(), - qv_threshold = obj@qv, - cores = determine_cores(), - verbose = NULL - ) { + tx_fun <- function(path = tx_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + flip_vertical = TRUE, + dropcols = c(), + qv_threshold = obj@qv, + cores = determine_cores(), + verbose = NULL) { .xenium_transcript( path = path, feat_type = feat_type, @@ -236,14 +251,12 @@ setMethod( obj@calls$load_transcripts <- tx_fun # load polys call - poly_fun <- function( - path = cell_bound_path, - name = "cell", - flip_vertical = TRUE, - calc_centroids = TRUE, - cores = determine_cores(), - verbose = NULL - ) { + poly_fun <- function(path = cell_bound_path, + name = "cell", + flip_vertical = TRUE, + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL) { .xenium_poly( path = path, name = name, @@ -256,12 +269,10 @@ setMethod( obj@calls$load_polys <- poly_fun # load cellmeta - cmeta_fun <- function( - path = cell_meta_path, - dropcols = c("x_centroid", "y_centroid"), - cores = determine_cores(), - verbose = NULL - ) { + cmeta_fun <- function(path = cell_meta_path, + dropcols = c("x_centroid", "y_centroid"), + cores = determine_cores(), + verbose = NULL) { .xenium_cellmeta( path = path, dropcols = dropcols, @@ -272,13 +283,11 @@ setMethod( obj@calls$load_cellmeta <- cmeta_fun # load featmeta - fmeta_fun <- function( - path = panel_meta_path, - gene_ids = "symbols", - dropcols = c(), - cores = determine_cores(), - verbose = NULL - ) { + fmeta_fun <- function(path = panel_meta_path, + gene_ids = "symbols", + dropcols = c(), + cores = determine_cores(), + verbose = NULL) { .xenium_featmeta( path = path, gene_ids = gene_ids, @@ -290,13 +299,11 @@ setMethod( obj@calls$load_featmeta <- fmeta_fun # load expression call - expr_fun <- function( - path = expr_path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL - ) { + expr_fun <- function(path = expr_path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL) { .xenium_expression( path = path, gene_ids = gene_ids, @@ -308,15 +315,13 @@ setMethod( obj@calls$load_expression <- expr_fun # load image call - img_fun <- function( - path, - name = "image", - micron = obj@micron, - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - verbose = NULL - ) { + img_fun <- function(path, + name = "image", + micron = obj@micron, + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL) { .xenium_image( path = path, name = name, @@ -330,13 +335,11 @@ setMethod( obj@calls$load_image <- img_fun # load aligned image call - img_aff_fun <- function( - path, - imagealignment_path, - name = "aligned_image", - micron = obj@micron, - verbose = NULL - ) { + img_aff_fun <- function(path, + imagealignment_path, + name = "aligned_image", + micron = obj@micron, + verbose = NULL) { read10xAffineImage( file = path, imagealignment_path = imagealignment_path, @@ -349,32 +352,31 @@ setMethod( # create giotto object call - gobject_fun <- function( - 
transcript_path = tx_path, - load_bounds = list( - cell = "cell", - nucleus = "nucleus" - ), - gene_panel_json_path = panel_meta_path, - expression_path = expr_path, - metadata_path = cell_meta_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - load_images = NULL, - load_aligned_images = NULL, - load_expression = FALSE, - load_cellmeta = FALSE, - verbose = NULL - ) { + gobject_fun <- function(transcript_path = tx_path, + load_bounds = list( + cell = "cell", + nucleus = "nucleus" + ), + gene_panel_json_path = panel_meta_path, + expression_path = expr_path, + metadata_path = cell_meta_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + load_images = NULL, + load_aligned_images = NULL, + load_expression = FALSE, + load_cellmeta = FALSE, + instructions = NULL, + verbose = NULL) { load_expression <- as.logical(load_expression) load_cellmeta <- as.logical(load_cellmeta) @@ -393,7 +395,8 @@ setMethod( } if (any(lengths(load_aligned_images) != 2L) || any(!vapply(load_aligned_images, is.character, - FUN.VALUE = logical(1L)))) { + FUN.VALUE = logical(1L) + ))) { stop(wrap_txt( "'load_aligned_images' must be character with length 2: 1. image path @@ -413,7 +416,7 @@ setMethod( funs <- obj@calls # init gobject - g <- giotto() + g <- giotto(instructions = instructions) # transcripts @@ -503,8 +506,10 @@ setMethod( aimglist <- list() aimnames <- names(load_aligned_images) for (aim_i in seq_along(load_aligned_images)) { - vmsg(.v = verbose, sprintf("loading aligned image as '%s'", - aimnames[[aim_i]])) + vmsg(.v = verbose, sprintf( + "loading aligned image as '%s'", + aimnames[[aim_i]] + )) aim <- funs$load_aligned_image( path = load_aligned_images[[aim_i]][1], imagealignment_path = load_aligned_images[[aim_i]][2], @@ -532,7 +537,9 @@ setMethod( #' @export setMethod("$", signature("XeniumReader"), function(x, name) { basic_info <- c("xenium_dir", "filetype", "qv") - if (name %in% basic_info) return(methods::slot(x, name)) + if (name %in% basic_info) { + return(methods::slot(x, name)) + } return(x@calls[[name]]) }) @@ -545,8 +552,10 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { return(initialize(x)) } - stop(sprintf("Only items in '%s' can be set", - paste0(basic_info, collapse = "', '"))) + stop(sprintf( + "Only items in '%s' can be set", + paste0(basic_info, collapse = "', '") + )) }) #' @export @@ -578,9 +587,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { #' as a subcellular transcript detection (default = 20) #' @returns `XeniumReader` object #' @export -importXenium <- function( - xenium_dir = NULL, qv_threshold = 20 -) { +importXenium <- function(xenium_dir = NULL, qv_threshold = 20) { a <- list(Class = "XeniumReader") if (!is.null(xenium_dir)) { a$xenium_dir <- xenium_dir @@ -599,25 +606,23 @@ importXenium <- function( ## transcript #### -.xenium_transcript <- function( - path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - flip_vertical = TRUE, - dropcols = c(), - qv_threshold = 20, - cores = determine_cores(), - verbose = NULL -) { +.xenium_transcript <- function(path, + feat_type = c( + 
"rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + flip_vertical = TRUE, + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL) { if (missing(path)) { stop(wrap_txt( "No path to tx file provided or auto-detected" @@ -625,7 +630,9 @@ importXenium <- function( } checkmate::assert_file_exists(path) - e <- file_extension(path) %>% head(1L) %>% tolower() + e <- file_extension(path) %>% + head(1L) %>% + tolower() vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) @@ -641,9 +648,10 @@ importXenium <- function( # return as data.table with colnames `feat_ID`, `x`, `y` tx <- switch(e, "csv" = do.call(.xenium_transcript_csv, - args = c(a, list(cores = cores))), + args = c(a, list(cores = cores)) + ), "parquet" = do.call(.xenium_transcript_parquet, args = a), - "zarr" = stop('zarr not yet supported') + "zarr" = stop("zarr not yet supported") ) # flip values vertically @@ -665,29 +673,28 @@ importXenium <- function( } -.xenium_transcript_csv <- function( - path, - dropcols = c(), - qv_threshold = 20, - cores = determine_cores(), - verbose = NULL -) { +.xenium_transcript_csv <- function(path, + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL) { tx_dt <- data.table::fread( - path, nThread = cores, + path, + nThread = cores, colClasses = c(transcript_id = "character"), drop = dropcols ) data.table::setnames( x = tx_dt, - old = c('feature_name', 'x_location', 'y_location'), - new = c('feat_ID', 'x', 'y') + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") ) # qv filtering if (!is.null(qv_threshold)) { - n_before <- tx_dt[,.N] + n_before <- tx_dt[, .N] tx_dt <- tx_dt[qv >= qv_threshold] - n_after <- tx_dt[,.N] + n_after <- tx_dt[, .N] vmsg( .v = verbose, @@ -703,12 +710,10 @@ importXenium <- function( return(tx_dt) } -.xenium_transcript_parquet <- function( - path, - dropcols = c(), - qv_threshold = 20, - verbose = NULL -) { +.xenium_transcript_parquet <- function(path, + dropcols = c(), + qv_threshold = 20, + verbose = NULL) { package_check("dplyr") package_check("arrow", custom_msg = sprintf( "package 'arrow' is not yet installed\n\n To install:\n%s\n%s%s", @@ -727,7 +732,9 @@ importXenium <- function( # qv filtering if (!is.null(qv_threshold)) { .nr <- function(x) { - dplyr::tally(x) %>% dplyr::collect() %>% as.numeric() + dplyr::tally(x) %>% + dplyr::collect() %>% + as.numeric() } n_before <- .nr(tx_arrow) tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) @@ -743,8 +750,8 @@ importXenium <- function( tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() data.table::setnames( x = tx_dt, - old = c('feature_name', 'x_location', 'y_location'), - new = c('feat_ID', 'x', 'y') + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") ) return(tx_dt) } @@ -752,18 +759,18 @@ importXenium <- function( ## polygon #### -.xenium_poly <- function( - path, - name = "cell", - flip_vertical = TRUE, - calc_centroids = TRUE, - cores = determine_cores(), - verbose = NULL -) { +.xenium_poly <- function(path, + name = "cell", + flip_vertical = TRUE, + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL) { checkmate::assert_file_exists(path) checkmate::assert_character(name, len = 1L) - e <- file_extension(path) %>% head(1L) %>% tolower() + e <- file_extension(path) %>% + head(1L) %>% + tolower() a 
<- list(path = path) vmsg(sprintf("Loading boundary info '%s'", name), .v = verbose) @@ -793,7 +800,8 @@ importXenium <- function( .xenium_poly_csv <- function(path, cores = determine_cores()) { data.table::fread( - path, nThread = cores, + path, + nThread = cores, colClasses = c(cell_id = "character") ) } @@ -813,12 +821,10 @@ importXenium <- function( ## cellmeta #### -.xenium_cellmeta <- function( - path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL -) { +.xenium_cellmeta <- function(path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL) { if (missing(path)) { stop(wrap_txt( "No path to metadata file provided or auto-detected" @@ -826,16 +832,18 @@ importXenium <- function( } checkmate::assert_file_exists(path) - e <- file_extension(path) %>% head(1L) %>% tolower() + e <- file_extension(path) %>% + head(1L) %>% + tolower() a <- list(path = path, dropcols = dropcols) - vmsg('Loading 10X cell metadata...', .v = verbose) + vmsg("Loading 10X cell metadata...", .v = verbose) vmsg(.v = verbose, .is_debug = TRUE, "[CMETA_READ] FMT =", e) vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE cx <- switch(e, "csv" = do.call( - .xenium_cellmeta_csv, - args = c(a, list(cores = cores)) + .xenium_cellmeta_csv, + args = c(a, list(cores = cores)) ), "parquet" = do.call(.xenium_cellmeta_parquet, args = a) ) @@ -851,9 +859,8 @@ importXenium <- function( return(cx) } -.xenium_cellmeta_csv <- function( - path, dropcols = c(), cores = determine_cores() -) { +.xenium_cellmeta_csv <- function(path, dropcols = c(), + cores = determine_cores()) { data.table::fread(path, nThread = cores, drop = dropcols) } @@ -867,13 +874,11 @@ importXenium <- function( ## featmeta #### -.xenium_featmeta <- function( - path, - gene_ids = "symbols", - dropcols = c(), - cores = determine_cores(), - verbose = NULL -) { +.xenium_featmeta <- function(path, + gene_ids = "symbols", + dropcols = c(), + cores = determine_cores(), + verbose = NULL) { if (missing(path)) { stop(wrap_txt( "No path to panel metadata file provided or auto-detected" @@ -890,7 +895,7 @@ importXenium <- function( ) } else { feat_meta <- data.table::fread(path, nThread = cores) - colnames(feat_meta)[[1]] <- 'feat_ID' + colnames(feat_meta)[[1]] <- "feat_ID" } dropcols <- dropcols[dropcols %in% colnames(feat_meta)] @@ -913,7 +918,7 @@ importXenium <- function( gene_ids <- match.arg(gene_ids, c("symbols", "ensembl")) # tested on v1.6 - j <- jsonlite::fromJSON(path) + j <- fromJSON(path) # j$metadata # dataset meta # j$payload # main content # j$payload$chemistry # panel chemistry used @@ -926,16 +931,16 @@ importXenium <- function( data.table::as.data.table() switch(gene_ids, - "symbols" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("ensembl", "feat_ID", "type") - ), - "ensembl" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("feat_ID", "symbol", "type") - ) + "symbols" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("ensembl", "feat_ID", "type") + ), + "ensembl" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("feat_ID", "symbol", "type") + ) ) return(panel_info) } @@ -944,16 +949,15 @@ importXenium <- function( ## expression #### -.xenium_expression <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL -) { +.xenium_expression <- function(path, + gene_ids = 
"symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL) { if (missing(path)) { stop(wrap_txt( - "No path to expression dir (mtx) or file (h5) provided or auto-detected" + "No path to expression dir (mtx) or file (h5) provided or + auto-detected" ), call. = FALSE) } if (!file.exists(path)) stop("filepath or directory does not exist.\n") @@ -969,7 +973,9 @@ importXenium <- function( # zarr can also be unzipped into a dir, but zarr implementation with # 32bit UINT support is not available in R yet (needed for cell_IDs). } else { - e <- file_extension(path) %>% head(1L) %>% tolower() + e <- file_extension(path) %>% + head(1L) %>% + tolower() } vmsg("Loading 10x pre-aggregated expression...", .v = verbose) @@ -1003,12 +1009,10 @@ importXenium <- function( return(eo_list) } -.xenium_expression_h5 <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE -) { +.xenium_expression_h5 <- function(path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE) { get10Xmatrix_h5( path_to_data = path, gene_ids = gene_ids, @@ -1017,15 +1021,13 @@ importXenium <- function( ) } -.xenium_expression_mtx <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE -) { +.xenium_expression_mtx <- function(path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE) { gene_ids <- switch(gene_ids, - "ensembl" = 1, - "symbols" = 2 + "ensembl" = 1, + "symbols" = 2 ) get10Xmatrix( path_to_data = path, @@ -1039,17 +1041,15 @@ importXenium <- function( ## image #### -.xenium_image <- function( - path, - name, - # output_dir, - micron, - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - verbose = NULL, - ... -) { +.xenium_image <- function(path, + name, + # output_dir, + micron, + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL, + ...) { if (missing(path)) { stop(wrap_txt( "No path to image file provided or auto-detected" @@ -1101,8 +1101,8 @@ importXenium <- function( # micron checkmate::assert_numeric(micron) - progressr::with_progress({ - p <- progressr::progressor(along = path) + with_pbar({ + p <- pbar(along = path) gimg_list <- lapply(seq_along(path), function(img_i) { gimg <- .xenium_image_single( @@ -1121,21 +1121,21 @@ importXenium <- function( return(gimg_list) } -.xenium_image_single <- function( - path, - name = "image", - micron, - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - verbose = NULL -) { +.xenium_image_single <- function(path, + name = "image", + micron, + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL) { vmsg(.v = verbose, sprintf("loading image as '%s'", name)) vmsg(.v = verbose, .is_debug = TRUE, path) vmsg( .v = verbose, .is_debug = TRUE, - sprintf("negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", - negative_y, flip_vertical, flip_horizontal), + sprintf( + "negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", + negative_y, flip_vertical, flip_horizontal + ), .prefix = "" ) @@ -1145,8 +1145,8 @@ importXenium <- function( if (file_extension(path) %in% "ome") { warning(wrap_txt( ".ome.tif images not fully supported. - If reading fails, try converting to a basic tif `ometif_to_tif()`") - ) + If reading fails, try converting to a basic tif `ometif_to_tif()`" + )) } img <- createGiottoLargeImage(path, @@ -1215,8 +1215,10 @@ importXenium <- function( #' slower in our imaging pipeline. #' @param load_expression logical. 
Default = FALSE. Whether to load in 10X #' provided expression matrix. -#' @param load_cellmeta logical. Default = FALSE. Whether to laod in 10X +#' @param load_cellmeta logical. Default = FALSE. Whether to load in 10X #' provided cell metadata information +#' @param instructions list of instructions or output result from +#' [createGiottoInstructions()] #' @param verbose logical or NULL. NULL uses the `giotto.verbose` option #' setting and defaults to TRUE. #' @returns `giotto` object @@ -1256,47 +1258,49 @@ importXenium <- function( #' #' @md #' @export -createGiottoXeniumObject <- function( - xenium_dir, - transcript_path = NULL, # optional - bounds_path = list( # looks for parquets by default - cell = "cell", - nucleus = "nucleus" - ), - gene_panel_json_path = NULL, # optional - expression_path = NULL, # optional - cell_metadata_path = NULL, # optional - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - qv_threshold = 20, - load_images = NULL, - load_aligned_images = NULL, - load_expression = FALSE, - load_cellmeta = FALSE, - verbose = NULL -) { +createGiottoXeniumObject <- function(xenium_dir, + transcript_path = NULL, # optional + bounds_path = list( # looks for parquets by default + cell = "cell", + nucleus = "nucleus" + ), + gene_panel_json_path = NULL, # optional + expression_path = NULL, # optional + cell_metadata_path = NULL, # optional + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + qv_threshold = 20, + load_images = NULL, + load_aligned_images = NULL, + load_expression = FALSE, + load_cellmeta = FALSE, + instructions = NULL, + verbose = NULL) { x <- importXenium(xenium_dir) # apply reader params x$qv <- qv_threshold # directly passed - a <- list(load_bounds = bounds_path, - feat_type = feat_type, - split_keyword = split_keyword, - load_images = load_images, - load_aligned_images = load_aligned_images, - load_expression = load_expression, - load_cellmeta = load_cellmeta, - verbose = verbose) + a <- list( + load_bounds = bounds_path, + feat_type = feat_type, + split_keyword = split_keyword, + load_images = load_images, + load_aligned_images = load_aligned_images, + load_expression = load_expression, + load_cellmeta = load_cellmeta, + instructions = instructions, + verbose = verbose + ) # only passed if not null if (!is.null(transcript_path)) a$transcript_path <- transcript_path @@ -1312,7 +1316,7 @@ createGiottoXeniumObject <- function( -#' + #' #' @title Create 10x Xenium Giotto Object #' #' @name createGiottoXeniumObject #' #' @description Given the path to a Xenium experiment output folder, creates a diff --git a/R/cross_section.R b/R/cross_section.R index ff93ccaf2..1887bbc3a 100644 --- a/R/cross_section.R +++ b/R/cross_section.R @@ -31,21 +31,20 @@ #' @param cell_subset_projection_coords 2D PCA coordinates of selected cells #' in the cross section plane #' @returns crossSection object -create_crossSection_object <- function( - name = NULL, - method = NULL, - thickness_unit = NULL, - slice_thickness = NULL, - cell_distance_estimate_method = NULL, - extend_ratio = NULL, - plane_equation = NULL, - mesh_grid_n = NULL, - mesh_obj = NULL, - cell_subset = NULL, - cell_subset_spatial_locations = NULL, - cell_subset_projection_locations = NULL, - cell_subset_projection_PCA = NULL, - 
cell_subset_projection_coords = NULL) { +create_crossSection_object <- function(name = NULL, + method = NULL, + thickness_unit = NULL, + slice_thickness = NULL, + cell_distance_estimate_method = NULL, + extend_ratio = NULL, + plane_equation = NULL, + mesh_grid_n = NULL, + mesh_obj = NULL, + cell_subset = NULL, + cell_subset_spatial_locations = NULL, + cell_subset_projection_locations = NULL, + cell_subset_projection_PCA = NULL, + cell_subset_projection_coords = NULL) { crossSection_obj <- list( "method" = method, "thickness_unit" = thickness_unit, @@ -70,11 +69,10 @@ create_crossSection_object <- function( #' @param spatial_network_name spatial_network_name #' @returns crossSectionObjects #' @keywords internal -read_crossSection <- function( - gobject, - spat_unit = NULL, - name = NULL, - spatial_network_name = NULL) { +read_crossSection <- function(gobject, + spat_unit = NULL, + name = NULL, + spatial_network_name = NULL) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -131,11 +129,10 @@ read_crossSection <- function( #' @param method method #' @returns matrix #' @keywords internal -estimateCellCellDistance <- function( - gobject, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - method = c("mean", "median")) { +estimateCellCellDistance <- function(gobject, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + method = c("mean", "median")) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -165,14 +162,13 @@ estimateCellCellDistance <- function( #' @param plane_equation plane_equation #' @returns numeric #' @keywords internal -get_sectionThickness <- function( - gobject, - spat_unit = NULL, - thickness_unit = c("cell", "natural"), - slice_thickness = 2, - spatial_network_name = "Delaunay_network", - cell_distance_estimate_method = c("mean", "median"), - plane_equation = NULL) { +get_sectionThickness <- function(gobject, + spat_unit = NULL, + thickness_unit = c("cell", "natural"), + slice_thickness = 2, + spatial_network_name = "Delaunay_network", + cell_distance_estimate_method = c("mean", "median"), + plane_equation = NULL) { thickness_unit <- match.arg(thickness_unit, c("cell", "natural")) section_thickness <- switch(thickness_unit, @@ -229,10 +225,9 @@ projection_fun <- function(point_to_project, plane_point, plane_norm) { #' @param mesh_obj mesh_obj #' @returns numeric #' @keywords internal -adapt_aspect_ratio <- function( - current_ratio, cell_locations, - sdimx = NULL, sdimy = NULL, sdimz = NULL, - mesh_obj = NULL) { +adapt_aspect_ratio <- function(current_ratio, cell_locations, + sdimx = NULL, sdimy = NULL, sdimz = NULL, + mesh_obj = NULL) { x_range <- max(cell_locations[[sdimx]]) - min(cell_locations[[sdimx]]) y_range <- max(cell_locations[[sdimy]]) - min(cell_locations[[sdimy]]) z_range <- max(cell_locations[[sdimz]]) - min(cell_locations[[sdimz]]) @@ -320,7 +315,8 @@ find_x_y_ranges <- function(data, extend_ratio) { #' @param mesh_grid_n mesh_grid_n #' @returns 2d mesh grid line object #' @keywords internal -create_2d_mesh_grid_line_obj <- function(x_min, x_max, y_min, y_max, mesh_grid_n) { +create_2d_mesh_grid_line_obj <- function(x_min, x_max, y_min, + y_max, mesh_grid_n) { x_grid <- seq(x_min, x_max, length.out = mesh_grid_n) y_grid <- seq(y_min, y_max, length.out = mesh_grid_n) @@ -418,7 +414,8 @@ reshape_to_mesh_grid_obj <- function(data_points, mesh_grid_n) { #' @param mesh_grid_n mesh_grid_n #' @returns 3d mesh #' @keywords internal -transform_2d_mesh_to_3d_mesh <- 
function(mesh_line_obj_2d, pca_out, center_vec, mesh_grid_n) { +transform_2d_mesh_to_3d_mesh <- function(mesh_line_obj_2d, pca_out, + center_vec, mesh_grid_n) { data_point_2d <- reshape_to_data_point(mesh_line_obj_2d) center_mat <- matrix( rep(center_vec, dim(data_point_2d)[1]), @@ -461,7 +458,8 @@ get_cross_section_coordinates <- function(cell_subset_projection_locations) { #' @param mesh_grid_n mesh_grid_n #' @returns mesh grid lines #' @keywords internal -create_mesh_grid_lines <- function(cell_subset_projection_locations, extend_ratio, mesh_grid_n) { +create_mesh_grid_lines <- function(cell_subset_projection_locations, + extend_ratio, mesh_grid_n) { cell_subset_projection_PCA <- stats::prcomp( cell_subset_projection_locations ) @@ -559,27 +557,26 @@ create_mesh_grid_lines <- function(cell_subset_projection_locations, extend_rati #' #' crossSectionPlot(g, name = "new_cs") #' @export -createCrossSection <- function( - gobject, - spat_unit = NULL, - spat_loc_name = "raw", - name = "cross_section", - spatial_network_name = "Delaunay_network", - thickness_unit = c("cell", "natural"), - slice_thickness = 2, - cell_distance_estimate_method = "mean", - extend_ratio = 0.2, - method = c( - "equation", "3 points", "point and norm vector", - "point and two plane vectors" - ), - equation = NULL, - point1 = NULL, point2 = NULL, point3 = NULL, - normVector = NULL, - planeVector1 = NULL, planeVector2 = NULL, - mesh_grid_n = 20, - return_gobject = TRUE, - verbose = NULL) { +createCrossSection <- function(gobject, + spat_unit = NULL, + spat_loc_name = "raw", + name = "cross_section", + spatial_network_name = "Delaunay_network", + thickness_unit = c("cell", "natural"), + slice_thickness = 2, + cell_distance_estimate_method = "mean", + extend_ratio = 0.2, + method = c( + "equation", "3 points", "point and norm vector", + "point and two plane vectors" + ), + equation = NULL, + point1 = NULL, point2 = NULL, point3 = NULL, + normVector = NULL, + planeVector1 = NULL, planeVector2 = NULL, + mesh_grid_n = 20, + return_gobject = TRUE, + verbose = NULL) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -781,15 +778,16 @@ createCrossSection <- function( #' @md #' @seealso [GiottoVisuals::spatGenePlot3D] and [GiottoVisuals::spatFeatPlot2D] #' @export -crossSectionFeatPlot <- function(gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - default_save_name = "crossSectionGenePlot", - ...) { +crossSectionFeatPlot <- function( + gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + default_save_name = "crossSectionGenePlot", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -867,16 +865,15 @@ crossSectionFeatPlot <- function(gobject = NULL, #' @details Description of parameters. #' @export #' @seealso \code{\link{crossSectionPlot}} -crossSectionPlot <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - default_save_name = "crossSectionPlot", - ...) { +crossSectionPlot <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + default_save_name = "crossSectionPlot", + ...) 
{ spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -942,7 +939,8 @@ crossSectionPlot <- function( #' @param gobject giotto object #' @param spat_unit spatial unit #' @param feat_type feature type -#' @param crossSection_obj cross section object as alternative input. default = NULL. +#' @param crossSection_obj cross section object as alternative input. +#' default = NULL. #' @param name name of virtual cross section to use #' @param spatial_network_name name of spatial network to use #' @param show_other_cells logical. Default = TRUE @@ -954,17 +952,16 @@ crossSectionPlot <- function( #' @return ggplot #' @details Description of parameters. #' @export -crossSectionFeatPlot3D <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - show_other_cells = TRUE, - other_cell_color = alpha("lightgrey", 0), - default_save_name = "crossSectionGenePlot3D", - ...) { +crossSectionFeatPlot3D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + show_other_cells = TRUE, + other_cell_color = alpha("lightgrey", 0), + default_save_name = "crossSectionGenePlot3D", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -1023,17 +1020,16 @@ crossSectionFeatPlot3D <- function( #' @returns ggplot #' @details Description of parameters. #' @export -crossSectionPlot3D <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - show_other_cells = TRUE, - other_cell_color = alpha("lightgrey", 0), - default_save_name = "crossSection3D", - ...) { +crossSectionPlot3D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + show_other_cells = TRUE, + other_cell_color = alpha("lightgrey", 0), + default_save_name = "crossSection3D", + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -1102,23 +1098,24 @@ crossSectionPlot3D <- function( #' @returns ggplot #' @details Description of parameters. #' @export -insertCrossSectionSpatPlot3D <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - mesh_grid_color = "#1f77b4", - mesh_grid_width = 3, - mesh_grid_style = "dot", - sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", - show_other_cells = FALSE, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - default_save_name = "spat3D_with_cross_section", - ...) { +insertCrossSectionSpatPlot3D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + mesh_grid_color = "#1f77b4", + mesh_grid_width = 3, + mesh_grid_style = "dot", + sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", + show_other_cells = FALSE, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + default_save_name = "spat3D_with_cross_section", + ...) { + package_check("plotly", repository = "CRAN:plotly") + spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -1235,24 +1232,27 @@ insertCrossSectionSpatPlot3D <- function( #' @details Description of parameters. 
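# Illustrative sketch (hypothetical, not part of this changeset): the
# `package_check("plotly", repository = "CRAN:plotly")` calls added to the 3D
# cross-section plotting functions above guard plotly as an optional
# dependency, so a missing installation fails early with an informative error
# rather than deep inside the plotting code. A minimal helper following the
# same convention could look like this (the helper name is made up):
.assume_plotly_plot <- function(...) {
    package_check("plotly", repository = "CRAN:plotly")
    plotly::plot_ly(...)
}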
#' @md #' @export -insertCrossSectionFeatPlot3D <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - crossSection_obj = NULL, - name = NULL, - spatial_network_name = "Delaunay_network", - mesh_grid_color = "#1f77b4", - mesh_grid_width = 3, - mesh_grid_style = "dot", - sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", - show_other_cells = FALSE, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - show_plot = NULL, return_plot = NULL, save_plot = NULL, - save_param = list(), - default_save_name = "spatGenePlot3D_with_cross_section", - ...) { +insertCrossSectionFeatPlot3D <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + crossSection_obj = NULL, + name = NULL, + spatial_network_name = "Delaunay_network", + mesh_grid_color = "#1f77b4", + mesh_grid_width = 3, + mesh_grid_style = "dot", + sdimx = "sdimx", sdimy = "sdimy", sdimz = "sdimz", + show_other_cells = FALSE, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + show_plot = NULL, return_plot = NULL, save_plot = NULL, + save_param = list(), + default_save_name = "spatGenePlot3D_with_cross_section", + ...) { + package_check("plotly", repository = "CRAN:plotly") + spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) diff --git a/R/dd.R b/R/dd.R index 7f51a2501..b3e7510e7 100644 --- a/R/dd.R +++ b/R/dd.R @@ -28,11 +28,14 @@ #' @param gobject giotto object #' @param spat_unit spatial unit (e.g. "cell") #' @param feat_type feature type (e.g. "rna", "dna", "protein") -#' @param return_uniques return unique nesting names (ignores if final object exists/is correct class) +#' @param return_uniques return unique nesting names (ignores if final object +#' exists/is correct class) #' @param output what format in which to get information (e.g. "data.table") -#' @param set_defaults set default spat_unit and feat_type. Change to FALSE only when +#' @param set_defaults set default spat_unit and feat_type. Change to FALSE +#' only when #' expression and spat_info are not expected to exist. -#' @param copy_obj whether to deep copy/duplicate when getting the object (default = TRUE) +#' @param copy_obj whether to deep copy/duplicate when getting the object +#' (default = TRUE) #' @param initialize (default = FALSE) whether to initialize the gobject before #' returning #' @returns list @@ -66,8 +69,8 @@ NULL #' @name plot_cell_params #' @param cell_color character. what to color cells by (e.g. metadata col or #' spatial enrichment col) -#' @param color_as_factor logical. convert color column to factor. discrete colors -#' are used when this is TRUE. continuous colors when FALSE. +#' @param color_as_factor logical. convert color column to factor. discrete +#' colors are used when this is TRUE. continuous colors when FALSE. #' @param cell_color_code character. discrete colors to use. palette to use or #' named vector of colors #' @param cell_color_gradient character. continuous colors to use. palette to @@ -90,12 +93,14 @@ NULL #' @param use_overlap use polygon and feature coordinates overlap results #' @param polygon_feat_type feature type associated with polygon information #' @param polygon_color color for polygon border -#' @param polygon_bg_color color for polygon background (overruled by polygon_fill) -#' @param polygon_fill character. what to color to fill polgyons by (e.g. 
metadata -#' col or spatial enrichment col) -#' @param polygon_fill_gradient polygon fill gradient colors given in order from low to high -#' @param polygon_fill_gradient_midpoint value to set as gradient midpoint (optional). If -#' left as \code{NULL}, the median value detected will be chosen +#' @param polygon_bg_color color for polygon background (overruled by +#' polygon_fill) +#' @param polygon_fill character. what to color to fill polygons by (e.g. +#' metadata col or spatial enrichment col) +#' @param polygon_fill_gradient polygon fill gradient colors given in order +#' from low to high +#' @param polygon_fill_gradient_midpoint value to set as gradient midpoint +#' (optional). If left as \code{NULL}, the median value detected will be chosen #' @param polygon_fill_gradient_style either 'divergent' (midpoint is used in #' color scaling) or 'sequential' (scaled based on data range) #' @param polygon_fill_as_factor is fill color a factor @@ -117,7 +122,8 @@ NULL #' @param dim_point_size size of points in dim. reduction space #' @param dim_point_alpha transparency of point in dim. reduction space #' @param dim_point_border_col border color of points in dim. reduction space -#' @param dim_point_border_stroke border stroke of points in dim. reduction space +#' @param dim_point_border_stroke border stroke of points in dim. reduction +#' space #' @returns plot #' @keywords internal NULL @@ -126,8 +132,10 @@ NULL #' @name plot_nn_net_params #' @param show_NN_network logical. Show underlying NN network #' @param nn_network_to_use character. type of NN network to use (kNN vs sNN) -#' @param network_name character. name of NN network to use, if show_NN_network = TRUE -#' @param nn_network_name character. name of NN network to use, if show_NN_network = TRUE +#' @param network_name character. name of NN network to use, if +#' show_NN_network = TRUE +#' @param nn_network_name character. name of NN network to use, if +#' show_NN_network = TRUE #' @param network_color color of NN network #' @param nn_network_alpha column to use for alpha of the edges #' @returns plot @@ -148,7 +156,8 @@ NULL #' Params documentation template: plot_spatenr_params #' @name plot_spatenr_params -#' @param spat_enr_names character. names of spatial enrichment results to include +#' @param spat_enr_names character. names of spatial enrichment results to +#' include #' @returns plot #' @keywords internal NULL @@ -158,7 +167,8 @@ NULL #' @param show_image show a tissue background image #' @param gimage a giotto image #' @param image_name name of a giotto image or multiple images with group_by -#' @param largeImage_name name of a giottoLargeImage or multiple images with group_by +#' @param largeImage_name name of a giottoLargeImage or multiple images with +#' group_by #' @returns plot #' @keywords internal NULL @@ -168,7 +178,8 @@ NULL #' #' @name plot_params #' -#' @param group_by character. Create multiple plots based on cell annotation column +#' @param group_by character. Create multiple plots based on cell annotation +#' column #' @param group_by_subset character. subset the group_by factor column #' #' @param gradient_midpoint numeric. midpoint for color gradient @@ -178,7 +189,8 @@ NULL #' @param gradient_color character. continuous colors to use. palette to #' use or vector of colors to use (minimum of 2).
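# Illustrative sketch (hypothetical, not part of this changeset): the shared
# parameter templates documented above are inherited by the Giotto plotting
# functions, e.g. `spatPlot2D()`. A typical call supplying the discrete color
# parameters might look like the following; the metadata column and cluster
# labels are placeholders and may differ per dataset:
g <- GiottoData::loadGiottoMini("visium")
spatPlot2D(g,
    cell_color = "leiden_clus",        # metadata column to color cells by
    color_as_factor = TRUE,            # treat the column as discrete
    select_cell_groups = c("1", "2"),  # subset of cluster labels to highlight
    show_other_cells = TRUE            # keep non-selected cells in the plot
)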
#' -#' @param select_cell_groups select subset of cells/clusters based on cell_color parameter +#' @param select_cell_groups select subset of cells/clusters based on +#' cell_color parameter #' @param select_cells select subset of cells based on cell IDs #' #' @param show_other_cells display not selected cells @@ -231,8 +243,10 @@ NULL #' @param show_plot logical. show plot #' @param return_plot logical. return ggplot object #' @param save_plot logical. save the plot -#' @param save_param list of saving parameters, see \code{\link{showSaveParameters}} -#' @param default_save_name default save name for saving, don't change, change save_name in save_param +#' @param save_param list of saving parameters, see +#' \code{\link{showSaveParameters}} +#' @param default_save_name default save name for saving, don't change, +#' change save_name in save_param #' @returns plot #' @keywords internal NULL diff --git a/R/differential_expression.R b/R/differential_expression.R index 5eead79a0..03c8e26e5 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -35,19 +35,18 @@ #' #' findScranMarkers(g, cluster_column = "leiden_clus") #' @export -findScranMarkers <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - verbose = TRUE, - ...) { +findScranMarkers <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + verbose = TRUE, + ...) { # verify if optional package is installed package_check(pkg_name = "scran", repository = "Bioc") @@ -189,19 +188,18 @@ findScranMarkers <- function( #' #' findScranMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findScranMarkers_one_vs_all <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - pval = 0.01, - logFC = 0.5, - min_feats = 10, - min_genes = NULL, - verbose = TRUE, - ...) { +findScranMarkers_one_vs_all <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + pval = 0.01, + logFC = 0.5, + min_feats = 10, + min_genes = NULL, + verbose = TRUE, + ...) 
{ ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -279,8 +277,8 @@ findScranMarkers_one_vs_all <- function( # save list - progressr::with_progress({ - pb <- progressr::progressor(along = uniq_clusters) + with_pbar({ + pb <- pbar(along = uniq_clusters) result_list <- lapply( seq_along(uniq_clusters), function(clus_i) { @@ -403,23 +401,22 @@ findScranMarkers_one_vs_all <- function( #' #' findGiniMarkers(g, cluster_column = "leiden_clus") #' @export -findGiniMarkers <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - min_expr_gini_score = 0.2, - min_det_gini_score = 0.2, - detection_threshold = 0, - rank_score = 1, - min_feats = 5, - min_genes = NULL) { +findGiniMarkers <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + min_expr_gini_score = 0.2, + min_det_gini_score = 0.2, + detection_threshold = 0, + rank_score = 1, + min_feats = 5, + min_genes = NULL) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -512,10 +509,7 @@ findGiniMarkers <- function( ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_cell_metadata(gobject, - metadata = cell_metadata, - verbose = FALSE - ) + gobject <- setGiotto(gobject, cell_metadata, verbose = FALSE) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### } @@ -660,20 +654,19 @@ findGiniMarkers <- function( #' #' findGiniMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findGiniMarkers_one_vs_all <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - min_feats = 4, - min_genes = NULL, - verbose = TRUE) { +findGiniMarkers_one_vs_all <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + min_feats = 4, + min_genes = NULL, + verbose = TRUE) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -734,8 +727,8 @@ findGiniMarkers_one_vs_all <- function( # GINI - progressr::with_progress({ - pb <- progressr::progressor(along = uniq_clusters) + with_pbar({ + pb <- pbar(along = uniq_clusters) result_list <- lapply( seq_along(uniq_clusters), function(clus_i) { @@ -811,19 +804,18 @@ findGiniMarkers_one_vs_all <- function( #' group_2 = 2 #' ) #' @export -findMastMarkers <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - group_1 = NULL, - group_1_name = NULL, - group_2 = NULL, - group_2_name = NULL, - adjust_columns = NULL, - verbose = FALSE, - ...) 
{ +findMastMarkers <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + group_1 = NULL, + group_1_name = NULL, + group_2 = NULL, + group_2_name = NULL, + adjust_columns = NULL, + verbose = FALSE, + ...) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -907,10 +899,7 @@ findMastMarkers <- function( ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_cell_metadata(gobject, - metadata = cell_metadata, - verbose = FALSE - ) + gobject <- setGiotto(gobject, cell_metadata, verbose = FALSE) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -1029,20 +1018,19 @@ findMastMarkers <- function( #' #' findMastMarkers_one_vs_all(gobject = g, cluster_column = "leiden_clus") #' @export -findMastMarkers_one_vs_all <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - adjust_columns = NULL, - pval = 0.001, - logFC = 1, - min_feats = 10, - min_genes = NULL, - verbose = TRUE, - ...) { +findMastMarkers_one_vs_all <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + adjust_columns = NULL, + pval = 0.001, + logFC = 1, + min_feats = 10, + min_genes = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -1190,26 +1178,25 @@ findMastMarkers_one_vs_all <- function( #' #' findMarkers(g, cluster_column = "leiden_clus") #' @export -findMarkers <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column = NULL, - method = c("scran", "gini", "mast"), - subset_clusters = NULL, - group_1 = NULL, - group_2 = NULL, - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - min_feats = 4, - min_genes = NULL, - group_1_name = NULL, - group_2_name = NULL, - adjust_columns = NULL, - ...) { +findMarkers <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column = NULL, + method = c("scran", "gini", "mast"), + subset_clusters = NULL, + group_1 = NULL, + group_2 = NULL, + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + min_feats = 4, + min_genes = NULL, + group_1_name = NULL, + group_2_name = NULL, + adjust_columns = NULL, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes @@ -1313,28 +1300,27 @@ findMarkers <- function( #' #' findMarkers_one_vs_all(g, cluster_column = "leiden_clus") #' @export -findMarkers_one_vs_all <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - cluster_column, - subset_clusters = NULL, - method = c("scran", "gini", "mast"), - # scran & mast - pval = 0.01, - logFC = 0.5, - min_feats = 10, - min_genes = NULL, - # gini - min_expr_gini_score = 0.5, - min_det_gini_score = 0.5, - detection_threshold = 0, - rank_score = 1, - # mast specific - adjust_columns = NULL, - verbose = TRUE, - ...) 
{ +findMarkers_one_vs_all <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + cluster_column, + subset_clusters = NULL, + method = c("scran", "gini", "mast"), + # scran & mast + pval = 0.01, + logFC = 0.5, + min_feats = 10, + min_genes = NULL, + # gini + min_expr_gini_score = 0.5, + min_det_gini_score = 0.5, + detection_threshold = 0, + rank_score = 1, + # mast specific + adjust_columns = NULL, + verbose = TRUE, + ...) { ## deprecated arguments if (!is.null(min_genes)) { min_feats <- min_genes diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R index 0e4bcbf70..8b658a634 100644 --- a/R/dimension_reduction.R +++ b/R/dimension_reduction.R @@ -3,6 +3,60 @@ +## * general function #### + +#' @name reduceDims +#' @title Run dimension reduction method +#' @description +#' Wrapper function for Giotto dimension reduction methods for easier coding. +#' @param gobject giotto object +#' @param method character. Dimension reduction method to use +#' @param projection logical. Whether to run in a projection manner +#' (faster, but is an approximation) +#' @param toplevel relative stackframe the call was made at. do not use. +#' @param \dots additional params to pass to specific functions +#' @returns `giotto` object with attached dimension reduction +#' @examples +#' g <- GiottoData::loadGiottoMini("vis") +#' x <- reduceDims(g, "tsne", spat_unit = "cell") +#' x <- reduceDims(x, "umap", projection = TRUE) +#' x <- reduceDims(x, method = "nmf") +#' @export +reduceDims <- function( + gobject, + method = c("pca", "nmf", "umap", "tsne"), + projection = FALSE, + toplevel = 1L, + ... +) { + a <- list(...) + method <- match.arg(method, choices = c("pca", "nmf", "umap", "tsne")) + if (projection) method <- paste(method, "proj", sep = "_") + + fun <- switch(method, + "pca" = runPCA, + "umap" = runUMAP, + "tsne" = runtSNE, + "pca_proj" = runPCAprojection, + "umap_proj" = runUMAPprojection, + "nmf" = runNMF, + stop("Not implemented yet.") + ) + res <- do.call(fun, args = list(gobject, toplevel = -100, ...)) + + if (!isFALSE(a$return_gobject)) { + res <- update_giotto_params(res, + description = "_reduce_dims", + toplevel = toplevel + 1L + ) + } + + return(res) +} + + + + ## * PCA #### # ---------- # @@ -19,14 +73,13 @@ #' @param seed_number seed number to use #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_factominer <- function( - x, - ncp = 100, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - ...) { +.run_pca_factominer <- function(x, + ncp = 100, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + ...) { # verify if optional package is installed package_check(pkg_name = "FactoMineR", repository = "CRAN") @@ -139,17 +192,16 @@ #' @param BPPARAM BiocParallelParam object #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_biocsingular <- function( - x, - ncp = 100, - center = TRUE, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - BSPARAM = c("irlba", "exact", "random"), - BPPARAM = BiocParallel::SerialParam(), - ...) { +.run_pca_biocsingular <- function(x, + ncp = 100, + center = TRUE, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + BSPARAM = c("irlba", "exact", "random"), + BPPARAM = BiocParallel::SerialParam(), + ...) 
{ BSPARAM <- match.arg(BSPARAM, choices = c("irlba", "exact", "random")) min_ncp <- min(dim(x)) @@ -160,103 +212,62 @@ ncp <- min_ncp - 1 } - # start seed - if (isTRUE(set_seed)) { - set.seed(seed = seed_number) - } + if (isTRUE(rev)) x <- t_flex(x) - if (isTRUE(rev)) { - x <- t_flex(x) + pca_param <- list( + x = x, + rank = ncp, + center = center, + scale = scale, + BPPARAM = BPPARAM, + ... + ) - if (BSPARAM == "irlba") { - pca_res <- BiocSingular::runPCA( - x = x, rank = ncp, - center = center, scale = scale, - BSPARAM = BiocSingular::IrlbaParam(), - BPPARAM = BPPARAM, - ... - ) - } else if (BSPARAM == "exact") { - pca_res <- BiocSingular::runPCA( - x = x, rank = ncp, - center = center, scale = scale, - BSPARAM = BiocSingular::ExactParam(), - BPPARAM = BPPARAM, - ... - ) - } else if (BSPARAM == "random") { - pca_res <- BiocSingular::runPCA( - x = x, rank = ncp, - center = center, scale = scale, - BSPARAM = BiocSingular::RandomParam(), - BPPARAM = BPPARAM, - ... - ) - } + pca_param$BSPARAM <- switch(BSPARAM, + "irlba" = BiocSingular::IrlbaParam(), + "exact" = BiocSingular::ExactParam(), + "random" = BiocSingular::RandomParam() + ) + if (set_seed) { + gwith_seed( + seed = seed_number, + { + pca_res <- do.call(BiocSingular::runPCA, pca_param) + }, + ) + } else { + pca_res <- do.call(BiocSingular::runPCA, pca_param) + } - # eigenvalues - eigenvalues <- pca_res$sdev^2 - # PC loading + # eigenvalues + eigenvalues <- pca_res$sdev^2 + + # loadings and coords + if (isTRUE(rev)) { loadings <- pca_res$x - rownames(loadings) <- rownames(x) - colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) - # coordinates coords <- pca_res$rotation + rownames(loadings) <- rownames(x) rownames(coords) <- colnames(x) - colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) - result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords - ) } else { - if (BSPARAM == "irlba") { - pca_res <- BiocSingular::runPCA( - x = x, rank = ncp, - center = center, scale = scale, - BSPARAM = BiocSingular::IrlbaParam(), - BPPARAM = BPPARAM, - ... - ) - } else if (BSPARAM == "exact") { - pca_res <- BiocSingular::runPCA( - x = x, rank = ncp, - center = center, scale = scale, - BSPARAM = BiocSingular::ExactParam(), - BPPARAM = BPPARAM, - ... - ) - } else if (BSPARAM == "random") { - pca_res <- BiocSingular::runPCA( - x = x, rank = ncp, - center = center, scale = scale, - BSPARAM = BiocSingular::RandomParam(), - BPPARAM = BPPARAM, - ... 
- ) - } - - # eigenvalues - eigenvalues <- pca_res$sdev^2 - # PC loading loadings <- pca_res$rotation - rownames(loadings) <- colnames(x) - colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) - # coordinates coords <- pca_res$x + rownames(loadings) <- colnames(x) rownames(coords) <- rownames(x) - colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) - result <- list( - eigenvalues = eigenvalues, loadings = loadings, coords = coords - ) } - # exit seed - if (isTRUE(set_seed)) { - set.seed(seed = Sys.time()) - } + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) + + result <- list( + eigenvalues = eigenvalues, loadings = loadings, coords = coords + ) - vmsg(.is_debug = TRUE, "finished .run_pca_biocsingular, method ==", BSPARAM) + vmsg( + .is_debug = TRUE, + "finished .run_pca_biocsingular, method ==", BSPARAM + ) return(result) } @@ -276,14 +287,14 @@ #' @param feats_to_use feats to use, character or vector of features #' @param verbose verbosity #' @keywords internal +#' @noRd #' @returns subsetted matrix based on selected features -.create_feats_to_use_matrix <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - sel_matrix, - feats_to_use, - verbose = FALSE) { +.create_feats_to_use_matrix <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + sel_matrix, + feats_to_use, + verbose = FALSE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -358,6 +369,7 @@ #' @param set_seed use of seed #' @param seed_number seed number to use #' @param verbose verbosity of the function +#' @param toplevel relative stackframe where call was made #' @param ... additional parameters for PCA (see details) #' @returns giotto object with updated PCA dimension recuction #' @details See \code{\link[BiocSingular]{runPCA}} and @@ -380,25 +392,25 @@ #' #' runPCA(g) #' @export -runPCA <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - name = NULL, - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba", "exact", "random", "factominer"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCA <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + name = NULL, + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba", "exact", "random", "factominer"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel = 1L, + ...) 
{ # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -541,14 +553,14 @@ runPCA <- function( ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction( - gobject = gobject, dimObject = dimObject, verbose = verbose - ) + gobject <- setGiotto(gobject, dimObject, verbose = verbose) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## - gobject <- update_giotto_params(gobject, description = "_pca") + gobject <- update_giotto_params( + gobject, description = "_pca", toplevel = toplevel + 1L + ) return(gobject) } else { return(pca_object) @@ -582,18 +594,17 @@ runPCA <- function( #' @param verbose verbosity level #' @keywords internal #' @returns list of eigenvalues, loadings and pca coordinates -.run_pca_biocsingular_irlba_projection <- function( - x, - ncp = 100, - center = TRUE, - scale = TRUE, - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - BPPARAM = BiocParallel::SerialParam(), - random_subset = 500, - verbose = TRUE, - ...) { +.run_pca_biocsingular_irlba_projection <- function(x, + ncp = 100, + center = TRUE, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + BPPARAM = BiocParallel::SerialParam(), + random_subset = 500, + verbose = TRUE, + ...) { x <- scale(x, center = center, scale = scale) min_ncp <- min(dim(x)) @@ -756,6 +767,7 @@ runPCA <- function( #' @param set_seed use of seed #' @param seed_number seed number to use #' @param verbose verbosity of the function +#' @param toplevel relative stackframe where call was made #' @param ... additional parameters for PCA (see details) #' @returns giotto object with updated PCA dimension recuction #' @details See \code{\link[BiocSingular]{runPCA}} and @@ -775,26 +787,26 @@ runPCA <- function( #' #' runPCAprojection(g) #' @export -runPCAprojection <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - random_subset = 500, - name = "pca.projection", - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCAprojection <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + random_subset = 500, + name = "pca.projection", + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel = 1L, + ...) 
{ # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -911,17 +923,6 @@ runPCAprojection <- function( } if (isTRUE(return_gobject)) { - pca_names <- list_dim_reductions_names( - gobject = gobject, - data_type = reduction, - spat_unit = spat_unit, - feat_type = feat_type, - dim_type = "pca" - ) - - if (name %in% pca_names) { - cat(name, " has already been used, will be overwritten") - } if (reduction == "cells") { my_row_names <- colnames(expr_values) @@ -945,12 +946,14 @@ runPCAprojection <- function( ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, dimObject = dimObject) + gobject <- setGiotto(gobject, dimObject, verbose = verbose) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## - gobject <- update_giotto_params(gobject, description = "_pca") + gobject <- update_giotto_params( + gobject, description = "_pca", toplevel = toplevel + 1L + ) return(gobject) } else { return(pca_object) @@ -984,6 +987,7 @@ runPCAprojection <- function( #' @param set_seed use of seed #' @param seed_number seed number to use #' @param verbose verbosity of the function +#' @param toplevel relative stackframe where call was made #' @param ... additional parameters for PCA (see details) #' @returns giotto object with updated PCA dimension reduction #' @details See \code{\link[BiocSingular]{runPCA}} and @@ -1008,27 +1012,27 @@ runPCAprojection <- function( #' # (only 48 in this mini dataset) #' runPCAprojectionBatch(g, feats_to_use = NULL) #' @export -runPCAprojectionBatch <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - random_subset = 500, - batch_number = 5, - name = "pca.projection.batch", - feats_to_use = "hvf", - return_gobject = TRUE, - center = TRUE, - scale_unit = TRUE, - ncp = 100, - method = c("irlba"), - method_params = BiocParallel::SerialParam(), - rev = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runPCAprojectionBatch <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + random_subset = 500, + batch_number = 5, + name = "pca.projection.batch", + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel = 1L, + ...) 
{ # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1315,17 +1319,6 @@ runPCAprojectionBatch <- function( if (return_gobject == TRUE) { - pca_names <- list_dim_reductions_names( - gobject = gobject, - data_type = reduction, - spat_unit = spat_unit, - feat_type = feat_type, - dim_type = "pca" - ) - - if (name %in% pca_names) { - cat(name, " has already been used, will be overwritten") - } if (reduction == "cells") { my_row_names <- colnames(expr_values) @@ -1349,12 +1342,14 @@ runPCAprojectionBatch <- function( ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, dimObject = dimObject) + gobject <- setGiotto(gobject, dimObject, verbose = verbose) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## - gobject <- update_giotto_params(gobject, description = "_pca") + gobject <- update_giotto_params( + gobject, description = "_pca", toplevel = toplevel + 1L + ) return(gobject) } else { return(pca_object) @@ -1376,7 +1371,8 @@ runPCAprojectionBatch <- function( #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @inheritParams create_screeplot -#' @param name name of PCA object if available +#' @param dim_reduction_name name of PCA +#' @param name deprecated #' @param expression_values expression values to use #' @param reduction cells or features #' @param method which implementation to use @@ -1398,27 +1394,37 @@ runPCAprojectionBatch <- function( #' #' screePlot(g) #' @export -screePlot <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - name = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - method = c("irlba", "exact", "random", "factominer"), - rev = FALSE, - feats_to_use = NULL, - center = FALSE, - scale_unit = FALSE, - ncp = 100, - ylim = c(0, 20), - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "screePlot", - ...) { +screePlot <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + dim_reduction_name = NULL, + name = deprecated(), + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + method = c("irlba", "exact", "random", "factominer"), + rev = FALSE, + feats_to_use = NULL, + center = FALSE, + scale_unit = FALSE, + ncp = 100, + ylim = c(0, 20), + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "screePlot", + ...) 
{ + if (is_present(name)) { + deprecate_warn( + when = "4.1.1", + what = "screePlot(name = )", + with = "screePlot(dim_reduction_name = )" + ) + } else { + name <- dim_reduction_name # shorter varname + } + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1629,7 +1635,7 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { savelist <- list(pl, cpl) ## combine plots with cowplot - combo_plot <- cowplot::plot_grid( + combo_plot <- plot_grid( plotlist = savelist, ncol = 1, rel_heights = c(1), @@ -1649,7 +1655,7 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { #' @title jackstrawPlot #' @name jackstrawPlot -#' @description identify significant prinicipal components (PCs) +#' @description Identify significant principal components (PCs) #' @inheritParams data_access_params #' @inheritParams plot_output_params #' @param expression_values expression values to use @@ -1659,39 +1665,57 @@ create_screeplot <- function(eigs, ncp = 20, ylim = c(0, 20)) { #' @param scale_unit scale features before PCA #' @param ncp number of principal components to calculate #' @param ylim y-axis limits on jackstraw plot -#' @param iter number of interations for jackstraw +#' @param iter number of iterations for jackstraw #' @param threshold p-value threshold to call a PC significant +#' @param random_subset randomized subset of matrix to use to approximate but +#' speed up calculation +#' @param set_seed logical. whether to set a seed when random_subset is used +#' @param seed_number seed number to use when random_subset is used #' @param verbose show progress of jackstraw method -#' @returns ggplot object for jackstraw method +#' @returns if `return_plot` = `TRUE`: ggplot object for jackstraw method +#' if `return_plot` = `FALSE`: silently returns number of significant PCs #' @details #' The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} #' function. By systematically permuting genes it identifies robust, and thus -#' significant, PCs. +#' significant, PCs. This implementation makes small modifications to SVD +#' calculation for improved efficiency and flexibility with different matrix +#' types. \cr +#' This implementation supports both dense and sparse input matrices. \cr +#' +#' \strong{steps} +#' +#' 1. Select singular values to calculate based on matrix dims and ncp +#' 2. Find SVD to get variance explained of each PC +#' 3. Randomly sample across features then re-calculate randomized variance +#' 4. 
Determine P-value by comparing actual vs randomized explained variance, +#' indicating the significance of each PC #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' #' jackstrawPlot(gobject = g) +#' @md #' @export -jackstrawPlot <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - feats_to_use = NULL, - center = FALSE, - scale_unit = FALSE, - ncp = 20, - ylim = c(0, 1), - iter = 10, - threshold = 0.01, - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "jackstrawPlot") { - package_check(pkg_name = "jackstraw", repository = "CRAN") +jackstrawPlot <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + feats_to_use = "hvf", + center = TRUE, + scale_unit = TRUE, + ncp = 20, + ylim = c(0, 1), + iter = 10, + threshold = 0.01, + random_subset = NULL, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "jackstrawPlot") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( @@ -1705,13 +1729,14 @@ jackstrawPlot <- function( ) # print message with information # - if (verbose) { - message("using 'jackstraw' to identify significant PCs If used in + vmsg( + .v = verbose, .prefix = " ", + "using 'jackstraw' to identify significant PCs If used in published research, please cite: Neo Christopher Chung and John D. Storey (2014). 'Statistical significance of variables driving systematic variation in - high-dimensional data. Bioinformatics") - } + high-dimensional data. Bioinformatics\n\n" + ) # select direction of reduction reduction <- match.arg(reduction, c("cells", "feats")) @@ -1729,6 +1754,24 @@ jackstrawPlot <- function( output = "matrix" ) + # create a random subset if random_subset is not NULL + if (!is.null(random_subset)) { + if (set_seed) { + gwith_seed(seed = seed_number, { + random_selection <- sort(sample( + seq_len(ncol(expr_values)), random_subset + )) + expr_values <- expr_values[, random_selection] + }) + } else { + random_selection <- sort(sample( + seq_len(ncol(expr_values)), random_subset + )) + expr_values <- expr_values[, random_selection] + } + + } + ## subset matrix if (!is.null(feats_to_use)) { @@ -1744,30 +1787,56 @@ jackstrawPlot <- function( # reduction of cells if (reduction == "cells") { - if (scale_unit == TRUE | center == TRUE) { + if (scale_unit || center) { expr_values <- t_flex(scale( t_flex(expr_values), center = center, scale = scale_unit )) } - jtest <- jackstraw::permutationPA( - dat = as.matrix(expr_values), - B = iter, threshold = threshold, verbose = verbose - ) + if (set_seed) { + gwith_seed(seed = seed_number, { + jtest <- .perm_pa( + dat = expr_values, + iter = iter, + threshold = threshold, + ncp = ncp, + verbose = verbose + ) + }) + } else { + jtest <- .perm_pa( + dat = expr_values, + iter = iter, + threshold = threshold, + ncp = ncp, + verbose = verbose + ) + } + ## results ## nr_sign_components <- jtest$r - if (verbose) { - cat( - "number of estimated significant components: ", - nr_sign_components - ) + vmsg(.v = verbose, + "\nnumber of estimated significant components: ", + nr_sign_components) + + if (ncp <= nr_sign_components) { + warning(wrap_txt( + "Number of significant components equals `ncp`. + Increasing `ncp` may be needed." 
+ )) } - final_results <- jtest$p - jackplot <- create_jackstrawplot( + + final_results <- jtest[c("p", "cum_var_explained")] + vmsg(.v = verbose, .is_debug = TRUE, final_results$p) + + jackplot <- .create_jackstrawplot( jackstraw_data = final_results, - ncp = ncp, ylim = ylim, threshold = threshold + ncp = ncp, + ylim = ylim, + threshold = threshold, + iter = iter ) } @@ -1779,7 +1848,7 @@ jackstrawPlot <- function( show_plot = show_plot, default_save_name = default_save_name, save_param = save_param, - else_return = NULL + else_return = nr_sign_components )) } @@ -1788,18 +1857,20 @@ jackstrawPlot <- function( #' @title create_jackstrawplot #' @name create_jackstrawplot #' @description create jackstrawplot with ggplot -#' @param jackstraw_data result from jackstraw function (`testresult$p`) +#' @param jackstraw_data result from jackstraw function (`testresult$p`) and +#' (`testresult$cum_var_explained`) #' @param ncp number of principal components to calculate #' @param ylim y-axis limits on jackstraw plot #' @param threshold p.value threshold to call a PC significant +#' @param iter number of iterations performed #' @keywords internal #' @returns ggplot -#' @export -create_jackstrawplot <- function( - jackstraw_data, - ncp = 20, - ylim = c(0, 1), - threshold = 0.01) { +#' @noRd +.create_jackstrawplot <- function(jackstraw_data, + ncp = 20, + ylim = c(0, 1), + threshold = 0.01, + iter = 100) { checkmate::assert_numeric(ncp, len = 1L) checkmate::assert_numeric(ylim, len = 2L) checkmate::assert_numeric(threshold, len = 1L) @@ -1808,34 +1879,88 @@ create_jackstrawplot <- function( PC <- p.val <- NULL testDT <- data.table::data.table( - PC = paste0("PC.", seq_along(jackstraw_data)), - p.val = jackstraw_data + PC = paste0("PC.", seq_along(jackstraw_data$p)), + p.val = jackstraw_data$p, + cum_var = jackstraw_data$cum_var_explained # TODO ) testDT[, "PC" := factor(PC, levels = PC)] testDT[, "sign" := ifelse(p.val <= threshold, "sign", "n.s.")] - pl <- ggplot2::ggplot() - pl <- pl + ggplot2::theme_bw() - pl <- pl + ggplot2::geom_point( - data = testDT[seq_len(ncp)], - ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21 - ) - pl <- pl + ggplot2::scale_fill_manual( - values = c("n.s." = "lightgrey", "sign" = "darkorange") - ) - pl <- pl + ggplot2::theme( - axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1) - ) - pl <- pl + ggplot2::coord_cartesian(ylim = ylim) - pl <- pl + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank()) - pl <- pl + ggplot2::labs(x = "", y = "p-value per PC") + pl <- ggplot2::ggplot() + + ggplot2::theme_bw() + + ggplot2::geom_point( + data = testDT[seq_len(ncp)], + ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21 + ) + + ggplot2::scale_fill_manual( + values = c("n.s." 
= "lightgrey", "sign" = "darkorange") + ) + + ggplot2::theme( + axis.text.x = ggplot2::element_text( + angle = 45, hjust = 1, vjust = 1 + ) + ) + + ggplot2::coord_cartesian(ylim = ylim) + + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank()) + + ggplot2::labs( + x = "", y = "p-value per PC", + title = paste("PC Significance Plot (iter =", iter, ")") + ) return(pl) } +# based on the `permutationPA`() implementation in jackstraw package +.perm_pa <- function (dat, iter = 100, threshold = 0.05, ncp, verbose = TRUE) +{ + if (missing(dat)) + stop("`dat` is required!") + n <- ncol(dat) + m <- nrow(dat) + ndf <- min(m, n - 1, ncp) # this is a limitation of svd singular values + sum_of_squared_singular_vals <- sum(dat^2) + + # pick SVD fun based on whether partial or full is appropriate + # These biocsingular functions should not scale or center + svd_fun <- if (ndf >= 0.5 * m || ndf >= 100) BiocSingular::runExactSVD + else BiocSingular::runIrlbaSVD # partial SVDs + + .calc_svd_var_explained <- function(x, k) { + res <- svd_fun(x, k = k) + singular_val_square <- res$d[1:k]^2 + return(singular_val_square / sum_of_squared_singular_vals) + } + + dstat <- .calc_svd_var_explained(dat, k = ndf) + cum_var_explained <- cumsum(dstat) + + # randomize and compare + dstat0 <- matrix(0, nrow = iter, ncol = ndf) + vmsg(.v = verbose, + "Estimating number of significant principal components: ") + + with_pbar({ + pb <- pbar(steps = iter) + for (i in seq_len(iter)) { + pb() + dat0 <- t(apply(dat, 1, sample)) + dstat0[i, ] <- .calc_svd_var_explained(dat0, k = ndf) + } + }) + p <- rep(1, ndf) + for (i in 1:ndf) { + p[i] <- mean(dstat0[, i] >= dstat[i]) + } + # ensure all p vals are 1 after the first 1 detected + for (i in 2:ndf) { + p[i] <- max(p[i - 1], p[i]) + } + r <- sum(p <= threshold) + return(list(r = r, p = p, cum_var_explained = cum_var_explained)) +} @@ -1876,30 +2001,29 @@ create_jackstrawplot <- function( #' #' signPCA(g) #' @export -signPCA <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - name = NULL, - method = c("screeplot", "jackstraw"), - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - pca_method = c("irlba", "factominer"), - rev = FALSE, - feats_to_use = NULL, - center = TRUE, - scale_unit = TRUE, - ncp = 50, - scree_ylim = c(0, 10), - jack_iter = 10, - jack_threshold = 0.01, - jack_ylim = c(0, 1), - verbose = TRUE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "signPCA") { +signPCA <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + name = NULL, + method = c("screeplot", "jackstraw"), + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + pca_method = c("irlba", "factominer"), + rev = FALSE, + feats_to_use = NULL, + center = TRUE, + scale_unit = TRUE, + ncp = 50, + scree_ylim = c(0, 10), + jack_iter = 10, + jack_threshold = 0.01, + jack_ylim = c(0, 1), + verbose = TRUE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "signPCA") { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2031,6 +2155,260 @@ signPCA <- function( +# * NMF #### + +#' @name runNMF +#' @title Run Non-Negative Matrix Factorization +#' @description +#' Use NMF to perform dimension reduction. 
+#' @inheritParams data_access_params +#' @param expression_values expression values to use +#' @param reduction "cells" or "feats" +#' @param name arbitrary name for NMF run +#' @param feats_to_use subset of features to use for NMF +#' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param scale_unit scale features before NMF (default = TRUE) +#' @param k NMF rank (number of components to decompose into). Default is 20 +#' @param method which implementation to use (only rcppml right now) +#' @param rev do a reverse NMF +#' @param set_seed use of seed +#' @param seed_number seed number to use +#' @param verbose verbosity of the function +#' @param toplevel relative stackframe where call was made +#' @param ... additional parameters for NMF (see details) +#' @returns giotto object with updated NMF dimension reduction +#' @details +#' See \code{\link[RcppML]{nmf}} for more information about other parameters. +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' x <- runNMF(g, k = 20) +#' x <- runUMAP(x, +#' dim_reduction_to_use = "nmf", +#' dimensions_to_use = 1:20, +#' name = "nmf_umap" +#' ) +#' x <- createNearestNetwork(x, +#' dim_reduction_to_use = "nmf", +#' dim_reduction_name = "nmf", +#' dimensions_to_use = 1:20 +#' ) +#' x <- doLeidenCluster(x, name = "nmf_leiden", network_name = "sNN.nmf") +#' plotUMAP(x, dim_reduction_name = "nmf_umap", cell_color = "nmf_leiden") +#' spatPlot2D(x, cell_color = "nmf_leiden") +#' @export +runNMF <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + name = NULL, + feats_to_use = "hvf", + return_gobject = TRUE, + scale_unit = TRUE, + k = 20, + method = c("rcppml"), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel = 1L, + ... +) { + checkmate::assert_class(gobject, "giotto") + reduction <- match.arg(reduction, c("cells", "feats")) + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + # specify name to use for nmf + if (is.null(name)) { + if (feat_type == "rna") { + name <- "nmf" + } else { + name <- paste0(feat_type, ".", "nmf") + } + } + + # expression values to be used + values <- match.arg( + expression_values, + unique(c("normalized", "scaled", "custom", expression_values)) + ) + expr_values <- getExpression( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + values = values, + output = "exprObj" + ) + + provenance <- prov(expr_values) + + expr_values <- expr_values[] # extract matrix + + ## subset matrix + if (!is.null(feats_to_use)) { + expr_values <- .create_feats_to_use_matrix( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + sel_matrix = expr_values, + feats_to_use = feats_to_use, + verbose = verbose + ) + } + + # reduction type + expr_values <- switch(reduction, + "cells" = t_flex(expr_values), + "feats" = expr_values + ) + + # do NMF + nmf_res <- switch(method, + "rcppml" = .run_nmf_rcppml( + x = expr_values, + k = k, + scale = scale_unit, + rev = rev, + set_seed = set_seed, + seed_number = seed_number, + verbose = verbose, + ... 
+ ), + stop("method not implemented") + ) + + if (return_gobject) { + + if (reduction == "cells") { + my_row_names <- rownames(expr_values) + } else { + my_row_names <- colnames(expr_values) + } + + dimObject <- create_dim_obj( + name = name, + feat_type = feat_type, + spat_unit = spat_unit, + provenance = provenance, + reduction = reduction, + reduction_method = "nmf", + coordinates = nmf_res$coords, + misc = list( + diag = nmf_res$d, + loadings = nmf_res$loadings, + iter = nmf_res$iter, + tol = nmf_res$tol + ), + my_rownames = my_row_names + ) + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <- setGiotto(gobject, dimObject, verbose = verbose) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + ## update parameters used ## + gobject <- update_giotto_params( + gobject, description = "_nmf", toplevel = toplevel + 1L + ) + } else { + return(nmf_res) + } +} + +.run_nmf_rcppml <- function(x, + k = 50, + scale = TRUE, + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ... +) { + package_check("RcppML", repository = "CRAN") + .rcppml_cite() + + # catch max k + max_k <- min(dim(x)) + if (k > max_k) { + warning(wrap_txt("k >= minimum dimension of x, will be set to + minimum dimension of x: ", max_k)) + k <- max_k + } + + if (rev) x <- t_flex(x) + + if (!set_seed) seed_number <- NULL + + nmf_res <- gwith_package("Matrix", { + RcppML::nmf( + A = x, + k = k, + verbose = verbose, + seed = seed_number, + diag = TRUE, + nonneg = TRUE, + ... + ) + }) + + # diag (scale) + d <- nmf_res$d + + # loadings and coords + if (rev) { + loadings <- t_flex(nmf_res$w) + coords <- nmf_res$h + rownames(loadings) <- rownames(x) + rownames(coords) <- colnames(x) + } else { + loadings <- t_flex(nmf_res$h) + coords <- nmf_res$w + rownames(loadings) <- colnames(x) + rownames(coords) <- rownames(x) + } + + if (scale) coords <- coords %*% diag(d) + + colnames(loadings) <- paste0("Dim.", seq_len(ncol(loadings))) + colnames(coords) <- paste0("Dim.", seq_len(ncol(coords))) + + result = list( + coords = coords, loadings = loadings, d = d, + iter = nmf_res$iter, tol = nmf_res$tol + ) + + vmsg(.is_debug = TRUE, + "finished .run_nmf_rcppml") + + return(result) +} + + + + +.rcppml_cite <- function() { + if (isFALSE(getOption("giotto.rcppml_cite", TRUE))) { + return(invisible()) + } + message("[Running RcppML NMF]. This citation shown once per session:\n") + wrap_msg( + "Zachary J. DeBruine, Karsten Melcher, Timothy J. Triche Jr + Fast and robust non-negative matrix factorization for single-cell experiments + bioRxiv 2021.09.01.458620. 
+ https://doi.org/10.1101/2021.09.01.458620" + ) + options("giotto.rcppml_cite" = FALSE) + return(invisible()) +} @@ -2061,7 +2439,8 @@ signPCA <- function( #' @param set_seed use of seed #' @param seed_number seed number to use #' @param verbose verbosity of function -#' @param toplevel_params parameters to extract +#' @param toplevel_params deprecated +#' @param toplevel relative stackframe where call was made from #' @inheritDotParams uwot::umap -X -n_neighbors -n_components -n_epochs #' -min_dist -n_threads -spread -seed -scale -pca -pca_center -pca_method #' @returns giotto object with updated UMAP dimension reduction @@ -2082,32 +2461,36 @@ signPCA <- function( #' #' runUMAP(g) #' @export -runUMAP <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - n_neighbors = 40, - n_components = 2, - n_epochs = 400, - min_dist = 0.01, - n_threads = NA, - spread = 5, - set_seed = TRUE, - seed_number = 1234L, - verbose = TRUE, - toplevel_params = 2L, - ...) { +runUMAP <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + n_neighbors = 40, + n_components = 2, + n_epochs = 400, + min_dist = 0.01, + n_threads = NA, + spread = 5, + set_seed = TRUE, + seed_number = 1234L, + verbose = TRUE, + toplevel_params = deprecated(), + toplevel = 1L, + ...) { # NSE vars cell_ID <- NULL + toplevel <- deprecate_param( + toplevel_params, toplevel, fun = "runUMAP",when = "4.1.2" + ) + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2155,7 +2538,7 @@ runUMAP <- function( ## using dimension reduction ## if (!is.null(dim_reduction_to_use)) { ## TODO: check if reduction exists - dimObj_to_use <- get_dimReduction( + dimObj_to_use <- getDimReduction( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, @@ -2249,18 +2632,6 @@ runUMAP <- function( if (return_gobject == TRUE) { - umap_names <- list_dim_reductions_names( - gobject = gobject, - data_type = reduction, - spat_unit = spat_unit, - feat_type = feat_type, - dim_type = "umap" - ) - - if (name %in% umap_names) { - message(name, " has already been used, will be overwritten") - } - coordinates <- uwot_clus rownames(coordinates) <- rownames(matrix_to_use) @@ -2278,10 +2649,7 @@ runUMAP <- function( ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction( - gobject = gobject, - dimObject = dimObject - ) + gobject <- setGiotto(gobject, dimObject, verbose = verbose) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -2290,7 +2658,7 @@ runUMAP <- function( gobject <- update_giotto_params(gobject, description = "_umap", return_gobject = TRUE, - toplevel = toplevel_params + toplevel = toplevel + 1L ) return(gobject) } else { @@ -2331,50 +2699,59 @@ runUMAP <- function( #' @param set_seed use of seed #' @param seed_number seed number to use #' @param verbose verbosity of function -#' @param toplevel_params parameters to extract +#' @param toplevel_params deprecated +#' @param toplevel relative stackframe where call was made from #' @param ... 
additional UMAP parameters #' @returns giotto object with updated UMAP dimension reduction #' @details See \code{\link[uwot]{umap}} for more information about these and #' other parameters. #' \itemize{ -#' \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') +#' \item Input for UMAP dimension reduction can be another dimension +#' reduction (default = 'pca') #' \item To use gene expression as input set dim_reduction_to_use = NULL -#' \item If dim_reduction_to_use = NULL, feats_to_use can be used to select a column name of -#' highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes -#' \item multiple UMAP results can be stored by changing the \emph{name} of the analysis +#' \item If dim_reduction_to_use = NULL, feats_to_use can be used to select a +#' column name of +#' highly variable genes (see \code{\link{calculateHVF}}) or simply provide a +#' vector of genes +#' \item multiple UMAP results can be stored by changing the \emph{name} of +#' the analysis #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' #' runUMAPprojection(g) #' @export -runUMAPprojection <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - random_subset = 500, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - n_neighbors = 40, - n_components = 2, - n_epochs = 400, - min_dist = 0.01, - n_threads = NA, - spread = 5, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - toplevel_params = 2, - ...) { +runUMAPprojection <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + random_subset = 500, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + n_neighbors = 40, + n_components = 2, + n_epochs = 400, + min_dist = 0.01, + n_threads = NA, + spread = 5, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel_params = deprecated(), + toplevel = 1L, + ...) 
{ # NSE vars cell_ID <- NULL + toplevel <- deprecate_param( + toplevel_params, toplevel, fun = "runUMAPprojection", when = "4.1.2" + ) + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2525,18 +2902,6 @@ runUMAPprojection <- function( if (isTRUE(return_gobject)) { - umap_names <- list_dim_reductions_names( - gobject = gobject, - data_type = reduction, - spat_unit = spat_unit, - feat_type = feat_type, - dim_type = "umap" - ) - - if (name %in% umap_names) { - message(name, " has already been used, will be overwritten") - } - coordinates <- coords_umap @@ -2553,7 +2918,7 @@ runUMAPprojection <- function( ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction(gobject = gobject, dimObject = dimObject) + gobject <- setGiotto(gobject, dimObject, verbose = verbose) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## @@ -2561,7 +2926,7 @@ runUMAPprojection <- function( gobject, description = "_umap", return_gobject = TRUE, - toplevel = toplevel_params + toplevel = toplevel + 1L ) return(gobject) } else { @@ -2598,42 +2963,49 @@ runUMAPprojection <- function( #' @param set_seed use of seed #' @param seed_number seed number to use #' @param verbose verbosity of the function +#' @param toplevel relative stackframe where call was made from #' @param ... additional tSNE parameters -#' @returns giotto object with updated tSNE dimension recuction +#' @returns giotto object with updated tSNE dimension reduction #' @details See \code{\link[Rtsne]{Rtsne}} for more information about these and #' other parameters. \cr #' \itemize{ -#' \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca') +#' \item Input for tSNE dimension reduction can be another dimension +#' reduction (default = 'pca') #' \item To use gene expression as input set dim_reduction_to_use = NULL -#' \item If dim_reduction_to_use = NULL, feats_to_use can be used to select a column name of -#' highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes -#' \item multiple tSNE results can be stored by changing the \emph{name} of the analysis +#' \item If dim_reduction_to_use = NULL, feats_to_use can be used to select +#' a column name of highly variable genes +#' (see \code{\link{calculateHVF}}) or simply provide a vector of genes +#' \item multiple tSNE results can be stored by changing the \emph{name} of +#' the analysis #' } #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' #' runtSNE(g) #' @export -runtSNE <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reduction = c("cells", "feats"), - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - return_gobject = TRUE, - dims = 2, - perplexity = 30, - theta = 0.5, - do_PCA_first = FALSE, - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE, - ...) { +runtSNE <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + dims = 2, + perplexity = 30, + theta = 0.5, + do_PCA_first = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel = 1L, + ...) 
{ + + package_check("Rtsne") + # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2752,16 +3124,6 @@ runtSNE <- function( if (isTRUE(return_gobject)) { - tsne_names <- list_dim_reductions_names( - gobject = gobject, data_type = reduction, - spat_unit = spat_unit, feat_type = feat_type, - dim_type = "tsne" - ) - - if (name %in% tsne_names) { - cat(name, " has already been used, will be overwritten") - } - coordinates <- tsne_clus$Y rownames(coordinates) <- rownames(matrix_to_use) @@ -2778,20 +3140,19 @@ runtSNE <- function( ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction( - gobject = gobject, - dimObject = dimObject - ) + gobject <- setGiotto(gobject, dimObject, verbose = verbose) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## - gobject <- update_giotto_params(gobject, description = "_tsne") + gobject <- update_giotto_params( + gobject, description = "_tsne", toplevel = toplevel + 1L + ) return(gobject) } else { return(tsne_clus_pos_DT) } } else if (reduction == "feats") { - message("Not yet implemented") + stop("Not yet implemented") } } @@ -2810,20 +3171,18 @@ runtSNE <- function( #' @param gobject giotto object #' @param spat_unit spatial unit #' @param feat_type feature type -#' @param vars_use If meta_data is dataframe, this defines which variable(s) to -#' remove (character vector). -#' @param do_pca Whether to perform PCA on input matrix. -#' @param expression_values expression values to use +#' @param vars_use character vector. Which variable(s) in metadata +#' for harmony to remove #' @param reduction reduction on cells or features #' @param dim_reduction_to_use use another dimension reduction set as input #' @param dim_reduction_name name of dimension reduction set to use #' @param dimensions_to_use number of dimensions to use as input #' @param name arbitrary name for Harmony run -#' @param feats_to_use if dim_reduction_to_use = NULL, which feats to use #' @param set_seed use of seed #' @param seed_number seed number to use #' @param return_gobject boolean: return giotto object (default = TRUE) -#' @param toplevel_params parameters to extract +#' @param toplevel_params deprecated +#' @param toplevel relative stackframe where call was made from #' @param verbose be verbose #' @param ... additional \code{\link[harmony]{HarmonyMatrix}} parameters #' @returns giotto object with updated Harmony dimension reduction @@ -2834,25 +3193,27 @@ runtSNE <- function( #' #' runGiottoHarmony(g, vars_use = "leiden_clus") #' @export -runGiottoHarmony <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - vars_use = "list_ID", - do_pca = FALSE, - expression_values = c("normalized", "scaled", "custom"), - reduction = "cells", - dim_reduction_to_use = "pca", - dim_reduction_name = NULL, - dimensions_to_use = 1:10, - name = NULL, - feats_to_use = NULL, - set_seed = TRUE, - seed_number = 1234, - toplevel_params = 2, - return_gobject = TRUE, - verbose = NULL, - ...) { +runGiottoHarmony <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + vars_use = "list_ID", + reduction = "cells", + dim_reduction_to_use = "pca", + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + name = NULL, + set_seed = TRUE, + seed_number = 1234, + toplevel_params = deprecated(), + toplevel = 1L, + return_gobject = TRUE, + verbose = NULL, + ...) 
{ + + toplevel <- deprecate_param( + toplevel_params, toplevel, fun = "runGiottoHarmony", when = "4.1.2" + ) + # verify if optional package is installed package_check(pkg_name = "harmony", repository = "CRAN") @@ -2906,61 +3267,20 @@ runGiottoHarmony <- function( } - - - # set cores to use - # n_threads = determine_cores(cores = n_threads) - - ## using dimension reduction ## - if (!is.null(dim_reduction_to_use)) { - ## TODO: check if reduction exists - matrix_to_use <- get_dimReduction( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - reduction = reduction, # set to spat_unit? - reduction_method = dim_reduction_to_use, - name = dim_reduction_name, - output = "dimObj" - ) - provenance <- prov(matrix_to_use) - matrix_to_use <- matrix_to_use[] - - matrix_to_use <- matrix_to_use[, dimensions_to_use] - } else { - ## using original matrix ## - # expression values to be used - values <- match.arg( - expression_values, - unique(c("normalized", "scaled", "custom", expression_values)) - ) - expr_values <- getExpression( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - values = values, - output = "exprObj" - ) - - provenance <- prov(expr_values) - expr_values <- expr_values[] # extract matrix - - - ## subset matrix - if (!is.null(feats_to_use)) { - expr_values <- .create_feats_to_use_matrix( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - sel_matrix = expr_values, - feats_to_use = feats_to_use, - verbose = verbose - ) - } + matrix_to_use <- getDimReduction( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction = reduction, + reduction_method = dim_reduction_to_use, + name = dim_reduction_name, + output = "dimObj" + ) + provenance <- prov(matrix_to_use) + matrix_to_use <- matrix_to_use[] - matrix_to_use <- t_flex(expr_values) - } + matrix_to_use <- matrix_to_use[, dimensions_to_use] # get metadata metadata <- pDataDT(gobject, feat_type = feat_type, spat_unit = spat_unit) @@ -2976,7 +3296,6 @@ runGiottoHarmony <- function( data_mat = matrix_to_use, meta_data = metadata, vars_use = vars_use, - do_pca = do_pca, ... ) @@ -2984,14 +3303,14 @@ runGiottoHarmony <- function( colnames(harmony_results) <- paste0("Dim.", seq_len(ncol(harmony_results))) rownames(harmony_results) <- rownames(matrix_to_use) - harmdimObject <- create_dim_obj( + harmdimObject <- createDimObj( + coordinates = harmony_results, name = name, spat_unit = spat_unit, feat_type = feat_type, - provenance = provenance, + method = "harmony", reduction = "cells", # set to spat_unit? - reduction_method = "harmony", - coordinates = harmony_results, + provenance = provenance, misc = NULL ) @@ -3013,10 +3332,7 @@ runGiottoHarmony <- function( } ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_dimReduction( - gobject = gobject, - dimObject = harmdimObject - ) + gobject <- setGiotto(gobject, harmdimObject, verbose = FALSE) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -3024,7 +3340,7 @@ runGiottoHarmony <- function( gobject <- update_giotto_params(gobject, description = "_harmony", return_gobject = TRUE, - toplevel = toplevel_params + toplevel = toplevel + 1L ) return(gobject) } else { diff --git a/R/feature_set_enrichment.R b/R/feature_set_enrichment.R index 8bb2568cb..5bfa48277 100644 --- a/R/feature_set_enrichment.R +++ b/R/feature_set_enrichment.R @@ -11,9 +11,8 @@ #' @param output_folder path to which the GSEA results will be saved. Default #' is current working directory. 
#' @param name_analysis_folder default output subdirectory prefix to which -#' results are saved. -#' Will live within output_folder; equivalent of -#' "Analysis Name" in GSEA Application. +#' results are saved. Will live within output_folder; equivalent of +#' "Analysis Name" in GSEA Application. #' @param collapse only 'false' is supported. This will use your dataset as-is, #' in the original format. #' @param mode option selected in Advanced Field "Collapsing Mode for @@ -50,27 +49,26 @@ #' please reference GSEA's documentation here: #' https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Syntax #' @export -doFeatureSetEnrichment <- function( - dryrun = TRUE, - path_to_GSEA = NULL, - GSEA_dataset = NULL, - GSEA_ranked_file = NULL, - output_folder = NULL, - name_analysis_folder = "my_GSEA_analysis", - collapse = "false", - mode = c( - "Abs_max_of_probes", - "Max_probe", - "Median_of_probes", - "Mean_of_probes", - "Sum_of_probes" - ), - norm = "meandiv", - nperm = 1000, - scoring_scheme = "weighted", - plot_top_x = 20, - set_max = 500, - set_min = 15) { +doFeatureSetEnrichment <- function(dryrun = TRUE, + path_to_GSEA = NULL, + GSEA_dataset = NULL, + GSEA_ranked_file = NULL, + output_folder = NULL, + name_analysis_folder = "my_GSEA_analysis", + collapse = "false", + mode = c( + "Abs_max_of_probes", + "Max_probe", + "Median_of_probes", + "Mean_of_probes", + "Sum_of_probes" + ), + norm = "meandiv", + nperm = 1000, + scoring_scheme = "weighted", + plot_top_x = 20, + set_max = 500, + set_min = 15) { # set don't run to false as a start dont_run <- FALSE diff --git a/R/filter.R b/R/filter.R new file mode 100644 index 000000000..1ca2e6177 --- /dev/null +++ b/R/filter.R @@ -0,0 +1,666 @@ +### Filter Values #### + + + + +#' @title filterDistributions +#' @name filterDistributions +#' @description show gene or cell distribution after filtering on expression +#' threshold +#' @param gobject giotto object +#' @param feat_type feature type +#' @param spat_unit spatial unit +#' @param expression_values expression values to use +#' @param method method to create distribution (see details) +#' @param expression_threshold threshold to consider a gene expressed +#' @param detection consider features (e.g. genes) or cells +#' @param plot_type type of plot +#' @param scale_y scale y-axis (e.g. "log"), NULL = no scaling +#' @param nr_bins number of bins for histogram plot +#' @param fill_color fill color for plots +#' @param scale_axis ggplot transformation for axis (e.g. log2) +#' @param axis_offset offset to be used together with the scaling +#' transformation +#' @param show_plot logical. show plot +#' @param return_plot logical. return ggplot object +#' @param save_plot logical. directly save the plot +#' @param save_param list of saving parameters from +#' [GiottoVisuals::all_plots_save_function] +#' @param default_save_name default save name for saving, don't change, +#' change save_name in save_param +#' @returns ggplot object +#' @details +#' There are 3 ways to create a distribution profile and summarize it for +#' either the features or the cells (spatial units) \cr +#' \itemize{ +#' \item{1. threshold: calculate features that cross a thresold (default)} +#' \item{2. sum: summarize the features, i.e. total of a feature} +#' \item{3. mean: calculate mean of the features, i.e. 
average expression} +#' } +#' @md +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' filterDistributions(g) +#' @export +filterDistributions <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + method = c("threshold", "sum", "mean"), + expression_threshold = 1, + detection = c("feats", "cells"), + plot_type = c("histogram", "violin"), + scale_y = NULL, + nr_bins = 30, + fill_color = "lightblue", + scale_axis = "identity", + axis_offset = 0, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "filterDistributions") { + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + # expression values to be used + values <- match.arg( + expression_values, + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) + expr_values <- getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = values, + output = "matrix" + ) + + # plot distribution for feats or cells + detection <- match.arg(detection, c("feats", "cells")) + + # method to calculate distribution + method <- match.arg(method, c("threshold", "sum", "mean")) + + # plot type + plot_type <- match.arg(plot_type, c("histogram", "violin")) + + # variables + V1 <- NULL + + # for genes + if (detection == "feats") { + if (method == "threshold") { + feat_detection_levels <- data.table::as.data.table( + rowSums_flex(expr_values >= expression_threshold) + ) + mytitle <- "feat detected in # of cells" + } else if (method == "sum") { + feat_detection_levels <- data.table::as.data.table( + rowSums_flex(expr_values) + ) + mytitle <- "total sum of feature detected in all cells" + } else if (method == "mean") { + feat_detection_levels <- data.table::as.data.table( + rowMeans_flex(expr_values) + ) + mytitle <- "average of feature detected in all cells" + } + + y_title <- "count" + if (!is.null(scale_y)) { + feat_detection_levels[, V1 := do.call(what = scale_y, list(V1))] + y_title <- paste(scale_y, y_title) + } + + + + if (plot_type == "violin") { + pl <- ggplot2::ggplot() + pl <- pl + ggplot2::theme_classic() + pl <- pl + ggplot2::geom_violin( + data = feat_detection_levels, + ggplot2::aes(x = "feats", y = V1 + axis_offset), + fill = fill_color + ) + pl <- pl + ggplot2::scale_y_continuous(trans = scale_axis) + pl <- pl + ggplot2::labs(y = mytitle, x = "") + } else if (plot_type == "histogram") { + pl <- ggplot2::ggplot() + pl <- pl + ggplot2::theme_classic() + pl <- pl + ggplot2::geom_histogram( + data = feat_detection_levels, + ggplot2::aes(x = V1 + axis_offset), + color = "white", bins = nr_bins, fill = fill_color + ) + pl <- pl + ggplot2::scale_x_continuous(trans = scale_axis) + pl <- pl + ggplot2::labs(x = mytitle, y = y_title) + } + + # for cells + } else if (detection == "cells") { + if (method == "threshold") { + cell_detection_levels <- data.table::as.data.table( + colSums_flex(expr_values >= expression_threshold) + ) + mytitle <- "feats detected per cell" + } else if (method == "sum") { + cell_detection_levels <- data.table::as.data.table( + colSums_flex(expr_values) + ) + mytitle <- "total features per cell" + } else if (method == "mean") { + cell_detection_levels <- data.table::as.data.table( + colMeans_flex(expr_values) + ) + mytitle <- "average number of features per 
cell" + } + + y_title <- "count" + if (!is.null(scale_y)) { + cell_detection_levels[, V1 := do.call(what = scale_y, list(V1))] + y_title <- paste(scale_y, y_title) + } + + + + if (plot_type == "violin") { + pl <- ggplot2::ggplot() + pl <- pl + ggplot2::theme_classic() + pl <- pl + ggplot2::geom_violin( + data = cell_detection_levels, + ggplot2::aes(x = "cells", y = V1 + axis_offset), + fill = fill_color + ) + pl <- pl + ggplot2::scale_y_continuous(trans = scale_axis) + pl <- pl + ggplot2::labs(y = mytitle, x = "") + } else if (plot_type == "histogram") { + pl <- ggplot2::ggplot() + pl <- pl + ggplot2::theme_classic() + pl <- pl + ggplot2::geom_histogram( + data = cell_detection_levels, + ggplot2::aes(x = V1 + axis_offset), + color = "white", bins = nr_bins, fill = fill_color + ) + pl <- pl + ggplot2::scale_x_continuous(trans = scale_axis) + pl <- pl + ggplot2::labs(x = mytitle, y = y_title) + } + } + + return(GiottoVisuals::plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = NULL + )) +} + + + +#' @title filterCombinations +#' @name filterCombinations +#' @description Shows how many genes and cells are lost with combinations of +#' thresholds. +#' @inheritParams data_access_params +#' @inheritParams plot_output_params +#' @param expression_values expression values to use +#' @param expression_thresholds all thresholds to consider a gene expressed +#' @param feat_det_in_min_cells minimum # of cells that need to express a +#' feature +#' @param min_det_feats_per_cell minimum # of features that need to be +#' detected in a cell +#' @param scale_x_axis ggplot transformation for x-axis (e.g. log2) +#' @param x_axis_offset x-axis offset to be used together with the scaling +#' transformation +#' @param scale_y_axis ggplot transformation for y-axis (e.g. log2) +#' @param y_axis_offset y-axis offset to be used together with the scaling +#' transformation +#' @returns list of data.table and ggplot object +#' @details Creates a scatterplot that visualizes the number of genes and +#' cells that are lost with a specific combination of a gene and cell +#' threshold given an arbitrary cutoff to call a gene expressed. This function +#' can be used to make an informed decision at the filtering step with +#' filterGiotto. 
+#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' filterCombinations(g) +#' @export +filterCombinations <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + expression_thresholds = c(1, 2), + feat_det_in_min_cells = c(5, 50), + min_det_feats_per_cell = c(200, 400), + scale_x_axis = "identity", + x_axis_offset = 0, + scale_y_axis = "identity", + y_axis_offset = 0, + show_plot = TRUE, + return_plot = FALSE, + save_plot = NULL, + save_param = list(), + default_save_name = "filterCombinations") { + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + + # expression values to be used + values <- match.arg( + expression_values, + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) + expr_values <- getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = values + )[] + + # feat and cell minimums need to have the same length + if (length(feat_det_in_min_cells) != length(min_det_feats_per_cell)) { + stop("\n feat_det_in_min_cells and min_det_feats_per_cell need to be + the same size \n") + } + + # compute the number of removed feats and cells + result_list <- list() + for (thresh_i in seq_along(expression_thresholds)) { + threshold <- expression_thresholds[thresh_i] + + det_feats_res <- list() + det_cells_res <- list() + for (combn_i in seq_along(feat_det_in_min_cells)) { + min_cells_for_feat <- feat_det_in_min_cells[combn_i] + min_feats_per_cell <- min_det_feats_per_cell[combn_i] + + + # first remove feats + filter_index_feats <- rowSums_flex( + expr_values >= threshold + ) >= min_cells_for_feat + removed_feats <- length(filter_index_feats[ + filter_index_feats == FALSE + ]) + det_cells_res[[combn_i]] <- removed_feats + + # then remove cells + filter_index_cells <- colSums_flex(expr_values[ + filter_index_feats, + ] >= threshold) >= min_feats_per_cell + removed_cells <- length(filter_index_cells[ + filter_index_cells == FALSE + ]) + det_feats_res[[combn_i]] <- removed_cells + } + + temp_dt <- data.table::data.table( + "threshold" = threshold, + removed_feats = unlist(det_cells_res), + removed_cells = unlist(det_feats_res) + ) + + result_list[[thresh_i]] <- temp_dt + } + + result_DT <- do.call("rbind", result_list) + + # data.table variables + feat_detected_in_min_cells <- min_detected_feats_per_cell <- + combination <- NULL + + result_DT[["feat_detected_in_min_cells"]] <- feat_det_in_min_cells + result_DT[["min_detected_feats_per_cell"]] <- min_det_feats_per_cell + result_DT[["combination"]] <- paste0( + result_DT$feat_detected_in_min_cells, "-", + result_DT$min_detected_feats_per_cell + ) + + result_DT <- result_DT[, .( + threshold, + feat_detected_in_min_cells, + min_detected_feats_per_cell, + combination, + removed_feats, + removed_cells + )] + + maximum_x_value <- max(result_DT[["removed_cells"]], na.rm = TRUE) + maximum_y_value <- max(result_DT[["removed_feats"]], na.rm = TRUE) + + pl <- ggplot2::ggplot() + pl <- pl + ggplot2::theme_classic() + pl <- pl + ggplot2::geom_line(data = result_DT, aes( + x = removed_cells + x_axis_offset, + y = removed_feats + y_axis_offset, + group = as.factor(threshold) + ), linetype = 2) + pl <- pl + ggplot2::geom_point(data = result_DT, aes( + x = removed_cells + x_axis_offset, + y = removed_feats + y_axis_offset, + color = 
as.factor(threshold) + )) + pl <- pl + scale_color_discrete( + guide = guide_legend(title = "threshold(s)") + ) + pl <- pl + geom_text_repel(data = result_DT, aes( + x = removed_cells + x_axis_offset, + y = removed_feats + y_axis_offset, + label = combination + )) + pl <- pl + ggplot2::scale_x_continuous( + trans = scale_x_axis, limits = c(0, maximum_x_value) + ) + pl <- pl + ggplot2::scale_y_continuous( + trans = scale_y_axis, limits = c(0, maximum_y_value) + ) + pl <- pl + ggplot2::labs( + x = "number of removed cells", y = "number of removed feats" + ) + + + return(plot_output_handler( + gobject = gobject, + plot_object = pl, + save_plot = save_plot, + return_plot = return_plot, + show_plot = show_plot, + default_save_name = default_save_name, + save_param = save_param, + else_return = list(results = result_DT, ggplot = pl) + )) +} + + +#' @title filterGiotto +#' @name filterGiotto +#' @description filter Giotto object based on expression threshold +#' @param gobject giotto object +#' @param spat_unit character. spatial unit. If more than one is provided then +#' the first will be filtered, the filtering results will be applied across the +#' other spat_units provided +#' @param feat_type character. feature type. If more than one is provided then +#' the first will be filtered, the filtering results will be applied across the +#' other feat_types provided. +#' @param expression_values expression values to use +#' @param expression_threshold threshold to consider a gene expressed +#' @param feat_det_in_min_cells minimum # of cells that need to express a +#' feature +#' @param min_det_feats_per_cell minimum # of features that need to be detected +#' in a cell +#' @param all_spat_units deprecated. Use spat_unit_fsub = ":all:" +#' @param all_feat_types deprecated. Use feat_type_ssub = ":all:" +#' @param spat_unit_fsub character vector. (default = ':all:') limit features +#' to remove results to selected spat_units +#' @param feat_type_ssub character vector. (default = ':all:') limit cells to +#' remove results to selected feat_types +#' @param poly_info polygon information to use +#' @param tag_cells tag filtered cells in metadata vs. remove cells +#' @param tag_cell_name column name for tagged cells in metadata +#' @param tag_feats tag features in metadata vs. remove features +#' @param tag_feats_name column name for tagged features in metadata +#' @param verbose verbose +#' +#' @returns giotto object +#' @details The function \code{\link{filterCombinations}} can be used to +#' explore the effect of different parameter values. +#' Please note that this function filters data in a predefined order, features, +#' then cells. +#' After filtering in this order, certain features may be left over in the +#' metadata with a corresponding number of cells which is less than that of +#' the threshold value of cells, +#' feat_det_in_min_cells. 
This behavior is explained in detail here: +#' \url{https://github.com/drieslab/Giotto/issues/500#issuecomment-1396083446} +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' filterGiotto(g) +#' @export +filterGiotto <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("raw", "normalized", "scaled", "custom"), + expression_threshold = 1, + feat_det_in_min_cells = 100, + min_det_feats_per_cell = 100, + spat_unit_fsub = ":all:", + feat_type_ssub = ":all:", + all_spat_units = NULL, + all_feat_types = NULL, + poly_info = NULL, + tag_cells = FALSE, + tag_cell_name = "tag", + tag_feats = FALSE, + tag_feats_name = "tag", + verbose = TRUE) { + # data.table vars + cell_ID <- feat_ID <- NULL + + # handle deprecations + if (!is.null(all_spat_units)) { + if (all_spat_units) { + spat_unit_fsub <- ":all:" + } else { + spat_unit_fsub <- spat_unit + } + + warning(wrap_txt( + 'filterGiotto: + all_spat_units param is deprecated. + Please use spat_unit_fsub = \":all:\" instead. (this is the default)' + )) + } + if (!is.null(all_feat_types)) { + if (all_feat_types) { + feat_type_ssub <- ":all:" + } else { + feat_type_ssub <- feat_type + } + + warning(wrap_txt( + 'filterGiotto: all_feat_types param is deprecated. + Please use feat_type_ssub = \":all:\" instead. + (this is the default)' + )) + } + + + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + # set poly_info + if (is.null(poly_info)) { + poly_info <- spat_unit + } + + if (verbose && length(spat_unit) > 1L) { + wrap_msg( + "More than one spat_unit provided.\n", + paste0("[", spat_unit[[1L]], "]"), + "filtering results will be applied across spat_units:", spat_unit + ) + } + if (verbose && length(feat_type) > 1L) { + wrap_msg( + "More than one feat_type provided.\n", + paste0("[", feat_type[[1L]], "]"), + "filtering results will be applied across spat_units:", feat_type + ) + } + + + # expression values to be used + values <- match.arg( + expression_values, + unique(c("raw", "normalized", "scaled", "custom", expression_values)) + ) + + # get expression values to perform filtering on + # Only the first spat_unit and feat_type provided are filtered. + # IF there are additional spat_units and feat_types provided, then the + # filtering + # results from this round will be applied to the other provided spat_units + # and feat_types as well. + expr_values <- getExpression( + gobject = gobject, + spat_unit = spat_unit[[1L]], + feat_type = feat_type[[1L]], + values = values, + output = "matrix" + ) + + # approach: + # 1. first remove genes that are not frequently detected + # 2. 
then remove cells that do not have sufficient detected genes + + ## filter features + filter_index_feats <- rowSums_flex( + expr_values >= expression_threshold + ) >= feat_det_in_min_cells + selected_feat_ids <- names(filter_index_feats[filter_index_feats == TRUE]) + + + + ## filter cells + filter_index_cells <- colSums_flex(expr_values[ + filter_index_feats, + ] >= expression_threshold) >= min_det_feats_per_cell + selected_cell_ids <- names(filter_index_cells[filter_index_cells == TRUE]) + + + + # update cell metadata + if (isTRUE(tag_cells)) { + cell_meta <- getCellMetadata(gobject = gobject, copy_obj = TRUE) + cell_meta[][, c(tag_cell_name) := ifelse( + cell_ID %in% selected_cell_ids, 0, 1 + )] + gobject <- setCellMetadata( + gobject = gobject, x = cell_meta, initialize = FALSE + ) + + # set selected cells back to all cells + selected_cell_ids <- names(filter_index_cells) + } + + if (isTRUE(tag_feats)) { + feat_meta <- getFeatureMetadata(gobject = gobject, copy_obj = TRUE) + feat_meta[][, c(tag_feats_name) := ifelse( + feat_ID %in% selected_feat_ids, 0, 1 + )] + gobject <- setFeatureMetadata( + gobject = gobject, x = feat_meta, initialize = FALSE + ) + + # set selected feats back to all feats + selected_feat_ids <- names(filter_index_feats) + } + + + + # update feature metadata + newGiottoObject <- subsetGiotto( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + cell_ids = selected_cell_ids, + feat_ids = selected_feat_ids, + spat_unit_fsub = spat_unit_fsub, + feat_type_ssub = feat_type_ssub, + poly_info = poly_info, + verbose = verbose + ) + + ## print output ## + removed_feats <- length(filter_index_feats[filter_index_feats == FALSE]) + total_feats <- length(filter_index_feats) + + removed_cells <- length(filter_index_cells[filter_index_cells == FALSE]) + total_cells <- length(filter_index_cells) + + if (isTRUE(verbose)) { + cat("\n") + cat("Feature type: ", feat_type, "\n") + + if (isTRUE(tag_cells)) { + cat( + "Number of cells tagged: ", removed_cells, " out of ", + total_cells, "\n" + ) + } else { + cat( + "Number of cells removed: ", removed_cells, " out of ", + total_cells, "\n" + ) + } + + if (isTRUE(tag_feats)) { + cat( + "Number of feats tagged: ", removed_feats, " out of ", + total_feats, "\n" + ) + } else { + cat( + "Number of feats removed: ", removed_feats, " out of ", + total_feats, "\n" + ) + } + } + + + ## update parameters used ## + + # Do not update downstream of processGiotto + # Parameters will be updated within processGiotto + try( + { + upstream_func <- sys.call(-2) + fname <- as.character(upstream_func[[1]]) + if (fname == "processGiotto") { + return(newGiottoObject) + } + }, + silent = TRUE + ) + + + # If this function call is not downstream of processGiotto, update normally + newGiottoObject <- update_giotto_params( + newGiottoObject, + description = "_filter" + ) + + return(newGiottoObject) +} diff --git a/R/general_help.R b/R/general_help.R index c10ae4ffb..c993db7e0 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -7,9 +7,8 @@ #' @description calculate gini coefficient #' @keywords internal #' @returns gini coefficient -mygini_fun <- function( - x, - weights = rep(1, length(x))) { +mygini_fun <- function(x, + weights = rep(1, length(x))) { # adapted from R package GiniWegNeg dataset <- cbind(x, weights) ord_x <- order(x) @@ -37,10 +36,9 @@ mygini_fun <- function( #' @description calculate gini coefficient on a minimum length vector #' @keywords internal #' @returns gini coefficient -extended_gini_fun <- function( - x, - weights = 
rep(1, length = length(x)), - minimum_length = 16) { +extended_gini_fun <- function(x, + weights = rep(1, length = length(x)), + minimum_length = 16) { if (length(x) < minimum_length) { difference <- minimum_length - length(x) min_value <- min(x) @@ -59,11 +57,10 @@ extended_gini_fun <- function( #' @description create binarized scores from a vector using kmeans #' @returns numeric #' @keywords internal -.kmeans_binarize <- function( - x, - nstart = 3, - iter.max = 10, - seed = NULL) { +.kmeans_binarize <- function(x, + nstart = 3, + iter.max = 10, + seed = NULL) { if (!is.null(seed)) { on.exit(random_seed(), add = TRUE) set.seed(seed) @@ -134,12 +131,11 @@ extended_gini_fun <- function( #' kmeans_arma #' @returns numeric #' @keywords internal -.kmeans_arma_subset_binarize <- function( - x, - n_iter = 5, - extreme_nr = 20, - sample_nr = 200, - seed = NULL) { +.kmeans_arma_subset_binarize <- function(x, + n_iter = 5, + extreme_nr = 20, + sample_nr = 200, + seed = NULL) { length_x <- length(x) vector_x <- sort(x) @@ -190,14 +186,15 @@ extended_gini_fun <- function( #' @description wrapper for different binarization functions #' @returns matrix #' @keywords internal -kmeans_binarize_wrapper <- function(expr_values, - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - seed = NULL) { +kmeans_binarize_wrapper <- function( + expr_values, + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + seed = NULL) { # expression values if (!is.null(subset_feats)) { expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] @@ -254,10 +251,9 @@ kmeans_binarize_wrapper <- function(expr_values, #' @description wrapper for rank binarization function #' @returns matrix #' @keywords internal -rank_binarize_wrapper <- function( - expr_values, - subset_feats = NULL, - percentage_rank = 30) { +rank_binarize_wrapper <- function(expr_values, + subset_feats = NULL, + percentage_rank = 30) { # expression values if (!is.null(subset_feats)) { expr_values <- expr_values[rownames(expr_values) %in% subset_feats, ] @@ -276,44 +272,50 @@ rank_binarize_wrapper <- function( #' @title writeChatGPTqueryDEG #' @name writeChatGPTqueryDEG -#' @description This function writes a query as a .txt file that can be used with -#' ChatGPT or a similar LLM service to find the most likely cell types based on the -#' top differential expressed genes (DEGs) between identified clusters. -#' @param DEG_output the output format from the differenetial expression functions +#' @description This function writes a query as a .txt file that can be used +#' with ChatGPT or a similar LLM service to find the most likely cell types +#' based on the top differential expressed genes (DEGs) between identified +#' clusters. +#' @param DEG_output the output format from the differential expression +#' functions #' @param top_n_genes number of genes for each cluster #' @param tissue_type tissue type #' @param folder_name path to the folder where you want to save the .txt file #' @param file_name name of .txt file #' @returns writes a .txt file to the desired location -#' @details This function does not run any LLM service. It simply creates the .txt -#' file that can then be used any LLM service (e.g. OpenAI, Gemini, ...) +#' @details This function does not run any LLM service. 
It simply creates the +#' .txt file that can then be used any LLM service (e.g. OpenAI, Gemini, ...) #' @export -writeChatGPTqueryDEG = function(DEG_output, - top_n_genes = 10, - tissue_type = 'human breast cancer', - folder_name = getwd(), - file_name = 'chatgpt_query.txt') { - - chatgpt_query = paste0("Identify cell types of ", tissue_type, " tissue using the following markers. Identify one cell type for each row. Only provide the cell type name and the marker genes used for cell type identification.") - - selected_DEG_output = DEG_output[, head(.SD, top_n_genes), by="cluster"] - - finallist = list() - finallist[[1]] = chatgpt_query - - for(clus in unique(selected_DEG_output$cluster)) { - x = selected_DEG_output[cluster == clus][['feats']] - x = c(clus, x) - finallist[[as.numeric(clus)+1]] = x - } - - outputdt = data.table::data.table(finallist) +writeChatGPTqueryDEG <- function(DEG_output, + top_n_genes = 10, + tissue_type = "human breast cancer", + folder_name = getwd(), + file_name = "chatgpt_query.txt") { + chatgpt_query <- paste0( + "Identify cell types of ", tissue_type, + " tissue using the following markers. Identify one cell type for each + row. Only provide the cell type name and the marker genes used for cell + type identification.") + + selected_DEG_output <- DEG_output[, head(.SD, top_n_genes), by = "cluster"] + + finallist <- list() + finallist[[1]] <- chatgpt_query + + for (clus in unique(selected_DEG_output$cluster)) { + x <- selected_DEG_output[cluster == clus][["feats"]] + x <- c(clus, x) + finallist[[as.numeric(clus) + 1]] <- x + } - cat('\n start writing \n') - data.table::fwrite(x = outputdt, - file = paste0(folder_name,'/', file_name), - sep2 = c(""," ",""), col.names = F) + outputdt <- data.table::data.table(finallist) + cat("\n start writing \n") + data.table::fwrite( + x = outputdt, + file = paste0(folder_name, "/", file_name), + sep2 = c("", " ", ""), col.names = FALSE + ) } @@ -330,9 +332,8 @@ writeChatGPTqueryDEG = function(DEG_output, #' @returns expression matrix with gene symbols as rownames #' @details This function requires that the biomaRt library is installed #' @export -convertEnsemblToGeneSymbol <- function( - matrix, - species = c("mouse", "human")) { +convertEnsemblToGeneSymbol <- function(matrix, + species = c("mouse", "human")) { # data.table: set global variable dupes <- mgi_symbol <- gene_symbol <- ensembl_gene_id <- hgnc_symbol <- NULL @@ -445,16 +446,17 @@ convertEnsemblToGeneSymbol <- function( #' @name gpoly_from_dfr_smoothed_wrapped #' @returns giottoPolygon #' @keywords internal -gpoly_from_dfr_smoothed_wrapped <- function(segmdfr, - name = "cell", - calc_centroids = FALSE, - smooth_polygons = FALSE, - vertices = 20L, - k = 3L, - set_neg_to_zero = TRUE, - skip_eval_dfr = FALSE, - copy_dt = TRUE, - verbose = TRUE) { +gpoly_from_dfr_smoothed_wrapped <- function( + segmdfr, + name = "cell", + calc_centroids = FALSE, + smooth_polygons = FALSE, + vertices = 20L, + k = 3L, + set_neg_to_zero = TRUE, + skip_eval_dfr = FALSE, + copy_dt = TRUE, + verbose = TRUE) { gpoly <- createGiottoPolygonsFromDfr( segmdfr = segmdfr, name = name, @@ -514,11 +516,10 @@ gpoly_from_dfr_smoothed_wrapped <- function(segmdfr, #' annotations are provided (e.g. ensembl gene ids and gene symbols) the user #' can select another column. 
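# Usage sketch (placeholder path, not code from this patch): selecting a
# different annotation column with get10Xmatrix(), as described above.
expr <- get10Xmatrix(
    path_to_data = "/path/to/outs/filtered_feature_bc_matrix",
    gene_column_index = 2,   # e.g. second features.tsv column (gene symbols)
    remove_zero_rows = TRUE
)
# with split_by_type = TRUE (the default) and mixed feature types, a list of
# matrices may be returned instead of a single matrix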
#' @export -get10Xmatrix <- function( - path_to_data, - gene_column_index = 1, - remove_zero_rows = TRUE, - split_by_type = TRUE) { +get10Xmatrix <- function(path_to_data, + gene_column_index = 1, + remove_zero_rows = TRUE, + split_by_type = TRUE) { # data.table variables total <- gene_symbol <- gene_id <- gene_id_num <- cell_id <- cell_id_num <- sort_gene_id_num <- NULL @@ -605,11 +606,10 @@ get10Xmatrix <- function( #' (e.g. expression vs QC probes) or modalities (e.g. RNA and protein), and #' \code{split_by_type} param is \code{TRUE}, multiple matrices will be returned #' @export -get10Xmatrix_h5 <- function( - path_to_data, - gene_ids = c("symbols", "ensembl"), - remove_zero_rows = TRUE, - split_by_type = TRUE) { +get10Xmatrix_h5 <- function(path_to_data, + gene_ids = c("symbols", "ensembl"), + remove_zero_rows = TRUE, + split_by_type = TRUE) { ## function inspired by and modified from the VISION package ## see read_10x_h5_v3 in ## https://github.com/YosefLab/VISION/blob/master/R/Utilities.R @@ -736,15 +736,15 @@ get10Xmatrix_h5 <- function( #' @param \dots additional params to pass to #' `[GiottoClass::createGiottoLargeImage]` #' @md +#' @returns 10xAffineImage #' @export -read10xAffineImage <- function( - file, imagealignment_path, name = "aligned_image", micron = 0.2125, ... -) { +read10xAffineImage <- function(file, imagealignment_path, + name = "aligned_image", micron = 0.2125, ...) { checkmate::assert_file_exists(file) checkmate::assert_file_exists(imagealignment_path) if (!is.numeric(micron)) { checkmate::assert_file_exists(micron) - micron <- jsonlite::read_json(micron)$pixel_size + micron <- read_json(micron)$pixel_size } aff <- data.table::fread(imagealignment_path) %>% @@ -761,7 +761,7 @@ read10xAffineImage <- function( x %>% affine(affine[seq(2), seq(2)]) %>% rescale(micron, x0 = 0, y0 = 0) %>% - spatShift(dx = affine[1,3] * micron, dy = -affine[2,3] * micron) + spatShift(dx = affine[1, 3] * micron, dy = -affine[2, 3] * micron) } @@ -794,19 +794,18 @@ read10xAffineImage <- function( #' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission #' issues. 
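# Usage sketch (placeholder path and fov numbers, not code from this patch):
# read-only HDF5 access as suggested in the @details above; the return value
# of readPolygonFilesVizgenHDF5_old() is not shown in this hunk.
vizgen_polys <- readPolygonFilesVizgenHDF5_old(
    boundaries_path = "/path/to/cell_boundaries",
    fovs = c(0, 1),
    polygon_feat_types = 0:6,
    H5Fopen_flags = "H5F_ACC_RDONLY"   # avoids write-lock permission issues
)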
#' @export -readPolygonFilesVizgenHDF5_old <- function( - boundaries_path, - fovs = NULL, - polygon_feat_types = 0:6, - custom_polygon_names = NULL, - flip_x_axis = FALSE, - flip_y_axis = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60, - set_neg_to_zero = FALSE, - H5Fopen_flags = "H5F_ACC_RDWR", - cores = NA, - verbose = TRUE) { +readPolygonFilesVizgenHDF5_old <- function(boundaries_path, + fovs = NULL, + polygon_feat_types = 0:6, + custom_polygon_names = NULL, + flip_x_axis = FALSE, + flip_y_axis = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60, + set_neg_to_zero = FALSE, + H5Fopen_flags = "H5F_ACC_RDWR", + cores = NA, + verbose = TRUE) { # necessary pkgs package_check(pkg_name = "rhdf5", repository = "Bioc") @@ -859,8 +858,8 @@ readPolygonFilesVizgenHDF5_old <- function( # append data from all FOVs to single list init <- proc.time() - progressr::with_progress({ - pb <- progressr::progressor(along = hdf5_boundary_selected_list) + with_pbar({ + pb <- pbar(along = hdf5_boundary_selected_list) read_list <- lapply_flex(seq_along(hdf5_boundary_selected_list), cores = cores, future.packages = c("rhdf5", "Rhdf5lib"), @@ -933,8 +932,8 @@ readPolygonFilesVizgenHDF5_old <- function( # create Giotto polygons and add them to gobject - progressr::with_progress({ - pb <- progressr::progressor(along = result_list_rbind) + with_pbar({ + pb <- pbar(along = result_list_rbind) smooth_cell_polygons_list <- lapply_flex(seq_along(result_list_rbind), cores = cores, function(i) { dfr_subset <- result_list_rbind[[i]][, .(x, y, cell_id)] @@ -1001,25 +1000,24 @@ readPolygonFilesVizgenHDF5_old <- function( #' @details Set H5Fopen_flags to "H5F_ACC_RDONLY" if you encounter permission #' issues. #' @export -readPolygonFilesVizgenHDF5 <- function( - boundaries_path, - fovs = NULL, - z_indices = 1L:7L, - segm_to_use = 1L, - custom_polygon_names = NULL, - flip_x_axis = FALSE, - flip_y_axis = TRUE, - calc_centroids = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60L, - set_neg_to_zero = FALSE, - H5Fopen_flags = "H5F_ACC_RDWR", - cores = determine_cores(), - create_gpoly_parallel = TRUE, - create_gpoly_bin = FALSE, - verbose = TRUE, - output = c("giottoPolygon", "data.table"), - polygon_feat_types = NULL) { +readPolygonFilesVizgenHDF5 <- function(boundaries_path, + fovs = NULL, + z_indices = 1L:7L, + segm_to_use = 1L, + custom_polygon_names = NULL, + flip_x_axis = FALSE, + flip_y_axis = TRUE, + calc_centroids = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60L, + set_neg_to_zero = FALSE, + H5Fopen_flags = "H5F_ACC_RDWR", + cores = determine_cores(), + create_gpoly_parallel = TRUE, + create_gpoly_bin = FALSE, + verbose = TRUE, + output = c("giottoPolygon", "data.table"), + polygon_feat_types = NULL) { # necessary pkgs package_check(pkg_name = "rhdf5", repository = "Bioc") @@ -1074,8 +1072,8 @@ readPolygonFilesVizgenHDF5 <- function( # append data from all FOVs to single list init <- Sys.time() - progressr::with_progress({ - pb <- progressr::progressor(length(hdf5_boundary_selected_list) / 5) + with_pbar({ + pb <- pbar(length(hdf5_boundary_selected_list) / 5) read_list <- lapply_flex(seq_along(hdf5_boundary_selected_list), future.packages = c("rhdf5", "Rhdf5lib"), function(init, z_indices, segm_to_use, bound_i) { @@ -1147,16 +1145,15 @@ readPolygonFilesVizgenHDF5 <- function( #' @keywords internal #' @noRd -.create_giotto_polygons_vizgen <- function( - z_read_DT, - poly_names = names(z_read_DT), - set_neg_to_zero = FALSE, - calc_centroids = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 
60L, - create_gpoly_parallel = TRUE, - create_gpoly_bin = FALSE, - verbose = TRUE) { +.create_giotto_polygons_vizgen <- function(z_read_DT, + poly_names = names(z_read_DT), + set_neg_to_zero = FALSE, + calc_centroids = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60L, + create_gpoly_parallel = TRUE, + create_gpoly_bin = FALSE, + verbose = TRUE) { checkmate::assert_list(z_read_DT) checkmate::assert_numeric(smooth_vertices) @@ -1167,8 +1164,8 @@ readPolygonFilesVizgenHDF5 <- function( # **** sequential method **** if (!isTRUE(create_gpoly_parallel)) { - progressr::with_progress({ - pb <- progressr::progressor(along = z_read_DT) + with_pbar({ + pb <- pbar(along = z_read_DT) smooth_cell_polygons_list <- lapply( seq_along(z_read_DT), function(i) { dfr_subset <- z_read_DT[[i]][, .(x, y, cell_id)] @@ -1193,7 +1190,8 @@ readPolygonFilesVizgenHDF5 <- function( ) } if (isTRUE(calc_centroids)) { - # NOTE: won't recalculate if centroids are already attached + # NOTE: won't recalculate if centroids are already + # attached cell_polygons <- centroids( cell_polygons, append_gpolygon = TRUE @@ -1213,8 +1211,8 @@ readPolygonFilesVizgenHDF5 <- function( # **** parallel methods **** # no binning if (!is.numeric(create_gpoly_bin)) { - progressr::with_progress({ - pb <- progressr::progressor(along = z_read_DT) + with_pbar({ + pb <- pbar(along = z_read_DT) smooth_cell_polygons_list <- lapply_flex( seq_along(z_read_DT), future.packages = c("terra", "stats", "data.table"), @@ -1282,8 +1280,8 @@ readPolygonFilesVizgenHDF5 <- function( bin_steps <- sum(unlist(lapply(dfr_subset, length))) - progressr::with_progress({ - pb <- progressr::progressor(steps = bin_steps) + with_pbar({ + pb <- pbar(steps = bin_steps) smooth_cell_polygons_list <- lapply( # sequential across z index seq_along(dfr_subset), function(i) { @@ -1355,10 +1353,11 @@ readPolygonFilesVizgenHDF5 <- function( #' @param verbose be verbose #' @returns giottoPolygons #' @export -readPolygonVizgenParquet <- function(file, - z_index = "all", - calc_centroids = TRUE, - verbose = TRUE) { +readPolygonVizgenParquet <- function( + file, + z_index = "all", + calc_centroids = TRUE, + verbose = TRUE) { # package checks package_check("arrow") package_check("sf") @@ -1468,18 +1467,17 @@ readPolygonVizgenParquet <- function(file, #' @returns giotto object or cell polygons list #' @seealso \code{\link{smoothGiottoPolygons}} #' @export -readPolygonFilesVizgen <- function( - gobject, - boundaries_path, - fovs = NULL, - polygon_feat_types = 0:6, - flip_x_axis = FALSE, - flip_y_axis = FALSE, - smooth_polygons = TRUE, - smooth_vertices = 60, - set_neg_to_zero = FALSE, - return_gobject = TRUE, - verbose = TRUE) { +readPolygonFilesVizgen <- function(gobject, + boundaries_path, + fovs = NULL, + polygon_feat_types = 0:6, + flip_x_axis = FALSE, + flip_y_axis = FALSE, + smooth_polygons = TRUE, + smooth_vertices = 60, + set_neg_to_zero = FALSE, + return_gobject = TRUE, + verbose = TRUE) { # define names poly_feat_names <- paste0("z", polygon_feat_types) poly_feat_indexes <- paste0("zIndex_", polygon_feat_types) @@ -1524,11 +1522,10 @@ readPolygonFilesVizgen <- function( #' @describeIn readPolygonFilesVizgen (internal) Optimized .hdf5 reading for #' vizgen merscope output. 
Returns a data.table of xyz coords and cell_id #' @keywords internal -.h5_read_vizgen <- function( - h5File, - z_indices = 1L:7L, - segm_to_use = "p_0", - H5Fopen_flags = "H5F_ACC_RDWR") { +.h5_read_vizgen <- function(h5File, + z_indices = 1L:7L, + segm_to_use = "p_0", + H5Fopen_flags = "H5F_ACC_RDWR") { # data.table vars group <- name <- cell <- z_name <- otype <- d_name <- cell_id <- NULL @@ -1629,9 +1626,8 @@ readPolygonFilesVizgen <- function( #' @param bin_size bin size to select from .gef file #' @returns transcript with coordinates #' @export -getGEFtxCoords <- function( - gef_file, - bin_size = "bin100") { +getGEFtxCoords <- function(gef_file, + bin_size = "bin100") { # data.table vars genes <- NULL diff --git a/R/giotto_viewer.R b/R/giotto_viewer.R index c21f66f75..ea8d84b41 100644 --- a/R/giotto_viewer.R +++ b/R/giotto_viewer.R @@ -6,10 +6,9 @@ #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_annotation <- function( - annotation, - annot_name = "test", - output_directory = getwd()) { +write_giotto_viewer_annotation <- function(annotation, + annot_name = "test", + output_directory = getwd()) { if (is.numeric(annotation) == TRUE) { # annotation information and mapping sorted_unique_numbers <- sort(unique(annotation)) @@ -27,7 +26,8 @@ write_giotto_viewer_annotation <- function( names(uniq_factor_num_converter) <- uniq_factors # annotation information and mapping - annot_map <- data.table::data.table(num = uniq_numerics, fac = uniq_factors) + annot_map <- data.table::data.table( + num = uniq_numerics, fac = uniq_factors) annot_information <- uniq_factor_num_converter[annotation] } @@ -50,16 +50,16 @@ write_giotto_viewer_annotation <- function( #' @title write_giotto_viewer_numeric_annotation -#' @description write out numeric annotation data from a giotto object for the Viewer +#' @description write out numeric annotation data from a giotto object for the +#' Viewer #' @param annotation annotation from the data.table from giotto object #' @param annot_name name of the annotation #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_numeric_annotation <- function( - annotation, - annot_name = "test", - output_directory = getwd()) { +write_giotto_viewer_numeric_annotation <- function(annotation, + annot_name = "test", + output_directory = getwd()) { # write to output directory annot_inf_map <- paste0(annot_name, "_num_annot_information", ".txt") write.table(annotation, @@ -73,7 +73,8 @@ write_giotto_viewer_numeric_annotation <- function( #' @title write_giotto_viewer_dim_reduction -#' @description write out dimensional reduction data from a giotto object for the Viewer +#' @description write out dimensional reduction data from a giotto object for +#' the Viewer #' @param dim_reduction_cell dimension reduction slot from giotto object #' @param dim_red high level name of dimension reduction #' @param dim_red_name specific name of dimension reduction to use @@ -82,19 +83,19 @@ write_giotto_viewer_numeric_annotation <- function( #' @param output_directory directory where to save the files #' @returns write a .txt and .annot file for the selection annotation #' @keywords internal -write_giotto_viewer_dim_reduction <- function( - dim_reduction_cell, - dim_red = NULL, - dim_red_name = NULL, - dim_red_rounding = NULL, - dim_red_rescale = c(-20, 20), - 
output_directory = getwd()) { +write_giotto_viewer_dim_reduction <- function(dim_reduction_cell, + dim_red = NULL, + dim_red_name = NULL, + dim_red_rounding = NULL, + dim_red_rescale = c(-20, 20), + output_directory = getwd()) { dim_red_coord <- dim_reduction_cell[[dim_red]][[ dim_red_name ]]$coordinates[, seq_len(2)] if (is.null(dim_red_coord)) { - cat("\n combination of ", dim_red, " and ", dim_red_name, " does not exist \n") + cat("\n combination of ", dim_red, " and ", dim_red_name, + " does not exist \n") } else { # round dimension reduction coordinates if (!is.null(dim_red_rounding) & is.integer(dim_red_rounding)) { @@ -134,32 +135,33 @@ write_giotto_viewer_dim_reduction <- function( #' @param expression_values expression values to use in Viewer #' @param dim_red_rounding numerical indicating how to round the coordinates #' @param dim_red_rescale numericals to rescale the coordinates -#' @param expression_rounding numerical indicating how to round the expression data +#' @param expression_rounding numerical indicating how to round the expression +#' data #' @param overwrite_dir overwrite files in the directory if it already existed #' @param verbose be verbose #' @returns writes the necessary output to use in Giotto Viewer -#' @details Giotto Viewer expects the results from Giotto Analyzer in a specific format, -#' which is provided by this function. To include enrichment results from {\code{\link{createSpatialEnrich}}} -#' include the provided spatial enrichment name (default PAGE or rank) -#' and add the gene signature names (.e.g cell types) to the numeric annotations parameter. +#' @details Giotto Viewer expects the results from Giotto Analyzer in a +#' specific format, which is provided by this function. To include enrichment +#' results from {\code{\link{createSpatialEnrich}}} include the provided +#' spatial enrichment name (default PAGE or rank) and add the gene signature +#' names (.e.g cell types) to the numeric annotations parameter. 
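# Usage sketch (not code from this patch): one possible exportGiottoViewer()
# call. The annotation, enrichment, and reduction names below are assumptions
# and must match what is actually stored in the giotto object `g` (clustering,
# PAGE enrichment, and umap/tsne results).
exportGiottoViewer(
    gobject = g,
    output_directory = "giotto_viewer_output",
    spat_enr_names = "PAGE",                   # spatial enrichment to include
    factor_annotations = "leiden_clus",
    numeric_annotations = "nr_feats",
    dim_reductions = c("umap", "tsne"),        # high level reduction names
    dim_reduction_names = c("umap", "tsne"),   # specific reduction names
    expression_values = "scaled"
)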
#' @export -exportGiottoViewer <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - output_directory = NULL, - spat_enr_names = NULL, - factor_annotations = NULL, - numeric_annotations = NULL, - dim_reductions, - dim_reduction_names, - expression_values = c("scaled", "normalized", "custom"), - dim_red_rounding = NULL, - dim_red_rescale = c(-20, 20), - expression_rounding = 2, - overwrite_dir = TRUE, - verbose = TRUE) { +exportGiottoViewer <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + output_directory = NULL, + spat_enr_names = NULL, + factor_annotations = NULL, + numeric_annotations = NULL, + dim_reductions, + dim_reduction_names, + expression_values = c("scaled", "normalized", "custom"), + dim_red_rounding = NULL, + dim_red_rescale = c(-20, 20), + expression_rounding = 2, + overwrite_dir = TRUE, + verbose = TRUE) { ## output directory ## if (file.exists(output_directory)) { if (overwrite_dir == TRUE) { @@ -212,9 +214,10 @@ exportGiottoViewer <- function( # data.table variables sdimx <- sdimy <- NULL - spatial_location <- get_spatial_locations( + spatial_location <- getSpatialLocations( gobject = gobject, - spat_loc_name = spat_loc_name + spat_unit = spat_unit, + name = spat_loc_name ) spatial_location <- spatial_location[, .(sdimx, sdimy)] write.table(spatial_location, @@ -241,16 +244,19 @@ exportGiottoViewer <- function( cell_metadata <- combineMetadata( gobject = gobject, feat_type = feat, + spat_unit = spat_unit, spat_enr_names = spat_enr_names ) # factor annotations # if (!is.null(factor_annotations)) { - found_factor_annotations <- factor_annotations[factor_annotations %in% colnames(cell_metadata)] + found_factor_annotations <- factor_annotations[ + factor_annotations %in% colnames(cell_metadata)] for (sel_annot in found_factor_annotations) { - if (verbose == TRUE) cat("\n write annotation data for: ", sel_annot, "\n") + if (verbose == TRUE) + cat("\n write annotation data for: ", sel_annot, "\n") selected_annotation <- cell_metadata[[sel_annot]] write_giotto_viewer_annotation( @@ -265,7 +271,8 @@ exportGiottoViewer <- function( annot_names <- list() for (sel_annot_id in seq_along(found_factor_annotations)) { sel_annot_name <- found_factor_annotations[sel_annot_id] - annot_inf_name <- paste0(sel_annot_name, "_annot_information.txt") + annot_inf_name <- paste0( + sel_annot_name, "_annot_information.txt") annot_names[[sel_annot_id]] <- sel_annot_name text_file_names[[sel_annot_id]] <- annot_inf_name @@ -287,9 +294,11 @@ exportGiottoViewer <- function( # numeric annotations # if (!is.null(numeric_annotations)) { - found_numeric_annotations <- numeric_annotations[numeric_annotations %in% colnames(cell_metadata)] + found_numeric_annotations <- numeric_annotations[ + numeric_annotations %in% colnames(cell_metadata)] for (sel_annot in found_numeric_annotations) { - if (verbose == TRUE) cat("\n write annotation data for: ", sel_annot, "\n") + if (verbose == TRUE) + cat("\n write annotation data for: ", sel_annot, "\n") selected_annotation <- cell_metadata[[sel_annot]] write_giotto_viewer_numeric_annotation( annotation = selected_annotation, @@ -305,7 +314,8 @@ exportGiottoViewer <- function( annot_names <- list() for (sel_annot_id in seq_along(found_numeric_annotations)) { sel_annot_name <- found_numeric_annotations[sel_annot_id] - annot_inf_name <- paste0(sel_annot_name, "_num_annot_information.txt") + annot_inf_name <- paste0( + sel_annot_name, "_num_annot_information.txt") annot_names[[sel_annot_id]] <- 
sel_annot_name text_file_names[[sel_annot_id]] <- annot_inf_name @@ -340,7 +350,9 @@ exportGiottoViewer <- function( temp_dim_red <- dim_reductions[i] temp_dim_red_name <- dim_reduction_names[i] - if (verbose == TRUE) cat("write annotation data for: ", temp_dim_red, " for ", temp_dim_red_name, "\n") + if (verbose == TRUE) + cat("write annotation data for: ", temp_dim_red, " for ", + temp_dim_red_name, "\n") write_giotto_viewer_dim_reduction( dim_reduction_cell = dim_reduction_cell, @@ -358,14 +370,17 @@ exportGiottoViewer <- function( ### expression data ### # expression values to be used if (verbose == TRUE) cat("\n write expression values \n") - values <- match.arg(expression_values, unique(c("scaled", "normalized", "custom", expression_values))) + values <- match.arg( + expression_values, + unique(c("scaled", "normalized", "custom", expression_values))) for (feat in feat_type) { - expr_values <- get_expression_values( + expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, feat_type = feat, - values = values + values = values, + output = "matrix" ) expr_values <- as.matrix(expr_values) diff --git a/R/globals.R b/R/globals.R index 53c08c870..c6d015e16 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,4 +1,4 @@ -utils::globalVariables(names = c( +globalVariables(names = c( ":=", ".N", ".SD", ".", "cast", "%--%", ".inc", # igraph "python_leiden", "python_louvain", "python_spatial_genes", diff --git a/R/gstop.R b/R/gstop.R index d83ad98d8..be2a805e1 100644 --- a/R/gstop.R +++ b/R/gstop.R @@ -2,15 +2,14 @@ # .n should be increased when called from a nested location if capturing the # original call is desired. # .n should be increased to 2L when within a generic method -.gstop <- function( - ..., - sep = " ", - strWidth = 100, - errWidth = FALSE, - .prefix = " ", - .initial = "", - .n = 1L, - .call = FALSE) { +.gstop <- function(..., + sep = " ", + strWidth = 100, + errWidth = FALSE, + .prefix = " ", + .initial = "", + .n = 1L, + .call = FALSE) { GiottoUtils::gstop( ..., sep = sep, diff --git a/R/image_registration.R b/R/image_registration.R index a98e9a280..0cc485233 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -88,10 +88,9 @@ #' @returns spatlocs #' @keywords internal # Rotation is performed first, followed by XY transform. 
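# Generic sketch of the order stated in the comment above (rotate first, then
# apply the XY offset). The angle, coordinates, and offsets are arbitrary
# example values, not the fiji/rvision transform values handled internally by
# .rigid_transform_spatial_locations().
theta <- pi / 6
rot <- matrix(c(cos(theta), sin(theta),
                -sin(theta), cos(theta)), nrow = 2)   # 2D rotation matrix
coords <- matrix(c(10, 0, 0, 20), ncol = 2,
                 dimnames = list(NULL, c("sdimx", "sdimy")))
registered <- coords %*% t(rot)          # 1. rotation about the origin
registered[, 1] <- registered[, 1] + 5   # 2. x offset
registered[, 2] <- registered[, 2] - 3   #    y offset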
-.rigid_transform_spatial_locations <- function( - spatlocs, - transform_values, - method) { +.rigid_transform_spatial_locations <- function(spatlocs, + transform_values, + method) { if (method == "fiji") { spatlocsXY <- spatlocs[, c("sdimx", "sdimy")] # These functions must be performed in positive y values @@ -139,13 +138,12 @@ #' @returns list #' @keywords internal # Automatically account for changes in image size due to alignment -.reg_img_minmax_finder <- function( - gobject_list, - image_unreg = NULL, - largeImage_unreg = NULL, # TODO Currently unused - scale_factor, - transform_values, - method) { +.reg_img_minmax_finder <- function(gobject_list, + image_unreg = NULL, + largeImage_unreg = NULL, # TODO Currently unused + scale_factor, + transform_values, + method) { # Find image spatial info from original image if possible # Check to make sure that image_unreg finds an existing image in each # gobject to be registered @@ -163,7 +161,7 @@ img_type = img_type )))) { giottoImage_list <- lapply( - X = gobject_list, FUN = get_giottoImage, name = image_unreg, + X = gobject_list, FUN = getGiottoImage, name = image_unreg, image_type = img_type ) image_corners <- lapply(giottoImage_list, .get_img_corners) @@ -268,15 +266,15 @@ #' @param spat_unit spatial unit #' @param method Method used to align gobjects. Current options are either #' using FIJI register_virtual_stack_slices output or rvision -#' @param image_unreg Gobject image slot to use. Defaults to 'image' (optional) +#' @param image_unreg Gobject image slot to use. Defaults to "image" (optional) #' @param image_reg_name Arbitrary image slot name for registered images to -#' occupy. Defaults to replacement of 'image' slot (optional) +#' occupy. Defaults to replacement of "image" slot (optional) #' @param image_list RVISION - under construction #' @param save_dir RVISION - under construction #' @param spatloc_unreg Unregistered spatial locations to align. Defaults to #' 'raw' slot (optional) #' @param spatloc_reg_name Arbitrary name for registered spatial locations. 
-#' Defaults to replacement of 'raw' slot (optional) +#' Defaults to replacement of "raw" slot (optional) #' @param fiji_xml_files Filepaths to FIJI registration XML outputs #' @param fiji_registered_images Registered images output by FIJI #' register_virtual_stack_slices @@ -287,22 +285,21 @@ #' @returns List of registered giotto objects where the registered images and #' spatial locations #' @export -registerGiottoObjectList <- function( - gobject_list, - spat_unit = NULL, - method = c("fiji", "rvision"), - image_unreg = "image", - image_reg_name = "image", - image_list = NULL, # Rvision - save_dir = NULL, # Rvision - spatloc_unreg = "raw", - spatloc_reg_name = "raw", - fiji_xml_files, - fiji_registered_images, - scale_factor = NULL, - allow_rvision_autoscale = TRUE, # Rvision - # auto_comp_reg_border = TRUE, - verbose = TRUE) { +registerGiottoObjectList <- function(gobject_list, + spat_unit = NULL, + method = c("fiji", "rvision"), + image_unreg = "image", + image_reg_name = "image", + image_list = NULL, # Rvision + save_dir = NULL, # Rvision + spatloc_unreg = "raw", + spatloc_reg_name = "raw", + fiji_xml_files, + fiji_registered_images, + scale_factor = NULL, + allow_rvision_autoscale = TRUE, # Rvision + # auto_comp_reg_border = TRUE, + verbose = TRUE) { method <- match.arg(method, choices = c("fiji", "rvision")) if (method == "fiji") { @@ -343,16 +340,16 @@ registerGiottoObjectList <- function( #' @param gobject_list list of gobjects to register #' @param spat_unit spatial unit #' @param image_unreg name of original unregistered images. Defaults to -#' 'image' (optional) +#' "image" (optional) #' @param image_reg_name arbitrary name for registered images to occupy. -#' Defaults to replacement of 'image' (optional) +#' Defaults to replacement of "image" (optional) #' @param image_replace_name arbitrary name for any images replaced due to #' image_reg_name argument (optional) #' @param registered_images registered images output by FIJI #' register_virtual_stack_slices -#' @param spatloc_unreg spatial locations to use. Defaults to 'raw' (optional) +#' @param spatloc_unreg spatial locations to use. Defaults to "raw" (optional) #' @param spatloc_reg_name name for registered spatial locations. 
Defaults to -#' replacement of 'raw' (optional) +#' replacement of "raw" (optional) #' @param spatloc_replace_name arbitrary name for any spatial locations #' replaced due to spatloc_reg_name argument (optional) #' @param xml_files atomic vector of filepaths to xml outputs from FIJI @@ -363,19 +360,18 @@ registerGiottoObjectList <- function( #' @returns list of registered giotto objects where the registered images and #' spatial locations #' @export -registerGiottoObjectListFiji <- function( - gobject_list, - spat_unit = NULL, - image_unreg = "image", - image_reg_name = "image", - image_replace_name = "unregistered", - registered_images = NULL, - spatloc_unreg = "raw", - spatloc_reg_name = "raw", - spatloc_replace_name = "unregistered", - xml_files, - scale_factor = NULL, - verbose = TRUE) { +registerGiottoObjectListFiji <- function(gobject_list, + spat_unit = NULL, + image_unreg = "image", + image_reg_name = "image", + image_replace_name = "unregistered", + registered_images = NULL, + spatloc_unreg = "raw", + spatloc_reg_name = "raw", + spatloc_replace_name = "unregistered", + xml_files, + scale_factor = NULL, + verbose = TRUE) { # set spat_unit based on first gobject spat_unit <- set_default_spat_unit( gobject = gobject_list[[1]], @@ -431,10 +427,10 @@ registerGiottoObjectListFiji <- function( spatloc_list <- list() for (gobj_i in seq_along(gobject_list)) { gobj <- gobject_list[[gobj_i]] - spatloc <- get_spatial_locations( + spatloc <- getSpatialLocations( gobject = gobj, spat_unit = spat_unit, - spat_loc_name = spatloc_unreg + name = spatloc_unreg ) #------ Put all spatial location data together spatloc_list[[gobj_i]] <- spatloc @@ -526,24 +522,24 @@ registerGiottoObjectListFiji <- function( # Rename original spatial locations to 'unregistered' if conflicting # with output if (spatloc_unreg == spatloc_reg_name) { - gobj <- set_spatial_locations( + gobj <- setSpatialLocations( gobject = gobj, spat_unit = spat_unit, - spat_loc_name = spatloc_replace_name, - spatlocs = get_spatial_locations( + name = spatloc_replace_name, + spatlocs = getSpatialLocations( gobject = gobj, spat_unit = spat_unit, - spat_loc_name = spatloc_unreg + name = spatloc_unreg ) ) } # Assign registered spatial locations from spatloc_list to gobject_list - gobj <- set_spatial_locations( + gobj <- setSpatialLocations( gobject = gobj, spat_unit = spat_unit, - spat_loc_name = spatloc_reg_name, + name = spatloc_reg_name, spatlocs = spatloc_list[[gobj_i]] ) @@ -619,17 +615,16 @@ registerGiottoObjectListFiji <- function( #' spatial locations #' @export # Register giotto objects when given raw images and spatial locations -registerGiottoObjectListRvision <- function( - gobject_list = gobject_list, - image_list = NULL, - save_dir = NULL, - spatloc_unreg = NULL, - spatloc_reg_name = "raw", - verbose = TRUE) { # Not used +registerGiottoObjectListRvision <- function(gobject_list = gobject_list, + image_list = NULL, + save_dir = NULL, + spatloc_unreg = NULL, + spatloc_reg_name = "raw", + verbose = TRUE) { # Not used package_check( pkg_name = "Rvision", - repository = c("github"), + repository = "github", github_repo = "swarm-lab/Rvision" ) @@ -637,9 +632,9 @@ registerGiottoObjectListRvision <- function( spatloc_list <- list() for (gobj_i in seq_along(gobject_list)) { gobj <- gobject_list[[gobj_i]] - spatloc <- get_spatial_locations( + spatloc <- getSpatialLocations( gobject = gobj, - spat_loc_name = spatloc_unreg, + name = spatloc_unreg, output = "spatLocsObj", copy_obj = TRUE ) @@ -710,10 +705,10 @@ registerGiottoObjectListRvision 
<- function( ## 4. Compute transformations # Choose reference image - refImage <- unreg_images[[base::floor(length(unreg_images) / 2)]] + refImage <- unreg_images[[floor(length(unreg_images) / 2)]] # Compute ECC transforms - transfs <- base::vector(mode = "list", length = length(unreg_images)) + transfs <- vector(mode = "list", length = length(unreg_images)) for (i in seq_along(unreg_images)) { transfs[[i]] <- Rvision::findTransformECC( refImage, unreg_images[[i]], @@ -744,22 +739,22 @@ registerGiottoObjectListRvision <- function( gobj <- gobject_list[[gobj_i]] # Rename original spatial locations to 'unregistered' - unreg_locs <- get_spatial_locations(gobj, - spat_loc_name = spatloc_unreg, + unreg_locs <- getSpatialLocations(gobj, + name = spatloc_unreg, copy_obj = FALSE, output = "spatLocsObj" ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobj <- set_spatial_locations(gobj, + gobj <- setSpatialLocations(gobj, spatlocs = unreg_locs, - spat_loc_name = "unregistered" + name = "unregistered" ) # Assign registered spatial locations from spatloc_list to gobject_list - gobj <- set_spatial_locations(gobj, + gobj <- setSpatialLocations(gobj, spatlocs = spatloc_list[[gobj_i]], - spat_loc_name = spatloc_reg_name + name = spatloc_reg_name ) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -885,36 +880,35 @@ fiji <- function(fijiPath = NULL) { #' jimpipeline by jefferislab #' #' @export -registerImagesFIJI <- function( - source_img_dir, - output_img_dir, - transforms_save_dir, - ref_img_name, - # Scale Invariant Interest Point Detector Options - init_gauss_blur = 1.6, - steps_per_scale_octave = 3, - min_img_size = 64, - max_img_size = 1024, - # Feature Descriptor Options - feat_desc_size = 8, - feat_desc_orient_bins = 8, - closest_next_closest_ratio = 0.92, - # Geometric Consensus Filter Options - max_align_err = 25, - inlier_ratio = 0.05, - # FIJI Options - headless = FALSE, - batch = TRUE, - MinMem = MaxMem, - MaxMem = 2500, - IncrementalGC = TRUE, - Threads = NULL, - fijiArgs = NULL, - javaArgs = NULL, - ijArgs = NULL, - jython = FALSE, - fijiPath = fiji(), - DryRun = FALSE) { +registerImagesFIJI <- function(source_img_dir, + output_img_dir, + transforms_save_dir, + ref_img_name, + # Scale Invariant Interest Point Detector Options + init_gauss_blur = 1.6, + steps_per_scale_octave = 3, + min_img_size = 64, + max_img_size = 1024, + # Feature Descriptor Options + feat_desc_size = 8, + feat_desc_orient_bins = 8, + closest_next_closest_ratio = 0.92, + # Geometric Consensus Filter Options + max_align_err = 25, + inlier_ratio = 0.05, + # FIJI Options + headless = FALSE, + batch = TRUE, + MinMem = MaxMem, + MaxMem = 2500, + IncrementalGC = TRUE, + Threads = NULL, + fijiArgs = NULL, + javaArgs = NULL, + ijArgs = NULL, + jython = FALSE, + fijiPath = fiji(), + DryRun = FALSE) { # Check if output directory exists. If not, create the directory if (!file.exists(output_img_dir)) { dir.create(output_img_dir) @@ -1023,8 +1017,12 @@ registerImagesFIJI <- function( #' @title title Record landmarks by interactive selection #' @name interactiveLandmarkSelection #' @description Record landmarks by interactive selection -#' @param source_image the image to be plotted on the left, and landmarks will output in the first of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image -#' @param target_image the image to be plotted on the right, and landmarks will output in the second of the list. 
Input can be a ggplot object, a GiottoImage, or a character represent a path to a image +#' @param source_image the image to be plotted on the left, and landmarks will +#' output in the first of the list. Input can be a ggplot object, +#' a GiottoImage, or a character represent a path to a image +#' @param target_image the image to be plotted on the right, and landmarks will +#' output in the second of the list. Input can be a ggplot object, a +#' GiottoImage, or a character represent a path to a image #' #' @returns a list of landmarks #' @@ -1033,26 +1031,24 @@ interactiveLandmarkSelection <- function(source, target) { GiottoUtils::package_check("shiny") GiottoUtils::package_check("ggplot2") GiottoUtils::package_check("miniUI") - - .create_image_to_plot <- function(x){ - if (inherits(x, "gg")){ + + .create_image_to_plot <- function(x) { + if (inherits(x, "gg")) { return(x) - } - else if (is.character(x)){ - gimg = Giotto::createGiottoLargeImage(x) + } else if (is.character(x)) { + gimg <- Giotto::createGiottoLargeImage(x) gg <- ggplot2::ggplot() - gg_raster = GiottoVisuals::gg_annotation_raster(gg,gimg) + gg_raster <- GiottoVisuals::gg_annotation_raster(gg, gimg) return(gg_raster) - } - else{ + } else { gg <- ggplot2::ggplot() - gg_raster = GiottoVisuals::gg_annotation_raster(gg,x) + gg_raster <- GiottoVisuals::gg_annotation_raster(gg, x) return(gg_raster) } } source_image <- .create_image_to_plot(source) target_image <- .create_image_to_plot(target) - + # Function to extract the range of x and y values from a ggplot object .extract_plot_ranges <- function(plot) { data <- ggplot2::ggplot_build(plot)$data[[1]] @@ -1060,26 +1056,46 @@ interactiveLandmarkSelection <- function(source, target) { y_range <- range(data$y, na.rm = TRUE) list(x_range = x_range, y_range = y_range) } - + # Extract ranges for the input plots source_ranges <- .extract_plot_ranges(source_image) target_ranges <- .extract_plot_ranges(target_image) - + ui <- miniUI::miniPage( miniUI::gadgetTitleBar("Select Extents and Points"), miniUI::miniContentPanel( shiny::fluidRow( - shiny::column(6, shiny::plotOutput("plot1", click = "plot1_click")), - shiny::column(6, shiny::plotOutput("plot2", click = "plot2_click")) + shiny::column( + 6, shiny::plotOutput("plot1", click = "plot1_click")), + shiny::column( + 6, shiny::plotOutput("plot2", click = "plot2_click")) ), shiny::fluidRow( - shiny::column(6, - shiny::sliderInput("xrange1", "X Range for Plot 1", min = source_ranges$x_range[1], max = source_ranges$x_range[2], value = source_ranges$x_range), - shiny::sliderInput("yrange1", "Y Range for Plot 1", min = source_ranges$y_range[1], max = source_ranges$y_range[2], value = source_ranges$y_range) + shiny::column( + 6, + shiny::sliderInput( + "xrange1", "X Range for Plot 1", + min = source_ranges$x_range[1], + max = source_ranges$x_range[2], + value = source_ranges$x_range), + shiny::sliderInput( + "yrange1", "Y Range for Plot 1", + min = source_ranges$y_range[1], + max = source_ranges$y_range[2], + value = source_ranges$y_range) ), - shiny::column(6, - shiny::sliderInput("xrange2", "X Range for Plot 2", min = target_ranges$x_range[1], max = target_ranges$x_range[2], value = target_ranges$x_range), - shiny::sliderInput("yrange2", "Y Range for Plot 2", min = target_ranges$y_range[1], max = target_ranges$y_range[2], value = target_ranges$y_range) + shiny::column( + 6, + shiny::sliderInput( + "xrange2", "X Range for Plot 2", + min = target_ranges$x_range[1], + max = target_ranges$x_range[2], + value = target_ranges$x_range), + 
shiny::sliderInput( + "yrange2", "Y Range for Plot 2", + min = target_ranges$y_range[1], + max = target_ranges$y_range[2], + value = target_ranges$y_range) ) ), shiny::fluidRow( @@ -1087,68 +1103,82 @@ interactiveLandmarkSelection <- function(source, target) { shiny::column(6, shiny::verbatimTextOutput("click_info2")) ), shiny::fluidRow( - shiny::column(6, shiny::actionButton("undo1", "Undo Click on Source Image")), - shiny::column(6, shiny::actionButton("undo2", "Undo Click on Target Image")) + shiny::column(6, shiny::actionButton( + "undo1", "Undo Click on Source Image")), + shiny::column(6, shiny::actionButton( + "undo2", "Undo Click on Target Image")) ) ) ) - + server <- function(input, output, session) { - click_history1 <- shiny::reactiveVal(data.frame(x = numeric(), y = numeric())) - click_history2 <- shiny::reactiveVal(data.frame(x = numeric(), y = numeric())) - + click_history1 <- shiny::reactiveVal(data.frame(x = numeric(), + y = numeric())) + click_history2 <- shiny::reactiveVal(data.frame(x = numeric(), + y = numeric())) + output$plot1 <- shiny::renderPlot({ source_image + - ggplot2::coord_cartesian(xlim = input$xrange1, ylim = input$yrange1) + - ggplot2::geom_point(data = click_history1(), ggplot2::aes(x = x, y = y), color = "red", size = 4.5) + ggplot2::coord_cartesian( + xlim = input$xrange1, ylim = input$yrange1) + + ggplot2::geom_point( + data = click_history1(), ggplot2::aes(x = x, y = y), + color = "red", size = 4.5) }) - + output$plot2 <- shiny::renderPlot({ target_image + - ggplot2::coord_cartesian(xlim = input$xrange2, ylim = input$yrange2) + - ggplot2::geom_point(data = click_history2(), ggplot2::aes(x = x, y = y), color = "blue",size = 4.5) + ggplot2::coord_cartesian( + xlim = input$xrange2, ylim = input$yrange2) + + ggplot2::geom_point( + data = click_history2(), ggplot2::aes(x = x, y = y), + color = "blue", size = 4.5) }) - + shiny::observeEvent(input$plot1_click, { click <- input$plot1_click - new_coords <- rbind(click_history1(), data.frame(x = click$x, y = click$y)) + new_coords <- rbind( + click_history1(), data.frame(x = click$x, y = click$y)) click_history1(new_coords) }) - + shiny::observeEvent(input$plot2_click, { click <- input$plot2_click - new_coords <- rbind(click_history2(), data.frame(x = click$x, y = click$y)) + new_coords <- rbind( + click_history2(), data.frame(x = click$x, y = click$y)) click_history2(new_coords) }) - + shiny::observeEvent(input$undo1, { if (nrow(click_history1()) > 0) { - new_coords <- click_history1()[-nrow(click_history1()), , drop = FALSE] + new_coords <- click_history1()[ + -nrow(click_history1()), , drop = FALSE] click_history1(new_coords) } }) - + shiny::observeEvent(input$undo2, { if (nrow(click_history2()) > 0) { - new_coords <- click_history2()[-nrow(click_history2()), , drop = FALSE] + new_coords <- click_history2()[ + -nrow(click_history2()), , drop = FALSE] click_history2(new_coords) } }) - + output$click_info1 <- shiny::renderPrint({ click_history1() }) - + output$click_info2 <- shiny::renderPrint({ click_history2() }) - + shiny::observeEvent(input$done, { - returnValue <- list(click_history1(),click_history2()) + returnValue <- list(click_history1(), click_history2()) shiny::stopApp(returnValue) }) } - + shiny::runGadget(ui, server) } @@ -1159,24 +1189,29 @@ interactiveLandmarkSelection <- function(source, target) { #' @title Calculate a affine transformation matrix from two set of landmarks #' @name calculateAffineMatrixFromLandmarks -#' @description calculate a affine transformation matrix from two set of 
landmarks -#' @param source_df source landmarks, two columns, first column represent x coordinate and second column represent y coordinate. -#' @param target_df target landmarks, two columns, first column represent x coordinate and second column represent y coordinate. +#' @description calculate a affine transformation matrix from two set of +#' landmarks +#' @param source_df source landmarks, two columns, first column represent +#' x coordinate and second column represent y coordinate. +#' @param target_df target landmarks, two columns, first column represent +#' x coordinate and second column represent y coordinate. #' #' @returns a 3 by 3 matrix with the third row close to (0,0,1) #' #' @export -calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ - source_landmarks_matrix = as.matrix(source_df) - source_landmarks_matrix = cbind(source_landmarks_matrix,rep(1,nrow(source_landmarks_matrix))) +calculateAffineMatrixFromLandmarks <- function(source_df, target_df) { + source_landmarks_matrix <- as.matrix(source_df) + source_landmarks_matrix <- cbind( + source_landmarks_matrix, rep(1, nrow(source_landmarks_matrix))) ## Create landmark matrix for the target image target_landmarks_matrix <- as.matrix(target_df) - target_landmarks_matrix = cbind(target_landmarks_matrix,rep(1,nrow(target_landmarks_matrix))) + target_landmarks_matrix <- cbind( + target_landmarks_matrix, rep(1, nrow(target_landmarks_matrix))) ## Compute the affine matrix - source_dp = t(source_landmarks_matrix) %*% source_landmarks_matrix - source_target_dp = t(source_landmarks_matrix) %*% target_landmarks_matrix + source_dp <- t(source_landmarks_matrix) %*% source_landmarks_matrix + source_target_dp <- t(source_landmarks_matrix) %*% target_landmarks_matrix source_dp_inv <- solve(source_dp) - Affine_matrix = t(source_dp_inv %*% source_target_dp) + Affine_matrix <- t(source_dp_inv %*% source_target_dp) return(Affine_matrix) } @@ -1184,32 +1219,33 @@ calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ #' @name .sift_detect -#' @title Run SIFT feature detector and descriptor extractor +#' @title Run SIFT feature detector and descriptor extractor #' @description -#' Perform feature detector and descriptor extractor on a matrix object or preprocessed image object -#' @param x input matrix or preprocessed image to extract feature and descriptor from +#' Perform feature detector and descriptor extractor on a matrix object or +#' preprocessed image object +#' @param x input matrix or preprocessed image to extract feature and +#' descriptor from #' @param ... additional params to pass to `skimage.feature.SIFT()` #' @returns list of keypoints and descriptors -#' +#' .sift_detect <- function(x, ..., pkg_ptr) { - if (missing(pkg_ptr)) { GiottoUtils::package_check("skimage", repository = "pip:scikit-image") SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) } else { SKI <- pkg_ptr } - + # sift object SIFT <- SKI$feature$SIFT() - + SIFT$detect_and_extract(x) - + out <- list( keypoints = SIFT$keypoints, descriptors = SIFT$descriptors ) - + return(out) } @@ -1230,28 +1266,25 @@ calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ #' Computer Vision, 2004. #' @param ... 
additional params to pass to `skimage.feature.match_descriptors()` #' @returns list -#' -.match_descriptor <- function( - descriptor_list, - target_idx = 1L, - cross_check = TRUE, - max_ratio = 0.8, - ..., - pkg_ptr -) { - +#' +.match_descriptor <- function(descriptor_list, + target_idx = 1L, + cross_check = TRUE, + max_ratio = 0.8, + ..., + pkg_ptr) { checkmate::assert_list(descriptor_list, min.len = 2L) target_idx <- as.integer(target_idx) - + if (missing(pkg_ptr)) { package_check("skimage", repository = "pip:scikit-image") SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) } else { SKI <- pkg_ptr } - + target <- descriptor_list[[target_idx]] - + out <- lapply( seq_along(descriptor_list), function(moving_idx) { @@ -1263,9 +1296,9 @@ calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ )) # directly return all as matches } - + moving <- descriptor_list[[moving_idx]] - + m <- .match_descriptor_single( x = target, y = moving, @@ -1275,25 +1308,24 @@ calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ m + 1 # since it is 0 indexed } ) - + return(out) } # wrapper for sklearn-image match_descriptors # returns a 2 col matrix of x to y index matches -.match_descriptor_single <- function(x, y,max_ratio, ..., pkg_ptr) { - +.match_descriptor_single <- function(x, y, max_ratio, ..., pkg_ptr) { checkmate::assert_class(x, "matrix") checkmate::assert_class(y, "matrix") - + if (missing(pkg_ptr)) { GiottoUtils::package_check("skimage", repository = "pip:scikit-image") SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) } else { SKI <- pkg_ptr } - + match_descriptors <- SKI$feature$match_descriptors m <- match_descriptors( descriptors1 = x, @@ -1301,36 +1333,42 @@ calculateAffineMatrixFromLandmarks <- function(source_df,target_df){ max_ratio = max_ratio, ... 
# max_ratio of 0.6 - 0.8 recommended for sift, cross_check = TRUE ) - + return(m) } #' @name preprocessImageToMatrix -#' @title Preprocess from image directory to the required matrix format for Image registration pipeline built on scikit-image +#' @title Preprocess from image directory to the required matrix format for +#' Image registration pipeline built on scikit-image #' @description -#' Preprocess a image path to the required matrix format for Image registration pipeline built on scikit-image +#' Preprocess a image path to the required matrix format for Image +#' registration pipeline built on scikit-image #' @param x input file path, required -#' @param invert whether or not to invert intensity to make calculation of descriptors more accurate, default FALSE -#' @param equalize_histogram whether or not to calculate equalized histogram of the image,default TRUE +#' @param invert whether or not to invert intensity to make calculation of +#' descriptors more accurate, default FALSE +#' @param equalize_histogram whether or not to calculate equalized histogram of +#' the image,default TRUE #' @param flip_vertical whether or not to flip vertical, default FALSE #' @param flip_horizontal whether or not to flip horizontal, default FALSE -#' @param rotate_90 whether or not to rotates the image 90 degrees counter-clockwise, default FALSE -#' @param use_single_channel If input is a multichannel image, whether or not to extract single channel, default FALSE -#' @param single_channel_number Channel number in the multichannel image, required if use_single_channel = TRUE +#' @param rotate_90 whether or not to rotates the image 90 degrees +#' counter-clockwise, default FALSE +#' @param use_single_channel If input is a multichannel image, whether or not +#' to extract single channel, default FALSE +#' @param single_channel_number Channel number in the multichannel image, +#' required if use_single_channel = TRUE #' @returns a matrix array to input to .sift_detect -#' +#' #' @export preprocessImageToMatrix <- function(x, - invert = F, - equalize_histogram = T, - flip_vertical = F, - flip_horizontal = F, - rotate_90 = F, - use_single_channel = F, - single_channel_number = NULL, - pkg_ptr) { - + invert = FALSE, + equalize_histogram = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + rotate_90 = FALSE, + use_single_channel = FALSE, + single_channel_number = NULL, + pkg_ptr) { if (missing(pkg_ptr)) { GiottoUtils::package_check("skimage", repository = "pip:scikit-image") SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) @@ -1339,32 +1377,35 @@ preprocessImageToMatrix <- function(x, } GiottoUtils::package_check("numpy", repository = "pip:scikit-image") np <- reticulate::import("numpy", convert = TRUE, delay_load = TRUE) - - image = SKI$io$imread(x) - - if (length(dim(image)) >2 & use_single_channel == FALSE){ - image = SKI$color$rgb2gray(image) - } - if (use_single_channel == TRUE) { - if (is.null(single_channel_number)) {stop("Set use single channel == TRUE, please provide a channel number to continue")} - image <- image[,,single_channel_number] + + image <- SKI$io$imread(x) + + if (length(dim(image)) > 2 & use_single_channel == FALSE) { + image <- SKI$color$rgb2gray(image) + } + if (use_single_channel == TRUE) { + if (is.null(single_channel_number)) { + stop("Set use single channel == TRUE, please provide a channel + number to continue") + } + image <- image[, , single_channel_number] } - - - if (flip_vertical == T){ - image = np$flipud(image) + + + if (flip_vertical == TRUE) 
{ + image <- np$flipud(image) } - if (flip_horizontal == T){ - image = np$fliplr(image) + if (flip_horizontal == TRUE) { + image <- np$fliplr(image) } - if (rotate_90 == T){ - image = np$rot90(image) + if (rotate_90 == TRUE) { + image <- np$rot90(image) } - if (invert == T){ - image = SKI$util$invert(image) + if (invert == TRUE) { + image <- SKI$util$invert(image) } - if (equalize_histogram == T){ - image = SKI$exposure$equalize_hist(image) + if (equalize_histogram == TRUE) { + image <- SKI$exposure$equalize_hist(image) } return(image) } @@ -1376,34 +1417,38 @@ preprocessImageToMatrix <- function(x, #' Estimate affine transformation from matched descriptor #' @param keypoints1 keypoints extracted from source image via .sift_detect #' @param keypoints1 keypoints extracted from target image via .sift_detect -#' @param match a 2 col matrix of x to y index matched descriptors via .match_descriptor_single +#' @param match a 2 col matrix of x to y index matched descriptors via +#' .match_descriptor_single #' @returns a list of model and inliners .estimate_transform_from_matched_descriptor <- function(keypoints1, - keypoints2, - match, - estimate_fun, - ..., - pkg_ptr){ + keypoints2, + match, + estimate_fun, + ..., + pkg_ptr) { if (missing(pkg_ptr)) { GiottoUtils::package_check("skimage", repository = "pip:scikit-image") SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) } else { SKI <- pkg_ptr } - + # Extract matched keypoints src_pts <- keypoints1[match[, 1] + 1, , drop = FALSE] dst_pts <- keypoints2[match[, 2] + 1, , drop = FALSE] - - estimate_fun <- match.arg(estimate_fun, unique(c('euclidean', 'similarity', 'affine', 'piecewise-affine', 'projective', 'polynomial', estimate_fun))) - - # Estimate homography matrix + + estimate_fun <- match.arg( + estimate_fun, + unique(c("euclidean", "similarity", "affine", "piecewise-affine", + "projective", "polynomial", estimate_fun))) + + # Estimate homography matrix ransac_result <- SKI$transform$estimate_transform( ttype = estimate_fun, src = src_pts, dst = dst_pts, ) - + return(ransac_result) } @@ -1414,26 +1459,27 @@ preprocessImageToMatrix <- function(x, #' Warp transformed images from estimated transformation #' @param x source image from .sift_preprocess #' @param y target image from .sift_preprocess -#' @param model estimated transformation object from .estimate_transform_from_matched_descriptor +#' @param model estimated transformation object from +#' .estimate_transform_from_matched_descriptor #' @returns None, it will write to a output path .warp_transformed_image <- function(x, - y, - model, - outpath = NULL, - pkg_ptr){ + y, + model, + outpath = NULL, + pkg_ptr) { if (missing(pkg_ptr)) { GiottoUtils::package_check("skimage", repository = "pip:scikit-image") SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) } else { SKI <- pkg_ptr } - + # Ensure the source image array is writable by making a copy x_copy <- reticulate::r_to_py(x)$copy() - + # Warp the source image to align with the destination image warped_image <- SKI$transform$warp(x_copy, model, output_shape = dim(y)) - SKI$io$imsave(outpath,warped_image) + SKI$io$imsave(outpath, warped_image) } @@ -1441,37 +1487,42 @@ preprocessImageToMatrix <- function(x, #' @name .plot_matched_descriptors #' @title plot matched descriptors #' @description -#' A wrapper function for the plot_matches for the SIFT feature extractor and descriptor pipeline +#' A wrapper function for the plot_matches for the SIFT feature extractor and +#' descriptor pipeline #' @param 
x source image from .sift_preprocess #' @param y target image from .sift_preprocess #' @param keypoints1 keypoints extracted from source image via .sift_detect #' @param keypoints1 keypoints extracted from target image via .sift_detect -#' @param match a 2 col matrix of x to y index matched descriptors via .match_descriptor_single +#' @param match a 2 col matrix of x to y index matched descriptors via +#' .match_descriptor_single #' @returns None -.plot_matched_descriptors <- function(x, y, keypoints1, keypoints2, match, pkg_ptr){ +.plot_matched_descriptors <- function( + x, y, keypoints1, keypoints2, match, pkg_ptr) { if (missing(pkg_ptr)) { GiottoUtils::package_check("skimage", repository = "pip:scikit-image") SKI <- reticulate::import("skimage", convert = TRUE, delay_load = TRUE) } else { SKI <- pkg_ptr } - - matplotlib <-reticulate::import("matplotlib", convert = TRUE, delay_load = TRUE) - np <- reticulate::import("numpy",convert = T, delay_load = T) + + matplotlib <- reticulate::import( + "matplotlib", convert = TRUE, delay_load = TRUE) + np <- reticulate::import("numpy", convert = TRUE, delay_load = TRUE) plt <- matplotlib$pyplot - + match_py <- reticulate::r_to_py(match) - match_py <- np$array(match_py, dtype = np$int32) - + match_py <- np$array(match_py, dtype = np$int32) + # Create a subplot fig_ax <- plt$subplots(nrows = 1L, ncols = 1L, figsize = c(11, 8)) fig <- fig_ax[[1]] ax <- fig_ax[[2]] - + # Plot the matches - SKI$feature$plot_matches(ax, x, y, keypoints1, keypoints2, match_py, only_matches = TRUE) - - ax$axis('off') + SKI$feature$plot_matches( + ax, x, y, keypoints1, keypoints2, match_py, only_matches = TRUE) + + ax$axis("off") plt$show() plt$close() } @@ -1480,51 +1531,63 @@ preprocessImageToMatrix <- function(x, #' @title Estimate Automated ImageRegistration With SIFT #' @name estimateAutomatedImageRegistrationWithSIFT #' @description -#' Automatically estimate a transform with SIFT feature detection, descriptor match and returns a transformation object to use -#' @param x required. Source matrix input, could be generated from preprocessImageToMatrix -#' @param y required. Source matrix input, could be generated from preprocessImageToMatrix +#' Automatically estimate a transform with SIFT feature detection, descriptor +#' match and returns a transformation object to use +#' @param x required. Source matrix input, could be generated from +#' preprocessImageToMatrix +#' @param y required. Source matrix input, could be generated from +#' preprocessImageToMatrix #' @param max_ratio max_ratio parameter for matching descriptors, default 0.6 -#' @param save_warp default NULL, if not NULL, please provide an output image path to save the warpped image. -#' @param estimate_fun default Affine. The transformation model to use estimation -#' @param plot_match whether or not to plot the matching descriptors.Default False +#' @param save_warp default NULL, if not NULL, please provide an output image +#' path to save the warpped image. +#' @param estimate_fun default Affine. The transformation model to use +#' estimation +#' @param plot_match whether or not to plot the matching descriptors. 
+#' Default False #' @returns a list of the estimated transformation object -#' example estimation <- estimateAutomatedImageRegistrationWithSIFT(x = image_mtx1,y = image_mtx2) +#' @examples +#' estimation <- estimateAutomatedImageRegistrationWithSIFT( +#' x = image_mtx1, y = image_mtx2) #' @export estimateAutomatedImageRegistrationWithSIFT <- function(x, - y, - plot_match = F, - max_ratio = 0.6, - estimate_fun = 'affine', - save_warp = NULL, - verbose = T){ - - GiottoUtils::vmsg(.v = verbose, .is_debug = T,'Detecting features via SIFT... ') + y, + plot_match = FALSE, + max_ratio = 0.6, + estimate_fun = "affine", + save_warp = NULL, + verbose = TRUE) { + GiottoUtils::vmsg(.v = verbose, .is_debug = TRUE, + "Detecting features via SIFT... ") x_sift <- .sift_detect(x) y_sift <- .sift_detect(y) - - GiottoUtils::vmsg(.v = verbose, .is_debug = T,'Matching Descriptors via SIFT... ') - matched <- .match_descriptor_single(x_sift$descriptor, y_sift$descriptor,max_ratio = max_ratio) - - if (plot_match == TRUE){ - .plot_matched_descriptors(x, y, x_sift$keypoints, y_sift$keypoints, matched) - } - - - GiottoUtils::vmsg(.v = verbose, .is_debug = T,'Estimating transformation matrix from matched descriptor... ') - estimation <- .estimate_transform_from_matched_descriptor(x_sift$keypoints, - y_sift$keypoints, - matched, - estimate_fun = estimate_fun) - - if (!is.null(save_warp)){ - .warp_transformed_image(x = x, - y = y, - model = estimation$inverse, outpath = save_warp) + + GiottoUtils::vmsg(.v = verbose, .is_debug = TRUE, + "Matching Descriptors via SIFT... ") + matched <- .match_descriptor_single( + x_sift$descriptor, y_sift$descriptor, max_ratio = max_ratio) + + if (plot_match == TRUE) { + .plot_matched_descriptors( + x, y, x_sift$keypoints, y_sift$keypoints, matched) } - - return(estimation) -} + GiottoUtils::vmsg( + .v = verbose, .is_debug = TRUE, + "Estimating transformation matrix from matched descriptor... ") + estimation <- .estimate_transform_from_matched_descriptor(x_sift$keypoints, + y_sift$keypoints, + matched, + estimate_fun = estimate_fun + ) + if (!is.null(save_warp)) { + .warp_transformed_image( + x = x, + y = y, + model = estimation$inverse, outpath = save_warp + ) + } + return(estimation) +} diff --git a/R/interactivity.R b/R/interactivity.R index 16e61af35..b73cc1878 100644 --- a/R/interactivity.R +++ b/R/interactivity.R @@ -11,11 +11,10 @@ #' @returns A `data.table` containing x,y coordinates from the plotted polygons. #' #' @export -plotInteractivePolygons <- function( - x, - width = "auto", - height = "auto", - ...) { +plotInteractivePolygons <- function(x, + width = "auto", + height = "auto", + ...) { package_check(pkg_name = "miniUI", repository = "CRAN") package_check(pkg_name = "shiny", repository = "CRAN") @@ -96,7 +95,7 @@ plotInteractivePolygons <- function( geom_polygon( data = clicklist(), aes(x, y, color = name), - alpha = 0, + alpha = 0, show.legend = FALSE, ... 
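Referring back to the SIFT registration helpers above, a hedged end-to-end sketch chaining preprocessImageToMatrix() and estimateAutomatedImageRegistrationWithSIFT(); the image paths are hypothetical and scikit-image must be available in the active python environment.

# hypothetical input images, converted to the matrix format the SIFT helpers expect
x <- preprocessImageToMatrix("source_HE.png", equalize_histogram = TRUE)
y <- preprocessImageToMatrix("target_DAPI.png", invert = TRUE)

# detect keypoints, match descriptors, estimate an affine model, and
# optionally save the warped source image to disk
estimation <- estimateAutomatedImageRegistrationWithSIFT(
    x = x,
    y = y,
    max_ratio = 0.6,
    estimate_fun = "affine",
    plot_match = TRUE,
    save_warp = "warped_source.png"
)

# assuming scikit-image's usual transform API, the fitted 3 x 3 matrix
# should be available as estimation$params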
) + @@ -179,12 +178,11 @@ plotInteractivePolygons <- function( #' getCellsFromPolygon(g) #' #' @export -getCellsFromPolygon <- function( - gobject, - polygon_name = "selections", - spat_unit = "cell", - spat_loc_name = "raw", - polygons = NULL) { +getCellsFromPolygon <- function(gobject, + polygon_name = "selections", + spat_unit = "cell", + spat_loc_name = "raw", + polygons = NULL) { if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") } @@ -263,14 +261,13 @@ getCellsFromPolygon <- function( #' g <- addPolygonCells(g) #' pDataDT(g) #' @export -addPolygonCells <- function( - gobject, - polygon_name = "selections", - spat_unit = "cell", - spat_loc_name = "raw", - feat_type = "rna", - polygons = NULL, - na.label = "no_polygon") { +addPolygonCells <- function(gobject, + polygon_name = "selections", + spat_unit = "cell", + spat_loc_name = "raw", + feat_type = "rna", + polygons = NULL, + na.label = "no_polygon") { ## verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -365,22 +362,21 @@ addPolygonCells <- function( #' #' comparePolygonExpression(g) #' @export -comparePolygonExpression <- function( - gobject, - polygon_name = "selections", - spat_unit = "cell", - feat_type = "rna", - selected_feats = "top_genes", - expression_values = "normalized", - method = "scran", - ...) { +comparePolygonExpression <- function(gobject, + polygon_name = "selections", + spat_unit = "cell", + feat_type = "rna", + selected_feats = "top_genes", + expression_values = "normalized", + method = "scran", + ...) { # verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") } # get expression - my_expression <- get_expression_values(gobject, + my_expression <- getExpression(gobject, values = expression_values, spat_unit = spat_unit, feat_type = feat_type, @@ -487,13 +483,12 @@ comparePolygonExpression <- function( #' #' compareCellAbundance(g) #' @export -compareCellAbundance <- function( - gobject, - polygon_name = "selections", - spat_unit = "cell", - feat_type = "rna", - cell_type_column = "leiden_clus", - ...) { +compareCellAbundance <- function(gobject, + polygon_name = "selections", + spat_unit = "cell", + feat_type = "rna", + cell_type_column = "leiden_clus", + ...) { # verify gobject if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") @@ -569,13 +564,12 @@ compareCellAbundance <- function( #' #' plotPolygons(g, x = x) #' @export -plotPolygons <- function( - gobject, - polygon_name = "selections", - x, - spat_unit = "cell", - polygons = NULL, - ...) { +plotPolygons <- function(gobject, + polygon_name = "selections", + x, + spat_unit = "cell", + polygons = NULL, + ...) { ## verify gobject if (!inherits(gobject, "giotto")) { stop("gobject must be a Giotto object") @@ -587,7 +581,7 @@ plotPolygons <- function( if (is.null(x)) stop("A plot object must be provided") ## get polygons spatial info - polygon_spatVector <- get_polygon_info( + polygon_spatVector <- getPolygonInfo( gobject = gobject, polygon_name = polygon_name, return_giottoPolygon = FALSE @@ -644,11 +638,15 @@ plotPolygons <- function( #' @returns data.table with selected cell_IDs, spatial coordinates, and #' cluster_ID. 
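The polygon-selection helpers above (plotInteractivePolygons(), getCellsFromPolygon(), addPolygonCells(), comparePolygonExpression(), plotPolygons()) are typically chained. A sketch, assuming a processed giotto object g with a "leiden_clus" clustering; the polygon-storage step uses createGiottoPolygonsFromDfr() and addGiottoPolygons() from GiottoClass, which are assumed here rather than shown in this diff.

# draw polygons interactively on an existing spatial plot
sp <- spatPlot2D(g, cell_color = "leiden_clus", return_plot = TRUE)
coords <- plotInteractivePolygons(sp)

# store the drawn shapes as a "selections" polygon layer
# (assumed GiottoClass constructors)
sel <- createGiottoPolygonsFromDfr(coords, name = "selections")
g <- addGiottoPolygons(g, gpolygons = list(sel))

# query and annotate the cells that fall inside each polygon
getCellsFromPolygon(g)
g <- addPolygonCells(g)

# compare expression between the drawn regions and overlay them on the plot
comparePolygonExpression(g)
plotPolygons(g, x = sp)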
#' @export -plotInteractive3D <- function( - gobject, spat_unit = "cell", feat_type = "rna", - cell_color = "leiden_clus", - cell_color_code = NULL, point_size = 0.5, - width = "100%", height = "400px") { +plotInteractive3D <- function(gobject, spat_unit = "cell", feat_type = "rna", + cell_color = "leiden_clus", + cell_color_code = NULL, point_size = 0.5, + width = "100%", height = "400px") { + package_check( + c("plotly", "miniUI", "shiny"), + repository = c("CRAN:plotly", "CRAN:miniUI", "CRAN:shiny") + ) + # NSE vars sdimx <- sdimy <- sdimz <- cell_ID <- NULL @@ -748,3 +746,145 @@ plotInteractive3D <- function( shiny::runGadget(ui, server) } + +#' Create a local anndata zarr folder +#' +#' @param gobject giotto object +#' @param spat_unit spatial unit (e.g. "cell") +#' @param feat_type feature type (e.g. "rna", "dna", "protein") +#' @param expression expression values to extract (e.g. "raw", "normalized", +#' "scaled") +#' @param output_path path to create and save the anndata zarr folder +#' +#' @return local anndata zarr folder +#' @export +#' +#' @examples +#' # using the mini visium object +#' giotto_object <- GiottoData::loadGiottoMini("visium") +#' +#' giottoToAnndataZarr(giotto_object, +#' expression = "raw", +#' output_path = tempdir() +#' ) +#' +#' # using the mini vizgen object +#' giotto_object <- GiottoData::loadGiottoMini("vizgen") +#' +#' giottoToAnndataZarr(giotto_object, +#' spat_unit = "aggregate", +#' expression = "scaled", +#' output_path = tempdir() +#' ) +giottoToAnndataZarr <- function(gobject, spat_unit = NULL, + feat_type = NULL, expression = "raw", + output_path) { + proc <- basilisk::basiliskStart(GiottoClass::instructions( + gobject = gobject, + param = "python_path" + )) + on.exit(basilisk::basiliskStop(proc)) + + success <- basilisk::basiliskRun( + proc, + function(gobject, + output_path, + expression) { + anndata <- reticulate::import("anndata") + zarr <- reticulate::import("zarr") + + # extract expression matrix + X <- t(as.matrix(GiottoClass::getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = expression, + output = "matrix" + ))) + + # extract cell metadata + obs <- as.data.frame(GiottoClass::getCellMetadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table" + )) + + rownames(obs) <- obs$cell_ID + + # extract feature metadata + var <- as.data.frame(GiottoClass::getFeatureMetadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table" + )) + + obsm <- list() + + # extract spatial locations + spatial_locs <- as.data.frame(GiottoClass::getSpatialLocations( + gobject = gobject, + spat_unit = spat_unit, + output = "data.table" + )) + + if (!is.null(spatial_locs)) { + rownames(spatial_locs) <- spatial_locs$cell_ID + spatial_locs <- spatial_locs[obs$cell_ID, ] + spatial_locs_matrix <- as.matrix(spatial_locs[, 1:2]) + + obsm[["spatial"]] <- spatial_locs_matrix + } + + # extract pca + dim_reducs_pca <- GiottoClass::getDimReduction( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction_method = "pca", + output = "matrix" + ) + + if (!is.null(dim_reducs_pca)) { + obsm[["pca"]] <- dim_reducs_pca[obs$cell_ID, ] + } + + # extract umap + dim_reducs_umap <- GiottoClass::getDimReduction( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction_method = "umap", + name = "umap", + output = "matrix" + ) + + if (!is.null(dim_reducs_umap)) { + obsm[["umap"]] <- dim_reducs_umap[obs$cell_ID, ] + 
} + + # extract tSNE + dim_reducs_tsne <- GiottoClass::getDimReduction( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction_method = "tsne", + name = "tsne", + output = "matrix" + ) + + if (!is.null(dim_reducs_tsne)) { + obsm[["tsne"]] <- dim_reducs_tsne[obs$cell_ID, ] + } + + adata <- anndata$AnnData(X = X, obs = obs, var = var, obsm = obsm) + + adata$write_zarr(output_path) + return(TRUE) + }, + gobject = gobject, output_path = output_path, expression = expression + ) + + return(success) +} diff --git a/R/kriging.R b/R/kriging.R index 2c0c8b204..f8d760af6 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -43,20 +43,19 @@ NULL #' @export setMethod( "interpolateFeature", signature(x = "giotto", y = "missing"), - function( - x, - spat_unit = NULL, - feat_type = NULL, - feats, - spatvalues_params = list(), - spat_loc_name = "raw", - ext = NULL, - buffer = 50, - name_fmt = "%s", - savedir = file.path(getwd(), "interp_rasters"), - overwrite = FALSE, - verbose = NULL, - ...) { + function(x, + spat_unit = NULL, + feat_type = NULL, + feats, + spatvalues_params = list(), + spat_loc_name = "raw", + ext = NULL, + buffer = 50, + name_fmt = "%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + verbose = NULL, + ...) { sl <- NULL # This method prepares the data from the giotto object to pass @@ -142,16 +141,15 @@ setMethod( setMethod( "interpolateFeature", signature(x = "spatLocsObj", y = "data.frame"), - function( - x, y, - ext = NULL, - buffer = 50, - rastersize = 500, - name_fmt = "%s", - savedir = file.path(getwd(), "interp_rasters"), - overwrite = FALSE, - # cores = GiottoUtils::determine_cores(), - ...) { + function(x, y, + ext = NULL, + buffer = 50, + rastersize = 500, + name_fmt = "%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + # cores = GiottoUtils::determine_cores(), + ...) { checkmate::assert_character(savedir) checkmate::assert_character(name_fmt) checkmate::assert_logical(overwrite) @@ -182,8 +180,8 @@ setMethod( e_numeric <- ext[] - progressr::with_progress({ - pb <- progressr::progressor(along = feats) + with_pbar({ + pb <- pbar(along = feats) interp_img_list <- lapply_flex( feats, @@ -193,7 +191,8 @@ setMethod( # create subset table with only relevant data data <- annotatedlocs[, - c("cell_ID", feat, "sdimx", "sdimy"), with = FALSE + c("cell_ID", feat, "sdimx", "sdimy"), + with = FALSE ] data.table::setnames(data, old = feat, new = "count") diff --git a/R/normalize.R b/R/normalize.R new file mode 100644 index 000000000..2a1a2a1a3 --- /dev/null +++ b/R/normalize.R @@ -0,0 +1,656 @@ +#' @title normalizeGiotto +#' @name normalizeGiotto +#' @description fast normalize and/or scale expression values of Giotto object +#' @param gobject `giotto` object +#' @param spat_unit spatial unit +#' @param feat_type feature type +#' @param expression_values expression values to use +#' @param norm_methods normalization method to use +#' @param library_size_norm normalize cells by library size +#' @param scalefactor scale factor to use after library size normalization +#' @param log_norm transform values to log-scale +#' @param log_offset offset value to add to expression matrix, default = 1 +#' @param logbase log base to use to log normalize expression values +#' @param scale_feats z-score genes over all cells +#' @param scale_cells z-score cells over all genes +#' @param scale_order order to scale feats and cells +#' @param theta theta parameter for the pearson residual normalization step +#' @param name character. 
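As a quick check of the giottoToAnndataZarr() export above, the zarr store can be read back through reticulate with the anndata python package (a sketch; assumes the store was written to tempdir() as in the example and that the same python environment is active):

ad <- reticulate::import("anndata")
adata <- ad$read_zarr(tempdir())

# basic inspection of the exported object
adata$shape           # cells x features
adata$obsm_keys()     # e.g. "spatial", "pca", "umap" when present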
name to use for normalization results +#' @param verbose be verbose +#' @param scale_genes deprecated, use scale_feats +#' @param update_slot deprecated. Use `name` param instead +#' @md +#' @returns `giotto` object +#' @details Currently there are two 'methods' to normalize your raw counts data. +#' +#' A. The standard method follows the standard protocol which can be adjusted +#' using the provided parameters and follows the following order: \cr +#' \itemize{ +#' \item{1. Data normalization for total library size and scaling by a custom +#' scale-factor.} +#' \item{2. Log transformation of data.} +#' \item{3. Z-scoring of data by genes and/or cells.} +#' } +#' B. The normalization method as provided by the osmFISH paper is also +#' implemented: \cr +#' \itemize{ +#' \item{1. First normalize genes, for each gene divide the counts by the +#' total gene count and multiply by the total number of genes.} +#' \item{2. Next normalize cells, for each cell divide the normalized gene +#' counts by the total counts per cell and multiply by the total number of +#' cells.} +#' } +#' C. The normalization method as provided by Lause/Kobak et al is also +#' implemented: \cr +#' \itemize{ +#' \item{1. First calculate expected values based on Pearson correlations.} +#' \item{2. Next calculate z-scores based on observed and expected values.} +#' } +#' D. Quantile normalization across features +#' \itemize{ +#' \item{1. Rank feature expression} +#' \item{2. Define a common distribution by sorting expression values per +#' feature then finding the mean across all features per index} +#' \item{3. Apply common distribution to expression information by using +#' the ranks from step 1 as indices} +#' } +#' By default the latter two results will be saved in the Giotto slot for +#' scaled expression, this can be changed by changing the update_slot parameters +#' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' +#' normalizeGiotto(g) # default is method A +#' @export +normalizeGiotto <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = "raw", + norm_methods = c("standard", "pearson_resid", "osmFISH", "quantile"), + library_size_norm = TRUE, + scalefactor = 6e3, + log_norm = TRUE, + log_offset = 1, + logbase = 2, + scale_feats = TRUE, + scale_genes = deprecated(), + scale_cells = TRUE, + scale_order = c("first_feats", "first_cells"), + theta = 100, + name = "scaled", + update_slot = deprecated(), + verbose = TRUE) { + ## deprecated arguments + scale_feats <- deprecate_param( + scale_genes, scale_feats, + fun = "normalizeGiotto", + when = "3.0.0" + ) + name <- deprecate_param( + update_slot, name, + fun = "normalizeGiotto", + when = "4.1.3" + ) + + # Set feat_type and spat_unit + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit + ) + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + ## default is to start from raw data + values <- match.arg(expression_values, unique(c("raw", expression_values))) + raw_expr <- getExpression( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = values, + output = "exprObj" + ) + + norm_methods <- match.arg( + arg = norm_methods, choices = c( + "standard", "pearson_resid", "osmFISH", "quantile" + ) + ) + + # normalization according to standard methods + gobject <- switch(norm_methods, + "standard" = .rna_standard_normalization( + gobject = gobject, + raw_expr = raw_expr, + feat_type = feat_type, + spat_unit = spat_unit, 
+ library_size_norm = library_size_norm, + scalefactor = scalefactor, + log_norm = log_norm, + log_offset = log_offset, + logbase = logbase, + scale_feats = scale_feats, + scale_cells = scale_cells, + scale_order = scale_order, + verbose = verbose + ), + "osmFISH" = .rna_osmfish_normalization( + gobject = gobject, + raw_expr = raw_expr, + feat_type = feat_type, + spat_unit = spat_unit, + name = name, + verbose = verbose + ), + "pearson_resid" = .rna_pears_resid_normalization( + gobject = gobject, + raw_expr = raw_expr, + feat_type = feat_type, + spat_unit = spat_unit, + theta = theta, + name = name, + verbose = verbose + ), + "quantile" = .quantile_norm( + gobject = gobject, + raw_expr = raw_expr, + feat_type = feat_type, + spat_unit = spat_unit, + name = name, + verbose = verbose + ) + ) + + ## update parameters used ## + + # Do not update downstream of processGiotto + # Parameters will be updated within processGiotto + try( + { + upstream_func <- sys.call(-2) + fname <- as.character(upstream_func[[1]]) + if (fname == "processGiotto") { + return(gobject) + } + }, + silent = TRUE + ) + + + # If this function call is not downstream of processGiotto, update normally + gobject <- update_giotto_params(gobject, description = "_normalize") + + return(gobject) +} + + + +# internals #### + + +#' @title Normalize expression matrix for library size +#' @param mymatrix matrix object +#' @param scalefactor scalefactor +#' @returns matrix +#' @keywords internal +#' @noRd +.lib_norm_giotto <- function(mymatrix, scalefactor) { + libsizes <- colSums_flex(mymatrix) + + if (0 %in% libsizes) { + warning(wrap_txt("Total library size or counts for individual spat + units are 0. + This will likely result in normalization problems. + filter (filterGiotto) or impute (imputeGiotto) spatial + units.")) + } + + norm_expr <- t_flex(t_flex(mymatrix) / libsizes) * scalefactor + return(norm_expr) +} + +#' @title Log normalize expression matrix +#' @returns matrix +#' @keywords internal +#' @noRd +.log_norm_giotto <- function(mymatrix, base, offset) { + if (methods::is(mymatrix, "DelayedArray")) { + mymatrix <- log(mymatrix + offset) / log(base) + # } else if(methods::is(mymatrix, 'DelayedMatrix')) { + # mymatrix = log(mymatrix + offset)/log(base) + } else if (methods::is(mymatrix, "dgCMatrix")) { + mymatrix@x <- log(mymatrix@x + offset) / log(base) + # replace with sparseMatrixStats + } else if (methods::is(mymatrix, "Matrix")) { + mymatrix@x <- log(mymatrix@x + offset) / log(base) + } else if (methods::is(mymatrix, "dbMatrix")) { + mymatrix[] <- dplyr::mutate(mymatrix[], x = x + offset) + # workaround for lack of @x slot + mymatrix <- log(mymatrix) / log(base) + } else { + mymatrix <- log(as.matrix(mymatrix) + offset) / log(base) + } + + return(mymatrix) +} + + +#' @title compute_dbMatrix +#' @description saves dbMatrix to db if global option is set +#' @details +#' Set \code{options(giotto.dbmatrix_compute = FALSE)} if saving dbMatrix +#' after each step of normalization workflow is not desired. 
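To make the arithmetic of the standard normalization path above concrete (.lib_norm_giotto() followed by .log_norm_giotto() and z-scoring), a small base-R sketch with a toy matrix and the default scalefactor, log base, and offset; this mirrors the behavior only, not the dispatch over the different matrix classes:

# toy counts: 3 features x 2 cells
m <- matrix(c(1, 0, 4, 2, 3, 0), nrow = 3,
            dimnames = list(paste0("feat", 1:3), paste0("cell", 1:2)))

# 1. library-size normalization with scalefactor = 6000
libsizes <- colSums(m)
norm <- t(t(m) / libsizes) * 6000

# 2. log2 transform with offset 1
lognorm <- log(norm + 1) / log(2)

# 3. z-score per feature, then per cell (scale_order = "first_feats")
scaled <- scale(t(scale(t(lognorm))))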
+#' @keywords internal +#' @noRd +.compute_dbMatrix <- function(dbMatrix, name, verbose = TRUE) { + # input validation + if (!inherits(dbMatrix, "dbMatrix")) { + stop("dbMatrix must be of class dbMatrix") + } + + if (!is.character(name)) { + stop("name must be a character") + } + + # TODO: update with dbData generic + con <- dbMatrix:::get_con(dbMatrix) + + # overwrite table by default + if (name %in% DBI::dbListTables(con)) { + DBI::dbRemoveTable(con, name) + } + + if (verbose) { + msg <- glue::glue("Computing {name} expression matrix on disk...") + cat(msg) + } + + dbMatrix[] |> + dplyr::compute(temporary = FALSE, name = name) + + # TODO: update below with proper setters from dbMatrix + dbMatrix[] <- dplyr::tbl(con, name) # reassign to computed mat + dbMatrix@name <- name + + if (verbose) cat("done \n") + + return(dbMatrix) +} + +#' @title RNA standard normalization +#' @name .rna_standard_normalization +#' @description standard function for RNA normalization +#' @returns giotto object +#' @keywords internal +#' @noRd +.rna_standard_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + library_size_norm = TRUE, + scalefactor = 6e3, + log_norm = TRUE, + log_offset = 1, + logbase = 2, + scale_feats = TRUE, + scale_cells = TRUE, + scale_order = c("first_feats", "first_cells"), + verbose = TRUE) { + # check feature type compatibility + if (!feat_type %in% c("rna", "RNA")) { + warning("Caution: Standard normalization was developed for RNA data \n") + } + + # evaluate provenance before modifying raw_expr in case h5_file exists + if (isS4(raw_expr)) { + provenance <- raw_expr@provenance + } else { + provenance <- NULL + } + + + feat_names <- rownames(raw_expr[]) + col_names <- colnames(raw_expr[]) + + ## 1. library size normalize + if (isTRUE(library_size_norm)) { + norm_expr <- .lib_norm_giotto( + mymatrix = raw_expr[], + scalefactor = scalefactor + ) + } else { + norm_expr <- raw_expr[] + } + + ## 2. log normalize + if (isTRUE(log_norm)) { + norm_expr <- .log_norm_giotto( + mymatrix = norm_expr, + base = logbase, + offset = log_offset + ) + } + + ## 3. scale + if (isTRUE(scale_feats) && isTRUE(scale_cells)) { + scale_order <- match.arg( + arg = scale_order, choices = c("first_feats", "first_cells") + ) + + if (scale_order == "first_feats") { + if (isTRUE(verbose)) { + vmsg(.v = verbose, "first scale feats and then cells") + } + + norm_scaled_expr <- t_flex(standardise_flex( + x = t_flex(norm_expr), center = TRUE, scale = TRUE + )) + norm_scaled_expr <- standardise_flex( + x = norm_scaled_expr, center = TRUE, scale = TRUE + ) + } else if (scale_order == "first_cells") { + if (isTRUE(verbose)) { + vmsg(.v = verbose, "first scale cells and then feats") + } + + norm_scaled_expr <- standardise_flex( + x = norm_expr, center = TRUE, scale = TRUE + ) + norm_scaled_expr <- t_flex(standardise_flex( + x = t_flex(norm_scaled_expr), center = TRUE, scale = TRUE + )) + } else { + stop("\n scale order must be given \n") + } + } else if (isTRUE(scale_feats)) { + norm_scaled_expr <- t_flex(standardise_flex( + x = t_flex(norm_expr), center = TRUE, scale = TRUE + )) + } else if (isTRUE(scale_cells)) { + norm_scaled_expr <- standardise_flex( + x = norm_expr, center = TRUE, scale = TRUE + ) + } else { + norm_scaled_expr <- NULL + } + + + ## 4. 
add cell and gene names back + if (!is.null(norm_expr)) { + rownames(norm_expr) <- feat_names + colnames(norm_expr) <- col_names + } + if (!is.null(norm_scaled_expr)) { + rownames(norm_scaled_expr) <- feat_names + colnames(norm_scaled_expr) <- col_names + } + + ## 5. create and set exprObj + # Save dbMatrix to db + compute_mat <- getOption("giotto.dbmatrix_compute", default = FALSE) + if (compute_mat && !is.null(norm_expr)) { + norm_expr <- .compute_dbMatrix( + dbMatrix = norm_expr, + name = "normalized", + verbose = verbose + ) + } + + norm_expr <- create_expr_obj( + name = "normalized", + exprMat = norm_expr, + spat_unit = spat_unit, + feat_type = feat_type, + provenance = provenance, + misc = NULL + ) + + # Save dbMatrix to db + if (compute_mat && !is.null(norm_scaled_expr)) { + norm_scaled_expr <- .compute_dbMatrix( + dbMatrix = norm_scaled_expr, + name = "scaled", + verbose = verbose + ) + } + + norm_scaled_expr <- create_expr_obj( + name = "scaled", + exprMat = norm_scaled_expr, + spat_unit = spat_unit, + feat_type = feat_type, + provenance = provenance, + misc = NULL + ) + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <- setGiotto(gobject, norm_expr, initialize = FALSE) + gobject <- setGiotto(gobject, norm_scaled_expr, initialize = FALSE) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + ## 6. return Giotto object + return(initialize(gobject)) +} + + + +#' @title RNA osmfish normalization +#' @name .rna_osmfish_normalization +#' @description function for RNA normalization according to osmFISH paper +#' @returns giotto object +#' @keywords internal +#' @noRd +.rna_osmfish_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + name = "custom", + verbose = TRUE) { + # check feature type compatibility + if (!feat_type %in% c("rna", "RNA")) { + warning("Caution: osmFISH normalization was developed for RNA in situ + data \n") + } + + # 1. normalize per gene with scale-factor equal to number of genes + norm_feats <- (raw_expr[] / rowSums_flex(raw_expr[])) * nrow(raw_expr[]) + # 2. 
normalize per cells with scale-factor equal to number of cells + norm_feats_cells <- t_flex((t_flex(norm_feats) / + colSums_flex(norm_feats)) * ncol(raw_expr[])) + + # return results to Giotto object + if (verbose == TRUE) { + message( + "\n osmFISH-like normalized data will be returned to the", + name, "Giotto slot \n" + ) + } + + norm_feats_cells <- create_expr_obj( + name = name, + exprMat = norm_feats_cells, + spat_unit = spat_unit, + feat_type = feat_type, + provenance = raw_expr@provenance + ) + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <- setGiotto(giotto, norm_feats_cells) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + return(gobject) +} + + +#' @title RNA pearson residuals normalization +#' @name rna_pears_resid_normalization +#' @description function for RNA normalization according to Lause/Kobak et al +#' paper +#' Adapted from https://gist.github.com/hypercompetent/51a3c428745e1c06d826d76c3671797c#file-pearson_residuals-r +#' @returns giotto object +#' @keywords internal +#' @noRd +.rna_pears_resid_normalization <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + theta = 100, + name = "scaled", + verbose = TRUE) { + # print message with information # + if (verbose) { + message("using 'Lause/Kobak' method to normalize count matrix If used in + published research, please cite: + Jan Lause, Philipp Berens, Dmitry Kobak (2020). + 'Analytic Pearson residuals for normalization of single-cell RNA-seq UMI + data' ") + } + + # check feature type compatibility + if (!feat_type %in% c("rna", "RNA")) { + warning("Caution: pearson residual normalization was developed for RNA + count normalization \n") + } + + if (methods::is(raw_expr[], "HDF5Matrix")) { + .csums <- .csum_nodrop.HDF5Matrix + .rsums <- .rsum_nodrop.HDF5Matrix + } else { + .csums <- .csum_nodrop.Matrix + .rsums <- .rsum_nodrop.Matrix + } + + z <- .prnorm(x = raw_expr[], theta, .csums = .csums, .rsums = .rsums) + z <- create_expr_obj( + name = name, + exprMat = z, + spat_unit = spat_unit, + feat_type = feat_type, + provenance = prov(raw_expr) + ) + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <- setGiotto(gobject, z, verbose = verbose) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + return(gobject) +} + +.quantile_norm <- function( + gobject, + raw_expr, + feat_type, + spat_unit, + name = "quantile", + verbose = TRUE) { + z <- .qnorm(x = raw_expr[]) + z <- create_expr_obj( + name = name, + exprMat = z, + spat_unit = spat_unit, + feat_type = feat_type, + provenance = prov(raw_expr) + ) + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <- setGiotto(gobject, z, verbose = verbose) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + return(gobject) +} + +# pearson residuals normalization +# x : raw expression matrix +# .csums : function for colSums that does not drop to vector +# .rsums : function for rowSums that does not drop to vector +.prnorm <- function( + x, + theta = 100, + .csums = .csum_nodrop.Matrix, + .rsums = .rsum_nodrop.Matrix) { + # find 1. colsums, 2. rowsums, 3. 
matrix sum + counts_sum0 <- .csums(x) + counts_sum1 <- .rsums(x) + counts_sum <- sum(x) + + # get residuals + mu <- (counts_sum1 %*% counts_sum0) / counts_sum + z <- (x - mu) / sqrt(mu + mu^2 / theta) + + # clip to be within the range [-sqrt(n), sqrt(n)] + # This is done to prevent extreme values from dominating the analysis. + n <- ncol(x) + z[z > sqrt(n)] <- sqrt(n) + z[z < -sqrt(n)] <- -sqrt(n) + return(z) +} + + + +# quantile normalization +.qnorm <- function(x) { + # apply on features by default + x <- t_flex(x) + # Rank the values within each column + ranked_data <- t_flex(MatrixGenerics::colRanks(x, ties.method = "average")) + + # Calculate the mean of sorted values across all columns + rank_means <- rowMeans(apply(x, 2, sort)) + + # Replace the original values with the rank means + # TODO revisit for large matrices + normalized_data <- apply(ranked_data, 2, function(idx) { + .qnorm_vector(idx, rank_means) + }) |> + methods::as("Matrix") + + # Retain the original column names + colnames(normalized_data) <- colnames(x) + normalized_data <- t_flex(normalized_data) + return(normalized_data) +} + +# create lookup value vector for quantile norm. +# .5 indices should pull the mean of the adjacent values +# indices: index values with some values being .5, designating ranking ties +# values: values to pull from with the indices +.qnorm_vector <- function(indices, values) { + sorted_values <- sort(values) + lower_indices <- floor(indices) + upper_indices <- ceiling(indices) + lower_values <- sorted_values[lower_indices] + upper_values <- sorted_values[upper_indices] + weights <- indices - lower_indices + result <- (1 - weights) * lower_values + weights * upper_values + return(result) +} + +.csum_nodrop.Matrix <- function(x) { + x |> + Matrix::colSums() |> + matrix(nrow = 1L) |> + methods::as("Matrix") +} +.rsum_nodrop.Matrix <- function(x) { + x |> + Matrix::rowSums() |> + matrix(ncol = 1L) |> + methods::as("Matrix") +} +.csum_nodrop.HDF5Matrix <- function(x) { + x |> + MatrixGenerics::colSums2() |> + matrix(nrow = 1L) |> + methods::as("HDF5Matrix") +} +.rsum_nodrop.HDF5Matrix <- function(x) { + x |> + MatrixGenerics::rowSums2() |> + matrix(ncol = 1L) |> + methods::as("HDF5Matrix") +} diff --git a/R/poly_influence.R b/R/poly_influence.R index cf5a2a031..6c1bea3fb 100644 --- a/R/poly_influence.R +++ b/R/poly_influence.R @@ -26,14 +26,13 @@ #' condensed to align with the smaller number of clusters and ensure overlap. #' #' @export -showPolygonSizeInfluence <- function( - gobject = NULL, - spat_unit = NULL, - alt_spat_unit = NULL, - feat_type = NULL, - clus_name = "kmeans", - return_plot = FALSE, - verbose = FALSE) { +showPolygonSizeInfluence <- function(gobject = NULL, + spat_unit = NULL, + alt_spat_unit = NULL, + feat_type = NULL, + clus_name = "kmeans", + return_plot = FALSE, + verbose = FALSE) { # NSE vars cell_ID <- total_expr <- cluster_interactions <- N <- resize_switch <- NULL @@ -187,10 +186,9 @@ showPolygonSizeInfluence <- function( #' in each cluster. 
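A base-R sketch of the Pearson-residual computation implemented by .prnorm() above (the Lause/Kobak method), using toy count data; only the arithmetic is shown, not the Matrix/HDF5-specific helpers:

set.seed(1)
x <- matrix(rpois(12, lambda = 5), nrow = 4,
            dimnames = list(paste0("feat", 1:4), paste0("cell", 1:3)))
theta <- 100

# expected counts under the null model: mu_ij = rowSum_i * colSum_j / total
mu <- (matrix(rowSums(x), ncol = 1) %*% matrix(colSums(x), nrow = 1)) / sum(x)

# analytic Pearson residuals, clipped to [-sqrt(n), sqrt(n)] with n = ncol(x)
z <- (x - mu) / sqrt(mu + mu^2 / theta)
n <- ncol(x)
z <- pmin(pmax(z, -sqrt(n)), sqrt(n))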
#' #' @keywords internal -.determine_switch_string_equal <- function( - cell_meta = NULL, - cell_meta_new = NULL, - clus_name = NULL) { +.determine_switch_string_equal <- function(cell_meta = NULL, + cell_meta_new = NULL, + clus_name = NULL) { k_clusters <- sort(unique(cell_meta[[clus_name]])) num_clusters <- k_clusters[length(k_clusters)] @@ -238,9 +236,8 @@ showPolygonSizeInfluence <- function( #' Essentially determines iteration order for .create_switch_string_unequal() #' #' @keywords internal -.determine_switch_string_unequal <- function( - num_orig = NULL, - num_new = NULL) { +.determine_switch_string_unequal <- function(num_orig = NULL, + num_new = NULL) { switch_strs <- c() orig_first <- TRUE @@ -277,10 +274,9 @@ showPolygonSizeInfluence <- function( #' n is the number of clusters in the original spatial unit #' m is the number of clusters in the new spatial unit #' @keywords internal -.create_switch_string_unequal <- function( - num_first = NULL, - num_second = NULL, - switch_strs = NULL) { +.create_switch_string_unequal <- function(num_first = NULL, + num_second = NULL, + switch_strs = NULL) { for (o in num_first) { for (n in num_second) { if (as.integer(o) == as.integer(n)) { @@ -312,10 +308,9 @@ showPolygonSizeInfluence <- function( #' The function showPolygonSizeInfluence() must have been run on the Giotto #' Object for this function to run. #' @export -showCellProportionSwitchedPie <- function( - gobject = NULL, - spat_unit = NULL, - feat_type = NULL) { +showCellProportionSwitchedPie <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL) { # NSE vars cluster_status <- num_cells <- resize_switch <- perc <- ypos <- NULL @@ -387,11 +382,10 @@ showCellProportionSwitchedPie <- function( #' @details Creates a Sankey Diagram to illustrate cluster switching behavior. #' Currently only supports displaying cluster switching for kmeans clusters. 
#' @export -showCellProportionSwitchedSanKey <- function( - gobject = NULL, - spat_unit = NULL, - alt_spat_unit = NULL, - feat_type = NULL) { +showCellProportionSwitchedSanKey <- function(gobject = NULL, + spat_unit = NULL, + alt_spat_unit = NULL, + feat_type = NULL) { # NSE vars kmeans_small <- cell_ID <- NULL diff --git a/R/python_hmrf.R b/R/python_hmrf.R index 6b0b597d0..38e3330f7 100644 --- a/R/python_hmrf.R +++ b/R/python_hmrf.R @@ -37,28 +37,28 @@ #' output_folder = tempdir() #' ) #' @export -doHMRF <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - spatial_network_name = "Delaunay_network", - spat_loc_name = "raw", - spatial_genes = NULL, - spatial_dimensions = c("sdimx", "sdimy", "sdimz"), - dim_reduction_to_use = NULL, - dim_reduction_name = "pca", - dimensions_to_use = 1:10, - seed = 100, - name = "test", - k = 10, - betas = c(0, 2, 50), - tolerance = 1e-10, - zscore = c("none", "rowcol", "colrow"), - numinit = 100, - python_path = NULL, - output_folder = NULL, - overwrite_output = TRUE) { - +doHMRF <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + spatial_network_name = "Delaunay_network", + spat_loc_name = "raw", + spatial_genes = NULL, + spatial_dimensions = c("sdimx", "sdimy", "sdimz"), + dim_reduction_to_use = NULL, + dim_reduction_name = "pca", + dimensions_to_use = 1:10, + seed = 100, + name = "test", + k = 10, + betas = c(0, 2, 50), + tolerance = 1e-10, + zscore = c("none", "rowcol", "colrow"), + numinit = 100, + python_path = NULL, + output_folder = NULL, + overwrite_output = TRUE) { package_check("smfishHmrf", repository = "pip") # data.table set global variable @@ -145,16 +145,16 @@ doHMRF <- function(gobject, # overwrite if exists if (file.exists(expression_file) & overwrite_output == TRUE) { - message("\n expression_matrix.txt already exists at this location, will be - overwritten") + message("\n expression_matrix.txt already exists at this location, + will be overwritten") data.table::fwrite( data.table::as.data.table(expr_values, keep.rownames = "gene"), file = expression_file, quote = FALSE, col.names = TRUE, row.names = FALSE, sep = " " ) } else if (file.exists(expression_file) & overwrite_output == FALSE) { - message("\n expression_matrix.txt already exists at this location, will be - used again") + message("\n expression_matrix.txt already exists at this location, + will be used again") } else { data.table::fwrite( data.table::as.data.table(expr_values, keep.rownames = "gene"), @@ -362,12 +362,11 @@ doHMRF <- function(gobject, #' ) #' #' @export -loadHMRF <- function( - name_used = "test", - output_folder_used, - k_used = 10, - betas_used, - python_path_used) { +loadHMRF <- function(name_used = "test", + output_folder_used, + k_used = 10, + betas_used, + python_path_used) { output_data <- paste0(output_folder_used, "/", "result.spatial.zscore") if (!file.exists(output_data)) { stop("\n doHMRF was not run in this output directory") @@ -403,13 +402,12 @@ loadHMRF <- function( #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot2D}} and \code{\link{spatPlot3D}} #' @export -viewHMRFresults <- function( - gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - third_dim = FALSE, - ...) { +viewHMRFresults <- function(gobject, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + third_dim = FALSE, + ...) 
{ if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("\n HMRFoutput needs to be output from doHMRFextend") } @@ -490,12 +488,11 @@ viewHMRFresults <- function( #' @param print_command see the python command #' @returns data.table with HMRF results for each b and the selected k #' @export -writeHMRFresults <- function( - gobject, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - print_command = FALSE) { +writeHMRFresults <- function(gobject, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + print_command = FALSE) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("\n HMRFoutput needs to be output from doHMRFextend") } @@ -600,14 +597,13 @@ writeHMRFresults <- function( #' gobject = g, cell_color = "HMRF_k6_b.20", #' ) #' @export -addHMRF <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - HMRFoutput, - k = NULL, - betas_to_add = NULL, - hmrf_name = NULL) { +addHMRF <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_add = NULL, + hmrf_name = NULL) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("\n HMRFoutput needs to be output from doHMRFextend") } @@ -727,14 +723,13 @@ addHMRF <- function( #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot2D}} #' @export -viewHMRFresults2D <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - ...) { +viewHMRFresults2D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ...) { spat_unit <- set_default_spat_unit( gobject = gobject, spat_unit = spat_unit ) @@ -844,14 +839,13 @@ viewHMRFresults2D <- function( #' @returns spatial plots with HMRF domains #' @seealso \code{\link{spatPlot3D}} #' @export -viewHMRFresults3D <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - HMRFoutput, - k = NULL, - betas_to_view = NULL, - ...) { +viewHMRFresults3D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + HMRFoutput, + k = NULL, + betas_to_view = NULL, + ...) { if (!"HMRFoutput" %in% class(HMRFoutput)) { stop("\n HMRFoutput needs to be output from doHMRFextend") } @@ -968,11 +962,10 @@ viewHMRFresults3D <- function( #' Changing from equal size by setting sample_rate = 1 to with exact proportion #' of each cluster by setting sample_rate = +Inf #' @keywords internal -sampling_sp_genes <- function( - clust, - sample_rate = 2, - target = 500, - seed = 10) { +sampling_sp_genes <- function(clust, + sample_rate = 2, + target = 500, + seed = 10) { tot <- 0 num_cluster <- length(unique(clust)) gene_list <- list() @@ -1026,10 +1019,9 @@ sampling_sp_genes <- function( #' This function calculates the number of data points in a sorted sequence #' below a line with given slope through a certain point on this sequence. 
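A hedged sketch of how the HMRF functions above chain together (doHMRF(), loadHMRF(), addHMRF(), viewHMRFresults2D()); the giotto object g, the gene vector my_spatial_genes, and the output folder are hypothetical:

# run HMRF domain detection and write intermediate results to disk
hmrf_out <- doHMRF(
    gobject = g,
    spatial_genes = my_spatial_genes,   # e.g. top binSpect genes
    k = 10,
    betas = c(0, 2, 25),
    output_folder = "HMRF_output"
)

# results written to disk can be reloaded in a later session
hmrf_out <- loadHMRF(
    name_used = "test",
    output_folder_used = "HMRF_output",
    k_used = 10,
    betas_used = c(0, 2, 25),
    python_path_used = NULL             # or an explicit python path
)

# attach selected beta results as cell metadata and visualize the domains
g <- addHMRF(g, HMRFoutput = hmrf_out, k = 10, betas_to_add = c(0, 2))
viewHMRFresults2D(g, HMRFoutput = hmrf_out, k = 10, betas_to_view = 2)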
#' @keywords internal -numPts_below_line <- function( - myVector, - slope, - x) { +numPts_below_line <- function(myVector, + slope, + x) { yPt <- myVector[x] b <- yPt - (slope * x) xPts <- seq_along(myVector) @@ -1058,9 +1050,10 @@ numPts_below_line <- function( #' #' filterSpatialGenes(g, spatial_genes = "Gm19935") #' @export -filterSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL, spatial_genes, max = 2500, - name = c("binSpect", "silhouetteRank", "silhouetteRankTest"), - method = c("none", "elbow")) { +filterSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL, + spatial_genes, max = 2500, + name = c("binSpect", "silhouetteRank", "silhouetteRankTest"), + method = c("none", "elbow")) { name <- match.arg( name, unique(c("binSpect", "silhouetteRank", "silhouetteRankTest", name)) @@ -1152,7 +1145,8 @@ filterSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL, spat #' Priorities for showing the spatial gene test names are ‘binSpect’ > #' ‘silhouetteRankTest’ > ‘silhouetteRank’. #' @keywords internal -chooseAvailableSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = NULL) { +chooseAvailableSpatialGenes <- function(gobject, + spat_unit = NULL, feat_type = NULL) { gx <- fDataDT(gobject, spat_unit = NULL, feat_type = NULL) eval1 <- "binSpect.pval" %in% names(gx) eval2 <- "silhouetteRankTest.pval" %in% names(gx) @@ -1186,12 +1180,11 @@ chooseAvailableSpatialGenes <- function(gobject, spat_unit = NULL, feat_type = N #' SilhouetteRank works only with score, and SilhouetteRankTest works only #' with pval. Use parameter use_score to specify. #' @keywords internal -checkAndFixSpatialGenes <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - use_spatial_genes, - use_score = FALSE) { +checkAndFixSpatialGenes <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + use_spatial_genes, + use_score = FALSE) { gx <- fDataDT(gobject, spat_unit = NULL, feat_type = NULL) if (use_spatial_genes == "silhouetteRank") { @@ -1230,7 +1223,7 @@ checkAndFixSpatialGenes <- function( } return(use_spatial_genes) } else { - stop(paste0("\n use_spatial_genes is set to one that is not supported."), + stop("use_spatial_genes is set to one that is not supported.", call. 
= FALSE ) } @@ -1325,38 +1318,37 @@ checkAndFixSpatialGenes <- function( #' initHMRF_V2(gobject = g, cl.method = "km") #' @export initHMRF_V2 <- - function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("scaled", "normalized", "custom"), - spatial_network_name = "Delaunay_network", - use_spatial_genes = c("binSpect", "silhouetteRank"), - use_score = FALSE, - gene_list_from_top = 2500, - filter_method = c("none", "elbow"), - user_gene_list = NULL, - use_pca = FALSE, - use_pca_dim = 1:20, - gene_samples = 500, - gene_sampling_rate = 2, - gene_sampling_seed = 10, - use_metagene = FALSE, - cluster_metagene = 50, - top_metagene = 20, - existing_spatial_enrichm_to_use = NULL, - use_neighborhood_composition = FALSE, - spatial_network_name_for_neighborhood = NULL, - metadata_to_use = NULL, - hmrf_seed = 100, - cl.method = c("km", "leiden", "louvain"), - resolution.cl = 1, - k = 10, - tolerance = 1e-05, - zscore = c("none", "rowcol", "colrow"), - nstart = 1000, - factor_step = 1.05, - python_path = NULL) { + function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("scaled", "normalized", "custom"), + spatial_network_name = "Delaunay_network", + use_spatial_genes = c("binSpect", "silhouetteRank"), + use_score = FALSE, + gene_list_from_top = 2500, + filter_method = c("none", "elbow"), + user_gene_list = NULL, + use_pca = FALSE, + use_pca_dim = 1:20, + gene_samples = 500, + gene_sampling_rate = 2, + gene_sampling_seed = 10, + use_metagene = FALSE, + cluster_metagene = 50, + top_metagene = 20, + existing_spatial_enrichm_to_use = NULL, + use_neighborhood_composition = FALSE, + spatial_network_name_for_neighborhood = NULL, + metadata_to_use = NULL, + hmrf_seed = 100, + cl.method = c("km", "leiden", "louvain"), + resolution.cl = 1, + k = 10, + tolerance = 1e-05, + zscore = c("none", "rowcol", "colrow"), + nstart = 1000, + factor_step = 1.05, + python_path = NULL) { wrap_msg( "\n If used in published research, please cite: Q Zhu, S Shah, R Dries, L Cai, GC Yuan. 
@@ -1498,7 +1490,7 @@ initHMRF_V2 <- "scaled", "normalized", "custom", expression_values ))) - expr_values <- get_expression_values( + expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, @@ -1506,7 +1498,7 @@ initHMRF_V2 <- ) if (zscore != "none") { zscore <- match.arg(zscore, c("none", "colrow", "rowcol")) - expr_values <- get_expression_values( + expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, @@ -1758,7 +1750,7 @@ initHMRF_V2 <- cl.method <- tolower(cl.method) if (!cl.method %in% c("km", "leiden", "louvain")) { cl.method <- "km" - message("\n clustering method not specified, use kmeans as default...") + message("clustering method not specified, use kmeans as default...") } if (cl.method == "km") { @@ -1778,7 +1770,9 @@ initHMRF_V2 <- gobject@dimension_reduction$cells$spatial$spatial_feat$coordinates <- y gobject <- createNearestNetwork( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, dim_reduction_to_use = "spatial", dim_reduction_name = "spatial_feat", dimensions_to_use = seq_len(ncol(y)), @@ -1788,7 +1782,9 @@ initHMRF_V2 <- if (cl.method == "leiden") { message("\n Leiden clustering initialization...") leiden.cl <- doLeidenCluster( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, nn_network_to_use = "sNN", network_name = "sNN.initHMRF", set_seed = hmrf_seed, @@ -1803,7 +1799,9 @@ initHMRF_V2 <- } else if (cl.method == "louvain") { message("\n Louvain clustering initialization...") louvain.cl <- doLouvainCluster( - gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, nn_network_to_use = "sNN", network_name = "sNN.initHMRF", set_seed = hmrf_seed, @@ -2034,8 +2032,9 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { spat_unit = spat_unit, feat_type = feat_type, new_metadata = HMRFoutput[[i]]$class[match( - ordered_cell_IDs, - rownames(HMRFoutput[[i]]$prob))], + ordered_cell_IDs, + rownames(HMRFoutput[[i]]$prob) + )], vector_name = paste(name, names(HMRFoutput)[i]) # ,column_cell_ID = 'cell_ID', # by_column = TRUE @@ -2044,8 +2043,8 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { return(gobject) } - - + + #' @title viewHMRFresults_V2 #' @name viewHMRFresults_V2 #' @description function to view HMRF results with multiple betas @@ -2074,22 +2073,21 @@ addHMRF_V2 <- function(gobject, HMRFoutput, name = "hmrf") { #' (for example name of ‘hmrf1 k=8 b=0.00’ is ‘hmrf1’) #' @export viewHMRFresults_V2 <- - function( - gobject, k, betas, - hmrf_name, - spat_unit = NULL, - feat_type = NULL, - third_dim = FALSE, - cow_n_col = 2, - cow_rel_h = 1, - cow_rel_w = 1, - cow_align = "h", - show_plot = TRUE, - save_plot = TRUE, - return_plot = TRUE, - default_save_name = "HMRF_result", - save_param = list(), - ...) { + function(gobject, k, betas, + hmrf_name, + spat_unit = NULL, + feat_type = NULL, + third_dim = FALSE, + cow_n_col = 2, + cow_rel_h = 1, + cow_rel_w = 1, + cow_align = "h", + show_plot = TRUE, + save_plot = TRUE, + return_plot = TRUE, + default_save_name = "HMRF_result", + save_param = list(), + ...) 
{ # beta_seq = round(betas,digits = 2) # t_key = paste0(hmrf_name,'_k', k, '_b.',beta_seq) t_key <- paste(hmrf_name, sprintf("k=%d b=%.2f", k, betas)) @@ -2151,7 +2149,7 @@ viewHMRFresults_V2 <- } # combine plots with cowplot - combo_plot <- cowplot::plot_grid( + combo_plot <- plot_grid( plotlist = savelist, ncol = cow_n_col, rel_heights = cow_rel_h, diff --git a/R/python_scrublet.R b/R/python_scrublet.R index 0f422ceae..b22e373c3 100644 --- a/R/python_scrublet.R +++ b/R/python_scrublet.R @@ -36,18 +36,17 @@ #' pDataDT(g) # doublet_scores and doublet cols are added #' dimPlot2D(g, cell_color = "doublet_scores", color_as_factor = FALSE) #' @export -doScrubletDetect <- function( - gobject, - feat_type = NULL, - spat_unit = "cell", - expression_values = "raw", - expected_doublet_rate = 0.06, - min_counts = 1, - min_cells = 1, - min_gene_variability_pctl = 85, - n_prin_comps = 30, - return_gobject = TRUE, - seed = 1234) { +doScrubletDetect <- function(gobject, + feat_type = NULL, + spat_unit = "cell", + expression_values = "raw", + expected_doublet_rate = 0.06, + min_counts = 1, + min_cells = 1, + min_gene_variability_pctl = 85, + n_prin_comps = 30, + return_gobject = TRUE, + seed = 1234) { # verify if optional package is installed package_check( pkg_name = "scrublet", diff --git a/R/spatial_clusters.R b/R/spatial_clusters.R index 71bac9c94..81fbf9e98 100644 --- a/R/spatial_clusters.R +++ b/R/spatial_clusters.R @@ -1,77 +1,3 @@ -#' @title Remove hetero edges from igraph -#' @name .igraph_remove_hetero_edges -#' @description -#' Given an igraph `g` and set of node attributes `clus_att` that encode -#' different spatial clusters, remove edges that connect non-similar nodes. -#' This can be used when data is already clustered, but these clusters should -#' be further broken up based on whether they are spatially touching. -#' @param g igraph object -#' @param clus_attr character. A categorical node attribute -#' @md -#' @returns igraph -#' @keywords internal -.igraph_remove_hetero_edges <- function(g, clus_attr) { - clus_attr_values <- igraph::vertex_attr(g, name = clus_attr) - - for (n in unique(clus_attr_values)) { - # find all vertices of the attribute - nv <- igraph::V(g)$name[clus_attr_values == n] - - # find edges that include these vertices - n_all_edges <- igraph::E(g)[.inc(igraph::V(g)[nv])] %>% - igraph::as_ids() - - # find edges associated with only these vertices - n_internal_edges <- igraph::E(g)[nv %--% nv] %>% - igraph::as_ids() - - het_edges <- n_all_edges[!n_all_edges %in% n_internal_edges] - - g <- igraph::delete_edges(g, edges = het_edges) - } - - g -} - - - - -#' @title igraph vertex membership -#' @name .igraph_vertex_membership -#' @description -#' Get which weakly connected set of vertices each vertex is part of -#' @param g igraph -#' @param clus_name character. 
name to assign column of clustering info -#' @param all_ids (optional) character vector with all ids -#' @param missing_id_name character and name for vertices that were missing from g -#' @returns data.table -#' @keywords internal -.igraph_vertex_membership <- function(g, - clus_name, - all_ids = NULL, - missing_id_name) { - - # get membership - membership <- igraph::components(g)$membership %>% - data.table::as.data.table(keep.rownames = TRUE) - data.table::setnames(membership, c("cell_ID", clus_name)) - - # add vertices that were missing from g back - if(!is.null(all_ids)) { - missing_ids = all_ids[!all_ids %in% V(g)$name] - missing_membership = data.table::data.table('cell_ID' = missing_ids, 'cluster_name' = missing_id_name) - data.table::setnames(missing_membership, c("cell_ID", clus_name)) - membership = data.table::rbindlist(list(membership, missing_membership)) - } - - return(membership) - -} - - - - - #' @title Split cluster annotations based on a spatial network #' @name spatialSplitCluster #' @inheritParams data_access_params @@ -79,14 +5,11 @@ #' @param cluster_col character. Column in metadata containing original #' clustering #' @param split_clus_name character. Name to assign the split cluster results -#' @param include_all_ids Boolean. Include all ids, including vertex ids not found -#' in the spatial network -#' @param missing_id_name Character. Name for vertices that were missing from -#' spatial network -#' @param return_gobject Boolean. Return giotto object +#' @param include_all_ids logical. Include all ids, including vertex ids not +#' found in the spatial network +#' @param return_gobject logical. Return giotto object #' @returns giotto object with cluster annotations #' @examples -#' library(Giotto) #' g <- GiottoData::loadGiottoMini("vizgen") #' activeSpatUnit(g) <- "aggregate" #' spatPlot2D(g, cell_color = "leiden_clus") @@ -98,17 +21,14 @@ #' # don't show legend since there are too many categories generated #' spatPlot2D(g, cell_color = "new", show_legend = FALSE) #' @export -spatialSplitCluster <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - cluster_col, - split_clus_name = paste0(cluster_col, "_split"), - include_all_ids = TRUE, - missing_id_name = 'not_connected', - return_gobject = TRUE) { - +spatialSplitCluster <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + cluster_col, + split_clus_name = paste0(cluster_col, "_split"), + missing_id_name = "not_connected", + return_gobject = TRUE) { # NSE vars cell_ID <- NULL @@ -157,38 +77,26 @@ spatialSplitCluster <- function( ) # get new clusterings - if(isTRUE(include_all_ids)) { - # include all cell IDs - all_ids = unique(cell_meta$cell_ID) - new_clus_dt <- .igraph_vertex_membership( - g = g, - clus_name = split_clus_name, - all_ids = all_ids, - missing_id_name = missing_id_name - ) - } else { - # only IDs present in graph - new_clus_dt <- .igraph_vertex_membership( + # spatially unconnected nodes (if any) will always be returned as 0 + all_ids <- unique(cell_meta$cell_ID) + new_clus_dt <- .igraph_vertex_membership( g = g, clus_name = split_clus_name, - all_ids = NULL - ) - - } - - if(isTRUE(return_gobject)) { - gobject <- addCellMetadata( - gobject, - spat_unit = spat_unit, - new_metadata = new_clus_dt, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - return(gobject) + all_ids <- all_ids + ) + + if (isTRUE(return_gobject)) { + gobject <- addCellMetadata( + gobject, + spat_unit = spat_unit, + 
new_metadata = new_clus_dt, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + return(gobject) } else { - new_clus_dt + new_clus_dt } - } @@ -200,92 +108,255 @@ spatialSplitCluster <- function( #' @inheritParams data_access_params #' @param spatial_network_name character. Name of spatial network to use #' @param core_id_name metadata column name for the core information -#' @param include_all_ids Boolean. Include all ids, including vertex ids not found -#' in the spatial network -#' @param missing_id_name Character. Name for vertices that were missing from -#' spatial network -#' @param return_gobject Boolean. Return giotto object +#' @param id_fmt character. [sprintf] formatting to use for core ids +#' @param include_all_ids logical. Include all ids, including vertex ids not +#' found in the spatial network +#' @param missing_id_name character. Name for nodes that are not connected to +#' a core. +#' @param min_nodes numeric. Minimal number of nodes to not be considered +#' an unconnected group. +#' @param join_split_cores logical. Attempt to repair core IDs when a core +#' is split down the middle and detected as two different cores. +#' @param join_tolerance numeric. Max ratio allowed relative to previous max +#' core convex hull area when determining if a pair of cores should be joined. +#' @param return_gobject logical. Return giotto object #' @returns cluster annotations #' @export identifyTMAcores <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - core_id_name = 'core_id', - include_all_ids = TRUE, - missing_id_name = 'not_connected', - return_gobject = TRUE) { - - - # NSE vars - cell_ID <- NULL - - spat_unit <- set_default_spat_unit( - gobject = gobject, - spat_unit = spat_unit - ) - feat_type <- set_default_feat_type( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type - ) - - cell_meta <- getCellMetadata( - gobject = gobject, - spat_unit = spat_unit, - feat_type = feat_type, - output = "data.table", - copy_obj = FALSE - ) - - sn <- getSpatialNetwork( - gobject = gobject, - spat_unit = spat_unit, - name = spatial_network_name, - output = "spatialNetworkObj", - copy_obj = FALSE, - verbose = FALSE, - ) - - - g <- GiottoClass::spat_net_to_igraph(sn) - # convert spatialNetworkObject to igraph - - - # get new clusterings - if(isTRUE(include_all_ids)) { - # include all cell IDs - all_ids = unique(cell_meta$cell_ID) - new_clus_dt <- .igraph_vertex_membership( - g = g, - clus_name = core_id_name, - all_ids = all_ids, - missing_id_name = missing_id_name + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + core_id_name = "core_id", + id_fmt = "%d", + include_all_ids = TRUE, + missing_id_name = "not_connected", + min_nodes = 5, + join_split_cores = TRUE, + join_tolerance = 1.2, + return_gobject = TRUE) { + # NSE vars + cell_ID <- NULL + + spat_unit <- set_default_spat_unit( + gobject = gobject, + spat_unit = spat_unit ) - } else { - # only IDs present in graph - new_clus_dt <- .igraph_vertex_membership( - g = g, - clus_name = core_id_name, - all_ids = NULL + feat_type <- set_default_feat_type( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type + ) + + # get data + cell_meta <- getCellMetadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = "data.table", + copy_obj = FALSE + ) + + sn <- getSpatialNetwork( + gobject = gobject, + spat_unit = spat_unit, + name = spatial_network_name, + output = "spatialNetworkObj", + copy_obj = 
FALSE, + verbose = FALSE, ) - - } - - if(isTRUE(return_gobject)) { - gobject <- addCellMetadata( - gobject, - spat_unit = spat_unit, - new_metadata = new_clus_dt, - by_column = TRUE, - column_cell_ID = "cell_ID" + + g <- GiottoClass::spat_net_to_igraph(sn) + # convert spatialNetworkObject to igraph + + # get new clusterings as initial indices + # these indices may need repairs and updates to be finalized + ivm_params <- list( + g = g, clus_name = "init_idx" ) - return(gobject) - } else { - new_clus_dt - } - - + if (isTRUE(include_all_ids)) { + # include all cell IDs + all_ids <- unique(cell_meta$cell_ID) + ivm_params$all_ids <- all_ids + } else { + # only IDs present in graph + ivm_params$all_ids <- NULL + } + + new_clus_dt <- do.call(.igraph_vertex_membership, args = ivm_params) + # connected nodes + con <- new_clus_dt[init_idx > 0] + # spatially disconnected observations (not connected to a group of nodes) + dcon <- new_clus_dt[init_idx == 0] + + # min nodes filter + con_nodes <- con[, .N, by = init_idx] + small_con_idx <- con_nodes[N < min_nodes, init_idx] + # shift filtered values to dcon (disconnected) + con[init_idx %in% small_con_idx, init_idx := 0] + dcon <- rbind(dcon, con[init_idx == 0]) + con <- con[init_idx != 0] + + # fix split cores + if (join_split_cores) { + sl <- getSpatialLocations(gobject, spat_unit = spat_unit) + con_init_idx_uniq <- sort(unique(con$init_idx)) + + areas <- vapply( + FUN.VALUE = numeric(1L), con_init_idx_uniq, function(core_id) { + sl[con[init_idx == core_id, cell_ID]] |> + convHull() |> + area() + } + ) + max_area <- max(areas) + + # find ext of cores + # iterate through angles to catch cases where extents do not + # bridge across split. + ovlp_reps <- lapply(c(0, 22.5, 45), function(rangle) { + sl_rot <- spin(sl, rangle) + + # get ext poly of rotated cores + epoly_list <- lapply(con_init_idx_uniq, function(core_id) { + sl_rot[con[init_idx == core_id, cell_ID]] |> + ext() |> + as.polygons() + }) + poly <- do.call(rbind, epoly_list) + + # test for overlaps + ovlp <- relate(poly, relation = "overlaps", pairs = TRUE) |> + # determine sorted pairs of overlaps + apply(MARGIN = 2, sort) |> + t() + return(ovlp) + }) + # combine test reps + ovlps <- do.call(rbind, ovlp_reps) |> + unique() + + # update ids based on test + for (pair_i in nrow(ovlps)) { + idx_1 <- ovlps[pair_i, 1L] + idx_2 <- ovlps[pair_i, 2L] + # ignore hits from two full cores + # combined area of IDs to join cannot be greater than join_tolerance of max_area + if ((areas[[idx_1]] + areas[[idx_2]]) > + (join_tolerance * max_area)) { + next + } + + con[init_idx == idx_2, init_idx := idx_1] + } + + } + + # apply core_id_name + con[, (core_id_name) := sprintf(id_fmt, init_idx)] + dcon[, (core_id_name) := missing_id_name] + + res <- rbind( + con[, .SD, .SDcols = c("cell_ID", core_id_name)], + dcon[, .SD, .SDcols = c("cell_ID", core_id_name)] + ) + + if (isTRUE(return_gobject)) { + gobject <- addCellMetadata( + gobject, + spat_unit = spat_unit, + new_metadata = res, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + return(gobject) + } else { + new_clus_dt + } +} + + + + + + + + + + + + +# internals #### + +#' @title Remove hetero edges from igraph +#' @name .igraph_remove_hetero_edges +#' @description +#' Given an igraph `g` and set of node attributes `clus_att` that encode +#' different spatial clusters, remove edges that connect non-similar nodes. +#' This can be used when data is already clustered, but these clusters should +#' be further broken up based on whether they are spatially touching. 
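The two igraph helpers relocated into this internals section reduce to (1) dropping edges whose endpoints carry different cluster labels and (2) reading weakly connected component membership. A minimal standalone sketch of the same logic on a toy graph, using only igraph and data.table (vertex names and the `clus` labels are invented for illustration; the package helpers additionally handle missing vertices and column naming):

library(igraph)
library(data.table)

# toy spatial network: a-b-c form one patch, d-e another, f has no edges
edges <- data.frame(
    from = c("a", "b", "d"),
    to   = c("b", "c", "e")
)
g <- graph_from_data_frame(
    edges, directed = FALSE,
    vertices = data.frame(name = c("a", "b", "c", "d", "e", "f"))
)

# toy expression-cluster labels; "c" belongs to a different cluster
V(g)$clus <- c("k1", "k1", "k2", "k1", "k1", "k1")

# 1) drop edges connecting vertices with different cluster labels
edge_ends <- ends(g, E(g))
hetero <- V(g)[edge_ends[, 1]]$clus != V(g)[edge_ends[, 2]]$clus
g2 <- delete_edges(g, E(g)[hetero])

# 2) weakly connected membership per vertex, as a two-column data.table;
#    vertices absent from the network can then be appended with a 0 label
comp <- components(g2)$membership
membership_dt <- data.table(cell_ID = names(comp), spat_split = as.integer(comp))
membership_dt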
+#' @param g igraph object +#' @param clus_attr character. A categorical node attribute +#' @returns igraph +#' @noRd +#' @keywords internal +.igraph_remove_hetero_edges <- function(g, clus_attr) { + clus_attr_values <- igraph::vertex_attr(g, name = clus_attr) + + for (n in unique(clus_attr_values)) { + # find all vertices of the attribute + nv <- igraph::V(g)$name[clus_attr_values == n] + + # find edges that include these vertices + n_all_edges <- igraph::E(g)[.inc(igraph::V(g)[nv])] %>% + igraph::as_ids() + + # find edges associated with only these vertices + n_internal_edges <- igraph::E(g)[nv %--% nv] %>% + igraph::as_ids() + + het_edges <- n_all_edges[!n_all_edges %in% n_internal_edges] + + g <- igraph::delete_edges(g, edges = het_edges) + } + + g +} + + + + +#' @title igraph vertex membership +#' @name .igraph_vertex_membership +#' @description +#' Get which weakly connected set of vertices each vertex is part of +#' @param g igraph +#' @param clus_name character. name to assign column of clustering info +#' @param all_ids (optional) character vector with all ids +#' @returns `data.table` with two columns. 1st is "cell_ID", second is named with +#' `clus_name` and is of type `numeric` +#' @keywords internal +#' @noRd +.igraph_vertex_membership <- function(g, + clus_name, + all_ids = NULL) { + # get membership + membership <- igraph::components(g)$membership %>% + data.table::as.data.table(keep.rownames = TRUE) + data.table::setnames(membership, c("cell_ID", clus_name)) + + # add vertices that were missing from g back + if (!is.null(all_ids)) { + missing_ids <- all_ids[!all_ids %in% igraph::V(g)$name] + missing_membership <- data.table::data.table( + "cell_ID" = missing_ids, + "cluster_name" = 0 + ) + data.table::setnames(missing_membership, c("cell_ID", clus_name)) + membership <- data.table::rbindlist( + list(membership, missing_membership)) + } + + return(membership) } diff --git a/R/spatial_enrichment.R b/R/spatial_enrichment.R index 1f95d9a53..76f3c652a 100644 --- a/R/spatial_enrichment.R +++ b/R/spatial_enrichment.R @@ -1,42 +1,88 @@ ## create spatial enrichment matrix #### -#' @title makeSignMatrixPAGE -#' @description Function to convert a list of signature genes -#' (e.g. for cell types or processes) into -#' a binary matrix format that can be used with the PAGE enrichment option. -#' Each cell type or process should -#' have a vector of cell-type or process specific genes. These vectors need to -#' be combined into a list (sign_list). -#' The names of the cell types or processes that are provided in the list need -#' to be given (sign_names). -#' @param sign_names vector with names for each provided gene signature -#' @param sign_list list of genes (signature) -#' @returns matrix -#' @seealso \code{\link{PAGEEnrich}} +#' @title PAGE feature enrichment +#' @name enrichment_PAGE +#' @description +#' Expression feature-based enrichment scoring of labels. \cr +#' A binary matrix of signature features (e.g. for cell types or processes) can +#' either be directly provided or converted from a list using +#' `makeSignMatrixPAGE()`. This matrix is then used with `runPAGEEnrich()` in +#' order to calculate feature signature enrichment scores per spatial position +#' using PAGE. 
+#' @param sign_names `character` vector with names (labels) for each provided +#' feat signature +#' @param sign_list list of feats in signature +#' @param gobject Giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type +#' @param sign_matrix binary matrix of signature feats for each cell type / +#' process. Alternatively a list of signature feats can be provided to +#' `makeSignMatrixPAGE()`, which will create the matrix for you. +#' @param expression_values expression values to use +#' @param min_overlap_genes minimum number of overlapping feats in +#' `sign_matrix` required to calculate enrichment +#' @param reverse_log_scale reverse expression values from log scale +#' @param logbase log base to use if reverse_log_scale = TRUE +#' @param output_enrichment how to return enrichment output +#' @param p_value logical. Default = `FALSE`. calculate p-values +#' @param include_depletion calculate both enrichment and depletion +#' @param n_times number of permutations to calculate for p_value +#' @param max_block number of lines to process together (default = 20e6) +#' @param name to give to spatial enrichment results, default = PAGE +#' @param verbose be verbose +#' @param return_gobject return giotto object +#' @returns `matrix` (`makeSignMatrixPAGE()`) and +#' `giotto` (`runPAGEEnrich(return_gobject = TRUE)`) or +#' `data.table` (`runPAGEEnrich(return_gobject = FALSE)`) +#' @details +#' The enrichment Z score is calculated by using method (PAGE) from +#' Kim SY et al., BMC bioinformatics, 2005 as \cr +#' \eqn{Z = ((Sm – mu)*m^(1/2)) / delta}. \cr +#' For each gene in each spot, mu is the fold change values versus the mean +#' expression and delta is the standard deviation. Sm is the mean fold change +#' value of a specific marker gene set and m is the size of a given marker +#' gene set. +#' @md #' @examples +#' g <- GiottoData::loadGiottoMini("visium") +#' #' sign_list <- list( #' cell_type1 = c( #' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", #' "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd" #' ), #' cell_type2 = c( -#' "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", -#' "Carhsp1", "Tmem88b", "Ugt8a" +#' "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", +#' "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a" #' ), #' cell_type2 = c( -#' "Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", -#' "Cygb", "Ttc9b", "Ipcef1" +#' "Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", +#' "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" #' ) #' ) #' -#' makeSignMatrixPAGE( +#' sm <- makeSignMatrixPAGE( #' sign_names = c("cell_type1", "cell_type2", "cell_type3"), #' sign_list = sign_list #' ) +#' +#' g <- runPAGEEnrich( +#' gobject = g, +#' sign_matrix = sm, +#' min_overlap_genes = 2 +#' ) +#' +#' spatPlot2D(g, +#' cell_color = "cell_type2", +#' spat_enr_names = "PAGE", +#' color_as_factor = FALSE +#' ) +NULL + +#' @rdname enrichment_PAGE #' @export -makeSignMatrixPAGE <- function( - sign_names, - sign_list) { +makeSignMatrixPAGE <- function(sign_names, + sign_list) { ## check input if (!inherits(sign_list, "list")) { stop("sign_list needs to be a list of signatures for each cell type / @@ -100,10 +146,9 @@ makeSignMatrixPAGE <- function( #' cell_type_vector = c("cell_type1", "cell_type2", "cell_type3") #' ) #' @export -makeSignMatrixDWLSfromMatrix <- function( - matrix, - sign_gene, - cell_type_vector) { +makeSignMatrixDWLSfromMatrix <- function(matrix, + sign_gene, + cell_type_vector) { # 1. 
check if cell_type_vector and matrix are compatible if (ncol(matrix) != length(cell_type_vector)) { stop("ncol(matrix) needs to be the same as length(cell_type_vector)") @@ -177,16 +222,15 @@ makeSignMatrixDWLSfromMatrix <- function( #' cell_type_vector = pDataDT(g)[["leiden_clus"]] #' ) #' @export -makeSignMatrixDWLS <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - reverse_log = TRUE, - log_base = 2, - sign_gene, - cell_type_vector, - cell_type = NULL) { +makeSignMatrixDWLS <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reverse_log = TRUE, + log_base = 2, + sign_gene, + cell_type_vector, + cell_type = NULL) { ## deprecated arguments if (!is.null(cell_type)) { warning("cell_type is deprecated, use cell_type_vector in the future") @@ -210,7 +254,7 @@ makeSignMatrixDWLS <- function( expression_values, unique(c("normalized", "scaled", "custom", expression_values)) ) - expr_values <- get_expression_values( + expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, @@ -265,11 +309,10 @@ makeSignMatrixDWLS <- function( #' sc_cluster_ids = c("cell_type1", "cell_type2", "cell_type3") #' ) #' @export -makeSignMatrixRank <- function( - sc_matrix, - sc_cluster_ids, - ties_method = c("random", "max"), - gobject = NULL) { +makeSignMatrixRank <- function(sc_matrix, + sc_cluster_ids, + ties_method = c("random", "max"), + gobject = NULL) { if (inherits(sc_matrix, "exprObj")) { sc_matrix <- sc_matrix[] } @@ -350,10 +393,9 @@ makeSignMatrixRank <- function( #' @description creates permutation for the PAGEEnrich test #' @returns PAGEEnrich test #' @keywords internal -.do_page_permutation <- function( - gobject, - sig_gene, - ntimes) { +.do_page_permutation <- function(gobject, + sig_gene, + ntimes) { # check available gene available_ct <- c() for (i in colnames(sig_gene)) { @@ -420,236 +462,23 @@ makeSignMatrixRank <- function( -#' @title runPAGEEnrich_OLD -#' @description Function to calculate gene signature enrichment scores per -#' spatial position using PAGE. -#' @param gobject Giotto object -#' @param sign_matrix Matrix of signature genes for each cell type / process -#' @param expression_values expression values to use -#' @param reverse_log_scale reverse expression values from log scale -#' @param logbase log base to use if reverse_log_scale = TRUE -#' @param output_enrichment how to return enrichment output -#' @param p_value calculate p-values (boolean, default = FALSE) -#' @param n_times number of permutations to calculate for p_value -#' @param name to give to spatial enrichment results, default = PAGE -#' @param return_gobject return giotto object -#' @returns data.table with enrichment results -#' @details -#' sign_matrix: a binary matrix with genes as row names and cell-types as -#' column names. -#' Alternatively a list of signature genes can be provided to -#' makeSignMatrixPAGE, which will create the matrix for you. \cr -#' -#' The enrichment Z score is calculated by using method (PAGE) from -#' Kim SY et al., BMC bioinformatics, 2005 as -#' \eqn{Z = ((Sm – mu)*m^(1/2)) / delta}. -#' For each gene in each spot, mu is the fold change values versus the mean -#' expression and delta is the standard deviation. Sm is the mean fold change -#' value of a specific marker gene set and m is the size of a given marker -#' gene set. 
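As a quick numeric illustration of the PAGE statistic described in the details above, for a single spot and a single signature (all values invented):

# fold change of each gene in one spot versus its mean expression
fc <- c(geneA = 1.2, geneB = -0.4, geneC = 0.8, geneD = 0.1, geneE = -0.6)
markers <- c("geneA", "geneC")    # signature gene set, so m = 2

mu    <- mean(fc)           # spot-level mean fold change
delta <- sd(fc)             # spot-level standard deviation
Sm    <- mean(fc[markers])  # mean fold change within the signature
m     <- length(markers)

Z <- (Sm - mu) * sqrt(m) / delta
Z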
-#' @seealso \code{\link{makeSignMatrixPAGE}} -#' @export -runPAGEEnrich_OLD <- function( - gobject, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - n_times = 1000, - name = NULL, - return_gobject = TRUE) { - # expression values to be used - values <- match.arg(expression_values, c("normalized", "scaled", "custom")) - expr_values <- get_expression_values(gobject = gobject, values = values) - - # check parameters - if (is.null(name)) name <- "PAGE" - - # check available gene - available_ct <- c() - for (i in colnames(sign_matrix)) { - gene_i <- rownames(sign_matrix)[which(sign_matrix[, i] == 1)] - overlap_i <- intersect(gene_i, rownames(expr_values)) - if (length(overlap_i) <= 5) { - output <- paste0( - "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. Will remove it." - ) - } else { - available_ct <- c(available_ct, i) - } - } - - if (length(available_ct) == 1) { - stop("Only one cell type available.") - } - - # output enrichment - output_enrichment <- match.arg( - output_enrichment, - choices = c("original", "zscore") - ) - - # only continue with genes present in both datasets - interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) - filterSig <- sign_matrix[interGene, available_ct] - signames <- rownames(filterSig)[which(filterSig[, 1] == 1)] - - # calculate mean gene expression - if (reverse_log_scale == TRUE) { - mean_gene_expr <- log(rowMeans(logbase^expr_values - 1, dims = 1) + 1) - } else { - mean_gene_expr <- rowMeans(expr_values) - } - geneFold <- expr_values - mean_gene_expr - - # calculate sample/spot mean and sd - cellColMean <- apply(geneFold, 2, mean) - cellColSd <- apply(geneFold, 2, stats::sd) - - # get enrichment scores - enrichment <- matrix( - data = NA, nrow = dim(filterSig)[2], ncol = length(cellColMean) - ) - for (i in seq_len(dim(filterSig)[2])) { - signames <- rownames(filterSig)[which(filterSig[, i] == 1)] - sigColMean <- apply(geneFold[signames, ], 2, mean) - m <- length(signames) - vectorX <- NULL - for (j in (seq_along(cellColMean))) { - Sm <- sigColMean[j] - u <- cellColMean[j] - sigma <- cellColSd[j] - zscore <- (Sm - u) * m^(1 / 2) / sigma - vectorX <- append(vectorX, zscore) - } - enrichment[i, ] <- vectorX - } - - rownames(enrichment) <- colnames(filterSig) - colnames(enrichment) <- names(cellColMean) - enrichment <- t(enrichment) - - if (output_enrichment == "zscore") { - enrichment <- scale(enrichment) - } - - enrichmentDT <- data.table::data.table(cell_ID = rownames(enrichment)) - enrichmentDT <- cbind(enrichmentDT, data.table::as.data.table(enrichment)) - - - - ## calculate p-values if requested - if (p_value == TRUE) { - # check available gene - available_ct <- c() - for (i in colnames(sign_matrix)) { - gene_i <- rownames(sign_matrix)[which(sign_matrix[, i] == 1)] - overlap_i <- intersect( - gene_i, rownames(gobject@expression$rna$normalized) - ) - - if (length(overlap_i) <= 5) { - output <- paste0( - "Warning, ", i, " only has ", length(overlap_i), - " overlapped genes. It will be removed." 
- ) - } else { - available_ct <- c(available_ct, i) - } - } - - if (length(available_ct) == 1) { - stop("Only one cell type available.") - } - - # only continue with genes present in both datasets - interGene <- intersect( - rownames(sign_matrix), rownames(gobject@expression$rna$normalized) - ) - filter_sign_matrix <- sign_matrix[interGene, available_ct] - - background_mean_sd <- .do_page_permutation( - gobject = gobject, - sig_gene = filter_sign_matrix, - ntimes = n_times - ) - - for (i in seq_len(dim(filter_sign_matrix)[2])) { - length_gene <- length(which(filter_sign_matrix[, i] == 1)) - join_gene_with_length <- paste("gene_num_", length_gene, sep = "") - mean_i <- as.numeric(as.character( - background_mean_sd[join_gene_with_length, ][[1]] - )) - sd_i <- as.numeric(as.character( - background_mean_sd[join_gene_with_length, ][[2]] - )) - j <- i + 1 - enrichmentDT[[j]] <- stats::pnorm( - enrichmentDT[[j]], - mean = mean_i, sd = sd_i, - lower.tail = FALSE, log.p = FALSE - ) - } - } - - - - ## return object or results ## - if (return_gobject == TRUE) { - spenr_names <- names(gobject@spatial_enrichment) - - if (name %in% spenr_names) { - cat(name, " has already been used, will be overwritten") - } - - ## update parameters used ## - parameters_list <- gobject@parameters - number_of_rounds <- length(parameters_list) - update_name <- paste0(number_of_rounds, "_spatial_enrichment") - - # parameters to include - parameters_list[[update_name]] <- c( - "method used" = "PAGE", - "enrichment name" = name, - "expression values" = expression_values, - "reverse log scale" = reverse_log_scale, - "logbase" = logbase, - "p-values calculated" = p_value, - "output enrichment scores" = output_enrichment, - "p values calculated" = p_value, - "nr permutations" = n_times - ) - gobject@parameters <- parameters_list - - gobject@spatial_enrichment[[name]] <- enrichmentDT - - return(gobject) - } else { - return(enrichmentDT) - } -} - #' @title PAGE data.table method #' @param expr_values matrix of expression values #' @returns data.table #' @keywords internal -.page_dt_method <- function( - sign_matrix, - expr_values, - min_overlap_genes = 5, - logbase = 2, - reverse_log_scale = TRUE, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - include_depletion = FALSE, - n_times = 1000, - max_block = 20e6, - verbose = TRUE) { +.page_dt_method <- function(sign_matrix, + expr_values, + min_overlap_genes = 5, + logbase = 2, + reverse_log_scale = TRUE, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + include_depletion = FALSE, + n_times = 1000, + max_block = 20e6, + verbose = TRUE) { # data.table variables Var1 <- value <- Var2 <- V1 <- marker <- nr_markers <- fc <- cell_ID <- zscore <- colmean <- colSd <- pval <- NULL @@ -664,7 +493,7 @@ runPAGEEnrich_OLD <- function( ## identify available cell types all_genes <- rownames(expr_values) sign_matrix <- as.matrix(sign_matrix) - sign_matrix_DT <- data.table::as.data.table(reshape2::melt(sign_matrix)) + sign_matrix_DT <- melt_matrix(sign_matrix) sign_matrix_DT <- sign_matrix_DT[Var1 %in% all_genes] detected_DT <- sign_matrix_DT[, sum(value), by = Var2] @@ -685,7 +514,7 @@ runPAGEEnrich_OLD <- function( stop("Only one cell type available.") } - # create subset of sinature matrix + # create subset of signature matrix interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) filterSig <- sign_matrix[interGene, available_ct] @@ -707,13 +536,13 @@ runPAGEEnrich_OLD <- function( colSd = cellColSd ) - filterSig_DT <- 
data.table::as.data.table(reshape2::melt(filterSig)) + filterSig_DT <- melt_matrix(filterSig) colnames(filterSig_DT) <- c("gene", "cell_type", "marker") sub_ct_DT <- filterSig_DT[marker == 1] sub_ct_DT[, nr_markers := .N, by = cell_type] ## reshape gene fold-expression - geneFold_DT <- data.table::as.data.table(reshape2::melt(geneFold)) + geneFold_DT <- melt_matrix(geneFold) colnames(geneFold_DT) <- c("gene", "cell_ID", "fc") mergetest <- data.table::merge.data.table( @@ -882,75 +711,24 @@ runPAGEEnrich_OLD <- function( -#' @title runPAGEEnrich -#' @description Function to calculate gene signature enrichment scores per -#' spatial position using PAGE. -#' @param gobject Giotto object -#' @param spat_unit spatial unit -#' @param feat_type feature type -#' @param sign_matrix Matrix of signature genes for each cell type / process -#' @param expression_values expression values to use -#' @param min_overlap_genes minimum number of overlapping genes in sign_matrix -#' required to calculate enrichment -#' @param reverse_log_scale reverse expression values from log scale -#' @param logbase log base to use if reverse_log_scale = TRUE -#' @param output_enrichment how to return enrichment output -#' @param p_value calculate p-values (boolean, default = FALSE) -#' @param include_depletion calculate both enrichment and depletion -#' @param n_times number of permutations to calculate for p_value -#' @param max_block number of lines to process together (default = 20e6) -#' @param name to give to spatial enrichment results, default = PAGE -#' @param verbose be verbose -#' @param return_gobject return giotto object -#' @returns data.table with enrichment results -#' @details -#' sign_matrix: a binary matrix with genes as row names and cell-types as -#' column names. -#' Alternatively a list of signature genes can be provided to -#' makeSignMatrixPAGE, which will create the matrix for you. \cr -#' -#' The enrichment Z score is calculated by using method (PAGE) from -#' Kim SY et al., BMC bioinformatics, 2005 as -#' \eqn{Z = ((Sm – mu)*m^(1/2)) / delta}. -#' For each gene in each spot, mu is the fold change values versus the mean -#' expression and delta is the standard deviation. Sm is the mean fold change -#' value of a specific marker gene set and m is the size of a given marker -#' gene set. 
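For context on the `melt_matrix()` calls introduced in `.page_dt_method()` above (the helper itself is not shown in this diff): the signature and fold-change matrices are reshaped to long format before merging, one row per (gene, cell type) or (gene, cell) pair. A small data.table-only sketch of that shape, with an invented binary signature matrix:

library(data.table)

sig <- matrix(c(1, 0, 1, 0, 0, 1), nrow = 3,
    dimnames = list(c("geneA", "geneB", "geneC"), c("ct1", "ct2")))

sig_long <- as.data.table(as.table(sig))   # long format: one row per matrix cell
setnames(sig_long, c("gene", "cell_type", "marker"))
sig_long[marker == 1, nr_markers := .N, by = cell_type]  # marker count per cell type
sig_long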
-#' @seealso \code{\link{makeSignMatrixPAGE}} -#' @examples -#' g <- GiottoData::loadGiottoMini("visium") -#' sign_gene <- c( -#' "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", -#' "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", -#' "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", -#' "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" -#' ) -#' -#' sign_matrix <- matrix(rnorm(length(sign_gene) * 3, mean = 10), -#' nrow = length(sign_gene) -#' ) -#' rownames(sign_matrix) <- sign_gene -#' colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") -#' -#' runPAGEEnrich(gobject = g, sign_matrix = sign_matrix) +#' @rdname enrichment_PAGE #' @export -runPAGEEnrich <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - min_overlap_genes = 5, - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - include_depletion = FALSE, - n_times = 1000, - max_block = 20e6, - name = NULL, - verbose = TRUE, - return_gobject = TRUE) { +runPAGEEnrich <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + min_overlap_genes = 5, + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + include_depletion = FALSE, + n_times = 1000, + max_block = 20e6, + name = NULL, + verbose = TRUE, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1044,10 +822,7 @@ runPAGEEnrich <- function( gobject@parameters <- parameters_list ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_spatial_enrichment( - gobject = gobject, - spatenrichment = enrObj - ) + gobject <- setGiotto(gobject, enrObj) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### @@ -1064,21 +839,6 @@ runPAGEEnrich <- function( -#' @title PAGEEnrich -#' @description Function to calculate gene signature enrichment scores per -#' spatial position using PAGE. -#' @inheritDotParams runPAGEEnrich -#' @seealso \code{\link{runPAGEEnrich}} -#' @returns gene enrichment -#' @export -PAGEEnrich <- function(...) { - .Deprecated(new = "runPAGEEnrich") - - runPAGEEnrich(...) -} - - - #' @title Rank permutation @@ -1150,22 +910,21 @@ PAGEEnrich <- function(...) 
{ #' expression_values = "normalized" #' ) #' @export -runRankEnrich <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "raw", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - ties_method = c("average", "max"), - p_value = FALSE, - n_times = 1000, - rbp_p = 0.99, - num_agg = 100, - name = NULL, - return_gobject = TRUE) { +runRankEnrich <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "raw", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + ties_method = c("average", "max"), + p_value = FALSE, + n_times = 1000, + rbp_p = 0.99, + num_agg = 100, + name = NULL, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1363,10 +1122,7 @@ runRankEnrich <- function( gobject@parameters <- parameters_list ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_spatial_enrichment( - gobject = gobject, - spatenrichment = enrObj - ) + gobject <- setGiotto(gobject, enrObj) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### return(gobject) @@ -1377,20 +1133,6 @@ runRankEnrich <- function( -#' @title rankEnrich -#' @description Function to calculate gene signature enrichment scores per -#' spatial position using a rank based approach. -#' @inheritDotParams runRankEnrich -#' @seealso \code{\link{runRankEnrich}} -#' @returns gene enrichment -#' @export -rankEnrich <- function(...) { - .Deprecated(new = "runRankEnrich") - - runRankEnrich(...) -} - - #' @title runHyperGeometricEnrich #' @description Function to calculate gene signature enrichment scores per @@ -1426,19 +1168,18 @@ rankEnrich <- function(...) { #' #' runHyperGeometricEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runHyperGeometricEnrich <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - top_percentage = 5, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - name = NULL, - return_gobject = TRUE) { +runHyperGeometricEnrich <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + reverse_log_scale = TRUE, + logbase = 2, + top_percentage = 5, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + name = NULL, + return_gobject = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1596,10 +1337,7 @@ runHyperGeometricEnrich <- function( gobject@parameters <- parameters_list ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_spatial_enrichment( - gobject = gobject, - spatenrichment = enrObj - ) + gobject <- setGiotto(gobject, enrObj) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### return(gobject) @@ -1609,19 +1347,6 @@ runHyperGeometricEnrich <- function( } -#' @title hyperGeometricEnrich -#' @description Function to calculate gene signature enrichment scores per -#' spatial position using a hypergeometric test. -#' @inheritDotParams runHyperGeometricEnrich -#' @seealso \code{\link{runHyperGeometricEnrich}} -#' @returns enrichment scores -#' @export -hyperGeometricEnrich <- function(...) 
{ - .Deprecated(new = "runHyperGeometricEnrich") - - runHyperGeometricEnrich(...) -} - @@ -1671,26 +1396,25 @@ hyperGeometricEnrich <- function(...) { #' #' runSpatialEnrich(gobject = g, sign_matrix = sign_matrix) #' @export -runSpatialEnrich <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - enrich_method = c("PAGE", "rank", "hypergeometric"), - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - min_overlap_genes = 5, - reverse_log_scale = TRUE, - logbase = 2, - p_value = FALSE, - n_times = 1000, - rbp_p = 0.99, - num_agg = 100, - max_block = 20e6, - top_percentage = 5, - output_enrichment = c("original", "zscore"), - name = NULL, - verbose = TRUE, - return_gobject = TRUE) { +runSpatialEnrich <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + enrich_method = c("PAGE", "rank", "hypergeometric"), + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + min_overlap_genes = 5, + reverse_log_scale = TRUE, + logbase = 2, + p_value = FALSE, + n_times = 1000, + rbp_p = 0.99, + num_agg = 100, + max_block = 20e6, + top_percentage = 5, + output_enrichment = c("original", "zscore"), + name = NULL, + verbose = TRUE, + return_gobject = TRUE) { enrich_method <- match.arg( enrich_method, choices = c("PAGE", "rank", "hypergeometric") @@ -1755,18 +1479,6 @@ runSpatialEnrich <- function( } -#' @title createSpatialEnrich -#' @description Function to calculate gene signature enrichment scores per -#' spatial position using an enrichment test. -#' @inheritDotParams runSpatialEnrich -#' @returns gene signature enrichment scores -#' @seealso \code{\link{runSpatialEnrich}} -#' @export -createSpatialEnrich <- function(...) { - .Deprecated(new = "runSpatialEnrich") - - runSpatialEnrich(...) -} @@ -1838,25 +1550,24 @@ NULL #' \item{\emph{Geary's C} 'geary'} #' } #' @export -spatialAutoCorGlobal <- function( - gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - feats = NULL, - method = c("moran", "geary"), - data_to_use = c("expression", "cell_meta"), - expression_values = c("normalized", "scaled", "custom"), - meta_cols = NULL, - spatial_network_to_use = "kNN_network", - wm_method = c("distance", "adjacency"), - wm_name = "spat_weights", - node_values = NULL, - weight_matrix = NULL, - test_method = c("none", "monte_carlo"), - mc_nsim = 99, - cor_name = NULL, - return_gobject = FALSE, - verbose = TRUE) { +spatialAutoCorGlobal <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + feats = NULL, + method = c("moran", "geary"), + data_to_use = c("expression", "cell_meta"), + expression_values = c("normalized", "scaled", "custom"), + meta_cols = NULL, + spatial_network_to_use = "kNN_network", + wm_method = c("distance", "adjacency"), + wm_name = "spat_weights", + node_values = NULL, + weight_matrix = NULL, + test_method = c("none", "monte_carlo"), + mc_nsim = 99, + cor_name = NULL, + return_gobject = FALSE, + verbose = TRUE) { # 0. 
determine inputs method <- match.arg(method, choices = c("moran", "geary")) test_method <- match.arg(test_method, choices = c("none", "monte_carlo")) @@ -2011,26 +1722,25 @@ spatialAutoCorGlobal <- function( #' \item{\emph{Local mean} 'mean'} #' } #' @export -spatialAutoCorLocal <- function( - gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - feats = NULL, - method = c("moran", "gi", "gi*", "mean"), - data_to_use = c("expression", "cell_meta"), - expression_values = c("normalized", "scaled", "custom"), - meta_cols = NULL, - spatial_network_to_use = "kNN_network", - wm_method = c("distance", "adjacency"), - wm_name = "spat_weights", - node_values = NULL, - weight_matrix = NULL, - test_method = c("none"), - # cor_name = NULL, - enrich_name = NULL, - return_gobject = TRUE, - output = c("spatEnrObj", "data.table"), - verbose = TRUE) { +spatialAutoCorLocal <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + feats = NULL, + method = c("moran", "gi", "gi*", "mean"), + data_to_use = c("expression", "cell_meta"), + expression_values = c("normalized", "scaled", "custom"), + meta_cols = NULL, + spatial_network_to_use = "kNN_network", + wm_method = c("distance", "adjacency"), + wm_name = "spat_weights", + node_values = NULL, + weight_matrix = NULL, + test_method = c("none"), + # cor_name = NULL, + enrich_name = NULL, + return_gobject = TRUE, + output = c("spatEnrObj", "data.table"), + verbose = TRUE) { # 0. determine inputs method_select <- match.arg( method, @@ -2174,10 +1884,7 @@ spatialAutoCorLocal <- function( ) } ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_spatial_enrichment( - gobject = gobject, - spatenrichment = enr - ) + gobject <- setGiotto(gobject, enr) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### return(gobject) @@ -2200,14 +1907,13 @@ spatialAutoCorLocal <- function( #' .run_spat_autocor_global #' @returns data.table #' @keywords internal -.run_spat_autocor_global <- function( - use_values, - feats, - weight_matrix, - method, - test_method, - mc_nsim, - cor_name) { +.run_spat_autocor_global <- function(use_values, + feats, + weight_matrix, + method, + test_method, + mc_nsim, + cor_name) { # data.table vars cell_ID <- nsim <- NULL @@ -2218,9 +1924,9 @@ spatialAutoCorLocal <- function( step_size <- step_size <- ceiling(nfeats / 10L) } - progressr::with_progress({ + with_pbar({ if (step_size > 1) { - pb <- progressr::progressor( + pb <- pbar( steps = nfeats / step_size ) } @@ -2285,13 +1991,12 @@ spatialAutoCorLocal <- function( #' .run_spat_autocor_local #' @returns data.table #' @keywords internal -.run_spat_autocor_local <- function( - use_values, - feats, - weight_matrix, - method, - test_method, - IDs) { +.run_spat_autocor_local <- function(use_values, + feats, + weight_matrix, + method, + test_method, + IDs) { cell_ID <- NULL nfeats <- length(feats) @@ -2301,9 +2006,9 @@ spatialAutoCorLocal <- function( step_size <- step_size <- ceiling(nfeats / 10L) } - progressr::with_progress({ + with_pbar({ if (step_size > 1) { - pb <- progressr::progressor( + pb <- pbar( steps = nfeats / step_size ) } @@ -2372,24 +2077,23 @@ spatialAutoCorLocal <- function( # 4, IDs - cell_IDs if available # Some additional information about information used in specific workflows are # also returned -.evaluate_autocor_input <- function( - gobject, - use_ext_vals, - use_sn, - use_expr, - use_meta, - spat_unit, - feat_type, - feats, - data_to_use, - expression_values, - meta_cols, - spatial_network_to_use, - wm_method, 
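The global statistics offered by `spatialAutoCorGlobal()` boil down to weighted cross-products of centered node values. A compact base-R sketch using the textbook definitions of Moran's I and Geary's C with an invented value vector and weight matrix (the package code derives the weights from a spatial network and may normalize them differently):

# toy node values and a symmetric binary weight matrix (zero diagonal)
x <- c(2.0, 2.5, 3.0, 8.0, 9.0)
W <- matrix(0, 5, 5)
W[cbind(1:4, 2:5)] <- 1   # chain of neighbours 1-2, 2-3, 3-4, 4-5
W <- W + t(W)

z <- x - mean(x)
n <- length(x)

moran_I <- (n / sum(W)) * sum(W * outer(z, z)) / sum(z^2)
geary_C <- ((n - 1) / (2 * sum(W))) * sum(W * outer(x, x, "-")^2) / sum(z^2)

c(moran_I = moran_I, geary_C = geary_C)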
- wm_name, - node_values, - weight_matrix, - verbose = TRUE) { +.evaluate_autocor_input <- function(gobject, + use_ext_vals, + use_sn, + use_expr, + use_meta, + spat_unit, + feat_type, + feats, + data_to_use, + expression_values, + meta_cols, + spatial_network_to_use, + wm_method, + wm_name, + node_values, + weight_matrix, + verbose = TRUE) { cell_ID <- NULL # 1. Get spatial network to either get or generate a spatial weight matrix @@ -2448,7 +2152,8 @@ spatialAutoCorLocal <- function( # 2. Get and format node values for use with autocorrelation function. # End outputs are: # - use_values for a spatID (rows) by features (cols) table or matrix - # - feats the names of selected features to use that will be iterated through downstream + # - feats the names of selected features to use that will be iterated + # through downstream if (isTRUE(use_expr)) { # EXPR=================================================================# values <- match.arg( @@ -2552,12 +2257,11 @@ spatialAutoCorLocal <- function( #' @description Rui to fill in #' @returns matrix #' @keywords internal -enrich_deconvolution <- function( - expr, - log_expr, - cluster_info, - ct_exp, - cutoff) { +enrich_deconvolution <- function(expr, + log_expr, + cluster_info, + ct_exp, + cutoff) { ##### generate enrich 0/1 matrix based on expression matrix ct_exp <- ct_exp[rowSums(ct_exp) > 0, ] enrich_matrix <- matrix(0, nrow = dim(ct_exp)[1], ncol = dim(ct_exp)[2]) @@ -2624,11 +2328,10 @@ enrich_deconvolution <- function( #' @description Rui to fill in #' @returns matrix #' @keywords internal -spot_deconvolution <- function( - expr, - cluster_info, - ct_exp, - binary_matrix) { +spot_deconvolution <- function(expr, + cluster_info, + ct_exp, + binary_matrix) { ##### generate enrich 0/1 matrix based on expression matrix enrich_matrix <- matrix(0, nrow = dim(ct_exp)[1], ncol = dim(ct_exp)[2]) rowmax_col <- Rfast::rowMaxs(ct_exp) @@ -2709,7 +2412,9 @@ spot_deconvolution <- function( solDWLS <- optimize_solveDampenedWLS(S_k, B[ uniq_ct_k_gene, ], constant_J) - dwls_results[names(solDWLS), colnames(cluster_cell_exp)[k]] <- solDWLS + dwls_results[ + names(solDWLS), + colnames(cluster_cell_exp)[k]] <- solDWLS } } } @@ -2729,10 +2434,9 @@ spot_deconvolution <- function( #' @description Rui to fill in #' @returns enrichment values #' @keywords internal -cluster_enrich_analysis <- function( - exp_matrix, - cluster_info, - enrich_sig_matrix) { +cluster_enrich_analysis <- function(exp_matrix, + cluster_info, + enrich_sig_matrix) { uniq_cluster <- mixedsort(unique(cluster_info)) if (length(uniq_cluster) == 1) { stop("Only one cluster identified, need at least two.") @@ -2758,9 +2462,8 @@ cluster_enrich_analysis <- function( #' @description Rui to fill in #' @returns enrichment matrix #' @keywords internal -enrich_analysis <- function( - expr_values, - sign_matrix) { +enrich_analysis <- function(expr_values, + sign_matrix) { # output enrichment # only continue with genes present in both datasets interGene <- intersect(rownames(sign_matrix), rownames(expr_values)) @@ -2803,9 +2506,8 @@ enrich_analysis <- function( #' @description Rui to fill in #' @returns matrix #' @keywords internal -optimize_deconvolute_dwls <- function( - exp, - Signature) { +optimize_deconvolute_dwls <- function(exp, + Signature) { ###### overlap signature with spatial genes Genes <- intersect(rownames(Signature), rownames(exp)) S <- Signature[Genes, ] @@ -2839,10 +2541,9 @@ optimize_deconvolute_dwls <- function( #' @title optimize_solveDampenedWLS #' @returns numeric #' @keywords 
internal -optimize_solveDampenedWLS <- function( - S, - B, - constant_J) { +optimize_solveDampenedWLS <- function(S, + B, + constant_J) { # first solve OLS, use this solution to find a starting point for the # weights solution <- solve_OLS_internal(S, B) @@ -2878,10 +2579,9 @@ optimize_solveDampenedWLS <- function( #' @description find a dampening constant for the weights using cross-validation #' @returns numeric #' @keywords internal -find_dampening_constant <- function( - S, - B, - goldStandard) { +find_dampening_constant <- function(S, + B, + goldStandard) { solutionsSd <- NULL # goldStandard is used to define the weights @@ -2930,9 +2630,8 @@ find_dampening_constant <- function( #' @description basic functions for dwls #' @returns numeric #' @keywords internal -solve_OLS_internal <- function( - S, - B) { +solve_OLS_internal <- function(S, + B) { D <- t(S) %*% S d <- t(S) %*% B A <- cbind(diag(dim(S)[2])) @@ -2997,11 +2696,10 @@ solve_OLS_internal <- function( #' @description solve WLS given a dampening constant #' @returns matrix #' @keywords internal -solve_dampened_WLSj <- function( - S, - B, - goldStandard, - j) { +solve_dampened_WLSj <- function(S, + B, + goldStandard, + j) { multiplier <- 1 * 2^(j - 1) sol <- goldStandard ws <- as.vector((1 / (S %*% sol))^2) @@ -3062,18 +2760,17 @@ solve_dampened_WLSj <- function( #' #' runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) #' @export -runDWLSDeconv <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized"), - logbase = 2, - cluster_column = "leiden_clus", - sign_matrix, - n_cell = 50, - cutoff = 2, - name = NULL, - return_gobject = TRUE) { +runDWLSDeconv <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized"), + logbase = 2, + cluster_column = "leiden_clus", + sign_matrix, + n_cell = 50, + cutoff = 2, + name = NULL, + return_gobject = TRUE) { # verify if optional package is installed package_check(pkg_name = "quadprog", repository = "CRAN") package_check(pkg_name = "Rfast", repository = "CRAN") @@ -3198,10 +2895,7 @@ runDWLSDeconv <- function( gobject@parameters <- parameters_list ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_spatial_enrichment( - gobject = gobject, - spatenrichment = enrObj - ) + gobject <- setGiotto(gobject, enrObj) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### return(gobject) @@ -3245,19 +2939,18 @@ runDWLSDeconv <- function( #' #' runSpatialDeconv(gobject = g, sign_matrix = sign_matrix) #' @export -runSpatialDeconv <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - deconv_method = c("DWLS"), - expression_values = c("normalized"), - logbase = 2, - cluster_column = "leiden_clus", - sign_matrix, - n_cell = 50, - cutoff = 2, - name = NULL, - return_gobject = TRUE) { +runSpatialDeconv <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + deconv_method = c("DWLS"), + expression_values = c("normalized"), + logbase = 2, + cluster_column = "leiden_clus", + sign_matrix, + n_cell = 50, + cutoff = 2, + name = NULL, + return_gobject = TRUE) { deconv_method <- match.arg(deconv_method, choices = c("DWLS")) diff --git a/R/spatial_enrichment_visuals.R b/R/spatial_enrichment_visuals.R index fec02713c..103ba6e82 100644 --- a/R/spatial_enrichment_visuals.R +++ b/R/spatial_enrichment_visuals.R @@ -5,8 +5,8 @@ #' @param feat_type feature type for which the enrichment information was #' calculated #' @param enrichment_name name of the spatial enrichment 
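The deconvolution helpers above lean on `quadprog::solve.QP()` for a non-negativity-constrained least-squares fit, with dampened re-weighting layered on top. A self-contained sketch of just the constrained fit for one spot, with an invented signature matrix and expression vector (illustrative only, not the full DWLS procedure):

library(quadprog)

# signature matrix S (genes x cell types) and one spot's expression y
S <- matrix(c(10, 1, 2,
               1, 8, 1,
               2, 1, 9,
               5, 5, 1),
    nrow = 4, byrow = TRUE,
    dimnames = list(paste0("gene", 1:4), paste0("ct", 1:3)))
y <- c(6, 4, 5, 4)

# minimise ||S %*% w - y||^2 subject to w >= 0
# solve.QP() minimises (1/2) w'Dw - d'w subject to A'w >= b0
D  <- t(S) %*% S
d  <- as.vector(t(S) %*% y)
A  <- diag(ncol(S))       # one constraint per cell type: w_i >= 0
b0 <- rep(0, ncol(S))

w <- solve.QP(Dmat = D, dvec = d, Amat = A, bvec = b0, meq = 0)$solution
names(w) <- colnames(S)
round(w / sum(w), 3)      # cell-type proportions for this spot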
-#' i.e. output from GiottoClass::list_spatial_enrichment_names() -#' Default value is "PAGE_Z_score" +#' i.e. output from GiottoClass::list_spatial_enrichment_names() +#' Default value is "PAGE_Z_score" #' @param return_frequency_table see details. Default FALSE #' @returns table #' @details @@ -24,12 +24,11 @@ #' the associated cell types from the enrichment. #' #' @export -findCellTypesFromEnrichment <- function( - gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - return_frequency_table = FALSE) { +findCellTypesFromEnrichment <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + return_frequency_table = FALSE) { # guard clauses if (!inherits(gobject, "giotto")) { @@ -113,17 +112,16 @@ findCellTypesFromEnrichment <- function( #' annotation. #' #' @export -plotCellTypesFromEnrichment <- function( - gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - title = NULL, - save_param = list(), - default_save_name = "cell_types_from_enrichment", - save_plot = NULL, - show_plot = NULL, - return_plot = NULL) { +plotCellTypesFromEnrichment <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + title = NULL, + save_param = list(), + default_save_name = "cell_types_from_enrichment", + save_plot = NULL, + show_plot = NULL, + return_plot = NULL) { # guard clauses handled at first step downstream # therefore, omitting here. id_and_types <- findCellTypesFromEnrichment( @@ -188,17 +186,16 @@ plotCellTypesFromEnrichment <- function( #' and will be determined by the maximum value of the z-score #' or p-value for a given cell or annotation. #' @export -pieCellTypesFromEnrichment <- function( - gobject = NULL, - spat_unit = NULL, - feat_type = NULL, - enrichment_name = "PAGE_z_score", - title = NULL, - save_param = list(), - default_save_name = "cell_types_from_enrichment_pie", - save_plot = NULL, - show_plot = NULL, - return_plot = NULL) { +pieCellTypesFromEnrichment <- function(gobject = NULL, + spat_unit = NULL, + feat_type = NULL, + enrichment_name = "PAGE_z_score", + title = NULL, + save_param = list(), + default_save_name = "cell_types_from_enrichment_pie", + save_plot = NULL, + show_plot = NULL, + return_plot = NULL) { # guard clauses handled one step downstream freq_table <- findCellTypesFromEnrichment( diff --git a/R/spatial_genes.R b/R/spatial_genes.R index 53bc927c5..12dee31de 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -8,12 +8,11 @@ NULL #' @rdname spat_fisher_exact #' @keywords internal -.spat_fish_func <- function( - feat, - bin_matrix, - spat_mat, - calc_hub = FALSE, - hub_min_int = 3) { +.spat_fish_func <- function(feat, + bin_matrix, + spat_mat, + calc_hub = FALSE, + hub_min_int = 3) { feat_vector <- bin_matrix[rownames(bin_matrix) == feat, ] feat_vectorA <- feat_vector[names(feat_vector) %in% rownames(spat_mat)] @@ -76,12 +75,11 @@ NULL #' @describeIn spat_fisher_exact data.table implementation #' @keywords internal -.spat_fish_func_dt <- function( - bin_matrix_DTm, - spat_netw_min, - calc_hub = FALSE, - hub_min_int = 3, - cores = NA) { +.spat_fish_func_dt <- function(bin_matrix_DTm, + spat_netw_min, + calc_hub = FALSE, + hub_min_int = 3, + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -176,12 +174,11 @@ NULL #' @rdname spat_odds_ratio #' @keywords internal -.spat_or_func <- function( - feat, 
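The Fisher-based internals above score how spatially coherent a binarized feature is by testing a 2 x 2 table built from the two ends of every spatial-network edge. A toy version of that idea (edge list and binary values invented; the package functions work from the spatial adjacency matrix and additionally handle hubs and multiple-testing correction):

# toy spatial network edges and a binarized feature (1 = high, 0 = low)
edges <- data.frame(
    from = c("c1", "c1", "c2", "c3", "c4", "c5"),
    to   = c("c2", "c3", "c3", "c4", "c5", "c6")
)
bin <- c(c1 = 1, c2 = 1, c3 = 1, c4 = 0, c5 = 0, c6 = 0)

# 2 x 2 contingency table: value at one end of each edge vs the other end
tab <- table(from = bin[edges$from], to = bin[edges$to])
tab

# association between a cell's value and its neighbour's value
fisher.test(tab)$p.value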
- bin_matrix, - spat_mat, - calc_hub = FALSE, - hub_min_int = 3) { +.spat_or_func <- function(feat, + bin_matrix, + spat_mat, + calc_hub = FALSE, + hub_min_int = 3) { feat_vector <- bin_matrix[rownames(bin_matrix) == feat, ] feat_vectorA <- feat_vector[names(feat_vector) %in% rownames(spat_mat)] @@ -250,12 +247,11 @@ NULL #' @describeIn spat_odds_ratio data.table implementation #' @keywords internal -.spat_or_func_dt <- function( - bin_matrix_DTm, - spat_netw_min, - calc_hub = FALSE, - hub_min_int = 3, - cores = NA) { +.spat_or_func_dt <- function(bin_matrix_DTm, + spat_netw_min, + calc_hub = FALSE, + hub_min_int = 3, + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -361,11 +357,10 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using a 'simple' and #' efficient for loop #' @keywords internal -.calc_spatial_enrichment_minimum <- function( - spatial_network, - bin_matrix, - adjust_method = "fdr", - do_fisher_test = TRUE) { +.calc_spatial_enrichment_minimum <- function(spatial_network, + bin_matrix, + adjust_method = "fdr", + do_fisher_test = TRUE) { # data.table variables from <- to <- feats <- variable <- value <- p.value <- adj.p.value <- score <- estimate <- NULL @@ -455,16 +450,15 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using 'matrix' #' implementation #' @keywords internal -.calc_spatial_enrichment_matrix <- function( - spatial_network, - bin_matrix, - adjust_method = "fdr", - do_fisher_test = TRUE, - do_parallel = TRUE, - cores = NA, - calc_hub = FALSE, - hub_min_int = 3, - verbose = TRUE) { +.calc_spatial_enrichment_matrix <- function(spatial_network, + bin_matrix, + adjust_method = "fdr", + do_fisher_test = TRUE, + do_parallel = TRUE, + cores = NA, + calc_hub = FALSE, + hub_min_int = 3, + verbose = TRUE) { # data.table variables verbose <- feats <- p.value <- estimate <- adj.p.value <- score <- NULL @@ -548,14 +542,15 @@ NULL #' @describeIn calculate_spatial_enrichment calculate using 'data.table' #' implementation #' @keywords internal -.calc_spatial_enrichment_dt <- function(bin_matrix, - spatial_network, - calc_hub = FALSE, - hub_min_int = 3, - group_size = "automatic", - do_fisher_test = TRUE, - adjust_method = "fdr", - cores = NA) { +.calc_spatial_enrichment_dt <- function( + bin_matrix, + spatial_network, + calc_hub = FALSE, + hub_min_int = 3, + group_size = "automatic", + do_fisher_test = TRUE, + adjust_method = "fdr", + cores = NA) { # set number of cores automatically, but with limit of 10 cores <- determine_cores(cores) data.table::setDTthreads(threads = cores) @@ -762,38 +757,39 @@ NULL #' @rdname binSpect #' @export -binSpect <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - spatial_network_k = NULL, - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - bin_matrix = NULL, - summarize = c("p.value", 
"adj.p.value"), - return_gobject = FALSE) { +binSpect <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + spatial_network_k = NULL, + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + bin_matrix = NULL, + summarize = c("p.value", "adj.p.value"), + return_gobject = FALSE) { # TODO align set.seed, set_seed, seed_number naming and usage across # packages # use only param seed. If NULL, set no seed. If !NULL set value as seed @@ -862,31 +858,30 @@ binSpect <- function(gobject, #' @param expression_matrix expression matrix #' @param spatial_network spatial network in data.table format #' @export -binSpectSingleMatrix <- function( - expression_matrix, - spatial_network = NULL, - bin_matrix = NULL, - bin_method = c("kmeans", "rank"), - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = FALSE, - set.seed = deprecated(), - seed = 1234) { +binSpectSingleMatrix <- function(expression_matrix, + spatial_network = NULL, + bin_matrix = NULL, + bin_method = c("kmeans", "rank"), + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = FALSE, + set.seed = deprecated(), + seed = 1234) { if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( when = "4.0.3", @@ -1073,35 +1068,34 @@ binSpectSingleMatrix <- function( #' @describeIn binSpect binSpect for a single spatial network #' @export -binSpectSingle <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = 30, - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - set.seed = deprecated(), - seed = 1234, - bin_matrix = NULL) { +binSpectSingle <- function(gobject, + spat_unit = 
NULL, + feat_type = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = 30, + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + set.seed = deprecated(), + seed = 1234, + bin_matrix = NULL) { ## deprecated arguments if (is_present(set.seed) && !is.function(set.seed)) { @@ -1196,36 +1190,35 @@ binSpectSingle <- function( #' @describeIn binSpect binSpect for multiple spatial kNN networks #' @export -binSpectMulti <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - bin_method = c("kmeans", "rank"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_k = c(5, 10, 20), - reduce_network = FALSE, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = c(10, 30), - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - summarize = c("adj.p.value", "p.value")) { +binSpectMulti <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + bin_method = c("kmeans", "rank"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_k = c(5, 10, 20), + reduce_network = FALSE, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = c(10, 30), + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + summarize = c("adj.p.value", "p.value")) { ## deprecated arguments if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( @@ -1455,32 +1448,31 @@ binSpectMulti <- function( #' is set. 
#' @param summarize summarize the p-values or adjusted p-values #' @returns data.table with results -binSpectMultiMatrix <- function( - expression_matrix, - spatial_networks, - bin_method = c("kmeans", "rank"), - subset_feats = NULL, - kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), - nstart = 3, - iter_max = 10, - extreme_nr = 50, - sample_nr = 50, - percentage_rank = c(10, 30), - do_fisher_test = TRUE, - adjust_method = "fdr", - calc_hub = FALSE, - hub_min_int = 3, - get_av_expr = TRUE, - get_high_expr = TRUE, - implementation = c("data.table", "simple", "matrix"), - group_size = "automatic", - do_parallel = TRUE, - cores = NA, - verbose = TRUE, - knn_params = NULL, - set.seed = deprecated(), - seed = 1234, - summarize = c("adj.p.value", "p.value")) { +binSpectMultiMatrix <- function(expression_matrix, + spatial_networks, + bin_method = c("kmeans", "rank"), + subset_feats = NULL, + kmeans_algo = c("kmeans", "kmeans_arma", "kmeans_arma_subset"), + nstart = 3, + iter_max = 10, + extreme_nr = 50, + sample_nr = 50, + percentage_rank = c(10, 30), + do_fisher_test = TRUE, + adjust_method = "fdr", + calc_hub = FALSE, + hub_min_int = 3, + get_av_expr = TRUE, + get_high_expr = TRUE, + implementation = c("data.table", "simple", "matrix"), + group_size = "automatic", + do_parallel = TRUE, + cores = NA, + verbose = TRUE, + knn_params = NULL, + set.seed = deprecated(), + seed = 1234, + summarize = c("adj.p.value", "p.value")) { if (is_present(set.seed) && !is.function(set.seed)) { deprecate_warn( when = "4.0.3", @@ -1647,14 +1639,13 @@ binSpectMultiMatrix <- function( #' #' silhouetteRank(g) #' @export -silhouetteRank <- function( - gobject, - expression_values = c("normalized", "scaled", "custom"), - metric = "euclidean", - subset_genes = NULL, - rbp_p = 0.95, - examine_top = 0.3, - python_path = NULL) { +silhouetteRank <- function(gobject, + expression_values = c("normalized", "scaled", "custom"), + metric = "euclidean", + subset_genes = NULL, + rbp_p = 0.95, + examine_top = 0.3, + python_path = NULL) { # expression values values <- match.arg(expression_values, c("normalized", "scaled", "custom")) expr_values <- getExpression( @@ -1741,19 +1732,18 @@ silhouetteRank <- function( #' #' silhouetteRankTest(g) #' @export -silhouetteRankTest <- function( - gobject, - expression_values = c("normalized", "scaled", "custom"), - subset_genes = NULL, - overwrite_input_bin = TRUE, - rbp_ps = c(0.95, 0.99), - examine_tops = c(0.005, 0.010, 0.050, 0.100, 0.300), - matrix_type = "dissim", - num_core = 4, - parallel_path = "/usr/bin", - output = NULL, - query_sizes = 10L, - verbose = FALSE) { +silhouetteRankTest <- function(gobject, + expression_values = c("normalized", "scaled", "custom"), + subset_genes = NULL, + overwrite_input_bin = TRUE, + rbp_ps = c(0.95, 0.99), + examine_tops = c(0.005, 0.010, 0.050, 0.100, 0.300), + matrix_type = "dissim", + num_core = 4, + parallel_path = "/usr/bin", + output = NULL, + query_sizes = 10L, + verbose = FALSE) { # data.table variables cell_ID <- sdimx <- sdimy <- sdimz <- NULL @@ -1943,22 +1933,21 @@ silhouetteRankTest <- function( #' #' spatialDE(g) #' @export -spatialDE <- function( - gobject = NULL, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - expression_values = c("raw", "normalized", "scaled", "custom"), - size = c(4, 2, 1), - color = c("blue", "green", "red"), - sig_alpha = 0.5, - unsig_alpha = 0.5, - python_path = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = 
"SpatialDE") { +spatialDE <- function(gobject = NULL, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + expression_values = c("raw", "normalized", "scaled", "custom"), + size = c(4, 2, 1), + color = c("blue", "green", "red"), + sig_alpha = 0.5, + unsig_alpha = 0.5, + python_path = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "SpatialDE") { # test if SPARK is installed ## module_test <- reticulate::py_module_available("SpatialDE") @@ -2135,18 +2124,17 @@ spatialDE <- function( #' #' spatialAEH(g) #' @export -spatialAEH <- function( - gobject = NULL, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - SpatialDE_results = NULL, - name_pattern = "AEH_patterns", - expression_values = c("raw", "normalized", "scaled", "custom"), - pattern_num = 6, - l = 1.05, - python_path = NULL, - return_gobject = TRUE) { +spatialAEH <- function(gobject = NULL, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + SpatialDE_results = NULL, + name_pattern = "AEH_patterns", + expression_values = c("raw", "normalized", "scaled", "custom"), + pattern_num = 6, + l = 1.05, + python_path = NULL, + return_gobject = TRUE) { # data.table variables cell_ID <- NULL @@ -2241,13 +2229,12 @@ spatialAEH <- function( #' @param unsig_alpha transparency of unsignificant genes #' @returns ggplot object #' @keywords internal -FSV_show <- function( - results, - ms_results = NULL, - size = c(4, 2, 1), - color = c("blue", "green", "red"), - sig_alpha = 0.5, - unsig_alpha = 0.5) { +FSV_show <- function(results, + ms_results = NULL, + size = c(4, 2, 1), + color = c("blue", "green", "red"), + sig_alpha = 0.5, + unsig_alpha = 0.5) { results$FSV95conf <- 2 * sqrt(results$s2_FSV) results$intervals <- cut( results$FSV95conf, c(0, 1e-1, 1e0, Inf), @@ -2332,16 +2319,15 @@ FSV_show <- function( #' #' trendSceek(g) #' @export -trendSceek <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - spat_loc_name = "raw", - expression_values = c("normalized", "raw"), - subset_genes = NULL, - nrand = 100, - ncores = 8, - ...) { +trendSceek <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + spat_loc_name = "raw", + expression_values = c("normalized", "raw"), + subset_genes = NULL, + nrand = 100, + ncores = 8, + ...) { # verify if optional package is installed package_check( pkg_name = "trendsceek", @@ -2456,18 +2442,17 @@ trendSceek <- function( #' #' spark(g) #' @export -spark <- function( - gobject, - spat_loc_name = "raw", - feat_type = NULL, - spat_unit = NULL, - percentage = 0.1, - min_count = 10, - expression_values = "raw", - num_core = 5, - covariates = NULL, - return_object = c("data.table", "spark"), - ...) { +spark <- function(gobject, + spat_loc_name = "raw", + feat_type = NULL, + spat_unit = NULL, + percentage = 0.1, + min_count = 10, + expression_values = "raw", + num_core = 5, + covariates = NULL, + return_object = c("data.table", "spark"), + ...) 
{ # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2609,15 +2594,14 @@ spark <- function( #' select PCs based on a z-score threshold #' } #' @export -detectSpatialPatterns <- function( - gobject, - expression_values = c("normalized", "scaled", "custom"), - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - scale_unit = FALSE, - ncp = 100, - show_plot = TRUE, - PC_zscore = 1.5) { +detectSpatialPatterns <- function(gobject, + expression_values = c("normalized", "scaled", "custom"), + spatial_grid_name = "spatial_grid", + min_cells_per_grid = 4, + scale_unit = FALSE, + ncp = 100, + show_plot = TRUE, + PC_zscore = 1.5) { ############################################################################ stop(wrap_txt( "This function has not been updated for use with the current version @@ -2785,20 +2769,19 @@ detectSpatialPatterns <- function( #' change save_name in save_param #' @returns ggplot #' @export -showPattern2D <- function( - gobject, - spatPatObj, - dimension = 1, - trim = c(0.02, 0.98), - background_color = "white", - grid_border_color = "grey", - show_legend = TRUE, - point_size = 1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPattern2D") { +showPattern2D <- function(gobject, + spatPatObj, + dimension = 1, + trim = c(0.02, 0.98), + background_color = "white", + grid_border_color = "grey", + show_legend = TRUE, + point_size = 1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPattern2D") { if (!"spatPatObj" %in% class(spatPatObj)) { stop("spatPatObj needs to be the output from detectSpatialPatterns") } @@ -2905,25 +2888,26 @@ showPattern <- function(gobject, spatPatObj, ...) 
{ #' change save_name in save_param #' @returns plotly #' @export -showPattern3D <- function( - gobject, - spatPatObj, - dimension = 1, - trim = c(0.02, 0.98), - background_color = "white", - grid_border_color = "grey", - show_legend = TRUE, - point_size = 1, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPattern3D") { +showPattern3D <- function(gobject, + spatPatObj, + dimension = 1, + trim = c(0.02, 0.98), + background_color = "white", + grid_border_color = "grey", + show_legend = TRUE, + point_size = 1, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPattern3D") { + package_check("plotly", repository = "CRAN:plotly") + # data.table variables center_x <- x_start <- x_end <- center_y <- y_start <- y_end <- center_z <- z_start <- z_end <- NULL @@ -2975,7 +2959,8 @@ showPattern3D <- function( dpl <- plotly::plot_ly( type = "scatter3d", - x = annotated_grid$center_x, y = annotated_grid$center_y, z = annotated_grid$center_z, + x = annotated_grid$center_x, + y = annotated_grid$center_y, z = annotated_grid$center_z, color = annotated_grid[[selected_PC]], marker = list(size = point_size), mode = "markers", colors = c("darkblue", "white", "darkred") ) @@ -3030,19 +3015,18 @@ showPattern3D <- function( #' change save_name in save_param #' @returns ggplot #' @export -showPatternGenes <- function( - gobject, - spatPatObj, - dimension = 1, - top_pos_genes = 5, - top_neg_genes = 5, - point_size = 1, - return_DT = FALSE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "showPatternGenes") { +showPatternGenes <- function(gobject, + spatPatObj, + dimension = 1, + top_pos_genes = 5, + top_neg_genes = 5, + point_size = 1, + return_DT = FALSE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "showPatternGenes") { # data.table variables gene_ID <- NULL @@ -3115,14 +3099,13 @@ showPatternGenes <- function( #' @returns Data.table with genes associated with selected dimension (PC). #' @details Description. #' @export -selectPatternGenes <- function( - spatPatObj, - dimensions = 1:5, - top_pos_genes = 10, - top_neg_genes = 10, - min_pos_cor = 0.5, - min_neg_cor = -0.5, - return_top_selection = FALSE) { +selectPatternGenes <- function(spatPatObj, + dimensions = 1:5, + top_pos_genes = 10, + top_neg_genes = 10, + min_pos_cor = 0.5, + min_neg_cor = -0.5, + return_top_selection = FALSE) { if (!"spatPatObj" %in% class(spatPatObj)) { stop("spatPatObj needs to be the output from detectSpatialPatterns") } @@ -3204,11 +3187,10 @@ selectPatternGenes <- function( #' number of k-neighbors in the selected spatial network. Setting b = 0 means #' no smoothing and b = 1 means no contribution from its own expression. 
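# The `b` weighting documented just above can be read as a convex combination of a
# cell's own expression and the mean expression of its k nearest spatial neighbours.
# A minimal sketch of that documented behaviour with hypothetical toy values (an
# illustration only, not the package's internal implementation):
own_expr <- 4                # hypothetical expression value of one cell
neighbour_expr <- c(2, 3, 5) # hypothetical values of its kNN spatial neighbours
b <- 0.5                     # b = 0 keeps only own_expr; b = 1 keeps only the neighbour mean
smoothed <- (1 - b) * own_expr + b * mean(neighbour_expr)
smoothed # ~3.67, halfway between the cell and its neighbourhood average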
#' @keywords internal -do_spatial_knn_smoothing <- function( - expression_matrix, - spatial_network, - subset_feats = NULL, - b = NULL) { +do_spatial_knn_smoothing <- function(expression_matrix, + spatial_network, + subset_feats = NULL, + b = NULL) { # checks if (!is.null(b)) { if (b > 1 | b < 0) { @@ -3328,12 +3310,11 @@ evaluate_provided_spatial_locations <- function(spatial_locs) { #' @description smooth gene expression over a defined spatial grid #' @returns matrix with smoothened gene expression values based on spatial grid #' @keywords internal -do_spatial_grid_averaging <- function( - expression_matrix, - spatial_grid, - spatial_locs, - subset_feats = NULL, - min_cells_per_grid = 4) { +do_spatial_grid_averaging <- function(expression_matrix, + spatial_grid, + spatial_locs, + subset_feats = NULL, + min_cells_per_grid = 4) { # matrix expr_values <- expression_matrix if (!is.null(subset_feats)) { @@ -3451,18 +3432,19 @@ NULL #' @rdname detectSpatialCorFeats #' @export -detectSpatialCorFeats <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = "raw", - method = c("grid", "network"), - expression_values = c("normalized", "scaled", "custom"), - subset_feats = NULL, - spatial_network_name = "Delaunay_network", - network_smoothing = NULL, - spatial_grid_name = "spatial_grid", - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman")) { +detectSpatialCorFeats <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = "raw", + method = c("grid", "network"), + expression_values = c("normalized", "scaled", "custom"), + subset_feats = NULL, + spatial_network_name = "Delaunay_network", + network_smoothing = NULL, + spatial_grid_name = "spatial_grid", + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman")) { # set default spat_unit and feat_type spat_unit <- set_default_spat_unit( gobject = gobject, @@ -3626,16 +3608,15 @@ detectSpatialCorFeats <- function(gobject, #' @rdname detectSpatialCorFeats #' @export -detectSpatialCorFeatsMatrix <- function( - expression_matrix, - method = c("grid", "network"), - spatial_network, - spatial_grid, - spatial_locs, - subset_feats = NULL, - network_smoothing = NULL, - min_cells_per_grid = 4, - cor_method = c("pearson", "kendall", "spearman")) { +detectSpatialCorFeatsMatrix <- function(expression_matrix, + method = c("grid", "network"), + spatial_network, + spatial_grid, + spatial_locs, + subset_feats = NULL, + network_smoothing = NULL, + min_cells_per_grid = 4, + cor_method = c("pearson", "kendall", "spearman")) { ## correlation method to be used cor_method <- match.arg( cor_method, @@ -3767,16 +3748,15 @@ detectSpatialCorFeatsMatrix <- function( #' @param show_top_feats show top features per gene #' @returns data.table with filtered information #' @export -showSpatialCorFeats <- function( - spatCorObject, - use_clus_name = NULL, - selected_clusters = NULL, - feats = NULL, - min_spat_cor = 0.5, - min_expr_cor = NULL, - min_cor_diff = NULL, - min_rank_diff = NULL, - show_top_feats = NULL) { +showSpatialCorFeats <- function(spatCorObject, + use_clus_name = NULL, + selected_clusters = NULL, + feats = NULL, + min_spat_cor = 0.5, + min_expr_cor = NULL, + min_cor_diff = NULL, + min_rank_diff = NULL, + show_top_feats = NULL) { # data.table variables clus <- feat_ID <- spat_cor <- cor_diff <- rankdiff <- NULL @@ -3859,16 +3839,15 @@ showSpatialCorFeats <- function( #' @param show_top_genes show top genes per gene #' @returns data.table with filtered information #' @export 
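# For orientation, the spatial co-expression helpers above chain together roughly as
# sketched below, reusing the mini Visium object that the examples in this file load.
# Parameter values are illustrative, and this assumes the mini object already carries
# normalized expression and a Delaunay spatial network, as in the package examples.
library(Giotto)
g <- GiottoData::loadGiottoMini("visium")

spat_cor <- detectSpatialCorFeats(g, method = "network") # network-based co-expression
spat_cor <- clusterSpatialCorFeats(spat_cor, name = "spat_clus", k = 10)
showSpatialCorFeats(
    spat_cor,
    use_clus_name = "spat_clus",
    min_spat_cor = 0.5, # keep feature pairs with spatial correlation >= 0.5
    show_top_feats = 1  # report the top co-expressed partner per feature
)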
-showSpatialCorGenes <- function( - spatCorObject, - use_clus_name = NULL, - selected_clusters = NULL, - genes = NULL, - min_spat_cor = 0.5, - min_expr_cor = NULL, - min_cor_diff = NULL, - min_rank_diff = NULL, - show_top_genes = NULL) { +showSpatialCorGenes <- function(spatCorObject, + use_clus_name = NULL, + selected_clusters = NULL, + genes = NULL, + min_spat_cor = 0.5, + min_expr_cor = NULL, + min_cor_diff = NULL, + min_rank_diff = NULL, + show_top_genes = NULL) { warning("Deprecated and replaced by showSpatialCorFeats") showSpatialCorFeats( @@ -3907,12 +3886,11 @@ showSpatialCorGenes <- function( #' method = "network" #' )) #' @export -clusterSpatialCorFeats <- function( - spatCorObject, - name = "spat_clus", - hclust_method = "ward.D", - k = 10, - return_obj = TRUE) { +clusterSpatialCorFeats <- function(spatCorObject, + name = "spat_clus", + hclust_method = "ward.D", + k = 10, + return_obj = TRUE) { # check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -3960,12 +3938,11 @@ clusterSpatialCorFeats <- function( #' @param return_obj return spatial correlation object (spatCorObject) #' @returns spatCorObject or cluster results #' @export -clusterSpatialCorGenes <- function( - spatCorObject, - name = "spat_clus", - hclust_method = "ward.D", - k = 10, - return_obj = TRUE) { +clusterSpatialCorGenes <- function(spatCorObject, + name = "spat_clus", + hclust_method = "ward.D", + k = 10, + return_obj = TRUE) { warning("Deprecated and replaced by clusterSpatialCorFeats") clusterSpatialCorFeats( @@ -4004,21 +3981,20 @@ clusterSpatialCorGenes <- function( #' \code{\link[ComplexHeatmap]{Heatmap}} function from ComplexHeatmap #' @returns Heatmap generated by ComplexHeatmap #' @export -heatmSpatialCorFeats <- function( - gobject, - spatCorObject, - use_clus_name = NULL, - show_cluster_annot = TRUE, - show_row_dend = TRUE, - show_column_dend = FALSE, - show_row_names = FALSE, - show_column_names = FALSE, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "heatmSpatialCorFeats", - ...) { +heatmSpatialCorFeats <- function(gobject, + spatCorObject, + use_clus_name = NULL, + show_cluster_annot = TRUE, + show_row_dend = TRUE, + show_column_dend = FALSE, + show_row_names = FALSE, + show_column_names = FALSE, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "heatmSpatialCorFeats", + ...) { ## check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -4101,19 +4077,6 @@ heatmSpatialCorFeats <- function( -#' @title heatmSpatialCorGenes -#' @name heatmSpatialCorGenes -#' @description Create heatmap of spatially correlated genes -#' @inheritDotParams heatmSpatialCorFeats -#' @returns heatmap -#' @seealso \code{\link{heatmSpatialCorFeats}} -#' @export -heatmSpatialCorGenes <- function(...) { - .Deprecated(new = "heatmSpatialCorFeats") - - heatmSpatialCorFeats(...) -} - @@ -4147,15 +4110,14 @@ heatmSpatialCorGenes <- function(...) 
{ #' ) #' @md #' @export -rankSpatialCorGroups <- function( - gobject, - spatCorObject, - use_clus_name = NULL, - show_plot = NULL, - return_plot = FALSE, - save_plot = NULL, - save_param = list(), - default_save_name = "rankSpatialCorGroups") { +rankSpatialCorGroups <- function(gobject, + spatCorObject, + use_clus_name = NULL, + show_plot = NULL, + return_plot = FALSE, + save_plot = NULL, + save_param = list(), + default_save_name = "rankSpatialCorGroups") { ## check input if (!"spatCorObject" %in% class(spatCorObject)) { stop("spatCorObject needs to be the output from @@ -4274,13 +4236,12 @@ rankSpatialCorGroups <- function( #' #' @md #' @export -getBalancedSpatCoexpressionFeats <- function( - spatCorObject, - maximum = 50, - rank = c("weighted", "random", "informed"), - informed_ranking = NULL, - seed = NA, - verbose = TRUE) { +getBalancedSpatCoexpressionFeats <- function(spatCorObject, + maximum = 50, + rank = c("weighted", "random", "informed"), + informed_ranking = NULL, + seed = NA, + verbose = TRUE) { # data.table vars feat_ID <- variable <- combo <- spat_cor <- rnk <- feat_id <- V1 <- NULL @@ -4488,16 +4449,15 @@ getBalancedSpatCoexpressionFeats <- function( #' gene_name = "Gna12" #' ) #' @export -simulateOneGenePatternGiottoObject <- function( - gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_prob = 0.95, - gradient_direction = NULL, - show_pattern = TRUE, - pattern_colors = c("in" = "green", "out" = "red"), - normalization_params = list()) { +simulateOneGenePatternGiottoObject <- function(gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_prob = 0.95, + gradient_direction = NULL, + show_pattern = TRUE, + pattern_colors = c("in" = "green", "out" = "red"), + normalization_params = list()) { # data.table variables cell_ID <- sdimx_y <- sdimx <- sdimy <- NULL @@ -4683,30 +4643,29 @@ simulateOneGenePatternGiottoObject <- function( #' @description runs all spatial tests for 1 probability and 1 rep #' @returns data.table #' @keywords internal -run_spatial_sim_tests_one_rep <- function( - gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_prob = 0.95, - show_pattern = FALSE, - spatial_network_name = "kNN_network", - spat_methods = c( - "binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank" - ), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c( - "binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank" - ), - save_plot = FALSE, - save_raw = FALSE, - save_norm = FALSE, - save_dir = "~", - save_name = "plot", - run_simulations = TRUE, - ...) { +run_spatial_sim_tests_one_rep <- function(gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_prob = 0.95, + show_pattern = FALSE, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + save_plot = FALSE, + save_raw = FALSE, + save_norm = FALSE, + save_dir = "~", + save_name = "plot", + run_simulations = TRUE, + ...) 
{ # data.table variables genes <- prob <- time <- adj.p.value <- method <- p.val <- sd <- qval <- pval <- g <- adjusted_pvalue <- feats <- NULL @@ -5020,30 +4979,29 @@ run_spatial_sim_tests_one_rep <- function( #' repetitions #' @returns data.table #' @keywords internal -run_spatial_sim_tests_multi <- function( - gobject, - pattern_name = "pattern", - pattern_cell_ids = NULL, - gene_name = NULL, - spatial_probs = c(0.5, 1), - reps = 2, - spatial_network_name = "kNN_network", - spat_methods = c( - "binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank" - ), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c( - "binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank" - ), - save_plot = FALSE, - save_raw = FALSE, - save_norm = FALSE, - save_dir = "~", - verbose = TRUE, - run_simulations = TRUE, - ...) { +run_spatial_sim_tests_multi <- function(gobject, + pattern_name = "pattern", + pattern_cell_ids = NULL, + gene_name = NULL, + spatial_probs = c(0.5, 1), + reps = 2, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + save_plot = FALSE, + save_raw = FALSE, + save_norm = FALSE, + save_dir = "~", + verbose = TRUE, + run_simulations = TRUE, + ...) { prob_list <- list() for (prob_ind in seq_along(spatial_probs)) { prob_i <- spatial_probs[prob_ind] @@ -5137,37 +5095,37 @@ run_spatial_sim_tests_multi <- function( #' "AAAGGGATGTAGCAAG-1", #' "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" #' ), -#' spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2") +#' spatial_network_name = "spatial_network", +#' gene_names = c("Gna12", "Ccnd2") #' ) #' @export -runPatternSimulation <- function( - gobject, - pattern_name = "pattern", - pattern_colors = c("in" = "green", "out" = "red"), - pattern_cell_ids = NULL, - gene_names = NULL, - spatial_probs = c(0.5, 1), - reps = 2, - spatial_network_name = "kNN_network", - spat_methods = c( - "binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank" - ), - spat_methods_params = list(NA, NA, NA, NA, NA), - spat_methods_names = c( - "binSpect_single", "binSpect_multi", "spatialDE", - "spark", "silhouetteRank" - ), - scalefactor = 6000, - save_plot = TRUE, - save_raw = TRUE, - save_norm = TRUE, - save_dir = "~", - max_col = 4, - height = 7, - width = 7, - run_simulations = TRUE, - ...) { +runPatternSimulation <- function(gobject, + pattern_name = "pattern", + pattern_colors = c("in" = "green", "out" = "red"), + pattern_cell_ids = NULL, + gene_names = NULL, + spatial_probs = c(0.5, 1), + reps = 2, + spatial_network_name = "kNN_network", + spat_methods = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + spat_methods_params = list(NA, NA, NA, NA, NA), + spat_methods_names = c( + "binSpect_single", "binSpect_multi", "spatialDE", + "spark", "silhouetteRank" + ), + scalefactor = 6000, + save_plot = TRUE, + save_raw = TRUE, + save_norm = TRUE, + save_dir = "~", + max_col = 4, + height = 7, + width = 7, + run_simulations = TRUE, + ...) 
{ # data.table variables prob <- method <- adj.p.value <- time <- NULL diff --git a/R/spatial_interaction.R b/R/spatial_interaction.R index a643c66d0..765d0f56d 100644 --- a/R/spatial_interaction.R +++ b/R/spatial_interaction.R @@ -5,14 +5,15 @@ #' @description Simulate random network. #' @returns data.table #' @keywords internal -make_simulated_network <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - cluster_column, - number_of_simulations = 100, - set_seed = TRUE, - seed_number = 1234) { +make_simulated_network <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + cluster_column, + number_of_simulations = 100, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -47,8 +48,10 @@ make_simulated_network <- function(gobject, s1_list <- list() s2_list <- list() - all_cell_type <- c(spatial_network_annot$from_cell_type, - spatial_network_annot$to_cell_type) + all_cell_type <- c( + spatial_network_annot$from_cell_type, + spatial_network_annot$to_cell_type + ) middle_point <- length(all_cell_type) / 2 for (sim in seq_len(number_of_simulations)) { @@ -58,13 +61,15 @@ make_simulated_network <- function(gobject, } reshuffled_all_cell_type <- sample( - x = all_cell_type, size = length(all_cell_type), replace = FALSE) + x = all_cell_type, size = length(all_cell_type), replace = FALSE + ) new_from_cell_type <- reshuffled_all_cell_type[seq_len(middle_point)] s1_list[[sim]] <- new_from_cell_type new_to_cell_type <- reshuffled_all_cell_type[ - (middle_point + 1):length(all_cell_type)] + (middle_point + 1):length(all_cell_type) + ] s2_list[[sim]] <- new_to_cell_type } @@ -77,12 +82,16 @@ make_simulated_network <- function(gobject, s1 <- s2 <- unified_int <- type_int <- NULL sample_dt <- data.table::data.table( - s1 = s1_vector, s2 = s2_vector, round = round_vector) + s1 = s1_vector, s2 = s2_vector, round = round_vector + ) uniq_sim_comb <- unique(sample_dt[, .(s1, s2)]) uniq_sim_comb[, unified_int := paste( - sort(c(s1, s2)), collapse = "--"), by = seq_len(nrow(uniq_sim_comb))] + sort(c(s1, s2)), + collapse = "--" + ), by = seq_len(nrow(uniq_sim_comb))] sample_dt[uniq_sim_comb, unified_int := unified_int, on = c( - s1 = "s1", s2 = "s2")] + s1 = "s1", s2 = "s2" + )] sample_dt[, type_int := ifelse(s1 == s2, "homo", "hetero")] return(sample_dt) @@ -118,19 +127,20 @@ make_simulated_network <- function(gobject, #' #' cellProximityEnrichment(g, cluster_column = "leiden_clus") #' @export -cellProximityEnrichment <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "Delaunay_network", - cluster_column, - number_of_simulations = 1000, - adjust_method = c( - "none", "fdr", "bonferroni", "BH", - "holm", "hochberg", "hommel", - "BY" - ), - set_seed = TRUE, - seed_number = 1234) { +cellProximityEnrichment <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "Delaunay_network", + cluster_column, + number_of_simulations = 1000, + adjust_method = c( + "none", "fdr", "bonferroni", "BH", + "holm", "hochberg", "hommel", + "BY" + ), + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -164,7 +174,8 @@ cellProximityEnrichment <- function(gobject, unified_cells <- type_int <- N <- NULL spatial_network_annot <- dt_sort_combine_two_columns( - spatial_network_annot, "to", "from", 
"unified_cells") + spatial_network_annot, "to", "from", "unified_cells" + ) spatial_network_annot <- spatial_network_annot[!duplicated(unified_cells)] sample_dt <- make_simulated_network( @@ -180,7 +191,8 @@ cellProximityEnrichment <- function(gobject, # combine original and simulated network table_sim_results <- sample_dt[, .N, by = c( - "unified_int", "type_int", "round")] + "unified_int", "type_int", "round" + )] ## create complete simulations ## add 0 if no single interaction was found @@ -188,17 +200,21 @@ cellProximityEnrichment <- function(gobject, # data.table with 0's for all interactions minimum_simulations <- unique_ints[rep( - seq_len(nrow(unique_ints)), number_of_simulations), ] + seq_len(nrow(unique_ints)), number_of_simulations + ), ] minimum_simulations[, round := rep( paste0("sim", seq_len(number_of_simulations)), - each = nrow(unique_ints))] + each = nrow(unique_ints) + )] minimum_simulations[, N := 0] table_sim_minimum_results <- rbind(table_sim_results, minimum_simulations) table_sim_minimum_results[, V1 := sum(N), by = c( - "unified_int", "type_int", "round")] + "unified_int", "type_int", "round" + )] table_sim_minimum_results <- unique( - table_sim_minimum_results[, .(unified_int, type_int, round, V1)]) + table_sim_minimum_results[, .(unified_int, type_int, round, V1)] + ) table_sim_results <- table_sim_minimum_results @@ -209,7 +225,8 @@ cellProximityEnrichment <- function(gobject, spatial_network_annot[, round := "original"] table_orig_results <- spatial_network_annot[, .N, by = c( - "unified_int", "type_int", "round")] + "unified_int", "type_int", "round" + )] table_orig_results[, orig := "original"] data.table::setnames(table_orig_results, old = "N", new = "V1") @@ -220,27 +237,39 @@ cellProximityEnrichment <- function(gobject, # add missing combinations from original or simulations # probably not needed anymore all_simulation_ints <- as.character(unique(table_results[ - orig == "simulations"]$unified_int)) + orig == "simulations" + ]$unified_int)) all_original_ints <- as.character(unique(table_results[ - orig == "original"]$unified_int)) + orig == "original" + ]$unified_int)) missing_in_original <- all_simulation_ints[ - !all_simulation_ints %in% all_original_ints] + !all_simulation_ints %in% all_original_ints + ] missing_in_simulations <- all_original_ints[ - !all_original_ints %in% all_simulation_ints] + !all_original_ints %in% all_simulation_ints + ] create_missing_for_original <- table_results[ - unified_int %in% missing_in_original] + unified_int %in% missing_in_original + ] create_missing_for_original <- unique(create_missing_for_original[ - , c("orig", "V1") := list("original", 0)]) + , c("orig", "V1") := list("original", 0) + ]) create_missing_for_simulations <- table_results[ - unified_int %in% missing_in_simulations] + unified_int %in% missing_in_simulations + ] create_missing_for_simulations <- unique( create_missing_for_simulations[, c("orig", "V1") := list( - "simulations", 0)]) + "simulations", 0 + )] + ) table_results <- do.call( "rbind", - list(table_results, create_missing_for_original, - create_missing_for_simulations)) + list( + table_results, create_missing_for_original, + create_missing_for_simulations + ) + ) ## p-values @@ -264,9 +293,9 @@ cellProximityEnrichment <- function(gobject, } p_orig_higher <- 1 - (sum((orig_value + 1) > (sim_values + 1)) / - number_of_simulations) + number_of_simulations) p_orig_lower <- 1 - (sum((orig_value + 1) < (sim_values + 1)) / - number_of_simulations) + number_of_simulations) combo_list[[int_combo]] <- 
this_combo p_high[[int_combo]] <- p_orig_higher @@ -275,21 +304,26 @@ cellProximityEnrichment <- function(gobject, res_pvalue_DT <- data.table::data.table( unified_int = as.vector(combo_list), p_higher_orig = p_high, - p_lower_orig = p_low) + p_lower_orig = p_low + ) # depletion or enrichment in barplot format table_mean_results <- table_results[, .(mean(V1)), by = c( - "orig", "unified_int", "type_int")] + "orig", "unified_int", "type_int" + )] table_mean_results_dc <- data.table::dcast.data.table( data = table_mean_results, formula = type_int + unified_int ~ orig, - value.var = "V1") + value.var = "V1" + ) table_mean_results_dc[, original := ifelse(is.na(original), 0, original)] table_mean_results_dc[, enrichm := log2((original + 1) / (simulations + 1))] table_mean_results_dc <- merge( - table_mean_results_dc, res_pvalue_DT, by = "unified_int") + table_mean_results_dc, res_pvalue_DT, + by = "unified_int" + ) data.table::setorder(table_mean_results_dc, enrichm) table_mean_results_dc[, unified_int := factor(unified_int, unified_int)] @@ -300,9 +334,13 @@ cellProximityEnrichment <- function(gobject, PI_value <- int_ranking <- NULL table_mean_results_dc[, p.adj_higher := stats::p.adjust( - p_higher_orig, method = sel_adjust_method)] + p_higher_orig, + method = sel_adjust_method + )] table_mean_results_dc[, p.adj_lower := stats::p.adjust( - p_lower_orig, method = sel_adjust_method)] + p_lower_orig, + method = sel_adjust_method + )] table_mean_results_dc[, PI_value := ifelse(p.adj_higher <= p.adj_lower, @@ -315,8 +353,10 @@ cellProximityEnrichment <- function(gobject, table_mean_results_dc <- table_mean_results_dc[order(-PI_value)] table_mean_results_dc[, int_ranking := seq_len(.N)] - return(list(raw_sim_table = table_results, - enrichm_res = table_mean_results_dc)) + return(list( + raw_sim_table = table_results, + enrichm_res = table_mean_results_dc + )) } @@ -345,17 +385,20 @@ cellProximityEnrichment <- function(gobject, #' @examples #' g <- GiottoData::loadGiottoMini("visium") #' -#' addCellIntMetadata(g, cluster_column = "leiden_clus", -#' cell_interaction = "custom_leiden") +#' addCellIntMetadata(g, +#' cluster_column = "leiden_clus", +#' cell_interaction = "custom_leiden" +#' ) #' @export -addCellIntMetadata <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network = "spatial_network", - cluster_column, - cell_interaction, - name = "select_int", - return_gobject = TRUE) { +addCellIntMetadata <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network = "spatial_network", + cluster_column, + cell_interaction, + name = "select_int", + return_gobject = TRUE) { # set spatial unit and feature type spat_unit <- set_default_spat_unit( gobject = gobject, @@ -413,29 +456,29 @@ addCellIntMetadata <- function(gobject, cell_type_2 <- strsplit(cell_interaction, split = "--")[[1]][2] cell_metadata[][, c(name) := ifelse(!get(cluster_column) %in% c( - cell_type_1, cell_type_2), "other", - ifelse(get(cluster_column) == cell_type_1 & cell_ID %in% selected_cells, - paste0("select_", cell_type_1), - ifelse(get(cluster_column) == cell_type_2 & cell_ID %in% - selected_cells, paste0("select_", cell_type_2), - ifelse(get(cluster_column) == cell_type_1, - paste0("other_", cell_type_1), - paste0("other_", cell_type_2)) - ) + cell_type_1, cell_type_2 + ), "other", + ifelse(get(cluster_column) == cell_type_1 & cell_ID %in% selected_cells, + paste0("select_", cell_type_1), + ifelse(get(cluster_column) == cell_type_2 & cell_ID %in% + selected_cells, paste0("select_", 
cell_type_2), + ifelse(get(cluster_column) == cell_type_1, + paste0("other_", cell_type_1), + paste0("other_", cell_type_2) + ) ) + ) )] if (return_gobject == TRUE) { ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject <- set_cell_metadata(gobject, - metadata = cell_metadata, - verbose = FALSE - ) + gobject <- setGiotto(gobject, cell_metadata, verbose = FALSE) ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## update parameters used ## gobject <- update_giotto_params(gobject, - description = "_add_cell_int_info") + description = "_add_cell_int_info" + ) return(gobject) } else { @@ -460,21 +503,26 @@ NULL #' @describeIn cell_proximity_tests t.test #' @keywords internal -.do_ttest <- function(expr_values, - select_ind, - other_ind, - adjust_method, - mean_method, - offset = 0.1) { +.do_ttest <- function( + expr_values, + select_ind, + other_ind, + adjust_method, + mean_method, + offset = 0.1) { vmsg(.is_debug = TRUE, ".do_ttest") # data.table variables p.value <- p.adj <- NULL mean_sel <- my_rowMeans( - expr_values[, select_ind], method = mean_method, offset = offset) + expr_values[, select_ind], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, other_ind], method = mean_method, offset = offset) + expr_values[, other_ind], + method = mean_method, offset = offset + ) if (length(select_ind) == 1 | length(other_ind) == 1) { results <- NaN @@ -492,7 +540,8 @@ NULL "feats" = rownames(expr_values), "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff, - "p.value" = unlist(results)) + "p.value" = unlist(results) + ) resultsDT[, p.value := ifelse(is.nan(p.value), 1, p.value)] resultsDT[, p.adj := stats::p.adjust(p.value, method = adjust_method)] setorder(resultsDT, p.adj) @@ -505,20 +554,26 @@ NULL #' @describeIn cell_proximity_tests limma t.test #' @keywords internal -.do_limmatest <- function(expr_values, - select_ind, - other_ind, - mean_method, - offset = 0.1) { +.do_limmatest <- function( + expr_values, + select_ind, + other_ind, + mean_method, + offset = 0.1) { vmsg(.is_debug = TRUE, ".do_limmatest") + package_check("limma") + # data.table variables sel <- other <- feats <- P.Value <- adj.P.Val <- p.adj <- NULL expr_values_subset <- cbind( - expr_values[, select_ind], expr_values[, other_ind]) - mygroups <- c(rep("sel", length(select_ind)), - rep("other", length(other_ind))) + expr_values[, select_ind], expr_values[, other_ind] + ) + mygroups <- c( + rep("sel", length(select_ind)), + rep("other", length(other_ind)) + ) mygroups <- factor(mygroups, levels = unique(mygroups)) design <- stats::model.matrix(~ 0 + mygroups) @@ -536,15 +591,21 @@ NULL # limma to DT limma_result <- limma::topTable( - fitc_ebayes, coef = 1, number = 100000, confint = TRUE) + fitc_ebayes, + coef = 1, number = 100000, confint = TRUE + ) limmaDT <- data.table::as.data.table(limma_result) limmaDT[, feats := rownames(limma_result)] # other info mean_sel <- my_rowMeans( - expr_values[, select_ind], method = mean_method, offset = offset) + expr_values[, select_ind], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, other_ind], method = mean_method, offset = offset) + expr_values[, other_ind], + method = mean_method, offset = offset + ) log2fc <- log2((mean_sel + offset) / (mean_all + offset)) diff <- mean_sel - mean_all @@ -558,9 +619,12 @@ NULL ) limmaDT <- data.table::merge.data.table(limmaDT, tempDT, by = "feats") limmaDT <- limmaDT[ - , .(feats, sel, other, log2fc, diff, P.Value, 
adj.P.Val)] - colnames(limmaDT) <- c("feats", "sel", "other", "log2fc", "diff", - "p.value", "p.adj") + , .(feats, sel, other, log2fc, diff, P.Value, adj.P.Val) + ] + colnames(limmaDT) <- c( + "feats", "sel", "other", "log2fc", "diff", + "p.value", "p.adj" + ) setorder(limmaDT, p.adj) @@ -572,21 +636,26 @@ NULL #' @describeIn cell_proximity_tests wilcoxon #' @keywords internal -.do_wilctest <- function(expr_values, - select_ind, - other_ind, - adjust_method, - mean_method, - offset = 0.1) { +.do_wilctest <- function( + expr_values, + select_ind, + other_ind, + adjust_method, + mean_method, + offset = 0.1) { vmsg(.is_debug = TRUE, ".do_wilctest") # data.table variables p.value <- p.adj <- NULL mean_sel <- my_rowMeans( - expr_values[, select_ind], method = mean_method, offset = offset) + expr_values[, select_ind], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, other_ind], method = mean_method, offset = offset) + expr_values[, other_ind], + method = mean_method, offset = offset + ) if (length(select_ind) == 1 | length(other_ind) == 1) { results <- NaN @@ -606,7 +675,8 @@ NULL "other" = mean_all, "log2fc" = log2fc, "diff" = diff, - "p.value" = unlist(results)) + "p.value" = unlist(results) + ) resultsDT[, p.value := ifelse(is.nan(p.value), 1, p.value)] resultsDT[, p.adj := stats::p.adjust(p.value, method = adjust_method)] setorder(resultsDT, p.adj) @@ -616,25 +686,29 @@ NULL # calculate original values -.do_permuttest_original <- function(expr_values, - select_ind, - other_ind, - name = "orig", - mean_method, - offset = 0.1) { +.do_permuttest_original <- function( + expr_values, + select_ind, + other_ind, + name = "orig", + mean_method, + offset = 0.1) { # data.table variables feats <- NULL mean_sel <- my_rowMeans(expr_values[ - , select_ind], method = mean_method, offset = offset) + , select_ind + ], method = mean_method, offset = offset) mean_all <- my_rowMeans(expr_values[ - , other_ind], method = mean_method, offset = offset) + , other_ind + ], method = mean_method, offset = offset) log2fc <- log2((mean_sel + offset) / (mean_all + offset)) diff <- mean_sel - mean_all resultsDT <- data.table( - "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff) + "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff + ) resultsDT[, feats := rownames(expr_values)] resultsDT[, name := name] @@ -644,14 +718,15 @@ NULL # calculate random values -.do_permuttest_random <- function(expr_values, - select_ind, - other_ind, - name = "perm_1", - mean_method, - offset = 0.1, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_random <- function( + expr_values, + select_ind, + other_ind, + name = "perm_1", + mean_method, + offset = 0.1, + set_seed = TRUE, + seed_number = 1234) { # data.table variables feats <- NULL @@ -668,15 +743,20 @@ NULL # alternative mean_sel <- my_rowMeans( - expr_values[, random_select], method = mean_method, offset = offset) + expr_values[, random_select], + method = mean_method, offset = offset + ) mean_all <- my_rowMeans( - expr_values[, random_other], method = mean_method, offset = offset) + expr_values[, random_other], + method = mean_method, offset = offset + ) log2fc <- log2((mean_sel + offset) / (mean_all + offset)) diff <- mean_sel - mean_all resultsDT <- data.table( - "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff) + "sel" = mean_sel, "other" = mean_all, "log2fc" = log2fc, "diff" = diff + ) resultsDT[, feats := rownames(expr_values)] resultsDT[, name := name] @@ -687,14 +767,15 @@ 
NULL # calculate multiple random values -.do_multi_permuttest_random <- function(expr_values, - select_ind, - other_ind, - mean_method, - offset = 0.1, - n = 100, - set_seed = TRUE, - seed_number = 1234) { +.do_multi_permuttest_random <- function( + expr_values, + select_ind, + other_ind, + mean_method, + offset = 0.1, + n = 100, + set_seed = TRUE, + seed_number = 1234) { if (set_seed == TRUE) { seed_number_list <- seed_number:(seed_number + (n - 1)) } @@ -720,14 +801,15 @@ NULL #' @describeIn cell_proximity_tests random permutation #' @keywords internal -.do_permuttest <- function(expr_values, - select_ind, other_ind, - n_perm = 1000, - adjust_method = "fdr", - mean_method, - offset = 0.1, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest <- function( + expr_values, + select_ind, other_ind, + n_perm = 1000, + adjust_method = "fdr", + mean_method, + offset = 0.1, + set_seed = TRUE, + seed_number = 1234) { # data.table variables log2fc_diff <- log2fc <- sel <- other <- feats <- p_higher <- p_lower <- perm_sel <- NULL @@ -759,9 +841,11 @@ NULL ## random_perms[, log2fc_diff := rep(original$log2fc, n_perm) - log2fc] random_perms[, - c("perm_sel", "perm_other", "perm_log2fc", "perm_diff") := list( - mean(sel), mean(other), mean(log2fc), mean(diff)), - by = feats] + c("perm_sel", "perm_other", "perm_log2fc", "perm_diff") := list( + mean(sel), mean(other), mean(log2fc), mean(diff) + ), + by = feats + ] ## get p-values random_perms[, p_higher := sum(log2fc_diff > 0), by = feats] @@ -771,19 +855,26 @@ NULL ## combine results permutation and original random_perms_res <- unique(random_perms[ - , .(feats, perm_sel, perm_other, perm_log2fc, perm_diff, p_higher, - p_lower)]) + , .( + feats, perm_sel, perm_other, perm_log2fc, perm_diff, p_higher, + p_lower + ) + ]) results_m <- data.table::merge.data.table( random_perms_res, original[, .(feats, sel, other, log2fc, diff)], - by = "feats") + by = "feats" + ) # select lowest p-value and perform p.adj results_m[, p.value := ifelse(p_higher <= p_lower, p_higher, p_lower)] results_m[, p.adj := stats::p.adjust(p.value, method = adjust_method)] results_m <- results_m[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, perm_sel, - perm_other, perm_log2fc, perm_diff)] + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, perm_sel, + perm_other, perm_log2fc, perm_diff + ) + ] setorder(results_m, p.adj, -log2fc) return(results_m) @@ -798,22 +889,25 @@ NULL #' @returns differential test on subsets of a matrix #' @keywords internal #' @seealso [cell_proximity_tests] -.do_cell_proximity_test <- function(expr_values, - select_ind, other_ind, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - n_perm = 100, - adjust_method = c( - "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "fdr", "none" - ), - set_seed = TRUE, - seed_number = 1234, - verbose = FALSE) { +.do_cell_proximity_test <- function( + expr_values, + select_ind, other_ind, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + n_perm = 100, + adjust_method = c( + "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "fdr", "none" + ), + set_seed = TRUE, + seed_number = 1234, + verbose = FALSE) { # get parameters diff_test <- match.arg( - diff_test, choices = c("permutation", "limma", "t.test", "wilcox")) + diff_test, + choices = c("permutation", "limma", "t.test", "wilcox") + ) adjust_method <- match.arg(adjust_method, choices = c( 
"bonferroni", "BH", "holm", "hochberg", "hommel", "BY", "fdr", "none" @@ -867,21 +961,22 @@ NULL #' @returns data.table #' @keywords internal #' @seealso [.do_cell_proximity_test()] for specific tests -.findCellProximityFeats_per_interaction <- function(sel_int, - expr_values, - cell_metadata, - annot_spatnetwork, - cluster_column = NULL, - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - exclude_selected_cells_from_test = TRUE, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = "bonferroni", - nr_permutations = 100, - set_seed = TRUE, - seed_number = 1234) { +.findCellProximityFeats_per_interaction <- function( + sel_int, + expr_values, + cell_metadata, + annot_spatnetwork, + cluster_column = NULL, + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + exclude_selected_cells_from_test = TRUE, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + adjust_method = "bonferroni", + nr_permutations = 100, + set_seed = TRUE, + seed_number = 1234) { # data.table variables unified_int <- to_cell_type <- from_cell_type <- cell_type <- int_cell_type <- NULL @@ -890,14 +985,16 @@ NULL # select test to perform diff_test <- match.arg( arg = diff_test, - choices = c("permutation", "limma", "t.test", "wilcox")) + choices = c("permutation", "limma", "t.test", "wilcox") + ) # select subnetwork sub_spatnetwork <- annot_spatnetwork[unified_int == sel_int] # unique cell types unique_cell_types <- unique( - c(sub_spatnetwork$to_cell_type, sub_spatnetwork$from_cell_type)) + c(sub_spatnetwork$to_cell_type, sub_spatnetwork$from_cell_type) + ) if (length(unique_cell_types) == 2) { first_cell_type <- unique_cell_types[1] @@ -915,9 +1012,11 @@ NULL ## all cell ids all_cell1 <- cell_metadata[get(cluster_column) == first_cell_type][[ - "cell_ID"]] + "cell_ID" + ]] all_cell2 <- cell_metadata[get(cluster_column) == second_cell_type][[ - "cell_ID"]] + "cell_ID" + ]] ## exclude selected if (exclude_selected_cells_from_test == TRUE) { @@ -1004,7 +1103,8 @@ NULL ## all cell ids all_cell1 <- cell_metadata[get(cluster_column) == first_cell_type][[ - "cell_ID"]] + "cell_ID" + ]] ## exclude selected if (exclude_selected_cells_from_test == TRUE) { @@ -1118,29 +1218,30 @@ NULL #' cluster_column = "leiden_clus", #' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), #' nr_permutations = 10 -#' ) +#' ) #' @export -findInteractionChangedFeats <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - expression_values = "normalized", - selected_feats = NULL, - cluster_column, - spatial_network_name = "Delaunay_network", - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = c("permutation", "limma", "t.test", "wilcox"), - mean_method = c("arithmic", "geometric"), - offset = 0.1, - adjust_method = c( - "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "fdr", "none" - ), - nr_permutations = 1000, - exclude_selected_cells_from_test = TRUE, - do_parallel = TRUE, - set_seed = TRUE, - seed_number = 1234) { +findInteractionChangedFeats <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = "normalized", + selected_feats = NULL, + cluster_column, + spatial_network_name = "Delaunay_network", + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + diff_test = c("permutation", "limma", "t.test", "wilcox"), + mean_method = c("arithmic", "geometric"), + offset = 0.1, + adjust_method = c( + "bonferroni", "BH", 
"holm", "hochberg", "hommel", + "BY", "fdr", "none" + ), + nr_permutations = 1000, + exclude_selected_cells_from_test = TRUE, + do_parallel = TRUE, + set_seed = TRUE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1155,7 +1256,8 @@ findInteractionChangedFeats <- function(gobject, # expression values to be used values <- match.arg( expression_values, - unique(c("normalized", "scaled", "custom", expression_values))) + unique(c("normalized", "scaled", "custom", expression_values)) + ) expr_values <- getExpression( gobject = gobject, spat_unit = spat_unit, @@ -1173,7 +1275,9 @@ findInteractionChangedFeats <- function(gobject, # difference test diff_test <- match.arg( - diff_test, choices = c("permutation", "limma", "t.test", "wilcox")) + diff_test, + choices = c("permutation", "limma", "t.test", "wilcox") + ) # p.adj test adjust_method <- match.arg(adjust_method, choices = c( @@ -1185,7 +1289,8 @@ findInteractionChangedFeats <- function(gobject, ## metadata cell_metadata <- pDataDT( - gobject, spat_unit = spat_unit, feat_type = feat_type + gobject, + spat_unit = spat_unit, feat_type = feat_type ) @@ -1241,10 +1346,12 @@ findInteractionChangedFeats <- function(gobject, final_result[, spec_int := paste0(cell_type, "--", int_cell_type)] final_result[, type_int := ifelse( - cell_type == int_cell_type, "homo", "hetero")] + cell_type == int_cell_type, "homo", "hetero" + )] permutation_test <- ifelse( - diff_test == "permutation", nr_permutations, "no permutations") + diff_test == "permutation", nr_permutations, "no permutations" + ) icfObject <- structure( .Data = list( @@ -1277,12 +1384,15 @@ findICF <- findInteractionChangedFeats #' @param x object to print #' @param \dots additional params to pass (none implemented) #' @keywords internal +#' @returns icfObject #' @export print.icfObject <- function(x, ...) { cat("An object of class", class(x), "\n") info <- list( - dimensions = sprintf("%d, %d (icfs, attributes)", - nrow(x$ICFscores), ncol(x$ICFscores)) + dimensions = sprintf( + "%d, %d (icfs, attributes)", + nrow(x$ICFscores), ncol(x$ICFscores) + ) ) print_list(info, pre = " -") cat("\n") @@ -1320,9 +1430,9 @@ print.icfObject <- function(x, ...) { #' g <- GiottoData::loadGiottoMini("visium") #' #' icf <- findInteractionChangedFeats(g, -#' cluster_column = "leiden_clus", -#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), -#' nr_permutations = 10 +#' cluster_column = "leiden_clus", +#' selected_feats = c("Gna12", "Ccnd2", "Btbd17"), +#' nr_permutations = 10 #' ) #' force(icf) #' force(icf$ICFscores) @@ -1336,17 +1446,18 @@ print.icfObject <- function(x, ...) 
{ #' force(icf_filter2) #' #' @export -filterInteractionChangedFeats <- function(icfObject, - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down")) { +filterInteractionChangedFeats <- function( + icfObject, + min_cells = 4, + min_cells_expr = 1, + min_int_cells = 4, + min_int_cells_expr = 1, + min_fdr = 0.1, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down")) { # NSE vars nr_select <- int_nr_select <- zscores <- log2fc <- sel <- other <- p.adj <- NULL @@ -1357,7 +1468,9 @@ filterInteractionChangedFeats <- function(icfObject, } zscores_column <- match.arg( - zscores_column, choices = c("cell_type", "feats")) + zscores_column, + choices = c("cell_type", "feats") + ) ICFscore <- copy(icfObject[["ICFscores"]]) @@ -1368,7 +1481,7 @@ filterInteractionChangedFeats <- function(icfObject, ## sequential filter steps ## # 1. minimum number of source and target cells selection_scores <- ICFscore[nr_select >= min_cells & - int_nr_select >= min_int_cells] + int_nr_select >= min_int_cells] # 2. create z-scores for log2fc per cell type selection_scores[, zscores := scale(log2fc), by = c(zscores_column)] @@ -1376,12 +1489,12 @@ filterInteractionChangedFeats <- function(icfObject, # 3. filter based on z-scores and minimum levels comb_DT <- rbind( selection_scores[zscores >= min_zscore & - abs(diff) >= min_spat_diff & - log2fc >= min_log2_fc & sel >= min_cells_expr], + abs(diff) >= min_spat_diff & + log2fc >= min_log2_fc & sel >= min_cells_expr], selection_scores[zscores <= -min_zscore & - abs(diff) >= min_spat_diff & - log2fc <= -min_log2_fc & - other >= min_int_cells_expr] + abs(diff) >= min_spat_diff & + log2fc <= -min_log2_fc & + other >= min_int_cells_expr] ) # 4. 
filter based on adjusted p-value (fdr) @@ -1422,16 +1535,17 @@ filterICF <- filterInteractionChangedFeats #' @description Combine ICF scores per interaction #' @returns data.table #' @keywords internal -.combineInteractionChangedFeatures_per_interaction <- function(icfObject, - sel_int, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5) { +.combineInteractionChangedFeatures_per_interaction <- function( + icfObject, + sel_int, + selected_feats = NULL, + specific_feats_1 = NULL, + specific_feats_2 = NULL, + min_cells = 5, + min_int_cells = 3, + min_fdr = 0.05, + min_spat_diff = 0, + min_log2_fc = 0.5) { # data.table variables unif_int <- feats <- cell_type <- p.adj <- nr_select <- int_nr_select <- log2fc <- sel <- NULL @@ -1543,16 +1657,23 @@ filterICF <- filterInteractionChangedFeats } else { # make it specific subset_cell_1 <- subset_cell_1[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, - unif_int)] + unif_int + ) + ] data.table::setnames(subset_cell_1, - old = c("feats", "sel", "other", "log2fc", "diff", - "p.value", "p.adj", "cell_type", "int_cell_type", - "nr_select", "nr_other"), - new = c("feats_1", "sel_1", "other_1", "log2fc_1", - "diff_1", "p.value_1", "p.adj_1", "cell_type_1", - "int_cell_type_1", "nr_select_1", "nr_other_1") + old = c( + "feats", "sel", "other", "log2fc", "diff", + "p.value", "p.adj", "cell_type", "int_cell_type", + "nr_select", "nr_other" + ), + new = c( + "feats_1", "sel_1", "other_1", "log2fc_1", + "diff_1", "p.value_1", "p.adj_1", "cell_type_1", + "int_cell_type_1", "nr_select_1", "nr_other_1" + ) ) } } @@ -1631,23 +1752,32 @@ filterICF <- filterInteractionChangedFeats ) } else { subset_cell_2 <- subset_cell_2[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, - unif_int)] + unif_int + ) + ] data.table::setnames(subset_cell_2, - old = c("feats", "sel", "other", "log2fc", "diff", - "p.value", "p.adj", "cell_type", "int_cell_type", - "nr_select", "nr_other"), - new = c("feats_2", "sel_2", "other_2", "log2fc_2", - "diff_2", "p.value_2", "p.adj_2", "cell_type_2", - "int_cell_type_2", "nr_select_2", "nr_other_2") + old = c( + "feats", "sel", "other", "log2fc", "diff", + "p.value", "p.adj", "cell_type", "int_cell_type", + "nr_select", "nr_other" + ), + new = c( + "feats_2", "sel_2", "other_2", "log2fc_2", + "diff_2", "p.value_2", "p.adj_2", "cell_type_2", + "int_cell_type_2", "nr_select_2", "nr_other_2" + ) ) } } merge_subsets <- data.table::merge.data.table( - subset_cell_1, subset_cell_2, by = c("unif_int"), - allow.cartesian = TRUE) + subset_cell_1, subset_cell_2, + by = c("unif_int"), + allow.cartesian = TRUE + ) } else if (length(unique_cell_types) == 1) { ## CELL TYPE 1 subset_cell_1 <- subset[cell_type == unique_cell_types[1]] @@ -1685,15 +1815,22 @@ filterICF <- filterInteractionChangedFeats ) } else { subset_cell_1A <- subset_cell_1[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, - cell_type, int_cell_type, nr_select, nr_other, unif_int)] + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, + cell_type, int_cell_type, nr_select, nr_other, unif_int + ) + ] data.table::setnames(subset_cell_1A, - old = c("feats", "sel", "other", "log2fc", "diff", "p.value", - "p.adj", "cell_type", "int_cell_type", 
"nr_select", - "nr_other"), - new = c("feats_1", "sel_1", "other_1", "log2fc_1", "diff_1", - "p.value_1", "p.adj_1", "cell_type_1", - "int_cell_type_1", "nr_select_1", "nr_other_1") + old = c( + "feats", "sel", "other", "log2fc", "diff", "p.value", + "p.adj", "cell_type", "int_cell_type", "nr_select", + "nr_other" + ), + new = c( + "feats_1", "sel_1", "other_1", "log2fc_1", "diff_1", + "p.value_1", "p.adj_1", "cell_type_1", + "int_cell_type_1", "nr_select_1", "nr_other_1" + ) ) } @@ -1724,21 +1861,30 @@ filterICF <- filterInteractionChangedFeats ) } else { subset_cell_1B <- subset_cell_1[ - , .(feats, sel, other, log2fc, diff, p.value, p.adj, - cell_type, int_cell_type, nr_select, nr_other, unif_int)] + , .( + feats, sel, other, log2fc, diff, p.value, p.adj, + cell_type, int_cell_type, nr_select, nr_other, unif_int + ) + ] data.table::setnames(subset_cell_1B, - old = c("feats", "sel", "other", "log2fc", "diff", "p.value", - "p.adj", "cell_type", "int_cell_type", "nr_select", - "nr_other"), - new = c("feats_2", "sel_2", "other_2", "log2fc_2", "diff_2", - "p.value_2", "p.adj_2", "cell_type_2", - "int_cell_type_2", "nr_select_2", "nr_other_2") + old = c( + "feats", "sel", "other", "log2fc", "diff", "p.value", + "p.adj", "cell_type", "int_cell_type", "nr_select", + "nr_other" + ), + new = c( + "feats_2", "sel_2", "other_2", "log2fc_2", "diff_2", + "p.value_2", "p.adj_2", "cell_type_2", + "int_cell_type_2", "nr_select_2", "nr_other_2" + ) ) } merge_subsets <- data.table::merge.data.table( - subset_cell_1A, subset_cell_1B, by = c("unif_int"), - allow.cartesian = TRUE) + subset_cell_1A, subset_cell_1B, + by = c("unif_int"), + allow.cartesian = TRUE + ) } # restrict to feature combinations if needed @@ -1789,18 +1935,19 @@ filterICF <- filterInteractionChangedFeats #' force(cicf) #' combineICF(g_icf) # this is a shortened alias #' @export -combineInteractionChangedFeats <- function(icfObject, - selected_ints = NULL, - selected_feats = NULL, - specific_feats_1 = NULL, - specific_feats_2 = NULL, - min_cells = 5, - min_int_cells = 3, - min_fdr = 0.05, - min_spat_diff = 0, - min_log2_fc = 0.5, - do_parallel = TRUE, - verbose = TRUE) { +combineInteractionChangedFeats <- function( + icfObject, + selected_ints = NULL, + selected_feats = NULL, + specific_feats_1 = NULL, + specific_feats_2 = NULL, + min_cells = 5, + min_int_cells = 3, + min_fdr = 0.05, + min_spat_diff = 0, + min_log2_fc = 0.5, + do_parallel = TRUE, + verbose = TRUE) { # NSE vars unif_int <- feat1_feat2 <- feats_1 <- feats_2 <- comb_logfc <- log2fc_1 <- log2fc_2 <- direction <- NULL @@ -1888,9 +2035,11 @@ combineInteractionChangedFeats <- function(icfObject, "p.adj" = icfObject[["test_info"]][["p.adj"]], "min cells" = icfObject[["test_info"]][["min cells"]], "min interacting cells" = icfObject[["test_info"]][[ - "min interacting cells"]], + "min interacting cells" + ]], "exclude selected cells" = icfObject[["test_info"]][[ - "exclude selected cells"]], + "exclude selected cells" + ]], "perm" = icfObject[["test_info"]][["perm"]] ) ), @@ -1908,12 +2057,15 @@ combineICF <- combineInteractionChangedFeats #' @param x object to print #' @param \dots additional params to pass (none implemented) #' @keywords internal +#' @returns combIcfObject #' @export print.combIcfObject <- function(x, ...) 
{ cat("An object of class", class(x), "\n") info <- list( - dimensions = sprintf("%d, %d (icf pairs, attributes)", - nrow(x$combICFscores), ncol(x$combICFscores)) + dimensions = sprintf( + "%d, %d (icf pairs, attributes)", + nrow(x$combICFscores), ncol(x$combICFscores) + ) ) print_list(info, pre = " -") cat("\n") @@ -1939,12 +2091,13 @@ print.combIcfObject <- function(x, ...) { #' @param feat_set_2 second specific feat set from feat pairs #' @returns data.table with average expression scores for each cluster #' @keywords internal -.average_feat_feat_expression_in_groups <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - cluster_column = "cell_types", - feat_set_1, - feat_set_2) { +.average_feat_feat_expression_in_groups <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + cluster_column = "cell_types", + feat_set_1, + feat_set_2) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -1965,7 +2118,8 @@ print.combIcfObject <- function(x, ...) { # change column names back to original new_colnames <- gsub( - pattern = "cluster_", replacement = "", colnames(average_DT)) + pattern = "cluster_", replacement = "", colnames(average_DT) + ) colnames(average_DT) <- new_colnames # keep order of colnames @@ -1985,43 +2139,59 @@ print.combIcfObject <- function(x, ...) { # get ligand and receptor information ligand_match <- average_DT[ - match(feat_set_1, rownames(average_DT)), , drop = FALSE] + match(feat_set_1, rownames(average_DT)), , + drop = FALSE + ] receptor_match <- average_DT[ - match(feat_set_2, rownames(average_DT)), , drop = FALSE] + match(feat_set_2, rownames(average_DT)), , + drop = FALSE + ] # data.table variables ligand <- LR_comb <- receptor <- LR_expr <- lig_expr <- rec_expr <- lig_cell_type <- rec_cell_type <- NULL all_ligand_cols <- colnames(ligand_match) - lig_test <- data.table::as.data.table( - reshape2::melt(ligand_match, measure.vars = all_ligand_cols)) + lig_test <- data.table::melt( + data.table::as.data.table(ligand_match), + measure.vars = all_ligand_cols + ) lig_test[, ligand := rep(rownames(ligand_match), ncol(ligand_match))] lig_test[, ligand := strsplit(ligand, "\\.")[[1]][1], - by = seq_len(nrow(lig_test))] + by = seq_len(nrow(lig_test)) + ] lig_test[, LR_comb := rep(LR_pairs, ncol(ligand_match))] setnames(lig_test, "value", "lig_expr") setnames(lig_test, "variable", "lig_cell_type") all_receptor_cols <- colnames(receptor_match) - rec_test <- data.table::as.data.table(reshape2::melt( - receptor_match, measure.vars = all_receptor_cols)) + rec_test <- data.table::melt( + data.table::as.data.table(receptor_match), + measure.vars = all_receptor_cols + ) rec_test[, receptor := rep(rownames(receptor_match), ncol(receptor_match))] rec_test[, receptor := strsplit( - receptor, "\\.")[[1]][1], by = seq_len(nrow(rec_test))] + receptor, "\\." 
+ )[[1]][1], by = seq_len(nrow(rec_test))] rec_test[, LR_comb := rep(LR_pairs, ncol(receptor_match))] setnames(rec_test, "value", "rec_expr") setnames(rec_test, "variable", "rec_cell_type") lig_rec_test <- merge( - lig_test, rec_test, by = "LR_comb", allow.cartesian = TRUE) + lig_test, rec_test, + by = "LR_comb", allow.cartesian = TRUE + ) lig_rec_test[, LR_expr := lig_expr + rec_expr] lig_rec_test[, lig_cell_type := factor( - lig_cell_type, levels = colnames_order)] + lig_cell_type, + levels = colnames_order + )] lig_rec_test[, rec_cell_type := factor( - rec_cell_type, levels = colnames_order)] + rec_cell_type, + levels = colnames_order + )] setorder(lig_rec_test, LR_comb, lig_cell_type, rec_cell_type) return(lig_rec_test) @@ -2043,7 +2213,7 @@ print.combIcfObject <- function(x, ...) { #' (random variance and z-score) #' @param adjust_method which method to adjust p-values #' @param adjust_target adjust multiple hypotheses at the cell or feature level -#' @param set_seed set seed for random simulations (default = TRUE) +#' @param set_seed `logical`. set seed for random simulations (default = TRUE) #' @param seed_number seed number #' @param verbose verbose #' @returns Cell-Cell communication scores for feature pairs based on @@ -2065,23 +2235,24 @@ print.combIcfObject <- function(x, ...) { #' #' force(res) #' @export -exprCellCellcom <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - cluster_column = "cell_types", - random_iter = 1000, - feat_set_1, - feat_set_2, - log2FC_addendum = 0.1, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - set_seed = TRUE, - seed_number = 1234, - verbose = TRUE) { +exprCellCellcom <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + cluster_column = "cell_types", + random_iter = 1000, + feat_set_1, + feat_set_2, + log2FC_addendum = 0.1, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2143,11 +2314,10 @@ exprCellCellcom <- function(gobject, # not yet available - progressr::with_progress({ - pb <- progressr::progressor(steps = random_iter) + with_pbar({ + pb <- pbar(steps = random_iter) for (sim in seq_len(random_iter)) { - # create temporary giotto tempGiotto <- subsetGiotto( gobject = gobject, @@ -2226,10 +2396,12 @@ exprCellCellcom <- function(gobject, if (adjust_target == "feats") { comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), - by = .(LR_cell_comb)] + by = .(LR_cell_comb) + ] } else if (adjust_target == "cells") { comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), - by = .(LR_comb)] + by = .(LR_comb) + ] } @@ -2237,7 +2409,8 @@ exprCellCellcom <- function(gobject, all_p.adj <- comScore[["p.adj"]] lowest_p.adj <- min(all_p.adj[all_p.adj != 0]) comScore[, PI := ifelse(p.adj == 0, log2fc * (-log10(lowest_p.adj)), - log2fc * (-log10(p.adj)))] + log2fc * (-log10(p.adj)) + )] data.table::setorder(comScore, LR_comb, -LR_expr) @@ -2258,13 +2431,14 @@ exprCellCellcom <- function(gobject, #' @param seed_number seed number #' @returns list of randomly sampled cell ids with same cell type composition #' @keywords internal -.create_cell_type_random_cell_IDs <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - cluster_column = 
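# A hedged toy illustration of the PI significance score computed above:
# PI = log2fc * -log10(p.adj), with p.adj values of exactly zero replaced by
# the smallest non-zero adjusted p-value so the score stays finite.
p_adj <- c(0.5, 0.01, 0)
log2fc <- c(0.2, 1.5, 2.0)
lowest_p_adj <- min(p_adj[p_adj != 0])
PI <- ifelse(p_adj == 0,
    log2fc * (-log10(lowest_p_adj)),
    log2fc * (-log10(p_adj))
)
# PI is approximately c(0.06, 3, 4)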
"cell_types", - needed_cell_types, - set_seed = FALSE, - seed_number = 1234) { +.create_cell_type_random_cell_IDs <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + cluster_column = "cell_types", + needed_cell_types, + set_seed = FALSE, + seed_number = 1234) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2282,7 +2456,8 @@ exprCellCellcom <- function(gobject, spat_unit = spat_unit ) possible_metadata <- full_metadata[get(cluster_column) %in% unique( - needed_cell_types)] + needed_cell_types + )] sample_ids <- list() @@ -2291,12 +2466,14 @@ exprCellCellcom <- function(gobject, for (i in seq_along(uniq_types)) { uniq_type <- uniq_types[i] length_random <- length(needed_cell_types[ - needed_cell_types == uniq_type]) + needed_cell_types == uniq_type + ]) if (set_seed == TRUE) { set.seed(seed = seed_number) } sub_sample_ids <- possible_metadata[get(cluster_column) == uniq_type][ - sample(x = seq_len(.N), size = length_random)][["cell_ID"]] + sample(x = seq_len(.N), size = length_random) + ][["cell_ID"]] sample_ids[[i]] <- sub_sample_ids } return(unlist(sample_ids)) @@ -2388,29 +2565,30 @@ exprCellCellcom <- function(gobject, #' #' force(res2) #' @export -spatCellCellcom <- function(gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = NULL, - random_iter = 1000, - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = c("a little", "a lot", "none")) { +spatCellCellcom <- function( + gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = NULL, + random_iter = 1000, + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, + seed_number = 1234, + verbose = c("a little", "a lot", "none")) { verbose <- match.arg(verbose, choices = c("a little", "a lot", "none")) # Set feat_type and spat_unit @@ -2460,7 +2638,8 @@ spatCellCellcom <- function(gobject, ## get all combinations between cell types all_uniq_values <- unique(cell_metadata[[cluster_column]]) same_DT <- data.table::data.table( - V1 = all_uniq_values, V2 = all_uniq_values) + V1 = all_uniq_values, V2 = all_uniq_values + ) combn_DT <- data.table::as.data.table(t(combn(all_uniq_values, m = 2))) combn_DT <- rbind(same_DT, combn_DT) @@ -2469,30 +2648,31 @@ spatCellCellcom <- function(gobject, savelist <- lapply_flex( X = seq_len(nrow(combn_DT)), future.seed = TRUE, cores = cores, fun = function(row) { - cell_type_1 <- combn_DT[row][["V1"]] - cell_type_2 <- combn_DT[row][["V2"]] - - specific_scores <- specificCellCellcommunicationScores( - gobject = gobject, - feat_type = feat_type, - spat_unit = spat_unit, - cluster_column = cluster_column, - random_iter = random_iter, - cell_type_1 = cell_type_1, - cell_type_2 = cell_type_2, - feat_set_1 = feat_set_1, - feat_set_2 = feat_set_2, - spatial_network_name = spatial_network_name, - log2FC_addendum = log2FC_addendum, - min_observations = min_observations, - 
detailed = detailed, - adjust_method = adjust_method, - adjust_target = adjust_target, - set_seed = set_seed, - seed_number = seed_number, - verbose = verbose %in% c("a lot") - ) - }) + cell_type_1 <- combn_DT[row][["V1"]] + cell_type_2 <- combn_DT[row][["V2"]] + + specific_scores <- specificCellCellcommunicationScores( + gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + cluster_column = cluster_column, + random_iter = random_iter, + cell_type_1 = cell_type_1, + cell_type_2 = cell_type_2, + feat_set_1 = feat_set_1, + feat_set_2 = feat_set_2, + spatial_network_name = spatial_network_name, + log2FC_addendum = log2FC_addendum, + min_observations = min_observations, + detailed = detailed, + adjust_method = adjust_method, + adjust_target = adjust_target, + set_seed = set_seed, + seed_number = seed_number, + verbose = verbose %in% c("a lot") + ) + } + ) } else { ## for loop over all combinations ## savelist <- list() @@ -2502,9 +2682,12 @@ spatCellCellcom <- function(gobject, cell_type_1 <- combn_DT[row][["V1"]] cell_type_2 <- combn_DT[row][["V2"]] - if (verbose == "a little" || verbose == "a lot") - cat(sprintf("[PROCESS nr %d : %d and %d] ", - countdown, cell_type_1, cell_type_2)) + if (verbose == "a little" || verbose == "a lot") { + cat(sprintf( + "[PROCESS nr %d : %d and %d] ", + countdown, cell_type_1, cell_type_2 + )) + } if (verbose %in% c("a little", "none")) { specific_verbose <- FALSE @@ -2555,32 +2738,29 @@ spatCellCellcom <- function(gobject, #' @param cell_type_1 character. First cell type #' @param cell_type_2 character. Second cell type #' @export -specificCellCellcommunicationScores <- function( - gobject, - feat_type = NULL, - spat_unit = NULL, - spatial_network_name = "Delaunay_network", - cluster_column = NULL, - random_iter = 100, - cell_type_1 = NULL, - cell_type_2 = NULL, - feat_set_1, - feat_set_2, - gene_set_1 = NULL, - gene_set_2 = NULL, - log2FC_addendum = 0.1, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("feats", "cells"), - set_seed = FALSE, - seed_number = 1234, - verbose = TRUE -) { - +specificCellCellcommunicationScores <- function(gobject, + feat_type = NULL, + spat_unit = NULL, + spatial_network_name = "Delaunay_network", + cluster_column = NULL, + random_iter = 100, + cell_type_1 = NULL, + cell_type_2 = NULL, + feat_set_1, + feat_set_2, + gene_set_1 = NULL, + gene_set_2 = NULL, + log2FC_addendum = 0.1, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("feats", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = TRUE) { # Set feat_type and spat_unit spat_unit <- set_default_spat_unit( gobject = gobject, @@ -2609,8 +2789,8 @@ specificCellCellcommunicationScores <- function( if (is.null(cell_type_1) || is.null(cell_type_2)) { stop(sprintf( "`%s` and `%s` in `%s` must be given", - "cell_type_1", "cell_type_2", "cluster_column") - ) + "cell_type_1", "cell_type_2", "cluster_column" + )) } @@ -2647,7 +2827,8 @@ specificCellCellcommunicationScores <- function( cell_direction_2 <- paste0(cell_type_2, "-", cell_type_1) subset_annot_network <- annot_network[from_to %in% c( - cell_direction_1, cell_direction_2)] + cell_direction_1, cell_direction_2 + )] # make sure that there are sufficient observations if (nrow(subset_annot_network) <= min_observations) { @@ -2655,7 +2836,8 @@ 
specificCellCellcommunicationScores <- function( } else { # subset giotto object to only interacting cells subset_ids <- unique(c( - subset_annot_network$to, subset_annot_network$from)) + subset_annot_network$to, subset_annot_network$from + )) subsetGiotto <- subsetGiotto( gobject = gobject, cell_ids = subset_ids, @@ -2665,11 +2847,13 @@ specificCellCellcommunicationScores <- function( # get information about number of cells temp_meta <- pDataDT(subsetGiotto, - feat_type = feat_type, - spat_unit = spat_unit + feat_type = feat_type, + spat_unit = spat_unit ) nr_cell_types <- temp_meta[cell_ID %in% subset_ids][ - , .N, by = c(cluster_column)] + , .N, + by = c(cluster_column) + ] nr_cells <- nr_cell_types$N names(nr_cells) <- nr_cell_types$cell_types @@ -2683,8 +2867,8 @@ specificCellCellcommunicationScores <- function( feat_set_2 = feat_set_2 ) comScore <- comScore[(lig_cell_type == cell_type_1 & - rec_cell_type == cell_type_2) | - (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] + rec_cell_type == cell_type_2) | + (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] comScore[, lig_nr := nr_cells[lig_cell_type]] comScore[, rec_nr := nr_cells[rec_cell_type]] @@ -2743,8 +2927,8 @@ specificCellCellcommunicationScores <- function( feat_set_2 = feat_set_2 ) randomScore <- randomScore[(lig_cell_type == cell_type_1 & - rec_cell_type == cell_type_2) | - (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] + rec_cell_type == cell_type_2) | + (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)] @@ -2773,7 +2957,9 @@ specificCellCellcommunicationScores <- function( if (detailed == TRUE) { av_difference_scores <- rowMeans_flex(total_sum) sd_difference_scores <- apply( - total_sum, MARGIN = 1, FUN = stats::sd) + total_sum, + MARGIN = 1, FUN = stats::sd + ) comScore[, av_diff := av_difference_scores] comScore[, sd_diff := sd_difference_scores] @@ -2791,10 +2977,14 @@ specificCellCellcommunicationScores <- function( if (adjust_target == "feats") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_cell_comb)] + pvalue, + method = adjust_method + ), by = .(LR_cell_comb)] } else if (adjust_target == "cells") { comScore[, p.adj := stats::p.adjust( - pvalue, method = adjust_method), by = .(LR_comb)] + pvalue, + method = adjust_method + ), by = .(LR_comb)] } # get minimum adjusted p.value that is not zero @@ -2846,7 +3036,8 @@ specificCellCellcommunicationScores <- function( #' feat_set_2 = "9630013A20Rik" #' ) #' -#' spatialCC <- spatCellCellcom(gobject = g, +#' spatialCC <- spatCellCellcom( +#' gobject = g, #' cluster_column = "leiden_clus", #' feat_set_1 = "Gm19935", #' feat_set_2 = "9630013A20Rik", @@ -2857,20 +3048,21 @@ specificCellCellcommunicationScores <- function( #' combCC <- combCCcom(spatialCC = spatialCC, exprCC = exprCC) #' force(combCC) #' @export -combCCcom <- function(spatialCC, - exprCC, - min_lig_nr = 3, - min_rec_nr = 3, - min_padj_value = 1, - min_log2fc = 0, - min_av_diff = 0, - detailed = FALSE) { +combCCcom <- function( + spatialCC, + exprCC, + min_lig_nr = 3, + min_rec_nr = 3, + min_padj_value = 1, + min_log2fc = 0, + min_av_diff = 0, + detailed = FALSE) { # data.table variables lig_nr <- rec_nr <- p.adj <- log2fc <- av_diff <- NULL spatialCC <- spatialCC[lig_nr >= min_lig_nr & rec_nr >= min_rec_nr & p.adj <= min_padj_value & abs(log2fc) >= min_log2fc & - abs(av_diff) >= min_av_diff] + abs(av_diff) >= min_av_diff] if (detailed == TRUE) { diff --git a/R/spatial_interaction_spot.R b/R/spatial_interaction_spot.R 
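# The hunks below for R/spatial_interaction_spot.R replace the reshape2 helpers
# with data.table-based equivalents (melt_matrix() and data.table::dcast()).
# A hedged sketch of the dcast() pattern on a toy pairs table, assuming only
# that the "from"/"to"/"N" columns mirror the ones used in the code:
library(data.table)
pairs_for_mat <- data.table(
    from = c("spot1", "spot1", "spot2"),
    to   = c("spot2", "spot3", "spot3"),
    N    = c(2L, 1L, 3L)
)
# dcast() + as.matrix(rownames = "from") returns the same wide matrix that
# reshape2::acast(pairs_for_mat, from ~ to, value.var = "N", fill = 0) did
pairs_mat <- as.matrix(
    dcast(pairs_for_mat, from ~ to, value.var = "N", fill = 0),
    rownames = "from"
)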
index ab8ed82d4..edc3c92b7 100644 --- a/R/spatial_interaction_spot.R +++ b/R/spatial_interaction_spot.R @@ -20,9 +20,8 @@ NULL #' value inner each spot #' @param cell_IDs cell_IDs #' @keywords internal -.cell_proximity_spots_internal <- function( - cell_IDs, - dwls_values) { +.cell_proximity_spots_internal <- function(cell_IDs, + dwls_values) { # data.table variables value <- unified_int <- Var1 <- Var2 <- internal <- NULL @@ -54,7 +53,7 @@ NULL diag(diff_ct) <- NA diff_ct[lower.tri(diff_ct)] <- NA # transfer format to data.table - diff_ct <- data.table::as.data.table(reshape2::melt(diff_ct)) + diff_ct <- melt_matrix(diff_ct) diff_ct <- diff_ct[value != "NA"] diff_ct[, c("Var1", "Var2") := lapply( .SD, as.character @@ -86,8 +85,9 @@ NULL #' value for interacted spots #' @param pairs data.table of paired spots. Format: cell_ID1, cell_ID2, N #' @keywords internal -.cell_proximity_spots_external <- function(pairs, - dwls_values) { +.cell_proximity_spots_external <- function( + pairs, + dwls_values) { cell_IDs <- unique(c(pairs$from, pairs$to)) pairs <- pairs[, .N, by = c("from", "to")] # add internal pairs to make full matrix @@ -99,10 +99,10 @@ NULL pairs_for_mat <- pairs_for_mat[, .N, by = c("from", "to")] # make square matrix of interaction between spots - pairs_mat <- reshape2::acast( + pairs_mat <- as.matrix(data.table::dcast( pairs_for_mat, from ~ to, value.var = "N", fill = 0 - ) + ), rownames = "from") pairs_mat <- pairs_mat[cell_IDs, cell_IDs] # calculate cell-type/cell-type interactions @@ -141,10 +141,9 @@ NULL #' @param pairs_external data.table of paired spots. Format: cell_ID1, cell_ID2, #' N. Passes to `.cell_proximity_spots_external` `pairs` param #' @keywords internal -.cell_proximity_spots <- function( - cell_IDs, - pairs_external, - dwls_values) { +.cell_proximity_spots <- function(cell_IDs, + pairs_external, + dwls_values) { # data.table variables V1 <- internal <- external <- s1 <- s2 <- unified_int <- type_int <- NULL @@ -225,7 +224,7 @@ NULL #' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 7, mean = 10), #' nrow = length(sign_gene) #' ) #' rownames(sign_matrix) <- sign_gene @@ -235,22 +234,21 @@ NULL #' #' cellProximityEnrichmentSpots(gobject = g) #' @export -cellProximityEnrichmentSpots <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "spatial_network", - cluster_column = "cell_ID", - cells_in_spot = 1, - number_of_simulations = 100, - adjust_method = c( - "none", "fdr", "bonferroni", "BH", - "holm", "hochberg", "hommel", - "BY" - ), - set_seed = TRUE, - seed_number = 1234, - verbose = FALSE) { +cellProximityEnrichmentSpots <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "spatial_network", + cluster_column = "cell_ID", + cells_in_spot = 1, + number_of_simulations = 100, + adjust_method = c( + "none", "fdr", "bonferroni", "BH", + "holm", "hochberg", "hommel", + "BY" + ), + set_seed = TRUE, + seed_number = 1234, + verbose = FALSE) { # p.adj test sel_adjust_method <- match.arg(adjust_method, choices = c( "none", "fdr", "bonferroni", "BH", @@ -476,11 +474,10 @@ cellProximityEnrichmentSpots <- function( #' #' @returns matrix #' @export -featExpDWLS <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - ave_celltype_exp) { +featExpDWLS <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + ave_celltype_exp) { # exact spatial_enrichment matrix dwls_values <- 
getSpatialEnrichment(gobject, spat_unit = spat_unit, @@ -537,12 +534,11 @@ featExpDWLS <- function( #' @param ave_celltype_exp average expression matrix in cell types #' @returns matrix #' @keywords internal -.cal_expr_residual <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - ave_celltype_exp) { +.cal_expr_residual <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + ave_celltype_exp) { # expression data values <- match.arg( expression_values, @@ -605,12 +601,11 @@ featExpDWLS <- function( #' #' cellProximityEnrichmentEachSpot(gobject = g) #' @export -cellProximityEnrichmentEachSpot <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spatial_network_name = "spatial_network", - cluster_column = "cell_ID") { +cellProximityEnrichmentEachSpot <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spatial_network_name = "spatial_network", + cluster_column = "cell_ID") { spatial_network_annot <- annotateSpatialNetwork( gobject = gobject, spat_unit = spat_unit, @@ -715,8 +710,8 @@ cellProximityEnrichmentEachSpot <- function( dimnames = list(names(dwls_target_cell), names(spot_proximity)) ) } - spot_proximity <- reshape2::melt(spot_proximity) - spot_proximity <- data.table::data.table(spot_proximity) + + spot_proximity <- melt_matrix(spot_proximity) spot_proximity[, c("Var1", "Var2") := lapply( .SD, as.character ), .SDcols = c("Var1", "Var2")] @@ -734,13 +729,12 @@ cellProximityEnrichmentEachSpot <- function( #' cell proximity score of selected cell for spots #' @returns data.table #' @keywords internal -.cal_diff_per_interaction <- function( - sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual) { +.cal_diff_per_interaction <- function(sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual) { pcc_diff <- sel <- other <- NULL # get data @@ -798,14 +792,13 @@ NULL #' @describeIn do_permuttest_spot Calculate original values for spots #' @keywords internal -.do_permuttest_original_spot <- function( - sel_int, - other_ints, - select_ind, - other_ind, - name = "orig", - proximityMat, - expr_residual) { +.do_permuttest_original_spot <- function(sel_int, + other_ints, + select_ind, + other_ind, + name = "orig", + proximityMat, + expr_residual) { resultsDT <- .cal_diff_per_interaction( sel_int = sel_int, other_ints = other_ints, @@ -821,16 +814,15 @@ NULL #' @describeIn do_permuttest_spot Calculate random values for spots #' @keywords internal -.do_permuttest_random_spot <- function( - sel_int, - other_ints, - select_ind, - other_ind, - name = "perm_1", - proximityMat, - expr_residual, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_random_spot <- function(sel_int, + other_ints, + select_ind, + other_ind, + name = "perm_1", + proximityMat, + expr_residual, + set_seed = TRUE, + seed_number = 1234) { # data.table variables features <- NULL @@ -881,17 +873,16 @@ NULL #' @describeIn do_permuttest_spot Calculate multiple random values for spots #' @keywords internal -.do_multi_permuttest_random_spot <- function( - sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - n = 100, - cores = NA, - set_seed = TRUE, - seed_number = 1234) { +.do_multi_permuttest_random_spot <- function(sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + n = 100, + cores = NA, + set_seed = TRUE, + seed_number = 1234) { if (set_seed == TRUE) { 
seed_number_list <- seed_number:(seed_number + (n - 1)) } @@ -919,18 +910,17 @@ NULL #' @describeIn do_permuttest_spot Performs permutation test on subsets of a #' matrix for spots #' @keywords internal -.do_permuttest_spot <- function( - sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.do_permuttest_spot <- function(sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # data.table variables log2fc_diff <- log2fc <- sel <- other <- features <- p_higher <- p_lower <- perm_sel <- NULL @@ -1010,19 +1000,18 @@ NULL #' for spots #' @returns differential test on subsets of a matrix #' @keywords internal -.do_cell_proximity_test_spot <- function( - sel_int, - other_ints, - select_ind, - other_ind, - proximityMat, - expr_residual, - diff_test, - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.do_cell_proximity_test_spot <- function(sel_int, + other_ints, + select_ind, + other_ind, + proximityMat, + expr_residual, + diff_test, + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # get parameters diff_test <- match.arg( diff_test, @@ -1058,22 +1047,21 @@ NULL #' proximity to other cell types for spots. #' @returns data.table #' @keywords internal -.findICF_per_interaction_spot <- function( - sel_int, - all_ints, - proximityMat, - expr_residual, - dwls_values, - dwls_cutoff = 0.001, - CCI_cell_score = 0.01, - minimum_unique_cells = 1, - minimum_unique_int_cells = 1, - diff_test = "permutation", - n_perm = 100, - adjust_method = "fdr", - cores = 2, - set_seed = TRUE, - seed_number = 1234) { +.findICF_per_interaction_spot <- function(sel_int, + all_ints, + proximityMat, + expr_residual, + dwls_values, + dwls_cutoff = 0.001, + CCI_cell_score = 0.01, + minimum_unique_cells = 1, + minimum_unique_int_cells = 1, + diff_test = "permutation", + n_perm = 100, + adjust_method = "fdr", + cores = 2, + set_seed = TRUE, + seed_number = 1234) { # data.table variables unified_int <- NULL @@ -1146,7 +1134,9 @@ NULL #' @param spat_unit spatial unit (e.g. 'cell') #' @param feat_type feature type (e.g. 'rna') #' @param expression_values expression values to use -#' @param ave_celltype_exp average feature expression in each cell type +#' @param ave_celltype_exp `matrix` or `data.frame`. Average feature expression +#' in each cell type. Colnames should be the cell type and rownames are feat +#' names. 
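# A hedged, illustrative sketch of the `ave_celltype_exp` layout described
# above (the cell type labels are made up; the feature names reuse those from
# the examples in this file): a features-by-cell-types matrix with feature
# names as rownames and cell type names as colnames.
ave_celltype_exp <- matrix(
    c(1.2, 0.3, 0.8, 2.1, 0.1, 0.5),
    nrow = 3,
    dimnames = list(
        c("Gna12", "Ccnd2", "Btbd17"),  # feature names (rownames)
        c("cell_type_A", "cell_type_B") # cell types (colnames)
    )
)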
#' @param selected_features subset of selected features (optional) #' @param spatial_network_name name of spatial network to use #' @param deconv_name name of deconvolution/spatial enrichment values to use @@ -1197,7 +1187,7 @@ NULL #' ) #' sign_gene <- x$feats #' -#' sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +#' sign_matrix <- matrix(rnorm(length(sign_gene) * 7, mean = 10), #' nrow = length(sign_gene) #' ) #' rownames(sign_matrix) <- sign_gene @@ -1205,40 +1195,43 @@ NULL #' #' g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) #' ave_celltype_exp <- calculateMetaTable(g, metadata_cols = "leiden_clus") -#' ave_celltype_exp <- reshape2::dcast(ave_celltype_exp, variable~leiden_clus) -#' rownames(ave_celltype_exp) <- ave_celltype_exp$variable -#' ave_celltype_exp <- ave_celltype_exp[,-1] +#' ave_celltype_exp <- data.table::dcast( +#' ave_celltype_exp, variable ~ leiden_clus +#' ) +#' ave_celltype_exp <- as.matrix(ave_celltype_exp, rownames = "variable") #' colnames(ave_celltype_exp) <- colnames(sign_matrix) -#' -#' findICFSpot(g, +#' +#' res <- findICFSpot(g, #' spat_unit = "cell", #' feat_type = "rna", #' ave_celltype_exp = ave_celltype_exp, #' spatial_network_name = "spatial_network" #' ) +#' force(res) #' @seealso [findInteractionChangedFeats()] #' @md #' @export -findICFSpot <- function(gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - ave_celltype_exp, - selected_features = NULL, - spatial_network_name = "Delaunay_network", - deconv_name = "DWLS", - minimum_unique_cells = 5, - minimum_unique_int_cells = 5, - CCI_cell_score = 0.1, - dwls_cutoff = 0.001, - diff_test = "permutation", - nr_permutations = 100, - adjust_method = "fdr", - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = FALSE) { +findICFSpot <- function( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + ave_celltype_exp, + selected_features = NULL, + spatial_network_name = "Delaunay_network", + deconv_name = "DWLS", + minimum_unique_cells = 5, + minimum_unique_int_cells = 5, + CCI_cell_score = 0.1, + dwls_cutoff = 0.001, + diff_test = "permutation", + nr_permutations = 100, + adjust_method = "fdr", + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, + seed_number = 1234, + verbose = FALSE) { # data.table variables unified_int <- NULL @@ -1406,17 +1399,16 @@ findICFSpot <- function(gobject, #' #' filterICFSpot(icfObject = icfObject) #' @export -filterICFSpot <- function( - icfObject, - min_cells = 4, - min_cells_expr_resi = 0.05, - min_int_cells = 4, - min_int_cells_expr_resi = 0.05, - min_fdr = 0.5, - min_pcc_diff = 0.05, - min_zscore = 0.05, - zscores_column = c("cell_type", "features"), - direction = c("both", "up", "down")) { +filterICFSpot <- function(icfObject, + min_cells = 4, + min_cells_expr_resi = 0.05, + min_int_cells = 4, + min_int_cells_expr_resi = 0.05, + min_fdr = 0.5, + min_pcc_diff = 0.05, + min_zscore = 0.05, + zscores_column = c("cell_type", "features"), + direction = c("both", "up", "down")) { # data.table variables nr_select <- int_nr_select <- zscores <- perm_diff <- sel <- other <- p.adj <- NULL @@ -1500,18 +1492,17 @@ filterICFSpot <- function( #' ICF_features = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") #' ) #' @export -plotICFSpot <- function( - gobject, - icfObject, - source_type, - source_markers, - ICF_features, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - 
save_param = list(), - default_save_name = "plotICFSpot") { +plotICFSpot <- function(gobject, + icfObject, + source_type, + source_markers, + ICF_features, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotICFSpot") { # data.table variables cell_type <- int_cell_type <- pcc_diff <- feats <- perm_diff <- NULL @@ -1620,28 +1611,27 @@ plotICFSpot <- function( #' min_pcc_diff = 0.01 #' ) #' @export -plotCellProximityFeatSpot <- function( - gobject, - icfObject, - method = c( - "volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot" - ), - min_cells = 4, - min_cells_expr_resi = 0.05, - min_int_cells = 4, - min_int_cells_expr_resi = 0.05, - min_fdr = 0.5, - min_pcc_diff = 0.05, - min_zscore = 0.05, - zscores_column = c("cell_type", "features"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCellProximityFeats") { +plotCellProximityFeatSpot <- function(gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 4, + min_cells_expr_resi = 0.05, + min_int_cells = 4, + min_int_cells_expr_resi = 0.05, + min_fdr = 0.5, + min_pcc_diff = 0.05, + min_zscore = 0.05, + zscores_column = c("cell_type", "features"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCellProximityFeats") { if (!"icfObject" %in% class(icfObject)) { stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") @@ -2033,28 +2023,27 @@ plotCellProximityFeatSpot <- function( #' * PI: significance score: log2fc \* -log10(p.adj) #' } #' @keywords internal -.specific_CCCScores_spots <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expr_residual, - dwls_values, - proximityMat, - random_iter = 1000, - cell_type_1 = "astrocytes", - cell_type_2 = "endothelial", - feature_set_1, - feature_set_2, - min_observations = 2, - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", " BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("features", "cells"), - set_seed = FALSE, - seed_number = 1234, - verbose = FALSE) { +.specific_CCCScores_spots <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expr_residual, + dwls_values, + proximityMat, + random_iter = 1000, + cell_type_1 = "astrocytes", + cell_type_2 = "endothelial", + feature_set_1, + feature_set_2, + min_observations = 2, + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", " BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("features", "cells"), + set_seed = FALSE, + seed_number = 1234, + verbose = FALSE) { # data.table variables from_to <- cell_ID <- lig_cell_type <- rec_cell_type <- lig_nr <- rec_nr <- rand_expr <- NULL @@ -2252,6 +2241,8 @@ plotCellProximityFeatSpot <- function( #' @param expression_values (e.g. 'normalized', 'scaled', 'custom') #' @param spatial_network_name spatial network to use for identifying #' interacting cells +#' @param spat_enr_name name of spatial enrichment containing DWLS results. 
+#' Default = `"DWLS"` #' @param cluster_column cluster column with cell type information #' @param random_iter number of iterations #' @param feature_set_1 first specific feature set from feature pairs @@ -2303,30 +2294,31 @@ plotCellProximityFeatSpot <- function( #' * p.adj: adjusted p-value #' * PI: significanc score: log2fc \* -log10(p.adj) #' } +#' @md #' @export -spatCellCellcomSpots <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - ave_celltype_exp, - spatial_network_name = "Delaunay_network", - cluster_column = "cell_ID", - random_iter = 1000, - feature_set_1, - feature_set_2, - min_observations = 2, - expression_values = c("normalized", "scaled", "custom"), - detailed = FALSE, - adjust_method = c( - "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", - "BY", "none" - ), - adjust_target = c("features", "cells"), - do_parallel = TRUE, - cores = NA, - set_seed = TRUE, - seed_number = 1234, - verbose = c("a little", "a lot", "none")) { +spatCellCellcomSpots <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + ave_celltype_exp, + spatial_network_name = "Delaunay_network", + spat_enr_name = "DWLS", + cluster_column = "cell_ID", + random_iter = 1000, + feature_set_1, + feature_set_2, + min_observations = 2, + expression_values = c("normalized", "scaled", "custom"), + detailed = FALSE, + adjust_method = c( + "fdr", "bonferroni", "BH", "holm", "hochberg", "hommel", + "BY", "none" + ), + adjust_target = c("features", "cells"), + do_parallel = TRUE, + cores = NA, + set_seed = TRUE, + seed_number = 1234, + verbose = c("a little", "a lot", "none")) { # data.table vars V1 <- V2 <- LR_cell_comb <- NULL @@ -2378,10 +2370,11 @@ spatCellCellcomSpots <- function( proximityMat <- proximityMat[, intersect_cell_IDs] # exact spatial_enrichment matrix - dwls_values <- get_spatial_enrichment( + dwls_values <- getSpatialEnrichment( gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, + name = spat_enr_name, output = "data.table" ) data.table::setDF(dwls_values) diff --git a/R/spatial_interaction_visuals.R b/R/spatial_interaction_visuals.R index 15769e7d9..cd9ab46d0 100644 --- a/R/spatial_interaction_visuals.R +++ b/R/spatial_interaction_visuals.R @@ -18,17 +18,16 @@ #' CPscore = cellProximityEnrichment(g, cluster_column = "leiden_clus") #' ) #' @export -cellProximityBarplot <- function( - gobject, - CPscore, - min_orig_ints = 5, - min_sim_ints = 5, - p_val = 0.05, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityBarplot") { +cellProximityBarplot <- function(gobject, + CPscore, + min_orig_ints = 5, + min_sim_ints = 5, + p_val = 0.05, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityBarplot") { table_mean_results_dc <- CPscore$enrichm_res ## filter to remove low number of cell-cell proximity interactions ## @@ -68,7 +67,7 @@ cellProximityBarplot <- function( bpl <- bpl + ggplot2::labs(y = "# of interactions") bpl - combo_plot <- cowplot::plot_grid( + combo_plot <- plot_grid( pl, bpl, ncol = 2, rel_heights = c(1), rel_widths = c(3, 1.5), align = "h" @@ -108,18 +107,17 @@ cellProximityBarplot <- function( #' #' cellProximityHeatmap(gobject = g, CPscore = x) #' @export -cellProximityHeatmap <- function( - gobject, - CPscore, - scale = TRUE, - order_cell_types = TRUE, - color_breaks = NULL, - color_names = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = 
"cellProximityHeatmap") { +cellProximityHeatmap <- function(gobject, + CPscore, + scale = TRUE, + order_cell_types = TRUE, + color_breaks = NULL, + color_names = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityHeatmap") { enrich_res <- CPscore$enrichm_res # data.table variables @@ -251,27 +249,26 @@ cellProximityHeatmap <- function( #' #' cellProximityNetwork(gobject = g, CPscore = x) #' @export -cellProximityNetwork <- function( - gobject, - CPscore, - remove_self_edges = FALSE, - self_loop_strength = 0.1, - color_depletion = "lightgreen", - color_enrichment = "red", - rescale_edge_weights = TRUE, - edge_weight_range_depletion = c(0.1, 1), - edge_weight_range_enrichment = c(1, 5), - layout = c("Fruchterman", "DrL", "Kamada-Kawai"), - only_show_enrichment_edges = FALSE, - edge_width_range = c(0.1, 2), - node_size = 4, - node_color_code = NULL, - node_text_size = 6, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximityNetwork") { +cellProximityNetwork <- function(gobject, + CPscore, + remove_self_edges = FALSE, + self_loop_strength = 0.1, + color_depletion = "lightgreen", + color_enrichment = "red", + rescale_edge_weights = TRUE, + edge_weight_range_depletion = c(0.1, 1), + edge_weight_range_enrichment = c(1, 5), + layout = c("Fruchterman", "DrL", "Kamada-Kawai"), + only_show_enrichment_edges = FALSE, + edge_width_range = c(0.1, 2), + node_size = 4, + node_color_code = NULL, + node_text_size = 6, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximityNetwork") { # extract scores # data.table variables @@ -446,33 +443,32 @@ NULL #' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell #' interactions according to spatial coordinates in ggplot mode #' @keywords internal -.cellProximityVisPlot_2D_ggplot <- function( - gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - ...) { +.cellProximityVisPlot_2D_ggplot <- function(gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + ...) 
{ # data.table variables unified_int <- sdimx_begin <- sdimy_begin <- sdimx_end <- sdimy_end <- x_start <- x_end <- NULL @@ -694,32 +690,33 @@ NULL #' @describeIn cellProximityVisPlot_internals Visualize 2D cell-cell #' interactions according to spatial coordinates in plotly mode #' @keywords internal -.cellProximityVisPlot_2D_plotly <- function( - gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 2, - point_size_other = 1, - point_alpha_other = 0.3, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - ...) { +.cellProximityVisPlot_2D_plotly <- function(gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 2, + point_size_other = 1, + point_alpha_other = 0.3, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + ...) { + package_check("plotly") + # data.table variables cell_ID <- unified_int <- NULL @@ -956,34 +953,35 @@ NULL #' @describeIn cellProximityVisPlot_internals Visualize 3D cell-cell #' interactions according to spatial coordinates in plotly mode #' @keywords internal -.cellProximityVisPlot_3D_plotly <- function( - gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - sdimz = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 2, - point_size_other = 1, - point_alpha_other = 0.5, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - ...) { +.cellProximityVisPlot_3D_plotly <- function(gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + sdimz = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 2, + point_size_other = 1, + point_alpha_other = 0.5, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + ...) 
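# The package_check("plotly") calls added in these plotly-based internals check
# that the package is available before the interactive code paths run. A
# minimal base-R sketch of an equivalent guard (the exact behaviour of the
# package_check() helper may differ):
if (!requireNamespace("plotly", quietly = TRUE)) {
    stop(
        "package 'plotly' is needed for this plot; ",
        "install it with install.packages('plotly')"
    )
}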
{ + package_check("plotly") + # data.table variables cell_ID <- unified_int <- NULL @@ -1243,40 +1241,39 @@ NULL #' cluster_column = "leiden_clus", sdimx = "sdimx", sdimy = "sdimy" #' ) #' @export -cellProximityVisPlot <- function( - gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = NULL, - sdimy = NULL, - sdimz = NULL, - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - plot_method = c("ggplot", "plotly"), - ...) { +cellProximityVisPlot <- function(gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = NULL, + sdimy = NULL, + sdimz = NULL, + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + plot_method = c("ggplot", "plotly"), + ...) 
{ ## decide plot method plot_method <- match.arg(plot_method, choices = c("ggplot", "plotly")) axis_scale <- match.arg(axis_scale, c("cube", "real", "custom")) @@ -1421,7 +1418,7 @@ cellProximityVisPlot <- function( #' @param min_spat_diff minimum absolute spatial expression difference #' @param min_log2_fc minimum log2 fold-change #' @param min_zscore minimum z-score change -#' @param zscores_column calculate z-scores over cell types or featuress +#' @param zscores_column calculate z-scores over cell types or features #' @param direction differential expression directions to keep #' @returns volcano, cell_barplot, cell-cell, cell_sankey, heatmap, or dotplot #' @examples @@ -1433,29 +1430,28 @@ cellProximityVisPlot <- function( #' show_plot = TRUE, save_plot = FALSE, return_plot = FALSE #' ) #' @export -plotCellProximityFeats <- function( - gobject, - icfObject, - method = c( - "volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot" - ), - min_cells = 4, - min_cells_expr = 1, - min_int_cells = 4, - min_int_cells_expr = 1, - min_fdr = 0.1, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCellProximityFeats") { +plotCellProximityFeats <- function(gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 4, + min_cells_expr = 1, + min_int_cells = 4, + min_int_cells_expr = 1, + min_fdr = 0.1, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCellProximityFeats") { if (!"icfObject" %in% class(icfObject)) { stop("icfObject needs to be the output from findInteractionChangedFeats() or findICF()") @@ -1822,29 +1818,28 @@ plotCellProximityFeats <- function( #' save_plot = FALSE, return_plot = FALSE #' ) #' @export -plotCPF <- function( - gobject, - icfObject, - method = c( - "volcano", "cell_barplot", "cell-cell", "cell_sankey", - "heatmap", "dotplot" - ), - min_cells = 5, - min_cells_expr = 1, - min_int_cells = 3, - min_int_cells_expr = 1, - min_fdr = 0.05, - min_spat_diff = 0.2, - min_log2_fc = 0.2, - min_zscore = 2, - zscores_column = c("cell_type", "feats"), - direction = c("both", "up", "down"), - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCPG") { +plotCPF <- function(gobject, + icfObject, + method = c( + "volcano", "cell_barplot", "cell-cell", "cell_sankey", + "heatmap", "dotplot" + ), + min_cells = 5, + min_cells_expr = 1, + min_int_cells = 3, + min_int_cells_expr = 1, + min_fdr = 0.05, + min_spat_diff = 0.2, + min_log2_fc = 0.2, + min_zscore = 2, + zscores_column = c("cell_type", "feats"), + direction = c("both", "up", "down"), + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCPG") { plotCellProximityFeats( gobject = gobject, icfObject = icfObject, @@ -1894,18 +1889,17 @@ plotCPF <- function( #' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") #' ) #' @export -plotInteractionChangedFeats <- function( - gobject, - icfObject, - source_type, - 
source_markers, - ICF_feats, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotInteractionChangedFeats") { +plotInteractionChangedFeats <- function(gobject, + icfObject, + source_type, + source_markers, + ICF_feats, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotInteractionChangedFeats") { # data.table variables cell_type <- int_cell_type <- log2fc <- NULL @@ -2017,18 +2011,17 @@ plotInteractionChangedFeats <- function( #' ICF_feats = c("3" = "Gna12", "1" = "Ccnd2", "8" = "Btbd17") #' ) #' @export -plotICF <- function( - gobject, - icfObject, - source_type, - source_markers, - ICF_feats, - cell_color_code = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotICF") { +plotICF <- function(gobject, + icfObject, + source_type, + source_markers, + ICF_feats, + cell_color_code = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotICF") { plotInteractionChangedFeats( gobject = gobject, icfObject = icfObject, @@ -2084,23 +2077,22 @@ plotICF <- function( #' selected_interactions = "1--8" #' ) #' @export -plotCombineInteractionChangedFeats <- function( - gobject, - combIcfObject, - selected_interactions = NULL, - selected_feat_to_feat = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "feats"), - facet_scales = "fixed", - facet_ncol = length(selected_feat_to_feat), - facet_nrow = length(selected_interactions), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineICF") { +plotCombineInteractionChangedFeats <- function(gobject, + combIcfObject, + selected_interactions = NULL, + selected_feat_to_feat = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "feats"), + facet_scales = "fixed", + facet_ncol = length(selected_feat_to_feat), + facet_nrow = length(selected_interactions), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineICF") { ## check validity if (!"combIcfObject" %in% class(combIcfObject)) { stop("combIcfObject needs to be the output from @@ -2295,23 +2287,22 @@ plotCombineInteractionChangedFeats <- function( #' selected_interactions = "1--8" #' ) #' @export -plotCombineICF <- function( - gobject, - combIcfObject, - selected_interactions = NULL, - selected_feat_to_feat = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "feats"), - facet_scales = "fixed", - facet_ncol = length(selected_feat_to_feat), - facet_nrow = length(selected_interactions), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineICF") { +plotCombineICF <- function(gobject, + combIcfObject, + selected_interactions = NULL, + selected_feat_to_feat = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "feats"), + facet_scales = "fixed", + facet_ncol = length(selected_feat_to_feat), + facet_nrow = length(selected_interactions), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name 
= "plotCombineICF") { plotCombineInteractionChangedFeats( gobject = gobject, combIcfObject = combIcfObject, @@ -2389,23 +2380,22 @@ plotCombineICF <- function( #' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") #' ) #' @export -plotCombineCellCellCommunication <- function( - gobject, - combCCcom, - selected_LR = NULL, - selected_cell_LR = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "genes"), - facet_scales = "fixed", - facet_ncol = length(selected_LR), - facet_nrow = length(selected_cell_LR), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineCellCellCommunication") { +plotCombineCellCellCommunication <- function(gobject, + combCCcom, + selected_LR = NULL, + selected_cell_LR = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "genes"), + facet_scales = "fixed", + facet_ncol = length(selected_LR), + facet_nrow = length(selected_cell_LR), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineCellCellCommunication") { # data.table variables LR_comb <- LR_cell_comb <- lig_expr <- lig_expr_spat <- rec_expr <- rec_expr_spat <- LR_expr <- LR_expr_spat <- NULL @@ -2601,23 +2591,22 @@ plotCombineCellCellCommunication <- function( #' selected_LR = c("Gm19935-9630013A20Rik"), selected_cell_LR = c("1--1") #' ) #' @export -plotCombineCCcom <- function( - gobject, - combCCcom, - selected_LR = NULL, - selected_cell_LR = NULL, - detail_plot = TRUE, - simple_plot = FALSE, - simple_plot_facet = c("interaction", "genes"), - facet_scales = "fixed", - facet_ncol = length(selected_LR), - facet_nrow = length(selected_cell_LR), - colors = c("#9932CC", "#FF8C00"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCombineCCcom") { +plotCombineCCcom <- function(gobject, + combCCcom, + selected_LR = NULL, + selected_cell_LR = NULL, + detail_plot = TRUE, + simple_plot = FALSE, + simple_plot_facet = c("interaction", "genes"), + facet_scales = "fixed", + facet_ncol = length(selected_LR), + facet_nrow = length(selected_cell_LR), + colors = c("#9932CC", "#FF8C00"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCombineCCcom") { plotCombineCellCellCommunication( gobject = gobject, combCCcom = combCCcom, @@ -2669,26 +2658,25 @@ plotCombineCCcom <- function( #' #' plotCCcomHeatmap(gobject = g, comScores = comScores, show_plot = TRUE) #' @export -plotCCcomHeatmap <- function( - gobject, - comScores, - selected_LR = NULL, - selected_cell_LR = NULL, - show_LR_names = TRUE, - show_cell_LR_names = TRUE, - show = c("PI", "LR_expr", "log2fc"), - cor_method = c("pearson", "kendall", "spearman"), - aggl_method = c( - "ward.D", "ward.D2", "single", "complete", "average", - "mcquitty", "median", "centroid" - ), - gradient_color = NULL, - gradient_style = c("divergent", "sequential"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCCcomHeatmap") { +plotCCcomHeatmap <- function(gobject, + comScores, + selected_LR = NULL, + selected_cell_LR = NULL, + show_LR_names = TRUE, + show_cell_LR_names = TRUE, + show = c("PI", "LR_expr", "log2fc"), + cor_method = c("pearson", "kendall", "spearman"), + aggl_method = c( + "ward.D", "ward.D2", "single", 
"complete", "average", + "mcquitty", "median", "centroid" + ), + gradient_color = NULL, + gradient_style = c("divergent", "sequential"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCCcomHeatmap") { # get parameters cor_method <- match.arg( cor_method, @@ -2825,26 +2813,25 @@ plotCCcomHeatmap <- function( #' #' plotCCcomDotplot(gobject = g, comScores = comScores, show_plot = TRUE) #' @export -plotCCcomDotplot <- function( - gobject, - comScores, - selected_LR = NULL, - selected_cell_LR = NULL, - show_LR_names = TRUE, - show_cell_LR_names = TRUE, - cluster_on = c("PI", "LR_expr", "log2fc"), - cor_method = c("pearson", "kendall", "spearman"), - aggl_method = c( - "ward.D", "ward.D2", "single", "complete", "average", - "mcquitty", "median", "centroid" - ), - dot_color_gradient = NULL, - gradient_style = c("divergent", "sequential"), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotCCcomDotplot") { +plotCCcomDotplot <- function(gobject, + comScores, + selected_LR = NULL, + selected_cell_LR = NULL, + show_LR_names = TRUE, + show_cell_LR_names = TRUE, + cluster_on = c("PI", "LR_expr", "log2fc"), + cor_method = c("pearson", "kendall", "spearman"), + aggl_method = c( + "ward.D", "ward.D2", "single", "complete", "average", + "mcquitty", "median", "centroid" + ), + dot_color_gradient = NULL, + gradient_style = c("divergent", "sequential"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotCCcomDotplot") { # get parameters cor_method <- match.arg( cor_method, @@ -3012,24 +2999,23 @@ plotCCcomDotplot <- function( #' #' plotRankSpatvsExpr(gobject = g, combCC = combCC) #' @export -plotRankSpatvsExpr <- function( - gobject, - combCC, - expr_rnk_column = "LR_expr_rnk", - spat_rnk_column = "LR_spat_rnk", - dot_color_gradient = NULL, - midpoint = deprecated(), - gradient_midpoint = 10, - gradient_style = c("divergent", "sequential"), - size_range = c(0.01, 1.5), - xlims = NULL, - ylims = NULL, - selected_ranks = c(1, 10, 20), - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotRankSpatvsExpr") { +plotRankSpatvsExpr <- function(gobject, + combCC, + expr_rnk_column = "LR_expr_rnk", + spat_rnk_column = "LR_spat_rnk", + dot_color_gradient = NULL, + midpoint = deprecated(), + gradient_midpoint = 10, + gradient_style = c("divergent", "sequential"), + size_range = c(0.01, 1.5), + xlims = NULL, + ylims = NULL, + selected_ranks = c(1, 10, 20), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotRankSpatvsExpr") { # deprecate if (GiottoUtils::is_present(midpoint)) { deprecate_warn( @@ -3133,10 +3119,9 @@ plotRankSpatvsExpr <- function( #' @param second_col second column to use #' @returns ggplot #' @keywords internal -.plotRecovery_sub <- function( - combCC, - first_col = "LR_expr_rnk", - second_col = "LR_spat_rnk") { +.plotRecovery_sub <- function(combCC, + first_col = "LR_expr_rnk", + second_col = "LR_spat_rnk") { # data.table variables concord <- perc <- not_concord <- secondrank <- secondrank_perc <- NULL @@ -3212,17 +3197,16 @@ plotRankSpatvsExpr <- function( #' #' plotRecovery(gobject = g, combCC = combCC) #' @export -plotRecovery <- function( - gobject, - combCC, - expr_rnk_column = "exprPI_rnk", - spat_rnk_column = "spatPI_rnk", - ground_truth = c("spatial", "expression"), - show_plot = NULL, - 
return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "plotRecovery") { +plotRecovery <- function(gobject, + combCC, + expr_rnk_column = "exprPI_rnk", + spat_rnk_column = "spatPI_rnk", + ground_truth = c("spatial", "expression"), + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "plotRecovery") { ground_truth <- match.arg( ground_truth, choices = c("spatial", "expression") @@ -3315,40 +3299,39 @@ plotRecovery <- function( #' interaction_name = x #' ) #' @export -cellProximitySpatPlot2D <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - spat_loc_name = NULL, - interaction_name = NULL, - cluster_column = NULL, - sdimx = "sdimx", - sdimy = "sdimy", - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = FALSE, - show_network = FALSE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - coord_fix_ratio = 1, - show_legend = TRUE, - point_size_select = 2, - point_select_border_col = "black", - point_select_border_stroke = 0.05, - point_size_other = 1, - point_alpha_other = 0.3, - point_other_border_col = "lightgrey", - point_other_border_stroke = 0.01, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximitySpatPlot2D") { +cellProximitySpatPlot2D <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + spat_loc_name = NULL, + interaction_name = NULL, + cluster_column = NULL, + sdimx = "sdimx", + sdimy = "sdimy", + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = FALSE, + show_network = FALSE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + coord_fix_ratio = 1, + show_legend = TRUE, + point_size_select = 2, + point_select_border_col = "black", + point_select_border_stroke = 0.05, + point_size_other = 1, + point_alpha_other = 0.3, + point_other_border_col = "lightgrey", + point_other_border_stroke = 0.01, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximitySpatPlot2D") { if (is.null(interaction_name)) { stop("you need to specific at least one interaction name, run cellProximityEnrichment") @@ -3644,39 +3627,38 @@ cellProximitySpatPlot <- function(gobject, ...) { #' @returns plotly #' @details Description of parameters. #' @export -cellProximitySpatPlot3D <- function( - gobject, - interaction_name = NULL, - cluster_column = NULL, - sdimx = "sdimx", - sdimy = "sdimy", - sdimz = "sdimz", - cell_color = NULL, - cell_color_code = NULL, - color_as_factor = TRUE, - show_other_cells = TRUE, - show_network = TRUE, - show_other_network = FALSE, - network_color = NULL, - spatial_network_name = "Delaunay_network", - show_grid = FALSE, - grid_color = NULL, - spatial_grid_name = "spatial_grid", - show_legend = TRUE, - point_size_select = 4, - point_size_other = 2, - point_alpha_other = 0.5, - axis_scale = c("cube", "real", "custom"), - custom_ratio = NULL, - x_ticks = NULL, - y_ticks = NULL, - z_ticks = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "cellProximitySpatPlot3D", - ...) 
{ +cellProximitySpatPlot3D <- function(gobject, + interaction_name = NULL, + cluster_column = NULL, + sdimx = "sdimx", + sdimy = "sdimy", + sdimz = "sdimz", + cell_color = NULL, + cell_color_code = NULL, + color_as_factor = TRUE, + show_other_cells = TRUE, + show_network = TRUE, + show_other_network = FALSE, + network_color = NULL, + spatial_network_name = "Delaunay_network", + show_grid = FALSE, + grid_color = NULL, + spatial_grid_name = "spatial_grid", + show_legend = TRUE, + point_size_select = 4, + point_size_other = 2, + point_alpha_other = 0.5, + axis_scale = c("cube", "real", "custom"), + custom_ratio = NULL, + x_ticks = NULL, + y_ticks = NULL, + z_ticks = NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "cellProximitySpatPlot3D", + ...) { if (is.null(sdimz)) { pl <- .cellProximityVisPlot_2D_plotly( gobject = gobject, diff --git a/R/spdep.R b/R/spdep.R index 6abbe9cad..fe924fa9f 100644 --- a/R/spdep.R +++ b/R/spdep.R @@ -18,15 +18,14 @@ #' #' spdepAutoCorr(g) #' @export -spdepAutoCorr <- function( - gobject, - method = c("geary.test", "lee.test", "lm.morantest", "moran.test"), - spat_unit = NULL, - feat_type = NULL, - expression_values = "normalized", - spatial_network_to_use = "spatial_network", - return_gobject = FALSE, - verbose = FALSE) { +spdepAutoCorr <- function(gobject, + method = c("geary.test", "lee.test", "lm.morantest", "moran.test"), + spat_unit = NULL, + feat_type = NULL, + expression_values = "normalized", + spatial_network_to_use = "spatial_network", + return_gobject = FALSE, + verbose = FALSE) { # Check and match the specified method argument method <- match.arg(method) @@ -77,9 +76,9 @@ spdepAutoCorr <- function( step_size <- ceiling(nfeats / 10L) result_list <- list() - progressr::with_progress({ + with_pbar({ if (step_size > 1) { - pb <- progressr::progressor( + pb <- pbar( steps = nfeats / step_size ) } diff --git a/R/suite_reexports.R b/R/suite_reexports.R index f3f51f5c5..905811ab7 100644 --- a/R/suite_reexports.R +++ b/R/suite_reexports.R @@ -167,8 +167,6 @@ GiottoClass::combineFeatureOverlapData #' @export GiottoClass::combineMetadata #' @export -GiottoClass::combineSpatialCellFeatureInfo -#' @export GiottoClass::combineSpatialCellMetadataInfo #' @export GiottoClass::combineToMultiPolygon diff --git a/R/variable_genes.R b/R/variable_genes.R index c65c0cb89..bc65fcc60 100644 --- a/R/variable_genes.R +++ b/R/variable_genes.R @@ -1,10 +1,9 @@ -.calc_cov_group_hvf <- function( - feat_in_cells_detected, - nr_expression_groups = 20, - zscore_threshold = 1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL) { +.calc_cov_group_hvf <- function(feat_in_cells_detected, + nr_expression_groups = 20, + zscore_threshold = 1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL) { # NSE vars cov_group_zscore <- cov <- selected <- mean_expr <- NULL @@ -55,12 +54,11 @@ -.calc_cov_loess_hvf <- function( - feat_in_cells_detected, - difference_in_cov = 0.1, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL) { +.calc_cov_loess_hvf <- function(feat_in_cells_detected, + difference_in_cov = 0.1, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL) { # NSE vars cov_diff <- pred_cov_feats <- selected <- NULL @@ -97,14 +95,13 @@ -.calc_var_hvf <- function( - scaled_matrix, - var_threshold = 1.5, - var_number = NULL, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - use_parallel = FALSE) { +.calc_var_hvf <- function(scaled_matrix, + var_threshold = 1.5, + var_number = 
NULL, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + use_parallel = FALSE) { # NSE vars var <- selected <- NULL @@ -183,9 +180,10 @@ } -.calc_expr_cov_stats_parallel <- function(expr_values, - expression_threshold, - cores = GiottoUtils::determine_cores()) { +.calc_expr_cov_stats_parallel <- function( + expr_values, + expression_threshold, + cores = GiottoUtils::determine_cores()) { # NSE vars cov <- sd <- mean_expr <- NULL @@ -284,31 +282,30 @@ #' #' calculateHVF(g) #' @export -calculateHVF <- function( - gobject, - spat_unit = NULL, - feat_type = NULL, - expression_values = c("normalized", "scaled", "custom"), - method = c("cov_groups", "cov_loess", "var_p_resid"), - reverse_log_scale = FALSE, - logbase = 2, - expression_threshold = 0, - nr_expression_groups = 20, - zscore_threshold = 1.5, - HVFname = "hvf", - difference_in_cov = 0.1, - var_threshold = 1.5, - var_number = NULL, - random_subset = NULL, - set_seed = TRUE, - seed_number = 1234, - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - default_save_name = "HVFplot", - return_gobject = TRUE, - verbose = TRUE) { +calculateHVF <- function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + method = c("cov_groups", "cov_loess", "var_p_resid"), + reverse_log_scale = FALSE, + logbase = 2, + expression_threshold = 0, + nr_expression_groups = 20, + zscore_threshold = 1.5, + HVFname = "hvf", + difference_in_cov = 0.1, + var_threshold = 1.5, + var_number = NULL, + random_subset = NULL, + set_seed = TRUE, + seed_number = 1234, + show_plot = NULL, + return_plot = NULL, + save_plot = NULL, + save_param = list(), + default_save_name = "HVFplot", + return_gobject = TRUE, + verbose = TRUE) { # NSE vars selected <- feats <- var <- NULL @@ -522,7 +519,8 @@ calculateHVF <- function( # plot generation #### -.create_cov_group_hvf_plot <- function(feat_in_cells_detected, nr_expression_groups) { +.create_cov_group_hvf_plot <- function(feat_in_cells_detected, + nr_expression_groups) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( @@ -553,7 +551,8 @@ calculateHVF <- function( } -.create_cov_loess_hvf_plot <- function(feat_in_cells_detected, difference_in_cov, var_col) { +.create_cov_loess_hvf_plot <- function(feat_in_cells_detected, + difference_in_cov, var_col) { pl <- ggplot2::ggplot() pl <- pl + ggplot2::theme_classic() + ggplot2::theme( diff --git a/R/wnn.R b/R/wnn.R index 588d11888..b8578f5c5 100644 --- a/R/wnn.R +++ b/R/wnn.R @@ -1,372 +1,301 @@ #' Multi omics integration with WNN #' -#' @param gobject A Giotto object with individual PCA modalities pre-calculated -#' @param k k number, default = 20 +#' @param gobject A Giotto object with individual PCA feat_types pre-calculated #' @param spat_unit spatial unit -#' @param modality_1 modality 1 name. Default = "rna" -#' @param modality_2 modality 2 name. Default = "protein" -#' @param pca_name_modality_1 Default = 'rna.pca' -#' @param pca_name_modality_2 Default = 'protein.pca' +#' @param feat_types feature types to integrate. Default = c("rna", "protein") +#' @param reduction_methods reduction methods for each feature type. Default = c("pca", "pca") +#' @param reduction_names names of the reduction methods to use. Default = c("rna.pca", "protein.pca") +#' @param k number of neighbors to calculate cell distances, default = 20 #' @param integrated_feat_type integrated feature type (e.g. 
'rna_protein') #' @param matrix_result_name Default = 'theta_weighted_matrix' -#' @param w_name_modality_1 name for modality 1 weights -#' @param w_name_modality_2 name for modality 2 weights #' @param verbose be verbose +#' @param w_names optional. Names for the weighted matrices. If NULL, automatic names composed by w_feat_type will be created. #' -#' @returns A Giotto object with integrated UMAP (integrated.umap) within the -#' dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the -#' cellular metadata. +#' @returns A Giotto object with a new multiomics slot containing the theta_weighted_matrix and individual weight matrices. #' @export -runWNN <- function( - gobject, - spat_unit = "cell", - modality_1 = "rna", - modality_2 = "protein", - pca_name_modality_1 = "rna.pca", - pca_name_modality_2 = "protein.pca", - k = 20, - integrated_feat_type = NULL, - matrix_result_name = NULL, - w_name_modality_1 = NULL, - w_name_modality_2 = NULL, - verbose = FALSE) { +runWNN <- function(gobject, + spat_unit = "cell", + feat_types = c("rna", "protein"), + reduction_methods = c("pca", "pca"), + reduction_names = c("rna.pca", "protein.pca"), + k = 20, + integrated_feat_type = NULL, + matrix_result_name = NULL, + w_names = c(NULL, NULL), + verbose = FALSE) { # validate Giotto object if (!inherits(gobject, "giotto")) { stop("gobject needs to be a giotto object") } - # validate modalities - if (!modality_1 %in% names( - gobject@dimension_reduction$cells[[spat_unit]] - ) || - !modality_2 %in% names(gobject@dimension_reduction$cells[[spat_unit]])) { - stop(paste(modality_1, "and", modality_2, " pca must exist")) + # validate feat_types + for (feat_type in feat_types) { + if (!feat_type %in% names( + slot(gobject, "dimension_reduction")$cells[[spat_unit]] + )) { + stop(paste(feat_type, " and their dimension reductions must exist in the Giotto object")) + } } - # extract PCA + # extract kNN and PCA + kNN_list <- list() + pca_list <- list() - ## modality 1 - kNN_1 <- getNearestNetwork(gobject, - spat_unit = spat_unit, - feat_type = modality_1, - nn_type = "kNN" - ) - kNN_1 <- slot(kNN_1, "igraph") + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - pca_1 <- getDimReduction(gobject, - spat_unit = spat_unit, - feat_type = modality_1, - reduction = "cells", - reduction_method = "pca", - name = pca_name_modality_1 - ) - pca_1 <- slot(pca_1, "coordinates") + kNN_list[[feat_type]] <- getNearestNetwork(gobject, + spat_unit = spat_unit, + feat_type = feat_type, + nn_type = "kNN", + output = "igraph" + ) + + pca_list[[feat_type]] <- getDimReduction(gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction = "cells", + reduction_method = reduction_methods[i], + name = reduction_names[i], + output = "matrix" + ) + } - ## modality 2 - kNN_2 <- getNearestNetwork(gobject, - spat_unit = spat_unit, - feat_type = modality_2, - nn_type = "kNN" - ) - kNN_2 <- slot(kNN_2, "igraph") - - pca_2 <- getDimReduction(gobject, - spat_unit = "cell", - feat_type = modality_2, - reduction = "cells", - reduction_method = "pca", - name = pca_name_modality_2 - ) - pca_2 <- slot(pca_2, "coordinates") ## get cell names - cell_names <- unique(igraph::get.edgelist(kNN_1)[, 1]) + cell_names <- GiottoClass:::get_cell_id(gobject, + spat_unit = spat_unit + ) ######################## distances calculation ############################ - message(paste("Calculating", modality_1, "-", modality_1, "distance")) + cell_distances <- list() - ### distances modality1 modality1 - cell_distances_1_1 <- list() - 
- for (cell_a in cell_names) { - my_kNN <- kNN_1[[cell_a]][[cell_a]] - - cell_distances_1_1[[cell_a]] <- rep(0, k) - names(cell_distances_1_1[[cell_a]]) <- names(my_kNN) - - for (cell_i in names(my_kNN)) { - dimensions_cell_a_i <- pca_1[c(cell_a, cell_i), ] - cell_distances_1_1[[cell_a]][cell_i] <- sqrt(sum(( - dimensions_cell_a_i[1, ] - dimensions_cell_a_i[2, ])^2)) - } - } + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - ### distances modality2 modality2 + cell_distances[[feat_type]] <- list() - message(paste("Calculating", modality_2, "-", modality_2, "distance")) + message("Calculating ", feat_type, "-", feat_type, " distance") - cell_distances_2_2 <- list() + cell_distances[[feat_type]][[feat_type]] <- list() - for (cell_a in cell_names) { - my_kNN <- kNN_2[[cell_a]][[cell_a]] + for (cell_a in cell_names) { + my_kNN <- kNN_list[[feat_type]][[cell_a]][[cell_a]] - cell_distances_2_2[[cell_a]] <- rep(0, k) - names(cell_distances_2_2[[cell_a]]) <- names(my_kNN) + cell_distances[[feat_type]][[feat_type]][[cell_a]] <- rep(0, k) + names(cell_distances[[feat_type]][[feat_type]][[cell_a]]) <- names(my_kNN) - for (cell_i in names(my_kNN)) { - dimensions_cell_a_i <- pca_2[c(cell_a, cell_i), ] - cell_distances_2_2[[cell_a]][cell_i] <- sqrt(sum(( - dimensions_cell_a_i[1, ] - dimensions_cell_a_i[2, ])^2)) + for (cell_i in names(my_kNN)) { + dimensions_cell_a_i <- pca_list[[feat_type]][c(cell_a, cell_i), ] + cell_distances[[feat_type]][[feat_type]][[cell_a]][cell_i] <- sqrt(sum(( + dimensions_cell_a_i[1, ] - dimensions_cell_a_i[2, ])^2)) + } } } ########################### all cell-cell distances ######################## - ## modality1 modality1 + all_cell_distances <- list() - if (verbose) { - message(paste( - "Calculating low dimensional cell-cell distances for", - modality_1 - )) - } + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - all_cell_distances_1_1 <- dist(pca_1) - all_cell_distances_1_1 <- as.matrix(all_cell_distances_1_1) + all_cell_distances[[feat_type]] <- list() - ## modality2 modality2 + if (verbose) { + message(paste( + "Calculating low dimensional cell-cell distances for", + feat_type + )) + } - if (verbose) { - message(paste( - "Calculating low dimensional cell-cell distances for", - modality_2 - )) + pca_distances <- dist(pca_list[[feat_type]]) + all_cell_distances[[feat_type]][[feat_type]] <- as.matrix(pca_distances) } - - all_cell_distances_2_2 <- dist(pca_2) - all_cell_distances_2_2 <- as.matrix(all_cell_distances_2_2) - - ######################## within-modality prediction ######################## if (verbose) message("Calculating within-modality prediction") - ### predicted modality1 modality1 - predicted_1_1 <- list() + ### predicted feat_type1 feat_type1 + predicted_values <- list() - for (cell_a in cell_names) { - dimensions_cell_a <- pca_1[kNN_1[[cell_a]][[cell_a]], ] - - predicted_1_1[[cell_a]] <- colSums(dimensions_cell_a) / k - } + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - ### predicted modality2 modality2 - predicted_2_2 <- list() + predicted_values[[feat_type]] <- list() + predicted_values[[feat_type]][[feat_type]] <- list() - for (cell_a in cell_names) { - dimensions_cell_a <- pca_2[kNN_2[[cell_a]][[cell_a]], ] + for (cell_a in cell_names) { + dimensions_cell_a <- pca_list[[feat_type]][kNN_list[[feat_type]][[cell_a]][[cell_a]], ] - predicted_2_2[[cell_a]] <- colSums(dimensions_cell_a) / k + predicted_values[[feat_type]][[feat_type]][[cell_a]] <- colSums(dimensions_cell_a) / k + } } + 
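The within-modality prediction computed in the loop above is simply the average low-dimensional profile of a cell's k nearest neighbours from the same feat_type. A stand-alone sketch of that step (`predict_within_modality`, `pca_mat` and `knn_ids` are hypothetical names, not Giotto API):

# predicted profile of one cell = mean of its k neighbours' PCA coordinates
predict_within_modality <- function(pca_mat, knn_ids) {
    # pca_mat: cells x dims matrix; knn_ids: IDs of the cell's k nearest neighbours
    colSums(pca_mat[knn_ids, , drop = FALSE]) / length(knn_ids)
}

# toy data: 5 cells, 4 PCA dimensions
set.seed(1)
pca_mat <- matrix(rnorm(20),
    nrow = 5,
    dimnames = list(paste0("cell_", 1:5), paste0("Dim.", 1:4))
)
predict_within_modality(pca_mat, knn_ids = c("cell_2", "cell_3", "cell_4"))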
######################## cross-modality prediction ######################### if (verbose) message("Calculating cross-modality prediction") - ## predicted modality1 modality2 - predicted_1_2 <- list() + ## predicted feat_type1 modality2 + feat_type1 <- feat_types[1] + feat_type2 <- feat_types[2] + + predicted_values[[feat_type1]][[feat_type2]] <- list() for (cell_a in cell_names) { - dimensions_cell_a <- pca_1[kNN_2[[cell_a]][[cell_a]], ] + dimensions_cell_a <- pca_list[[feat_type1]][kNN_list[[feat_type2]][[cell_a]][[cell_a]], ] - predicted_1_2[[cell_a]] <- colSums(dimensions_cell_a) / k + predicted_values[[feat_type1]][[feat_type2]][[cell_a]] <- colSums(dimensions_cell_a) / k } - ## predicted modality2 modality1 - predicted_2_1 <- list() + ## predicted modality2 feat_type1 + predicted_values[[feat_type2]][[feat_type1]] <- list() for (cell_a in cell_names) { - dimensions_cell_a <- pca_2[kNN_1[[cell_a]][[cell_a]], ] + dimensions_cell_a <- pca_list[[feat_type2]][kNN_list[[feat_type1]][[cell_a]][[cell_a]], ] - predicted_2_1[[cell_a]] <- colSums(dimensions_cell_a) / k + predicted_values[[feat_type2]][[feat_type1]][[cell_a]] <- colSums(dimensions_cell_a) / k } ###################### calculate jaccard similarities ###################### if (verbose) message("Calculating Jaccard similarities") - ## modality1 modality1 - sNN_1 <- createNearestNetwork(gobject, - spat_unit = "cell", - feat_type = modality_1, - type = "sNN", - dim_reduction_to_use = "pca", - dim_reduction_name = pca_name_modality_1, - dimensions_to_use = 1:100, - return_gobject = FALSE, - minimum_shared = 1, - k = 20 - ) - - sNN_1 <- igraph::as_data_frame(sNN_1) - - ## modality2 modality2 - - sNN_2 <- createNearestNetwork(gobject, - spat_unit = "cell", - feat_type = modality_2, - type = "sNN", - dim_reduction_to_use = "pca", - dim_reduction_name = pca_name_modality_2, - dimensions_to_use = 1:100, - return_gobject = FALSE, - minimum_shared = 1, - k = 20 - ) + sNN_list <- list() + + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] + + sNN_result <- createNearestNetwork(gobject, + spat_unit = "cell", + feat_type = feat_type, + type = "sNN", + dim_reduction_to_use = reduction_methods[i], + dim_reduction_name = reduction_names[i], + dimensions_to_use = 1:100, + return_gobject = FALSE, + minimum_shared = 1, + k = 20 + ) - sNN_2 <- igraph::as_data_frame(sNN_2) + sNN_list[[feat_type]] <- igraph::as_data_frame(sNN_result) + } if (verbose) message("Calculating kernel bandwidths") # cell-specific kernel bandwidth. + sigma_i <- list() - ## modality1 - modality1_sigma_i <- numeric() + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - for (cell_a in cell_names) { - ### 20 small jaccard values - jaccard_values <- sNN_1[sNN_1$from == cell_a, ] - - if (nrow(jaccard_values == 20)) { - further_cell_cell_distances <- all_cell_distances_1_1[ - cell_a, jaccard_values$to - ] - } else { - further_cell_cell_distances <- tail(sort(all_cell_distances_1_1[ - cell_a, - ]), 20) - } + sigma_i[[feat_type]] <- numeric() - modality1_sigma_i[cell_a] <- mean(further_cell_cell_distances) - # cell-specific kernel bandwidth. 
- } - ## modality2 + for (cell_a in cell_names) { + ### 20 small jaccard values + jaccard_values <- sNN_list[[feat_type]][sNN_list[[feat_type]]$from == cell_a, ] - modality2_sigma_i <- numeric() + if (nrow(jaccard_values == 20)) { + further_cell_cell_distances <- all_cell_distances[[feat_type]][[feat_type]][ + cell_a, jaccard_values$to + ] + } else { + further_cell_cell_distances <- tail(sort(all_cell_distances[[feat_type]][[feat_type]][ + cell_a, + ]), 20) + } - for (cell_a in cell_names) { - ### 20 small jaccard values - jaccard_values <- sNN_2[sNN_2$from == cell_a, ] - - if (nrow(jaccard_values == 20)) { - further_cell_cell_distances <- all_cell_distances_2_2[ - cell_a, jaccard_values$to - ] - } else { - further_cell_cell_distances <- tail(sort(all_cell_distances_2_2[ - cell_a, - ]), 20) + sigma_i[[feat_type]][cell_a] <- mean(further_cell_cell_distances) + # cell-specific kernel bandwidth. } - - modality2_sigma_i[cell_a] <- mean(further_cell_cell_distances) - # cell-specific kernel bandwidth. } - ###################### cell-specific modality weights ###################### if (verbose) message("Calculating modality weights") - ## modality1 modality1 - theta_1_1 <- list() + ## feat_type1 feat_type1 + theta_list <- list() - for (cell_a in cell_names) { - modality1_i <- pca_1[cell_a, ] # profile of current cell - d_modality1_i_modality2_predicted <- sqrt(sum(( - modality1_i - predicted_1_1[[cell_a]])^2)) + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - first_knn <- names(sort(cell_distances_1_1[[cell_a]]))[1] - modality1_knn1 <- pca_1[first_knn, ] # profile of the nearest neighbor - d_modality1_i_modality1_knn1 <- sqrt(sum(( - modality1_i - modality1_knn1)^2)) + theta_list[[feat_type]] <- list() + theta_list[[feat_type]][[feat_type]] <- list() - difference_distances <- d_modality1_i_modality2_predicted - - d_modality1_i_modality1_knn1 - max_value <- max(c(difference_distances, 0)) + for (cell_a in cell_names) { + feat_type1_i <- pca_list[[feat_type]][cell_a, ] # profile of current cell - theta_1_1[[cell_a]] <- exp(( - -max_value) / (modality1_sigma_i[cell_a] - - d_modality1_i_modality1_knn1)) - } + d_feat_type1_i_modality2_predicted <- sqrt(sum(( + feat_type1_i - predicted_values[[feat_type]][[feat_type]][[cell_a]])^2)) - ## modality2 modality2 - theta_modality2_modality2 <- list() + first_knn <- names(sort(cell_distances[[feat_type]][[feat_type]][[cell_a]]))[1] - for (cell_a in cell_names) { - modality2_i <- pca_2[cell_a, ] # profile of current cell - d_modality2_i_modality2_predicted <- sqrt(sum(( - modality2_i - predicted_2_2[[cell_a]])^2)) + feat_type1_knn1 <- pca_list[[feat_type]][first_knn, ] # profile of the nearest neighbor - first_knn <- names(sort(cell_distances_2_2[[cell_a]]))[1] - modality2_knn1 <- pca_2[first_knn, ] # profile of the nearest neighbor - d_modality2_i_modality2_knn1 <- sqrt(sum(( - modality2_i - modality2_knn1)^2)) + d_feat_type1_i_feat_type1_knn1 <- sqrt(sum(( + feat_type1_i - feat_type1_knn1)^2)) - difference_distances <- d_modality2_i_modality2_predicted - - d_modality2_i_modality2_knn1 - max_value <- max(c(difference_distances, 0)) + difference_distances <- d_feat_type1_i_modality2_predicted - + d_feat_type1_i_feat_type1_knn1 + max_value <- max(c(difference_distances, 0)) - theta_modality2_modality2[[cell_a]] <- exp(( - -max_value) / (modality2_sigma_i[cell_a] - - d_modality2_i_modality2_knn1)) + theta_list[[feat_type]][[feat_type]][[cell_a]] <- exp(( + -max_value) / (sigma_i[[feat_type]][cell_a] - + d_feat_type1_i_feat_type1_knn1)) + } } 
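The affinity computed just above follows the WNN kernel: the distance from a cell to its within-modality prediction is offset by the distance to its first nearest neighbour and scaled by the cell-specific bandwidth sigma_i (the mean distance to roughly 20 non-neighbouring cells). A small numeric sketch of that formula, with made-up values and a hypothetical helper name:

# theta = exp(-max(d_pred - d_knn1, 0) / (sigma_i - d_knn1))
wnn_affinity <- function(d_pred, d_knn1, sigma_i) {
    max_value <- max(d_pred - d_knn1, 0)
    exp(-max_value / (sigma_i - d_knn1))
}

wnn_affinity(d_pred = 4.2, d_knn1 = 1.1, sigma_i = 3.5)
# a prediction close to the nearest neighbour pushes the affinity towards 1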
- ## modality1 modality2 - theta_modality1_modality2 <- list() + ## feat_type1 modality2 + feat_type1 <- feat_types[1] + feat_type2 <- feat_types[2] + + theta_list[[feat_type1]][[feat_type2]] <- list() for (cell_a in cell_names) { - modality1_i <- pca_1[cell_a, ] # profile of current cell - d_modality1_i_modality2_predicted <- sqrt(sum(( - modality1_i - predicted_1_2[[cell_a]])^2)) + feat_type1_i <- pca_list[[feat_type1]][cell_a, ] # profile of current cell + d_feat_type1_i_modality2_predicted <- sqrt(sum(( + feat_type1_i - predicted_values[[feat_type1]][[feat_type2]][[cell_a]])^2)) + first_knn <- names(sort(cell_distances[[feat_type1]][[feat_type1]][[cell_a]]))[1] - first_knn <- names(sort(cell_distances_1_1[[cell_a]]))[1] - modality1_knn1 <- pca_1[first_knn, ] # profile of the nearest neighbor - d_modality1_i_modality1_knn1 <- sqrt(sum(( - modality1_i - modality1_knn1)^2)) + feat_type1_knn1 <- pca_list[[feat_type1]][first_knn, ] # profile of the nearest neighbor + d_feat_type1_i_feat_type1_knn1 <- sqrt(sum(( + feat_type1_i - feat_type1_knn1)^2)) - difference_distances <- d_modality1_i_modality2_predicted - - d_modality1_i_modality1_knn1 + difference_distances <- d_feat_type1_i_modality2_predicted - + d_feat_type1_i_feat_type1_knn1 max_value <- max(c(difference_distances, 0)) - theta_modality1_modality2[[cell_a]] <- exp(( - -max_value) / (modality1_sigma_i[cell_a] - - d_modality1_i_modality1_knn1)) + theta_list[[feat_type1]][[feat_type2]][[cell_a]] <- exp(( + -max_value) / (sigma_i[[feat_type1]][cell_a] - + d_feat_type1_i_feat_type1_knn1)) } - ## modality2 modality1 - theta_modality2_modality1 <- list() + ## modality2 feat_type1 + theta_list[[feat_type2]][[feat_type1]] <- list() for (cell_a in cell_names) { - modality2_i <- pca_2[cell_a, ] # profile of current cell - d_modality2_i_modality1_predicted <- sqrt(sum(( - modality2_i - predicted_2_1[[cell_a]])^2)) + modality2_i <- pca_list[[feat_type2]][cell_a, ] # profile of current cell + d_modality2_i_feat_type1_predicted <- sqrt(sum(( + modality2_i - predicted_values[[feat_type2]][[feat_type1]][[cell_a]])^2)) + first_knn <- names(sort(cell_distances[[feat_type2]][[feat_type2]][[cell_a]]))[1] - first_knn <- names(sort(cell_distances_2_2[[cell_a]]))[1] - modality2_knn1 <- pca_2[first_knn, ] # profile of the nearest neighbor + modality2_knn1 <- pca_list[[feat_type2]][first_knn, ] # profile of the nearest neighbor d_modality2_i_modality2_knn1 <- sqrt(sum(( modality2_i - modality2_knn1)^2)) - difference_distances <- d_modality2_i_modality1_predicted - + difference_distances <- d_modality2_i_feat_type1_predicted - d_modality2_i_modality2_knn1 max_value <- max(c(difference_distances, 0)) - theta_modality2_modality1[[cell_a]] <- exp(( - -max_value) / (modality2_sigma_i[cell_a] - + theta_list[[feat_type2]][[feat_type1]][[cell_a]] <- exp(( + -max_value) / (sigma_i[[feat_type2]][cell_a] - d_modality2_i_modality2_knn1)) } @@ -377,21 +306,22 @@ runWNN <- function( epsilon <- 10^-4 - ## modality1 - ratio_modality1 <- list() + ratio_list <- list() + + ## feat_type1 + ratio_list[[feat_type1]] <- list() for (cell_a in cell_names) { - ratio_modality1[[cell_a]] <- theta_1_1[[cell_a]] / - (theta_modality1_modality2[[cell_a]] + epsilon) + ratio_list[[feat_type1]][[cell_a]] <- theta_list[[feat_type1]][[feat_type1]][[cell_a]] / + (theta_list[[feat_type1]][[feat_type2]][[cell_a]] + epsilon) } - ## modality2 - ratio_modality2 <- list() + ratio_list[[feat_type2]] <- list() for (cell_a in cell_names) { - ratio_modality2[[cell_a]] <- 
theta_modality2_modality2[[cell_a]] / - (theta_modality2_modality1[[cell_a]] + epsilon) + ratio_list[[feat_type2]][[cell_a]] <- theta_list[[feat_type2]][[feat_type2]][[cell_a]] / + (theta_list[[feat_type2]][[feat_type1]][[cell_a]] + epsilon) } @@ -399,23 +329,20 @@ runWNN <- function( if (verbose) message("Calculating WNN normalization") - w_modality1 <- rep(0, length(cell_names)) - names(w_modality1) <- cell_names + w_list <- list() - for (cell_a in cell_names) { - w_modality1[cell_a] <- exp(ratio_modality1[[cell_a]]) / - (exp(ratio_modality1[[cell_a]]) + exp(ratio_modality2[[cell_a]])) - } + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - w_modality2 <- rep(0, length(cell_names)) - names(w_modality2) <- cell_names + w_list[[feat_type]] <- rep(0, length(cell_names)) + names(w_list[[feat_type]]) <- cell_names - for (cell_a in cell_names) { - w_modality2[cell_a] <- exp(ratio_modality2[[cell_a]]) / - (exp(ratio_modality1[[cell_a]]) + exp(ratio_modality2[[cell_a]])) + for (cell_a in cell_names) { + w_list[[feat_type]][cell_a] <- exp(ratio_list[[feat_type]][[cell_a]]) / + (exp(ratio_list[[feat_type1]][[cell_a]]) + exp(ratio_list[[feat_type2]][[cell_a]])) + } } - ######################### Calculating a WNN graph ########################## if (verbose) message("Calculating WNN graph") @@ -430,18 +357,20 @@ runWNN <- function( kernelpower <- 1 - ## theta_modality1 + ## theta_feat_type1 + + theta_cella_cellb <- list() - theta_modality1_cella_cellb <- exp(-1 * (all_cell_distances_1_1 / - modality1_sigma_i)**kernelpower) + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - ## theta_modality2 - theta_modality2_cella_cellb <- exp(-1 * (all_cell_distances_2_2 / - modality2_sigma_i)**kernelpower) + theta_cella_cellb[[feat_type]] <- exp(-1 * (all_cell_distances[[feat_type]][[feat_type]] / + sigma_i[[feat_type]])**kernelpower) + } ## theta_weighted - theta_weighted <- w_modality1 * theta_modality1_cella_cellb + - w_modality2 * theta_modality2_cella_cellb + theta_weighted <- w_list[[feat_type1]] * theta_cella_cellb[[feat_type1]] + + w_list[[feat_type2]] * theta_cella_cellb[[feat_type2]] # save theta_weighted @@ -451,14 +380,14 @@ runWNN <- function( ## set integrated feat_type and result name if not provided if (is.null(integrated_feat_type)) { - integrated_feat_type <- paste0(modality_1, "_", modality_2) + integrated_feat_type <- paste0(feat_type1, "_", feat_type2) } if (is.null(matrix_result_name)) { matrix_result_name <- "theta_weighted_matrix" } - gobject <- set_multiomics( + gobject <- setMultiomics( gobject = gobject, result = theta_weighted, spat_unit = spat_unit, @@ -468,39 +397,25 @@ runWNN <- function( verbose = TRUE ) + # save feat_types weight + for (i in seq(length(feat_types))) { + feat_type <- feat_types[i] - # save modalities weight - - ## modality 1 - if (is.null(w_name_modality_1)) { - w_name_modality_1 <- paste0("w_", modality_1) - } - - gobject <- set_multiomics( - gobject = gobject, - result = w_modality1, - spat_unit = spat_unit, - feat_type = integrated_feat_type, - integration_method = "WNN", - result_name = w_name_modality_1, - verbose = TRUE - ) + if (is.null(w_names[i])) { + w_names[i] <- paste0("w_", feat_type) + } - ## modality 2 - if (is.null(w_name_modality_2)) { - w_name_modality_2 <- paste0("w_", modality_2) + gobject <- setMultiomics( + gobject = gobject, + result = w_list[feat_type], + spat_unit = spat_unit, + feat_type = integrated_feat_type, + integration_method = "WNN", + result_name = w_names[i], + verbose = TRUE + ) } - gobject <- 
set_multiomics( - gobject = gobject, - result = w_modality2, - spat_unit = spat_unit, - feat_type = integrated_feat_type, - integration_method = "WNN", - result_name = w_name_modality_2, - verbose = TRUE - ) - return(gobject) } @@ -509,8 +424,7 @@ runWNN <- function( #' #' @param gobject A giotto object #' @param spat_unit spatial unit -#' @param modality1 modality 1 name. Default = "rna" -#' @param modality2 modality 2 name. Default = "protein" +#' @param feat_types feature types to integrate. Default = c("rna", "protein") #' @param k k number #' @param spread UMAP param: spread #' @param min_dist UMAP param: min_dist @@ -522,24 +436,34 @@ runWNN <- function( #' #' @returns A Giotto object with integrated UMAP #' @export -runIntegratedUMAP <- function( - gobject, - spat_unit = "cell", - modality1 = "rna", - modality2 = "protein", - integrated_feat_type = NULL, - integration_method = "WNN", - matrix_result_name = "theta_weighted_matrix", - k = 20, - spread = 5, - min_dist = 0.01, - force = FALSE, - ...) { +runIntegratedUMAP <- function(gobject, + spat_unit = "cell", + feat_types = c("rna", "protein"), + integrated_feat_type = NULL, + integration_method = "WNN", + matrix_result_name = "theta_weighted_matrix", + k = 20, + spread = 5, + min_dist = 0.01, + force = FALSE, + ...) { + # validate feat_types + for (feat_type in feat_types) { + if (!feat_type %in% names( + slot(gobject, "dimension_reduction")$cells[[spat_unit]] + )) { + stop(paste(feat_type, " and their dimension reductions must exist in the Giotto object")) + } + } + + feat_type1 <- feat_types[1] + feat_type2 <- feat_types[2] + if (is.null(integrated_feat_type)) { - integrated_feat_type <- paste0(modality1, "_", modality2) + integrated_feat_type <- paste0(feat_type1, "_", feat_type2) } - theta_weighted <- get_multiomics(gobject, + theta_weighted <- getMultiomics(gobject, spat_unit = spat_unit, feat_type = integrated_feat_type, integration_method = integration_method, @@ -549,7 +473,7 @@ runIntegratedUMAP <- function( theta_weighted[is.na(theta_weighted)] <- 0 if (is.null(gobject@nn_network[[spat_unit]][[ - modality1 + feat_type1 ]]$kNN$integrated_kNN) || force == TRUE) { ################# Calculate integrated Nearest Neighbors ############### @@ -591,20 +515,13 @@ runIntegratedUMAP <- function( nn_type = "kNN", igraph = nn_network_igraph, spat_unit = spat_unit, - feat_type = modality1 + feat_type = feat_type1 ) - gobject <- set_NearestNetwork( - gobject = gobject, - nn_network = nnNetObj, - spat_unit = spat_unit, - feat_type = modality1, - nn_network_to_use = "kNN", - network_name = "integrated_kNN" - ) + gobject <- setGiotto(gobject, nnNetObj) ## store nn_network id - gobject <- set_multiomics( + gobject <- setMultiomics( gobject = gobject, result = nn_network$id, spat_unit = spat_unit, @@ -615,7 +532,7 @@ runIntegratedUMAP <- function( ) ## store nn_network dist - gobject <- set_multiomics( + gobject <- setMultiomics( gobject = gobject, result = nn_network$dist, spat_unit = spat_unit, @@ -662,27 +579,24 @@ runIntegratedUMAP <- function( colnames(integrated_umap) <- c("Dim.1", "Dim.2") ## add umap - gobject@dimension_reduction$cells[[spat_unit]][[modality1]][["umap"]][[ - "integrated.umap" - ]] <- list( - name = "integrated.umap", - feat_type = modality1, - spat_unit = spat_unit, - reduction_method = "umap", - coordinates = integrated_umap, - misc = NULL - ) + for (feat_type in feat_types) { + umap_object <- createDimObj( + coordinates = integrated_umap, + spat_unit = spat_unit, + feat_type = feat_type, + method = "umap", + name = 
"integrated.umap" + ) - gobject@dimension_reduction$cells[[spat_unit]][[modality2]][["umap"]][[ - "integrated.umap" - ]] <- list( - name = "integrated.umap", - feat_type = modality2, - spat_unit = spat_unit, - reduction_method = "umap", - coordinates = integrated_umap, - misc = NULL - ) + gobject <- setDimReduction( + gobject = gobject, + x = umap_object, + spat_unit = spat_unit, + feat_type = feat_type, + reduction_method = "umap", + name = "integrated.umap" + ) + } return(gobject) } diff --git a/R/zzz.R b/R/zzz.R index 9cfa6b5af..3c024dc1c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,7 +3,7 @@ .onAttach <- function(libname, pkgname) { ## print version number ## - packageStartupMessage("Giotto Suite ", utils::packageVersion("Giotto")) + packageStartupMessage("Giotto Suite ", packageVersion("Giotto")) check_ver <- getOption("giotto.check_version", TRUE) if (isTRUE(check_ver)) { diff --git a/inst/python/python_ontrac.py b/inst/python/python_ontrac.py new file mode 100644 index 000000000..64430a56d --- /dev/null +++ b/inst/python/python_ontrac.py @@ -0,0 +1,146 @@ +from optparse import Values + +import pandas as pd +from ONTraC.integrate import run_ontrac +from ONTraC.utils import write_version_info + + +def prepare_ontrac_options(preprocessing_dir: str, + GNN_dir: str, + NTScore_dir: str, + n_cpu: int = 4, + n_neighbors: int = 50, + n_local: int = 20, + device: str = 'cpu', + epochs: int = 1000, + patience: int = 100, + min_delta: float = 0.001, + min_epochs: int = 50, + batch_size: int = 0, + seed: int = 42, + lr: float = 0.03, + hidden_feats: int = 4, + k: int = 6, + modularity_loss_weight: float = 0.3, + purity_loss_weight: float = 300.0, + regularization_loss_weight: float = 0.1, + beta: float = 0.03) -> Values: + """Prepare the options for the ONTraC program. + :param dataset: str, the dataset to be used. + :param preprocessing_dir: str, the directory of the preprocessed data. + :param GNN_dir: str, the directory of the GNN model. + :param NTScore_dir: str, the directory of the NTScore model. + :param n_cpu: int, the number of CPUs to be used in niche network construction. + :param n_neighbors: int, the number of neighbors in niche network construction. + :param n_local: int, the index of local neighbors used for normalization in niche features calculation. + :param device: str, the device to be used in GNN training. + :param epochs: int, the maximum number of epochs in GNN training. + :param patience: int, the number of epochs to wait before early stopping. + :param min_delta: float, the minimum delta to be considered as improvement in early stopping. + :param min_epochs: int, the minimum epochs to be trained. + :param batch_size: int, the batch size for each iteration in GNN training. + :param seed: int, the seed. + :param lr: float, the learning rate in GNN training. + :param hidden_feats: int, the hidden features in GNN model. + :param k: int, the number of niche clusters. + :param modularity_loss_weight: float, the modularity loss weight. + :param purity_loss_weight: float, the purity loss weight. + :param regularization_loss_weight: float, the regularization loss weight. + :param beta: float, the beta value in softmax function. + :return: Values, the options. 
+ """ + + options = Values() + options.preprocessing_dir = preprocessing_dir + options.GNN_dir = GNN_dir + options.NTScore_dir = NTScore_dir + options.n_cpu = n_cpu + options.n_neighbors = n_neighbors + options.n_local = n_local + options.device = device + options.epochs = epochs + options.patience = patience + options.min_delta = min_delta + options.min_epochs = min_epochs + options.batch_size = batch_size + options.seed = seed + options.lr = lr + options.hidden_feats = hidden_feats + options.k = k + options.modularity_loss_weight = modularity_loss_weight + options.purity_loss_weight = purity_loss_weight + options.regularization_loss_weight = regularization_loss_weight + options.beta = beta + + return options + + +def ONTraC(ONTraC_input: pd.DataFrame, + preprocessing_dir: str, + GNN_dir: str, + NTScore_dir: str, + n_cpu: int = 4, + n_neighbors: int = 50, + n_local: int = 20, + device: str = 'cpu', + epochs: int = 1000, + patience: int = 100, + min_delta: float = 0.001, + min_epochs: int = 50, + batch_size: int = 0, + seed: int = 42, + lr: float = 0.03, + hidden_feats: int = 4, + k: int = 6, + modularity_loss_weight: float = 0.3, + purity_loss_weight: float = 300.0, + regularization_loss_weight: float = 0.1, + beta: float = 0.03) -> None: + """Run the ONTraC program. + :param ONTraC_input: pd.DataFrame, the input data. + :param dataset: str, the dataset to be used. + :param preprocessing_dir: str, the directory of the preprocessed data. + :param GNN_dir: str, the directory of the GNN model. + :param NTScore_dir: str, the directory of the NTScore model. + :param n_cpu: int, the number of CPUs to be used in niche network construction. + :param n_neighbors: int, the number of neighbors in niche network construction. + :param n_local: int, the index of local neighbors used for normalization in niche features calculation. + :param device: str, the device to be used in GNN training. + :param epochs: int, the maximum number of epochs in GNN training. + :param patience: int, the number of epochs to wait before early stopping. + :param min_delta: float, the minimum delta to be considered as improvement in early stopping. + :param min_epochs: int, the minimum epochs to be trained. + :param batch_size: int, the batch size for each iteration in GNN training. + :param seed: int, the seed. + :param lr: float, the learning rate in GNN training. + :param hidden_feats: int, the hidden features in GNN model. + :param k: int, the number of niche clusters. + :param modularity_loss_weight: float, the modularity loss weight. + :param purity_loss_weight: float, the purity loss weight. + :param regularization_loss_weight: float, the regularization loss weight. 
+ :param beta: float, the beta value in softmax function.""" + + write_version_info() + + options = prepare_ontrac_options(preprocessing_dir=preprocessing_dir, + GNN_dir=GNN_dir, + NTScore_dir=NTScore_dir, + n_cpu=n_cpu, + n_neighbors=n_neighbors, + n_local=n_local, + device=device, + epochs=epochs, + patience=patience, + min_delta=min_delta, + min_epochs=min_epochs, + batch_size=batch_size, + seed=seed, + lr=lr, + hidden_feats=hidden_feats, + k=k, + modularity_loss_weight=modularity_loss_weight, + purity_loss_weight=purity_loss_weight, + regularization_loss_weight=regularization_loss_weight, + beta=beta) + + run_ontrac(options=options, ori_data_df=ONTraC_input) diff --git a/man/PAGEEnrich.Rd b/man/PAGEEnrich.Rd deleted file mode 100644 index 50596ada6..000000000 --- a/man/PAGEEnrich.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_enrichment.R -\name{PAGEEnrich} -\alias{PAGEEnrich} -\title{PAGEEnrich} -\usage{ -PAGEEnrich(...) -} -\arguments{ -\item{...}{ - Arguments passed on to \code{\link[=runPAGEEnrich]{runPAGEEnrich}} - \describe{ - \item{\code{gobject}}{Giotto object} - \item{\code{spat_unit}}{spatial unit} - \item{\code{feat_type}}{feature type} - \item{\code{sign_matrix}}{Matrix of signature genes for each cell type / process} - \item{\code{expression_values}}{expression values to use} - \item{\code{min_overlap_genes}}{minimum number of overlapping genes in sign_matrix -required to calculate enrichment} - \item{\code{reverse_log_scale}}{reverse expression values from log scale} - \item{\code{logbase}}{log base to use if reverse_log_scale = TRUE} - \item{\code{output_enrichment}}{how to return enrichment output} - \item{\code{p_value}}{calculate p-values (boolean, default = FALSE)} - \item{\code{include_depletion}}{calculate both enrichment and depletion} - \item{\code{n_times}}{number of permutations to calculate for p_value} - \item{\code{max_block}}{number of lines to process together (default = 20e6)} - \item{\code{name}}{to give to spatial enrichment results, default = PAGE} - \item{\code{verbose}}{be verbose} - \item{\code{return_gobject}}{return giotto object} - }} -} -\value{ -gene enrichment -} -\description{ -Function to calculate gene signature enrichment scores per -spatial position using PAGE. -} -\seealso{ -\code{\link{runPAGEEnrich}} -} diff --git a/man/addCellIntMetadata.Rd b/man/addCellIntMetadata.Rd index 67181b306..58907a270 100644 --- a/man/addCellIntMetadata.Rd +++ b/man/addCellIntMetadata.Rd @@ -52,6 +52,8 @@ all other cell types found within the selected cell type column. \examples{ g <- GiottoData::loadGiottoMini("visium") -addCellIntMetadata(g, cluster_column = "leiden_clus", -cell_interaction = "custom_leiden") +addCellIntMetadata(g, + cluster_column = "leiden_clus", + cell_interaction = "custom_leiden" +) } diff --git a/man/cal_cell_niche_cluster_bin.Rd b/man/cal_cell_niche_cluster_bin.Rd new file mode 100644 index 000000000..af917ccfb --- /dev/null +++ b/man/cal_cell_niche_cluster_bin.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{cal_cell_niche_cluster_bin} +\alias{cal_cell_niche_cluster_bin} +\title{cal_cell_niche_cluster_bin} +\usage{ +cal_cell_niche_cluster_bin( + gobject, + spat_unit = "cell", + feat_type = "niche cluster" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. 
"rna", "dna", "protein")} +} +\value{ +gobject with binarized cell-level niche cluster assignment +} +\description{ +calculate binarized cell-level niche cluster assignment +} diff --git a/man/calculateAffineMatrixFromLandmarks.Rd b/man/calculateAffineMatrixFromLandmarks.Rd index eb1fcaf21..7edcbbd8a 100644 --- a/man/calculateAffineMatrixFromLandmarks.Rd +++ b/man/calculateAffineMatrixFromLandmarks.Rd @@ -7,13 +7,16 @@ calculateAffineMatrixFromLandmarks(source_df, target_df) } \arguments{ -\item{source_df}{source landmarks, two columns, first column represent x coordinate and second column represent y coordinate.} +\item{source_df}{source landmarks, two columns, first column represent +x coordinate and second column represent y coordinate.} -\item{target_df}{target landmarks, two columns, first column represent x coordinate and second column represent y coordinate.} +\item{target_df}{target landmarks, two columns, first column represent +x coordinate and second column represent y coordinate.} } \value{ a 3 by 3 matrix with the third row close to (0,0,1) } \description{ -calculate a affine transformation matrix from two set of landmarks +calculate a affine transformation matrix from two set of +landmarks } diff --git a/man/cellProximityBarplot.Rd b/man/cellProximityBarplot.Rd index bebc29ba1..06b577cf4 100644 --- a/man/cellProximityBarplot.Rd +++ b/man/cellProximityBarplot.Rd @@ -34,9 +34,11 @@ cellProximityBarplot( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot barplot diff --git a/man/cellProximityEnrichmentSpots.Rd b/man/cellProximityEnrichmentSpots.Rd index e618dbbc3..432921401 100644 --- a/man/cellProximityEnrichmentSpots.Rd +++ b/man/cellProximityEnrichmentSpots.Rd @@ -70,7 +70,7 @@ x <- findMarkers_one_vs_all(g, ) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +sign_matrix <- matrix(rnorm(length(sign_gene) * 7, mean = 10), nrow = length(sign_gene) ) rownames(sign_matrix) <- sign_gene diff --git a/man/cellProximityHeatmap.Rd b/man/cellProximityHeatmap.Rd index 7939e7d52..6b64cef90 100644 --- a/man/cellProximityHeatmap.Rd +++ b/man/cellProximityHeatmap.Rd @@ -38,9 +38,11 @@ and maximum} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot heatmap diff --git a/man/cellProximityNetwork.Rd b/man/cellProximityNetwork.Rd index 1a67385bd..6da55ef13 100644 --- a/man/cellProximityNetwork.Rd +++ b/man/cellProximityNetwork.Rd @@ -66,9 +66,11 @@ enriched edge weights} \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ igraph plot diff --git a/man/cellProximitySpatPlot.Rd b/man/cellProximitySpatPlot.Rd index d6bd596a5..6c906506b 100644 --- a/man/cellProximitySpatPlot.Rd +++ b/man/cellProximitySpatPlot.Rd @@ -37,12 +37,14 @@ cellProximitySpatPlot(gobject, ...) \item{\code{show_plot}}{logical. show plot} \item{\code{return_plot}}{logical. return ggplot object} \item{\code{save_plot}}{logical. save the plot} - \item{\code{save_param}}{list of saving parameters, see \code{\link{showSaveParameters}}} - \item{\code{default_save_name}}{default save name for saving, don't change, change save_name in save_param} + \item{\code{save_param}}{list of saving parameters, see +\code{\link{showSaveParameters}}} + \item{\code{default_save_name}}{default save name for saving, don't change, +change save_name in save_param} \item{\code{cell_color}}{character. what to color cells by (e.g. metadata col or spatial enrichment col)} - \item{\code{color_as_factor}}{logical. convert color column to factor. discrete colors -are used when this is TRUE. continuous colors when FALSE.} + \item{\code{color_as_factor}}{logical. convert color column to factor. discrete +colors are used when this is TRUE. continuous colors when FALSE.} \item{\code{cell_color_code}}{character. discrete colors to use. palette to use or named vector of colors} \item{\code{spatial_network_name}}{name of spatial network to use} diff --git a/man/cellProximitySpatPlot2D.Rd b/man/cellProximitySpatPlot2D.Rd index d6fbf5549..1d05af9e5 100644 --- a/man/cellProximitySpatPlot2D.Rd +++ b/man/cellProximitySpatPlot2D.Rd @@ -63,8 +63,8 @@ spatial enrichment col)} \item{cell_color_code}{character. discrete colors to use. palette to use or named vector of colors} -\item{color_as_factor}{logical. convert color column to factor. discrete colors -are used when this is TRUE. continuous colors when FALSE.} +\item{color_as_factor}{logical. convert color column to factor. discrete +colors are used when this is TRUE. continuous colors when FALSE.} \item{show_other_cells}{decide if show cells not in network} @@ -106,9 +106,11 @@ are used when this is TRUE. continuous colors when FALSE.} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/cellProximitySpatPlot3D.Rd b/man/cellProximitySpatPlot3D.Rd index 79c56c1c2..a829a4154 100644 --- a/man/cellProximitySpatPlot3D.Rd +++ b/man/cellProximitySpatPlot3D.Rd @@ -58,8 +58,8 @@ spatial enrichment col)} \item{cell_color_code}{character. discrete colors to use. palette to use or named vector of colors} -\item{color_as_factor}{logical. convert color column to factor. discrete colors -are used when this is TRUE. continuous colors when FALSE.} +\item{color_as_factor}{logical. convert color column to factor. discrete +colors are used when this is TRUE. 
continuous colors when FALSE.} \item{show_other_cells}{decide if show cells not in network} @@ -101,9 +101,11 @@ are used when this is TRUE. continuous colors when FALSE.} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} \item{\dots}{additional parameters} } diff --git a/man/combCCcom.Rd b/man/combCCcom.Rd index f31d5799b..78b00355a 100644 --- a/man/combCCcom.Rd +++ b/man/combCCcom.Rd @@ -49,7 +49,8 @@ exprCC <- exprCellCellcom(g, feat_set_2 = "9630013A20Rik" ) -spatialCC <- spatCellCellcom(gobject = g, +spatialCC <- spatCellCellcom( + gobject = g, cluster_column = "leiden_clus", feat_set_1 = "Gm19935", feat_set_2 = "9630013A20Rik", diff --git a/man/createArchRProj.Rd b/man/createArchRProj.Rd index 502a4aa49..70a312647 100644 --- a/man/createArchRProj.Rd +++ b/man/createArchRProj.Rd @@ -24,7 +24,8 @@ These files can be in one of the following formats: (i) scATAC tabix files, (ii) fragment files, or (iii) bam files.} \item{genome}{A string indicating the default genome to be used for all ArchR -functions. Currently supported values include "hg19","hg38","mm9", and "mm10". +functions. Currently supported values include "hg19","hg38","mm9", and +"mm10". This value is stored as a global environment variable, not part of the ArchRProject. This can be overwritten on a per-function basis using the given function's diff --git a/man/createGiottoCosMxObject.Rd b/man/createGiottoCosMxObject.Rd index 24be2efaf..6b0a98861 100644 --- a/man/createGiottoCosMxObject.Rd +++ b/man/createGiottoCosMxObject.Rd @@ -64,7 +64,8 @@ function matches against: \item{experimentname_\strong{tx_file}.csv (file)} } -[\strong{Workflows}] Workflow to use is accessed through the data_to_use param +[\strong{Workflows}] Workflow to use is accessed through the data_to_use +param \itemize{ \item{'all' - loads and requires subcellular information from tx_file and fov_positions_file diff --git a/man/createGiottoMerscopeObject.Rd b/man/createGiottoMerscopeObject.Rd index 0daf8027f..269988444 100644 --- a/man/createGiottoMerscopeObject.Rd +++ b/man/createGiottoMerscopeObject.Rd @@ -10,7 +10,7 @@ createGiottoMerscopeObject( merscope_dir, data_to_use = c("subcellular", "aggregate"), FOVs = NULL, - poly_z_indices = 1:7, + poly_z_indices = seq(from = 1, to = 7), calculate_overlap = TRUE, overlap_to_matrix = TRUE, aggregate_stack = TRUE, @@ -76,7 +76,8 @@ within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{cell_boundaries} (folder .hdf5 files)} - \item{\strong{images} (folder of .tif images and a scalefactor/transfrom table)} + \item{\strong{images} (folder of .tif images and a + scalefactor/transfrom table)} \item{\strong{cell_by_gene}.csv (file)} \item{cell_metadata\strong{fov_positions_file}.csv (file)} \item{detected_transcripts\strong{metadata_file}.csv (file)} diff --git a/man/createGiottoVisiumHDObject.Rd b/man/createGiottoVisiumHDObject.Rd new file mode 100644 index 000000000..a903e504b --- /dev/null +++ b/man/createGiottoVisiumHDObject.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_visiumHD.R +\name{createGiottoVisiumHDObject} 
+\alias{createGiottoVisiumHDObject} +\title{Create 10x VisiumHD Giotto Object} +\usage{ +createGiottoVisiumHDObject( + visiumHD_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + cores = NA, + verbose = FALSE +) +} +\arguments{ +\item{expr_data}{raw or filtered data (see details)} + +\item{gene_column_index}{which column index to select (see details)} + +\item{expression_matrix_class}{class of expression matrix to use +(e.g. "dgCMatrix", "DelayedArray")} + +\item{cores}{how many cores or threads to use to read data if paths are +provided} + +\item{verbose}{be verbose} + +\item{visiumHD_dir}{full path to the exported visiumHD directory} +} +\value{ +giotto object +} +\description{ +Given the path to a VisiumHD output folder, creates a +Giotto object +} +\details{ +\itemize{ + \item{expr_data: raw will take expression data from raw_feature_bc_matrix + and filter from filtered_feature_bc_matrix} + \item{gene_column_index: which gene identifiers (names) to use if there + are multiple columns (e.g. Ensembl and gene symbol)} +} +} diff --git a/man/createGiottoVisiumObject.Rd b/man/createGiottoVisiumObject.Rd index ec6db99e6..af34c70be 100644 --- a/man/createGiottoVisiumObject.Rd +++ b/man/createGiottoVisiumObject.Rd @@ -40,7 +40,7 @@ createGiottoVisiumObject( \item{h5_tissue_positions_path}{path to tissue locations (.csv file)} \item{h5_image_png_path}{path to tissue .png file (optional). Image -autoscaling looks for matches in the filename for either 'hires' or 'lowres'} +autoscaling looks for matches in the filename for either "hires" or "lowres"} \item{h5_json_scalefactors_path}{path to .json scalefactors (optional)} @@ -60,7 +60,7 @@ autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \code{\link[GiottoClass]{createGiottoInstructions}}} \item{expression_matrix_class}{class of expression matrix to use -(e.g. 'dgCMatrix', 'DelayedArray')} +(e.g. "dgCMatrix", "DelayedArray")} \item{h5_file}{optional path to create an on-disk h5 file} @@ -79,17 +79,24 @@ accepts visium H5 outputs. \details{ If starting from a Visium 10X directory: \itemize{ - \item{expr_data: raw will take expression data from raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} - \item{gene_column_index: which gene identifiers (names) to use if there are multiple columns (e.g. ensemble and gene symbol)} - \item{png_name: by default the first png will be selected, provide the png name to override this (e.g. myimage.png)} - \item{the file scalefactors_json.json will be detected automatically and used to attempt to align the data} + \item{expr_data: raw will take expression data from + raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} + \item{gene_column_index: which gene identifiers (names) to use if there + are multiple columns (e.g. Ensembl and gene symbol)} + \item{png_name: by default the first png will be selected, provide the png + name to override this (e.g.
myimage.png)} + \item{the file scalefactors_json.json will be detected automatically and + used to attempt to align the data} } If starting from a Visium 10X .h5 file \itemize{ \item{h5_visium_path: full path to .h5 file: /your/path/to/visium_file.h5} - \item{h5_tissue_positions_path: full path to spatial locations file: /you/path/to/tissue_positions_list.csv} - \item{h5_image_png_path: full path to png: /your/path/to/images/tissue_lowres_image.png} - \item{h5_json_scalefactors_path: full path to .json file: /your/path/to/scalefactors_json.json} + \item{h5_tissue_positions_path: full path to spatial locations file: + /you/path/to/tissue_positions_list.csv} + \item{h5_image_png_path: full path to png: + /your/path/to/images/tissue_lowres_image.png} + \item{h5_json_scalefactors_path: full path to .json file: + /your/path/to/scalefactors_json.json} } } diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 0d310af52..f803aece8 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -18,6 +18,7 @@ createGiottoXeniumObject( load_aligned_images = NULL, load_expression = FALSE, load_cellmeta = FALSE, + instructions = NULL, verbose = NULL ) } @@ -68,9 +69,12 @@ slower in our imaging pipeline.} \item{load_expression}{logical. Default = FALSE. Whether to load in 10X provided expression matrix.} -\item{load_cellmeta}{logical. Default = FALSE. Whether to laod in 10X +\item{load_cellmeta}{logical. Default = FALSE. Whether to load in 10X provided cell metadata information} +\item{instructions}{list of instructions or output result from +\code{\link[=createGiottoInstructions]{createGiottoInstructions()}}} + \item{verbose}{logical or NULL. NULL uses the \code{giotto.verbose} option setting and defaults to TRUE.} } diff --git a/man/createSpatialEnrich.Rd b/man/createSpatialEnrich.Rd deleted file mode 100644 index c62e820da..000000000 --- a/man/createSpatialEnrich.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_enrichment.R -\name{createSpatialEnrich} -\alias{createSpatialEnrich} -\title{createSpatialEnrich} -\usage{ -createSpatialEnrich(...) 
-} -\arguments{ -\item{...}{ - Arguments passed on to \code{\link[=runSpatialEnrich]{runSpatialEnrich}} - \describe{ - \item{\code{gobject}}{Giotto object} - \item{\code{spat_unit}}{spatial unit} - \item{\code{feat_type}}{feature type} - \item{\code{enrich_method}}{method for gene signature enrichment calculation} - \item{\code{sign_matrix}}{Matrix of signature genes for each cell type / process} - \item{\code{expression_values}}{expression values to use} - \item{\code{reverse_log_scale}}{reverse expression values from log scale} - \item{\code{min_overlap_genes}}{minimum number of overlapping genes in sign_matrix -required to calculate enrichment (PAGE)} - \item{\code{logbase}}{log base to use if reverse_log_scale = TRUE} - \item{\code{p_value}}{calculate p-value (default = FALSE)} - \item{\code{n_times}}{(page/rank) number of permutation iterations to calculate -p-value} - \item{\code{rbp_p}}{(rank) fractional binarization threshold (default = 0.99)} - \item{\code{num_agg}}{(rank) number of top genes to aggregate (default = 100)} - \item{\code{max_block}}{number of lines to process together (default = 20e6)} - \item{\code{top_percentage}}{(hyper) percentage of cells that will be considered -to have gene expression with matrix binarization} - \item{\code{output_enrichment}}{how to return enrichment output} - \item{\code{name}}{to give to spatial enrichment results, default = PAGE} - \item{\code{verbose}}{be verbose} - \item{\code{return_gobject}}{return giotto object} - }} -} -\value{ -gene signature enrichment scores -} -\description{ -Function to calculate gene signature enrichment scores per -spatial position using an enrichment test. -} -\seealso{ -\code{\link{runSpatialEnrich}} -} diff --git a/man/create_jackstrawplot.Rd b/man/create_jackstrawplot.Rd deleted file mode 100644 index 73848fd94..000000000 --- a/man/create_jackstrawplot.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dimension_reduction.R -\name{create_jackstrawplot} -\alias{create_jackstrawplot} -\title{create_jackstrawplot} -\usage{ -create_jackstrawplot( - jackstraw_data, - ncp = 20, - ylim = c(0, 1), - threshold = 0.01 -) -} -\arguments{ -\item{jackstraw_data}{result from jackstraw function (`testresult$p`)} - -\item{ncp}{number of principal components to calculate} - -\item{ylim}{y-axis limits on jackstraw plot} - -\item{threshold}{p.value threshold to call a PC significant} -} -\value{ -ggplot -} -\description{ -create jackstrawplot with ggplot -} -\keyword{internal} diff --git a/man/crossSectionFeatPlot3D.Rd b/man/crossSectionFeatPlot3D.Rd index 8d6339194..8fed2910f 100644 --- a/man/crossSectionFeatPlot3D.Rd +++ b/man/crossSectionFeatPlot3D.Rd @@ -24,7 +24,8 @@ crossSectionFeatPlot3D( \item{feat_type}{feature type} -\item{crossSection_obj}{cross section object as alternative input. default = NULL.} +\item{crossSection_obj}{cross section object as alternative input. +default = NULL.} \item{name}{name of virtual cross section to use} diff --git a/man/data_access_params.Rd b/man/data_access_params.Rd index 02ea1e816..4a44a7274 100644 --- a/man/data_access_params.Rd +++ b/man/data_access_params.Rd @@ -10,14 +10,17 @@ \item{feat_type}{feature type (e.g. "rna", "dna", "protein")} -\item{return_uniques}{return unique nesting names (ignores if final object exists/is correct class)} +\item{return_uniques}{return unique nesting names (ignores if final object +exists/is correct class)} \item{output}{what format in which to get information (e.g. 
"data.table")} -\item{set_defaults}{set default spat_unit and feat_type. Change to FALSE only when +\item{set_defaults}{set default spat_unit and feat_type. Change to FALSE +only when expression and spat_info are not expected to exist.} -\item{copy_obj}{whether to deep copy/duplicate when getting the object (default = TRUE)} +\item{copy_obj}{whether to deep copy/duplicate when getting the object +(default = TRUE)} \item{initialize}{(default = FALSE) whether to initialize the gobject before returning} diff --git a/man/doCellposeSegmentation.Rd b/man/doCellposeSegmentation.Rd index 084d3148e..1bd919987 100644 --- a/man/doCellposeSegmentation.Rd +++ b/man/doCellposeSegmentation.Rd @@ -38,7 +38,8 @@ doCellposeSegmentation( ) } \arguments{ -\item{image_dir}{character, required. Provide a path to a gray scale or a three channel image.} +\item{image_dir}{character, required. Provide a path to a gray scale or a +three channel image.} \item{mask_output}{required. Provide a path to the output mask file.} @@ -46,9 +47,13 @@ doCellposeSegmentation( \item{channel_2}{channel number for Nuclei, default to 0(gray scale)} -\item{model_name}{Name of the model to run inference. Default to 'cyto3', if you want to run cutomized trained model, place your model file in ~/.cellpose/models and specify your model name.} +\item{model_name}{Name of the model to run inference. Default to 'cyto3', +if you want to run cutomized trained model, place your model file in +~/.cellpose/models and specify your model name.} -\item{batch_size}{Cellpose Parameter, Number of 224x224 patches to run simultaneously on the GPU. Can make smaller or bigger depending on GPU memory usage. Defaults to 8.} +\item{batch_size}{Cellpose Parameter, Number of 224x224 patches to run +simultaneously on the GPU. Can make smaller or bigger depending on GPU +memory usage. Defaults to 8.} \item{resample}{Cellpose Parameter} @@ -92,16 +97,21 @@ doCellposeSegmentation( \item{progress}{Cellpose Parameter} -\item{python_path}{python environment with cellpose installed. default = "giotto_cellpose".} +\item{python_path}{python environment with cellpose installed. +default = "giotto_cellpose".} } \value{ -No return variable, as this will write directly to output path provided. +No return variable, as this will write directly to output path +provided. } \description{ -perform the Giotto Wrapper of cellpose segmentation. This is for a model inference to generate segmentation mask file from input image. +perform the Giotto Wrapper of cellpose segmentation. This is for a model +inference to generate segmentation mask file from input image. main parameters needed } \examples{ # example code -doCellposeSegmentation(image_dir = input_image, mask_output = output, channel_1 = 2, channel_2 = 1, model_name = 'cyto3',batch_size=4) +doCellposeSegmentation(image_dir = input_image, +mask_output = output, channel_1 = 2, +channel_2 = 1, model_name = "cyto3", batch_size = 4) } diff --git a/man/doFeatureSetEnrichment.Rd b/man/doFeatureSetEnrichment.Rd index 470e47dbd..0c0755d89 100644 --- a/man/doFeatureSetEnrichment.Rd +++ b/man/doFeatureSetEnrichment.Rd @@ -38,9 +38,8 @@ more information} is current working directory.} \item{name_analysis_folder}{default output subdirectory prefix to which -results are saved. - Will live within output_folder; equivalent of - "Analysis Name" in GSEA Application.} +results are saved. Will live within output_folder; equivalent of +"Analysis Name" in GSEA Application.} \item{collapse}{only 'false' is supported. 
This will use your dataset as-is, in the original format.} diff --git a/man/doKmeans.Rd b/man/doKmeans.Rd index 3bfc8552d..730f3a0ec 100644 --- a/man/doKmeans.Rd +++ b/man/doKmeans.Rd @@ -65,15 +65,18 @@ doKmeans( \item{seed_number}{number for seed} } \value{ -if return_gobject = TRUE: giotto object with new clusters appended to cell metadata +if return_gobject = TRUE: giotto object with new clusters appended +to cell metadata } \description{ cluster cells using kmeans algorithm } \details{ -The default settings will use dimension reduction results as input. -Set dim_reduction_to_use = NULL if you want to directly use expression values as input. -By providing a feature vector to feats_to_use you can subset the expression matrix. +The default settings will use dimension reduction results as input. +Set dim_reduction_to_use = NULL if you want to directly use expression +values as input. +By providing a feature vector to feats_to_use you can subset the expression +matrix. } \examples{ g <- GiottoData::loadGiottoMini("visium") diff --git a/man/doLeidenClusterIgraph.Rd b/man/doLeidenClusterIgraph.Rd index 8eb0af54e..2ef5f9c6b 100644 --- a/man/doLeidenClusterIgraph.Rd +++ b/man/doLeidenClusterIgraph.Rd @@ -77,7 +77,8 @@ which can detect communities in graphs of millions of nodes (cells), as long as they can fit in memory. See \code{\link[igraph]{cluster_leiden}} for more information. -Set \emph{weights = NULL} to use the vertices weights associated with the igraph network. +Set \emph{weights = NULL} to use the vertices weights associated with the +igraph network. Set \emph{weights = NA} if you don't want to use vertices weights } \examples{ diff --git a/man/dot-compute_dbMatrix.Rd b/man/dot-compute_dbMatrix.Rd deleted file mode 100644 index d630ed00b..000000000 --- a/man/dot-compute_dbMatrix.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R -\name{.compute_dbMatrix} -\alias{.compute_dbMatrix} -\title{compute_dbMatrix} -\usage{ -.compute_dbMatrix(dbMatrix, name, verbose = TRUE) -} -\description{ -saves dbMatrix to db if global option is set -} -\details{ -Set \code{options(giotto.dbmatrix_compute = FALSE)} if saving dbMatrix -after each step of normalization workflow is not desired. -} -\keyword{internal} diff --git a/man/dot-createGiottoCosMxObject_all.Rd b/man/dot-createGiottoCosMxObject_all.Rd index 48a9caf16..7f1dc6e12 100644 --- a/man/dot-createGiottoCosMxObject_all.Rd +++ b/man/dot-createGiottoCosMxObject_all.Rd @@ -9,7 +9,7 @@ info} dir_items, FOVs, remove_background_polygon = TRUE, - background_algo = c("range"), + background_algo = "range", remove_unvalid_polygons = TRUE, cores, verbose = TRUE, @@ -49,7 +49,7 @@ Both \emph{subcellular} (subellular transcript detection information) and \emph{aggregate} (aggregated detection count matrices by cell polygon from NanoString) -data will be loaded in. The two will be separated into 'cell' and 'cell_agg' +data will be loaded in. The two will be separated into "cell" and "cell_agg" spatial units in order to denote the difference in origin of the two. 
} \seealso{ diff --git a/man/dot-create_feats_to_use_matrix.Rd b/man/dot-create_feats_to_use_matrix.Rd deleted file mode 100644 index 6b887d9d2..000000000 --- a/man/dot-create_feats_to_use_matrix.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dimension_reduction.R -\name{.create_feats_to_use_matrix} -\alias{.create_feats_to_use_matrix} -\title{Create features to use matrix} -\usage{ -.create_feats_to_use_matrix( - gobject, - feat_type = NULL, - spat_unit = NULL, - sel_matrix, - feats_to_use, - verbose = FALSE -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{feat_type}{feature type} - -\item{spat_unit}{spatial unit} - -\item{sel_matrix}{selected expression matrix} - -\item{feats_to_use}{feats to use, character or vector of features} - -\item{verbose}{verbosity} -} -\value{ -subsetted matrix based on selected features -} -\description{ -subsets matrix based on vector of genes or hvf column -} -\keyword{internal} diff --git a/man/dot-estimate_transform_from_matched_descriptor.Rd b/man/dot-estimate_transform_from_matched_descriptor.Rd index 45c31a330..4a5cbc0ad 100644 --- a/man/dot-estimate_transform_from_matched_descriptor.Rd +++ b/man/dot-estimate_transform_from_matched_descriptor.Rd @@ -16,7 +16,8 @@ \arguments{ \item{keypoints1}{keypoints extracted from target image via .sift_detect} -\item{match}{a 2 col matrix of x to y index matched descriptors via .match_descriptor_single} +\item{match}{a 2 col matrix of x to y index matched descriptors via +.match_descriptor_single} } \value{ a list of model and inliners diff --git a/man/dot-igraph_remove_hetero_edges.Rd b/man/dot-igraph_remove_hetero_edges.Rd deleted file mode 100644 index 155d1e958..000000000 --- a/man/dot-igraph_remove_hetero_edges.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_clusters.R -\name{.igraph_remove_hetero_edges} -\alias{.igraph_remove_hetero_edges} -\title{Remove hetero edges from igraph} -\usage{ -.igraph_remove_hetero_edges(g, clus_attr) -} -\arguments{ -\item{g}{igraph object} - -\item{clus_attr}{character. A categorical node attribute} -} -\value{ -igraph -} -\description{ -Given an igraph \code{g} and set of node attributes \code{clus_att} that encode -different spatial clusters, remove edges that connect non-similar nodes. -This can be used when data is already clustered, but these clusters should -be further broken up based on whether they are spatially touching. -} -\keyword{internal} diff --git a/man/dot-igraph_vertex_membership.Rd b/man/dot-igraph_vertex_membership.Rd deleted file mode 100644 index f4362ce83..000000000 --- a/man/dot-igraph_vertex_membership.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_clusters.R -\name{.igraph_vertex_membership} -\alias{.igraph_vertex_membership} -\title{igraph vertex membership} -\usage{ -.igraph_vertex_membership(g, clus_name, all_ids = NULL, missing_id_name) -} -\arguments{ -\item{g}{igraph} - -\item{clus_name}{character. 
name to assign column of clustering info} - -\item{all_ids}{(optional) character vector with all ids} - -\item{missing_id_name}{character and name for vertices that were missing from g} -} -\value{ -data.table -} -\description{ -Get which weakly connected set of vertices each vertex is part of -} -\keyword{internal} diff --git a/man/dot-lib_norm_giotto.Rd b/man/dot-lib_norm_giotto.Rd deleted file mode 100644 index a979a47fc..000000000 --- a/man/dot-lib_norm_giotto.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R -\name{.lib_norm_giotto} -\alias{.lib_norm_giotto} -\title{Normalize expression matrix for library size} -\usage{ -.lib_norm_giotto(mymatrix, scalefactor) -} -\arguments{ -\item{mymatrix}{matrix object} - -\item{scalefactor}{scalefactor} -} -\value{ -matrix -} -\description{ -Normalize expression matrix for library size -} -\keyword{internal} diff --git a/man/dot-log_norm_giotto.Rd b/man/dot-log_norm_giotto.Rd deleted file mode 100644 index 94043ba4c..000000000 --- a/man/dot-log_norm_giotto.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R -\name{.log_norm_giotto} -\alias{.log_norm_giotto} -\title{Log normalize expression matrix} -\usage{ -.log_norm_giotto(mymatrix, base, offset) -} -\value{ -matrix -} -\description{ -Log normalize expression matrix -} -\keyword{internal} diff --git a/man/dot-mean_expr_det_test.Rd b/man/dot-mean_expr_det_test.Rd deleted file mode 100644 index c1e722e36..000000000 --- a/man/dot-mean_expr_det_test.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R -\name{.mean_expr_det_test} -\alias{.mean_expr_det_test} -\title{Mean expression detected test} -\usage{ -.mean_expr_det_test(mymatrix, detection_threshold = 1) -} -\arguments{ -\item{mymatrix}{matrix of expression info} - -\item{detection_threshold}{detection threshold. 
Defaults to 1 count.} -} -\value{ -numeric -} -\description{ -Mean expression detected test -} -\keyword{internal} diff --git a/man/dot-plot_matched_descriptors.Rd b/man/dot-plot_matched_descriptors.Rd index 4b64af54a..9a7de1c3f 100644 --- a/man/dot-plot_matched_descriptors.Rd +++ b/man/dot-plot_matched_descriptors.Rd @@ -13,11 +13,13 @@ \item{keypoints1}{keypoints extracted from target image via .sift_detect} -\item{match}{a 2 col matrix of x to y index matched descriptors via .match_descriptor_single} +\item{match}{a 2 col matrix of x to y index matched descriptors via +.match_descriptor_single} } \value{ None } \description{ -A wrapper function for the plot_matches for the SIFT feature extractor and descriptor pipeline +A wrapper function for the plot_matches for the SIFT feature extractor and +descriptor pipeline } diff --git a/man/dot-rna_osmfish_normalization.Rd b/man/dot-rna_osmfish_normalization.Rd deleted file mode 100644 index 58e3ee193..000000000 --- a/man/dot-rna_osmfish_normalization.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R -\name{.rna_osmfish_normalization} -\alias{.rna_osmfish_normalization} -\title{RNA osmfish normalization} -\usage{ -.rna_osmfish_normalization( - gobject, - raw_expr, - feat_type, - spat_unit, - name = "custom", - verbose = TRUE -) -} -\value{ -giotto object -} -\description{ -function for RNA normalization according to osmFISH paper -} -\keyword{internal} diff --git a/man/dot-rna_pears_resid_normalization.Rd b/man/dot-rna_pears_resid_normalization.Rd deleted file mode 100644 index 2216f57cf..000000000 --- a/man/dot-rna_pears_resid_normalization.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R -\name{.rna_pears_resid_normalization} -\alias{.rna_pears_resid_normalization} -\title{RNA pearson residuals normalization} -\usage{ -.rna_pears_resid_normalization( - gobject, - raw_expr, - feat_type, - spat_unit, - theta = 100, - name = "scaled", - verbose = TRUE -) -} -\value{ -giotto object -} -\description{ -function for RNA normalization according to Lause/Kobak et al -paper -Adapted from https://gist.github.com/hypercompetent/51a3c428745e1c06d826d76c3671797c#file-pearson_residuals-r -} -\keyword{internal} diff --git a/man/dot-rna_standard_normalization.Rd b/man/dot-rna_standard_normalization.Rd deleted file mode 100644 index 7f6edb891..000000000 --- a/man/dot-rna_standard_normalization.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R -\name{.rna_standard_normalization} -\alias{.rna_standard_normalization} -\title{RNA standard normalization} -\usage{ -.rna_standard_normalization( - gobject, - raw_expr, - feat_type, - spat_unit, - library_size_norm = TRUE, - scalefactor = 6000, - log_norm = TRUE, - log_offset = 1, - logbase = 2, - scale_feats = TRUE, - scale_cells = TRUE, - scale_order = c("first_feats", "first_cells"), - verbose = TRUE -) -} -\value{ -giotto object -} -\description{ -standard function for RNA normalization -} -\keyword{internal} diff --git a/man/dot-sift_detect.Rd b/man/dot-sift_detect.Rd index a28954503..dc2480fde 100644 --- a/man/dot-sift_detect.Rd +++ b/man/dot-sift_detect.Rd @@ -7,7 +7,8 @@ .sift_detect(x, ..., pkg_ptr) } \arguments{ -\item{x}{input matrix or preprocessed image to extract feature and descriptor from} +\item{x}{input matrix or preprocessed image to extract feature and 
+descriptor from} \item{...}{additional params to pass to `skimage.feature.SIFT()`} } @@ -15,5 +16,6 @@ list of keypoints and descriptors } \description{ -Perform feature detector and descriptor extractor on a matrix object or preprocessed image object +Perform feature detector and descriptor extractor on a matrix object or +preprocessed image object } diff --git a/man/dot-warp_transformed_image.Rd b/man/dot-warp_transformed_image.Rd index 662dad1c4..1c2e0756a 100644 --- a/man/dot-warp_transformed_image.Rd +++ b/man/dot-warp_transformed_image.Rd @@ -11,7 +11,8 @@ \item{y}{target image from .sift_preprocess} -\item{model}{estimated transformation object from .estimate_transform_from_matched_descriptor} +\item{model}{estimated transformation object from +.estimate_transform_from_matched_descriptor} } \value{ None, it will write to a output path diff --git a/man/enrichment_PAGE.Rd b/man/enrichment_PAGE.Rd new file mode 100644 index 000000000..933d79eac --- /dev/null +++ b/man/enrichment_PAGE.Rd @@ -0,0 +1,127 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spatial_enrichment.R +\name{enrichment_PAGE} +\alias{enrichment_PAGE} +\alias{makeSignMatrixPAGE} +\alias{runPAGEEnrich} +\title{PAGE feature enrichment} +\usage{ +makeSignMatrixPAGE(sign_names, sign_list) + +runPAGEEnrich( + gobject, + spat_unit = NULL, + feat_type = NULL, + sign_matrix, + expression_values = c("normalized", "scaled", "custom"), + min_overlap_genes = 5, + reverse_log_scale = TRUE, + logbase = 2, + output_enrichment = c("original", "zscore"), + p_value = FALSE, + include_depletion = FALSE, + n_times = 1000, + max_block = 2e+07, + name = NULL, + verbose = TRUE, + return_gobject = TRUE +) +} +\arguments{ +\item{sign_names}{\code{character} vector with names (labels) for each provided +feat signature} + +\item{sign_list}{list of feats in signature} + +\item{gobject}{Giotto object} + +\item{spat_unit}{spatial unit} + +\item{feat_type}{feature type} + +\item{sign_matrix}{binary matrix of signature feats for each cell type / +process. Alternatively a list of signature feats can be provided to +\code{makeSignMatrixPAGE()}, which will create the matrix for you.} + +\item{expression_values}{expression values to use} + +\item{min_overlap_genes}{minimum number of overlapping feats in +\code{sign_matrix} required to calculate enrichment} + +\item{reverse_log_scale}{reverse expression values from log scale} + +\item{logbase}{log base to use if reverse_log_scale = TRUE} + +\item{output_enrichment}{how to return enrichment output} + +\item{p_value}{logical. Default = \code{FALSE}. calculate p-values} + +\item{include_depletion}{calculate both enrichment and depletion} + +\item{n_times}{number of permutations to calculate for p_value} + +\item{max_block}{number of lines to process together (default = 20e6)} + +\item{name}{to give to spatial enrichment results, default = PAGE} + +\item{verbose}{be verbose} + +\item{return_gobject}{return giotto object} +} +\value{ +\code{matrix} (\code{makeSignMatrixPAGE()}) and +\code{giotto} (\code{runPAGEEnrich(return_gobject = TRUE)}) or +\code{data.table} (\code{runPAGEEnrich(return_gobject = FALSE)}) +} +\description{ +Expression feature-based enrichment scoring of labels. \cr +A binary matrix of signature features (e.g. for cell types or processes) can +either be directly provided or converted from a list using +\code{makeSignMatrixPAGE()}. 
This matrix is then used with \code{runPAGEEnrich()} in +order to calculate feature signature enrichment scores per spatial position +using PAGE. +} +\details{ +The enrichment Z score is calculated by using method (PAGE) from +Kim SY et al., BMC Bioinformatics, 2005 as \cr +\eqn{Z = ((Sm - mu)*m^(1/2)) / delta}. \cr +For each gene in each spot, mu is the fold change value versus the mean +expression and delta is the standard deviation. Sm is the mean fold change +value of a specific marker gene set and m is the size of a given marker +gene set. +} +\examples{ +g <- GiottoData::loadGiottoMini("visium") + +sign_list <- list( + cell_type1 = c( + "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", + "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd" + ), + cell_type2 = c( + "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", + "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a" + ), + cell_type3 = c( + "Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", + "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" + ) +) + +sm <- makeSignMatrixPAGE( + sign_names = c("cell_type1", "cell_type2", "cell_type3"), + sign_list = sign_list +) + +g <- runPAGEEnrich( + gobject = g, + sign_matrix = sm, + min_overlap_genes = 2 +) + +spatPlot2D(g, + cell_color = "cell_type2", + spat_enr_names = "PAGE", + color_as_factor = FALSE +) +} diff --git a/man/estimateAutomatedImageRegistrationWithSIFT.Rd b/man/estimateAutomatedImageRegistrationWithSIFT.Rd index a8601a0f2..ee7346a15 100644 --- a/man/estimateAutomatedImageRegistrationWithSIFT.Rd +++ b/man/estimateAutomatedImageRegistrationWithSIFT.Rd @@ -7,30 +7,39 @@ estimateAutomatedImageRegistrationWithSIFT( x, y, - plot_match = F, + plot_match = FALSE, max_ratio = 0.6, estimate_fun = "affine", save_warp = NULL, - verbose = T + verbose = TRUE ) } \arguments{ -\item{x}{required. Source matrix input, could be generated from preprocessImageToMatrix} +\item{x}{required. Source matrix input, could be generated from +preprocessImageToMatrix} -\item{y}{required. Source matrix input, could be generated from preprocessImageToMatrix} +\item{y}{required. Target matrix input, could be generated from +preprocessImageToMatrix} -\item{plot_match}{whether or not to plot the matching descriptors.Default False} +\item{plot_match}{whether or not to plot the matching descriptors. +Default FALSE} \item{max_ratio}{max_ratio parameter for matching descriptors, default 0.6} -\item{estimate_fun}{default Affine.
The transformation model to use for +estimation} -\item{save_warp}{default NULL, if not NULL, please provide an output image path to save the warpped image.} +\item{save_warp}{default NULL, if not NULL, please provide an output image +path to save the warped image.} } \value{ a list of the estimated transformation object -example estimation <- estimateAutomatedImageRegistrationWithSIFT(x = image_mtx1,y = image_mtx2) } \description{ -Automatically estimate a transform with SIFT feature detection, descriptor match and returns a transformation object to use +Automatically estimate a transform with SIFT feature detection and +descriptor matching, and return a transformation object to use +} +\examples{ +estimation <- estimateAutomatedImageRegistrationWithSIFT( +x = image_mtx1, y = image_mtx2) } diff --git a/man/exportGiottoViewer.Rd b/man/exportGiottoViewer.Rd index 4ee0c25e6..6b6b0e139 100644 --- a/man/exportGiottoViewer.Rd +++ b/man/exportGiottoViewer.Rd @@ -50,7 +50,8 @@ exportGiottoViewer( \item{dim_red_rescale}{numericals to rescale the coordinates} -\item{expression_rounding}{numerical indicating how to round the expression data} +\item{expression_rounding}{numerical indicating how to round the expression +data} \item{overwrite_dir}{overwrite files in the directory if it already existed} @@ -63,8 +64,9 @@ writes the necessary output to use in Giotto Viewer compute highly variable genes } \details{ -Giotto Viewer expects the results from Giotto Analyzer in a specific format, -which is provided by this function. To include enrichment results from {\code{\link{createSpatialEnrich}}} -include the provided spatial enrichment name (default PAGE or rank) -and add the gene signature names (.e.g cell types) to the numeric annotations parameter. +Giotto Viewer expects the results from Giotto Analyzer in a +specific format, which is provided by this function. To include enrichment +results from \code{\link{runSpatialEnrich}}, include the provided +spatial enrichment name (default PAGE or rank) and add the gene signature +names (e.g. cell types) to the numeric annotations parameter. } diff --git a/man/exprCellCellcom.Rd b/man/exprCellCellcom.Rd index e3d89cef8..ca167a8c1 100644 --- a/man/exprCellCellcom.Rd +++ b/man/exprCellCellcom.Rd @@ -46,7 +46,7 @@ exprCellCellcom( \item{adjust_target}{adjust multiple hypotheses at the cell or feature level} -\item{set_seed}{set seed for random simulations (default = TRUE)} +\item{set_seed}{`logical`. set seed for random simulations (default = TRUE)} \item{seed_number}{seed number} diff --git a/man/filterCombinations.Rd b/man/filterCombinations.Rd index 301330bb9..8d1cd3728 100644 --- a/man/filterCombinations.Rd +++ b/man/filterCombinations.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R +% Please edit documentation in R/filter.R \name{filterCombinations} \alias{filterCombinations} \title{filterCombinations} @@ -56,9 +56,11 @@ transformation} \item{save_plot}{logical.
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ list of data.table and ggplot object diff --git a/man/filterDistributions.Rd b/man/filterDistributions.Rd index 79a541513..c67f79c3b 100644 --- a/man/filterDistributions.Rd +++ b/man/filterDistributions.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R +% Please edit documentation in R/filter.R \name{filterDistributions} \alias{filterDistributions} \title{filterDistributions} diff --git a/man/filterGiotto.Rd b/man/filterGiotto.Rd index 4b8070200..7633c3153 100644 --- a/man/filterGiotto.Rd +++ b/man/filterGiotto.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R +% Please edit documentation in R/filter.R \name{filterGiotto} \alias{filterGiotto} \title{filterGiotto} diff --git a/man/filterInteractionChangedFeats.Rd b/man/filterInteractionChangedFeats.Rd index 06998f34d..151eda75f 100644 --- a/man/filterInteractionChangedFeats.Rd +++ b/man/filterInteractionChangedFeats.Rd @@ -67,9 +67,9 @@ Filter Interaction Changed Feature scores. g <- GiottoData::loadGiottoMini("visium") icf <- findInteractionChangedFeats(g, - cluster_column = "leiden_clus", - selected_feats = c("Gna12", "Ccnd2", "Btbd17"), - nr_permutations = 10 + cluster_column = "leiden_clus", + selected_feats = c("Gna12", "Ccnd2", "Btbd17"), + nr_permutations = 10 ) force(icf) force(icf$ICFscores) diff --git a/man/findICFSpot.Rd b/man/findICFSpot.Rd index 500f53e2d..056140815 100644 --- a/man/findICFSpot.Rd +++ b/man/findICFSpot.Rd @@ -36,7 +36,9 @@ findICFSpot( \item{expression_values}{expression values to use} -\item{ave_celltype_exp}{average feature expression in each cell type} +\item{ave_celltype_exp}{\code{matrix} or \code{data.frame}. Average feature expression +in each cell type. 
Colnames should be the cell type and rownames are feat +names.} \item{selected_features}{subset of selected features (optional)} @@ -113,7 +115,7 @@ x <- findMarkers_one_vs_all(g, ) sign_gene <- x$feats -sign_matrix <- matrix(rnorm(length(sign_gene) * 8, mean = 10), +sign_matrix <- matrix(rnorm(length(sign_gene) * 7, mean = 10), nrow = length(sign_gene) ) rownames(sign_matrix) <- sign_gene @@ -121,17 +123,19 @@ colnames(sign_matrix) <- paste0("celltype_", unique(x$cluster)) g <- runDWLSDeconv(gobject = g, sign_matrix = sign_matrix) ave_celltype_exp <- calculateMetaTable(g, metadata_cols = "leiden_clus") -ave_celltype_exp <- reshape2::dcast(ave_celltype_exp, variable~leiden_clus) -rownames(ave_celltype_exp) <- ave_celltype_exp$variable -ave_celltype_exp <- ave_celltype_exp[,-1] +ave_celltype_exp <- data.table::dcast( + ave_celltype_exp, variable ~ leiden_clus +) +ave_celltype_exp <- as.matrix(ave_celltype_exp, rownames = "variable") colnames(ave_celltype_exp) <- colnames(sign_matrix) -findICFSpot(g, +res <- findICFSpot(g, spat_unit = "cell", feat_type = "rna", ave_celltype_exp = ave_celltype_exp, spatial_network_name = "spatial_network" ) +force(res) } \seealso{ \code{\link[=findInteractionChangedFeats]{findInteractionChangedFeats()}} diff --git a/man/findInteractionChangedFeats.Rd b/man/findInteractionChangedFeats.Rd index e47ebf356..23534c936 100644 --- a/man/findInteractionChangedFeats.Rd +++ b/man/findInteractionChangedFeats.Rd @@ -140,7 +140,7 @@ icf2 <- findICF(g, cluster_column = "leiden_clus", selected_feats = c("Gna12", "Ccnd2", "Btbd17"), nr_permutations = 10 - ) +) } \seealso{ \code{\link[=filterInteractionChangedFeats]{filterInteractionChangedFeats()}} \code{\link[=findICFSpot]{findICFSpot()}} diff --git a/man/getONTraCv1Input.Rd b/man/getONTraCv1Input.Rd index 784a77678..a54bb1b2a 100644 --- a/man/getONTraCv1Input.Rd +++ b/man/getONTraCv1Input.Rd @@ -39,7 +39,7 @@ This function generate the input data for ONTraC v1 g <- GiottoData::loadGiottoMini("visium") getONTraCv1Input( - gobject = g, - cell_type = "custom_leiden" + gobject = g, + cell_type = "custom_leiden" ) } diff --git a/man/getONTraCv2Input.Rd b/man/getONTraCv2Input.Rd deleted file mode 100644 index fb4336bb8..000000000 --- a/man/getONTraCv2Input.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ONTraC_wrapper.R -\name{getONTraCv2Input} -\alias{getONTraCv2Input} -\title{getONTraCv2Input} -\usage{ -getONTraCv2Input( - gobject, - cell_type, - output_path = getwd(), - spat_unit = NULL, - feat_type = NULL, - verbose = TRUE -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{cell_type}{the cell type column name in the metadata} - -\item{output_path}{the path to save the output file} - -\item{spat_unit}{spatial unit (e.g. "cell")} - -\item{feat_type}{feature type (e.g. 
"rna", "dna", "protein")} - -\item{verbose}{be verbose} -} -\value{ -data.table with columns: Cell_ID, Sample, x, y, Cell_Type -} -\description{ -generate the input data for ONTraC v2 -} -\details{ -This function generate the input data for ONTraC v2 -} -\examples{ -g <- GiottoData::loadGiottoMini("visium") - -getONTraCv2Input( - gobject = g, - cell_type = "custom_leiden" -) -} diff --git a/man/giottoToAnndataZarr.Rd b/man/giottoToAnndataZarr.Rd new file mode 100644 index 000000000..d89184a62 --- /dev/null +++ b/man/giottoToAnndataZarr.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interactivity.R +\name{giottoToAnndataZarr} +\alias{giottoToAnndataZarr} +\title{Create a local anndata zarr folder} +\usage{ +giottoToAnndataZarr( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression = "raw", + output_path +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{expression}{expression values to extract (e.g. "raw", "normalized", +"scaled")} + +\item{output_path}{path to create and save the anndata zarr folder} +} +\value{ +local anndata zarr folder +} +\description{ +Create a local anndata zarr folder +} +\examples{ +# using the mini visium object +giotto_object <- GiottoData::loadGiottoMini("visium") + +giottoToAnndataZarr(giotto_object, + expression = "raw", + output_path = tempdir() +) + +# using the mini vizgen object +giotto_object <- GiottoData::loadGiottoMini("vizgen") + +giottoToAnndataZarr(giotto_object, + spat_unit = "aggregate", + expression = "scaled", + output_path = tempdir() +) +} diff --git a/man/heatmSpatialCorGenes.Rd b/man/heatmSpatialCorGenes.Rd deleted file mode 100644 index bbf314b0a..000000000 --- a/man/heatmSpatialCorGenes.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_genes.R -\name{heatmSpatialCorGenes} -\alias{heatmSpatialCorGenes} -\title{heatmSpatialCorGenes} -\usage{ -heatmSpatialCorGenes(...) 
-} -\arguments{ -\item{...}{ - Arguments passed on to \code{\link[=heatmSpatialCorFeats]{heatmSpatialCorFeats}} - \describe{ - \item{\code{gobject}}{giotto object} - \item{\code{spatCorObject}}{spatial correlation object} - \item{\code{use_clus_name}}{name of clusters to visualize -(from clusterSpatialCorFeats())} - \item{\code{show_cluster_annot}}{show cluster annotation on top of heatmap} - \item{\code{show_row_dend}}{show row dendrogram} - \item{\code{show_column_dend}}{show column dendrogram} - \item{\code{show_row_names}}{show row names} - \item{\code{show_column_names}}{show column names} - \item{\code{show_plot}}{show plot} - \item{\code{return_plot}}{return ggplot object} - \item{\code{save_plot}}{directly save the plot [boolean]} - \item{\code{save_param}}{list of saving parameters, see -\code{\link{showSaveParameters}}} - \item{\code{default_save_name}}{default save name for saving, don't change, -change save_name in save_param} - }} -} -\value{ -heatmap -} -\description{ -Create heatmap of spatially correlated genes -} -\seealso{ -\code{\link{heatmSpatialCorFeats}} -} diff --git a/man/hyperGeometricEnrich.Rd b/man/hyperGeometricEnrich.Rd deleted file mode 100644 index c4dd29587..000000000 --- a/man/hyperGeometricEnrich.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_enrichment.R -\name{hyperGeometricEnrich} -\alias{hyperGeometricEnrich} -\title{hyperGeometricEnrich} -\usage{ -hyperGeometricEnrich(...) -} -\arguments{ -\item{...}{ - Arguments passed on to \code{\link[=runHyperGeometricEnrich]{runHyperGeometricEnrich}} - \describe{ - \item{\code{gobject}}{Giotto object} - \item{\code{spat_unit}}{spatial unit} - \item{\code{feat_type}}{feature type} - \item{\code{sign_matrix}}{Matrix of signature genes for each cell type / process} - \item{\code{expression_values}}{expression values to use} - \item{\code{reverse_log_scale}}{reverse expression values from log scale} - \item{\code{logbase}}{log base to use if reverse_log_scale = TRUE} - \item{\code{top_percentage}}{percentage of cells that will be considered to have -gene expression with matrix binarization} - \item{\code{output_enrichment}}{how to return enrichment output} - \item{\code{p_value}}{calculate p-values (boolean, default = FALSE)} - \item{\code{name}}{to give to spatial enrichment results, default = hypergeometric} - \item{\code{return_gobject}}{return giotto object} - }} -} -\value{ -enrichment scores -} -\description{ -Function to calculate gene signature enrichment scores per -spatial position using a hypergeometric test. -} -\seealso{ -\code{\link{runHyperGeometricEnrich}} -} diff --git a/man/identifyTMAcores.Rd b/man/identifyTMAcores.Rd index 010f05d50..e0de7a25d 100644 --- a/man/identifyTMAcores.Rd +++ b/man/identifyTMAcores.Rd @@ -10,8 +10,12 @@ identifyTMAcores( feat_type = NULL, spatial_network_name = "Delaunay_network", core_id_name = "core_id", + id_fmt = "\%d", include_all_ids = TRUE, missing_id_name = "not_connected", + min_nodes = 5, + join_split_cores = TRUE, + join_tolerance = 1.2, return_gobject = TRUE ) } @@ -26,13 +30,24 @@ identifyTMAcores( \item{core_id_name}{metadata column name for the core information} -\item{include_all_ids}{Boolean. Include all ids, including vertex ids not found -in the spatial network} +\item{id_fmt}{character. [sprintf] formatting to use for core ids} -\item{missing_id_name}{Character. Name for vertices that were missing from -spatial network} +\item{include_all_ids}{logical. 
Include all ids, including vertex ids not +found in the spatial network} -\item{return_gobject}{Boolean. Return giotto object} +\item{missing_id_name}{character. Name for nodes that are not connected to +a core.} + +\item{min_nodes}{numeric. Minimal number of nodes to not be considered +an unconnected group.} + +\item{join_split_cores}{logical. Attempt to repair core IDs when a core +is split down the middle and detected as two different cores.} + +\item{join_tolerance}{numeric. Max ratio allowed relative to previous max +core convex hull area when determining if a pair of cores should be joined.} + +\item{return_gobject}{logical. Return giotto object} } \value{ cluster annotations diff --git a/man/importVisiumHD.Rd b/man/importVisiumHD.Rd index cf54b664c..6d49f3f46 100644 --- a/man/importVisiumHD.Rd +++ b/man/importVisiumHD.Rd @@ -12,13 +12,16 @@ importVisiumHD( array_subset_row = NULL, array_subset_col = NULL, pxl_subset_row = NULL, - pxl_subset_col = NULL + pxl_subset_col = NULL, + shape = "hexagon", + shape_size = 400 ) } \arguments{ \item{visiumHD_dir}{Visium HD output directory (e.g. square_016um)} -\item{expression_source}{character. Raw or filter expression data. Defaults to 'raw'} +\item{expression_source}{character. Raw or filter expression data. Defaults +to "raw"} \item{gene_column_index}{numeric. Expression column to use for gene names 1 = Ensembl and 2 = gene symbols} @@ -26,26 +29,26 @@ importVisiumHD( \item{barcodes}{character vector. (optional) Use if you only want to load a subset of the pixel barcodes} -\item{array_subset_row}{numeric vector. (optional) Vector with min and max values -to subset based on array rows} +\item{array_subset_row}{numeric vector. (optional) Vector with min and max +values to subset based on array rows} -\item{array_subset_col}{numeric vector. (optional) Vector with min and max values -to subset based on array columns} +\item{array_subset_col}{numeric vector. (optional) Vector with min and max +values to subset based on array columns} -\item{pxl_subset_row}{numeric vector. (optional) Vector with min and max values -to subset based on row pixels} +\item{pxl_subset_row}{numeric vector. (optional) Vector with min and max +values to subset based on row pixels} -\item{pxl_subset_col}{numeric vector. (optional) Vector with min and max values -to subset based on column pixels} +\item{pxl_subset_col}{numeric vector. (optional) Vector with min and max +values to subset based on column pixels} } \value{ VisiumHDReader object } \description{ Giotto import functionalities for Visium HD datasets. This function generates -a `VisiumHDReader` instance that has convenient reader functions for converting -individual pieces of Visium HD data into Giotto-compatible representations when -the param `visiumHD_dir` is provided. +a `VisiumHDReader` instance that has convenient reader functions for +converting individual pieces of Visium HD data into Giotto-compatible +representations when the param `visiumHD_dir` is provided. A function that creates the full `giotto` object is also available. 
These functions should have all param values provided as defaults, but can be flexibly modified to do things such as look in alternative @@ -64,16 +67,17 @@ reader$visiumHD_dir <- "path to visium HD dir" readerHD$visiumHD_dir <- visiumHD_dir # Load tissue positions or create cell metadata -tissue_pos = readerHD$load_tissue_position() +tissue_pos <- readerHD$load_tissue_position() metadata <- readerHD$load_metadata() Load matrix or create expression object matrix <- readerHD$load_matrix() -expression_obj = readerHD$load_expression() +expression_obj <- readerHD$load_expression() -Load transcript data (cell metadata, expression object, and transcripts per pixel) -my_transcripts = readerHD$load_transcripts(array_subset_row = c(500, 1000), - array_subset_col = c(500, 1000)) +Load transcript data (cell metadata, expression object, and transcripts per +pixel) +my_transcripts <- readerHD$load_transcripts(array_subset_row = c(500, 1000), + array_subset_col = c(500, 1000)) # Create a `giotto` object and add the loaded data # TODO diff --git a/man/installGiottoONTraCEnvironment.Rd b/man/installGiottoONTraCEnvironment.Rd new file mode 100644 index 000000000..d4f015a92 --- /dev/null +++ b/man/installGiottoONTraCEnvironment.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{installGiottoONTraCEnvironment} +\alias{installGiottoONTraCEnvironment} +\title{installGiottoONTraCEnvironment} +\usage{ +installGiottoONTraCEnvironment( + python_version = "3.11.9", + ontrac_version = "latest", + mini_install_path = NULL, + confirm = TRUE, + envname = "giotto_ontrac_env", + conda = "auto", + force_miniconda = FALSE, + force_environment = FALSE, + verbose = NULL +) +} +\arguments{ +\item{python_version}{python version to use within the giotto conda +environment. Default is v3.11.9} + +\item{ontrac_version}{ONTraC version to install. Default is "latest"} + +\item{mini_install_path}{(optional) desired miniconda installation location. +Default is chosen by `reticulate::install_miniconda()`} + +\item{confirm}{whether to pause for confirmation of conda environment +install location (default = TRUE)} + +\item{envname}{name to assign environment. Default = "giotto_ontrac_env"} + +\item{conda}{either "auto" (default) to allow reticulate to handle it, or +the full filepath to the conda executable. You can also set the option +"reticulate.conda_binary" or `Sys.setenv()` "RETICULATE_CONDA" to tell +reticulate where to look.} + +\item{force_miniconda}{force reinstallation of miniconda} + +\item{force_environment}{force reinstallation of the giotto environment} + +\item{verbose}{be verbose} + +\item{packages_to_install}{python modules (packages) to install for Giotto.} +} +\value{ +installs a giotto environment using the reticulate miniconda system +} +\description{ +Installs a conda environment contains ONTraC. This +includes a miniconda installation and also a set of python packages that +Giotto may often use. See details for further information +} +\details{ +This function will install a local conda environment using +the miniconda system as implemented by \pkg{reticulate}. Which contains +ONTraC and a set of python packages that Giotto may often use. 
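+As an illustrative sketch only (all argument names and values below are the
+documented defaults shown in the usage section above, not requirements), a
+customized install might look like:
+\preformatted{
+installGiottoONTraCEnvironment(
+    python_version = "3.11.9",
+    ontrac_version = "latest",
+    envname = "giotto_ontrac_env",
+    conda = "auto",
+    force_miniconda = FALSE,
+    force_environment = FALSE
+)
+}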
+} +\examples{ +installGiottoONTraCEnvironment() + +} diff --git a/man/interactiveLandmarkSelection.Rd b/man/interactiveLandmarkSelection.Rd index c0b1ecef4..e139cd081 100644 --- a/man/interactiveLandmarkSelection.Rd +++ b/man/interactiveLandmarkSelection.Rd @@ -7,9 +7,13 @@ interactiveLandmarkSelection(source, target) } \arguments{ -\item{source_image}{the image to be plotted on the left, and landmarks will output in the first of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image} +\item{source_image}{the image to be plotted on the left, and landmarks will +output in the first of the list. Input can be a ggplot object, +a GiottoImage, or a character representing a path to an image} -\item{target_image}{the image to be plotted on the right, and landmarks will output in the second of the list. Input can be a ggplot object, a GiottoImage, or a character represent a path to a image} +\item{target_image}{the image to be plotted on the right, and landmarks will +output in the second of the list. Input can be a ggplot object, a +GiottoImage, or a character representing a path to an image} } \value{ a list of landmarks diff --git a/man/jackstrawPlot.Rd b/man/jackstrawPlot.Rd index 6a757e5c7..1a4778a85 100644 --- a/man/jackstrawPlot.Rd +++ b/man/jackstrawPlot.Rd @@ -10,13 +10,16 @@ jackstrawPlot( feat_type = NULL, expression_values = c("normalized", "scaled", "custom"), reduction = c("cells", "feats"), - feats_to_use = NULL, - center = FALSE, - scale_unit = FALSE, + feats_to_use = "hvf", + center = TRUE, + scale_unit = TRUE, ncp = 20, ylim = c(0, 1), iter = 10, threshold = 0.01, + random_subset = NULL, + set_seed = TRUE, + seed_number = 1234, verbose = TRUE, show_plot = NULL, return_plot = NULL, @@ -46,10 +49,17 @@ jackstrawPlot( \item{ylim}{y-axis limits on jackstraw plot} -\item{iter}{number of interations for jackstraw} +\item{iter}{number of iterations for jackstraw} \item{threshold}{p-value threshold to call a PC significant} +\item{random_subset}{randomized subset of matrix to use to approximate but +speed up calculation} + +\item{set_seed}{logical. whether to set a seed when random_subset is used} + +\item{seed_number}{seed number to use when random_subset is used} + \item{verbose}{show progress of jackstraw method} \item{show_plot}{logical. show plot} @@ -58,20 +68,35 @@ jackstrawPlot( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ -ggplot object for jackstraw method +if \code{return_plot} = \code{TRUE}: ggplot object for jackstraw method +if \code{return_plot} = \code{FALSE}: silently returns number of significant PCs } \description{ -identify significant prinicipal components (PCs) +Identify significant principal components (PCs) } \details{ The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} - function. By systematically permuting genes it identifies robust, and thus - significant, PCs. +function. By systematically permuting genes it identifies robust, and thus +significant, PCs. This implementation makes small modifications to SVD +calculation for improved efficiency and flexibility with different matrix +types.
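A minimal sketch of how the new sampling arguments might be used to speed up the permutation test (not taken from the diff; the subset size is illustrative and this assumes the mini Visium object already holds normalized expression and highly variable features):

g <- GiottoData::loadGiottoMini("visium")
# approximate the jackstraw test on a random subset of the matrix
jackstrawPlot(
    g,
    ncp = 20,
    iter = 10,
    random_subset = 500,  # subsample to speed up calculation
    set_seed = TRUE,
    seed_number = 1234
)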
\cr +This implementation supports both dense and sparse input matrices. \cr + +\strong{steps} +\enumerate{ +\item Select singular values to calculate based on matrix dims and ncp +\item Find SVD to get variance explained of each PC +\item Randomly sample across features then re-calculate randomized variance +\item Determine P-value by comparing actual vs randomized explained variance, +indicating the significance of each PC +} } \examples{ g <- GiottoData::loadGiottoMini("visium") diff --git a/man/labelTransfer.Rd b/man/labelTransfer.Rd new file mode 100644 index 000000000..dbf876a0b --- /dev/null +++ b/man/labelTransfer.Rd @@ -0,0 +1,118 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clustering.R +\name{labelTransfer} +\alias{labelTransfer} +\alias{labelTransfer,giotto,giotto-method} +\alias{labelTransfer,giotto,missing-method} +\title{Transfer labels/annotations between sets of data via similarity +voting} +\usage{ +\S4method{labelTransfer}{giotto,giotto}( + x, + y, + spat_unit = NULL, + feat_type = NULL, + labels, + k = 10, + name = paste0("trnsfr_", labels), + prob = TRUE, + reduction = "cells", + reduction_method = "pca", + reduction_name = "pca", + dimensions_to_use = 1:10, + return_gobject = TRUE, + ... +) + +\S4method{labelTransfer}{giotto,missing}( + x, + spat_unit = NULL, + feat_type = NULL, + source_cell_ids, + target_cell_ids, + labels, + k = 10, + name = paste0("trnsfr_", labels), + prob = TRUE, + reduction = "cells", + reduction_method = "pca", + reduction_name = "pca", + dimensions_to_use = 1:10, + return_gobject = TRUE, + ... +) +} +\arguments{ +\item{x}{target object} + +\item{y}{source object} + +\item{labels}{metadata column in source with labels to transfer} + +\item{k}{number of k-neighbors to train a KNN classifier} + +\item{name}{metadata column in target to apply the full set of labels to} + +\item{prob}{output knn probabilities together with label predictions} + +\item{reduction}{reduction on cells or features (default = "cells")} + +\item{reduction_method}{shared reduction method (default = "pca" space)} + +\item{reduction_name}{name of shared reduction space (default name = "pca")} + +\item{dimensions_to_use}{dimensions to use in shared reduction space +(default = 1:10)} + +\item{...}{ + Arguments passed on to \code{\link[FNN:knn]{FNN::knn}} + \describe{ + \item{\code{algorithm}}{nearest neighbor search algorithm.} + }} + +\item{source_cell_ids}{cell/spatial IDs with the source labels to transfer} + +\item{target_cell_ids}{cell/spatial IDs to transfer the labels to. +IDs from \code{source_cell_ids} are always included as well.} +} +\value{ +object \code{x} with new transferred labels added to metadata +} +\description{ +When two sets of data share an embedding space, transfer the labels from +one of the sets to the other based on KNN similarity voting in that space. +} +\details{ +This function trains a KNN classifier with \code{\link[FNN:knn]{FNN::knn()}}. +The training data is from object \code{y} or \code{source_cell_ids} subset in \code{x} and +uses existing annotations within the cell metadata. +Cells without annotation/labels from \code{x} or \code{target_cell_ids} subset in \code{x} +will receive predicted labels (and optional probabilities when +\code{prob = TRUE}). + +\strong{IMPORTANT} This projection assumes that you're using the same dimension +reduction space (e.g. PCA) and number of dimensions (e.g. 
first 10 PCs) to +train the KNN classifier as you used to create the initial +annotations/labels in the source Giotto object. + +This function can allow you to work with very big data as you can predict +cell labels on a smaller & subsetted Giotto object and then project the cell +labels to the remaining cells in the target Giotto object. It can also be +used to transfer labels from one set of annotated data to another dataset +based on expression similarity after joining and integrating. +} +\examples{ +g <- GiottoData::loadGiottoMini("visium") +id_subset <- sample(spatIDs(g), 300) +n_pred <- nrow(pDataDT(g)) - 300 + +# transfer labels from one object to another ################### +g_small <- g[, id_subset] +# additional steps to get labels to transfer on smaller object... +g <- labelTransfer(g, g_small, labels = "leiden_clus") + +# transfer labels between subsets of a single object ########### +g <- labelTransfer(g, + label = "leiden_clus", source_cell_ids = id_subset, name = "knn_leiden2" +) +} diff --git a/man/loadOntraCResults.Rd b/man/loadOntraCResults.Rd index 2eda6902f..9bba567de 100644 --- a/man/loadOntraCResults.Rd +++ b/man/loadOntraCResults.Rd @@ -4,12 +4,30 @@ \alias{loadOntraCResults} \title{loadOntraCResults} \usage{ -loadOntraCResults(gobject, ontrac_results_dir = getwd()) +loadOntraCResults( + gobject, + ontrac_results_dir = getwd(), + preprocessing_dir = file.path(ontrac_results_dir, "preprocessing_dir"), + GNN_dir = file.path(ontrac_results_dir, "GNN_dir"), + NTScore_dir = file.path(ontrac_results_dir, "NTScore_dir"), + NTScore_reverse = FALSE +) } \arguments{ \item{gobject}{giotto object} \item{ontrac_results_dir}{the directory where the ONTraC results are saved} + +\item{preprocessing_dir}{the directory to save the preprocessing results. +Default is file.path(ontrac_results_dir, "preprocessing_dir")} + +\item{GNN_dir}{the directory to save the GNN results. Default is +file.path(ontrac_results_dir, "GNN_dir")} + +\item{NTScore_dir}{the directory to save the NTScore results. Default is +file.path(ontrac_results_dir, "NTScore_dir")} + +\item{NTScore_reverse}{whether to reverse the NTScore. Default is FALSE} } \value{ gobject with ONTraC results diff --git a/man/load_cell_NT_score.Rd b/man/load_cell_NT_score.Rd index 4a8b5d87b..a5b3205f1 100644 --- a/man/load_cell_NT_score.Rd +++ b/man/load_cell_NT_score.Rd @@ -4,12 +4,23 @@ \alias{load_cell_NT_score} \title{load_cell_NT_score} \usage{ -load_cell_NT_score(gobject, ontrac_results_dir = getwd()) +load_cell_NT_score( + gobject, + ontrac_results_dir = getwd(), + NTScore_dir = file.path(ontrac_results_dir, "NTScore_dir"), + NTScore_reverse = FALSE +) } \arguments{ \item{gobject}{giotto object} -\item{ontrac_results_dir}{the directory where the ONTraC results are saved} +\item{ontrac_results_dir}{the directory where the ONTraC results are saved. +Default is getwd()} + +\item{NTScore_dir}{the directory to save the NTScore results. Default is +file.path(ontrac_results_dir, "NTScore_dir")} + +\item{NTScore_reverse}{whether to reverse the NTScore. 
Default is FALSE} } \value{ gobject with cell-level NT score diff --git a/man/load_cell_bin_niche_cluster.Rd b/man/load_cell_bin_niche_cluster.Rd deleted file mode 100644 index 4aafb4cb5..000000000 --- a/man/load_cell_bin_niche_cluster.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ONTraC_wrapper.R -\name{load_cell_bin_niche_cluster} -\alias{load_cell_bin_niche_cluster} -\title{load_cell_bin_niche_cluster} -\usage{ -load_cell_bin_niche_cluster(gobject, ontrac_results_dir = getwd()) -} -\arguments{ -\item{gobject}{giotto object} - -\item{ontrac_results_dir}{the directory where the ONTraC results are saved} -} -\value{ -gobject with cell-level binarized niche cluster -} -\description{ -load cell-level binarized niche cluster -} -\details{ -This function loads the ONTraC outputed cell-level binarized niche -cluster into the giotto object. -} diff --git a/man/load_cell_niche_cluster_prob.Rd b/man/load_cell_niche_cluster_prob.Rd index 2bcb59346..204bfe00d 100644 --- a/man/load_cell_niche_cluster_prob.Rd +++ b/man/load_cell_niche_cluster_prob.Rd @@ -7,6 +7,7 @@ load_cell_niche_cluster_prob( gobject, ontrac_results_dir = getwd(), + GNN_dir = file.path(ontrac_results_dir, "GNN_dir"), spat_unit = "cell", feat_type = "niche cluster", name = "prob" @@ -15,7 +16,11 @@ load_cell_niche_cluster_prob( \arguments{ \item{gobject}{giotto object} -\item{ontrac_results_dir}{the directory where the ONTraC results are saved} +\item{ontrac_results_dir}{the directory where the ONTraC results are saved. +Default is getwd()} + +\item{GNN_dir}{the directory to save the GNN results. Default is +file.path(ontrac_results_dir, "GNN_dir")} \item{spat_unit}{spatial unit (e.g. "cell")} diff --git a/man/load_merscope_folder.Rd b/man/load_merscope_folder.Rd index f187f244a..4b06bef51 100644 --- a/man/load_merscope_folder.Rd +++ b/man/load_merscope_folder.Rd @@ -11,7 +11,7 @@ dir_items, data_to_use, fovs = NULL, - poly_z_indices = 1L:7L, + poly_z_indices = seq(from = 1, to = 7), cores = NA, verbose = TRUE ) diff --git a/man/load_nc_connectivity.Rd b/man/load_nc_connectivity.Rd index ae96d3677..9a9f76752 100644 --- a/man/load_nc_connectivity.Rd +++ b/man/load_nc_connectivity.Rd @@ -7,6 +7,7 @@ load_nc_connectivity( gobject, ontrac_results_dir = getwd(), + GNN_dir = file.path(ontrac_results_dir, "GNN_dir"), spat_unit = "niche cluster", feat_type = "connectivity", name = "normalized" @@ -15,7 +16,11 @@ load_nc_connectivity( \arguments{ \item{gobject}{giotto object} -\item{ontrac_results_dir}{the directory where the ONTraC results are saved} +\item{ontrac_results_dir}{the directory where the ONTraC results are saved. +Default is getwd()} + +\item{GNN_dir}{the directory to save the GNN results. Default is +file.path(ontrac_results_dir, "GNN_dir")} \item{spat_unit}{spatial unit (e.g. 
"cell")} diff --git a/man/load_niche_cluster_nt_score.Rd b/man/load_niche_cluster_nt_score.Rd new file mode 100644 index 000000000..b71f91e84 --- /dev/null +++ b/man/load_niche_cluster_nt_score.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{load_niche_cluster_nt_score} +\alias{load_niche_cluster_nt_score} +\title{load_niche_cluster_nt_score} +\usage{ +load_niche_cluster_nt_score( + gobject, + ontrac_results_dir = getwd(), + NTScore_dir = file.path(ontrac_results_dir, "NTScore_dir"), + NTScore_reverse = FALSE +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{ontrac_results_dir}{the directory where the ONTraC results are saved. +Default is getwd()} + +\item{NTScore_dir}{the directory to save the NTScore results. Default is +file.path(ontrac_results_dir, "NTScore_dir")} + +\item{NTScore_reverse}{whether to reverse the NTScore. Default is FALSE} +} +\value{ +gobject with niche cluster NT score +} +\description{ +load niche cluster NT score +} +\details{ +This function loads the ONTraC outputed niche cluster NT score +into the giotto object. +} diff --git a/man/makeSignMatrixPAGE.Rd b/man/makeSignMatrixPAGE.Rd deleted file mode 100644 index 805dde245..000000000 --- a/man/makeSignMatrixPAGE.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_enrichment.R -\name{makeSignMatrixPAGE} -\alias{makeSignMatrixPAGE} -\title{makeSignMatrixPAGE} -\usage{ -makeSignMatrixPAGE(sign_names, sign_list) -} -\arguments{ -\item{sign_names}{vector with names for each provided gene signature} - -\item{sign_list}{list of genes (signature)} -} -\value{ -matrix -} -\description{ -Function to convert a list of signature genes -(e.g. for cell types or processes) into -a binary matrix format that can be used with the PAGE enrichment option. -Each cell type or process should -have a vector of cell-type or process specific genes. These vectors need to -be combined into a list (sign_list). -The names of the cell types or processes that are provided in the list need -to be given (sign_names). 
-} -\examples{ -sign_list <- list( - cell_type1 = c( - "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", - "Rims2", "Gfap", "Gjc3", "Chrna4", "Prkcd" - ), - cell_type2 = c( - "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", "Hrh3", "Tmbim1", - "Carhsp1", "Tmem88b", "Ugt8a" - ), - cell_type2 = c( - "Arpp19", "Lamp5", "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", - "Cygb", "Ttc9b", "Ipcef1" - ) -) - -makeSignMatrixPAGE( - sign_names = c("cell_type1", "cell_type2", "cell_type3"), - sign_list = sign_list -) -} -\seealso{ -\code{\link{PAGEEnrich}} -} diff --git a/man/normalizeGiotto.Rd b/man/normalizeGiotto.Rd index 6c4b6ee1a..0b495932d 100644 --- a/man/normalizeGiotto.Rd +++ b/man/normalizeGiotto.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auxiliary_giotto.R +% Please edit documentation in R/normalize.R \name{normalizeGiotto} \alias{normalizeGiotto} \title{normalizeGiotto} @@ -9,23 +9,24 @@ normalizeGiotto( spat_unit = NULL, feat_type = NULL, expression_values = "raw", - norm_methods = c("standard", "pearson_resid", "osmFISH"), + norm_methods = c("standard", "pearson_resid", "osmFISH", "quantile"), library_size_norm = TRUE, scalefactor = 6000, log_norm = TRUE, log_offset = 1, logbase = 2, scale_feats = TRUE, - scale_genes = NULL, + scale_genes = deprecated(), scale_cells = TRUE, scale_order = c("first_feats", "first_cells"), theta = 100, - update_slot = "scaled", + name = "scaled", + update_slot = deprecated(), verbose = TRUE ) } \arguments{ -\item{gobject}{giotto object} +\item{gobject}{\code{giotto} object} \item{spat_unit}{spatial unit} @@ -55,16 +56,17 @@ normalizeGiotto( \item{theta}{theta parameter for the pearson residual normalization step} -\item{update_slot}{slot or name to use for the results from osmFISH and -pearson residual normalization} +\item{name}{character. name to use for normalization results} + +\item{update_slot}{deprecated. Use \code{name} param instead} \item{verbose}{be verbose} } \value{ -giotto object +\code{giotto} object } \description{ -fast normalize and/or scale expresion values of Giotto object +fast normalize and/or scale expression values of Giotto object } \details{ Currently there are two 'methods' to normalize your raw counts data. @@ -72,21 +74,33 @@ Currently there are two 'methods' to normalize your raw counts data. A. The standard method follows the standard protocol which can be adjusted using the provided parameters and follows the following order: \cr \itemize{ - \item{1. Data normalization for total library size and scaling by a custom scale-factor.} - \item{2. Log transformation of data.} - \item{3. Z-scoring of data by genes and/or cells.} +\item{1. Data normalization for total library size and scaling by a custom +scale-factor.} +\item{2. Log transformation of data.} +\item{3. Z-scoring of data by genes and/or cells.} +} +B. The normalization method as provided by the osmFISH paper is also +implemented: \cr +\itemize{ +\item{1. First normalize genes, for each gene divide the counts by the +total gene count and multiply by the total number of genes.} +\item{2. Next normalize cells, for each cell divide the normalized gene +counts by the total counts per cell and multiply by the total number of +cells.} } -B. The normalization method as provided by the osmFISH paper is also implemented: \cr +C. The normalization method as provided by Lause/Kobak et al is also +implemented: \cr \itemize{ - \item{1. 
First normalize genes, for each gene divide the counts by the total gene count and -multiply by the total number of genes.} - \item{2. Next normalize cells, for each cell divide the normalized gene counts by the total -counts per cell and multiply by the total number of cells.} +\item{1. First calculate expected values based on Pearson correlations.} +\item{2. Next calculate z-scores based on observed and expected values.} } -C. The normalization method as provided by Lause/Kobak et al is also implemented: \cr +D. Quantile normalization across features \itemize{ - \item{1. First calculate expected values based on Pearson correlations.} - \item{2. Next calculate z-scores based on observed and expected values.} +\item{1. Rank feature expression} +\item{2. Define a common distribution by sorting expression values per +feature then finding the mean across all features per index} +\item{3. Apply common distribution to expression information by using +the ranks from step 1 as indices} } By default the latter two results will be saved in the Giotto slot for scaled expression, this can be changed by changing the update_slot parameters @@ -94,5 +108,5 @@ scaled expression, this can be changed by changing the update_slot parameters \examples{ g <- GiottoData::loadGiottoMini("visium") -normalizeGiotto(g) +normalizeGiotto(g) # default is method A } diff --git a/man/pieCellTypesFromEnrichment.Rd b/man/pieCellTypesFromEnrichment.Rd index 86aac16bf..d66ddbf18 100644 --- a/man/pieCellTypesFromEnrichment.Rd +++ b/man/pieCellTypesFromEnrichment.Rd @@ -32,9 +32,11 @@ Default value is "PAGE_Z_score"} \item{title}{Title of the generated plot. Default `paste0(spat_unit,"cell types (maximum", enrichment_name, ")")`} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} \item{save_plot}{logical. save the plot} diff --git a/man/plotCCcomDotplot.Rd b/man/plotCCcomDotplot.Rd index 854d74f64..f6f115ebc 100644 --- a/man/plotCCcomDotplot.Rd +++ b/man/plotCCcomDotplot.Rd @@ -58,9 +58,11 @@ or 'sequential' (scaled based on data range)} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotCCcomHeatmap.Rd b/man/plotCCcomHeatmap.Rd index cc370e675..af80a6f3f 100644 --- a/man/plotCCcomHeatmap.Rd +++ b/man/plotCCcomHeatmap.Rd @@ -57,9 +57,11 @@ or 'sequential' (scaled based on data range)} \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotCPF.Rd b/man/plotCPF.Rd index 8b5829673..1a7839777 100644 --- a/man/plotCPF.Rd +++ b/man/plotCPF.Rd @@ -63,9 +63,11 @@ named vector of colors} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ plot diff --git a/man/plotCTCompositionInNicheCluster.Rd b/man/plotCTCompositionInNicheCluster.Rd index e121ae78f..6e05a9db9 100644 --- a/man/plotCTCompositionInNicheCluster.Rd +++ b/man/plotCTCompositionInNicheCluster.Rd @@ -10,11 +10,11 @@ plotCTCompositionInNicheCluster( values = "prob", spat_unit = "cell", feat_type = "niche cluster", + normalization = c("by_niche_cluster", "by_cell_type", NULL), show_plot = NULL, return_plot = NULL, save_plot = NULL, save_param = list(), - theme_param = list(), default_save_name = "CellTypeCompositionInNicheCluster" ) } @@ -23,25 +23,34 @@ plotCTCompositionInNicheCluster( \item{cell_type}{the cell type column name in the metadata} -\item{values}{name of the expression matrix stored probability of each cell assigned to each niche cluster} +\item{values}{name of the expression matrix stored probability of each cell +assigned to each niche cluster} \item{spat_unit}{name of spatial unit niche stored cluster features} \item{feat_type}{name of the feature type stored probability matrix} +\item{normalization}{normalization method for the cell type composition} + \item{show_plot}{logical. show plot} \item{return_plot}{logical. return ggplot object} \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} +} +\value{ +ggplot } \description{ plot cell type composition within each niche cluster } \details{ -This function plots the cell type composition within each niche cluster +This function plots the cell type composition within each niche +cluster } diff --git a/man/plotCTCompositionInProbCluster.Rd b/man/plotCTCompositionInProbCluster.Rd deleted file mode 100644 index 7116b6bd4..000000000 --- a/man/plotCTCompositionInProbCluster.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ONTraC_wrapper.R -\name{plotCTCompositionInProbCluster} -\alias{plotCTCompositionInProbCluster} -\title{plotCTCompositionInProbCluster} -\usage{ -plotCTCompositionInProbCluster( - gobject, - cell_type, - values = "prob", - spat_unit = "cell", - feat_type = "niche cluster", - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - theme_param = list(), - default_save_name = "plotCTCompositionInProbCluster" -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{cell_type}{the cell type column name in the metadata} - -\item{values}{name of the expression matrix stored probability of each cell assigned to each probabilistic cluster} - -\item{spat_unit}{name of spatial unit niche stored cluster features} - -\item{feat_type}{name of the feature type stored niche cluster connectivities} - -\item{show_plot}{logical. show plot} - -\item{return_plot}{logical. return ggplot object} - -\item{save_plot}{logical. save the plot} - -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} - -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} -} -\description{ -plot cell type composition within each probabilistic cluster -} -\details{ -This function plots the cell type composition within each probabilistic cluster -} diff --git a/man/plotCellProximityFeatSpot.Rd b/man/plotCellProximityFeatSpot.Rd index d1b765942..3648628a8 100644 --- a/man/plotCellProximityFeatSpot.Rd +++ b/man/plotCellProximityFeatSpot.Rd @@ -58,9 +58,11 @@ plotCellProximityFeatSpot( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ plot diff --git a/man/plotCellProximityFeats.Rd b/man/plotCellProximityFeats.Rd index bff1fd4fc..f12ab5c9c 100644 --- a/man/plotCellProximityFeats.Rd +++ b/man/plotCellProximityFeats.Rd @@ -50,7 +50,7 @@ cell type} \item{min_zscore}{minimum z-score change} -\item{zscores_column}{calculate z-scores over cell types or featuress} +\item{zscores_column}{calculate z-scores over cell types or features} \item{direction}{differential expression directions to keep} @@ -63,9 +63,11 @@ named vector of colors} \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ volcano, cell_barplot, cell-cell, cell_sankey, heatmap, or dotplot diff --git a/man/plotCellTypeNTScore.Rd b/man/plotCellTypeNTScore.Rd index 5938c5c37..391ef68c7 100644 --- a/man/plotCellTypeNTScore.Rd +++ b/man/plotCellTypeNTScore.Rd @@ -14,8 +14,7 @@ plotCellTypeNTScore( return_plot = NULL, save_plot = NULL, save_param = list(), - theme_param = list(), - default_save_name = "discreteAlongContinuous" + default_save_name = "CellTypeNTScore" ) } \arguments{ @@ -33,9 +32,14 @@ plotCellTypeNTScore( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} +} +\value{ +ggplot } \description{ plot NTScore by cell type diff --git a/man/plotCellTypesFromEnrichment.Rd b/man/plotCellTypesFromEnrichment.Rd index 31eaa8a8f..b12e39317 100644 --- a/man/plotCellTypesFromEnrichment.Rd +++ b/man/plotCellTypesFromEnrichment.Rd @@ -32,9 +32,11 @@ Default value is "PAGE_Z_score"} \item{title}{Title of the generated plot. Default `paste0(spat_unit,"cell types (maximum", enrichment_name, ")")`} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} \item{save_plot}{logical. save the plot} diff --git a/man/plotCombineCCcom.Rd b/man/plotCombineCCcom.Rd index f5c014c70..8f4190809 100644 --- a/man/plotCombineCCcom.Rd +++ b/man/plotCombineCCcom.Rd @@ -53,9 +53,11 @@ ligand-receptor pair} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotCombineCellCellCommunication.Rd b/man/plotCombineCellCellCommunication.Rd index 6527a36d0..df01e73db 100644 --- a/man/plotCombineCellCellCommunication.Rd +++ b/man/plotCombineCellCellCommunication.Rd @@ -53,9 +53,11 @@ ligand-receptor pair} \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotCombineICF.Rd b/man/plotCombineICF.Rd index 4c9a94f75..00ce75d20 100644 --- a/man/plotCombineICF.Rd +++ b/man/plotCombineICF.Rd @@ -52,9 +52,11 @@ plotCombineICF( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotCombineInteractionChangedFeats.Rd b/man/plotCombineInteractionChangedFeats.Rd index 901b07c1d..d572331fb 100644 --- a/man/plotCombineInteractionChangedFeats.Rd +++ b/man/plotCombineInteractionChangedFeats.Rd @@ -52,9 +52,11 @@ plotCombineInteractionChangedFeats( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotDiscreteAlongContinuous.Rd b/man/plotDiscreteAlongContinuous.Rd deleted file mode 100644 index e5fe7e131..000000000 --- a/man/plotDiscreteAlongContinuous.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ONTraC_wrapper.R -\name{plotDiscreteAlongContinuous} -\alias{plotDiscreteAlongContinuous} -\alias{plotCellTypeNTScore} -\title{plotDiscreteAlongContinuous} -\usage{ -plotCellTypeNTScore( - gobject, - cell_type, - values = "NTScore", - spat_unit = "cell", - feat_type = "niche cluster", - show_plot = NULL, - return_plot = NULL, - save_plot = NULL, - save_param = list(), - theme_param = list(), - default_save_name = "discreteAlongContinuous" -) -} -\arguments{ -\item{gobject}{giotto object} - -\item{cell_type}{the column name of discrete annotation in cell metadata} - -\item{values}{the column name of continuous values in cell metadata} - -\item{spat_unit}{spatial unit (e.g. "cell")} - -\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} - -\item{show_plot}{logical. show plot} - -\item{return_plot}{logical. return ggplot object} - -\item{save_plot}{logical. save the plot} - -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} - -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} -} -\description{ -plot density of a discrete annotation along a continuou values -} diff --git a/man/plotICF.Rd b/man/plotICF.Rd index c67c3628e..1b2c64cf9 100644 --- a/man/plotICF.Rd +++ b/man/plotICF.Rd @@ -38,9 +38,11 @@ named vector of colors} \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ plot diff --git a/man/plotICFSpot.Rd b/man/plotICFSpot.Rd index 7e5fca157..4e315a36d 100644 --- a/man/plotICFSpot.Rd +++ b/man/plotICFSpot.Rd @@ -37,9 +37,11 @@ plotICFSpot( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ plot diff --git a/man/plotInteractionChangedFeats.Rd b/man/plotInteractionChangedFeats.Rd index 7395258f9..24896a35c 100644 --- a/man/plotInteractionChangedFeats.Rd +++ b/man/plotInteractionChangedFeats.Rd @@ -38,9 +38,11 @@ named vector of colors} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ plot diff --git a/man/plotNicheClusterConnectivity.Rd b/man/plotNicheClusterConnectivity.Rd index b6f236496..e29c1c52b 100644 --- a/man/plotNicheClusterConnectivity.Rd +++ b/man/plotNicheClusterConnectivity.Rd @@ -13,7 +13,6 @@ plotNicheClusterConnectivity( return_plot = NULL, save_plot = NULL, save_param = list(), - theme_param = list(), default_save_name = "NicheClusterConnectivity" ) } @@ -32,9 +31,14 @@ plotNicheClusterConnectivity( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} +} +\value{ +ggplot } \description{ plot niche cluster connectivity diff --git a/man/plotRankSpatvsExpr.Rd b/man/plotRankSpatvsExpr.Rd index fa91fe188..12dc92595 100644 --- a/man/plotRankSpatvsExpr.Rd +++ b/man/plotRankSpatvsExpr.Rd @@ -58,9 +58,11 @@ percentage of top spatial ranks are recovered} \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotRecovery.Rd b/man/plotRecovery.Rd index 4cba6b66b..163c1b6d1 100644 --- a/man/plotRecovery.Rd +++ b/man/plotRecovery.Rd @@ -34,9 +34,11 @@ plotRecovery( \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot diff --git a/man/plotSpatNicheClusterBin.Rd b/man/plotSpatNicheClusterBin.Rd new file mode 100644 index 000000000..426e3afd3 --- /dev/null +++ b/man/plotSpatNicheClusterBin.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{plotSpatNicheClusterBin} +\alias{plotSpatNicheClusterBin} +\title{plotSpatNicheClusterBin} +\usage{ +plotSpatNicheClusterBin( + gobject, + spat_unit = "cell", + feat_type = "niche cluster", + ..., + default_save_name = "spatNicheClusterBin" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{name of spatial unit niche stored cluster features} + +\item{feat_type}{name of the feature type stored binarized niche cluster} + +\item{...}{additional arguments to be passed to the spatFeatPlot2D function} + +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} + +\item{niche_cluster_label}{name of the niche cluster label} +} +\value{ +ggplot +} +\description{ +plot spatial niche cluster binarized +} +\details{ +This function plots the spatial niche cluster binarized +} diff --git a/man/plotSpatNicheClusterProb.Rd b/man/plotSpatNicheClusterProb.Rd new file mode 100644 index 000000000..22fec4334 --- /dev/null +++ b/man/plotSpatNicheClusterProb.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ONTraC_wrapper.R +\name{plotSpatNicheClusterProb} +\alias{plotSpatNicheClusterProb} +\title{plotSpatNicheClusterProb} +\usage{ +plotSpatNicheClusterProb( + gobject, + spat_unit = "cell", + feat_type = "niche cluster", + expression_values = "prob", + ..., + default_save_name = "spatNicheClusterProb" +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{name of spatial unit niche stored cluster features} + +\item{feat_type}{name of the feature type stored probability matrix} + +\item{expression_values}{name of the expression matrix stored probability +values} + +\item{...}{additional arguments to be passed to the spatFeatPlot2D function} + +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} +} +\value{ +ggplot +} +\description{ +plot spatial niche cluster probability +} +\details{ +This function plots the spatial niche cluster probability +} diff --git a/man/plot_cell_params.Rd b/man/plot_cell_params.Rd index b7371cf79..05f8f67c1 100644 --- a/man/plot_cell_params.Rd +++ b/man/plot_cell_params.Rd @@ -7,8 +7,8 @@ \item{cell_color}{character. what to color cells by (e.g. metadata col or spatial enrichment col)} -\item{color_as_factor}{logical. convert color column to factor. discrete colors -are used when this is TRUE. continuous colors when FALSE.} +\item{color_as_factor}{logical. convert color column to factor. discrete +colors are used when this is TRUE. continuous colors when FALSE.} \item{cell_color_code}{character. discrete colors to use. 
palette to use or named vector of colors} diff --git a/man/plot_dimred_params.Rd b/man/plot_dimred_params.Rd index 1429ab53b..0552eff17 100644 --- a/man/plot_dimred_params.Rd +++ b/man/plot_dimred_params.Rd @@ -22,7 +22,8 @@ \item{dim_point_border_col}{border color of points in dim. reduction space} -\item{dim_point_border_stroke}{border stroke of points in dim. reduction space} +\item{dim_point_border_stroke}{border stroke of points in dim. reduction +space} } \value{ plot diff --git a/man/plot_image_params.Rd b/man/plot_image_params.Rd index d80b733fa..5eed3e533 100644 --- a/man/plot_image_params.Rd +++ b/man/plot_image_params.Rd @@ -10,7 +10,8 @@ \item{image_name}{name of a giotto image or multiple images with group_by} -\item{largeImage_name}{name of a giottoLargeImage or multiple images with group_by} +\item{largeImage_name}{name of a giottoLargeImage or multiple images with +group_by} } \value{ plot diff --git a/man/plot_nn_net_params.Rd b/man/plot_nn_net_params.Rd index c3fbb93a5..6e53ca66b 100644 --- a/man/plot_nn_net_params.Rd +++ b/man/plot_nn_net_params.Rd @@ -8,9 +8,11 @@ \item{nn_network_to_use}{character. type of NN network to use (kNN vs sNN)} -\item{network_name}{character. name of NN network to use, if show_NN_network = TRUE} +\item{network_name}{character. name of NN network to use, if +show_NN_network = TRUE} -\item{nn_network_name}{character. name of NN network to use, if show_NN_network = TRUE} +\item{nn_network_name}{character. name of NN network to use, if +show_NN_network = TRUE} \item{network_color}{color of NN network} diff --git a/man/plot_output_params.Rd b/man/plot_output_params.Rd index 7f26bd1fe..4d7f5e501 100644 --- a/man/plot_output_params.Rd +++ b/man/plot_output_params.Rd @@ -10,9 +10,11 @@ \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ plot diff --git a/man/plot_params.Rd b/man/plot_params.Rd index d508fbd9d..6a2a94bb1 100644 --- a/man/plot_params.Rd +++ b/man/plot_params.Rd @@ -4,7 +4,8 @@ \alias{plot_params} \title{Params documentation template: plot_params} \arguments{ -\item{group_by}{character. Create multiple plots based on cell annotation column} +\item{group_by}{character. Create multiple plots based on cell annotation +column} \item{group_by_subset}{character. subset the group_by factor column} @@ -18,7 +19,8 @@ or 'sequential' (scaled based on data range)} \item{gradient_color}{character. continuous colors to use. palette to use or vector of colors to use (minimum of 2).} -\item{select_cell_groups}{select subset of cells/clusters based on cell_color parameter} +\item{select_cell_groups}{select subset of cells/clusters based on +cell_color parameter} \item{select_cells}{select subset of cells based on cell IDs} diff --git a/man/plot_poly_params.Rd b/man/plot_poly_params.Rd index 9b99625bb..da343befe 100644 --- a/man/plot_poly_params.Rd +++ b/man/plot_poly_params.Rd @@ -12,15 +12,17 @@ \item{polygon_color}{color for polygon border} -\item{polygon_bg_color}{color for polygon background (overruled by polygon_fill)} +\item{polygon_bg_color}{color for polygon background (overruled by +polygon_fill)} -\item{polygon_fill}{character. what to color to fill polgyons by (e.g. 
metadata -col or spatial enrichment col)} +\item{polygon_fill}{character. what to fill polygons by (e.g. +metadata col or spatial enrichment col)} -\item{polygon_fill_gradient}{polygon fill gradient colors given in order from low to high} +\item{polygon_fill_gradient}{polygon fill gradient colors given in order +from low to high} -\item{polygon_fill_gradient_midpoint}{value to set as gradient midpoint (optional). If -left as \code{NULL}, the median value detected will be chosen} +\item{polygon_fill_gradient_midpoint}{value to set as gradient midpoint +(optional). If left as \code{NULL}, the median value detected will be chosen} \item{polygon_fill_gradient_style}{either 'divergent' (midpoint is used in color scaling) or 'sequential' (scaled based on data range)} diff --git a/man/plot_spatenr_params.Rd b/man/plot_spatenr_params.Rd index 0f152db84..8e329332a 100644 --- a/man/plot_spatenr_params.Rd +++ b/man/plot_spatenr_params.Rd @@ -4,7 +4,8 @@ \alias{plot_spatenr_params} \title{Params documentation template: plot_spatenr_params} \arguments{ -\item{spat_enr_names}{character. names of spatial enrichment results to include} +\item{spat_enr_names}{character. names of spatial enrichment results to +include} } \value{ plot diff --git a/man/preprocessImageToMatrix.Rd b/man/preprocessImageToMatrix.Rd index 842cbefcb..1d564d2df 100644 --- a/man/preprocessImageToMatrix.Rd +++ b/man/preprocessImageToMatrix.Rd @@ -2,16 +2,17 @@ % Please edit documentation in R/image_registration.R \name{preprocessImageToMatrix} \alias{preprocessImageToMatrix} -\title{Preprocess from image directory to the required matrix format for Image registration pipeline built on scikit-image} +\title{Preprocess from image directory to the required matrix format for +Image registration pipeline built on scikit-image} \usage{ preprocessImageToMatrix( x, - invert = F, - equalize_histogram = T, - flip_vertical = F, - flip_horizontal = F, - rotate_90 = F, - use_single_channel = F, + invert = FALSE, + equalize_histogram = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + rotate_90 = FALSE, + use_single_channel = FALSE, single_channel_number = NULL, pkg_ptr ) } \arguments{ \item{x}{input file path, required} -\item{invert}{whether or not to invert intensity to make calculation of descriptors more accurate, default FALSE} +\item{invert}{whether or not to invert intensity to make calculation of +descriptors more accurate, default FALSE} -\item{equalize_histogram}{whether or not to calculate equalized histogram of the image,default TRUE} +\item{equalize_histogram}{whether or not to calculate equalized histogram of +the image, default TRUE} \item{flip_vertical}{whether or not to flip vertical, default FALSE} \item{flip_horizontal}{whether or not to flip horizontal, default FALSE} -\item{rotate_90}{whether or not to rotates the image 90 degrees counter-clockwise, default FALSE} +\item{rotate_90}{whether or not to rotate the image 90 degrees +counter-clockwise, default FALSE} -\item{use_single_channel}{If input is a multichannel image, whether or not to extract single channel, default FALSE} +\item{use_single_channel}{If input is a multichannel image, whether or not +to extract single channel, default FALSE} -\item{single_channel_number}{Channel number in the multichannel image, required if use_single_channel = TRUE} +\item{single_channel_number}{Channel number in the multichannel image, +required if use_single_channel = TRUE} } \value{ a matrix array to input to .sift_detect }
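A minimal sketch of preparing an image for the registration pipeline (not taken from the diff; the file path is hypothetical, and the `pkg_ptr` argument, a scikit-image handle with no documented default, is omitted here and may need to be supplied depending on the implementation):

# preprocess one channel of a multichannel image into a matrix for .sift_detect
img_mat <- preprocessImageToMatrix(
    x = "/path/to/registration/DAPI.tif",  # hypothetical input image
    equalize_histogram = TRUE,
    use_single_channel = TRUE,
    single_channel_number = 1  # required when use_single_channel = TRUE
)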
\description{ -Preprocess a image path to the required matrix format for Image registration pipeline built on scikit-image +Preprocess a image path to the required matrix format for Image +registration pipeline built on scikit-image } diff --git a/man/print.combIcfObject.Rd b/man/print.combIcfObject.Rd index 7b3835f8f..0d5b2a116 100644 --- a/man/print.combIcfObject.Rd +++ b/man/print.combIcfObject.Rd @@ -11,6 +11,9 @@ print.combIcfObject(x, ...) \item{\dots}{additional params to pass (none implemented)} } +\value{ +combIcfObject +} \description{ combIcfObject print method } diff --git a/man/print.icfObject.Rd b/man/print.icfObject.Rd index c43b99eae..6bac12663 100644 --- a/man/print.icfObject.Rd +++ b/man/print.icfObject.Rd @@ -11,6 +11,9 @@ print.icfObject(x, ...) \item{\dots}{additional params to pass (none implemented)} } +\value{ +icfObject +} \description{ icfObject print method } diff --git a/man/rankEnrich.Rd b/man/rankEnrich.Rd deleted file mode 100644 index 7835c0802..000000000 --- a/man/rankEnrich.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_enrichment.R -\name{rankEnrich} -\alias{rankEnrich} -\title{rankEnrich} -\usage{ -rankEnrich(...) -} -\arguments{ -\item{...}{ - Arguments passed on to \code{\link[=runRankEnrich]{runRankEnrich}} - \describe{ - \item{\code{gobject}}{Giotto object} - \item{\code{spat_unit}}{spatial unit} - \item{\code{feat_type}}{feature type} - \item{\code{sign_matrix}}{Matrix of signature genes for each cell type / process} - \item{\code{expression_values}}{expression values to use} - \item{\code{reverse_log_scale}}{reverse expression values from log scale} - \item{\code{logbase}}{log base to use if reverse_log_scale = TRUE} - \item{\code{output_enrichment}}{how to return enrichment output} - \item{\code{ties_method}}{how to handle rank ties} - \item{\code{p_value}}{calculate p-values (boolean, default = FALSE)} - \item{\code{n_times}}{number of permutations to calculate for p_value} - \item{\code{rbp_p}}{fractional binarization threshold (default = 0.99)} - \item{\code{num_agg}}{number of top genes to aggregate (default = 100)} - \item{\code{name}}{to give to spatial enrichment results, default = rank} - \item{\code{return_gobject}}{return giotto object} - }} -} -\value{ -gene enrichment -} -\description{ -Function to calculate gene signature enrichment scores per -spatial position using a rank based approach. -} -\seealso{ -\code{\link{runRankEnrich}} -} diff --git a/man/read10xAffineImage.Rd b/man/read10xAffineImage.Rd index b4681f097..d5def684c 100644 --- a/man/read10xAffineImage.Rd +++ b/man/read10xAffineImage.Rd @@ -27,6 +27,9 @@ info. A default of 0.2125 is provided.} \item{\dots}{additional params to pass to \verb{[GiottoClass::createGiottoLargeImage]}} } +\value{ +10xAffineImage +} \description{ Read a 10x image that is provided with an affine matrix transform. Loads the image in with an orientation that matches the dataset diff --git a/man/read_data_folder.Rd b/man/read_data_folder.Rd index 3dd678024..82ffbb4a2 100644 --- a/man/read_data_folder.Rd +++ b/man/read_data_folder.Rd @@ -57,9 +57,9 @@ reader functions should be built using it as a base. \itemize{ \item{1. detection of items within \code{data_dir} by looking for keywords assigned through \code{dir_items}} -\item{2. check of detected items to see if everything needed has been found. 
-Dictionary of necessary vs optional items for each \code{data_to_use} -\emph{workflow} is provided through \code{require_data_DT}} +\item{2. check of detected items to see if everything needed has been +found. Dictionary of necessary vs optional items for each +\code{data_to_use} \emph{workflow} is provided through \code{require_data_DT}} \item{3. if multiple filepaths are found to be matching then select the first one. This function is only intended to find the first level subdirectories and files.} diff --git a/man/reduceDims.Rd b/man/reduceDims.Rd new file mode 100644 index 000000000..7407d4783 --- /dev/null +++ b/man/reduceDims.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dimension_reduction.R +\name{reduceDims} +\alias{reduceDims} +\title{Run dimension reduction method} +\usage{ +reduceDims( + gobject, + method = c("pca", "nmf", "umap", "tsne"), + projection = FALSE, + toplevel = 1L, + ... +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{method}{character. Dimension reduction method to use} + +\item{projection}{logical. Whether to run in a projection manner +(faster, but is an approximation)} + +\item{toplevel}{relative stackframe the call was made at. do not use.} + +\item{\dots}{additional params to pass to specific functions} +} +\value{ +`giotto` object with attached dimension reduction +} +\description{ +Wrapper function for Giotto dimension reduction methods for easier coding. +} +\examples{ +g <- GiottoData::loadGiottoMini("vis") +x <- reduceDims(g, "tsne", spat_unit = "cell") +x <- reduceDims(x, "umap", projection = TRUE) +x <- reduceDims(x, method = "nmf") +} diff --git a/man/reexports.Rd b/man/reexports.Rd index f5533929f..f9575eb02 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -81,7 +81,6 @@ \alias{combineFeatureData} \alias{combineFeatureOverlapData} \alias{combineMetadata} -\alias{combineSpatialCellFeatureInfo} \alias{combineSpatialCellMetadataInfo} \alias{combineToMultiPolygon} \alias{convertGiottoLargeImageToMG} @@ -298,7 +297,7 @@ These objects are imported from other packages. Follow the links below to see their documentation. 
\describe{ - \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, 
\code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, 
\code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, 
\code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} + \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass:giotto_python]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass:giotto_instructions]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, 
\code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass:giotto_python]{installGiottoEnvironment}}, \code{\link[GiottoClass:giotto_instructions]{instructions}}, \code{\link[GiottoClass:giotto_instructions]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, 
\code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass:giotto_python]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, 
\code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} diff --git a/man/registerGiottoObjectList.Rd b/man/registerGiottoObjectList.Rd index 46eeeab63..1c057806b 100644 --- a/man/registerGiottoObjectList.Rd +++ b/man/registerGiottoObjectList.Rd @@ -29,10 +29,10 @@ registerGiottoObjectList( \item{method}{Method used to align gobjects. Current options are either using FIJI register_virtual_stack_slices output or rvision} -\item{image_unreg}{Gobject image slot to use. Defaults to 'image' (optional)} +\item{image_unreg}{Gobject image slot to use. Defaults to "image" (optional)} \item{image_reg_name}{Arbitrary image slot name for registered images to -occupy. Defaults to replacement of 'image' slot (optional)} +occupy. Defaults to replacement of "image" slot (optional)} \item{image_list}{RVISION - under construction} @@ -42,7 +42,7 @@ occupy. Defaults to replacement of 'image' slot (optional)} 'raw' slot (optional)} \item{spatloc_reg_name}{Arbitrary name for registered spatial locations. -Defaults to replacement of 'raw' slot (optional)} +Defaults to replacement of "raw" slot (optional)} \item{fiji_xml_files}{Filepaths to FIJI registration XML outputs} diff --git a/man/registerGiottoObjectListFiji.Rd b/man/registerGiottoObjectListFiji.Rd index 3b7cf7ee6..4bd7a2a66 100644 --- a/man/registerGiottoObjectListFiji.Rd +++ b/man/registerGiottoObjectListFiji.Rd @@ -25,10 +25,10 @@ registerGiottoObjectListFiji( \item{spat_unit}{spatial unit} \item{image_unreg}{name of original unregistered images. Defaults to -'image' (optional)} +"image" (optional)} \item{image_reg_name}{arbitrary name for registered images to occupy. -Defaults to replacement of 'image' (optional)} +Defaults to replacement of "image" (optional)} \item{image_replace_name}{arbitrary name for any images replaced due to image_reg_name argument (optional)} @@ -36,10 +36,10 @@ image_reg_name argument (optional)} \item{registered_images}{registered images output by FIJI register_virtual_stack_slices} -\item{spatloc_unreg}{spatial locations to use. Defaults to 'raw' (optional)} +\item{spatloc_unreg}{spatial locations to use. Defaults to "raw" (optional)} \item{spatloc_reg_name}{name for registered spatial locations. Defaults to -replacement of 'raw' (optional)} +replacement of "raw" (optional)} \item{spatloc_replace_name}{arbitrary name for any spatial locations replaced due to spatloc_reg_name argument (optional)} diff --git a/man/runGiottoHarmony.Rd b/man/runGiottoHarmony.Rd index 35348339b..12ad97de5 100644 --- a/man/runGiottoHarmony.Rd +++ b/man/runGiottoHarmony.Rd @@ -9,17 +9,15 @@ runGiottoHarmony( spat_unit = NULL, feat_type = NULL, vars_use = "list_ID", - do_pca = FALSE, - expression_values = c("normalized", "scaled", "custom"), reduction = "cells", dim_reduction_to_use = "pca", dim_reduction_name = NULL, dimensions_to_use = 1:10, name = NULL, - feats_to_use = NULL, set_seed = TRUE, seed_number = 1234, - toplevel_params = 2, + toplevel_params = deprecated(), + toplevel = 1L, return_gobject = TRUE, verbose = NULL, ... 
@@ -32,12 +30,8 @@ runGiottoHarmony( \item{feat_type}{feature type} -\item{vars_use}{If meta_data is dataframe, this defines which variable(s) to -remove (character vector).} - -\item{do_pca}{Whether to perform PCA on input matrix.} - -\item{expression_values}{expression values to use} +\item{vars_use}{character vector. Which variable(s) in metadata +for harmony to remove} \item{reduction}{reduction on cells or features} @@ -49,13 +43,13 @@ remove (character vector).} \item{name}{arbitrary name for Harmony run} -\item{feats_to_use}{if dim_reduction_to_use = NULL, which feats to use} - \item{set_seed}{use of seed} \item{seed_number}{seed number to use} -\item{toplevel_params}{parameters to extract} +\item{toplevel_params}{deprecated} + +\item{toplevel}{relative stackframe where call was made from} \item{return_gobject}{boolean: return giotto object (default = TRUE)} diff --git a/man/runIntegratedUMAP.Rd b/man/runIntegratedUMAP.Rd index 98a1eec50..b764a0c7b 100644 --- a/man/runIntegratedUMAP.Rd +++ b/man/runIntegratedUMAP.Rd @@ -7,8 +7,7 @@ runIntegratedUMAP( gobject, spat_unit = "cell", - modality1 = "rna", - modality2 = "protein", + feat_types = c("rna", "protein"), integrated_feat_type = NULL, integration_method = "WNN", matrix_result_name = "theta_weighted_matrix", @@ -24,9 +23,7 @@ runIntegratedUMAP( \item{spat_unit}{spatial unit} -\item{modality1}{modality 1 name. Default = "rna"} - -\item{modality2}{modality 2 name. Default = "protein"} +\item{feat_types}{feature types to integrate. Default = c("rna", "protein")} \item{integrated_feat_type}{integrated feature type (e.g. 'rna_protein')} diff --git a/man/runNMF.Rd b/man/runNMF.Rd new file mode 100644 index 000000000..a33adfa3b --- /dev/null +++ b/man/runNMF.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dimension_reduction.R +\name{runNMF} +\alias{runNMF} +\title{Run Non-Negative Matrix Factorization} +\usage{ +runNMF( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + name = NULL, + feats_to_use = "hvf", + return_gobject = TRUE, + scale_unit = TRUE, + k = 20, + method = c("rcppml"), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + toplevel = 1L, + ... +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{spatial unit (e.g. "cell")} + +\item{feat_type}{feature type (e.g. "rna", "dna", "protein")} + +\item{expression_values}{expression values to use} + +\item{reduction}{"cells" or "feats"} + +\item{name}{arbitrary name for NMF run} + +\item{feats_to_use}{subset of features to use for NMF} + +\item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{scale_unit}{scale features before NMF (default = TRUE)} + +\item{k}{NMF rank (number of components to decompose into). Default is 20} + +\item{method}{which implementation to use (only rcppml right now)} + +\item{rev}{do a reverse NMF} + +\item{set_seed}{use of seed} + +\item{seed_number}{seed number to use} + +\item{verbose}{verbosity of the function} + +\item{toplevel}{relative stackframe where call was made} + +\item{...}{additional parameters for NMF (see details)} +} +\value{ +giotto object with updated NMF dimension reduction +} +\description{ +Use NMF to perform dimension reduction. +} +\details{ +See \code{\link[RcppML]{nmf}} for more information about other parameters. 
+}
+\examples{
+g <- GiottoData::loadGiottoMini("visium")
+x <- runNMF(g, k = 20)
+x <- runUMAP(x,
+    dim_reduction_to_use = "nmf",
+    dimensions_to_use = 1:20,
+    name = "nmf_umap"
+)
+x <- createNearestNetwork(x,
+    dim_reduction_to_use = "nmf",
+    dim_reduction_name = "nmf",
+    dimensions_to_use = 1:20
+)
+x <- doLeidenCluster(x, name = "nmf_leiden", network_name = "sNN.nmf")
+plotUMAP(x, dim_reduction_name = "nmf_umap", cell_color = "nmf_leiden")
+spatPlot2D(x, cell_color = "nmf_leiden")
+}
diff --git a/man/runONTraCV1.Rd b/man/runONTraCV1.Rd
new file mode 100644
index 000000000..6e593d832
--- /dev/null
+++ b/man/runONTraCV1.Rd
@@ -0,0 +1,104 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ONTraC_wrapper.R
+\name{runONTraCV1}
+\alias{runONTraCV1}
+\title{runONTraCV1}
+\usage{
+runONTraCV1(
+  ONTraC_input,
+  dataset,
+  preprocessing_dir,
+  GNN_dir,
+  NTScore_dir,
+  n_cpu = 4L,
+  n_neighbors = 50L,
+  n_local = 20L,
+  device = c("cpu", "cuda"),
+  epochs = 1000L,
+  patience = 100L,
+  min_delta = 0.001,
+  min_epochs = 50L,
+  batch_size = 0L,
+  seed = 42L,
+  lr = 0.03,
+  hidden_feats = 4L,
+  k = 6L,
+  modularity_loss_weight = 0.3,
+  purity_loss_weight = 300,
+  regularization_loss_weight = 0.1,
+  beta = 0.03,
+  python_path = "giotto_ontrac_env"
+)
+}
+\arguments{
+\item{dataset}{the path to the input data file}
+
+\item{preprocessing_dir}{the directory to save the preprocessing results}
+
+\item{GNN_dir}{the directory to save the GNN results}
+
+\item{NTScore_dir}{the directory to save the NTScore results}
+
+\item{n_cpu}{the number of CPUs used for niche network construction. Default
+is 4L}
+
+\item{n_neighbors}{the number of neighbors used for ONTraC in niche network
+construction. Default is 50L}
+
+\item{n_local}{the index of the local neighbor used for ONTraC in niche
+network construction for normalization. Default is 20L}
+
+\item{device}{the device used for ONTraC to run the GNN model. Default is
+"cpu"}
+
+\item{epochs}{the maximum number of epochs for model training. Default is
+1000L}
+
+\item{patience}{the number of epochs to wait for a better result. Default is
+100L}
+
+\item{min_delta}{the minimum change of loss to be considered an improvement.
+Default is 0.001}
+
+\item{min_epochs}{the minimum number of epochs to train. Default is 50L}
+
+\item{batch_size}{the batch size for training. Default is 0L for the whole
+dataset}
+
+\item{seed}{the random seed for reproducibility. Default is 42L}
+
+\item{lr}{the learning rate for training. Default is 0.03}
+
+\item{hidden_feats}{the number of hidden features for the GNN model. Default
+is 4L}
+
+\item{k}{the number of neighbors for the GNN model. Default is 6L}
+
+\item{modularity_loss_weight}{the weight of modularity loss. Default is 0.3}
+
+\item{purity_loss_weight}{the weight of purity loss. Default is 300.0}
+
+\item{regularization_loss_weight}{the weight of regularization loss. Default
+is 0.1}
+
+\item{beta}{the weight of entropy loss. Default is 0.03}
+
+\item{python_path}{path to the python executable within a conda/miniconda
+environment. Default is "giotto_ontrac_env"}
+}
+\value{
+none
+}
+\description{
+run ONTraC
+}
+\details{
+This function runs ONTraC
+}
+\examples{
+runONTraCV1(
+    dataset = "ONTraC_dataset_input.csv",
+    preprocessing_dir = "preprocessing_dir",
+    GNN_dir = "GNN_dir",
+    NTScore_dir = "NTScore_dir",
+    python_path = "giotto_ontrac_env"
+)
+}
diff --git a/man/runPAGEEnrich.Rd b/man/runPAGEEnrich.Rd
deleted file mode 100644
index 8d1ec1818..000000000
--- a/man/runPAGEEnrich.Rd
+++ /dev/null
@@ -1,100 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/spatial_enrichment.R
-\name{runPAGEEnrich}
-\alias{runPAGEEnrich}
-\title{runPAGEEnrich}
-\usage{
-runPAGEEnrich(
-  gobject,
-  spat_unit = NULL,
-  feat_type = NULL,
-  sign_matrix,
-  expression_values = c("normalized", "scaled", "custom"),
-  min_overlap_genes = 5,
-  reverse_log_scale = TRUE,
-  logbase = 2,
-  output_enrichment = c("original", "zscore"),
-  p_value = FALSE,
-  include_depletion = FALSE,
-  n_times = 1000,
-  max_block = 2e+07,
-  name = NULL,
-  verbose = TRUE,
-  return_gobject = TRUE
-)
-}
-\arguments{
-\item{gobject}{Giotto object}
-
-\item{spat_unit}{spatial unit}
-
-\item{feat_type}{feature type}
-
-\item{sign_matrix}{Matrix of signature genes for each cell type / process}
-
-\item{expression_values}{expression values to use}
-
-\item{min_overlap_genes}{minimum number of overlapping genes in sign_matrix
-required to calculate enrichment}
-
-\item{reverse_log_scale}{reverse expression values from log scale}
-
-\item{logbase}{log base to use if reverse_log_scale = TRUE}
-
-\item{output_enrichment}{how to return enrichment output}
-
-\item{p_value}{calculate p-values (boolean, default = FALSE)}
-
-\item{include_depletion}{calculate both enrichment and depletion}
-
-\item{n_times}{number of permutations to calculate for p_value}
-
-\item{max_block}{number of lines to process together (default = 20e6)}
-
-\item{name}{to give to spatial enrichment results, default = PAGE}
-
-\item{verbose}{be verbose}
-
-\item{return_gobject}{return giotto object}
-}
-\value{
-data.table with enrichment results
-}
-\description{
-Function to calculate gene signature enrichment scores per
-spatial position using PAGE.
-}
-\details{
-sign_matrix: a binary matrix with genes as row names and cell-types as
-column names.
-Alternatively a list of signature genes can be provided to
-makeSignMatrixPAGE, which will create the matrix for you. \cr
-
-The enrichment Z score is calculated by using method (PAGE) from
-Kim SY et al., BMC bioinformatics, 2005 as
-\eqn{Z = ((Sm – mu)*m^(1/2)) / delta}.
-For each gene in each spot, mu is the fold change values versus the mean
-expression and delta is the standard deviation. Sm is the mean fold change
-value of a specific marker gene set and m is the size of a given marker
-gene set.
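The PAGE Z-score quoted in the documentation above is simple to compute directly. Below is a minimal R sketch of that formula under one reading of the quoted text, assuming a genes-by-spots expression matrix `expr` (linear scale, no all-zero genes) and a character vector `markers` of signature genes; both objects are hypothetical and the function is illustrative only, not the package implementation.

```r
# Illustrative only: PAGE Z-score per spot for one marker gene set.
# `expr` (genes x spots, linear scale) and `markers` are hypothetical inputs.
page_z <- function(expr, markers) {
    # fold change of each gene in each spot versus that gene's mean expression
    fc <- expr / rowMeans(expr)
    # per spot: mean (mu) and standard deviation (delta) of all fold changes
    mu <- colMeans(fc)
    delta <- apply(fc, 2, stats::sd)
    # per spot: mean fold change (Sm) over the m marker genes
    m <- length(markers)
    Sm <- colMeans(fc[markers, , drop = FALSE])
    # Z = ((Sm - mu) * sqrt(m)) / delta
    ((Sm - mu) * sqrt(m)) / delta
}
```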
-} -\examples{ -g <- GiottoData::loadGiottoMini("visium") -sign_gene <- c( - "Bcl11b", "Lmo1", "F3", "Cnih3", "Ppp1r3c", "Rims2", "Gfap", - "Gjc3", "Chrna4", "Prkcd", "Prr18", "Grb14", "Tprn", "Clic1", "Olig2", - "Hrh3", "Tmbim1", "Carhsp1", "Tmem88b", "Ugt8a", "Arpp19", "Lamp5", - "Galnt6", "Hlf", "Hs3st2", "Tbr1", "Myl4", "Cygb", "Ttc9b", "Ipcef1" -) - -sign_matrix <- matrix(rnorm(length(sign_gene) * 3, mean = 10), - nrow = length(sign_gene) -) -rownames(sign_matrix) <- sign_gene -colnames(sign_matrix) <- c("cell_type1", "cell_type2", "cell_type3") - -runPAGEEnrich(gobject = g, sign_matrix = sign_matrix) -} -\seealso{ -\code{\link{makeSignMatrixPAGE}} -} diff --git a/man/runPAGEEnrich_OLD.Rd b/man/runPAGEEnrich_OLD.Rd deleted file mode 100644 index e6e826493..000000000 --- a/man/runPAGEEnrich_OLD.Rd +++ /dev/null @@ -1,64 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_enrichment.R -\name{runPAGEEnrich_OLD} -\alias{runPAGEEnrich_OLD} -\title{runPAGEEnrich_OLD} -\usage{ -runPAGEEnrich_OLD( - gobject, - sign_matrix, - expression_values = c("normalized", "scaled", "custom"), - reverse_log_scale = TRUE, - logbase = 2, - output_enrichment = c("original", "zscore"), - p_value = FALSE, - n_times = 1000, - name = NULL, - return_gobject = TRUE -) -} -\arguments{ -\item{gobject}{Giotto object} - -\item{sign_matrix}{Matrix of signature genes for each cell type / process} - -\item{expression_values}{expression values to use} - -\item{reverse_log_scale}{reverse expression values from log scale} - -\item{logbase}{log base to use if reverse_log_scale = TRUE} - -\item{output_enrichment}{how to return enrichment output} - -\item{p_value}{calculate p-values (boolean, default = FALSE)} - -\item{n_times}{number of permutations to calculate for p_value} - -\item{name}{to give to spatial enrichment results, default = PAGE} - -\item{return_gobject}{return giotto object} -} -\value{ -data.table with enrichment results -} -\description{ -Function to calculate gene signature enrichment scores per -spatial position using PAGE. -} -\details{ -sign_matrix: a binary matrix with genes as row names and cell-types as -column names. -Alternatively a list of signature genes can be provided to -makeSignMatrixPAGE, which will create the matrix for you. \cr - -The enrichment Z score is calculated by using method (PAGE) from -Kim SY et al., BMC bioinformatics, 2005 as -\eqn{Z = ((Sm – mu)*m^(1/2)) / delta}. -For each gene in each spot, mu is the fold change values versus the mean -expression and delta is the standard deviation. Sm is the mean fold change -value of a specific marker gene set and m is the size of a given marker -gene set. -} -\seealso{ -\code{\link{makeSignMatrixPAGE}} -} diff --git a/man/runPCA.Rd b/man/runPCA.Rd index 2e3ca25e1..6b4264659 100644 --- a/man/runPCA.Rd +++ b/man/runPCA.Rd @@ -22,6 +22,7 @@ runPCA( set_seed = TRUE, seed_number = 1234, verbose = TRUE, + toplevel = 1L, ... ) } @@ -60,6 +61,8 @@ runPCA( \item{verbose}{verbosity of the function} +\item{toplevel}{relative stackframe where call was made} + \item{...}{additional parameters for PCA (see details)} } \value{ diff --git a/man/runPCAprojection.Rd b/man/runPCAprojection.Rd index 1492e60ad..73c105224 100644 --- a/man/runPCAprojection.Rd +++ b/man/runPCAprojection.Rd @@ -23,6 +23,7 @@ runPCAprojection( set_seed = TRUE, seed_number = 1234, verbose = TRUE, + toplevel = 1L, ... 
) } @@ -63,6 +64,8 @@ runPCAprojection( \item{verbose}{verbosity of the function} +\item{toplevel}{relative stackframe where call was made} + \item{...}{additional parameters for PCA (see details)} } \value{ diff --git a/man/runPCAprojectionBatch.Rd b/man/runPCAprojectionBatch.Rd index 518ff46c0..77d526ec3 100644 --- a/man/runPCAprojectionBatch.Rd +++ b/man/runPCAprojectionBatch.Rd @@ -24,6 +24,7 @@ runPCAprojectionBatch( set_seed = TRUE, seed_number = 1234, verbose = TRUE, + toplevel = 1L, ... ) } @@ -66,6 +67,8 @@ runPCAprojectionBatch( \item{verbose}{verbosity of the function} +\item{toplevel}{relative stackframe where call was made} + \item{...}{additional parameters for PCA (see details)} } \value{ diff --git a/man/runPatternSimulation.Rd b/man/runPatternSimulation.Rd index c06ba28da..cdcd2d6ca 100644 --- a/man/runPatternSimulation.Rd +++ b/man/runPatternSimulation.Rd @@ -91,6 +91,7 @@ runPatternSimulation( "AAAGGGATGTAGCAAG-1", "TCAAACAACCGCGTCG-1", "ACGATCATACATAGAG-1", "TATGCTCCCTACTTAC-1" ), - spatial_network_name = "spatial_network", gene_names = c("Gna12", "Ccnd2") + spatial_network_name = "spatial_network", + gene_names = c("Gna12", "Ccnd2") ) } diff --git a/man/runUMAP.Rd b/man/runUMAP.Rd index 067cb2054..e07468609 100644 --- a/man/runUMAP.Rd +++ b/man/runUMAP.Rd @@ -25,7 +25,8 @@ runUMAP( set_seed = TRUE, seed_number = 1234L, verbose = TRUE, - toplevel_params = 2L, + toplevel_params = deprecated(), + toplevel = 1L, ... ) } @@ -70,7 +71,9 @@ runUMAP( \item{verbose}{verbosity of function} -\item{toplevel_params}{parameters to extract} +\item{toplevel_params}{deprecated} + +\item{toplevel}{relative stackframe where call was made from} \item{...}{ Arguments passed on to \code{\link[uwot:umap]{uwot::umap}} diff --git a/man/runUMAPprojection.Rd b/man/runUMAPprojection.Rd index 4bc1e2abd..720d69a81 100644 --- a/man/runUMAPprojection.Rd +++ b/man/runUMAPprojection.Rd @@ -26,7 +26,8 @@ runUMAPprojection( set_seed = TRUE, seed_number = 1234, verbose = TRUE, - toplevel_params = 2, + toplevel_params = deprecated(), + toplevel = 1L, ... ) } @@ -73,7 +74,9 @@ runUMAPprojection( \item{verbose}{verbosity of function} -\item{toplevel_params}{parameters to extract} +\item{toplevel_params}{deprecated} + +\item{toplevel}{relative stackframe where call was made from} \item{...}{additional UMAP parameters} } @@ -87,11 +90,15 @@ run UMAP on subset and project on the rest See \code{\link[uwot]{umap}} for more information about these and other parameters. 
\itemize{ - \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') + \item Input for UMAP dimension reduction can be another dimension + reduction (default = 'pca') \item To use gene expression as input set dim_reduction_to_use = NULL - \item If dim_reduction_to_use = NULL, feats_to_use can be used to select a column name of - highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes - \item multiple UMAP results can be stored by changing the \emph{name} of the analysis + \item If dim_reduction_to_use = NULL, feats_to_use can be used to select a + column name of + highly variable genes (see \code{\link{calculateHVF}}) or simply provide a + vector of genes + \item multiple UMAP results can be stored by changing the \emph{name} of + the analysis } } \examples{ diff --git a/man/runWNN.Rd b/man/runWNN.Rd index f58829f3f..2b704413c 100644 --- a/man/runWNN.Rd +++ b/man/runWNN.Rd @@ -7,47 +7,39 @@ runWNN( gobject, spat_unit = "cell", - modality_1 = "rna", - modality_2 = "protein", - pca_name_modality_1 = "rna.pca", - pca_name_modality_2 = "protein.pca", + feat_types = c("rna", "protein"), + reduction_methods = c("pca", "pca"), + reduction_names = c("rna.pca", "protein.pca"), k = 20, integrated_feat_type = NULL, matrix_result_name = NULL, - w_name_modality_1 = NULL, - w_name_modality_2 = NULL, + w_names = c(NULL, NULL), verbose = FALSE ) } \arguments{ -\item{gobject}{A Giotto object with individual PCA modalities pre-calculated} +\item{gobject}{A Giotto object with individual PCA feat_types pre-calculated} \item{spat_unit}{spatial unit} -\item{modality_1}{modality 1 name. Default = "rna"} +\item{feat_types}{feature types to integrate. Default = c("rna", "protein")} -\item{modality_2}{modality 2 name. Default = "protein"} +\item{reduction_methods}{reduction methods for each feature type. Default = c("pca", "pca")} -\item{pca_name_modality_1}{Default = 'rna.pca'} +\item{reduction_names}{names of the reduction methods to use. Default = c("rna.pca", "protein.pca")} -\item{pca_name_modality_2}{Default = 'protein.pca'} - -\item{k}{k number, default = 20} +\item{k}{number of neighbors to calculate cell distances, default = 20} \item{integrated_feat_type}{integrated feature type (e.g. 'rna_protein')} \item{matrix_result_name}{Default = 'theta_weighted_matrix'} -\item{w_name_modality_1}{name for modality 1 weights} - -\item{w_name_modality_2}{name for modality 2 weights} +\item{w_names}{optional. Names for the weighted matrices. If NULL, automatic names composed by w_feat_type will be created.} \item{verbose}{be verbose} } \value{ -A Giotto object with integrated UMAP (integrated.umap) within the -dimension_reduction slot and Leiden clusters (integrated_leiden_clus) in the -cellular metadata. +A Giotto object with a new multiomics slot containing the theta_weighted_matrix and individual weight matrices. } \description{ Multi omics integration with WNN diff --git a/man/runtSNE.Rd b/man/runtSNE.Rd index ff8cddfee..5c8ee3839 100644 --- a/man/runtSNE.Rd +++ b/man/runtSNE.Rd @@ -23,6 +23,7 @@ runtSNE( set_seed = TRUE, seed_number = 1234, verbose = TRUE, + toplevel = 1L, ... 
) } @@ -63,10 +64,12 @@ runtSNE( \item{verbose}{verbosity of the function} +\item{toplevel}{relative stackframe where call was made from} + \item{...}{additional tSNE parameters} } \value{ -giotto object with updated tSNE dimension recuction +giotto object with updated tSNE dimension reduction } \description{ run tSNE @@ -75,11 +78,14 @@ run tSNE See \code{\link[Rtsne]{Rtsne}} for more information about these and other parameters. \cr \itemize{ - \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca') + \item Input for tSNE dimension reduction can be another dimension + reduction (default = 'pca') \item To use gene expression as input set dim_reduction_to_use = NULL - \item If dim_reduction_to_use = NULL, feats_to_use can be used to select a column name of - highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes - \item multiple tSNE results can be stored by changing the \emph{name} of the analysis + \item If dim_reduction_to_use = NULL, feats_to_use can be used to select + a column name of highly variable genes + (see \code{\link{calculateHVF}}) or simply provide a vector of genes + \item multiple tSNE results can be stored by changing the \emph{name} of + the analysis } } \examples{ diff --git a/man/screePlot.Rd b/man/screePlot.Rd index 2f65fb4f9..04bf2f287 100644 --- a/man/screePlot.Rd +++ b/man/screePlot.Rd @@ -8,7 +8,8 @@ screePlot( gobject, spat_unit = NULL, feat_type = NULL, - name = NULL, + dim_reduction_name = NULL, + name = deprecated(), expression_values = c("normalized", "scaled", "custom"), reduction = c("cells", "feats"), method = c("irlba", "exact", "random", "factominer"), @@ -34,7 +35,9 @@ screePlot( \item{feat_type}{feature type (e.g. "rna", "dna", "protein")} -\item{name}{name of PCA object if available} +\item{dim_reduction_name}{name of PCA} + +\item{name}{deprecated} \item{expression_values}{expression values to use} @@ -62,9 +65,11 @@ screePlot( \item{save_plot}{logical. save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} \item{...}{additional arguments to pca function, see \code{\link{runPCA}}} } diff --git a/man/signPCA.Rd b/man/signPCA.Rd index 0df0b6379..93e38cc4a 100644 --- a/man/signPCA.Rd +++ b/man/signPCA.Rd @@ -73,9 +73,11 @@ signPCA( \item{save_plot}{logical. 
save the plot} -\item{save_param}{list of saving parameters, see \code{\link{showSaveParameters}}} +\item{save_param}{list of saving parameters, see +\code{\link{showSaveParameters}}} -\item{default_save_name}{default save name for saving, don't change, change save_name in save_param} +\item{default_save_name}{default save name for saving, don't change, +change save_name in save_param} } \value{ ggplot object for scree method and maxtrix of p-values for jackstraw diff --git a/man/spatCellCellcomSpots.Rd b/man/spatCellCellcomSpots.Rd index ac6e46389..dee251301 100644 --- a/man/spatCellCellcomSpots.Rd +++ b/man/spatCellCellcomSpots.Rd @@ -10,6 +10,7 @@ spatCellCellcomSpots( feat_type = NULL, ave_celltype_exp, spatial_network_name = "Delaunay_network", + spat_enr_name = "DWLS", cluster_column = "cell_ID", random_iter = 1000, feature_set_1, @@ -39,6 +40,9 @@ spatCellCellcomSpots( \item{spatial_network_name}{spatial network to use for identifying interacting cells} +\item{spat_enr_name}{name of spatial enrichment containing DWLS results. +Default = \code{"DWLS"}} + \item{cluster_column}{cluster column with cell type information} \item{random_iter}{number of iterations} @@ -84,31 +88,33 @@ Statistical framework to identify if pairs of features expected based on a reshuffled null distribution of feature expression values in cells that are spatially in proximity to each other. \itemize{ - * LR_comb:Pair of ligand and receptor - * lig_cell_type: cell type to assess expression level of ligand - * lig_expr: average expression residual(observed - DWLS_predicted) of - ligand in lig_cell_type - * ligand: ligand name - * rec_cell_type: cell type to assess expression level of receptor - * rec_expr: average expression residual(observed - DWLS_predicted) of - receptor in rec_cell_type - * receptor: receptor name - * LR_expr: combined average ligand and receptor expression residual - * lig_nr: total number of cells from lig_cell_type that spatially interact - with cells from rec_cell_type - * rec_nr: total number of cells from rec_cell_type that spatially interact - with cells from lig_cell_type - * rand_expr: average combined ligand and receptor expression residual from - random spatial permutations - * av_diff: average difference between LR_expr and rand_expr over all random - spatial permutations - * sd_diff: (optional) standard deviation of the difference between LR_expr - and rand_expr over all random spatial permutations - * z_score: (optional) z-score - * log2fc: LR_expr - rand_expr - * pvalue: p-value - * LR_cell_comb: cell type pair combination - * p.adj: adjusted p-value - * PI: significanc score: log2fc \* -log10(p.adj) +\itemize{ +\item LR_comb:Pair of ligand and receptor +\item lig_cell_type: cell type to assess expression level of ligand +\item lig_expr: average expression residual(observed - DWLS_predicted) of +ligand in lig_cell_type +\item ligand: ligand name +\item rec_cell_type: cell type to assess expression level of receptor +\item rec_expr: average expression residual(observed - DWLS_predicted) of +receptor in rec_cell_type +\item receptor: receptor name +\item LR_expr: combined average ligand and receptor expression residual +\item lig_nr: total number of cells from lig_cell_type that spatially interact +with cells from rec_cell_type +\item rec_nr: total number of cells from rec_cell_type that spatially interact +with cells from lig_cell_type +\item rand_expr: average combined ligand and receptor expression residual from +random spatial permutations +\item av_diff: average difference 
between LR_expr and rand_expr over all random
+spatial permutations
+\item sd_diff: (optional) standard deviation of the difference between LR_expr
+and rand_expr over all random spatial permutations
+\item z_score: (optional) z-score
+\item log2fc: LR_expr - rand_expr
+\item pvalue: p-value
+\item LR_cell_comb: cell type pair combination
+\item p.adj: adjusted p-value
+\item PI: significance score: log2fc \* -log10(p.adj)
+}
}
}
diff --git a/man/spatialSplitCluster.Rd b/man/spatialSplitCluster.Rd
index f6d503a68..52582a0ea 100644
--- a/man/spatialSplitCluster.Rd
+++ b/man/spatialSplitCluster.Rd
@@ -11,7 +11,6 @@ spatialSplitCluster(
  spatial_network_name = "Delaunay_network",
  cluster_col,
  split_clus_name = paste0(cluster_col, "_split"),
-  include_all_ids = TRUE,
  missing_id_name = "not_connected",
  return_gobject = TRUE
)
@@ -30,13 +29,10 @@ clustering}
\item{split_clus_name}{character. Name to assign the split cluster results}
-\item{include_all_ids}{Boolean. Include all ids, including vertex ids not found
-in the spatial network}
+\item{return_gobject}{logical. Return giotto object}
-\item{missing_id_name}{Character. Name for vertices that were missing from
-spatial network}
-
-\item{return_gobject}{Boolean. Return giotto object}
+\item{include_all_ids}{logical. Include all ids, including vertex ids not
+found in the spatial network}
}
\value{
giotto object with cluster annotations
@@ -45,7 +41,6 @@ giotto object with cluster annotations
\description{
Split cluster annotations based on a spatial network
}
\examples{
-library(Giotto)
g <- GiottoData::loadGiottoMini("vizgen")
activeSpatUnit(g) <- "aggregate"
spatPlot2D(g, cell_color = "leiden_clus")
diff --git a/man/writeChatGPTqueryDEG.Rd b/man/writeChatGPTqueryDEG.Rd
index 8e1920852..d51cb7230 100644
--- a/man/writeChatGPTqueryDEG.Rd
+++ b/man/writeChatGPTqueryDEG.Rd
@@ -13,7 +13,8 @@ writeChatGPTqueryDEG(
)
}
\arguments{
-\item{DEG_output}{the output format from the differenetial expression functions}
+\item{DEG_output}{the output format from the differential expression
+functions}

\item{top_n_genes}{number of genes for each cluster}

@@ -27,11 +28,12 @@ writeChatGPTqueryDEG(
writes a .txt file to the desired location
}
\description{
-This function writes a query as a .txt file that can be used with
-ChatGPT or a similar LLM service to find the most likely cell types based on the
-top differential expressed genes (DEGs) between identified clusters.
+This function writes a query as a .txt file that can be used
+with ChatGPT or a similar LLM service to find the most likely cell types
+based on the top differentially expressed genes (DEGs) between identified
+clusters.
}
\details{
-This function does not run any LLM service. It simply creates the .txt
-file that can then be used any LLM service (e.g. OpenAI, Gemini, ...)
+This function does not run any LLM service. It simply creates the
+.txt file that can then be used with any LLM service (e.g. OpenAI, Gemini, ...)
} diff --git a/man/write_giotto_viewer_dim_reduction.Rd b/man/write_giotto_viewer_dim_reduction.Rd index e55394539..f42775b91 100644 --- a/man/write_giotto_viewer_dim_reduction.Rd +++ b/man/write_giotto_viewer_dim_reduction.Rd @@ -30,6 +30,7 @@ write_giotto_viewer_dim_reduction( write a .txt and .annot file for the selection annotation } \description{ -write out dimensional reduction data from a giotto object for the Viewer +write out dimensional reduction data from a giotto object for +the Viewer } \keyword{internal} diff --git a/man/write_giotto_viewer_numeric_annotation.Rd b/man/write_giotto_viewer_numeric_annotation.Rd index ec3f05d44..e3101f756 100644 --- a/man/write_giotto_viewer_numeric_annotation.Rd +++ b/man/write_giotto_viewer_numeric_annotation.Rd @@ -21,6 +21,7 @@ write_giotto_viewer_numeric_annotation( write a .txt and .annot file for the selection annotation } \description{ -write out numeric annotation data from a giotto object for the Viewer +write out numeric annotation data from a giotto object for the +Viewer } \keyword{internal} diff --git a/tests/testthat/test-dbMatrix_filterGiotto.R b/tests/testthat/test-dbMatrix_filterGiotto.R index 29979f555..441d1775f 100644 --- a/tests/testthat/test-dbMatrix_filterGiotto.R +++ b/tests/testthat/test-dbMatrix_filterGiotto.R @@ -3,43 +3,51 @@ rlang::local_options(lifecycle_verbosity = "quiet") # ---------------------------------------------------------------------------- # # Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") +visium <- GiottoData::loadGiottoMini(dataset = "visium") +dgc <- getExpression(visium, output = "matrix") -con = DBI::dbConnect(duckdb::duckdb(), ":memory:") +con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::dbMatrix(value = dgc, - con = con, - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) +dbsm <- dbMatrix::dbMatrix( + value = dgc, + con = con, + name = "dgc", + class = "dbSparseMatrix", + overwrite = TRUE +) # Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') +expObj_db <- createExprObj( + expression_data = dbsm, + expression_matrix_class = "dbSparseMatrix", + name = "raw" +) # Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) +gobject_db <- suppressWarnings(createGiottoObject(expression = expObj_db)) # ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") +# Perform filtering +visium_filtered <- filterGiotto(visium, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) + +gobject_db_filtered <- filterGiotto(gobject_db, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) # Get filtered matrix -dgc_visium = getExpression(visium_filtered, output = "matrix") -mat_db = getExpression(gobject_db_filtered, output = "matrix") -dgc_db = dbMatrix:::as_matrix(mat_db) +dgc_visium <- getExpression(visium_filtered, output = "matrix") +mat_db <- getExpression(gobject_db_filtered, output = "matrix") +dgc_db <- dbMatrix:::as_matrix(mat_db) # ---------------------------------------------------------------------------- # # Test filterGiotto() equivalence between dbMatrix and dgCMatrix 
test_that("dbMatrix equivalent to dgCMatrix after filterGiotto()", { - expect_equal(dgc_visium, dgc_db) + expect_equal(dgc_visium, dgc_db) }) diff --git a/tests/testthat/test-dbMatrix_libNorm.R b/tests/testthat/test-dbMatrix_libNorm.R index be575f17b..3f9bdf410 100644 --- a/tests/testthat/test-dbMatrix_libNorm.R +++ b/tests/testthat/test-dbMatrix_libNorm.R @@ -3,62 +3,74 @@ rlang::local_options(lifecycle_verbosity = "quiet") # ---------------------------------------------------------------------------- # # Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") +visium <- GiottoData::loadGiottoMini(dataset = "visium") +dgc <- getExpression(visium, output = "matrix") -con = DBI::dbConnect(duckdb::duckdb(), ":memory:") +con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::dbMatrix(value = dgc, - con = con, - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) +dbsm <- dbMatrix::dbMatrix( + value = dgc, + con = con, + name = "dgc", + class = "dbSparseMatrix", + overwrite = TRUE +) # Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') +expObj_db <- createExprObj( + expression_data = dbsm, + expression_matrix_class = "dbSparseMatrix", + name = "raw" +) # Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) +gobject_db <- suppressWarnings(createGiottoObject(expression = expObj_db)) # ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") +# Perform filtering +visium_filtered <- filterGiotto(visium, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") +gobject_db_filtered <- filterGiotto(gobject_db, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) # ---------------------------------------------------------------------------- # # Perform library normalization and scaling -visium_filtered = normalizeGiotto(gobject = visium_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = TRUE, - log_norm = FALSE, - scale_feats = FALSE, - scale_cells = FALSE) +visium_filtered <- normalizeGiotto( + gobject = visium_filtered, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw", + library_size_norm = TRUE, + log_norm = FALSE, + scale_feats = FALSE, + scale_cells = FALSE +) -gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = TRUE, - log_norm = FALSE, - scale_feats = FALSE, - scale_cells = FALSE) +gobject_db_filtered <- normalizeGiotto( + gobject = gobject_db_filtered, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw", + library_size_norm = TRUE, + log_norm = FALSE, + scale_feats = FALSE, + scale_cells = FALSE +) # Get normalized matrix -dgc_visium = getExpression(visium_filtered, output = "matrix", values = "normalized") -mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") -dgc_db = dbMatrix:::as_matrix(mat_db) +dgc_visium <- getExpression(visium_filtered, output = "matrix", values = "normalized") +mat_db <- getExpression(gobject_db_filtered, output = 
"matrix", values = "normalized") +dgc_db <- dbMatrix:::as_matrix(mat_db) # ---------------------------------------------------------------------------- # # Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(library_size_norm = TRUE)", { - expect_equal(dgc_visium, dgc_db) -}) \ No newline at end of file + expect_equal(dgc_visium, dgc_db) +}) diff --git a/tests/testthat/test-dbMatrix_logNorm.R b/tests/testthat/test-dbMatrix_logNorm.R index 1731e634f..5ec3ee09e 100644 --- a/tests/testthat/test-dbMatrix_logNorm.R +++ b/tests/testthat/test-dbMatrix_logNorm.R @@ -3,62 +3,74 @@ rlang::local_options(lifecycle_verbosity = "quiet") # ---------------------------------------------------------------------------- # # Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") +visium <- GiottoData::loadGiottoMini(dataset = "visium") +dgc <- getExpression(visium, output = "matrix") -con = DBI::dbConnect(duckdb::duckdb(), ":memory:") +con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::dbMatrix(value = dgc, - con = con, - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) +dbsm <- dbMatrix::dbMatrix( + value = dgc, + con = con, + name = "dgc", + class = "dbSparseMatrix", + overwrite = TRUE +) # Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') +expObj_db <- createExprObj( + expression_data = dbsm, + expression_matrix_class = "dbSparseMatrix", + name = "raw" +) # Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) +gobject_db <- suppressWarnings(createGiottoObject(expression = expObj_db)) # ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") +# Perform filtering +visium_filtered <- filterGiotto(visium, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") +gobject_db_filtered <- filterGiotto(gobject_db, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) # ---------------------------------------------------------------------------- # # Perform library normalization and scaling -visium_filtered = normalizeGiotto(gobject = visium_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = TRUE, - scale_feats = FALSE, - scale_cells = FALSE) +visium_filtered <- normalizeGiotto( + gobject = visium_filtered, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw", + library_size_norm = FALSE, + log_norm = TRUE, + scale_feats = FALSE, + scale_cells = FALSE +) -gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = TRUE, - scale_feats = FALSE, - scale_cells = FALSE) +gobject_db_filtered <- normalizeGiotto( + gobject = gobject_db_filtered, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw", + library_size_norm = FALSE, + log_norm = TRUE, + scale_feats = FALSE, + scale_cells = FALSE +) # Get normalized matrix -dgc_visium = getExpression(visium_filtered, output = "matrix", values = 
"normalized") -mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") -dgc_db = dbMatrix:::as_matrix(mat_db) +dgc_visium <- getExpression(visium_filtered, output = "matrix", values = "normalized") +mat_db <- getExpression(gobject_db_filtered, output = "matrix", values = "normalized") +dgc_db <- dbMatrix:::as_matrix(mat_db) # ---------------------------------------------------------------------------- # # Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(log_norm=TRUE)", { - expect_equal(dgc_visium, dgc_db) -}) \ No newline at end of file + expect_equal(dgc_visium, dgc_db) +}) diff --git a/tests/testthat/test-dbMatrix_scale.R b/tests/testthat/test-dbMatrix_scale.R index b28504d35..907569424 100644 --- a/tests/testthat/test-dbMatrix_scale.R +++ b/tests/testthat/test-dbMatrix_scale.R @@ -3,62 +3,74 @@ rlang::local_options(lifecycle_verbosity = "quiet") # ---------------------------------------------------------------------------- # # Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") +visium <- GiottoData::loadGiottoMini(dataset = "visium") +dgc <- getExpression(visium, output = "matrix") -con = DBI::dbConnect(duckdb::duckdb(), ":memory:") +con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") -dbsm = dbMatrix::dbMatrix(value = dgc, - con = con, - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) +dbsm <- dbMatrix::dbMatrix( + value = dgc, + con = con, + name = "dgc", + class = "dbSparseMatrix", + overwrite = TRUE +) # Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') +expObj_db <- createExprObj( + expression_data = dbsm, + expression_matrix_class = "dbSparseMatrix", + name = "raw" +) # Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) +gobject_db <- suppressWarnings(createGiottoObject(expression = expObj_db)) # ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") +# Perform filtering +visium_filtered <- filterGiotto(visium, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") +gobject_db_filtered <- filterGiotto(gobject_db, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw" +) # ---------------------------------------------------------------------------- # # Perform library normalization and scaling -visium_filtered = normalizeGiotto(gobject = visium_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = FALSE, - scale_feats = TRUE, - scale_cells = TRUE) +visium_filtered <- normalizeGiotto( + gobject = visium_filtered, + spat_unit = "cell", + feat_type = "rna", + expression_values = "raw", + library_size_norm = FALSE, + log_norm = FALSE, + scale_feats = TRUE, + scale_cells = TRUE +) -gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = FALSE, - scale_feats = TRUE, - scale_cells = TRUE) +gobject_db_filtered <- normalizeGiotto( + gobject = gobject_db_filtered, + spat_unit 
+    feat_type = "rna",
+    expression_values = "raw",
+    library_size_norm = FALSE,
+    log_norm = FALSE,
+    scale_feats = TRUE,
+    scale_cells = TRUE
+)
 
 # Get normalized matrix
-dgc_visium = getExpression(visium_filtered, output = "matrix", values = "scaled") |> as.matrix()
-mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "scaled")
-dgc_db = dbMatrix:::as_matrix(mat_db)
+dgc_visium <- getExpression(visium_filtered, output = "matrix", values = "scaled") |> as.matrix()
+mat_db <- getExpression(gobject_db_filtered, output = "matrix", values = "scaled")
+dgc_db <- dbMatrix:::as_matrix(mat_db)
 
 # ---------------------------------------------------------------------------- #
 # Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix
 test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(scale_feats=T,scale=cells=T)", {
-  expect_equal(dgc_visium, dgc_db)
-})
\ No newline at end of file
+    expect_equal(dgc_visium, dgc_db)
+})
diff --git a/tests/testthat/test_visium.R b/tests/testthat/test_visium.R
index 1e3fd6f14..c36811741 100644
--- a/tests/testthat/test_visium.R
+++ b/tests/testthat/test_visium.R
@@ -19,7 +19,7 @@ lapply(
     function(url) {
         myfilename <- basename(url)
         mydestfile <- file.path(datadir, myfilename)
-        utils::download.file(url = url, destfile = mydestfile, quiet = TRUE)
+        download.file(url = url, destfile = mydestfile, quiet = TRUE)
     }
 )
 
@@ -32,7 +32,7 @@ lapply(
         "raw_feature_bc_matrix.tar.gz",
         "spatial.tar.gz"
     )],
-    utils::untar,
+    untar,
     exdir = datadir
 )
 
diff --git a/vignettes/dbMatrix.Rmd b/vignettes/dbMatrix.Rmd
index 588737d97..3950a4e93 100644
--- a/vignettes/dbMatrix.Rmd
+++ b/vignettes/dbMatrix.Rmd
@@ -9,8 +9,8 @@ vignette: >
 
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
+    collapse = TRUE,
+    comment = "#>"
 )
 ```
 
@@ -21,23 +21,23 @@ This vignette demonstrates how to use a [`dbMatrix`](https://github.com/drieslab
 
 ```{r, eval=FALSE}
 # Ensure Giotto Suite is installed.
-if(!"Giotto" %in% installed.packages()) {
-  devtools::install_github("drieslab/Giotto@suite")
+if (!"Giotto" %in% installed.packages()) {
+    devtools::install_github("drieslab/Giotto@suite")
 }
 
 # Ensure GiottoData, a small, helper module for tutorials, is installed.
-if(!"GiottoData" %in% installed.packages()) {
-  devtools::install_github("drieslab/GiottoData")
+if (!"GiottoData" %in% installed.packages()) {
+    devtools::install_github("drieslab/GiottoData")
}
 
 library(Giotto)
 library(GiottoData)
 
 # Ensure the Python environment for Giotto has been installed.
-genv_exists = checkGiottoEnvironment()
-if(!genv_exists){
-  # The following command need only be run once to install the Giotto environment.
+genv_exists <- checkGiottoEnvironment()
+if (!genv_exists) {
+    # The following command need only be run once to install the Giotto environment.
+    installGiottoEnvironment()
 }
 ```
 
@@ -45,49 +45,57 @@ if(!genv_exists){
 ```{r}
 # Get test dataset from Giotto Data package
-visium = GiottoData::loadGiottoMini(dataset = "visium")
+visium <- GiottoData::loadGiottoMini(dataset = "visium")
 
 # Extract the cell expression matrix as a test dataset
-dgc = getExpression(visium, output = "matrix")
+dgc <- getExpression(visium, output = "matrix")
 
 # Create a DBI connection object
-con = DBI::dbConnect(duckb::duckdb(), ":memory:")
+con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
 
 # Create a dbSparseMatrix using the dbMatrix constructor function
-dbsm = dbMatrix::dbMatrix(value = dgc,
-        con = con,
-        name = 'dgc',
-        class = "dbSparseMatrix",
-        overwrite = TRUE)
+dbsm <- dbMatrix::dbMatrix(
+    value = dgc,
+    con = con,
+    name = "dgc",
+    class = "dbSparseMatrix",
+    overwrite = TRUE
+)
 
 # Create Giotto exprObj with the dbMatrix
-expObj_db = createExprObj(expression_data = dbsm,
-        expression_matrix_class = 'dbSparseMatrix',
-        name = 'raw')
+expObj_db <- createExprObj(
+    expression_data = dbsm,
+    expression_matrix_class = "dbSparseMatrix",
+    name = "raw"
+)
 
 # Create the Giotto object consisting of only the cell count matrix
-gobject_db = createGiottoObject(expression = expObj_db)
+gobject_db <- createGiottoObject(expression = expObj_db)
 ```
 
 # 3. Preprocess Giotto object with `dbMatrix`
 
 ```{r}
-# Perform filtering 
-gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell",
-        feat_type = "rna",
-        expression_values = "raw")
+# Perform filtering
+gobject_db_filtered <- filterGiotto(gobject_db,
+    spat_unit = "cell",
+    feat_type = "rna",
+    expression_values = "raw"
+)
 
 # Perform library normalization and scaling
-gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered,
-        spat_unit = 'cell',
-        feat_type = 'rna',
-        expression_values = 'raw',
-        library_size_norm = FALSE,
-        log_norm = FALSE,
-        scale_feats = TRUE,
-        scale_cells = TRUE)
+gobject_db_filtered <- normalizeGiotto(
+    gobject = gobject_db_filtered,
+    spat_unit = "cell",
+    feat_type = "rna",
+    expression_values = "raw",
+    library_size_norm = FALSE,
+    log_norm = FALSE,
+    scale_feats = TRUE,
+    scale_cells = TRUE
+)
 ```
 
 ```{r}
 sessionInfo()
-```
\ No newline at end of file
+```
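Note on the pattern behind the two new dbMatrix test files: each one builds a `dbSparseMatrix` from the same in-memory dgCMatrix, runs an identical `filterGiotto()` / `normalizeGiotto()` call on the dense-backed and database-backed objects, coerces the database result back with `dbMatrix:::as_matrix()`, and compares the two with `expect_equal()`. The sketch below condenses that round trip into one script. It reuses only calls that already appear in this diff; the exact `normalizeGiotto()` arguments (defaults plus `log_norm = TRUE`) are illustrative rather than copied from the normalization test, and `expect_equal()` is called directly instead of inside `test_that()`.

```r
# Condensed sketch of the dbMatrix vs. dgCMatrix equivalence check.
# Assumes GiottoData, dbMatrix, duckdb, DBI, and testthat are installed.
library(Giotto)

visium <- GiottoData::loadGiottoMini(dataset = "visium")
dgc <- getExpression(visium, output = "matrix")

# Back the same raw counts with an in-memory DuckDB table.
con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
dbsm <- dbMatrix::dbMatrix(
    value = dgc,
    con = con,
    name = "dgc",
    class = "dbSparseMatrix",
    overwrite = TRUE
)
expObj_db <- createExprObj(
    expression_data = dbsm,
    expression_matrix_class = "dbSparseMatrix",
    name = "raw"
)
gobject_db <- suppressWarnings(createGiottoObject(expression = expObj_db))

# Filter both objects with the same settings, as the tests do.
visium_filtered <- filterGiotto(visium,
    spat_unit = "cell", feat_type = "rna", expression_values = "raw"
)
gobject_db_filtered <- filterGiotto(gobject_db,
    spat_unit = "cell", feat_type = "rna", expression_values = "raw"
)

# Run the same normalization on both backends (illustrative arguments).
visium_filtered <- normalizeGiotto(visium_filtered,
    expression_values = "raw", log_norm = TRUE
)
gobject_db_filtered <- normalizeGiotto(gobject_db_filtered,
    expression_values = "raw", log_norm = TRUE
)

# Coerce the database-backed result back and compare against the dense result.
testthat::expect_equal(
    getExpression(visium_filtered, output = "matrix", values = "normalized"),
    dbMatrix:::as_matrix(
        getExpression(gobject_db_filtered, output = "matrix", values = "normalized")
    )
)

DBI::dbDisconnect(con, shutdown = TRUE)
```

The scale-equivalence test in `test-dbMatrix_scale.R` follows the same shape, swapping in `scale_feats = TRUE` / `scale_cells = TRUE` and coercing the dense side with `as.matrix()` before the comparison, since scaled values are dense.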