From 3b1ed890ed2344e4ddc178c0eff68b2c29e413cb Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:21:34 -0400 Subject: [PATCH 1/5] Add `crop()` method for `giottoPoints` --- NEWS.md | 2 ++ R/methods-coerce.R | 3 +- R/methods_crop.R | 81 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index cf5cbaf7..81b77e9d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ ## Breaking Changes ## Added +- Added `ext<-()` method for `giottoPolygon`, `giottoPoints` +- Added `crop()` method for `giottoLargeImage`, `giottoPoints` ## Changes - Improved performance of gefToGiotto() diff --git a/R/methods-coerce.R b/R/methods-coerce.R index d21319da..4851031b 100644 --- a/R/methods-coerce.R +++ b/R/methods-coerce.R @@ -56,8 +56,9 @@ as.data.table.giottoPoints <- function(x, ...) { # DT -> SpatVector #### - # TODO +# as.points / as.polygon generics from terra are an option, but terra deals with +# this kind of conversion using vect() usually diff --git a/R/methods_crop.R b/R/methods_crop.R index 810fc8b7..5376eb88 100644 --- a/R/methods_crop.R +++ b/R/methods_crop.R @@ -1,15 +1,23 @@ +# documentation #### #' @name crop-generic #' @title Crop to a spatial subset #' @description see [terra::crop]. Object x will be cropped using object y. #' @param x object #' @param y any object that has a SpatExtent or returns a SpatExtent -#' @param ... additional params to pass to terra::crop +#' @param \dots additional params to pass to terra::crop NULL + + + +# methods #### + + + #' @describeIn crop-generic Crop a giottoLargeImage #' @export setMethod('crop', signature('giottoLargeImage'), function(x, y, ...) { @@ -21,3 +29,74 @@ setMethod('crop', signature('giottoLargeImage'), function(x, y, ...) { x }) + + +#' @describeIn crop-generic Crop a giottoPoints +#' @param DT logical. Use alternative DT subsetting for crop operation +#' @param xmin,xmax,ymin,ymax only used if DT = TRUE. Set extent bounds +#' independently +#' @export +setMethod('crop', signature('giottoPoints'), function( + x, y, DT = TRUE, xmin = NULL, xmax = NULL, ymin = NULL, ymax = NULL, ... +) { + checkmate::assert_logical(DT) + if(DT) { + # converting to DT, subsetting, then regeneration of SpatVector with vect() + # is currently faster than using terra::crop() as of 9/21/23 + missing_y = missing(y) + n_single_bounds = 4 - sum(sapply(list(xmin, xmax, ymin, ymax), is.null)) + + # check cropping params + # ONLY y OR the single spat bounds can be used at any one time + if((missing_y && n_single_bounds == 0) || + (!missing_y && n_single_bounds > 0)) { + stop(wrap_txt('Crop bounds must be supplied through either a SpatExtent passed to \'y\' + or single numerical bounds passed to one or more of \'xmin\',\'xmax\', \'ymin\', \'ymax\'')) + } + + # 1. Get full set of cropping bounds + if(!missing_y) { + # if y is available, use y values directly. + # only the extent of y is usable for the DT method + if(!inherits(y, 'SpatExtent')) { + warning(wrap_txt('Only the extent of y is used when cropping with DT = TRUE')) + y = ext(y) + } + + xmin = terra::xmin(y) + xmax = terra::xmax(y) + ymin = terra::ymin(y) + ymax = terra::ymax(y) + + } else { + # otherwise, fill in any spatial subset bounds that may not have been + # supplied with the current extent value(s) + current_ext = ext(x) + if(is.null(xmin)) xmin = terra::xmin(current_ext) + if(is.null(xmax)) xmax = terra::xmax(current_ext) + if(is.null(ymin)) ymin = terra::ymin(current_ext) + if(is.null(ymax)) ymax = terra::ymax(current_ext) + } + + # 2. convert to DT + sv = x@spatVector + spatDT = as.data.table(sv, geom = 'XY') + + # 3. spatial subset then vect() to SpatVector again + spatDT_subset = spatDT[x >= xmin & x <= xmax & y >= ymin & y <= ymax] + sv_subset = terra::vect(spatDT_subset, c('x', 'y')) + + # 4. update x + x@spatVector = sv_subset + + } else { + x@spatVector = terra::crop(x@spatVector, y, ...) + } + + # update ID cache and return + x@unique_ID_cache = unique(terra::values(x@spatVector)$feat_ID) + x +}) + + + From ead6c0dc5b874ae606b9fe887aba38c5f2882c6d Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:31:59 -0400 Subject: [PATCH 2/5] Update subset.R - Add `valid_spat_subset_params()` helper - Update `subset_giotto_points_object()` - Update `subsetGiottoLocsSubcellular()` - Remove deprecated `gene_ids` param in `subset_feature_info_data()` --- R/subset.R | 161 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 67 deletions(-) diff --git a/R/subset.R b/R/subset.R index 38456218..a2f1f42a 100644 --- a/R/subset.R +++ b/R/subset.R @@ -706,40 +706,38 @@ subset_giotto_points_object = function(gpoints, # data.table vars x = y = feat_ID = NULL - if(!is.null(gpoints@spatVector)) { - - if(!is.null(feat_ids)) { - feat_id_bool = gpoints@spatVector$feat_ID %in% feat_ids - gpoints@spatVector = gpoints@spatVector[feat_id_bool] - } - - # spatial subset specific - if(!any(is.null(c(x_min, x_max, y_min, y_max)))) { - - if(verbose) print('im1') - - myspatvector = gpoints@spatVector - spatDT = spatVector_to_dt(myspatvector) - - if(verbose) print('im2') - - spatDT_subset = spatDT[x >= x_min & x <= x_max & y >= y_min & y <= y_max] - myspatvector_subset = dt_to_spatVector_points(dt = spatDT_subset) + # 0. check if spatial feature information exists + if(is.null(gpoints@spatVector)) { + return(gpoints) # return without change since there is no points info + } - if(verbose) print('im3') - gpoints@spatVector = myspatvector_subset - gpoints@unique_ID_cache = spatDT_subset[, unique(feat_ID)] # update cache - return(gpoints) - } + # 1. ID based subsetting # + # ---------------------- # + if(!is.null(feat_ids)) { + feat_id_bool = gpoints@spatVector$feat_ID %in% feat_ids + gpoints@spatVector = gpoints@spatVector[feat_id_bool] + } - # for when no spatial subsetting happens + # 2. Spatial subsetting # + # --------------------- # + # 2.1 if NO spatial subset information available, + # ie: if all spat subset params are NULL, return directly because there are + # no following steps + if(all(sapply(list(x_min, x_max, y_min, y_max), is.null))) { + # even if no spatial subsetting happened, if ID subsetting occurred, the + # unique_ID_cache needs to be updated gpoints@unique_ID_cache = unique(terra::values(gpoints@spatVector)$feat_ID) - + return(gpoints) } - return(gpoints) + # 2.2 otherwise use DT crop method + gpoints = crop(gpoints, + xmin = x_min, xmax = x_max, + ymin = y_min, ymax = y_max, + DT = TRUE) + return(gpoints) } @@ -799,7 +797,6 @@ subset_feature_info_data = function(feat_info, #' @inheritParams data_access_params #' @param cell_ids cell IDs to keep #' @param feat_ids feature IDs to keep -#' @param gene_ids deprecated. Use \code{feat_ids} #' @param poly_info polygon information to use #' @param all_spat_units subset all spatial units with selected feature ids #' @param all_feat_types subset all feature type data with selected cell ids @@ -814,7 +811,6 @@ subsetGiotto <- function(gobject, feat_type = NULL, cell_ids = NULL, feat_ids = NULL, - gene_ids = NULL, poly_info = NULL, all_spat_units = TRUE, all_feat_types = TRUE, @@ -837,19 +833,13 @@ subsetGiotto <- function(gobject, poly_info = spat_unit } - ## deprecated arguments - if(!is.null(gene_ids)) { - feat_ids = gene_ids - warning('gene_ids argument is deprecated, use feat_ids argument in the future \n') - } - if(!is.null(slot(gobject, 'h5_file'))) { g_dimnames = HDF5Array::h5readDimnames(filepath = slot(gobject, 'h5_file'), name = paste0("/expression/",feat_type,"/raw")) g_cell_IDs = g_dimnames[[2]] g_feat_IDs = g_dimnames[[1]] } else { - # filter cell_ID and gene_ID + # filter cell_ID and feat_ID g_cell_IDs = get_cell_id(gobject, spat_unit = spat_unit) g_feat_IDs = get_feat_id(gobject, feat_type = feat_type) @@ -1083,11 +1073,9 @@ subsetGiottoLocs = function(gobject, feat_type = feat_type) # Check spatial params - spatError = NULL - if(!is.null(x_min) && !is.null(x_max)) if(x_min > x_max) spatError = append(spatError, 'x_max must be larger than x_min \n') - if(!is.null(y_min) && !is.null(y_max)) if(y_min > y_max) spatError = append(spatError, 'y_max must be larger than y_min \n') - if(!is.null(z_min) && !is.null(z_max)) if(z_min > z_max) spatError = append(spatError, 'z_max must be larger than z_min \n') - if(!is.null(spatError)) stop(spatError) + valid_spat_subset_params(x_min = x_min, x_max = x_max, + y_min = y_min, y_max = y_max, + z_min = z_min, z_max = z_max) # function requires spat_loc_name @@ -1266,7 +1254,6 @@ subsetGiottoLocsMulti = function(gobject, - #' @title Subset raw subcellular information by location #' @name subsetGiottoLocsSubcellular #' @description Subsets Giotto object based on spatial coordinates @@ -1287,15 +1274,14 @@ subsetGiottoLocsSubcellular = function(gobject, # only to be used if there is no aggregated information # if(!is.null(gobject@expression)) { - stop('Aggregated information was found, use subsetGiottoLocs \n') + stop(wrap_txt('Aggregated information was found in gobject. + Use subsetGiottoLocs() instead')) } # Check spatial params - spatError = NULL - if(!is.null(x_min) && !is.null(x_max)) if(x_min > x_max) spatError = append(spatError, 'x_max must be larger than x_min \n') - if(!is.null(y_min) && !is.null(y_max)) if(y_min > y_max) spatError = append(spatError, 'y_max must be larger than y_min \n') - if(!is.null(z_min) && !is.null(z_max)) if(z_min > z_max) spatError = append(spatError, 'z_max must be larger than z_min \n') - if(!is.null(spatError)) stop(spatError) + valid_spat_subset_params(x_min = x_min, x_max = x_max, + y_min = y_min, y_max = y_max, + z_min = z_min, z_max = z_max) # first subset feature ids based on location @@ -1306,28 +1292,24 @@ subsetGiottoLocsSubcellular = function(gobject, ## --------------- ## if(!is.null(gobject@feat_info)) { - # TODO: make it possible for multiple feature types - - feats_list = slot(gobject, 'feat_info') - cropped_feats = lapply(feats_list[[feat_type]], function(x) { - sv = slot(x, 'spatVector') - sv = terra::crop(sv, terra::ext(x_min, x_max, y_min, y_max)) - sv$feat_ID + # perform crop and return to gobject + gpoints_list = get_feature_info_list(gobject, return_giottoPoints = TRUE)[feat_type] + cropped_gpoints = lapply(gpoints_list, function(x) { + crop(gpoints_list[[x]], + DT = TRUE, + xmin = x_min, xmax = x_max, + ymin = y_min, ymax = y_max) # TODO add cropping for z values as well }) + gobject@feat_info = cropped_gpoints + # extract the cropped feature IDs for use with spatial info overlaps + cropped_feats = lapply(cropped_gpoints, function(cropped_x) { + cropped_x@unique_ID_cache + }) cropped_feats = unique(unlist(cropped_feats)) - gobject@feat_info = subset_feature_info_data(feat_info = gobject@feat_info, - feat_ids = cropped_feats, - feat_type = feat_type, - x_max = x_max, - x_min = x_min, - y_max = y_max, - y_min = y_min, - verbose = verbose) - - if(verbose == TRUE) cat('subsetted spatial feature data \n') + if(verbose) wrap_msg('subsetted spatial feature data') } else { cropped_feats = NULL @@ -1340,7 +1322,7 @@ subsetGiottoLocsSubcellular = function(gobject, if(!is.null(gobject@spatial_info)) { # get the associated poly_IDs - polys_list = slot(gobject, 'spatial_info') + polys_list = get_polygon_info_list(gobject, return_giottoPolygon = TRUE) cropped_IDs = lapply(polys_list, function(x) { sv = slot(x, 'spatVector') sv = terra::crop(sv, terra::ext(x_min, x_max, y_min, y_max)) @@ -1360,7 +1342,7 @@ subsetGiottoLocsSubcellular = function(gobject, } - if(verbose == TRUE) cat('subsetted spatial information data \n') + if(verbose) wrap_msg('subsetted spatial information data') } @@ -1369,3 +1351,48 @@ subsetGiottoLocsSubcellular = function(gobject, return(initialize(gobject)) } + + + + +# helpers #### + +valid_spat_subset_params = function( + x_min = NULL, + x_max = NULL, + y_min = NULL, + y_max = NULL, + z_min = NULL, + z_max = NULL +) { + # Check spatial params + spatError = NULL + if(!is.null(x_min) && !is.null(x_max)) { + if(x_min > x_max) { + spatError = append(spatError, 'x max must be larger than x min \n') + } + } + if(!is.null(y_min) && !is.null(y_max)) { + if(y_min > y_max) { + spatError = append(spatError, 'y max must be larger than y min \n') + } + } + if(!is.null(z_min) && !is.null(z_max)) { + if(z_min > z_max) { + spatError = append(spatError, 'z max must be larger than z min \n') + } + } + + if(!is.null(spatError)) { + stop('Invalid spatial subset params:\n', + spatError, + call. = FALSE) + } +} + + + + + + + From 3d052579849039c1c4de16f14ae4bd5cc2f15f64 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:32:09 -0400 Subject: [PATCH 3/5] Run document --- man/crop-generic.Rd | 12 +++++++++++- man/subsetGiotto.Rd | 3 --- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/man/crop-generic.Rd b/man/crop-generic.Rd index dd3c35ba..a2238d22 100644 --- a/man/crop-generic.Rd +++ b/man/crop-generic.Rd @@ -3,16 +3,24 @@ \name{crop-generic} \alias{crop-generic} \alias{crop,giottoLargeImage-method} +\alias{crop,giottoPoints-method} \title{Crop to a spatial subset} \usage{ \S4method{crop}{giottoLargeImage}(x, y, ...) + +\S4method{crop}{giottoPoints}(x, y, DT = TRUE, xmin = NULL, xmax = NULL, ymin = NULL, ymax = NULL, ...) } \arguments{ \item{x}{object} \item{y}{any object that has a SpatExtent or returns a SpatExtent} -\item{...}{additional params to pass to terra::crop} +\item{\dots}{additional params to pass to terra::crop} + +\item{DT}{logical. Use alternative DT subsetting for crop operation} + +\item{xmin, xmax, ymin, ymax}{only used if DT = TRUE. Set extent bounds +independently} } \description{ see \link[terra:crop]{terra::crop}. Object x will be cropped using object y. @@ -21,4 +29,6 @@ see \link[terra:crop]{terra::crop}. Object x will be cropped using object y. \itemize{ \item \code{crop(giottoLargeImage)}: Crop a giottoLargeImage +\item \code{crop(giottoPoints)}: Crop a giottoPoints + }} diff --git a/man/subsetGiotto.Rd b/man/subsetGiotto.Rd index 28196a82..e51e382a 100644 --- a/man/subsetGiotto.Rd +++ b/man/subsetGiotto.Rd @@ -10,7 +10,6 @@ subsetGiotto( feat_type = NULL, cell_ids = NULL, feat_ids = NULL, - gene_ids = NULL, poly_info = NULL, all_spat_units = TRUE, all_feat_types = TRUE, @@ -33,8 +32,6 @@ subsetGiotto( \item{feat_ids}{feature IDs to keep} -\item{gene_ids}{deprecated. Use \code{feat_ids}} - \item{poly_info}{polygon information to use} \item{all_spat_units}{subset all spatial units with selected feature ids} From b4544a93b101e535a9efc611e02687e8cf7e38db Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 21 Sep 2023 17:22:42 -0400 Subject: [PATCH 4/5] Update subset.R - Update `subset_spatial_locations()` to be able to subset across all spatial units for a set of spat_ids - Link up subsetGiotto to subsetGiottoLocs so that spatial subsetting will be passed properly --- R/subset.R | 155 ++++++++++++++++++++++++++++++---------- man/subsetGiotto.Rd | 11 +-- man/subsetGiottoLocs.Rd | 2 +- 3 files changed, 126 insertions(+), 42 deletions(-) diff --git a/R/subset.R b/R/subset.R index a2f1f42a..1b7d526a 100644 --- a/R/subset.R +++ b/R/subset.R @@ -1,4 +1,4 @@ -### subset Giotto object #### +### subset Giotto object slots #### #' @title Subset expression data @@ -171,42 +171,59 @@ subset_expression_data = function(gobject, #' @title Subset spatial locations #' @name subset_spatial_locations #' @description Subset location data from giotto object +#' @param all_spat_units logical. Applies subset operation across the whole gobject +#' (ALL spat_units), ignoring the \code{spat_unit} input param. Defaults to TRUE. #' @keywords internal #' @noRd subset_spatial_locations = function(gobject, cell_ids, - spat_unit) { + spat_unit, + all_spat_units = TRUE) { - avail_locs = list_spatial_locations_names(gobject, spat_unit = spat_unit) + if(all_spat_units) { + avail_locs = list_spatial_locations(gobject) + } else { + avail_locs = list_spatial_locations(gobject, spat_unit = spat_unit) + } - # only subset cell_ID if the spatial unit is the same (e.g. cell) + # 3. get, subset, and set back a single spatLocsObj + # returns a gobject + do_subset = function(su, sname) { + spatObj = get_spatial_locations(gobject, + spat_unit = su, + spat_loc_name = sname, + output = 'spatLocsObj', + copy_obj = FALSE) - if(!is.null(avail_locs)) { - for(spatlocname in avail_locs) { + ## filter index + g_cell_IDs = spatObj@coordinates[['cell_ID']] - spatObj = get_spatial_locations(gobject, - spat_unit = spat_unit, - spat_loc_name = spatlocname, - output = 'spatLocsObj', - copy_obj = FALSE) + if(!is.null(cell_ids)) { + filter_bool_cells = g_cell_IDs %in% cell_ids + } else filter_bool_cells = g_cell_IDs %in% g_cell_IDs - ## filter index - g_cell_IDs = spatObj@coordinates[['cell_ID']] + spatObj[] = spatObj[][filter_bool_cells] - if(!is.null(cell_ids)) { - filter_bool_cells = g_cell_IDs %in% cell_ids - } else filter_bool_cells = g_cell_IDs %in% g_cell_IDs - - spatObj[] = spatObj[][filter_bool_cells] - - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject = set_spatial_locations(gobject, spatlocs = spatObj, verbose = FALSE) - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - # not yet possible to row subset data.tables by reference. Must be set back in. + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <<- set_spatial_locations(gobject, spatlocs = spatObj, verbose = FALSE) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + # not yet possible to row subset data.tables by reference. Must be set back in. + } + # 2. for each spat unit, subset all contained spatLocsObjs + per_spat_unit = function(su) { + spat_names = avail_locs[spat_unit == su, name] + for(sname in spat_names) { + do_subset(su, sname) } } + # 1. for each spatial unit requested to be subset... + for(su in avail_locs$spat_unit) { + per_spat_unit(su = su) + } + + return(gobject) } @@ -586,7 +603,12 @@ subset_spatial_enrichment = function(gobject, #' @title Subset giotto polygon object #' @name subset_giotto_polygon_object -#' @description Subset a single giotto polygon object +#' @description Subset a single giotto polygon object for cell_ids and feat_ids +#' @param gpolygon giottoPolygon object to subset +#' @param cell_ids character. cell_ids to keep +#' @param feat_ids character. feat_ids to keep +#' @param feat_type character. feature type to subset feat_ids from if overlaps +#' are present within the giottoPolygon object #' @keywords internal #' @noRd subset_giotto_polygon_object = function(gpolygon, @@ -631,7 +653,7 @@ subset_giotto_polygon_object = function(gpolygon, #' @title Subset spatial info data #' @name subset_spatial_info_data -#' @description Subset all spatial info (polygon) data +#' @description Subset all spatial info (polygon) data #' @keywords internal #' @noRd subset_spatial_info_data = function(spatial_info, @@ -656,6 +678,8 @@ subset_spatial_info_data = function(spatial_info, if(verbose) cat('--> ', spat_info, ' found back in polygon layer: ', poly_info, '\n') + # subset the giottoPolygon object for the cell_ids and the specified + # feat_type overlap information (if existing) for the feat_ids spat_subset = subset_giotto_polygon_object(spatial_info[[spat_info]], cell_ids = cell_ids, feat_ids = feat_ids, @@ -787,17 +811,38 @@ subset_feature_info_data = function(feat_info, +# Exported subset functions #### + +# Overview of subset functions # +# +# subsetGiotto +# Main subsetting pipeline that subsets based on the aggregated information +# available. It determines a set of IDs (cell_ids or feat_ids) to subset with +# and then walks through each of the slots, performing the subset. +# subsetGiottoLocs +# Pulls cell_IDs information and spatial locations and performs a spatial +# subset. The cell_IDs that are selected are then fed back into subsetGiotto() +# This operation is performed only for a single spat_unit +# subsetGiottoLocsMulti +# Performs more than one subset operation using subsetGiottoLocs +# subsetGiottoLocsSubcellular +# Performs the spatial subset only on the subcellular spatial info (polygons) +# and feature info (points). This is useful for situations in which aggregate +# information has not been created but the subcellular data is present. #' @title subsetGiotto -#' @description Subsets Giotto object including previous analyses. +#' @description Subsets Giotto object including previous analyses. For subsetting +#' the subcellular information only without editing the aggregate information, +#' use [subsetGiottoLocsSubcellular] #' @inheritParams data_access_params -#' @param cell_ids cell IDs to keep -#' @param feat_ids feature IDs to keep -#' @param poly_info polygon information to use +#' @param cell_ids character. cell IDs to keep +#' @param feat_ids character. feature IDs to keep +#' @param poly_info character. polygon info(s) to subset if present. (defaults +#' to be the same as the spat_unit) #' @param all_spat_units subset all spatial units with selected feature ids #' @param all_feat_types subset all feature type data with selected cell ids #' @param x_max,x_min,y_max,y_min minimum and maximum x and y coordinates to keep for feature coordinates @@ -833,6 +878,35 @@ subsetGiotto <- function(gobject, poly_info = spat_unit } + + # spatial subsetting # + # pass to subsetGiottoLocs if not all spatial subset params are NULL + if(!all(sapply(list(x_max, x_min, y_min, y_max), is.null))) { + comb_metadata = subsetGiottoLocs( + gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + x_min = x_min, x_max = x_max, + y_min = y_min, y_max = y_max, + return_gobject = FALSE # returned the combined and spatially subset + # metadata instead + ) + + # get spatially filtered cell_IDs + # filter current IDs to keep (cell_ids) by selecting only those that are + # also within the spat_filtered_cell_ids + spat_filtered_cell_ids = comb_metadata[['cell_ID']] + if (is.null(cell_ids)) cell_ids = spat_filtered_cell_ids + else cell_ids = cell_ids[cell_ids %in% spat_filtered_cell_ids] + } + + + # all subsetting operations below here should rely on cell_ID or feat_ID # + # ---------------------------------------------------------------------- # + + + + if(!is.null(slot(gobject, 'h5_file'))) { g_dimnames = HDF5Array::h5readDimnames(filepath = slot(gobject, 'h5_file'), name = paste0("/expression/",feat_type,"/raw")) @@ -890,7 +964,8 @@ subsetGiotto <- function(gobject, gobject = subset_spatial_locations(gobject = gobject, cell_ids = cell_ids, - spat_unit = spat_unit) + spat_unit = spat_unit, + all_spat_units = all_spat_units) if(verbose) cat('completed 3: subset spatial locations \n') @@ -1038,7 +1113,7 @@ subsetGiotto <- function(gobject, #' @title Subset by spatial locations #' @name subsetGiottoLocs -#' @description Subsets Giotto object based on spatial locations +#' @description Subsets Giotto object based on spatial locations. #' @inheritParams data_access_params #' @param spat_loc_name name of spatial locations to use #' @param x_max,x_min,y_max,y_min,z_max,z_min minimum and maximum x, y, and z coordinates @@ -1064,15 +1139,19 @@ subsetGiottoLocs = function(gobject, return_gobject = TRUE, verbose = FALSE) { + # FOR AGGREGATE DATA # + # Spatial subsetting is performed on the spatial locations information # + # The IDs after this operation are then used to subset the rest of the # + # Giotto object using subsetGiotto() # - # Set feat_type and spat_unit + # 0. Set feat_type and spat_unit spat_unit = set_default_spat_unit(gobject = gobject, spat_unit = spat_unit) feat_type = set_default_feat_type(gobject = gobject, spat_unit = spat_unit, feat_type = feat_type) - # Check spatial params + # 1. Check spatial params valid_spat_subset_params(x_min = x_min, x_max = x_max, y_min = y_min, y_max = y_max, z_min = z_min, z_max = z_max) @@ -1081,10 +1160,12 @@ subsetGiottoLocs = function(gobject, # function requires spat_loc_name if(is.null(spat_loc_name)) { + # first check spatial locations if(!is.null(slot(gobject, 'spatial_locs'))) { spat_loc_name = names(slot(gobject, 'spatial_locs')[[spat_unit]])[[1]] # cat('No spatial locations have been selected, the first one -',spat_loc_name, '- will be used \n') + # if spatlocs missing, check spatial_info } else if(!is.null(slot(gobject, 'spatial_info'))) { # EXCEPTION: if no spatlocs found but polys exist, find cell_IDs from polys polys_list = slot(gobject, 'spatial_info') @@ -1147,7 +1228,7 @@ subsetGiottoLocs = function(gobject, comb_metadata = comb_metadata[get('sdimz') < z_max & get('sdimz') > z_min] } - if(return_gobject == TRUE) { + if(return_gobject) { filtered_cell_IDs = comb_metadata[['cell_ID']] @@ -1206,11 +1287,11 @@ subsetGiottoLocsMulti = function(gobject, cat('\n \n') - if(verbose) wrap_msg('Start subset on location for spatial unit: ', spat_unit_selected, + if(verbose) wrap_msg('Start subset on locations for spatial unit: ', spat_unit_selected, 'and polygon information layers: ', poly_info_selected, '\n') - if(return_gobject == TRUE) { + if(return_gobject) { gobject = subsetGiottoLocs(gobject = gobject, spat_unit = spat_unit_selected, feat_type = feat_type, @@ -1243,7 +1324,7 @@ subsetGiottoLocsMulti = function(gobject, } } - if(return_gobject == TRUE) { + if(return_gobject) { return(gobject) } else { return(res_list) diff --git a/man/subsetGiotto.Rd b/man/subsetGiotto.Rd index e51e382a..6a2376ec 100644 --- a/man/subsetGiotto.Rd +++ b/man/subsetGiotto.Rd @@ -28,11 +28,12 @@ subsetGiotto( \item{feat_type}{feature type (e.g. "rna", "dna", "protein")} -\item{cell_ids}{cell IDs to keep} +\item{cell_ids}{character. cell IDs to keep} -\item{feat_ids}{feature IDs to keep} +\item{feat_ids}{character. feature IDs to keep} -\item{poly_info}{polygon information to use} +\item{poly_info}{character. polygon info(s) to subset if present. (defaults +to be the same as the spat_unit)} \item{all_spat_units}{subset all spatial units with selected feature ids} @@ -48,7 +49,9 @@ subsetGiotto( giotto object } \description{ -Subsets Giotto object including previous analyses. +Subsets Giotto object including previous analyses. For subsetting +the subcellular information only without editing the aggregate information, +use \link{subsetGiottoLocsSubcellular} } \details{ Subsets a Giotto object for a specific spatial unit and feature type diff --git a/man/subsetGiottoLocs.Rd b/man/subsetGiottoLocs.Rd index 0dc10e66..fbd34cff 100644 --- a/man/subsetGiottoLocs.Rd +++ b/man/subsetGiottoLocs.Rd @@ -42,7 +42,7 @@ to subset to} giotto object } \description{ -Subsets Giotto object based on spatial locations +Subsets Giotto object based on spatial locations. } \details{ Subsets a Giotto based on spatial locations and for one provided spatial unit From 5e33ad816a2e0b83a89fcccf687f7009094256fc Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 21 Sep 2023 20:10:12 -0400 Subject: [PATCH 5/5] update subsetGiotto functions - Made sure that all_spat_unit and all_feat_types params are being respected by subset. - Added documentation --- DESCRIPTION | 2 +- R/subset.R | 156 +++++++++++++++++++++++++++++++++++----------------- 2 files changed, 108 insertions(+), 50 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 65dd9ba5..21056c12 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GiottoClass Title: Giotto Suite object definitions and framework -Version: 0.0.0.9004 +Version: 0.0.0.9005 Authors@R: c( person("Ruben", "Dries", email = "rubendries@gmail.com", role = c("aut", "cre")), diff --git a/R/subset.R b/R/subset.R index 1b7d526a..e5193e90 100644 --- a/R/subset.R +++ b/R/subset.R @@ -219,7 +219,7 @@ subset_spatial_locations = function(gobject, } # 1. for each spatial unit requested to be subset... - for(su in avail_locs$spat_unit) { + for(su in unique(avail_locs$spat_unit)) { per_spat_unit(su = su) } @@ -356,35 +356,54 @@ subset_feature_metadata = function(gobject, #' @noRd subset_spatial_network = function(gobject, spat_unit, - cell_ids) { + cell_ids, + all_spat_units = TRUE) { # define for data.table to = from = NULL - # cell spatial network - if(!is.null(slot(gobject, 'spatial_network'))) { - # Find existing networks for given spatial unit - existing_networks = list_spatial_networks_names(gobject = gobject, - spat_unit = spat_unit) - # Iterate through all networks of this spatial unit... - for(network in existing_networks) { - spatNetObj = get_spatialNetwork(gobject = gobject, - spat_unit = spat_unit, - name = network, - output = 'spatialNetworkObj') + # if no spatial networks available, return directly + if(is.null(slot(gobject, 'spatial_network'))) { + return(gobject) + } - # Within each spatialNetworkObj, subset only the cells_to_keep - spatNetObj[] = spatNetObj[][to %in% cell_ids & from %in% cell_ids] + # Find existing networks and return as DT + if(all_spat_units) { + existing_networks = list_spatial_networks(gobject = gobject) + } else { + existing_networks = list_spatial_networks(gobject = gobject, + spat_unit = spat_unit) + } - # Set the spatialNetworkObj back into the gobject - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### - gobject = set_spatialNetwork(gobject = gobject, + do_subset = function(su, nname) { + spatNetObj = get_spatialNetwork(gobject = gobject, + spat_unit = su, + name = nname, + output = 'spatialNetworkObj') + + # Within each spatialNetworkObj, subset only the cells_to_keep + spatNetObj[] = spatNetObj[][to %in% cell_ids & from %in% cell_ids] + + # Set the spatialNetworkObj back into the gobject + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject <<- set_spatialNetwork(gobject = gobject, spatial_network = spatNetObj, verbose = FALSE) - ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + } + + per_spat_unit = function(su) { + net_names = existing_networks[spat_unit == su, name] + for(nname in net_names) { + do_subset(su, nname) } } + for(su in unique(existing_networks$spat_unit)) { + per_spat_unit(su = su) + } + + return(gobject) } @@ -398,13 +417,17 @@ subset_spatial_network = function(gobject, subset_dimension_reduction = function(gobject, spat_unit, feat_type, - cell_ids) { + cell_ids, + all_feat_types = TRUE, + all_spat_units = TRUE) { # find available dim reductions - avail_dim = list_dim_reductions(gobject = gobject, - data_type = 'cells', - spat_unit = spat_unit, - feat_type = feat_type) + avail_dim = list_dim_reductions( + gobject = gobject, + data_type = 'cells', + spat_unit = ifelse(all_spat_units, NULL, spat_unit), + feat_type = ifelse(all_feat_types, NULL, feat_type) + ) if(!is.null(avail_dim)) { @@ -441,16 +464,22 @@ subset_dimension_reduction = function(gobject, subset_nearest_network = function(gobject, spat_unit, feat_type, - cell_ids) { - - avail_kNN = list_nearest_networks(gobject, - spat_unit = spat_unit, - feat_type = feat_type, - nn_type = 'kNN') - avail_sNN = list_nearest_networks(gobject, - spat_unit = spat_unit, - feat_type = feat_type, - nn_type = 'sNN') + cell_ids, + all_spat_units = TRUE, + all_feat_types = TRUE) { + + avail_kNN = list_nearest_networks( + gobject, + spat_unit = ifelse(all_spat_units, NULL, spat_unit), + feat_type = ifelse(all_feat_types, NULL, feat_type), + nn_type = 'kNN' + ) + avail_sNN = list_nearest_networks( + gobject, + spat_unit = ifelse(all_spat_units, NULL, spat_unit), + feat_type = ifelse(all_feat_types, NULL, feat_type), + nn_type = 'sNN' + ) if(!is.null(avail_kNN)) { @@ -550,11 +579,15 @@ subset_nearest_network = function(gobject, subset_spatial_enrichment = function(gobject, spat_unit, feat_type, - cell_ids) { + cell_ids, + all_spat_units = TRUE, + all_feat_types = TRUE) { - avail_enr = list_spatial_enrichments(gobject, - spat_unit = spat_unit, - feat_type = feat_type) + avail_enr = list_spatial_enrichments( + gobject, + spat_unit = ifelse(all_spat_units, NULL, spat_unit), + feat_type = ifelse(all_feat_types, NULL, feat_type) + ) if(!is.null(avail_enr)) { for(enr_i in seq(avail_enr[, .N])) { @@ -653,14 +686,20 @@ subset_giotto_polygon_object = function(gpolygon, #' @title Subset spatial info data #' @name subset_spatial_info_data -#' @description Subset all spatial info (polygon) data +#' @description Subset all spatial info (polygon) data. +#' @param spatial_info contents of the Giotto spatial_info slot +#' @param cell_ids character. cell ids to keep +#' @param poly_info character. polygon(s) to subset +#' @param feat_type feature type of overlaps to subset if they exist within the +#' giottoPolygon +#' @param feat_ids character. feat ids to keep #' @keywords internal #' @noRd subset_spatial_info_data = function(spatial_info, cell_ids, poly_info = 'cell', - feat_ids, feat_type = NULL, + feat_ids, verbose = TRUE) { @@ -670,10 +709,12 @@ subset_spatial_info_data = function(spatial_info, } res_list = list() + # iterate through all spatial info entries... for(spat_info in names(spatial_info)) { if(verbose) cat('for ', spat_info, '\n') + # if the spatial info is one selected in poly_info... if(spat_info %in% poly_info) { if(verbose) cat('--> ', spat_info, ' found back in polygon layer: ', poly_info, '\n') @@ -688,7 +729,9 @@ subset_spatial_info_data = function(spatial_info, res_list[[spat_info]] = spat_subset } else { - + # even if the spatial info is not one selected directly through poly_info, + # still subset subset any existing feature overlaps matching the feat_type + # for the feat_ids if(!is.null(spatial_info[[spat_info]]@overlaps)) { for(feat in names(spatial_info[[spat_info]]@overlaps)) { @@ -769,6 +812,11 @@ subset_giotto_points_object = function(gpoints, #' @title Subset feature info data #' @name subset_feature_info_data #' @description Subset all spatial feature (points) data +#' @param feat_info contents of giotto object feat_info slot +#' @param feat_ids character. feat ids to keep +#' @param feat_type character vector. feature type(s) to subset +#' @param x_min,x_max,y_min,y_max spatial bounds to subset by +#' @param verbose be verbose #' @keywords internal #' @noRd subset_feature_info_data = function(feat_info, @@ -831,7 +879,11 @@ subset_feature_info_data = function(feat_info, # and feature info (points). This is useful for situations in which aggregate # information has not been created but the subcellular data is present. +# TODO +# Consider an `across_spat_units` and `across_feat_types` for finer control +# with a special ':all:' input that will apply to all spat_units/feat_types +# Hierarchical subsetting? #' @title subsetGiotto @@ -1003,7 +1055,8 @@ subsetGiotto <- function(gobject, # cell spatial network gobject = subset_spatial_network(gobject = gobject, spat_unit = spat_unit, - cell_ids = cell_ids) + cell_ids = cell_ids, + all_spat_units = all_spat_units) if(verbose) cat('completed 7: subset spatial network(s) \n') @@ -1017,7 +1070,9 @@ subsetGiotto <- function(gobject, gobject = subset_dimension_reduction(gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - cell_ids = cell_ids) + cell_ids = cell_ids, + all_feat_types = all_feat_types, + all_spat_units = all_spat_units) if(verbose) cat('completed 8: subsetted dimension reductions \n') @@ -1026,7 +1081,9 @@ subsetGiotto <- function(gobject, gobject = subset_nearest_network(gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - cell_ids = cell_ids) + cell_ids = cell_ids, + all_spat_units = all_spat_units, + all_feat_types = all_feat_types) if(verbose) cat('completed 9: subsetted nearest network(s) \n') @@ -1035,7 +1092,9 @@ subsetGiotto <- function(gobject, gobject = subset_spatial_enrichment(gobject = gobject, spat_unit = spat_unit, feat_type = feat_type, - cell_ids = cell_ids) + cell_ids = cell_ids, + all_spat_units = all_spat_units, + all_feat_types = all_feat_types) if(verbose) cat('completed 10: subsetted spatial enrichment results \n') @@ -1044,6 +1103,9 @@ subsetGiotto <- function(gobject, for(select_poly_info in poly_info) { + # for each entry entry in poly_info, subset using cell_ids + # note that even if no poly_info is selected, the overlaps slots that match + # the feat_type param will be subset using feat_ids gobject@spatial_info = subset_spatial_info_data(spatial_info = gobject@spatial_info, feat_type = feat_type, cell_ids = cell_ids, @@ -1099,10 +1161,6 @@ subsetGiotto <- function(gobject, 'feats removed' = feats_removed) gobject@parameters = parameters_list - # if(verbose){ - # print(gobject@spatial_info) - # print(gobject@spatial_locs) - # } return(initialize(gobject))