Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update methods-ext.R #27

Merged
merged 4 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GiottoClass
Title: Giotto Suite object definitions and framework
Version: 0.0.0.9003
Version: 0.0.0.9004
Authors@R: c(
person("Ruben", "Dries", email = "rubendries@gmail.com",
role = c("aut", "cre")),
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# GiottoClass 0.0.1 (Release TBD)

## Breaking Changes

## Added

## Changes
- Improved performance of gefToGiotto()
111 changes: 61 additions & 50 deletions R/interoperability.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,56 +20,67 @@

gefToGiotto = function(gef_file, bin_size = 'bin100', verbose = FALSE){
  # Read one bin-size level of a .gef (HDF5) file and build a minimal giotto
  # object from it: a sparse gene x bin count matrix plus the bin locations.
  #
  # gef_file: path to the .gef file; an error is raised if it does not exist.
  # bin_size: name of the 'geneExp/<bin_size>' group to read; must be one of
  #           the values in bin_size_options below, otherwise an error is raised.
  # verbose:  when TRUE, progress messages are emitted through wrap_msg();
  #           when FALSE the function is silent.
  # Returns the object produced by createGiottoObject().

  # data.table vars (columns referenced through non-standard evaluation)
  genes = bin_ID = cell_ID = count = gene_idx = i.bin_ID = NULL

  # package check
  package_check(pkg_name = 'rhdf5', repository = 'Bioc')
  if(!file.exists(gef_file)) stop('File path to .gef file does not exist')

  # check if proper bin_size is selected. These are determined in SAW pipeline.
  if(isTRUE(verbose)) wrap_msg('1. gefToGiotto() begin... \n')
  bin_size_options = c('bin1', 'bin10', 'bin20', 'bin50', 'bin100', 'bin200')
  if(!(bin_size %in% bin_size_options)){
    stop('Please select valid bin size, see ?gefToGiotto for details.')
  }

  # 1. read .gef file at specific bin size
  geneExpData = rhdf5::h5read(file = gef_file,
                              name = paste0('geneExp/', bin_size))
  exprDT = data.table::as.data.table(geneExpData[['expression']])
  geneDT = data.table::as.data.table(geneExpData[['gene']])
  exprDT[, count := as.integer(count)]

  # Label every expression row with its gene BEFORE reordering. rep() pairs
  # rows with genes purely by position, so this is only valid while exprDT is
  # still in the order it was read from the file.
  # NOTE(review): assumes expression rows are stored gene-by-gene in the same
  # order as geneDT, as implied by the per-gene counts - confirm against the
  # .gef specification.
  exprDT[, genes := as.character(rep(x = geneDT$gene, geneDT$count))]

  # sort by x, y coords (ascending) so bins are numbered in coordinate order
  data.table::setorderv(exprDT, c('x', 'y'))
  if(isTRUE(verbose)) wrap_msg('finished reading in .gef', bin_size, '\n')

  # 2. create spatial locations
  if(isTRUE(verbose)) wrap_msg('2. create spatial_locations... \n')
  cell_locations = unique(exprDT[, c('x', 'y')], by = c('x', 'y'))
  # seq_len() stays correct when there are zero bins, unlike 1:nrow()
  cell_locations[, bin_ID := as.factor(seq_len(.N))]
  cell_locations[, cell_ID := paste0('cell_', bin_ID)]
  # ensure first non-numerical col is cell_ID
  data.table::setcolorder(cell_locations, c('x', 'y', 'cell_ID', 'bin_ID'))
  if(isTRUE(verbose)) wrap_msg(nrow(cell_locations), ' bins in total \n')
  if(isTRUE(verbose)) wrap_msg('finished spatial_locations \n')

  # 3. create expression matrix
  if(isTRUE(verbose)) wrap_msg('3. create expression matrix... \n')
  # row index = position of the gene in first-appearance order
  gene_names = unique(exprDT$genes)
  exprDT[, gene_idx := match(genes, gene_names)]

  # merge on x,y; the column index is the integer code of the matching bin_ID
  exprDT[cell_locations, cell_ID := as.integer(i.bin_ID), on = c('x', 'y')]

  # dims are given explicitly so the matrix shape never depends on which
  # indices happen to be present; repeated (gene, bin) pairs are summed
  expMatrix = Matrix::sparseMatrix(
    i = exprDT$gene_idx,
    j = exprDT$cell_ID,
    x = exprDT$count,
    dims = c(length(gene_names), nrow(cell_locations)),
    dimnames = list(gene_names, cell_locations$cell_ID)
  )
  if(isTRUE(verbose)) wrap_msg('finished expression matrix')

  # 4. create minimal giotto object
  if(isTRUE(verbose)) wrap_msg('4. create giotto object... \n')
  stereo = createGiottoObject(
    expression = expMatrix,
    spatial_locs = cell_locations,
    verbose = FALSE
  )
  if(isTRUE(verbose)) wrap_msg('finished giotto object... \n')

  if(isTRUE(verbose)) wrap_msg('gefToGiotto() finished \n')
  return(stereo)
}


Expand Down
2 changes: 1 addition & 1 deletion R/methods-ext.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ setMethod('ext<-', signature(x = 'giottoPolygon', value = 'SpatExtent'), functio
new_ext = ext_to_num_vec(value)
xy_scale = c(diff(new_ext[c(2,1)])/diff(old_ext[c(2,1)]),
diff(new_ext[c(4,3)])/diff(old_ext[c(4,3)]))
x = do_gpoly(x, 'terra'::'rescale', args = list(fx = xy_scale[1], fy = xy_scale[2], x0 = old_ext[1L], y0 = old_ext[3L]))
x = do_gpoly(x, terra::rescale, args = list(fx = xy_scale[1], fy = xy_scale[2], x0 = old_ext[1L], y0 = old_ext[3L]))
x = spatShift(x, dx = new_ext[1L] - old_ext[1L], dy = new_ext[3L] - old_ext[3L])
x
})
Expand Down