diff --git a/build/lib/paste/PASTE.py b/build/lib/paste/PASTE.py index 7f871fc..196151c 100644 --- a/build/lib/paste/PASTE.py +++ b/build/lib/paste/PASTE.py @@ -1,33 +1,53 @@ +from typing import List, Tuple, Optional import numpy as np -import anndata +from anndata import AnnData import ot from sklearn.decomposition import NMF -from .helper import kl_divergence, intersect, kl_divergence_backend, to_dense_array, extract_data_matrix -import time +from .helper import intersect, kl_divergence_backend, to_dense_array, extract_data_matrix -def pairwise_align(sliceA, sliceB, alpha = 0.1, dissimilarity='kl', use_rep = None, G_init = None, a_distribution = None, b_distribution = None, norm = False, numItermax = 200, backend=ot.backend.NumpyBackend(), use_gpu = False, return_obj = False, verbose = False, gpu_verbose = True, **kwargs): +def pairwise_align( + sliceA: AnnData, + sliceB: AnnData, + alpha: float = 0.1, + dissimilarity: str ='kl', + use_rep: Optional[str] = None, + G_init = None, + a_distribution = None, + b_distribution = None, + norm: bool = False, + numItermax: int = 200, + backend = ot.backend.NumpyBackend(), + use_gpu: bool = False, + return_obj: bool = False, + verbose: bool = False, + gpu_verbose: bool = True, + **kwargs) -> Tuple[np.ndarray, Optional[int]]: """ Calculates and returns optimal alignment of two slices. - param: sliceA - AnnData object of spatial slice - param: sliceB - AnnData object of spatial slice - param: alpha - Alignment tuning parameter. Note: 0 ≤ alpha ≤ 1 - param: dissimilarity - Expression dissimilarity measure: 'kl' or 'euclidean' - param: use_rep - If none, uses slice.X to calculate dissimilarity between spots, otherwise uses the representation given by slice.obsm[use_rep] - param: G_init - initial mapping to be used in FGW-OT, otherwise default is uniform mapping - param: a_distribution - distribution of sliceA spots (1-d numpy array), otherwise default is uniform - param: b_distribution - distribution of sliceB spots (1-d numpy array), otherwise default is uniform - param: numItermax - max number of iterations during FGW-OT - param: norm - scales spatial distances such that neighboring spots are at distance 1 if True, otherwise spatial distances remain unchanged - param: backend - type of backend to run calculations. For list of backends available on system: ot.backend.get_backend_list() - param: use_gpu - Whether to run on gpu or cpu. Currently we only have gpu support for Pytorch. - param: return_obj - returns objective function output of FGW-OT if True, nothing if False - param: verbose - FGW-OT is verbose if True, nothing if False - param: gpu_verbose - Print whether gpu is being used to user, nothing if False + Args: + sliceA: Slice A to align. + sliceB: Slice B to align. + alpha: Alignment tuning parameter. Note: 0 <= alpha <= 1. + dissimilarity: Expression dissimilarity measure: ``'kl'`` or ``'euclidean'``. + use_rep: If ``None``, uses ``slice.X`` to calculate dissimilarity between spots, otherwise uses the representation given by ``slice.obsm[use_rep]``. + G_init (array-like, optional): Initial mapping to be used in FGW-OT, otherwise default is uniform mapping. + a_distribution (array-like, optional): Distribution of sliceA spots, otherwise default is uniform. + b_distribution (array-like, optional): Distribution of sliceB spots, otherwise default is uniform. + numItermax: Max number of iterations during FGW-OT. + norm: If ``True``, scales spatial distances such that neighboring spots are at distance 1. Otherwise, spatial distances remain unchanged. + backend: Type of backend to run calculations. For list of backends available on system: ``ot.backend.get_backend_list()``. + use_gpu: If ``True``, use gpu. Otherwise, use cpu. Currently we only have gpu support for Pytorch. + return_obj: If ``True``, additionally returns objective function output of FGW-OT. + verbose: If ``True``, FGW-OT is verbose. + gpu_verbose: If ``True``, print whether gpu is being used to user. - - return: pi - alignment of spots - return: log['fgw_dist'] - objective function output of FGW-OT + Returns: + - Alignment of spots. + + If ``return_obj = True``, additionally returns: + + - Objective function output of FGW-OT. """ # Determine if gpu or cpu is being used @@ -131,31 +151,47 @@ def pairwise_align(sliceA, sliceB, alpha = 0.1, dissimilarity='kl', use_rep = No return pi -def center_align(A, slices, lmbda = None, alpha = 0.1, n_components = 15, threshold = 0.001, max_iter = 10, dissimilarity='kl', use_rep = None, norm = False, random_seed = None, pis_init = None, distributions=None, backend = ot.backend.NumpyBackend(), use_gpu = False, verbose = False, gpu_verbose = True): +def center_align( + A: AnnData, + slices: List[AnnData], + lmbda = None, + alpha: float = 0.1, + n_components: int = 15, + threshold: float = 0.001, + max_iter: int = 10, + dissimilarity: str ='kl', + norm: bool = False, + random_seed: Optional[int] = None, + pis_init: Optional[List[np.ndarray]] = None, + distributions = None, + backend = ot.backend.NumpyBackend(), + use_gpu: bool = False, + verbose: bool = False, + gpu_verbose: bool = True) -> Tuple[AnnData, List[np.ndarray]]: """ Computes center alignment of slices. - param: A - Initialization of starting AnnData Spatial Object; Make sure to include gene expression AND spatial info - param: slices - List of slices (AnnData objects) used to calculate center alignment - param: lmbda - List of probability weights assigned to each slice; default is uniform weights - param: n_components - Number of components in NMF decomposition - param: threshold - Threshold for convergence of W and H - param: max_iter - maximum number of iterations for solving for center slice - param: dissimilarity - Expression dissimilarity measure: 'kl' or 'euclidean' - param: use_rep - If none, uses slice.X to calculate dissimilarity between spots, otherwise uses the representation given by slice.obsm[use_rep] - param: norm - scales spatial distances such that neighboring spots are at distance 1 if True, otherwise spatial distances remain unchanged - param: random_seed - set random seed for reproducibility - param: pis_init - initial list of mappings between 'A' and 'slices' to solver, otherwise will calculate default mappings - param: distributions - distributions of spots for each slice (list of 1-d numpy array), otherwise default is uniform - param: backend - type of backend to run calculations. For list of backends available on system: ot.backend.get_backend_list() - param: use_gpu - Whether to run on gpu or cpu. Currently we only have gpu support for Pytorch. - param: verbose - FGW-OT is verbose if True, nothing if False - param: gpu_verbose - Print whether gpu is being used to user, nothing if False + Args: + A: Slice to use as the initialization for center alignment; Make sure to include gene expression and spatial information. + slices: List of slices to use in the center alignment. + lmbda (array-like, optional): List of probability weights assigned to each slice; If ``None``, use uniform weights. + alpha: Alignment tuning parameter. Note: 0 <= alpha <= 1. + n_components: Number of components in NMF decomposition. + threshold: Threshold for convergence of W and H during NMF decomposition. + max_iter: Maximum number of iterations for our center alignment algorithm. + dissimilarity: Expression dissimilarity measure: ``'kl'`` or ``'euclidean'``. + norm: If ``True``, scales spatial distances such that neighboring spots are at distance 1. Otherwise, spatial distances remain unchanged. + random_seed: Set random seed for reproducibility. + pis_init: Initial list of mappings between 'A' and 'slices' to solver. Otherwise, default will automatically calculate mappings. + distributions (List[array-like], optional): Distributions of spots for each slice. Otherwise, default is uniform. + backend: Type of backend to run calculations. For list of backends available on system: ``ot.backend.get_backend_list()``. + use_gpu: If ``True``, use gpu. Otherwise, use cpu. Currently we only have gpu support for Pytorch. + verbose: If ``True``, FGW-OT is verbose. + gpu_verbose: If ``True``, print whether gpu is being used to user. - - return: center_slice - inferred center slice (AnnData object) with full and low dimensional representations (W, H) of - the gene expression matrix - return: pi - List of pairwise alignment mappings of the center slice (rows) to each input slice (columns) + Returns: + - Inferred center slice with full and low dimensional representations (W, H) of the gene expression matrix. + - List of pairwise alignment mappings of the center slice (rows) to each input slice (columns). """ # Determine if gpu or cpu is being used @@ -213,10 +249,10 @@ def center_align(A, slices, lmbda = None, alpha = 0.1, n_components = 15, thresh center_coordinates = A.obsm['spatial'] if not isinstance(center_coordinates, np.ndarray): - print("Warning: A.obsm['spatial'] is not of type numpy array .") + print("Warning: A.obsm['spatial'] is not of type numpy array.") # Initialize center_slice - center_slice = anndata.AnnData(np.dot(W,H)) + center_slice = AnnData(np.dot(W,H)) center_slice.var.index = common_genes center_slice.obs.index = A.obs.index center_slice.obsm['spatial'] = center_coordinates @@ -246,7 +282,7 @@ def center_align(A, slices, lmbda = None, alpha = 0.1, n_components = 15, thresh #--------------------------- HELPER METHODS ----------------------------------- def center_ot(W, H, slices, center_coordinates, common_genes, alpha, backend, use_gpu, dissimilarity = 'kl', norm = False, G_inits = None, distributions=None, verbose = False): - center_slice = anndata.AnnData(np.dot(W,H)) + center_slice = AnnData(np.dot(W,H)) center_slice.var.index = common_genes center_slice.obsm['spatial'] = center_coordinates @@ -313,4 +349,4 @@ def df(G): return res, log else: - return ot.gromov.cg(p, q, (1 - alpha) * M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs) + return ot.gromov.cg(p, q, (1 - alpha) * M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs) \ No newline at end of file diff --git a/build/lib/paste/__init__.py b/build/lib/paste/__init__.py index f7c2c33..f1413a7 100644 --- a/build/lib/paste/__init__.py +++ b/build/lib/paste/__init__.py @@ -1,3 +1,3 @@ from .PASTE import pairwise_align, center_align -from .helper import kl_divergence, kl_divergence_backend, intersect, match_spots_using_spatial_heuristic, filter_for_common_genes +from .helper import match_spots_using_spatial_heuristic from .visualization import plot_slice, stack_slices_pairwise, stack_slices_center \ No newline at end of file diff --git a/build/lib/paste/helper.py b/build/lib/paste/helper.py index 384d658..67db0db 100644 --- a/build/lib/paste/helper.py +++ b/build/lib/paste/helper.py @@ -2,27 +2,44 @@ import scipy import ot -def filter_for_common_genes(slices): +def match_spots_using_spatial_heuristic( + X, + Y, + use_ot: bool = True) -> np.ndarray: """ - param: slices - list of slices (AnnData objects) - """ - assert len(slices) > 0, "Cannot have empty list." + Calculates and returns a mapping of spots using a spatial heuristic. + + Args: + X (array-like, optional): Coordinates for spots X. + Y (array-like, optional): Coordinates for spots Y. + use_ot: If ``True``, use optimal transport ``ot.emd()`` to calculate mapping. Otherwise, use Scipy's ``min_weight_full_bipartite_matching()`` algorithm. - common_genes = slices[0].var.index - for s in slices: - common_genes = intersect(common_genes, s.var.index) - for i in range(len(slices)): - slices[i] = slices[i][:, common_genes] - print('Filtered all slices for common genes. There are ' + str(len(common_genes)) + ' common genes.') + Returns: + Mapping of spots using a spatial heuristic. + """ + n1,n2=len(X),len(Y) + X,Y = norm_and_center_coordinates(X),norm_and_center_coordinates(Y) + dist = scipy.spatial.distance_matrix(X,Y) + if use_ot: + pi = ot.emd(np.ones(n1)/n1, np.ones(n2)/n2, dist) + else: + row_ind, col_ind = scipy.sparse.csgraph.min_weight_full_bipartite_matching(scipy.sparse.csr_matrix(dist)) + pi = np.zeros((n1,n2)) + pi[row_ind, col_ind] = 1/max(n1,n2) + if n1 Tuple[List[AnnData], Optional[List[float]], Optional[List[np.ndarray]]]: """ Align spatial coordinates of sequential pairwise slices. @@ -13,11 +19,18 @@ def stack_slices_pairwise(slices, pis, output_params=False): slices[0] --> slices[1] --> slices[2] --> ... - param: slices - list of slices (AnnData Object) - param: pis - list of pi (pairwise_align output) between consecutive slices - param: output_params - Boolean for whether to return angles of rotation (theta) and translations for each slice + Args: + slices: List of slices. + pis: List of pi (``pairwise_align()`` output) between consecutive slices. + output_params: If ``True``, addtionally return angles of rotation (theta) and translations for each slice. - Return: new_layers - list of slices with aligned spatial coordinates. + Returns: + - List of slices with aligned spatial coordinates. + + If ``output_params = True``, additionally return: + + - List of angles of rotation (theta) for each slice. + - List of translations [x_translation, y_translation] for each slice. """ assert len(slices) == len(pis) + 1, "'slices' should have length one more than 'pis'. Please double check." assert len(slices) > 1, "You should have at least 2 layers." @@ -27,7 +40,7 @@ def stack_slices_pairwise(slices, pis, output_params=False): if not output_params: S1, S2 = generalized_procrustes_analysis(slices[0].obsm['spatial'], slices[1].obsm['spatial'], pis[0]) else: - S1, S2,theta,tX,tY = generalized_procrustes_analysis_2D(slices[0].obsm['spatial'], slices[1].obsm['spatial'], pis[0],output_params=output_params) + S1, S2,theta,tX,tY = generalized_procrustes_analysis(slices[0].obsm['spatial'], slices[1].obsm['spatial'], pis[0],output_params=output_params) thetas.append(theta) translations.append(tX) translations.append(tY) @@ -37,7 +50,7 @@ def stack_slices_pairwise(slices, pis, output_params=False): if not output_params: x, y = generalized_procrustes_analysis(new_coor[i], slices[i+1].obsm['spatial'], pis[i]) else: - x, y,theta,tX,tY = generalized_procrustes_analysis_2D(new_coor[i], slices[i+1].obsm['spatial'], pis[i],output_params=output_params) + x, y,theta,tX,tY = generalized_procrustes_analysis(new_coor[i], slices[i+1].obsm['spatial'], pis[i],output_params=output_params) thetas.append(theta) translations.append(tY) new_coor.append(y) @@ -54,25 +67,38 @@ def stack_slices_pairwise(slices, pis, output_params=False): return new_slices, thetas, translations -def stack_slices_center(center_slice, slices, pis, output_params=False): +def stack_slices_center( + center_slice: AnnData, + slices: List[AnnData], + pis: List[np.ndarray], + output_params: bool = False) -> Tuple[AnnData, List[AnnData], Optional[List[float]], Optional[List[np.ndarray]]]: """ Align spatial coordinates of a list of slices to a center_slice. In other words, align: slices[0] --> center_slice + slices[1] --> center_slice + slices[2] --> center_slice + ... - param: center_slice - inferred center slice (AnnData object) - param: slices - list of original slices to be aligned - param: pis - list of pi (center_align output) between center_slice and slices - param: output_params - Boolean for whether to return angles of rotation (theta) and translations for each slice + Args: + center_slice: Inferred center slice. + slices: List of original slices to be aligned. + pis: List of pi (``center_align()`` output) between center_slice and slices. + output_params: If ``True``, additionally return angles of rotation (theta) and translations for each slice. - - Return: new_center - center slice with aligned spatial coordinates. - Return: new_layers - list of other slices with aligned spatial coordinates. + Returns: + - Center slice with aligned spatial coordinates. + - List of other slices with aligned spatial coordinates. + + If ``output_params = True``, additionally return: + + - List of angles of rotation (theta) for each slice. + - List of translations [x_translation, y_translation] for each slice. """ assert len(slices) == len(pis), "'slices' should have the same length 'pis'. Please double check." new_coor = [] @@ -83,7 +109,7 @@ def stack_slices_center(center_slice, slices, pis, output_params=False): if not output_params: c, y = generalized_procrustes_analysis(center_slice.obsm['spatial'], slices[i].obsm['spatial'], pis[i]) else: - c, y,theta,tX,tY = generalized_procrustes_analysis_2D(center_slice.obsm['spatial'], slices[i].obsm['spatial'], pis[i],output_params=output_params) + c, y,theta,tX,tY = generalized_procrustes_analysis(center_slice.obsm['spatial'], slices[i].obsm['spatial'], pis[i],output_params=output_params) thetas.append(theta) translations.append(tY) new_coor.append(y) @@ -101,62 +127,53 @@ def stack_slices_center(center_slice, slices, pis, output_params=False): else: return new_center, new_slices, thetas, translations - -def generalized_procrustes_analysis(X, Y, pi): +def plot_slice( + sliceX: AnnData, + color, + ax: Optional[plt.Axes] = None, + s: float = 100) -> None: """ - Finds and applies optimal rotation between spatial coordinates of two layers (may also do a reflection). + Plots slice spatial coordinates. - param: X - np array of spatial coordinates (ex: sliceA.obs['spatial']) - param: Y - np array of spatial coordinates (ex: sliceB.obs['spatial']) - param: pi - mapping between the two layers output by PASTE - - Return: aligned spatial coordinates of X, Y + Args: + sliceX: Slice to be plotted. + color: Scatterplot color, any format accepted by ``matplotlib``. + ax: Pre-existing axes for the plot. Otherwise, call ``matplotlib.pyplot.gca()`` internally. + s: Size of spots. """ - X = X - pi.sum(axis=1).dot(X) #X.mean(axis=0) - Y = Y - pi.sum(axis=0).dot(Y) #Y.mean(axis=0) - H = Y.T.dot(pi.T.dot(X)) - U, S, Vt = np.linalg.svd(H) - R = Vt.T.dot(U.T) - Y = R.dot(Y.T).T - return X,Y + sns.scatterplot(x = sliceX.obsm['spatial'][:,0],y = sliceX.obsm['spatial'][:,1],linewidth=0,s=s, marker=".",color=color,ax=ax) + if ax: + ax.invert_yaxis() + ax.axis('off') + -def generalized_procrustes_analysis_2D(X,Y,pi,output_params=True): +def generalized_procrustes_analysis(X, Y, pi, output_params = False): """ - Finds and applies optimal rotation between spatial coordinates of two slices in 2D and returns the rotation angle and translation. + Finds and applies optimal rotation between spatial coordinates of two layers (may also do a reflection). - param: X - np array of spatial coordinates (ex: sliceA.obs['spatial']) - param: Y - np array of spatial coordinates (ex: sliceB.obs['spatial']) - param: pi - mapping between the two layers output by PASTE + Args: + X: np array of spatial coordinates (ex: sliceA.obs['spatial']) + Y: np array of spatial coordinates (ex: sliceB.obs['spatial']) + pi: mapping between the two layers output by PASTE + output_params: Boolean of whether to return rotation angle and translations along with spatial coordiantes. + - Return: aligned spatial coordinates of X, Y, rotation angle, translation of X, translation of Y + Returns: + Aligned spatial coordinates of X, Y, rotation angle, translation of X, translation of Y """ assert X.shape[1] == 2 and Y.shape[1] == 2 + tX = pi.sum(axis=1).dot(X) tY = pi.sum(axis=0).dot(Y) - X = X - tX #X.mean(axis=0) - Y = Y - tY #Y.mean(axis=0) + X = X - tX + Y = Y - tY H = Y.T.dot(pi.T.dot(X)) - M = np.array([[0,-1],[1,0]]) - theta = np.arctan(np.trace(M.dot(H))/np.trace(H)) - # print('theta',theta*180/np.pi) - R = np.array([[np.cos(theta),-np.sin(theta)],[np.sin(theta),np.cos(theta)]]) + U, S, Vt = np.linalg.svd(H) + R = Vt.T.dot(U.T) Y = R.dot(Y.T).T if output_params: + M = np.array([[0,-1],[1,0]]) + theta = np.arctan(np.trace(M.dot(H))/np.trace(H)) return X,Y,theta,tX,tY else: - return X,Y - - -def plot_slice(sliceX, color, ax=None, s=100): - """ - Plots slice spatial coordinates. - - param: sliceX - AnnData Object of slice - param: color - scatterplot color - param: ax - Pre-existing axes for the plot. Otherwise, call matplotlib.pyplot.gca() internally. - param: s - size of spots - """ - sns.scatterplot(x = sliceX.obsm['spatial'][:,0],y = sliceX.obsm['spatial'][:,1],linewidth=0,s=s, marker=".",color=color,ax=ax) - if ax: - ax.invert_yaxis() - ax.axis('off') \ No newline at end of file + return X,Y \ No newline at end of file diff --git a/dist/paste-bio-1.2.0.tar.gz b/dist/paste-bio-1.2.0.tar.gz deleted file mode 100644 index afafe0f..0000000 Binary files a/dist/paste-bio-1.2.0.tar.gz and /dev/null differ diff --git a/dist/paste-bio-1.2.1.tar.gz b/dist/paste-bio-1.2.1.tar.gz new file mode 100644 index 0000000..ba4c137 Binary files /dev/null and b/dist/paste-bio-1.2.1.tar.gz differ diff --git a/dist/paste_bio-1.2.0-py3-none-any.whl b/dist/paste_bio-1.2.0-py3-none-any.whl deleted file mode 100644 index 72665d4..0000000 Binary files a/dist/paste_bio-1.2.0-py3-none-any.whl and /dev/null differ diff --git a/dist/paste_bio-1.2.1-py3-none-any.whl b/dist/paste_bio-1.2.1-py3-none-any.whl new file mode 100644 index 0000000..4b6e9e6 Binary files /dev/null and b/dist/paste_bio-1.2.1-py3-none-any.whl differ diff --git a/setup.cfg b/setup.cfg index d5d60c4..38ce193 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = paste-bio -version = 1.2.0 +version = 1.2.1 author = Max Land author_email = max.ruikang.land@gmail.com description = A computational method to align and integrate spatial transcriptomics experiments. diff --git a/src/paste_bio.egg-info/PKG-INFO b/src/paste_bio.egg-info/PKG-INFO new file mode 100644 index 0000000..1988ecc --- /dev/null +++ b/src/paste_bio.egg-info/PKG-INFO @@ -0,0 +1,173 @@ +Metadata-Version: 2.1 +Name: paste-bio +Version: 1.2.1 +Summary: A computational method to align and integrate spatial transcriptomics experiments. +Home-page: https://github.com/raphael-group/paste +Author: Max Land +Author-email: max.ruikang.land@gmail.com +License: UNKNOWN +Project-URL: Bug Tracker, https://github.com/raphael-group/paste/issues +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Requires-Python: >=3.6 +Description-Content-Type: text/markdown +License-File: LICENSE + +# PASTE + +![PASTE Overview](https://github.com/raphael-group/paste/blob/main/docs/source/_static/images/paste_overview.png) + +PASTE is a computational method that leverages both gene expression similarity and spatial distances between spots to align and integrate spatial transcriptomics data. In particular, there are two methods: +1. `pairwise_align`: align spots across pairwise slices. +2. `center_align`: integrate multiple slices into one center slice. + +You can read our preprint [here](https://www.biorxiv.org/content/10.1101/2021.03.16.435604v1). + +PASTE is actively being worked on with future updates coming. + +### Recent News + +As of version 1.2.0, PASTE now supports GPU implementation via Pytorch. For more details, see the GPU section of the [Tutorial notebook](docs/source/notebooks/getting-started.ipynb). + +### Installation + +The easiest way is to install PASTE on pypi: https://pypi.org/project/paste-bio/. + +`pip install paste-bio` + +Or you can install PASTE on bioconda: https://anaconda.org/bioconda/paste-bio. + +`conda install -c bioconda paste-bio` + +Check out Tutorial.ipynb for an example of how to use PASTE. + +Alternatively, you can clone the respository and try the following example in a +notebook or the command line. + +### Quick Start + +To use PASTE we require at least two slices of spatial-omics data (both +expression and coordinates) that are in +anndata format (i.e. read in by scanpy/squidpy). We have included a breast +cancer dataset from [1] in the [sample_data folder](sample_data/) of this repo +that we will use as an example below to show how to use PASTE. + +```python +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +import numpy as np +import scanpy as sc +import paste as pst + +# Load Slices +data_dir = './sample_data/' # change this path to the data you wish to analyze + +# Assume that the coordinates of slices are named slice_name + "_coor.csv" +def load_slices(data_dir, slice_names=["slice1", "slice2"]): + slices = [] + for slice_name in slice_names: + slice_i = sc.read_csv(data_dir + slice_name + ".csv") + slice_i_coor = np.genfromtxt(data_dir + slice_name + "_coor.csv", delimiter = ',') + slice_i.obsm['spatial'] = slice_i_coor + # Preprocess slices + sc.pp.filter_genes(slice_i, min_counts = 15) + sc.pp.filter_cells(slice_i, min_counts = 100) + slices.append(slice_i) + return slices + +slices = load_slices(data_dir) +slice1, slice2 = slices + +# Pairwise align the slices +pi12 = pst.pairwise_align(slice1, slice2) + +# To visualize the alignment you can stack the slices +# according to the alignment pi +slices, pis = [slice1, slice2], [pi12] +new_slices = pst.stack_slices_pairwise(slices, pis) + +slice_colors = ['#e41a1c','#377eb8'] +plt.figure(figsize=(7,7)) +for i in range(len(new_slices)): + pst.plot_slice(new_slices[i],slice_colors[i],s=400) +plt.legend(handles=[mpatches.Patch(color=slice_colors[0], label='1'),mpatches.Patch(color=slice_colors[1], label='2')]) +plt.gca().invert_yaxis() +plt.axis('off') +plt.show() + +# Center align slices +## We have to reload the slices as pairwise_alignment modifies the slices. +slices = load_slices(data_dir) +slice1, slice2 = slices + +# Construct a center slice +## choose one of the slices as the coordinate reference for the center slice, +## i.e. the center slice will have the same number of spots as this slice and +## the same coordinates. +initial_slice = slice1.copy() +slices = [slice1, slice2] +lmbda = len(slices)*[1/len(slices)] # set hyperparameter to be uniform + +## Possible to pass in an initial pi (as keyword argument pis_init) +## to improve performance, see Tutorial.ipynb notebook for more details. +center_slice, pis = pst.center_align(initial_slice, slices, lmbda) + +## The low dimensional representation of our center slice is held +## in the matrices W and H, which can be used for downstream analyses +W = center_slice.uns['paste_W'] +H = center_slice.uns['paste_H'] +``` + +### GPU implementation +PASTE now is compatible with gpu via Pytorch. All we need to do is add the following two parameters to our main functions: +``` +pi12 = pst.pairwise_align(slice1, slice2, backend = ot.backend.TorchBackend(), use_gpu = True) + +center_slice, pis = pst.center_align(initial_slice, slices, lmbda, backend = ot.backend.TorchBackend(), use_gpu = True) +``` +For more details, see the GPU section of the [Tutorial notebook](docs/source/notebooks/getting-started.ipynb). + +### Command Line + +We provide the option of running PASTE from the command line. + +First, clone the repository: + +`git clone https://github.com/raphael-group/paste.git` + +Next, when providing files, you will need to provide two separate files: the gene expression data followed by spatial data (both as .csv) for the code to initialize one slice object. + +Sample execution (based on this repo): `python paste-cmd-line.py -m center -f ./sample_data/slice1.csv ./sample_data/slice1_coor.csv ./sample_data/slice2.csv ./sample_data/slice2_coor.csv ./sample_data/slice3.csv ./sample_data/slice3_coor.csv` + +Note: `pairwise` will return pairwise alignment between each consecutive pair of slices (e.g. \[slice1,slice2\], \[slice2,slice3\]). + +| Flag | Name | Description | Default Value | +| --- | --- | --- | --- | +| -m | mode | Select either `pairwise` or `center` | (str) `pairwise` | +| -f | files | Path to data files (.csv) | None | +| -d | direc | Directory to store output files | Current Directory | +| -a | alpha | Alpha parameter for PASTE | (float) `0.1` | +| -c | cost | Expression dissimilarity cost (`kl` or `Euclidean`) | (str) `kl` | +| -p | n_components | n_components for NMF step in `center_align` | (int) `15` | +| -l | lmbda | Lambda parameter in `center_align` | (floats) probability vector of length `n` | +| -i | intial_slice | Specify which file is also the intial slice in `center_align` | (int) `1` | +| -t | threshold | Convergence threshold for `center_align` | (float) `0.001` | +| -x | coordinates | Output new coordinates (toggle to turn on) | `False` | +| -w | weights | Weights files of spots in each slice (.csv) | None | +| -s | start | Initial alignments for OT. If not given uses uniform (.csv structure similar to alignment output) | None | + +`pairwise_align` outputs a (.csv) file containing mapping of spots between each consecutive pair of slices. The rows correspond to spots of the first slice, and cols the second. + +`center_align` outputs two files containing the low dimensional representation (NMF decomposition) of the center slice gene expression, and files containing a mapping of spots between the center slice (rows) to each input slice (cols). + +### Sample Dataset + +Added sample spatial transcriptomics dataset consisting of four breast cancer slice courtesy of: + +[1] Ståhl, Patrik & Salmén, Fredrik & Vickovic, Sanja & Lundmark, Anna & Fernandez Navarro, Jose & Magnusson, Jens & Giacomello, Stefania & Asp, Michaela & Westholm, Jakub & Huss, Mikael & Mollbrink, Annelie & Linnarsson, Sten & Codeluppi, Simone & Borg, Åke & Pontén, Fredrik & Costea, Paul & Sahlén, Pelin Akan & Mulder, Jan & Bergmann, Olaf & Frisén, Jonas. (2016). Visualization and analysis of gene expression in tissue sections by spatial transcriptomics. Science. 353. 78-82. 10.1126/science.aaf2403. + +Note: Original data is (.tsv), but we converted it to (.csv). + + diff --git a/src/paste_bio.egg-info/SOURCES.txt b/src/paste_bio.egg-info/SOURCES.txt new file mode 100644 index 0000000..7064fa8 --- /dev/null +++ b/src/paste_bio.egg-info/SOURCES.txt @@ -0,0 +1,14 @@ +LICENSE +README.md +pyproject.toml +setup.cfg +setup.py +src/paste/PASTE.py +src/paste/__init__.py +src/paste/helper.py +src/paste/visualization.py +src/paste_bio.egg-info/PKG-INFO +src/paste_bio.egg-info/SOURCES.txt +src/paste_bio.egg-info/dependency_links.txt +src/paste_bio.egg-info/not-zip-safe +src/paste_bio.egg-info/top_level.txt \ No newline at end of file diff --git a/src/paste_bio.egg-info/dependency_links.txt b/src/paste_bio.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/paste_bio.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/paste_bio.egg-info/not-zip-safe b/src/paste_bio.egg-info/not-zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/paste_bio.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/src/paste_bio.egg-info/top_level.txt b/src/paste_bio.egg-info/top_level.txt new file mode 100644 index 0000000..b467804 --- /dev/null +++ b/src/paste_bio.egg-info/top_level.txt @@ -0,0 +1 @@ +paste