Skip to content

Commit

Permalink
NickAkhmetov/10X Builder, adjust visium spot opacity (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
NickAkhmetov authored Apr 17, 2024
1 parent 085911d commit 7e4d8ee
Show file tree
Hide file tree
Showing 14 changed files with 2,239 additions and 31 deletions.
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.0
0.2.1
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pytest==5.2.1
flake8==3.7.8
flake8==7.0.0
autopep8==1.4.4
pytest-mock==3.7.0
coverage==6.3.1
Expand Down
11 changes: 6 additions & 5 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,19 @@ package_dir =
packages = find:
python_requires = >=3.7
install_requires =
vitessce==3.2.3
vitessce==3.2.5
hubmap-commons>=2.0.12
requests>=2.27.1
nbformat==5.1.3
zarr>=2.8.0
zarr>=2.17.2
aiohttp>=3.8.1
fsspec>=2022.1.0

[options.packages.find]
where = src

[flake8]
max-line-length = 99
ignore =
W503 # "line break before binary operator": Prefer operator at start of line so the context is clear.
max-line-length = 120
ignore = W503

# W503: "line break before binary operator": Prefer operator at start of line so the context is clear.
11 changes: 9 additions & 2 deletions src/portal_visualization/builder_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
NanoDESIViewConfBuilder
)
from .builders.anndata_builders import (
MultiomicAnndataZarrViewConfBuilder,
SpatialRNASeqAnnDataZarrViewConfBuilder,
RNASeqAnnDataZarrViewConfBuilder,
SpatialMultiomicAnnDataZarrViewConfBuilder
Expand All @@ -36,8 +37,9 @@ def process_hints(hints):
is_codex = "codex" in hints
is_anndata = "anndata" in hints
is_json = "json_based" in hints
is_spatial = "spatial" in hints

return is_image, is_rna, is_atac, is_sprm, is_codex, is_anndata, is_json
return is_image, is_rna, is_atac, is_sprm, is_codex, is_anndata, is_json, is_spatial


# This function is the main entrypoint for the builder factory.
Expand All @@ -52,7 +54,8 @@ def get_view_config_builder(entity, get_assaytype, parent=None):
assay = get_assaytype(entity)
assay_name = assay.get('assaytype')
hints = assay.get('vitessce-hints', [])
is_image, is_rna, is_atac, is_sprm, is_codex, is_anndata, is_json = process_hints(hints)
is_image, is_rna, is_atac, is_sprm, is_codex, is_anndata, is_json, is_spatial = process_hints(
hints)
if is_image:
if is_rna:
# e.g. Visium (no probes) [Salmon + Scanpy]
Expand Down Expand Up @@ -91,6 +94,10 @@ def get_view_config_builder(entity, get_assaytype, parent=None):
return ImagePyramidViewConfBuilder

if is_rna:
# multiomic mudata, e.g. 10x Multiome, SNARE-Seq, etc.
# e.g. 272789a950b2b5d4b9387a1cf66ad487 on dev
if is_atac:
return MultiomicAnndataZarrViewConfBuilder
if is_json:
# e.g. c019a1cd35aab4d2b4a6ff221e92aaab
return RNASeqViewConfBuilder
Expand Down
251 changes: 238 additions & 13 deletions src/portal_visualization/builders/anndata_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from vitessce import (
VitessceConfig,
AnnDataWrapper,
MultivecZarrWrapper,
Component as cm,
CoordinationType as ct,
ImageOmeTiffWrapper,
CoordinationLevel as CL,
ViewType as vt,
get_initial_coordination_scope_prefix
)

Expand Down Expand Up @@ -64,6 +66,12 @@ def is_annotated(self):
else:
return False

@cached_property
def has_marker_genes(self):
    """Report whether the zarr store has an ``obs/marker_gene_0`` entry.

    Always returns a real ``bool`` (the membership test itself) rather than
    ``True`` or an implicit ``None``, so the result is consistent with other
    boolean properties on the builders.
    """
    return 'obs/marker_gene_0' in self.zarr_store

def get_conf_cells(self, marker=None):
zarr_path = 'hubmap_ui/anndata-zarr/secondary_analysis.zarr'
file_paths_found = [file["rel_path"] for file in self._entity["files"]]
Expand Down Expand Up @@ -151,7 +159,15 @@ def _set_up_dataset(self, vc):
))
return dataset

def _set_up_obs_labels(self):
def _set_up_obs_labels(self,
additional_obs_labels_paths=[],
additional_obs_labels_names=[],
additional_obs_set_paths=[],
additional_obs_set_names=[],
# Optionally skip default obs paths and labels
skip_default_paths=False,
# Support multiomic datasets
modality_prefix=None):
# Some of the keys (like marker_genes_for_heatmap) here are from our pipeline
# https://github.com/hubmapconsortium/portal-containers/blob/master/containers/anndata-to-ui
# while others come from Matt's standard scanpy pipeline
Expand All @@ -163,26 +179,33 @@ def _set_up_obs_labels(self):
# or help map predicted cell labels to their IDs
obs_label_paths = []
obs_label_names = []
dags = [
dag for dag in self._entity['metadata']['dag_provenance_list']
if 'name' in dag]

# Add additional obs labels and sets if provided
obs_set_paths.extend(additional_obs_set_paths)
obs_set_names.extend(additional_obs_set_names)
obs_label_paths.extend(additional_obs_labels_paths)
obs_label_names.extend(additional_obs_labels_names)

z = self.zarr_store
if (any(['azimuth-annotate' in dag['origin'] for dag in dags])):
obs = z['obs'] if modality_prefix is None else z[f'{modality_prefix}/obs']

if not skip_default_paths:
if self.is_annotated:
if 'predicted.ASCT.celltype' in z['obs']:
if 'predicted.ASCT.celltype' in obs:
obs_set_paths.append("obs/predicted.ASCT.celltype")
obs_set_names.append("Predicted ASCT Cell Type")
if 'predicted_label' in z['obs']:
if 'predicted_label' in obs:
obs_set_paths.append("obs/predicted_label")
obs_set_names.append("Cell Ontology Annotation")
if 'predicted_CLID' in z['obs']:
if 'predicted_CLID' in obs:
obs_label_paths.append("obs/predicted_CLID")
obs_label_names.append("Predicted CL ID")
obs_set_paths.append("obs/leiden")
obs_set_names.append("Leiden")
if self.has_marker_genes:
obs_label_paths.extend(RNA_SEQ_ANNDATA_FACTOR_PATHS)
obs_label_names.extend(RNA_SEQ_FACTOR_LABEL_NAMES)

obs_set_paths.append("obs/leiden")
obs_set_names.append("Leiden")
obs_label_paths.extend(RNA_SEQ_ANNDATA_FACTOR_PATHS)
obs_label_names.extend(RNA_SEQ_FACTOR_LABEL_NAMES)
self._obs_set_paths = obs_set_paths
self._obs_set_names = obs_set_names
self._obs_labels_paths = obs_label_paths
Expand Down Expand Up @@ -387,8 +410,210 @@ def _setup_anndata_view_config(self, vc, dataset):
}, scope_prefix=get_initial_coordination_scope_prefix(self._uuid, 'image'))
vc.link_views_by_dict(spatial_views, {
"spotLayer": CL([{
"spatialLayerOpacity": 0.5,
"spatialLayerOpacity": 1,
"spatialSpotRadius": self._get_scale_factor(),
}]),
}, scope_prefix=get_initial_coordination_scope_prefix(self._uuid, 'obsSpots'))
return vc


class MultiomicAnndataZarrViewConfBuilder(RNASeqAnnDataZarrViewConfBuilder):
    """Wrapper class for creating a AnnData-backed view configuration
    for multiomic data from mudata-to-ui.cwl like 10X Multiome
    TODO: Provide specific link to example dataset
    """

    # Paths (relative to the dataset's assets root) of the mudata-to-ui output.
    # Kept in one place so the store lookup and the dataset wrappers cannot drift apart.
    _ZARR_BASE = 'hubmap_ui/mudata-zarr'
    _ZARR_PATH = f'{_ZARR_BASE}/secondary_analysis.zarr'

    def __init__(self, entity, groups_token, assets_endpoint, **kwargs):
        """Initialise the parent builder, then override the scatterplot width to 3."""
        super().__init__(entity, groups_token, assets_endpoint, **kwargs)
        self._scatterplot_w = 3

    @cached_property
    def zarr_store(self):
        """Open (read-only) and cache the MuData zarr store for this entity."""
        # _get_request_init() may return a falsy value; client_kwargs must be a dict.
        request_init = self._get_request_init() or {}
        adata_url = self._build_assets_url(self._ZARR_PATH, use_token=False)
        return zarr.open(adata_url, mode='r', storage_options={'client_kwargs': request_init})

    @cached_property
    def is_annotated(self):
        """Return the stored ``is_annotated`` flag of the RNA modality, or False if absent."""
        key = 'mod/rna/uns/annotation_metadata/is_annotated'
        z = self.zarr_store
        if key in z:
            # [()] reads the stored value out of the zarr array.
            return z[key][()]
        return False

    @cached_property
    def has_marker_genes(self):
        """Report whether the RNA modality has a ``marker_genes_for_heatmap`` var column."""
        return 'mod/rna/var/marker_genes_for_heatmap' in self.zarr_store

    @cached_property
    def has_cbb(self):
        """Report whether the store contains the ``mod/atac_cbb`` modality."""
        return 'mod/atac_cbb' in self.zarr_store

    def get_conf_cells(self, marker=None):
        """Build one Vitessce configuration per available clustering column.

        :param marker: optional marker gene, forwarded to ``_set_up_marker_gene``.
        :return: the result of the module-level ``get_conf_cells`` applied to the
            list of configuration dicts (one per clustering).
        """
        # TODO: The files array is empty for this entity, so we can't check for the
        # zarr store (via its .zgroup file) before building the configuration.

        # Each clustering has its own genomic profile; since we can't currently toggle between
        # selected genomic profiles, each clustering needs its own view config.
        # Entries are (obs column, display label, multivec file label).
        cluster_columns = [
            ("leiden_wnn", "Leiden (Weighted Nearest Neighbor)", "wnn"),
            ("cluster_cbg", "Cluster (ATAC Cell x Gene)", "cbg"),
            ("leiden_rna", "Leiden (RNA)", "rna"),
        ]
        # Optional clusterings are only offered when the store provides them.
        if self.has_cbb:
            cluster_columns.append(("cluster_cbb", "Cluster (ATAC Cell x Bin)", "cbb"))
        if self.is_annotated:
            cluster_columns.append(("predicted_label", "Cell Ontology Annotation", "label"))

        column_names = [f'obs/{name}' for name, _, _ in cluster_columns]
        column_labels = [label for _, label, _ in cluster_columns]

        self._set_up_marker_gene(marker)
        self._set_up_obs_labels(additional_obs_set_names=column_labels,
                                additional_obs_set_paths=column_names,
                                skip_default_paths=True,
                                modality_prefix='mod/rna')

        confs = []
        for column_name, column_label, multivec_label in cluster_columns:
            vc = VitessceConfig(name=column_label,
                                schema_version=self._schema_version)
            dataset = self._set_up_dataset(vc, multivec_label)
            vc = self._setup_anndata_view_config(vc, dataset, column_name, column_label)
            vc = self._link_marker_gene(vc)
            confs.append(vc.to_dict())
        # Resolves to the module-level helper, not this method.
        return get_conf_cells(confs)

    def _set_up_dataset(self, vc, multivec_label):
        """Add a dataset to ``vc`` combining the multivec store and three AnnData views.

        :param vc: the ``VitessceConfig`` to add the dataset to.
        :param multivec_label: short label selecting ``<label>.multivec.zarr``;
            also used as the dataset name.
        :return: the dataset returned by ``vc.add_dataset`` after all objects are added.
        """
        zarr_path = self._ZARR_PATH
        h5mu_zarr = self._build_assets_url(zarr_path, use_token=False)
        rna_zarr = self._build_assets_url(f'{zarr_path}/mod/rna', use_token=False)
        atac_cbg_zarr = self._build_assets_url(f'{zarr_path}/mod/atac_cbg', use_token=False)
        multivec_zarr = self._build_assets_url(
            f'{self._ZARR_BASE}/{multivec_label}.multivec.zarr', use_token=False)
        dataset = vc.add_dataset(name=multivec_label).add_object(MultivecZarrWrapper(
            zarr_url=multivec_zarr,
            request_init=self._get_request_init(),
        )).add_object(AnnDataWrapper(
            # We run add_object with adata_url=rna_zarr first to add the cell-by-gene
            # matrix and associated metadata.
            adata_url=rna_zarr,
            obs_embedding_paths=["obsm/X_umap"],
            obs_embedding_names=["UMAP - RNA"],
            obs_set_paths=self._obs_set_paths,
            obs_set_names=self._obs_set_names,
            obs_feature_matrix_path="X",
            initial_feature_filter_path="var/highly_variable",
            feature_labels_path="var/hugo_symbol",
            request_init=self._get_request_init(),
            # To be explicit that the features represent genes and gene expression, we
            # specify that here.
            coordination_values={
                "featureType": "gene",
                "featureValueType": "expression",
                "featureLabelsType": "gene",
            }
        )).add_object(AnnDataWrapper(
            adata_url=atac_cbg_zarr,
            obs_feature_matrix_path="X",
            initial_feature_filter_path="var/highly_variable",
            obs_embedding_paths=["obsm/X_umap"],
            obs_embedding_names=["UMAP - ATAC"],
            feature_labels_path="var/hugo_symbol",
            request_init=self._get_request_init(),
            # To be explicit that the features of this modality represent peaks and
            # counts, we specify that here.
            coordination_values={
                "featureType": "peak",
                "featureValueType": "count",
            }
        )).add_object(AnnDataWrapper(
            # The root of the MuData store supplies the joint (WNN) embedding.
            adata_url=h5mu_zarr,
            obs_feature_matrix_path="X",
            obs_embedding_paths=["obsm/X_umap"],
            obs_embedding_names=["UMAP - WNN"],
            request_init=self._get_request_init(),
            coordination_values={
                "featureType": "other"
            }
        ))
        return dataset

    def _setup_anndata_view_config(self, vc, dataset, column_name, column_label):
        """Add and coordinate the views for one clustering, and lay them out.

        :param vc: the ``VitessceConfig`` being built.
        :param dataset: the dataset produced by ``_set_up_dataset``.
        :param column_name: obs column (under ``mod/rna/obs``) holding the clustering.
        :param column_label: display name of that clustering's obs set.
        :return: the same ``vc``, with views added; the views are also stored
            on ``self._views``.
        """
        umap_scatterplot_by_rna = vc.add_view(
            vt.SCATTERPLOT, dataset=dataset, mapping="UMAP - RNA"
        ).set_props(embeddingCellSetLabelsVisible=False)
        umap_scatterplot_by_atac = vc.add_view(
            vt.SCATTERPLOT, dataset=dataset, mapping="UMAP - ATAC"
        ).set_props(embeddingCellSetLabelsVisible=False)
        umap_scatterplot_by_wnn = vc.add_view(
            vt.SCATTERPLOT, dataset=dataset, mapping="UMAP - WNN"
        ).set_props(embeddingCellSetLabelsVisible=False)

        gene_list = vc.add_view(vt.FEATURE_LIST, dataset=dataset)
        peak_list = vc.add_view(vt.FEATURE_LIST, dataset=dataset)

        genomic_profiles = vc.add_view(vt.GENOMIC_PROFILES, dataset=dataset)

        cell_sets = vc.add_view(vt.OBS_SETS, dataset=dataset)

        # Specify which feature type (i.e., genes or peaks) each view corresponds to.
        # We also need to make sure the selection of genes and peaks are scoped only to
        # the corresponding view,
        # and we want to make sure the color mappings are independent for each modality.
        coordination_types = [ct.FEATURE_TYPE, ct.FEATURE_VALUE_TYPE]
        vc.link_views([umap_scatterplot_by_rna, gene_list],
                      coordination_types, ["gene", "expression"])
        vc.link_views([umap_scatterplot_by_atac, peak_list],
                      coordination_types, ["peak", "count"])

        # Coordinate the selection of cell sets between the scatterplots and lists
        # of features/observations.
        coordination_types = [ct.FEATURE_SELECTION,
                              ct.OBS_COLOR_ENCODING,
                              ct.FEATURE_VALUE_COLORMAP_RANGE]
        vc.link_views([umap_scatterplot_by_rna,
                       umap_scatterplot_by_atac,
                       umap_scatterplot_by_wnn,
                       gene_list, peak_list, cell_sets],
                      coordination_types, [None, 'cellSetSelection', [0.0, 1.0]])

        # Indicate genomic profiles' clusters; based on the display name of the
        # clustering column this configuration is built for.
        obs_set_coordination, obs_color_coordination = vc.add_coordination(
            ct.OBS_SET_SELECTION, ct.OBS_COLOR_ENCODING)
        genomic_profiles.use_coordination(obs_set_coordination, obs_color_coordination)

        # Dynamically determine the number of clusters in the given clustering column
        label_names = self._get_obs_set_members(column_name)
        obs_set_coordinations = [[column_label, str(i)] for i in label_names]
        obs_set_coordination.set_value(obs_set_coordinations)
        obs_color_coordination.set_value('cellSetSelection')

        # Hide numeric cluster labels
        vc.link_views(
            [umap_scatterplot_by_rna, umap_scatterplot_by_atac, umap_scatterplot_by_wnn],
            [ct.EMBEDDING_OBS_SET_LABELS_VISIBLE], [False])

        # Top row: the three embeddings plus the cell sets; bottom row: genomic
        # profiles next to the peak and gene lists.
        vc.layout(((umap_scatterplot_by_rna | umap_scatterplot_by_atac) | (
            umap_scatterplot_by_wnn | cell_sets)) / (genomic_profiles | (peak_list | gene_list)))

        self._views = [umap_scatterplot_by_rna, umap_scatterplot_by_atac, umap_scatterplot_by_wnn,
                       gene_list, peak_list, genomic_profiles, cell_sets]
        return vc

    def _get_obs_set_members(self, column_name):
        """Return the ``categories`` member of ``mod/rna/obs/<column_name>`` in the store.

        NOTE(review): presumably these are the category names of a categorical obs
        column as written by AnnData's zarr encoding; confirm against the pipeline.
        """
        return self.zarr_store[f'mod/rna/obs/{column_name}'].categories
8 changes: 4 additions & 4 deletions src/portal_visualization/builders/scatterplot_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def _setup_scatterplot_view_config(self, vc, dataset):


class RNASeqViewConfBuilder(AbstractScatterplotViewConfBuilder):
"""Wrapper class for creating a JSON-backed scatterplot for "first generation" RNA-seq data like
https://portal.hubmapconsortium.org/browse/dataset/c019a1cd35aab4d2b4a6ff221e92aaab
"""Wrapper class for creating a JSON-backed scatterplot for "first generation" RNA-seq data
like https://portal.hubmapconsortium.org/browse/dataset/c019a1cd35aab4d2b4a6ff221e92aaab
from h5ad-to-arrow.cwl (August 2020 release).
"""

Expand Down Expand Up @@ -75,8 +75,8 @@ def __init__(self, entity, groups_token, assets_endpoint, **kwargs):


class ATACSeqViewConfBuilder(AbstractScatterplotViewConfBuilder):
"""Wrapper class for creating a JSON-backed scatterplot for "first generation" ATAC-seq data like
https://portal.hubmapconsortium.org/browse/dataset/d4493657cde29702c5ed73932da5317c
"""Wrapper class for creating a JSON-backed scatterplot for "first generation" ATAC-seq data
like https://portal.hubmapconsortium.org/browse/dataset/d4493657cde29702c5ed73932da5317c
from h5ad-to-arrow.cwl.
"""

Expand Down
Loading

0 comments on commit 7e4d8ee

Please sign in to comment.