Skip to content

Commit

Permalink
Make sure that the tree handles cases where there would be no single …
Browse files Browse the repository at this point in the history
…root for the tree (#72)

This works by detecting whether the tree would have several roots; if so, the tree is rebuilt from the most general root node (the 'cell' node, CL_0000000).
  • Loading branch information
nmalfroy authored Jul 24, 2024
1 parent eb8e018 commit 2b73433
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 10 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.0.0/>`
and this project adheres to `Semantic Versioning <https://semver.org/spec/v2.0.0.html>`_.


1.4.10 - 2024-07-23
-------------------

Fixed
~~~~~
- The circular tree plot visualization Dash App now handles the case where the cell ontology tree would have no single root


1.4.9 - 2024-07-22
------------------

Expand Down
4 changes: 2 additions & 2 deletions cellarium/cas/postprocessing/cell_ontology/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .cell_ontology_cache import CL_CELL_ROOT_NODE, CellOntologyCache
from .cell_ontology_cache import CL_CELL_ROOT_NODE, CL_EUKARYOTIC_CELL_ROOT_NODE, CellOntologyCache

__all__ = ["CellOntologyCache", "CL_CELL_ROOT_NODE"]
__all__ = ["CellOntologyCache", "CL_CELL_ROOT_NODE", "CL_EUKARYOTIC_CELL_ROOT_NODE"]
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
# only keep nodes with the following prefix when parsing CL ontology
CL_PREFIX = "CL_"

# the 'cell' node
CL_CELL_ROOT_NODE = "CL_0000000"

# the 'eukaryotic cell' node
CL_CELL_ROOT_NODE = "CL_0000255"
CL_EUKARYOTIC_CELL_ROOT_NODE = "CL_0000255"


class CellOntologyCache:
Expand Down
25 changes: 20 additions & 5 deletions cellarium/cas/postprocessing/ontology_aware.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import scipy.sparse as sp
from anndata import AnnData

from .cell_ontology.cell_ontology_cache import CL_CELL_ROOT_NODE, CellOntologyCache
from .cell_ontology.cell_ontology_cache import CL_CELL_ROOT_NODE, CL_EUKARYOTIC_CELL_ROOT_NODE, CellOntologyCache
from .common import get_obs_indices_for_cluster

# AnnData-related constants
Expand Down Expand Up @@ -178,7 +178,7 @@ def get_aggregated_cas_ontology_aware_scores(
def convert_aggregated_cell_ontology_scores_to_rooted_tree(
aggregated_scores: AggregatedCellOntologyScores,
cl: CellOntologyCache,
root_cl_name: str = CL_CELL_ROOT_NODE,
root_cl_name: str = CL_EUKARYOTIC_CELL_ROOT_NODE,
min_fraction: float = 0.0,
hidden_cl_names_set: t.Optional[t.Set[str]] = None,
) -> OrderedDict:
Expand Down Expand Up @@ -226,7 +226,20 @@ def build_subtree(node_dict: OrderedDict, node_name: str) -> OrderedDict:
build_subtree(node_dict[node_name]["children"], children_node_name)
return node_dict

return build_subtree(tree_dict, root_cl_name)
tree_dict = build_subtree(tree_dict, root_cl_name)
# Validate that this is actually a rooted tree and if not recalculate with the base cell node
if len(tree_dict) == 1: # singly-rooted tree
return tree_dict
elif root_cl_name != CL_CELL_ROOT_NODE:
return convert_aggregated_cell_ontology_scores_to_rooted_tree(
aggregated_scores=aggregated_scores,
cl=cl,
root_cl_name=CL_CELL_ROOT_NODE,
min_fraction=min_fraction,
hidden_cl_names_set=hidden_cl_names_set,
)
else:
raise ValueError("The tree is not singly-rooted.")


def generate_phyloxml_from_scored_cell_ontology_tree(
Expand Down Expand Up @@ -287,7 +300,9 @@ def _get_subtree_phyloxml_string(subtree_dict: OrderedDict, node_name: str, leve
def get_most_granular_top_k_calls(
aggregated_scores: AggregatedCellOntologyScores, cl: CellOntologyCache, min_acceptable_score: float, top_k: int = 1
) -> t.List[tuple]:
depth_list = list(map(cl.get_longest_path_lengths_from_target(CL_CELL_ROOT_NODE).get, aggregated_scores.cl_names))
depth_list = list(
map(cl.get_longest_path_lengths_from_target(CL_EUKARYOTIC_CELL_ROOT_NODE).get, aggregated_scores.cl_names)
)
sorted_score_and_depth_list = sorted(
list(
(score, depth, cl_name)
Expand All @@ -302,7 +317,7 @@ def get_most_granular_top_k_calls(
trunc_list = sorted_score_and_depth_list[:top_k]
# pad with root node if necessary
for _ in range(len(trunc_list) - top_k):
trunc_list.append((1.0, 0, CL_CELL_ROOT_NODE))
trunc_list.append((1.0, 0, CL_EUKARYOTIC_CELL_ROOT_NODE))
return trunc_list


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
get_obs_indices_for_cluster,
insert_cas_ontology_aware_response_into_adata,
)
from cellarium.cas.postprocessing.cell_ontology import CL_CELL_ROOT_NODE, CellOntologyCache
from cellarium.cas.postprocessing.cell_ontology import CL_EUKARYOTIC_CELL_ROOT_NODE, CellOntologyCache
from cellarium.cas.visualization._components.circular_tree_plot import CircularTreePlot
from cellarium.cas.visualization.ui_utils import ConfigValue, find_and_kill_process

Expand Down Expand Up @@ -103,6 +103,33 @@ class DomainSelectionConstants:
class CASCircularTreePlotUMAPDashApp:
ALL_CELLS_DOMAIN_KEY = "all cells"
CLUSTER_PREFIX_DOMAIN_KEY = "cluster "
"""
A Dash app for visualizing the results of a Cellarium CAS cell type ontology-aware analysis.
:param adata: The AnnData object containing the cell type ontology-aware analysis results.
:param cas_ontology_aware_response: The response from the Cellarium CAS cell type ontology-aware analysis.
:param cluster_label_obs_column: The name of the observation column containing the cluster labels.
:param aggregation_op: The aggregation operation to apply to the cell type ontology-aware scores.
:param aggregation_domain: The domain over which to aggregate the cell type ontology-aware scores.
:param score_threshold: The threshold for the cell type ontology-aware scores.
:param min_cell_fraction: The minimum fraction of cells that must have a cell type ontology-aware score above the threshold.
:param umap_marker_size: The size of the markers in the UMAP scatter plot.
:param umap_padding: The padding to apply to the UMAP scatter plot bounds.
:param umap_min_opacity: The minimum opacity for the UMAP scatter plot markers.
:param umap_max_opacity: The maximum opacity for the UMAP scatter plot markers.
:param umap_inactive_cell_color: The color for inactive cells in the UMAP scatter plot.
:param umap_inactive_cell_opacity: The opacity for inactive cells in the UMAP scatter plot.
:param umap_active_cell_color: The color for active cells in the UMAP scatter plot.
:param umap_default_cell_color: The default color for cells in the UMAP scatter plot.
:param umap_default_opacity: The default opacity for cells in the UMAP scatter plot.
:param circular_tree_plot_linecolor: The line color for the circular tree plot.
:param circular_tree_start_angle: The start angle for the circular tree plot.
:param circular_tree_end_angle: The end angle for the circular tree plot.
:param figure_height: The height of the figures in the Dash app.
:param hidden_cl_names_set: The set of cell type ontology terms to hide from the visualization.
:param shown_cl_names_set: The set of cell type ontology terms to always show as text labels in the visualization.
:param score_colorscale: The colorscale to use for the cell type ontology-aware scores.
"""

def __init__(
self,
Expand Down Expand Up @@ -209,7 +236,7 @@ def _instantiate_circular_tree_plot(self) -> CircularTreePlot:
rooted_tree = convert_aggregated_cell_ontology_scores_to_rooted_tree(
aggregated_scores=aggregated_scores,
cl=self.cl,
root_cl_name=CL_CELL_ROOT_NODE,
root_cl_name=CL_EUKARYOTIC_CELL_ROOT_NODE,
min_fraction=self.min_cell_fraction.get(),
hidden_cl_names_set=self.hidden_cl_names_set,
)
Expand Down

0 comments on commit 2b73433

Please sign in to comment.