Skip to content

Commit

Permalink
docs update
Browse files Browse the repository at this point in the history
  • Loading branch information
shitohana committed Feb 18, 2024
1 parent 9165fa2 commit a98beec
Show file tree
Hide file tree
Showing 6 changed files with 203 additions and 19 deletions.
34 changes: 28 additions & 6 deletions docs/_chr.rst
Original file line number Diff line number Diff line change
@@ -1,14 +1,36 @@
ChrLevels
=========

Methods for calculating P-value for cytosine residues or genomic regions.
ChrLevels class.

.. currentmodule:: bsxplorer

.. autosummary::
.. class:: ChrLevels

.. rubric:: Methods

.. autosummary::
:nosignatures:
:toctree: _Binom
:template: class.rst
:toctree: _ChrLevels/_method
:template: method.rst

~ChrLevels.from_bismark


~ChrLevels.from_cgmap


~ChrLevels.from_parquet


~ChrLevels.save_plot_rds


~ChrLevels.filter


~ChrLevels.draw_mpl


~ChrLevels.draw_plotly

RegionStat
BinomialData
9 changes: 5 additions & 4 deletions docs/_cluster.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ Cluster

Classes for clustering genomic regions and plotting the resulting clusters.

.. currentmodule:: bsxplorer
.. currentmodule:: bsxplorer.Clusters

.. autosummary::
:nosignatures:
:toctree: _Binom
:toctree: _Cluster
:template: class.rst

RegionStat
BinomialData
ClusterSingle
ClusterMany
ClusterPlot
2 changes: 1 addition & 1 deletion docs/markdowns/eda1nonmodel.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ filtered.heat_map(10, 20).draw_mpl(**labels_settings)
![EDA1 - Genes_vs_TE HeatMap](../images/eda1nonmodel/g_vs_te_hm.png){.doc-single-image}

The MetageneFiles object has [**`.box_plot()`**](bsxplorer.MetageneFiles.box_plot) and
[**`.violin_plot()`**](bsxplorer.MetageneFiles.violun_plot) functions for box plot and violin plot graphs.
[**`.violin_plot()`**](bsxplorer.MetageneFiles.violin_plot) functions for box plot and violin plot graphs.
The [**`.trim_flank()`**](bsxplorer.MetageneFiles.trim_flank) method analyzes methylation of the body of the region.

```python
Expand Down
76 changes: 71 additions & 5 deletions src/bsxplorer/ChrLevels.py → src/bsxplorer/ChrLevelsClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,13 @@ def _mutate_batch(self, batch) -> pl.DataFrame:

class ChrLevels:
def __init__(self, df: pl.DataFrame) -> None:
"""
Read report and visualize chromosome methylation levels
Parameters
----------
df
"""
self.bismark = df

# delete this in future and change to calculation of plot data
Expand Down Expand Up @@ -302,7 +309,7 @@ def from_parquet(
confidence: int = None
):
"""
Initialize ChrLevels with CX_report file
Initialize ChrLevels with parquet file
:param file: Path to file
:param chr_min_length: Minimum length of chromosome to be analyzed
Expand Down Expand Up @@ -334,10 +341,20 @@ def save_plot_rds(self, path, compress: bool = False):

def filter(self, context: str = None, strand: str = None, chr: str = None):
"""
:param context: Methylation context (CG, CHG, CHH) to filter (only one).
:param strand: Strand to filter (+ or -).
:param chr: Chromosome name to filter.
:return: Filtered :class:`Bismark`.
Filter chromosome methylation levels data.
Parameters
----------
context
Methylation context (CG, CHG, CHH) to filter (only one).
strand
Strand to filter (+ or -).
chr
Chromosome name to filter.
Returns
-------
:class:`ChrLevels`
"""
context_filter = self.bismark["context"] == context if context is not None else True
strand_filter = self.bismark["strand"] == strand if strand is not None else True
Expand Down Expand Up @@ -392,6 +409,34 @@ def draw_mpl(
linewidth: float = 1.0,
linestyle: str = '-'
) -> Figure:
"""
Draws line-plot on given axis.
Parameters
----------
fig_axes
Tuple of (`matplotlib.pyplot.Figure <https://matplotlib.org/stable/api/figure_api.html#matplotlib.figure.Figure>`_, `matplotlib.axes.Axes <https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.html#matplotlib.axes.Axes>`_). New are created if ``None``
smooth
Number of windows for `SavGol <https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html>`_ filter (set 0 for no smoothing)
label
Label of line on line-plot
linewidth
Width of the line
linestyle
Style of the line
Returns
-------
``matplotlib.pyplot.Figure``
See Also
--------
`matplotlib.pyplot.Figure <https://matplotlib.org/stable/api/figure_api.html#matplotlib.figure.Figure>`_
`matplotlib.pyplot.subplot() <https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.subplot.html#matplotlib.pyplot.subplot>`_ : To create fig, axes
`Linestyles <https://matplotlib.org/stable/gallery/lines_bars_and_markers/linestyles.html>`_ : For possible linestyles.
"""
if fig_axes is None:
fig, axes = plt.subplots()
else:
Expand Down Expand Up @@ -429,6 +474,27 @@ def draw_plotly(self,
smooth: int = 10,
label: str = None
):
"""
Draws line-plot on given figure.
Parameters
----------
figure
`plotly.graph_objects.Figure <https://plotly.com/python-api-reference/generated/plotly.graph_objects.Figure>`_. New is created if ``None``
smooth
Number of windows for `SavGol <https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html>`_ filter (set 0 for no smoothing)
label
Label of line on line-plot
Returns
-------
``plotly.graph_objects.Figure``
See Also
--------
`plotly.graph_objects.Figure <https://plotly.com/python-api-reference/generated/plotly.graph_objects.Figure>`_
"""
if figure is None:
figure = go.Figure()

Expand Down
99 changes: 97 additions & 2 deletions src/bsxplorer/Clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ def all(self):


class ClusterMany(_ClusterBase):
"""Class for operating with multiple samples regions clustering"""

def __init__(self, metagenes: MetageneFilesBase, count_threshold=5, na_rm: float | None = None):
intersect_list = set.intersection(*[set(metagene.bismark["gene"].to_list()) for metagene in metagenes.samples])
for i in range(len(metagenes.samples)):
Expand All @@ -186,15 +188,59 @@ def __init__(self, metagenes: MetageneFilesBase, count_threshold=5, na_rm: float
self.sample_names = metagenes.labels

def kmeans(self, n_clusters: int = 8, n_init: int = 10, **kwargs):
    """
    Run KMeans clustering on the regions of every sample.

    Clustering is performed with `sklearn.cluster.KMeans <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html>`_.
    Each entry of ``self.clusters`` is clustered through its own ``kmeans``
    method, and the resulting ``.data`` objects are collected into a single
    plot object.

    Parameters
    ----------
    n_clusters
        The number of clusters to generate.
    n_init
        Number of times the k-means algorithm is run with different centroid seeds.
    kwargs
        Forwarded unchanged to every per-sample ``kmeans`` call.
        See `sklearn.cluster.KMeans <https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html>`_.

    Returns
    -------
    :class:`ClusterPlot`
    """
    per_sample_data = []
    for sample_cluster in self.clusters:
        clustered = sample_cluster.kmeans(n_clusters, n_init, **kwargs)
        per_sample_data.append(clustered.data)

    return ClusterPlot(per_sample_data, self.sample_names)

def cut_tree(self, dist_method="euclidean", clust_method="average", cut_height_q=.99, **kwargs):
    """
    Hierarchical clustering on sample regions with dynamic tree cutting.

    Clustering is performed with `dynamicTreeCut.cutreeHybrid <https://github.com/kylessmith/dynamicTreeCut>`_.
    Each entry of ``self.clusters`` is clustered through its own ``cut_tree``
    method, and the resulting ``.data`` objects are collected into a single
    plot object.

    Parameters
    ----------
    dist_method
        Distances calculation metric
    clust_method
        Hierarchical clustering method
    cut_height_q
        Quantile of leaves height to be cut.
    kwargs
        Forwarded unchanged to every per-sample ``cut_tree`` call.
        See `dynamicTreeCut <https://github.com/kylessmith/dynamicTreeCut>`_.

    Returns
    -------
    :class:`ClusterPlot`
    """
    # Forward the caller's arguments; the previous version passed hard-coded
    # literals here, so dist_method / clust_method / cut_height_q were
    # silently ignored.
    return ClusterPlot([
        cluster.cut_tree(
            dist_method=dist_method,
            clust_method=clust_method,
            cut_height_q=cut_height_q,
            **kwargs
        ).data
        for cluster in self.clusters
    ], self.sample_names)

def all(self):
    """
    Return all regions of every sample for downstream plotting.

    Calls ``all()`` on each entry of ``self.clusters`` and collects the
    resulting ``.data`` objects.

    Returns
    -------
    :class:`ClusterPlot`
    """
    per_sample_data = []
    for sample_cluster in self.clusters:
        per_sample_data.append(sample_cluster.all().data)

    return ClusterPlot(per_sample_data, self.sample_names)


Expand Down Expand Up @@ -226,6 +272,7 @@ def from_matrix(cls, matrix: np.ndarray, labels: np.array, names: list[str] | np


class ClusterPlot:
"""Class for plotting cluster data."""
def __init__(self, data: ClusterData | list[ClusterData], sample_names=None):
if isinstance(data, list) and len(data) == 1:
self.data = data[0]
Expand All @@ -235,6 +282,15 @@ def __init__(self, data: ClusterData | list[ClusterData], sample_names=None):
self.sample_names = sample_names

def save_tsv(self, filename: str):
"""
Save labels for regions in a TSV file.
Parameters
----------
filename
File name for output file
"""

filename = Path(filename)

def save(data: ClusterData, path: Path):
Expand Down Expand Up @@ -263,7 +319,26 @@ def __intersect_genes(self):
print(
f"Found {len(intersection)} intersections between samples with {max(map(len, names))} regions max")

def draw_mpl(self, method='average', metric='euclidean', cmap: str = "cividis"):
def draw_mpl(self, method='average', metric='euclidean', cmap: str = "cividis", **kwargs):
"""
Draws clustermap with seaborn.clustermap.
Parameters
----------
method
Method for hierarchical clustering.
metric
Metric for distance calculation
cmap
Colormap to use
**kwargs
``seaborn.clustermap`` parameters
See Also
--------
`seaborn.clustermap <https://seaborn.pydata.org/generated/seaborn.clustermap.html>`_ : For more information about possible parameters
"""

if isinstance(self.data, list):
warnings.warn("Matplotlib version of cluster plot is not available for multiple samples")
return None
Expand All @@ -272,10 +347,30 @@ def draw_mpl(self, method='average', metric='euclidean', cmap: str = "cividis"):
self.data.centers,
index=[f"{name} ({count})" for name, count in zip(*np.unique(self.data.labels, return_counts=True))])

fig = sns.clustermap(df, col_cluster=False, cmap=cmap, method=method, metric=metric)
args = dict(col_cluster=False) | kwargs
args |= dict(cmap=cmap, method=method, metric=metric)

fig = sns.clustermap(df, **args)
return fig

def draw_plotly(self, method='average', metric='euclidean', cmap: str = "cividis"):
"""
Draws clustermap with plotly imshow.
Parameters
----------
method
Method for hierarchical clustering.
metric
Metric for distance calculation
cmap
Colormap to use
Returns
--------
``plotly.graph_objects.Figure``
"""

if isinstance(self.data, list):
# order for first sample
dist = pdist(self.data[0].centers, metric=metric)
Expand Down
2 changes: 1 addition & 1 deletion src/bsxplorer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .Plots import LinePlot, LinePlotFiles, HeatMap, HeatMapFiles, PCA
from .Binom import BinomialData, RegionStat
from .GenomeClass import Genome
from .ChrLevels import ChrLevels
from .ChrLevelsClass import ChrLevels

0 comments on commit a98beec

Please sign in to comment.