From a98beec0dcc1793e45d5ca36d9fc75e76d94f912 Mon Sep 17 00:00:00 2001 From: shitohana Date: Sun, 18 Feb 2024 22:19:51 +0300 Subject: [PATCH] docs update --- docs/_chr.rst | 34 +++++-- docs/_cluster.rst | 9 +- docs/markdowns/eda1nonmodel.md | 2 +- .../{ChrLevels.py => ChrLevelsClass.py} | 76 +++++++++++++- src/bsxplorer/Clusters.py | 99 ++++++++++++++++++- src/bsxplorer/__init__.py | 2 +- 6 files changed, 203 insertions(+), 19 deletions(-) rename src/bsxplorer/{ChrLevels.py => ChrLevelsClass.py} (85%) diff --git a/docs/_chr.rst b/docs/_chr.rst index 8f8c614..04c2b74 100644 --- a/docs/_chr.rst +++ b/docs/_chr.rst @@ -1,14 +1,36 @@ ChrLevels ========= -Methods for calculating P-value for cytosine residues or genomic regions. +ChrLevels class. .. currentmodule:: bsxplorer -.. autosummary:: +.. class:: ChrLevels + + .. rubric:: Methods + + .. autosummary:: :nosignatures: - :toctree: _Binom - :template: class.rst + :toctree: _ChrLevels/_method + :template: method.rst + + ~ChrLevels.from_bismark + + + ~ChrLevels.from_cgmap + + + ~ChrLevels.from_parquet + + + ~ChrLevels.save_plot_rds + + + ~ChrLevels.filter + + + ~ChrLevels.draw_mpl + + + ~ChrLevels.draw_plotly - RegionStat - BinomialData \ No newline at end of file diff --git a/docs/_cluster.rst b/docs/_cluster.rst index dbb95ce..c5650d3 100644 --- a/docs/_cluster.rst +++ b/docs/_cluster.rst @@ -3,12 +3,13 @@ Cluster Methods for calculating P-value for cytosine residues or genomic regions. -.. currentmodule:: bsxplorer +.. currentmodule:: bsxplorer.Clusters .. 
autosummary:: :nosignatures: - :toctree: _Binom + :toctree: _Cluster :template: class.rst - RegionStat - BinomialData \ No newline at end of file + ClusterSingle + ClusterMany + ClusterPlot \ No newline at end of file diff --git a/docs/markdowns/eda1nonmodel.md b/docs/markdowns/eda1nonmodel.md index c9ba0f8..3cb0cce 100644 --- a/docs/markdowns/eda1nonmodel.md +++ b/docs/markdowns/eda1nonmodel.md @@ -148,7 +148,7 @@ filtered.heat_map(10, 20).draw_mpl(**labels_settings) ![EDA1 - Genes_vs_TE HeatMap](../images/eda1nonmodel/g_vs_te_hm.png){.doc-single-image} The MetageneFiles object has [**`.box_plot()`**](bsxplorer.MetageneFiles.box_plot) and -[**`.violin_plot()`**](bsxplorer.MetageneFiles.violun_plot) functions for box plot and violin plot graphs. +[**`.violin_plot()`**](bsxplorer.MetageneFiles.violin_plot) functions for box plot and violin plot graphs. The [**`.trim_flank()`**](bsxplorer.MetageneFiles.trim_flank) method analyzes methylation of the body of the region. ```python diff --git a/src/bsxplorer/ChrLevels.py b/src/bsxplorer/ChrLevelsClass.py similarity index 85% rename from src/bsxplorer/ChrLevels.py rename to src/bsxplorer/ChrLevelsClass.py index c707475..6bbd228 100644 --- a/src/bsxplorer/ChrLevels.py +++ b/src/bsxplorer/ChrLevelsClass.py @@ -218,6 +218,13 @@ def _mutate_batch(self, batch) -> pl.DataFrame: class ChrLevels: def __init__(self, df: pl.DataFrame) -> None: + """ + Read report and visualize chromosome methylation levels + + Parameters + ---------- + df + """ self.bismark = df # delete this in future and change to calculation of plot data @@ -302,7 +309,7 @@ def from_parquet( confidence: int = None ): """ - Initialize ChrLevels with CX_report file + Initialize ChrLevels with parquet file :param file: Path to file :param chr_min_length: Minimum length of chromosome to be analyzed @@ -334,10 +341,20 @@ def save_plot_rds(self, path, compress: bool = False): def filter(self, context: str = None, strand: str = None, chr: str = None): """ - :param 
context: Methylation context (CG, CHG, CHH) to filter (only one). - :param strand: Strand to filter (+ or -). - :param chr: Chromosome name to filter. - :return: Filtered :class:`Bismark`. + Filter chromosome methylation levels data. + + Parameters + ---------- + context + Methylation context (CG, CHG, CHH) to filter (only one). + strand + Strand to filter (+ or -). + chr + Chromosome name to filter. + + Returns + ------- + :class:`ChrLevels` """ context_filter = self.bismark["context"] == context if context is not None else True strand_filter = self.bismark["strand"] == strand if strand is not None else True @@ -392,6 +409,34 @@ def draw_mpl( linewidth: float = 1.0, linestyle: str = '-' ) -> Figure: + """ + Draws line-plot on given axis. + + Parameters + ---------- + fig_axes + Tuple of (`matplotlib.pyplot.Figure `_, `matplotlib.axes.Axes `_). New ones are created if ``None`` + smooth + Number of windows for `SavGol `_ filter (set 0 for no smoothing) + label + Label of line on line-plot + linewidth + Width of the line + linestyle + Style of the line + + Returns + ------- + ``matplotlib.pyplot.Figure`` + + See Also + -------- + `matplotlib.pyplot.Figure `_ + + `matplotlib.pyplot.subplots() `_ : To create fig, axes + + `Linestyles `_ : For possible linestyles. + """ if fig_axes is None: fig, axes = plt.subplots() else: @@ -429,6 +474,27 @@ def draw_plotly(self, smooth: int = 10, label: str = None ): + """ + Draws line-plot on given figure. + + + Parameters + ---------- + figure + `plotly.graph_objects.Figure `_. 
A new one is created if ``None`` + smooth + Number of windows for `SavGol `_ filter (set 0 for no smoothing) + label + Label of line on line-plot + + Returns + ------- + ``plotly.graph_objects.Figure`` + + See Also + -------- + `plotly.graph_objects.Figure `_ + """ if figure is None: figure = go.Figure() diff --git a/src/bsxplorer/Clusters.py b/src/bsxplorer/Clusters.py index 0ae22d7..812ca19 100644 --- a/src/bsxplorer/Clusters.py +++ b/src/bsxplorer/Clusters.py @@ -177,6 +177,8 @@ def all(self): class ClusterMany(_ClusterBase): + """Class for operating with multiple samples regions clustering""" + def __init__(self, metagenes: MetageneFilesBase, count_threshold=5, na_rm: float | None = None): intersect_list = set.intersection(*[set(metagene.bismark["gene"].to_list()) for metagene in metagenes.samples]) for i in range(len(metagenes.samples)): @@ -186,15 +188,59 @@ def __init__(self, metagenes: MetageneFilesBase, count_threshold=5, na_rm: float self.sample_names = metagenes.labels def kmeans(self, n_clusters: int = 8, n_init: int = 10, **kwargs): + """ + KMeans clustering on sample regions. Clustering is performed with `sklearn.cluster.KMeans `_. + + Parameters + ---------- + n_clusters + The number of clusters to generate. + n_init + Number of times the k-means algorithm is run with different centroid seeds. + kwargs + See `sklearn.cluster.KMeans `_. + + Returns + ------- + :class:`ClusterPlot` + + """ return ClusterPlot([cluster.kmeans(n_clusters, n_init, **kwargs).data for cluster in self.clusters], self.sample_names) def cut_tree(self, dist_method="euclidean", clust_method="average", cut_height_q=.99, **kwargs): + """ + Hierarchical clustering on sample regions. Clustering is performed with `dynamicTreeCut.cutreeHybrid `_. + + Parameters + ---------- + dist_method + Distance calculation metric + clust_method + Hierarchical clustering method + cut_height_q + Quantile of leaves height to be cut. + kwargs + See `dynamicTreeCut `_. 
+ + Returns + ------- + :class:`ClusterPlot` + """ + return ClusterPlot([ cluster.cut_tree(dist_method="euclidean", clust_method="average", cut_height_q=.99, **kwargs).data for cluster in self.clusters ], self.sample_names) def all(self): + """ + Returns all regions for downstream plotting. + + Returns + ------- + :class:`ClusterPlot` + """ + return ClusterPlot([cluster.all().data for cluster in self.clusters], self.sample_names) @@ -226,6 +272,7 @@ def from_matrix(cls, matrix: np.ndarray, labels: np.array, names: list[str] | np class ClusterPlot: + """Class for plotting cluster data.""" def __init__(self, data: ClusterData | list[ClusterData], sample_names=None): if isinstance(data, list) and len(data) == 1: self.data = data[0] @@ -235,6 +282,15 @@ def __init__(self, data: ClusterData | list[ClusterData], sample_names=None): self.sample_names = sample_names def save_tsv(self, filename: str): + """ + Save labels for regions in a TSV file. + + Parameters + ---------- + filename + File name for output file + """ + filename = Path(filename) def save(data: ClusterData, path: Path): @@ -263,7 +319,26 @@ def __intersect_genes(self): print( f"Found {len(intersection)} intersections between samples with {max(map(len, names))} regions max") - def draw_mpl(self, method='average', metric='euclidean', cmap: str = "cividis"): + def draw_mpl(self, method='average', metric='euclidean', cmap: str = "cividis", **kwargs): + """ + Draws clustermap with seaborn.clustermap. + + Parameters + ---------- + method + Method for hierarchical clustering. 
+ metric + Metric for distance calculation + cmap + Colormap to use + **kwargs + ``seaborn.clustermap`` parameters + + See Also + -------- + `seaborn.clustermap `_ : For more information about possible parameters + """ + if isinstance(self.data, list): warnings.warn("Matplotlib version of cluster plot is not available for multiple samples") return None @@ -272,10 +347,30 @@ def draw_mpl(self, method='average', metric='euclidean', cmap: str = "cividis"): self.data.centers, index=[f"{name} ({count})" for name, count in zip(*np.unique(self.data.labels, return_counts=True))]) - fig = sns.clustermap(df, col_cluster=False, cmap=cmap, method=method, metric=metric) + args = dict(col_cluster=False) | kwargs + args |= dict(cmap=cmap, method=method, metric=metric) + + fig = sns.clustermap(df, **args) return fig def draw_plotly(self, method='average', metric='euclidean', cmap: str = "cividis"): + """ + Draws clustermap with plotly imshow. + + Parameters + ---------- + method + Method for hierarchical clustering. + metric + Metric for distance calculation + cmap + Colormap to use + + Returns + ------- + ``plotly.graph_objects.Figure`` + """ + if isinstance(self.data, list): # order for first sample dist = pdist(self.data[0].centers, metric=metric) diff --git a/src/bsxplorer/__init__.py b/src/bsxplorer/__init__.py index 6fdfa73..0312a72 100644 --- a/src/bsxplorer/__init__.py +++ b/src/bsxplorer/__init__.py @@ -2,4 +2,4 @@ from .Plots import LinePlot, LinePlotFiles, HeatMap, HeatMapFiles, PCA from .Binom import BinomialData, RegionStat from .GenomeClass import Genome -from .ChrLevels import ChrLevels \ No newline at end of file +from .ChrLevelsClass import ChrLevels \ No newline at end of file