From 7f34ab5b55c06860d33400db9a7139c827c3e319 Mon Sep 17 00:00:00 2001 From: shitohana Date: Thu, 2 Nov 2023 15:19:46 +0300 Subject: [PATCH] v1.3.0b0 --- pyproject.toml | 2 +- src/bismarkplot/BismarkPlot.py | 14 +++++++-- src/bismarkplot/console_chrs.py | 11 ++++++-- src/bismarkplot/console_metagene.py | 44 +++++++++++++++++++---------- 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 03c192c..96cff3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bismarkplot" -version = "1.3.0a1" +version = "1.3.0b0" authors = [ { name="shitohana", email="kyudytskiy@gmail.com" }, ] diff --git a/src/bismarkplot/BismarkPlot.py b/src/bismarkplot/BismarkPlot.py index 59bed11..eb66bd8 100644 --- a/src/bismarkplot/BismarkPlot.py +++ b/src/bismarkplot/BismarkPlot.py @@ -535,6 +535,8 @@ def from_file( :param cpu: How many cores to use. Uses every physical core by default :param batch_size: Number of rows to read by one CPU core """ + cpu = cpu if cpu is not None else cpu_count() + bismark = pl.read_csv_batched( file, separator='\t', has_header=False, @@ -542,7 +544,7 @@ def from_file( 'count_m', 'count_um', 'context'], columns=[0, 1, 2, 3, 4, 5], batch_size=batch_size, - n_threads=cpu if cpu is not None else cpu_count() + n_threads=cpu ) read_approx = approx_batch_num(file, batch_size) read_batches = 0 @@ -722,6 +724,8 @@ def __read_bismark_batches( batch_size: int = 10 ** 7, cpu: int = cpu_count() ) -> pl.DataFrame: + cpu = cpu if cpu is not None else cpu_count() + # enable string cache for categorical comparison pl.enable_string_cache(True) @@ -776,7 +780,7 @@ def __read_bismark_batches( 'count_m', 'count_um', 'context'], columns=[0, 1, 2, 3, 4, 5], batch_size=batch_size, - n_threads=cpu if cpu is not None else cpu_count() + n_threads=cpu ) batches = bismark.next_batches(cpu) @@ -1469,7 +1473,11 @@ def draw( else: subplots_y = 1 - subplots_x = (len(self.samples) + len(self.samples) % 2) // subplots_y + if len(self.samples) > 1: + subplots_x = (len(self.samples) + len(self.samples) % 2) // subplots_y + else: + subplots_x = 1 + fig, axes = plt.subplots(subplots_y, subplots_x) if not isinstance(axes, np.ndarray): diff --git a/src/bismarkplot/console_chrs.py b/src/bismarkplot/console_chrs.py index ac3e8a9..9321880 100644 --- a/src/bismarkplot/console_chrs.py +++ b/src/bismarkplot/console_chrs.py @@ -10,7 +10,8 @@ ) parser.add_argument('filename', help='path to bismark methylation_extractor file', metavar='path/to/txt') -parser.add_argument('-o', '--out', help='output base name', default=os.path.abspath(os.getcwd()), metavar='DIR') +parser.add_argument('-o', '--out', help='output base name', default="plot", metavar='NAME') +parser.add_argument('-d', '--dir', help='output dir', default=os.path.abspath(os.getcwd()), metavar='DIR') parser.add_argument('-b', '--batch', help='number of rows to be read from bismark file by batch', type=int, default=10**6, metavar='N') parser.add_argument('-c', '--cores', help='number of cores to use', type=int, default=None) parser.add_argument('-w', '--wlength', help='number of windows for chromosome', type=int, default=10**5, metavar='N') @@ -42,11 +43,15 @@ def main(): for context in ["CG", "CHG", "CHH"]: chr.filter(strand=strand, context=context).draw((fig, axes), smooth=args.smooth, label=context) - fig.savefig(f"{args.out}_{strand}.{args.file_format}", dpi=args.dpi) + save_path = f"{args.dir}/{args.out}_{strand}.{args.file_format}" + + print(f"Saving to: {save_path}") + + fig.savefig(save_path, dpi=args.dpi) except Exception: filename = f'error{datetime.now().strftime("%m_%d_%H:%M")}.txt' - file_dir = args.out + '/' + filename + file_dir = args.dir + '/' + filename with open(file_dir, 'w') as f: f.write(traceback.format_exc()) print(f'Error happened. Please open an issue at GitHub with Traceback from file: {file_dir}') diff --git a/src/bismarkplot/console_metagene.py b/src/bismarkplot/console_metagene.py index 9f13e61..f7d7308 100644 --- a/src/bismarkplot/console_metagene.py +++ b/src/bismarkplot/console_metagene.py @@ -2,6 +2,7 @@ import os import traceback from datetime import datetime +from matplotlib.pyplot import close parser = argparse.ArgumentParser( prog='BismarkPlot.', @@ -9,7 +10,8 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument('filename', help='path to bismark methylation_extractor files', nargs='+') -parser.add_argument('-o', '--out', help='output base name', default=os.path.abspath(os.getcwd())) +parser.add_argument('-o', '--out', help='output base name', default="plot", metavar='NAME') +parser.add_argument('--dir', help='output dir', default=os.path.abspath(os.getcwd()), metavar='DIR') parser.add_argument('-g', '--genome', help='path to GFF genome file') parser.add_argument('-r', '--region', help='path to GFF genome file', default="gene", choices=["gene", "exon", "tss", "tes"]) parser.add_argument('-b', '--batch', help='number of rows to be read from bismark file by batch', type=int, default=10**6) @@ -28,8 +30,8 @@ parser.add_argument('-S', '--smooth', help='windows for smoothing', type=float, default=10) parser.add_argument('-L', '--labels', help='labels for plots', nargs='+') parser.add_argument('-C', '--confidence', help='probability for confidence bands for line-plot. 0 if disabled', type=float, default=0) -parser.add_argument('-H', help='vertical resolution for heat-map', type=int, default=100) -parser.add_argument('-V', help='vertical resolution for heat-map', type=int, default=100) +parser.add_argument('-H', help='vertical resolution for heat-map', type=int, default=100, dest="vresolution") +parser.add_argument('-V', help='vertical resolution for heat-map', type=int, default=100, dest="hresolution") parser.add_argument("--dpi", help="dpi of output plot", type=int, default=200) parser.add_argument('-F', '--format', help='format of output plots', choices=['png', 'pdf', 'svg'], default='pdf', dest='file_format') @@ -66,26 +68,38 @@ def main(): cpu=args.cores ) + filename = args.dir + "/" + args.out + print(f"Base name for saving: {filename}_<...>.{args.file_format}") + for context in ["CG", "CHG", "CHH"]: for strand in ["+", "-"]: filtered = bismark.filter(context=context, strand=strand) - base_name = args.out + "_" + context + strand + "_{type}." + args.file_format + base_name = filename + "_" + context + strand + "_{type}." + args.file_format + + if args.line: + fig = filtered.line_plot().draw(smooth=args.smooth, confidence=args.confidence) + fig.savefig(base_name.format(type="line-plot"), dpi = args.dpi) + close() + if args.heatmap: + fig = filtered.heat_map(args.hresolution, args.vresolution).draw() + fig.savefig(base_name.format(type="heat-map"), dpi=args.dpi) + close() + if args.box: + fig = filtered.trim_flank().box_plot() + fig.savefig(base_name.format(type="box-plot"), dpi=args.dpi) + close() + if args.violin: + fig = filtered.trim_flank().violin_plot() + fig.savefig(base_name.format(type="violin-plot"), dpi=args.dpi) + close() - if args.line_plot: - filtered.line_plot().draw(smooth=args.smooth, confidence=args.confidence).savefig(base_name.format(type="line-plot"), dpi = args.dpi) - if args.heat_map: - filtered.heat_map(args.hresolution, args.vresolution).draw().savefig(base_name.format(type="heat-map"), dpi=args.dpi) - if args.box_plot: - filtered.trim_flank().box_plot().savefig(base_name.format(type="box-plot"), dpi=args.dpi) - if args.violin_plot: - filtered.trim_flank().violin_plot().savefig(base_name.format(type="violin-plot"), dpi=args.dpi) except Exception: - filename = f'error{datetime.now().strftime("%m_%d_%H:%M")}.txt' - with open(args.out + '/' + filename, 'w') as f: + filename = args.dir + '/' + f'error{datetime.now().strftime("%m_%d_%H:%M")}.txt' + with open(filename, 'w') as f: f.write(traceback.format_exc()) - print(f'Error happened. Please open an issue at GitHub with Traceback from file: {f}') + print(f'Error happened. Please open an issue at GitHub with Traceback from file: {filename}') if __name__ == "__main__":