From 289c77541aa002111a5841778b8f87d46c6b2a14 Mon Sep 17 00:00:00 2001 From: Magnus Wahlberg Date: Tue, 13 Aug 2024 13:28:38 +0200 Subject: [PATCH] Ruff with safe fixes --- deeprvat/annotations/annotations.py | 5 ++--- deeprvat/cv_utils.py | 6 +----- deeprvat/data/rare.py | 20 +++++++------------ deeprvat/deeprvat/associate.py | 7 +++---- .../common_variant_condition_utils.py | 3 --- deeprvat/deeprvat/config.py | 7 ++----- deeprvat/deeprvat/evaluate.py | 8 +++----- deeprvat/deeprvat/train.py | 4 +--- deeprvat/seed_gene_discovery/evaluate.py | 10 +++++----- .../seed_gene_discovery.py | 6 +++--- deeprvat/utils.py | 3 +-- pipelines/resources/absplice.py | 1 - pipelines/resources/coding_genes.py | 1 - 13 files changed, 28 insertions(+), 53 deletions(-) diff --git a/deeprvat/annotations/annotations.py b/deeprvat/annotations/annotations.py index 622af12e..b7f69a81 100644 --- a/deeprvat/annotations/annotations.py +++ b/deeprvat/annotations/annotations.py @@ -7,7 +7,6 @@ import sys import time from pathlib import Path -from typing import Optional import dask.dataframe as dd import numpy as np import click @@ -795,12 +794,12 @@ def deepsea_pca( del X_std - logger.info(f"Writing values to data frame") + logger.info("Writing values to data frame") pca_df = pd.DataFrame( X_pca, columns=[f"DeepSEA_PC_{i}" for i in range(1, n_components + 1)] ) del X_pca - logger.info(f"adding key values to data frame") + logger.info("adding key values to data frame") pca_df = pd.concat([key_df, pca_df], axis=1) logger.info("Sanity check of results") diff --git a/deeprvat/cv_utils.py b/deeprvat/cv_utils.py index 2fc09155..cd8eedfc 100644 --- a/deeprvat/cv_utils.py +++ b/deeprvat/cv_utils.py @@ -1,9 +1,5 @@ -import pandas as pd import yaml -import os import sys -from typing import Optional -import re # import pickle import logging @@ -202,7 +198,7 @@ def combine_test_set_burdens( for col in range(this_y.shape[1]): this_y[:, col] = standardize_series(this_y[:, col]) elif y_transformation == 
"quantile_transform": - logger.info(f" Quantile transforming combined target phenotype (y)") + logger.info(" Quantile transforming combined target phenotype (y)") for col in range(this_y.shape[1]): this_y[:, col] = my_quantile_transform(this_y[:, col]) y[:] = this_y diff --git a/deeprvat/data/rare.py b/deeprvat/data/rare.py index 12f65d41..04d36ef3 100644 --- a/deeprvat/data/rare.py +++ b/deeprvat/data/rare.py @@ -1,8 +1,5 @@ -import itertools import logging -import random import sys -from pathlib import Path from pprint import pformat from scipy.sparse import coo_matrix, vstack from typing import Dict, List, Optional, Union, Set @@ -10,9 +7,6 @@ import pandas as pd import copy import torch -import torch.nn.functional as F -import zarr -from torch.utils.data import Dataset from deeprvat.utils import calculate_mean_std, standardize_series_with_params @@ -86,7 +80,7 @@ def __init__( self.setup_metadata() if self.low_memory: - logger.info(f" Cleaning up to save memory") + logger.info(" Cleaning up to save memory") self.annotation_df = None if not self.gene_specific_anno: self.exploded_annotations = None @@ -273,7 +267,7 @@ def setup_annotations( if self.gene_specific_anno else len(self.exploded_annotations) ) == 0: - raise RuntimeError(f"No rare variants found in provided genes") + raise RuntimeError("No rare variants found in provided genes") def apply_thresholds(self, thresholds: Optional[Dict[str, str]]): if thresholds is not None: @@ -302,7 +296,7 @@ def apply_thresholds(self, thresholds: Optional[Dict[str, str]]): ) if self.kept_variants.shape[0] == 0: - raise RuntimeError(f" No variants passed thresholding") + raise RuntimeError(" No variants passed thresholding") logger.info(f" {self.kept_variants.shape[0]} variants passed thresholding") @@ -320,7 +314,7 @@ def apply_thresholds(self, thresholds: Optional[Dict[str, str]]): self.variant_map[self.kept_variants] = np.arange(len(self.annotation_df)) if len(self.annotation_df) == 0: - raise RuntimeError(f" No 
variants passed thresholding") + raise RuntimeError(" No variants passed thresholding") logger.info(f" {len(self.annotation_df)} variants passed thresholding") @@ -449,7 +443,7 @@ def __init__( self.setup_metadata() if self.low_memory: - logger.info(f" Cleaning up to save memory") + logger.info(" Cleaning up to save memory") self.annotation_df = None if not self.gene_specific_anno: self.exploded_annotations = None @@ -594,7 +588,7 @@ def setup_annotations( ].astype({self.grouping_column: np.int32}) if len(self.annotation_df) == 0: - raise RuntimeError(f"No rare variants found in provided genes") + raise RuntimeError("No rare variants found in provided genes") def apply_thresholds(self, thresholds: Optional[Dict[str, str]]): if self.gene_specific_anno: @@ -614,7 +608,7 @@ def apply_thresholds(self, thresholds: Optional[Dict[str, str]]): ) if self.kept_variants.shape[0] == 0: - raise RuntimeError(f" No variants passed thresholding") + raise RuntimeError(" No variants passed thresholding") logger.info(f" {self.kept_variants.shape[0]} variants passed thresholding") diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py index 9f61e3b5..11bb28fe 100644 --- a/deeprvat/deeprvat/associate.py +++ b/deeprvat/deeprvat/associate.py @@ -10,7 +10,6 @@ from typing import Dict, List, Optional, Tuple import click -import dask.dataframe as dd import numpy as np import pandas as pd import pyranges as pr @@ -1083,7 +1082,7 @@ def regress_( """ assert len(gene_indices) == len(genes) - logger.info(f"Computing associations") + logger.info("Computing associations") logger.info(f"Covariates shape: {x_pheno.shape}, y shape: {y.shape}") regressed_genes = [] @@ -1288,7 +1287,7 @@ def combine_regression_results( :type model_name: Optional[str] :return: Concatenated regression results saved to a parquet file. 
""" - logger.info(f"Concatenating results") + logger.info("Concatenating results") results = pd.concat([pd.read_parquet(f, engine="pyarrow") for f in result_files]) if model_name is not None: @@ -1530,7 +1529,7 @@ def regress_common_( assert len(gene_indices) == len(genes) logger.info(common_genotype_prefix) - logger.info(f"Computing associations") + logger.info("Computing associations") logger.info(f"Covariates shape: {x_pheno.shape}, y shape: {y.shape}") regressed_genes = [] diff --git a/deeprvat/deeprvat/common_variant_condition_utils.py b/deeprvat/deeprvat/common_variant_condition_utils.py index 546e9d1b..0005745a 100644 --- a/deeprvat/deeprvat/common_variant_condition_utils.py +++ b/deeprvat/deeprvat/common_variant_condition_utils.py @@ -2,10 +2,7 @@ import pandas as pd import pyranges as pr -import pandas as pd from pyarrow.parquet import ParquetFile -import scipy as sp -import pickle import numpy as np import zarr from pathlib import Path diff --git a/deeprvat/deeprvat/config.py b/deeprvat/deeprvat/config.py index 4c7c818e..eebe0126 100644 --- a/deeprvat/deeprvat/config.py +++ b/deeprvat/deeprvat/config.py @@ -1,17 +1,14 @@ import logging import pprint import sys -from pprint import pprint from typing import Optional, Tuple import click import pandas as pd -import torch.nn.functional as F import yaml from deeprvat.deeprvat.evaluate import pval_correction from pathlib import Path -import os from copy import deepcopy logging.basicConfig( @@ -575,7 +572,7 @@ def update_config( ) baseline_columns = ["gene", "pval"] - logger.info(f" Reading baseline results from:") + logger.info(" Reading baseline results from:") pprint(baseline_results) baseline_df = pd.concat( [ @@ -616,7 +613,7 @@ def update_config( baseline_df = baseline_df.query("significant") else: if threshold is not None: - baseline_temp = baseline_df.query(f"pval_corrected < @threshold") + baseline_temp = baseline_df.query("pval_corrected < @threshold") logger.info( f" {len(baseline_df)} genes " "from 
baseline passed thresholding" diff --git a/deeprvat/deeprvat/evaluate.py b/deeprvat/deeprvat/evaluate.py index 825bd90d..58130f4d 100644 --- a/deeprvat/deeprvat/evaluate.py +++ b/deeprvat/deeprvat/evaluate.py @@ -2,8 +2,6 @@ import sys from pathlib import Path from typing import Dict, Optional, Tuple -from itertools import combinations -import random import os import click @@ -12,7 +10,7 @@ import yaml from seak.cct import cct -from deeprvat.utils import pval_correction, bfcorrect_df +from deeprvat.utils import pval_correction logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", @@ -384,8 +382,8 @@ def evaluate( logger.info(significant.query('Method == "DeepRVAT"')) logger.info("Saving results") out_path = Path(out_dir) - significant.to_parquet(out_path / f"significant.parquet", engine="pyarrow") - all_pvals.to_parquet(out_path / f"all_results.parquet", engine="pyarrow") + significant.to_parquet(out_path / "significant.parquet", engine="pyarrow") + all_pvals.to_parquet(out_path / "all_results.parquet", engine="pyarrow") if __name__ == "__main__": diff --git a/deeprvat/deeprvat/train.py b/deeprvat/deeprvat/train.py index 7006b9ad..38221f00 100644 --- a/deeprvat/deeprvat/train.py +++ b/deeprvat/deeprvat/train.py @@ -3,8 +3,6 @@ import itertools import logging import pickle -import random -import shutil import sys from pathlib import Path from pprint import pformat, pprint @@ -864,7 +862,7 @@ def run_bagging( if str(e).find("CUDA out of memory") != -1: if dm.hparams.batch_size > 4: logging.error( - f"Retrying training with half the original batch size" + "Retrying training with half the original batch size" ) gc.collect() torch.cuda.empty_cache() diff --git a/deeprvat/seed_gene_discovery/evaluate.py b/deeprvat/seed_gene_discovery/evaluate.py index 9da7213a..e06eea6d 100644 --- a/deeprvat/seed_gene_discovery/evaluate.py +++ b/deeprvat/seed_gene_discovery/evaluate.py @@ -52,13 +52,13 @@ def evaluate_(associations: Dict[str, pd.DataFrame], 
alpha: float): corrected_result = pval_correction( result, alpha, correction_type=correction_type ) - corrected_result[f"-log10pval_corrected"] = -np.log10( - corrected_result[f"pval_corrected"] + corrected_result["-log10pval_corrected"] = -np.log10( + corrected_result["pval_corrected"] ) corrected_result["correction_method"] = correction_type corrected_results.append(corrected_result) - sig = corrected_result.query(f"significant") + sig = corrected_result.query("significant") n_sig = len(sig) logger.info(f"Significant genes: {n_sig}") metrics[f"significant{sig_col_suffix}"] = n_sig @@ -70,7 +70,7 @@ def evaluate_(associations: Dict[str, pd.DataFrame], alpha: float): corrected_results = pd.concat(corrected_results) all_evaluations[pheno] = corrected_results - all_sig = corrected_results.query(f"significant") + all_sig = corrected_results.query("significant") all_significant[pheno] = all_sig print(all_sig) @@ -128,7 +128,7 @@ def evaluate( out_dir = Path(out_dir) evaluations[pheno].to_parquet(out_file) - with open(out_dir / f"metrics.pkl", "wb") as f: + with open(out_dir / "metrics.pkl", "wb") as f: pickle.dump(metrics, f) all_associations.to_parquet(f"{out_dir}/all_associations.parquet") diff --git a/deeprvat/seed_gene_discovery/seed_gene_discovery.py b/deeprvat/seed_gene_discovery/seed_gene_discovery.py index b7f66cca..786d17ea 100644 --- a/deeprvat/seed_gene_discovery/seed_gene_discovery.py +++ b/deeprvat/seed_gene_discovery/seed_gene_discovery.py @@ -15,7 +15,7 @@ import pandas as pd import yaml from scipy.stats import beta -from scipy.sparse import coo_matrix, spmatrix +from scipy.sparse import spmatrix from torch.utils.data import DataLoader, Dataset from tqdm import tqdm @@ -553,7 +553,7 @@ def make_dataset_( logger.info("Debug mode: Using only 1000 samples") batch_size = 1000 else: - logger.info(f"Setting batch size to length of dataset") + logger.info("Setting batch size to length of dataset") batch_size = len(dataset) if "batch_size" in 
data_config["dataloader_config"].keys(): @@ -709,7 +709,7 @@ def run_association( n_genes = len(genes) if n_genes == 0: logger.info( - f"Number of chunks is too large. The pipeline will throw an error beacause there are no genes to test" + "Number of chunks is too large. The pipeline will throw an error because there are no genes to test" ) logger.info(f"Processing genes in {genes} from {n_total_genes} in total") this_gene_ids = [gene_ids[i] for i in genes] diff --git a/deeprvat/utils.py b/deeprvat/utils.py index a9c18801..a28b4a5c 100644 --- a/deeprvat/utils.py +++ b/deeprvat/utils.py @@ -5,8 +5,7 @@ import shutil import sys import pickle -from pathlib import Path -from typing import Any, Callable, Dict, Iterable, Union +from typing import Any, Callable, Dict, Iterable import optuna import numpy as np diff --git a/pipelines/resources/absplice.py b/pipelines/resources/absplice.py index d3ef763b..f4dfc1a1 100644 --- a/pipelines/resources/absplice.py +++ b/pipelines/resources/absplice.py @@ -14,7 +14,6 @@ def cli(): @click.argument("input", type=click.Path(exists=True)) @click.argument("output", type=click.Path(exists=False)) def codign_genes(input, output): - import pandas as pd import pyranges as pr gr = pr.read_gtf(input["gtf_file"]) diff --git a/pipelines/resources/coding_genes.py b/pipelines/resources/coding_genes.py index a98a6068..20ae1ce4 100644 --- a/pipelines/resources/coding_genes.py +++ b/pipelines/resources/coding_genes.py @@ -1,4 +1,3 @@ -import pandas as pd import pyranges as pr gr = pr.read_gtf(snakemake.input["gtf_file"])