Ruff with safe fixes
endast committed Aug 13, 2024
1 parent 65f5a4f commit 289c775
Showing 13 changed files with 28 additions and 53 deletions.
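A note on provenance: hunks like the ones below are what Ruff's fixer produces when only its safe fixes are applied (by default, `ruff check --fix` applies just the fixes Ruff classifies as safe). Nearly every change in this commit appears to be one of two rules: F541, an f-string without any placeholders rewritten as a plain string, and F401, an unused import deleted. A minimal sketch of both findings in one hypothetical file:

```python
# lint_demo.py - hypothetical example of the two Ruff findings fixed in this commit
import logging
from typing import Optional  # F401: imported but never used; the safe fix deletes this line

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# F541: an f-string with no {placeholders}; the safe fix just drops the `f` prefix.
logger.info(f"Writing values to data frame")  # before the fix
logger.info("Writing values to data frame")   # after the fix
```

Running `ruff check lint_demo.py` would flag both findings; `ruff check --fix lint_demo.py` rewrites them in place, which is the pattern repeated across all 13 files below.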
5 changes: 2 additions & 3 deletions deeprvat/annotations/annotations.py
@@ -7,7 +7,6 @@
import sys
import time
from pathlib import Path
-from typing import Optional
import dask.dataframe as dd
import numpy as np
import click
@@ -795,12 +794,12 @@ def deepsea_pca(

    del X_std

-    logger.info(f"Writing values to data frame")
+    logger.info("Writing values to data frame")
    pca_df = pd.DataFrame(
        X_pca, columns=[f"DeepSEA_PC_{i}" for i in range(1, n_components + 1)]
    )
    del X_pca
-    logger.info(f"adding key values to data frame")
+    logger.info("adding key values to data frame")
    pca_df = pd.concat([key_df, pca_df], axis=1)

    logger.info("Sanity check of results")
6 changes: 1 addition & 5 deletions deeprvat/cv_utils.py
@@ -1,9 +1,5 @@
import pandas as pd
import yaml
-import os
-import sys
-from typing import Optional
-import re

# import pickle
import logging
@@ -202,7 +198,7 @@ def combine_test_set_burdens(
        for col in range(this_y.shape[1]):
            this_y[:, col] = standardize_series(this_y[:, col])
    elif y_transformation == "quantile_transform":
-        logger.info(f" Quantile transforming combined target phenotype (y)")
+        logger.info(" Quantile transforming combined target phenotype (y)")
        for col in range(this_y.shape[1]):
            this_y[:, col] = my_quantile_transform(this_y[:, col])
    y[:] = this_y
20 changes: 7 additions & 13 deletions deeprvat/data/rare.py
@@ -1,18 +1,12 @@
import itertools
import logging
import random
import sys
from pathlib import Path
from pprint import pformat
from scipy.sparse import coo_matrix, vstack
from typing import Dict, List, Optional, Union, Set
import numpy as np
import pandas as pd
import copy
import torch
import torch.nn.functional as F
import zarr
from torch.utils.data import Dataset

from deeprvat.utils import calculate_mean_std, standardize_series_with_params

@@ -86,7 +80,7 @@ def __init__(
        self.setup_metadata()

        if self.low_memory:
-            logger.info(f" Cleaning up to save memory")
+            logger.info(" Cleaning up to save memory")
            self.annotation_df = None
            if not self.gene_specific_anno:
                self.exploded_annotations = None
@@ -273,7 +267,7 @@ def setup_annotations(
            if self.gene_specific_anno
            else len(self.exploded_annotations)
        ) == 0:
-            raise RuntimeError(f"No rare variants found in provided genes")
+            raise RuntimeError("No rare variants found in provided genes")

    def apply_thresholds(self, thresholds: Optional[Dict[str, str]]):
        if thresholds is not None:
@@ -302,7 +296,7 @@ def apply_thresholds(self, thresholds: Optional[Dict[str, str]]):
        )

        if self.kept_variants.shape[0] == 0:
-            raise RuntimeError(f" No variants passed thresholding")
+            raise RuntimeError(" No variants passed thresholding")

        logger.info(f" {self.kept_variants.shape[0]} variants passed thresholding")

@@ -320,7 +314,7 @@ def apply_thresholds(self, thresholds: Optional[Dict[str, str]]):
        self.variant_map[self.kept_variants] = np.arange(len(self.annotation_df))

        if len(self.annotation_df) == 0:
-            raise RuntimeError(f" No variants passed thresholding")
+            raise RuntimeError(" No variants passed thresholding")

        logger.info(f" {len(self.annotation_df)} variants passed thresholding")

@@ -449,7 +443,7 @@ def __init__(
        self.setup_metadata()

        if self.low_memory:
-            logger.info(f" Cleaning up to save memory")
+            logger.info(" Cleaning up to save memory")
            self.annotation_df = None
            if not self.gene_specific_anno:
                self.exploded_annotations = None
@@ -594,7 +588,7 @@ def setup_annotations(
        ].astype({self.grouping_column: np.int32})

        if len(self.annotation_df) == 0:
-            raise RuntimeError(f"No rare variants found in provided genes")
+            raise RuntimeError("No rare variants found in provided genes")

    def apply_thresholds(self, thresholds: Optional[Dict[str, str]]):
        if self.gene_specific_anno:
@@ -614,7 +608,7 @@ def apply_thresholds(self, thresholds: Optional[Dict[str, str]]):
        )

        if self.kept_variants.shape[0] == 0:
-            raise RuntimeError(f" No variants passed thresholding")
+            raise RuntimeError(" No variants passed thresholding")

        logger.info(f" {self.kept_variants.shape[0]} variants passed thresholding")

7 changes: 3 additions & 4 deletions deeprvat/deeprvat/associate.py
@@ -10,7 +10,6 @@
from typing import Dict, List, Optional, Tuple

import click
-import dask.dataframe as dd
import numpy as np
import pandas as pd
import pyranges as pr
@@ -1083,7 +1082,7 @@ def regress_(
"""
assert len(gene_indices) == len(genes)

logger.info(f"Computing associations")
logger.info("Computing associations")
logger.info(f"Covariates shape: {x_pheno.shape}, y shape: {y.shape}")

regressed_genes = []
@@ -1288,7 +1287,7 @@ def combine_regression_results(
    :type model_name: Optional[str]
    :return: Concatenated regression results saved to a parquet file.
    """
-    logger.info(f"Concatenating results")
+    logger.info("Concatenating results")
    results = pd.concat([pd.read_parquet(f, engine="pyarrow") for f in result_files])

    if model_name is not None:
@@ -1530,7 +1529,7 @@ def regress_common_(
    assert len(gene_indices) == len(genes)
    logger.info(common_genotype_prefix)

-    logger.info(f"Computing associations")
+    logger.info("Computing associations")
    logger.info(f"Covariates shape: {x_pheno.shape}, y shape: {y.shape}")

    regressed_genes = []
3 changes: 0 additions & 3 deletions deeprvat/deeprvat/common_variant_condition_utils.py
@@ -2,10 +2,7 @@

import pandas as pd
import pyranges as pr
-import pandas as pd
from pyarrow.parquet import ParquetFile
-import scipy as sp
-import pickle
import numpy as np
import zarr
from pathlib import Path
7 changes: 2 additions & 5 deletions deeprvat/deeprvat/config.py
@@ -1,17 +1,14 @@
import logging
-import pprint
import sys
from pprint import pprint
from typing import Optional, Tuple

import click
import pandas as pd
-import torch.nn.functional as F
import yaml

from deeprvat.deeprvat.evaluate import pval_correction
from pathlib import Path
-import os
from copy import deepcopy

logging.basicConfig(
@@ -575,7 +572,7 @@ def update_config(
        )

        baseline_columns = ["gene", "pval"]
-        logger.info(f" Reading baseline results from:")
+        logger.info(" Reading baseline results from:")
        pprint(baseline_results)
        baseline_df = pd.concat(
            [
@@ -616,7 +613,7 @@ def update_config(
            baseline_df = baseline_df.query("significant")
        else:
            if threshold is not None:
-                baseline_temp = baseline_df.query(f"pval_corrected < @threshold")
+                baseline_temp = baseline_df.query("pval_corrected < @threshold")
                logger.info(
                    f" {len(baseline_df)} genes "
                    "from baseline passed thresholding"
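An aside on the `update_config` hunk just above: in `DataFrame.query`, `@threshold` is pandas' syntax for referencing a local Python variable, not an f-string placeholder, so removing the `f` prefix cannot change behavior. A quick self-contained check, with hypothetical values:

```python
import pandas as pd

threshold = 0.05
baseline_df = pd.DataFrame({"gene": ["A", "B"], "pval_corrected": [0.01, 0.2]})

# An f-string with no {placeholders} is character-for-character the same string,
# so the F541 fix is a runtime no-op.
assert f"pval_corrected < @threshold" == "pval_corrected < @threshold"

# @threshold is resolved by pandas from the local scope, keeping only gene "A".
print(baseline_df.query("pval_corrected < @threshold"))
```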
8 changes: 3 additions & 5 deletions deeprvat/deeprvat/evaluate.py
@@ -2,8 +2,6 @@
import sys
from pathlib import Path
from typing import Dict, Optional, Tuple
-from itertools import combinations
-import random
import os

import click
@@ -12,7 +10,7 @@
import yaml
from seak.cct import cct

-from deeprvat.utils import pval_correction, bfcorrect_df
+from deeprvat.utils import pval_correction

logging.basicConfig(
format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
@@ -384,8 +382,8 @@ def evaluate(
    logger.info(significant.query('Method == "DeepRVAT"'))
    logger.info("Saving results")
    out_path = Path(out_dir)
-    significant.to_parquet(out_path / f"significant.parquet", engine="pyarrow")
-    all_pvals.to_parquet(out_path / f"all_results.parquet", engine="pyarrow")
+    significant.to_parquet(out_path / "significant.parquet", engine="pyarrow")
+    all_pvals.to_parquet(out_path / "all_results.parquet", engine="pyarrow")


if __name__ == "__main__":
4 changes: 1 addition & 3 deletions deeprvat/deeprvat/train.py
@@ -3,8 +3,6 @@
import itertools
import logging
import pickle
-import random
-import shutil
import sys
from pathlib import Path
from pprint import pformat, pprint
@@ -864,7 +862,7 @@ def run_bagging(
            if str(e).find("CUDA out of memory") != -1:
                if dm.hparams.batch_size > 4:
                    logging.error(
-                        f"Retrying training with half the original batch size"
+                        "Retrying training with half the original batch size"
                    )
                    gc.collect()
                    torch.cuda.empty_cache()
10 changes: 5 additions & 5 deletions deeprvat/seed_gene_discovery/evaluate.py
@@ -52,13 +52,13 @@ def evaluate_(associations: Dict[str, pd.DataFrame], alpha: float):
            corrected_result = pval_correction(
                result, alpha, correction_type=correction_type
            )
-            corrected_result[f"-log10pval_corrected"] = -np.log10(
-                corrected_result[f"pval_corrected"]
+            corrected_result["-log10pval_corrected"] = -np.log10(
+                corrected_result["pval_corrected"]
            )
            corrected_result["correction_method"] = correction_type
            corrected_results.append(corrected_result)

-            sig = corrected_result.query(f"significant")
+            sig = corrected_result.query("significant")
            n_sig = len(sig)
            logger.info(f"Significant genes: {n_sig}")
            metrics[f"significant{sig_col_suffix}"] = n_sig
@@ -70,7 +70,7 @@ def evaluate_(associations: Dict[str, pd.DataFrame], alpha: float):
        corrected_results = pd.concat(corrected_results)
        all_evaluations[pheno] = corrected_results

-        all_sig = corrected_results.query(f"significant")
+        all_sig = corrected_results.query("significant")
        all_significant[pheno] = all_sig

        print(all_sig)
@@ -128,7 +128,7 @@ def evaluate(
        out_dir = Path(out_dir)
        evaluations[pheno].to_parquet(out_file)

-    with open(out_dir / f"metrics.pkl", "wb") as f:
+    with open(out_dir / "metrics.pkl", "wb") as f:
        pickle.dump(metrics, f)

    all_associations.to_parquet(f"{out_dir}/all_associations.parquet")
6 changes: 3 additions & 3 deletions deeprvat/seed_gene_discovery/seed_gene_discovery.py
@@ -15,7 +15,7 @@
import pandas as pd
import yaml
from scipy.stats import beta
-from scipy.sparse import coo_matrix, spmatrix
+from scipy.sparse import spmatrix
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

@@ -553,7 +553,7 @@ def make_dataset_(
logger.info("Debug mode: Using only 1000 samples")
batch_size = 1000
else:
logger.info(f"Setting batch size to length of dataset")
logger.info("Setting batch size to length of dataset")
batch_size = len(dataset)

if "batch_size" in data_config["dataloader_config"].keys():
@@ -709,7 +709,7 @@ def run_association(
    n_genes = len(genes)
    if n_genes == 0:
        logger.info(
-            f"Number of chunks is too large. The pipeline will throw an error beacause there are no genes to test"
+            "Number of chunks is too large. The pipeline will throw an error beacause there are no genes to test"
        )
    logger.info(f"Processing genes in {genes} from {n_total_genes} in total")
    this_gene_ids = [gene_ids[i] for i in genes]
3 changes: 1 addition & 2 deletions deeprvat/utils.py
@@ -5,8 +5,7 @@
import shutil
import sys
import pickle
-from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, Union
+from typing import Any, Callable, Dict, Iterable

import optuna
import numpy as np
1 change: 0 additions & 1 deletion pipelines/resources/absplice.py
@@ -14,7 +14,6 @@ def cli():
@click.argument("input", type=click.Path(exists=True))
@click.argument("output", type=click.Path(exists=False))
def codign_genes(input, output):
import pandas as pd
import pyranges as pr

gr = pr.read_gtf(input["gtf_file"])
1 change: 0 additions & 1 deletion pipelines/resources/coding_genes.py
@@ -1,4 +1,3 @@
-import pandas as pd
import pyranges as pr

gr = pr.read_gtf(snakemake.input["gtf_file"])
