From 0413a0a50fd3395169a2d8601e5d10c71cc274ae Mon Sep 17 00:00:00 2001
From: autoblack <1240432+jfear@users.noreply.github.com>
Date: Wed, 6 Sep 2023 16:49:34 +0000
Subject: [PATCH] fixup: Format Python code with Black

---
 src/cgr_gwas_qc/parsers/illumina/adpc.py      |  9 +------
 src/cgr_gwas_qc/scripts/legacy_compare.py     | 27 +++----------------
 src/cgr_gwas_qc/testing/comparison.py         | 11 ++------
 .../workflow/scripts/bim_filter_vcf.py        |  5 +---
 .../workflow/scripts/sample_concordance.py    |  9 ++-----
 .../scripts/sample_concordance_new.py         |  9 ++-----
 .../workflow/scripts/sample_qc_table.py       | 19 ++++---------
 .../workflow/scripts/subject_qc_table.py      |  4 +--
 .../scripts/update_snps_to_1kg_rsID.py        |  5 +---
 tests/workflow/modules/test_eigensoft.py      |  6 ++---
 .../sub_workflows/test_entry_points.py        |  9 +++----
 .../workflow/sub_workflows/test_sample_qc.py  |  3 +--
 .../workflow/sub_workflows/test_subject_qc.py |  3 +--
 13 files changed, 26 insertions(+), 93 deletions(-)

diff --git a/src/cgr_gwas_qc/parsers/illumina/adpc.py b/src/cgr_gwas_qc/parsers/illumina/adpc.py
index 836cf497..1229deef 100644
--- a/src/cgr_gwas_qc/parsers/illumina/adpc.py
+++ b/src/cgr_gwas_qc/parsers/illumina/adpc.py
@@ -58,14 +58,7 @@ def validate(self):
     def __iter__(self):
         """This allows tuple unpacking"""
         return iter(
-            (
-                self.x_raw,
-                self.y_raw,
-                self.x_norm,
-                self.y_norm,
-                self.genotype_score,
-                self.genotype,
-            )
+            (self.x_raw, self.y_raw, self.x_norm, self.y_norm, self.genotype_score, self.genotype,)
         )
 
     def __str__(self):
diff --git a/src/cgr_gwas_qc/scripts/legacy_compare.py b/src/cgr_gwas_qc/scripts/legacy_compare.py
index b3df92ac..14a6bc85 100644
--- a/src/cgr_gwas_qc/scripts/legacy_compare.py
+++ b/src/cgr_gwas_qc/scripts/legacy_compare.py
@@ -161,11 +161,7 @@ def compare_config(config: Config, legacy_dir: Path, ignored_config: bool):
             config.software_params.contam_population,
             "B Allele Frequency Population (contamination check)",
         ),
-        (
-            legacy_config["strand"].lower(),
-            config.software_params.strand,
-            "Strand",
-        ),
+        (legacy_config["strand"].lower(), config.software_params.strand, "Strand",),
         (
             legacy_config["pi_hat_threshold"],
             config.software_params.pi_hat_threshold,
@@ -222,8 +218,7 @@ def _file_comparison(cmp):
             typer.secho(f"{cmp.message} did not match ({cmp.legacy} vs {cmp.current}).", fg=RED)
     except NotImplementedError:
         typer.secho(
-            f"Cannot currently compare {cmp.message}",
-            fg=YELLOW,
+            f"Cannot currently compare {cmp.message}", fg=YELLOW,
         )
 
 
@@ -449,22 +444,8 @@ def compare_contamination(legacy_dir: Path, mix_atol: float, llk_atol: float, ll
 def _parse_snpweights(filename):
     return (
         pd.read_csv(filename, dtype={"ID": str}, low_memory=False)
-        .rename(
-            {
-                "ID": "Sample_ID",
-                "AFR": "Pct_AFR",
-                "ASN": "Pct_ASN",
-                "EUR": "Pct_EUR",
-            },
-            axis=1,
-        )
-        .replace(
-            {
-                "EUR": "European",
-                "AFR": "African",
-                "ASN": "East Asian",
-            }
-        )
+        .rename({"ID": "Sample_ID", "AFR": "Pct_AFR", "ASN": "Pct_ASN", "EUR": "Pct_EUR",}, axis=1,)
+        .replace({"EUR": "European", "AFR": "African", "ASN": "East Asian",})
         .set_index("Sample_ID")
         .sort_index()
     )
diff --git a/src/cgr_gwas_qc/testing/comparison.py b/src/cgr_gwas_qc/testing/comparison.py
index 88892a3c..2a01e59c 100644
--- a/src/cgr_gwas_qc/testing/comparison.py
+++ b/src/cgr_gwas_qc/testing/comparison.py
@@ -30,11 +30,7 @@ def sorted_file_equal(file1: PathLike, file2: PathLike) -> bool:
 
 
 def file_rows_almost_equal(
-    file1: PathLike,
-    file2: PathLike,
-    fuzzy_col: int,
-    sep: str = "\t",
-    header: bool = False,
+    file1: PathLike, file2: PathLike, fuzzy_col: int, sep: str = "\t", header: bool = False,
 ) -> bool:
     """Compares two files row by row and makes sure they are almost equal"""
     results = []
@@ -122,10 +118,7 @@ def assert_legacy_dev_sample_qc_equal(legacy_file: PathLike, dev_file: PathLike)
     )
     assert_series_equal(legacy["Call_Rate_2"], dev["Call_Rate_2"])
     assert_series_equal(
-        legacy["Low Call Rate"],
-        dev["is_call_rate_filtered"],
-        check_dtype=False,
-        check_names=False,
+        legacy["Low Call Rate"], dev["is_call_rate_filtered"], check_dtype=False, check_names=False,
     )
 
     # --------------------
diff --git a/src/cgr_gwas_qc/workflow/scripts/bim_filter_vcf.py b/src/cgr_gwas_qc/workflow/scripts/bim_filter_vcf.py
index 108c3843..64f2bda9 100755
--- a/src/cgr_gwas_qc/workflow/scripts/bim_filter_vcf.py
+++ b/src/cgr_gwas_qc/workflow/scripts/bim_filter_vcf.py
@@ -20,10 +20,7 @@ def main(
     ),
     vcf_file: Path = typer.Argument(..., help="The 1KG VCF file.", exists=True, readable=True),
     snp_removal_list: Path = typer.Argument(
-        ...,
-        help="Text file to save the list of makers to remove.",
-        file_okay=True,
-        writable=True,
+        ..., help="Text file to save the list of makers to remove.", file_okay=True, writable=True,
     ),
     output_bim: Optional[Path] = typer.Argument(
         None,
diff --git a/src/cgr_gwas_qc/workflow/scripts/sample_concordance.py b/src/cgr_gwas_qc/workflow/scripts/sample_concordance.py
index 778261df..774937a7 100755
--- a/src/cgr_gwas_qc/workflow/scripts/sample_concordance.py
+++ b/src/cgr_gwas_qc/workflow/scripts/sample_concordance.py
@@ -102,11 +102,7 @@ def read(filename: PathLike):
 
 @app.command()
 def main(
-    sample_sheet_csv: Path,
-    plink_file: Path,
-    graf_file: Path,
-    king_file: Path,
-    outfile: Path,
+    sample_sheet_csv: Path, plink_file: Path, graf_file: Path, king_file: Path, outfile: Path,
 ):
     ss = sample_sheet.read(sample_sheet_csv)
     concordance = (
@@ -249,8 +245,7 @@ def _graf(filename: PathLike):
         .set_index(["ID1", "ID2"])
         .reindex(["HGMR", "AGMR", "relationship"], axis=1)
         .rename(
-            {"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"},
-            axis=1,
+            {"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"}, axis=1,
         )
     )
 
diff --git a/src/cgr_gwas_qc/workflow/scripts/sample_concordance_new.py b/src/cgr_gwas_qc/workflow/scripts/sample_concordance_new.py
index 3d1c571d..70544253 100755
--- a/src/cgr_gwas_qc/workflow/scripts/sample_concordance_new.py
+++ b/src/cgr_gwas_qc/workflow/scripts/sample_concordance_new.py
@@ -104,11 +104,7 @@ def read(filename: PathLike):
 
 @app.command()
 def main(
-    sample_sheet_csv: Path,
-    plink_file: Path,
-    graf_file: Path,
-    king_file: Path,
-    outfile: Path,
+    sample_sheet_csv: Path, plink_file: Path, graf_file: Path, king_file: Path, outfile: Path,
 ):
     ss = sample_sheet.read(sample_sheet_csv)
     concordance = (
@@ -273,8 +269,7 @@ def _graf(filename: PathLike):
         .set_index(["ID1", "ID2"])
         .reindex(["HGMR", "AGMR", "relationship"], axis=1)
         .rename(
-            {"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"},
-            axis=1,
+            {"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"}, axis=1,
         )
     )
 
diff --git a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
index 411effe0..5b1cebba 100755
--- a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
+++ b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
@@ -190,9 +190,7 @@ def main(
         intensity,
     )
     add_qc_columns(
-        sample_qc,
-        remove_contam,
-        remove_rep_discordant,
+        sample_qc, remove_contam, remove_rep_discordant,
     )
 
     save(sample_qc, outfile)
@@ -398,8 +396,7 @@ def _read_contam(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.DataFram
 
     if file_name is None:
         return pd.DataFrame(
-            index=Sample_IDs,
-            columns=["Contamination_Rate", "is_contaminated"],
+            index=Sample_IDs, columns=["Contamination_Rate", "is_contaminated"],
         ).astype({"Contamination_Rate": "float", "is_contaminated": "boolean"})
 
     return (
@@ -442,16 +439,12 @@ def _read_intensity(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.Serie
 
 
 def add_qc_columns(
-    sample_qc: pd.DataFrame,
-    remove_contam: bool,
-    remove_rep_discordant: bool,
+    sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
 ) -> pd.DataFrame:
     add_call_rate_flags(sample_qc)
     _add_identifiler(sample_qc)
     _add_analytic_exclusion(
-        sample_qc,
-        remove_contam,
-        remove_rep_discordant,
+        sample_qc, remove_contam, remove_rep_discordant,
     )
     _add_subject_representative(sample_qc)
     _add_subject_dropped_from_study(sample_qc)
@@ -497,9 +490,7 @@ def reason_string(row: pd.Series) -> str:
 
 
 def _add_analytic_exclusion(
-    sample_qc: pd.DataFrame,
-    remove_contam: bool,
-    remove_rep_discordant: bool,
+    sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
 ) -> pd.DataFrame:
     """Adds a flag to remove samples based on provided conditions.
 
diff --git a/src/cgr_gwas_qc/workflow/scripts/subject_qc_table.py b/src/cgr_gwas_qc/workflow/scripts/subject_qc_table.py
index 81f59aec..44c25c3e 100644
--- a/src/cgr_gwas_qc/workflow/scripts/subject_qc_table.py
+++ b/src/cgr_gwas_qc/workflow/scripts/subject_qc_table.py
@@ -78,9 +78,7 @@ def read(filename: PathLike) -> pd.DataFrame:
 
 @app.command()
 def main(
-    sample_qc_csv: Path,
-    sample_concordance_csv: Path,
-    outfile: Path,
+    sample_qc_csv: Path, sample_concordance_csv: Path, outfile: Path,
 ):
     (
         sample_qc_table.read(sample_qc_csv)
diff --git a/src/cgr_gwas_qc/workflow/scripts/update_snps_to_1kg_rsID.py b/src/cgr_gwas_qc/workflow/scripts/update_snps_to_1kg_rsID.py
index c12c41f5..00947035 100755
--- a/src/cgr_gwas_qc/workflow/scripts/update_snps_to_1kg_rsID.py
+++ b/src/cgr_gwas_qc/workflow/scripts/update_snps_to_1kg_rsID.py
@@ -42,10 +42,7 @@ def main(
         writable=True,
     ),
     id_map_out: Path = typer.Argument(
-        ...,
-        help="CSV mapping array ids to thousand genome ids.",
-        file_okay=True,
-        writable=True,
+        ..., help="CSV mapping array ids to thousand genome ids.", file_okay=True, writable=True,
     ),
 ):
     """Compares a plink BIM file with a VCF and updates IDs to match VCF.
diff --git a/tests/workflow/modules/test_eigensoft.py b/tests/workflow/modules/test_eigensoft.py
index 509142c1..8b31fa2e 100644
--- a/tests/workflow/modules/test_eigensoft.py
+++ b/tests/workflow/modules/test_eigensoft.py
@@ -90,10 +90,8 @@ def test_eigensoft_smartpca(tmp_path, conda_envs):
 
     # THEN: the files exists
     file_hashes_equal(
-        data_cache / "legacy_outputs/pca/EUR_subjects.smartpca.par",
-        tmp_path / "samples.pca.par",
+        data_cache / "legacy_outputs/pca/EUR_subjects.smartpca.par", tmp_path / "samples.pca.par",
     )
     file_hashes_equal(
-        data_cache / "legacy_outputs/pca/EUR_subjects.eigenvec",
-        tmp_path / "samples.eigenvec",
+        data_cache / "legacy_outputs/pca/EUR_subjects.eigenvec", tmp_path / "samples.eigenvec",
     )
diff --git a/tests/workflow/sub_workflows/test_entry_points.py b/tests/workflow/sub_workflows/test_entry_points.py
index 3f0cb71f..d1a0f212 100644
--- a/tests/workflow/sub_workflows/test_entry_points.py
+++ b/tests/workflow/sub_workflows/test_entry_points.py
@@ -176,16 +176,13 @@ def gtc_grouped_entry(pytestconfig, tmp_path_factory, conda_envs):
 def test_gtc_grouped_entry(gtc_entry, gtc_grouped_entry):
     # The merged samples files should exist.
     assert file_hashes_equal(
-        gtc_entry / "sample_level/samples.bed",
-        gtc_grouped_entry / "sample_level/samples.bed",
+        gtc_entry / "sample_level/samples.bed", gtc_grouped_entry / "sample_level/samples.bed",
     )
     assert file_hashes_equal(
-        gtc_entry / "sample_level/samples.bim",
-        gtc_grouped_entry / "sample_level/samples.bim",
+        gtc_entry / "sample_level/samples.bim", gtc_grouped_entry / "sample_level/samples.bim",
     )
     assert file_hashes_equal(
-        gtc_entry / "sample_level/samples.fam",
-        gtc_grouped_entry / "sample_level/samples.fam",
+        gtc_entry / "sample_level/samples.fam", gtc_grouped_entry / "sample_level/samples.fam",
     )
 
 
diff --git a/tests/workflow/sub_workflows/test_sample_qc.py b/tests/workflow/sub_workflows/test_sample_qc.py
index fbfd24dc..6abdc90a 100644
--- a/tests/workflow/sub_workflows/test_sample_qc.py
+++ b/tests/workflow/sub_workflows/test_sample_qc.py
@@ -339,8 +339,7 @@ def sample_qc_workflow(pytestconfig, tmp_path_factory, conda_envs):
 @pytest.mark.workflow
 def test_sample_qc_compare_hashes(real_data_cache, sample_qc_workflow, filename):
     assert comparison.file_hashes_equal(
-        real_data_cache / "dev_outputs" / filename,
-        sample_qc_workflow / filename,
+        real_data_cache / "dev_outputs" / filename, sample_qc_workflow / filename,
     )
 
 
diff --git a/tests/workflow/sub_workflows/test_subject_qc.py b/tests/workflow/sub_workflows/test_subject_qc.py
index bf8c70a7..3cfb47bb 100644
--- a/tests/workflow/sub_workflows/test_subject_qc.py
+++ b/tests/workflow/sub_workflows/test_subject_qc.py
@@ -300,6 +300,5 @@ def subject_qc_workflow(pytestconfig, tmp_path_factory, conda_envs):
 @pytest.mark.workflow
 def test_subject_qc_file_hashes(real_data_cache, subject_qc_workflow, filename):
     assert comparison.file_hashes_equal(
-        real_data_cache / "dev_outputs" / filename,
-        subject_qc_workflow / filename,
+        real_data_cache / "dev_outputs" / filename, subject_qc_workflow / filename,
     )