Skip to content

Commit

Permalink
fixup: Format Python code with Black
Browse files (browse the repository at this point in the history)
  • Loading branch information
jfear committed Sep 6, 2023
1 parent 0606368 commit 0413a0a
Show file tree
Hide file tree
Showing 13 changed files with 26 additions and 93 deletions.
9 changes: 1 addition & 8 deletions src/cgr_gwas_qc/parsers/illumina/adpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,7 @@ def validate(self):
def __iter__(self):
"""This allows tuple unpacking"""
return iter(
(
self.x_raw,
self.y_raw,
self.x_norm,
self.y_norm,
self.genotype_score,
self.genotype,
)
(self.x_raw, self.y_raw, self.x_norm, self.y_norm, self.genotype_score, self.genotype,)
)

def __str__(self):
Expand Down
27 changes: 4 additions & 23 deletions src/cgr_gwas_qc/scripts/legacy_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,7 @@ def compare_config(config: Config, legacy_dir: Path, ignored_config: bool):
config.software_params.contam_population,
"B Allele Frequency Population (contamination check)",
),
(
legacy_config["strand"].lower(),
config.software_params.strand,
"Strand",
),
(legacy_config["strand"].lower(), config.software_params.strand, "Strand",),
(
legacy_config["pi_hat_threshold"],
config.software_params.pi_hat_threshold,
Expand Down Expand Up @@ -222,8 +218,7 @@ def _file_comparison(cmp):
typer.secho(f"{cmp.message} did not match ({cmp.legacy} vs {cmp.current}).", fg=RED)
except NotImplementedError:
typer.secho(
f"Cannot currently compare {cmp.message}",
fg=YELLOW,
f"Cannot currently compare {cmp.message}", fg=YELLOW,
)


Expand Down Expand Up @@ -449,22 +444,8 @@ def compare_contamination(legacy_dir: Path, mix_atol: float, llk_atol: float, ll
def _parse_snpweights(filename):
return (
pd.read_csv(filename, dtype={"ID": str}, low_memory=False)
.rename(
{
"ID": "Sample_ID",
"AFR": "Pct_AFR",
"ASN": "Pct_ASN",
"EUR": "Pct_EUR",
},
axis=1,
)
.replace(
{
"EUR": "European",
"AFR": "African",
"ASN": "East Asian",
}
)
.rename({"ID": "Sample_ID", "AFR": "Pct_AFR", "ASN": "Pct_ASN", "EUR": "Pct_EUR",}, axis=1,)
.replace({"EUR": "European", "AFR": "African", "ASN": "East Asian",})
.set_index("Sample_ID")
.sort_index()
)
Expand Down
11 changes: 2 additions & 9 deletions src/cgr_gwas_qc/testing/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,7 @@ def sorted_file_equal(file1: PathLike, file2: PathLike) -> bool:


def file_rows_almost_equal(
file1: PathLike,
file2: PathLike,
fuzzy_col: int,
sep: str = "\t",
header: bool = False,
file1: PathLike, file2: PathLike, fuzzy_col: int, sep: str = "\t", header: bool = False,
) -> bool:
"""Compares two files row by row and makes sure they are almost equal"""
results = []
Expand Down Expand Up @@ -122,10 +118,7 @@ def assert_legacy_dev_sample_qc_equal(legacy_file: PathLike, dev_file: PathLike)
)
assert_series_equal(legacy["Call_Rate_2"], dev["Call_Rate_2"])
assert_series_equal(
legacy["Low Call Rate"],
dev["is_call_rate_filtered"],
check_dtype=False,
check_names=False,
legacy["Low Call Rate"], dev["is_call_rate_filtered"], check_dtype=False, check_names=False,
)

# --------------------
Expand Down
5 changes: 1 addition & 4 deletions src/cgr_gwas_qc/workflow/scripts/bim_filter_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ def main(
),
vcf_file: Path = typer.Argument(..., help="The 1KG VCF file.", exists=True, readable=True),
snp_removal_list: Path = typer.Argument(
...,
help="Text file to save the list of makers to remove.",
file_okay=True,
writable=True,
..., help="Text file to save the list of makers to remove.", file_okay=True, writable=True,
),
output_bim: Optional[Path] = typer.Argument(
None,
Expand Down
9 changes: 2 additions & 7 deletions src/cgr_gwas_qc/workflow/scripts/sample_concordance.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,7 @@ def read(filename: PathLike):

@app.command()
def main(
sample_sheet_csv: Path,
plink_file: Path,
graf_file: Path,
king_file: Path,
outfile: Path,
sample_sheet_csv: Path, plink_file: Path, graf_file: Path, king_file: Path, outfile: Path,
):
ss = sample_sheet.read(sample_sheet_csv)
concordance = (
Expand Down Expand Up @@ -249,8 +245,7 @@ def _graf(filename: PathLike):
.set_index(["ID1", "ID2"])
.reindex(["HGMR", "AGMR", "relationship"], axis=1)
.rename(
{"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"},
axis=1,
{"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"}, axis=1,
)
)

Expand Down
9 changes: 2 additions & 7 deletions src/cgr_gwas_qc/workflow/scripts/sample_concordance_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,7 @@ def read(filename: PathLike):

@app.command()
def main(
sample_sheet_csv: Path,
plink_file: Path,
graf_file: Path,
king_file: Path,
outfile: Path,
sample_sheet_csv: Path, plink_file: Path, graf_file: Path, king_file: Path, outfile: Path,
):
ss = sample_sheet.read(sample_sheet_csv)
concordance = (
Expand Down Expand Up @@ -273,8 +269,7 @@ def _graf(filename: PathLike):
.set_index(["ID1", "ID2"])
.reindex(["HGMR", "AGMR", "relationship"], axis=1)
.rename(
{"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"},
axis=1,
{"HGMR": "GRAF_HGMR", "AGMR": "GRAF_AGMR", "relationship": "GRAF_relationship"}, axis=1,
)
)

Expand Down
19 changes: 5 additions & 14 deletions src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,7 @@ def main(
intensity,
)
add_qc_columns(
sample_qc,
remove_contam,
remove_rep_discordant,
sample_qc, remove_contam, remove_rep_discordant,
)
save(sample_qc, outfile)

Expand Down Expand Up @@ -398,8 +396,7 @@ def _read_contam(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.DataFram

if file_name is None:
return pd.DataFrame(
index=Sample_IDs,
columns=["Contamination_Rate", "is_contaminated"],
index=Sample_IDs, columns=["Contamination_Rate", "is_contaminated"],
).astype({"Contamination_Rate": "float", "is_contaminated": "boolean"})

return (
Expand Down Expand Up @@ -442,16 +439,12 @@ def _read_intensity(file_name: Optional[Path], Sample_IDs: pd.Index) -> pd.Serie


def add_qc_columns(
sample_qc: pd.DataFrame,
remove_contam: bool,
remove_rep_discordant: bool,
sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
) -> pd.DataFrame:
add_call_rate_flags(sample_qc)
_add_identifiler(sample_qc)
_add_analytic_exclusion(
sample_qc,
remove_contam,
remove_rep_discordant,
sample_qc, remove_contam, remove_rep_discordant,
)
_add_subject_representative(sample_qc)
_add_subject_dropped_from_study(sample_qc)
Expand Down Expand Up @@ -497,9 +490,7 @@ def reason_string(row: pd.Series) -> str:


def _add_analytic_exclusion(
sample_qc: pd.DataFrame,
remove_contam: bool,
remove_rep_discordant: bool,
sample_qc: pd.DataFrame, remove_contam: bool, remove_rep_discordant: bool,
) -> pd.DataFrame:
"""Adds a flag to remove samples based on provided conditions.
Expand Down
4 changes: 1 addition & 3 deletions src/cgr_gwas_qc/workflow/scripts/subject_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,7 @@ def read(filename: PathLike) -> pd.DataFrame:

@app.command()
def main(
sample_qc_csv: Path,
sample_concordance_csv: Path,
outfile: Path,
sample_qc_csv: Path, sample_concordance_csv: Path, outfile: Path,
):
(
sample_qc_table.read(sample_qc_csv)
Expand Down
5 changes: 1 addition & 4 deletions src/cgr_gwas_qc/workflow/scripts/update_snps_to_1kg_rsID.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,7 @@ def main(
writable=True,
),
id_map_out: Path = typer.Argument(
...,
help="CSV mapping array ids to thousand genome ids.",
file_okay=True,
writable=True,
..., help="CSV mapping array ids to thousand genome ids.", file_okay=True, writable=True,
),
):
"""Compares a plink BIM file with a VCF and updates IDs to match VCF.
Expand Down
6 changes: 2 additions & 4 deletions tests/workflow/modules/test_eigensoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,8 @@ def test_eigensoft_smartpca(tmp_path, conda_envs):

# THEN: the files exist
file_hashes_equal(
data_cache / "legacy_outputs/pca/EUR_subjects.smartpca.par",
tmp_path / "samples.pca.par",
data_cache / "legacy_outputs/pca/EUR_subjects.smartpca.par", tmp_path / "samples.pca.par",
)
file_hashes_equal(
data_cache / "legacy_outputs/pca/EUR_subjects.eigenvec",
tmp_path / "samples.eigenvec",
data_cache / "legacy_outputs/pca/EUR_subjects.eigenvec", tmp_path / "samples.eigenvec",
)
9 changes: 3 additions & 6 deletions tests/workflow/sub_workflows/test_entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,16 +176,13 @@ def gtc_grouped_entry(pytestconfig, tmp_path_factory, conda_envs):
def test_gtc_grouped_entry(gtc_entry, gtc_grouped_entry):
# The merged samples files should exist.
assert file_hashes_equal(
gtc_entry / "sample_level/samples.bed",
gtc_grouped_entry / "sample_level/samples.bed",
gtc_entry / "sample_level/samples.bed", gtc_grouped_entry / "sample_level/samples.bed",
)
assert file_hashes_equal(
gtc_entry / "sample_level/samples.bim",
gtc_grouped_entry / "sample_level/samples.bim",
gtc_entry / "sample_level/samples.bim", gtc_grouped_entry / "sample_level/samples.bim",
)
assert file_hashes_equal(
gtc_entry / "sample_level/samples.fam",
gtc_grouped_entry / "sample_level/samples.fam",
gtc_entry / "sample_level/samples.fam", gtc_grouped_entry / "sample_level/samples.fam",
)


Expand Down
3 changes: 1 addition & 2 deletions tests/workflow/sub_workflows/test_sample_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,7 @@ def sample_qc_workflow(pytestconfig, tmp_path_factory, conda_envs):
@pytest.mark.workflow
def test_sample_qc_compare_hashes(real_data_cache, sample_qc_workflow, filename):
assert comparison.file_hashes_equal(
real_data_cache / "dev_outputs" / filename,
sample_qc_workflow / filename,
real_data_cache / "dev_outputs" / filename, sample_qc_workflow / filename,
)


Expand Down
3 changes: 1 addition & 2 deletions tests/workflow/sub_workflows/test_subject_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,5 @@ def subject_qc_workflow(pytestconfig, tmp_path_factory, conda_envs):
@pytest.mark.workflow
def test_subject_qc_file_hashes(real_data_cache, subject_qc_workflow, filename):
assert comparison.file_hashes_equal(
real_data_cache / "dev_outputs" / filename,
subject_qc_workflow / filename,
real_data_cache / "dev_outputs" / filename, subject_qc_workflow / filename,
)

0 comments on commit 0413a0a

Please sign in to comment.