Skip to content

Commit

Permalink
Merge pull request #266 from NCI-CGR/williscd_test
Browse files: browse the repository at this point in the history
Lims upload formatting
  • Loading branch information
carynwillis authored Feb 16, 2024
2 parents 16f6943 + 029a3c7 commit 95232f1
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/cgr_gwas_qc/models/config/workflow_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class WorkflowParams(BaseModel):
)

lims_upload: bool = Field(
False,
True,
description="For ``CGEMS/CCAD`` use only, will place a copy of the LimsUpload file in the root directory.",
)

Expand Down
12 changes: 10 additions & 2 deletions src/cgr_gwas_qc/workflow/scripts/lab_lims_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ def main(sample_sheet_csv: Path, sample_qc_csv: Path, outfile: Path):
df = qc.merge(ss, on="Sample_ID", how="outer", suffixes=["", "_DROP"]).filter(
regex="^(?!.*_DROP)", axis=1
)

df = df.reindex(COLUMNS, axis=1)
# rename columns to work with LIMS system
df = df.rename(columns = {'Sample_ID':'Sample ID','Call_Rate_Initial':'Call Rate'})
# For the samples missing GTC files, their status in the low call rate column should be TRUE instead of blank
df.loc[df['Call Rate'] =="",'Low Call Rate'] = True
# Fill in blanks for replicate and discordant columns
df['Unexpected Replicate'] = df['Unexpected Replicate'].replace("",False).fillna(False)
df['Expected Replicate Discordance'] = df['Expected Replicate Discordance'].replace("",False).fillna(False)
df['Sex Discordant'] = df['Sex Discordant'].replace("",False).fillna(False)
# Adjust names and column order to match legacy
df.reindex(COLUMNS, axis=1).to_csv(outfile, index=False)
df.to_csv(outfile, index=False)


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions src/cgr_gwas_qc/workflow/scripts/qc_report_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def main(
"IdatIntensity",
"Expected Replicate",
"Expected Replicate Discordance",
"Unexpected Replicate",
"is_unexpected_replicate",
"Sex Discordant",
"Expected_Sex",
"Predicted_Sex",
Expand Down Expand Up @@ -112,7 +112,7 @@ def _sample_qc(sample_sheet_csv: PathLike, sample_qc_csv: PathLike) -> pd.DataFr
"Ancestry2",
"Expected Replicate",
"Expected Replicate Discordance",
"Unexpected Replicate",
"is_unexpected_replicate",
"PLINK_PI_HAT",
"PLINK_concordance",
"PLINK_is_ge_pi_hat",
Expand Down Expand Up @@ -177,7 +177,7 @@ def _sample_concordance(sample_qc_csv: PathLike, sample_concordance_csv: PathLik
"Group_By_Subject_ID",
"Sample_ID",
"Case/Control_Status",
"Unexpected Replicate",
"is_unexpected_replicate",
"unexpected_replicate_ids",
"Expected_Sex",
"Predicted_Sex",
Expand Down
4 changes: 3 additions & 1 deletion src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
"is_contaminated": "boolean",
"replicate_ids": "string",
"is_discordant_replicate": "boolean",
"is_unexpected_replicate": "boolean",
"expected_sex": SEX_DTYPE,
"predicted_sex": SEX_DTYPE,
"X_inbreeding_coefficient": "float",
Expand Down Expand Up @@ -365,11 +366,12 @@ def _read_concordance(filename: Path, Sample_IDs: pd.Index) -> pd.DataFrame:
- Sample_ID (pd.Index)
- is_discordant_replicate (bool): True if replicates show
a concordance below the supplied threshold. Otherwise False.
- is_unexpected_replicate (bool): Unexpected-replicate flag carried over
from the sample concordance table.
"""
df = sample_concordance.read(filename)
return (
df.melt(
id_vars=["is_discordant_replicate"],
id_vars=["is_discordant_replicate","is_unexpected_replicate"],
value_vars=["Sample_ID1", "Sample_ID2"],
var_name="To_Drop",
value_name="Sample_ID",
Expand Down

0 comments on commit 95232f1

Please sign in to comment.