Merge pull request #266 from NCI-CGR/williscd_test

LIMS upload formatting
NCI-CGR · Feb 16, 2024 · 95232f1
2 parents 16f6943 + 029a3c7
commit 95232f1
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 7 deletions.
diff --git a/src/cgr_gwas_qc/models/config/workflow_params.py b/src/cgr_gwas_qc/models/config/workflow_params.py
@@ -67,7 +67,7 @@ class WorkflowParams(BaseModel):
     )
 
     lims_upload: bool = Field(
-        False,
+        True,
         description="For ``CGEMS/CCAD`` use only, will place a copy of the LimsUpload file in the root directory.",
     )
 

diff --git a/src/cgr_gwas_qc/workflow/scripts/lab_lims_upload.py b/src/cgr_gwas_qc/workflow/scripts/lab_lims_upload.py
@@ -31,9 +31,17 @@ def main(sample_sheet_csv: Path, sample_qc_csv: Path, outfile: Path):
     df = qc.merge(ss, on="Sample_ID", how="outer", suffixes=["", "_DROP"]).filter(
         regex="^(?!.*_DROP)", axis=1
     )
-
+    df = df.reindex(COLUMNS, axis=1)
+    # rename columns to work with LIMS system
+    df = df.rename(columns = {'Sample_ID':'Sample ID','Call_Rate_Initial':'Call Rate'})
+    # For the samples missing GTC files, their status in the low call rate column should be TRUE instead of blank
+    df.loc[df['Call Rate'] =="",'Low Call Rate'] = True
+    # Fill in blanks for replicate and discordant columns
+    df['Unexpected Replicate'] = df['Unexpected Replicate'].replace("",False).fillna(False) 
+    df['Expected Replicate Discordance'] = df['Expected Replicate Discordance'].replace("",False).fillna(False)
+    df['Sex Discordant'] = df['Sex Discordant'].replace("",False).fillna(False)
     # Adjust names and column order to match legacy
-    df.reindex(COLUMNS, axis=1).to_csv(outfile, index=False)
+    df.to_csv(outfile, index=False)
 
 
 if __name__ == "__main__":

diff --git a/src/cgr_gwas_qc/workflow/scripts/qc_report_table.py b/src/cgr_gwas_qc/workflow/scripts/qc_report_table.py
@@ -72,7 +72,7 @@ def main(
     "IdatIntensity",
     "Expected Replicate",
     "Expected Replicate Discordance",
-    "Unexpected Replicate",
+    "is_unexpected_replicate",
     "Sex Discordant",
     "Expected_Sex",
     "Predicted_Sex",
@@ -112,7 +112,7 @@ def _sample_qc(sample_sheet_csv: PathLike, sample_qc_csv: PathLike) -> pd.DataFr
     "Ancestry2",
     "Expected Replicate",
     "Expected Replicate Discordance",
-    "Unexpected Replicate",
+    "is_unexpected_replicate",
     "PLINK_PI_HAT",
     "PLINK_concordance",
     "PLINK_is_ge_pi_hat",
@@ -177,7 +177,7 @@ def _sample_concordance(sample_qc_csv: PathLike, sample_concordance_csv: PathLik
     "Group_By_Subject_ID",
     "Sample_ID",
     "Case/Control_Status",
-    "Unexpected Replicate",
+    "is_unexpected_replicate",
     "unexpected_replicate_ids",
     "Expected_Sex",
     "Predicted_Sex",

diff --git a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
@@ -96,6 +96,7 @@
     "is_contaminated": "boolean",
     "replicate_ids": "string",
     "is_discordant_replicate": "boolean",
+    "is_unexpected_replicate": "boolean",
     "expected_sex": SEX_DTYPE,
     "predicted_sex": SEX_DTYPE,
     "X_inbreeding_coefficient": "float",
@@ -365,11 +366,12 @@ def _read_concordance(filename: Path, Sample_IDs: pd.Index) -> pd.DataFrame:
             - Sample_ID (pd.Index)
             - is_discordant_replicate (bool): True if replicates show
               a concordance below the supplied threshold. Otherwise False.
+            - is_unexpected_replicate
     """
     df = sample_concordance.read(filename)
     return (
         df.melt(
-            id_vars=["is_discordant_replicate"],
+            id_vars=["is_discordant_replicate","is_unexpected_replicate"],
             value_vars=["Sample_ID1", "Sample_ID2"],
             var_name="To_Drop",
             value_name="Sample_ID",