From abc3449c965aa71e2dfa5fe5b8ec89f05cac19a9 Mon Sep 17 00:00:00 2001 From: Kevin Liao Date: Thu, 21 Mar 2024 10:06:20 -0400 Subject: [PATCH] Issue 216: Fix sample_qc generation when subject IDs are numeric --- src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py index 9f8e0e8d..64b48d60 100755 --- a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py +++ b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py @@ -320,9 +320,10 @@ def _read_GRAF(file_name: Path, Sample_IDs: pd.Index) -> pd.DataFrame: .. _manuscript: https://pubmed.ncbi.nlm.nih.gov/31151998/ """ + return ( pd.read_csv(file_name, sep="\t") - .rename({"Subject": "Sample_ID"}, axis=1) + .assign(Sample_ID=lambda x: x["Subject"].astype(str)) # Issue 216: reindex fails when subject IDs are numeric, so Sample_ID is always cast to str. .assign(Ancestry=lambda x: x["Computed population"].str.replace(" ", "_")) .assign(AFR=lambda x: x["P_f (%)"] / 100) .assign(EUR=lambda x: x["P_e (%)"] / 100)