From abc3449c965aa71e2dfa5fe5b8ec89f05cac19a9 Mon Sep 17 00:00:00 2001
From: Kevin Liao <liaoks@ccad2.cm.cluster>
Date: Thu, 21 Mar 2024 10:06:20 -0400
Subject: [PATCH] Issue 216: Fix sample_qc generation when subject IDs are
 numeric

---
 src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
index 9f8e0e8d..64b48d60 100755
--- a/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
+++ b/src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
@@ -320,9 +320,10 @@ def _read_GRAF(file_name: Path, Sample_IDs: pd.Index) -> pd.DataFrame:
     .. _manuscript: https://pubmed.ncbi.nlm.nih.gov/31151998/
 
     """
+
     return (
         pd.read_csv(file_name, sep="\t")
-        .rename({"Subject": "Sample_ID"}, axis=1)
+        .assign(Sample_ID=lambda x: x["Subject"].astype(str))  # Issue 216: reindex fails when subject IDs are numeric; cast them to str so that Sample_ID is always a string.
         .assign(Ancestry=lambda x: x["Computed population"].str.replace(" ", "_"))
         .assign(AFR=lambda x: x["P_f (%)"] / 100)
         .assign(EUR=lambda x: x["P_e (%)"] / 100)