From 93fa4887c840e3b55fd888cd26613b2c3884d800 Mon Sep 17 00:00:00 2001
From: Magnus Wahlberg
Date: Wed, 6 Dec 2023 14:44:34 +0100
Subject: [PATCH] Use pyarrow for reading csvs

---
 deeprvat/preprocessing/preprocess.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/deeprvat/preprocessing/preprocess.py b/deeprvat/preprocessing/preprocess.py
index ed4dfe37..46eba386 100644
--- a/deeprvat/preprocessing/preprocess.py
+++ b/deeprvat/preprocessing/preprocess.py
@@ -48,10 +48,10 @@ def process_sparse_gt_file(
     samples: List[str],
     calls_to_exclude: pd.DataFrame = None,
 ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
-    sparse_gt = pd.read_csv(
+    sparse_gt = pd.read_table(
         file,
         names=["chrom", "pos", "ref", "alt", "sample", "gt"],
-        sep="\t",
+        engine="pyarrow",
         index_col=None,
     )
     sparse_gt = sparse_gt[sparse_gt["sample"].isin(samples)]
@@ -183,7 +183,7 @@ def process_sparse_gt(
 
     logging.info("Reading variants...")
     start_time = time.time()
-    variants = pd.read_csv(variant_file, sep="\t")
+    variants = pd.read_table(variant_file, engine="pyarrow")
 
     # Filter all variants based on chromosome
     if chromosomes is not None: