From cac402c7c372a9188df25fba9b21af8cc529d97c Mon Sep 17 00:00:00 2001 From: Miguel Brown Date: Tue, 24 Oct 2023 20:26:49 +0000 Subject: [PATCH 1/2] :wrench: fixes for numpy pandas compatibility --- scripts/cnv_3_gistic_style.py | 24 +++++++++++------------- scripts/get_files_from_manifest.py | 11 ++++------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/scripts/cnv_3_gistic_style.py b/scripts/cnv_3_gistic_style.py index 9354682..72c609b 100755 --- a/scripts/cnv_3_gistic_style.py +++ b/scripts/cnv_3_gistic_style.py @@ -2,7 +2,6 @@ import sys import argparse -import concurrent.futures import json import subprocess import re @@ -121,18 +120,17 @@ def mt_adjust_cn(obj): x = 1 m = 50 - with concurrent.futures.ThreadPoolExecutor(config_data["threads"]) as executor: - results = { - executor.submit(mt_adjust_cn, bs_id): bs_id for bs_id in bs_cbio_dict - } - for result in concurrent.futures.as_completed(results): - if result.result()[0] == 1: - "Had trouble processing object " + result.result([1] + "\n") - sys.exit(1) - if x % m == 0: - sys.stderr.write("Processed " + str(x) + " samples\n") - sys.stderr.flush() - x += 1 + + for bs_id in bs_cbio_dict: + result = mt_adjust_cn(bs_id) + if result[0] == 1: + sys.stderr.write("Had trouble processing object " + result[1] + "\n") + sys.exit(1) + if x % m == 0: + sys.stderr.write("Processed " + str(x) + " samples\n") + sys.stderr.flush() + x += 1 + sys.stderr.write("Conversion completed. Writing results to file\n") new_fname = cbio_dx = ( args.merged_cnv_dir + "/" + parts.group(1) + ".discrete_cnvs.txt" diff --git a/scripts/get_files_from_manifest.py b/scripts/get_files_from_manifest.py index 3e79e10..98c8030 100755 --- a/scripts/get_files_from_manifest.py +++ b/scripts/get_files_from_manifest.py @@ -128,15 +128,12 @@ def mt_type_download(file_type): sys.stderr.write("Concatenating manifests\n") sys.stderr.flush() manifest_list = args.manifest.split(",") -manifest_concat = pd.DataFrame() +manifest_df_list = [] for manifest in manifest_list: sys.stderr.write("Processing " + manifest + "\n") - current = pd.read_csv(manifest, sep=None) - if manifest_concat.empty: - manifest_concat = current.copy() - else: - manifest_concat = manifest_concat.append(current, ignore_index=True) + manifest_df_list.append(pd.read_csv(manifest, sep=None)) # In the event that s3_path is empty, replace with str to trigger later sbg download +manifest_concat = pd.concat(manifest_df_list, ignore_index=True) manifest_concat.s3_path = manifest_concat.s3_path.fillna('None') file_types = args.fts.split(",") # subset concatenated manifests @@ -185,7 +182,7 @@ def mt_type_download(file_type): key_dict[key]['session'] = boto3.Session(profile_name=key) key_dict[key]['dl_client'] = key_dict[key]['session'].client("s3", config=client_config) else: - key_dict[key]['manifest'] = key_dict[key]['manifest'].append(selected[selected['s3_path'].str.startswith(bucket)], ignore_index=True) + key_dict[key]['manifest'] = pd.concat([key_dict[key]['manifest'], selected[selected['s3_path'].str.startswith(bucket)]], ignore_index=True) if args.sbg_profile is not None: check = 1 config = sbg.Config(profile=args.sbg_profile) From 1d784a152a0b9d8d4c1a7992780937940f4270ac Mon Sep 17 00:00:00 2001 From: Miguel Brown Date: Thu, 9 Nov 2023 11:28:06 -0500 Subject: [PATCH 2/2] :hammer: incorporated PR suggestions --- scripts/cnv_3_gistic_style.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/cnv_3_gistic_style.py b/scripts/cnv_3_gistic_style.py index 72c609b..ad28e41 100755 --- a/scripts/cnv_3_gistic_style.py +++ b/scripts/cnv_3_gistic_style.py @@ -112,7 +112,6 @@ def mt_adjust_cn(obj): # sample list would be cbio ids samp_list = list(data.columns)[1:] bs_cbio_dict = {} - # fid_dict = {} for samp_id in samp_list: bs_id = file_meta_dict[cbio_dx][samp_id]["kf_tum_id"] bs_cbio_dict[bs_id] = samp_id @@ -122,9 +121,9 @@ def mt_adjust_cn(obj): m = 50 for bs_id in bs_cbio_dict: - result = mt_adjust_cn(bs_id) - if result[0] == 1: - sys.stderr.write("Had trouble processing object " + result[1] + "\n") + exit_code, object = mt_adjust_cn(bs_id) + if exit_code == 1: + sys.stderr.write("Had trouble processing object " + object + "\n") sys.exit(1) if x % m == 0: sys.stderr.write("Processed " + str(x) + " samples\n")