Merge pull request #53 from kids-first/feature/mb-fix-pandas
🔧 Fixes for numpy pandas compatibility
migbro committed Nov 9, 2023
2 parents 1c9075a + 1d784a1 commit c2b5bfc
Showing 2 changed files with 15 additions and 21 deletions.
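
For context: pandas deprecated DataFrame.append() in 1.4 and removed it in 2.0, which is what breaks these scripts under current numpy/pandas. Below is a minimal sketch of the replacement pattern this commit applies; the frame contents and the file_name column are illustrative, only the s3_path column name mirrors the scripts.

import pandas as pd

# Toy manifest pieces; only the s3_path column name mirrors the scripts.
pieces = [
    pd.DataFrame({"file_name": ["a.maf"], "s3_path": ["s3://bucket/a.maf"]}),
    pd.DataFrame({"file_name": ["b.maf"], "s3_path": [None]}),
]

# Old idiom, removed in pandas 2.0:
#   combined = combined.append(piece, ignore_index=True)
# Replacement: collect the pieces and concatenate once.
combined = pd.concat(pieces, ignore_index=True)

# Missing s3_path values become the string 'None' so downstream logic can
# fall back to an alternate download route, as in get_files_from_manifest.py.
combined.s3_path = combined.s3_path.fillna('None')
print(combined)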
scripts/cnv_3_gistic_style.py — 25 changes: 11 additions & 14 deletions
@@ -2,7 +2,6 @@
 
 import sys
 import argparse
-import concurrent.futures
 import json
 import subprocess
 import re
@@ -113,26 +112,24 @@ def mt_adjust_cn(obj):
 # sample list would be cbio ids
 samp_list = list(data.columns)[1:]
 bs_cbio_dict = {}
-# fid_dict = {}
 for samp_id in samp_list:
     bs_id = file_meta_dict[cbio_dx][samp_id]["kf_tum_id"]
     bs_cbio_dict[bs_id] = samp_id
 high_gain = config_data["cnv_high_gain"]
 
 x = 1
 m = 50
-with concurrent.futures.ThreadPoolExecutor(config_data["threads"]) as executor:
-    results = {
-        executor.submit(mt_adjust_cn, bs_id): bs_id for bs_id in bs_cbio_dict
-    }
-    for result in concurrent.futures.as_completed(results):
-        if result.result()[0] == 1:
-            "Had trouble processing object " + result.result([1] + "\n")
-            sys.exit(1)
-        if x % m == 0:
-            sys.stderr.write("Processed " + str(x) + " samples\n")
-            sys.stderr.flush()
-        x += 1
+
+for bs_id in bs_cbio_dict:
+    exit_code, object = mt_adjust_cn(bs_id)
+    if exit_code == 1:
+        sys.stderr.write("Had trouble processing object " + object + "\n")
+        sys.exit(1)
+    if x % m == 0:
+        sys.stderr.write("Processed " + str(x) + " samples\n")
+        sys.stderr.flush()
+    x += 1
+
 sys.stderr.write("Conversion completed. Writing results to file\n")
 new_fname = cbio_dx = (
     args.merged_cnv_dir + "/" + parts.group(1) + ".discrete_cnvs.txt"
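
The threaded block removed above also had a broken error branch: the message string was built but never written to stderr, and result.result([1] + "\n") would raise a TypeError. The sequential rewrite fixes both. A standalone sketch of the new loop's shape follows, with a stubbed mt_adjust_cn and toy sample IDs (both are placeholders, not the real function or data).

import sys

def mt_adjust_cn(bs_id):
    # Stub standing in for the real per-sample copy-number adjustment;
    # like the real one, it returns (exit_code, identifier).
    return 0, bs_id

# Toy mapping of tumor biospecimen IDs to cBio sample IDs.
bs_cbio_dict = {"BS_0001": "SAMPLE_1", "BS_0002": "SAMPLE_2"}

x = 1
m = 50  # report progress every m samples
for bs_id in bs_cbio_dict:
    exit_code, obj = mt_adjust_cn(bs_id)
    if exit_code == 1:
        sys.stderr.write("Had trouble processing object " + obj + "\n")
        sys.exit(1)
    if x % m == 0:
        sys.stderr.write("Processed " + str(x) + " samples\n")
        sys.stderr.flush()
    x += 1
sys.stderr.write("Processed all " + str(x - 1) + " samples\n")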
scripts/get_files_from_manifest.py — 11 changes: 4 additions & 7 deletions
@@ -128,15 +128,12 @@ def mt_type_download(file_type):
 sys.stderr.write("Concatenating manifests\n")
 sys.stderr.flush()
 manifest_list = args.manifest.split(",")
-manifest_concat = pd.DataFrame()
+manifest_df_list = []
 for manifest in manifest_list:
     sys.stderr.write("Processing " + manifest + "\n")
-    current = pd.read_csv(manifest, sep=None)
-    if manifest_concat.empty:
-        manifest_concat = current.copy()
-    else:
-        manifest_concat = manifest_concat.append(current, ignore_index=True)
+    manifest_df_list.append(pd.read_csv(manifest, sep=None))
 # In the event that s3_path is empty, replace with str to trigger later sbg download
+manifest_concat = pd.concat(manifest_df_list, ignore_index=True)
 manifest_concat.s3_path = manifest_concat.s3_path.fillna('None')
 file_types = args.fts.split(",")
 # subset concatenated manifests
@@ -185,7 +182,7 @@ def mt_type_download(file_type):
         key_dict[key]['session'] = boto3.Session(profile_name=key)
         key_dict[key]['dl_client'] = key_dict[key]['session'].client("s3", config=client_config)
     else:
-        key_dict[key]['manifest'] = key_dict[key]['manifest'].append(selected[selected['s3_path'].str.startswith(bucket)], ignore_index=True)
+        key_dict[key]['manifest'] = pd.concat([key_dict[key]['manifest'], selected[selected['s3_path'].str.startswith(bucket)]], ignore_index=True)
 if args.sbg_profile is not None:
     check = 1
     config = sbg.Config(profile=args.sbg_profile)
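
On the last hunk: pd.concat([existing, new_rows], ignore_index=True) is the direct stand-in for the removed .append() when rows accumulate per AWS profile. A toy sketch of that accumulation pattern follows; the bucket names, profile names, and mapping are made up, and only the s3_path column and str.startswith filter mirror the script.

import pandas as pd

selected = pd.DataFrame({
    "s3_path": ["s3://bucket-a/x.maf", "s3://bucket-b/y.maf", "s3://bucket-c/z.maf"]
})

# Hypothetical bucket-to-AWS-profile mapping; two buckets share one profile.
bucket_to_profile = {
    "s3://bucket-a": "prof1",
    "s3://bucket-b": "prof1",
    "s3://bucket-c": "prof2",
}

per_profile = {}
for bucket, profile in bucket_to_profile.items():
    rows = selected[selected["s3_path"].str.startswith(bucket)]
    if profile not in per_profile:
        per_profile[profile] = rows
    else:
        # Drop-in for the removed DataFrame.append()
        per_profile[profile] = pd.concat([per_profile[profile], rows], ignore_index=True)

for profile, frame in per_profile.items():
    print(profile, len(frame), "files")

With only a handful of buckets this incremental concat costs little; if many pieces were expected, collecting them in a list per profile and concatenating once (as the manifest-reading loop now does) would avoid re-copying the growing frame on each iteration.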
