From 19c8c6af444b5586c5a2aef310ccbc6846cb62bf Mon Sep 17 00:00:00 2001 From: fnguyen Date: Mon, 4 Mar 2024 13:42:04 -0500 Subject: [PATCH 1/2] Prune the HTSGet responses by what donors were found in Katsu --- query_server/query_operations.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/query_server/query_operations.py b/query_server/query_operations.py index 91ab6c3..c242ffd 100644 --- a/query_server/query_operations.py +++ b/query_server/query_operations.py @@ -227,7 +227,6 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm print(f"cohort {cohort} has samples {sample_ids}") htsget_found_donors = {} for response in htsget['response']: - genomic_query = response['caseLevelData'] for case_data in response['caseLevelData']: if 'biosampleId' not in case_data: print(f"Could not parse htsget response for {case_data}") @@ -247,13 +246,22 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm htsget_found_donors[case_data['donor_id']] = 1 else: print(f"Could not parse biosampleId for {case_data}") - case_data['program_id'] = "" - case_data['donor_id'] = "" + case_data['program_id'] = None + case_data['donor_id'] = None case_data['submitter_specimen_id'] = case_data['biosampleId'] case_data['tumour_normal_designation'] = 'Tumour' case_data['position'] = response['variation']['location']['interval']['start']['value'] # Filter clinical results based on genomic results donors = [donor for donor in donors if donor['submitter_donor_id'] in htsget_found_donors] + katsu_allowed_donors = {} + for donor in donors: + katsu_allowed_donors[f"{donor['program_id']}~{donor['submitter_donor_id']}"] = 1 + for response in htsget['response']: + for case_data in response['caseLevelData']: + if ('donor_id' in case_data and 'program_id' in case_data and + f"{case_data['donor_id']}~{case_data['program_id']}" in katsu_allowed_donors): + genomic_query.append(response) + except Exception as ex: print(ex) From 8da1bd6e1f187bd702430eb9df8a34186b180e25 Mon Sep 17 00:00:00 2001 From: fnguyen Date: Mon, 4 Mar 2024 14:32:16 -0500 Subject: [PATCH 2/2] Fixup: grab the proper ID and check against the real program/donor --- query_server/query_operations.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/query_server/query_operations.py b/query_server/query_operations.py index c242ffd..9637d2e 100644 --- a/query_server/query_operations.py +++ b/query_server/query_operations.py @@ -204,7 +204,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm # Now we combine this with HTSGet, if any genomic_query = [] - genomic_query_info = None + # genomic_query_info = None if gene != "" or chrom != "": try: if gene != "": @@ -220,11 +220,14 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm for specimen in specimen_query['items']: specimen_mapping[specimen['submitter_sample_id']] = (specimen['submitter_donor_id'], specimen['tumour_normal_designation']) - # handovers = htsget['results']['beaconHandovers'] - genomic_query_info = htsget['query_info'] - for cohort in genomic_query_info: - sample_ids = genomic_query_info[cohort] - print(f"cohort {cohort} has samples {sample_ids}") + # genomic_query_info contains ALL matches from every dataset + # This is meant to be used to fill out the summary stats ONLY + # However, that part isn't covered in this PR (it's in DIG-1372 (https://candig.atlassian.net/browse/DIG-1372)) + # and does not yet function + # genomic_query_info = htsget['query_info'] + # for cohort in genomic_query_info: + # sample_ids = genomic_query_info[cohort] + htsget_found_donors = {} for response in htsget['response']: for case_data in response['caseLevelData']: @@ -259,8 +262,8 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm for response in htsget['response']: for case_data in response['caseLevelData']: if ('donor_id' in case_data and 'program_id' in case_data and - f"{case_data['donor_id']}~{case_data['program_id']}" in katsu_allowed_donors): - genomic_query.append(response) + f"{case_data['program_id']}~{case_data['donor_id']}" in katsu_allowed_donors): + genomic_query.append(case_data) except Exception as ex: print(ex) @@ -286,7 +289,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm full_data['summary'] = summary_stats full_data['next'] = None full_data['prev'] = None - full_data['genomic_query_info'] = genomic_query_info + # full_data['genomic_query_info'] = genomic_query_info # Add prev and next parameters to the repsonse, appending a session ID. # Essentially we want to go session ID -> list of donors