Skip to content

Commit

Permalink
Merge pull request #22 from CanDIG/bugfix/htsget-pruning
Browse files Browse the repository at this point in the history
Bugfix/htsget pruning
  • Loading branch information
OrdiNeu authored Mar 5, 2024
2 parents 37d5a46 + 8da1bd6 commit cd22b9b
Showing 1 changed file with 21 additions and 10 deletions.
31 changes: 21 additions & 10 deletions query_server/query_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm

# Now we combine this with HTSGet, if any
genomic_query = []
genomic_query_info = None
# genomic_query_info = None
if gene != "" or chrom != "":
try:
if gene != "":
Expand All @@ -220,14 +220,16 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
for specimen in specimen_query['items']:
specimen_mapping[specimen['submitter_sample_id']] = (specimen['submitter_donor_id'], specimen['tumour_normal_designation'])

# handovers = htsget['results']['beaconHandovers']
genomic_query_info = htsget['query_info']
for cohort in genomic_query_info:
sample_ids = genomic_query_info[cohort]
print(f"cohort {cohort} has samples {sample_ids}")
# genomic_query_info contains ALL matches from every dataset.
# It is meant to be used ONLY to fill out the summary stats.
# However, that part isn't covered in this PR (it is tracked in DIG-1372:
# https://candig.atlassian.net/browse/DIG-1372) and does not function yet.
# genomic_query_info = htsget['query_info']
# for cohort in genomic_query_info:
# sample_ids = genomic_query_info[cohort]

htsget_found_donors = {}
for response in htsget['response']:
genomic_query = response['caseLevelData']
for case_data in response['caseLevelData']:
if 'biosampleId' not in case_data:
print(f"Could not parse htsget response for {case_data}")
Expand All @@ -247,13 +249,22 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
htsget_found_donors[case_data['donor_id']] = 1
else:
print(f"Could not parse biosampleId for {case_data}")
case_data['program_id'] = ""
case_data['donor_id'] = ""
case_data['program_id'] = None
case_data['donor_id'] = None
case_data['submitter_specimen_id'] = case_data['biosampleId']
case_data['tumour_normal_designation'] = 'Tumour'
case_data['position'] = response['variation']['location']['interval']['start']['value']
# Filter clinical results based on genomic results
donors = [donor for donor in donors if donor['submitter_donor_id'] in htsget_found_donors]
katsu_allowed_donors = {}
for donor in donors:
katsu_allowed_donors[f"{donor['program_id']}~{donor['submitter_donor_id']}"] = 1
for response in htsget['response']:
for case_data in response['caseLevelData']:
if ('donor_id' in case_data and 'program_id' in case_data and
f"{case_data['program_id']}~{case_data['donor_id']}" in katsu_allowed_donors):
genomic_query.append(case_data)

except Exception as ex:
print(ex)

Expand All @@ -278,7 +289,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
full_data['summary'] = summary_stats
full_data['next'] = None
full_data['prev'] = None
full_data['genomic_query_info'] = genomic_query_info
# full_data['genomic_query_info'] = genomic_query_info

# Add prev and next parameters to the response, appending a session ID.
# Essentially we want to go session ID -> list of donors
Expand Down

0 comments on commit cd22b9b

Please sign in to comment.