diff --git a/kf_update_dbgap_consent/app/cli.py b/kf_update_dbgap_consent/app/cli.py index 1cf29f3..9b5c1f4 100755 --- a/kf_update_dbgap_consent/app/cli.py +++ b/kf_update_dbgap_consent/app/cli.py @@ -3,8 +3,8 @@ from argparse import RawTextHelpFormatter from pprint import pprint -from kf_utils.dataservice.patch import send_patches from kf_update_dbgap_consent.sample_status import ConsentProcessor +from kf_utils.dataservice.patch import send_patches SERVER_DEFAULT = "http://localhost:5000" @@ -46,12 +46,26 @@ def cli(): " - Defaults to match on `external_sample_id`" ), ) + parser.add_argument( + "--coerce_visible", + action="store_true", + default=False, + help=( + "If a specimen is loaded into dbgap, set the specimen, its " + "descendants, the associated participant, and the associated " + "participant's non-specimen descendants to visible." + ), + ) args = parser.parse_args() print(f"Args: {args.__dict__}") patches, alerts = ConsentProcessor( args.server, args.db_url - ).get_patches_for_study(args.study, match_aliquot=args.match_aliquot) + ).get_patches_for_study( + args.study, + match_aliquot=args.match_aliquot, + coerce_visible=args.coerce_visible, + ) all_patches = {} for endpoint_patches in patches.values(): diff --git a/kf_update_dbgap_consent/sample_status.py b/kf_update_dbgap_consent/sample_status.py index a1d8c7e..aea9857 100644 --- a/kf_update_dbgap_consent/sample_status.py +++ b/kf_update_dbgap_consent/sample_status.py @@ -60,9 +60,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from d3b_utils.requests_retry import Session - from kf_utils.dataservice.descendants import find_descendants_by_kfids -from kf_utils.dataservice.scrape import yield_entities +from kf_utils.dataservice.scrape import yield_entities, yield_entities_from_kfids from kf_utils.dbgap.release import get_latest_sample_status @@ -88,7 +87,11 @@ def get_accession(self, study_id): ) def get_patches_for_study( - self, study_id, dbgap_status="released", match_aliquot=False + self, + study_id, + dbgap_status="released", + match_aliquot=False, + coerce_visible=False, ): if match_aliquot: match_entity = "external_aliquot_id" @@ -214,6 +217,56 @@ def entities_dict(endpoint, filt): hidden_specimens[kfid] = bs """ + Handle if samples loaded into dbgap should be made visible + """ + if coerce_visible: + unhidden_specimens = {} + for kfid, bs in storage["biospecimens"].items(): + sample = dbgap_samples.get(bs[match_entity], {}) + if sample.get("@dbgap_status") == "Loaded": + # remove the sample from list of hidden specimens if the + # specimen is loaded in dbgap. + hidden_specimens.pop(kfid, None) + unhidden_specimens[kfid] = bs + descendants_of_unhidden_specimens = find_descendants_by_kfids( + self.db_url or self.api_url, + "participants", + [bs["participant_id"] for bs in unhidden_specimens.values()], + ignore_gfs_with_hidden_external_contribs=True, + kfids_only=False, + ) + descendants_of_unhidden_specimens["participants"] = { + e["kf_id"]: e + for e in yield_entities_from_kfids( + self.api_url, + [ + bs["participant_id"] + for bs in unhidden_specimens.values() + ], + ) + } + breakpoint() + keep_hidden = [] + for ( + endpoint, + entities, + ) in descendants_of_unhidden_specimens.items(): + for k, e in entities.items(): + if endpoint == 'biospecimens' + storage[endpoint][k] = e + patches[endpoint][k]["visible"] = True + if endpoint == "genomic-files": + # remove the genomic file from list of hidden files + hidden_genomic_files.discard(k) + + for k in descendants_of_unhidden_specimens['genomic-files'].keys(): + if k in [bg['genomic_file_id'] for bg in descendants_of_unhidden_specimens['biospecimen-genomic-files'].values() ]: + print(k) + + for bg in descendants_of_unhidden_specimens['biospecimen-genomic-files'].values(): + if bg['biospecimen_id'] not in unhidden_specimens.keys(): + print(bg['biospecimen_id']) + """ Rule: If a biospecimen is hidden in the dataservice, its descendants should also be hidden. """