Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add ability to set things to be visible if loaded in dbgap #19

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions kf_update_dbgap_consent/app/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from argparse import RawTextHelpFormatter
from pprint import pprint

from kf_utils.dataservice.patch import send_patches
from kf_update_dbgap_consent.sample_status import ConsentProcessor
from kf_utils.dataservice.patch import send_patches

SERVER_DEFAULT = "http://localhost:5000"

Expand Down Expand Up @@ -46,12 +46,26 @@ def cli():
" - Defaults to match on `external_sample_id`"
),
)
parser.add_argument(
"--coerce_visible",
action="store_true",
default=False,
help=(
"If a specimen is loaded into dbgap, set the specimen, its "
"descendants, the associated participant, and the associated "
"participant's non-specimen descendants to visible."
),
)
args = parser.parse_args()
print(f"Args: {args.__dict__}")

patches, alerts = ConsentProcessor(
args.server, args.db_url
).get_patches_for_study(args.study, match_aliquot=args.match_aliquot)
).get_patches_for_study(
args.study,
match_aliquot=args.match_aliquot,
coerce_visible=args.coerce_visible,
)

all_patches = {}
for endpoint_patches in patches.values():
Expand Down
59 changes: 56 additions & 3 deletions kf_update_dbgap_consent/sample_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,8 @@
from concurrent.futures import ThreadPoolExecutor, as_completed

from d3b_utils.requests_retry import Session

from kf_utils.dataservice.descendants import find_descendants_by_kfids
from kf_utils.dataservice.scrape import yield_entities
from kf_utils.dataservice.scrape import yield_entities, yield_entities_from_kfids
from kf_utils.dbgap.release import get_latest_sample_status


Expand All @@ -88,7 +87,11 @@ def get_accession(self, study_id):
)

def get_patches_for_study(
self, study_id, dbgap_status="released", match_aliquot=False
self,
study_id,
dbgap_status="released",
match_aliquot=False,
coerce_visible=False,
):
if match_aliquot:
match_entity = "external_aliquot_id"
Expand Down Expand Up @@ -214,6 +217,56 @@ def entities_dict(endpoint, filt):
hidden_specimens[kfid] = bs

"""
Optional: when `coerce_visible` is set, make samples that are loaded into dbGaP visible.
"""
# Opt-in step: only runs when the caller passed coerce_visible=True.
# NOTE(review): this hunk is shown without its original indentation, so
# the nesting described in these comments is inferred from statement
# order — confirm against the real file.
if coerce_visible:
# Biospecimens whose matching dbGaP sample has status "Loaded".
unhidden_specimens = {}
for kfid, bs in storage["biospecimens"].items():
# Look up the dbGaP sample by the configured match field; a missing
# sample yields {} so the status check below simply fails.
sample = dbgap_samples.get(bs[match_entity], {})
if sample.get("@dbgap_status") == "Loaded":
# remove the sample from list of hidden specimens if the
# specimen is loaded in dbgap.
hidden_specimens.pop(kfid, None)
unhidden_specimens[kfid] = bs
# Collect descendants starting from the participants that own the
# unhidden specimens. Presumably the result maps endpoint name ->
# {kf_id: entity}, since it is iterated that way below — TODO confirm
# against find_descendants_by_kfids.
descendants_of_unhidden_specimens = find_descendants_by_kfids(
self.db_url or self.api_url,
"participants",
[bs["participant_id"] for bs in unhidden_specimens.values()],
ignore_gfs_with_hidden_external_contribs=True,
kfids_only=False,
)
# Add the participants themselves under the "participants" key, fetched
# by kf_id from the API and indexed by kf_id.
descendants_of_unhidden_specimens["participants"] = {
e["kf_id"]: e
for e in yield_entities_from_kfids(
self.api_url,
[
bs["participant_id"]
for bs in unhidden_specimens.values()
],
)
}
# NOTE(review): leftover debugger hook — breakpoint() drops into the
# debugger (pdb by default) and will stall any unattended run. Remove
# before merging.
breakpoint()
# NOTE(review): keep_hidden is assigned here but never read in the
# visible part of this block.
keep_hidden = []
for (
endpoint,
entities,
) in descendants_of_unhidden_specimens.items():
for k, e in entities.items():
# NOTE(review): the next line has no trailing colon, which is a
# SyntaxError as written. With indentation lost it is also unclear
# which of the following statements it was meant to guard.
if endpoint == 'biospecimens'
storage[endpoint][k] = e
patches[endpoint][k]["visible"] = True
if endpoint == "genomic-files":
# remove the genomic file from list of hidden files
hidden_genomic_files.discard(k)

# NOTE(review): the two loops below only print ids and change no state;
# they look like temporary debugging output.
# The first prints genomic-file ids that are referenced by at least one
# biospecimen-genomic-file link. The list of genomic_file_id values is
# rebuilt for every k.
for k in descendants_of_unhidden_specimens['genomic-files'].keys():
if k in [bg['genomic_file_id'] for bg in descendants_of_unhidden_specimens['biospecimen-genomic-files'].values() ]:
print(k)

# The second prints biospecimen ids from the link table that are not among
# the specimens unhidden above.
for bg in descendants_of_unhidden_specimens['biospecimen-genomic-files'].values():
if bg['biospecimen_id'] not in unhidden_specimens.keys():
print(bg['biospecimen_id'])
"""
Rule: If a biospecimen is hidden in the dataservice, its descendants
should also be hidden.
"""
Expand Down