Skip to content

Commit

Permalink
🐛 Don't filter out authz = [] patches when testing
Browse files Browse the repository at this point in the history
After implementing new rules which set authz = [], some tests fail.
This is because values get filtered out of patches if they are equal to
what is already in dataservice (this avoids extra API calls).

The authz field will always be = [] in dataservice during tests, since it is
not connected to indexd. That means any GF patches which include authz =
[] will be filtered out, causing tests to fail.
  • Loading branch information
znatty22 committed Oct 7, 2023
1 parent 95c3206 commit a20adf7
Show file tree
Hide file tree
Showing 3 changed files with 255 additions and 204 deletions.
27 changes: 24 additions & 3 deletions kf_update_dbgap_consent/sample_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"""
from pprint import pprint
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed

Expand All @@ -63,6 +64,11 @@
from kf_utils.dbgap.release import get_latest_sample_status


def is_localhost(url):
    """Return True when *url* points at the local machine.

    The host component of the URL is compared exactly against the known
    loopback names ("localhost" and "127.0.0.1"). A plain substring test
    would also match unrelated URLs such as
    "https://notlocalhost.example.com" or "https://example.com/localhost".

    Args:
        url: A URL with a scheme ("http://localhost:5000") or a bare
            "host[:port][/path]" string ("localhost:5000"). Empty or
            None values are treated as not local.

    Returns:
        bool: True if the parsed hostname is a loopback name, else False.
    """
    from urllib.parse import urlsplit

    hosts = {"localhost", "127.0.0.1"}
    if not url:
        return False
    # urlsplit only recognises a network location after "//"; prepend it
    # for bare "host:port" values so the hostname is still extracted.
    target = url if "://" in url else f"//{url}"
    # .hostname is lower-cased and excludes any port or credentials.
    return urlsplit(target).hostname in hosts


class ConsentProcessor:
def __init__(self, api_url, db_url=None):
self.api_url = api_url
Expand Down Expand Up @@ -96,7 +102,7 @@ def get_patches_for_study(
print(f"Found accession ID: {study_phs}")
open_acl = {"/open"}
empty_acl = set()
default_acl = empty_acl
default_acl = [study_id]
alerts = []
patches = defaultdict(lambda: defaultdict(dict))

Expand Down Expand Up @@ -170,6 +176,9 @@ def entities_dict(endpoint, filt):
hidden_genomic_files = set(
k for k, e in storage["genomic-files"].items() if not e["visible"]
)
print("**************")
for entity, entities in storage.items():
print(f"*** {entity} count: {len(entities)}")

"""
Rule: For all samples in the sample status file which are not found in
Expand Down Expand Up @@ -310,8 +319,18 @@ def entities_dict(endpoint, filt):
)

# remove known unneeded patches
def cmp(a, b, field_name):
# Values get filtered out if they are equal to what
# is already in dataservice.
# This matters for the authz field bc it will always
# be equal to [] since local dataservice is not connected to
# indexd. Therefore when we try to patch a GF with
# authz = [], this will get filtered out and
# tests will fail
# So when testing with localhost we force a patch with authz
if field_name == "authz" and is_localhost(self.api_url):
return False

def cmp(a, b):
if isinstance(a, list) and isinstance(b, list):
return sorted(a) == sorted(b)
else:
Expand All @@ -326,7 +345,7 @@ def cmp(a, b):
(endpoint in storage)
and (kfid in storage[endpoint])
and (k in storage[endpoint][kfid])
and (cmp(storage[endpoint][kfid][k], v))
and cmp(storage[endpoint][kfid][k], v, k)
)
}
for kfid, patch in ep_patches.items()
Expand All @@ -338,6 +357,8 @@ def cmp(a, b):
for endpoint, ep_patches in patches.items()
}
patches = {k: v for k, v in patches.items() if v}

# from pprint import pprint
# breakpoint()

return patches, alerts
280 changes: 157 additions & 123 deletions tests/data/phs999999_dataservice.json
Original file line number Diff line number Diff line change
@@ -1,126 +1,160 @@
{
"studies": {
"SD_00000000": {
"data_access_authority": "dbGaP",
"external_id": "phs999999"
}
},
"participants": {
"PT_11111111": {
"study_id": "SD_00000000",
"external_id": "test_subject_1"
},
"PT_22222222": {
"study_id": "SD_00000000",
"external_id": "test_subject_2"
}
},
"sequencing-centers": {
"SC_11111111": {
"name": "test_center"
}
},
"biospecimens": {
"BS_11111111": {
"participant_id": "PT_11111111",
"external_sample_id": "test_sample_1",
"sequencing_center_id": "SC_11111111",
"analyte_type": "DNA"
},
"BS_22222222": {
"participant_id": "PT_22222222",
"external_sample_id": "test_sample_2",
"sequencing_center_id": "SC_11111111",
"analyte_type": "DNA"
},
"BS_33333333": {
"participant_id": "PT_22222222",
"external_sample_id": "test_sample_3",
"sequencing_center_id": "SC_11111111",
"analyte_type": "DNA"
}
},
"diagnoses": {
"DG_11111111": {"participant_id": "PT_11111111"},
"DG_22222222": {"participant_id": "PT_22222222"}
},
"biospecimen-diagnoses": {
"BD_11111111": {
"biospecimen_id": "BS_11111111",
"diagnosis_id": "DG_11111111"
},
"BD_22222222": {
"biospecimen_id": "BS_22222222",
"diagnosis_id": "DG_22222222"
},
"BD_33333333": {
"biospecimen_id": "BS_33333333",
"diagnosis_id": "DG_22222222"
}
},
"genomic-files": {
"GF_00000000": {"hashes": {}, "size": 1, "urls": [], "controlled_access": false},
"GF_11111111": {"hashes": {}, "size": 1, "urls": [], "controlled_access": true},
"GF_22222222": {"hashes": {}, "size": 1, "urls": [], "controlled_access": true},
"GF_33333333": {"hashes": {}, "size": 1, "urls": [], "controlled_access": true},
"GF_44444444": {"hashes": {}, "size": 1, "urls": [], "controlled_access": false}
},
"biospecimen-genomic-files": {
"BG_00000000": {
"biospecimen_id": "BS_11111111",
"genomic_file_id": "GF_00000000"
},
"BG_11111111": {
"biospecimen_id": "BS_11111111",
"genomic_file_id": "GF_11111111"
},
"BG_22222222": {
"biospecimen_id": "BS_22222222",
"genomic_file_id": "GF_22222222"
},
"BG_33333333": {
"biospecimen_id": "BS_33333333",
"genomic_file_id": "GF_33333333"
},
"BG_44444444": {
"biospecimen_id": "BS_33333333",
"genomic_file_id": "GF_44444444"
}
},
"sequencing-experiments": {
"SE_11111111": {
"external_id": "SE_11111111", "sequencing_center_id": "SC_11111111",
"is_paired_end": false, "experiment_strategy": "WGS", "platform": "Not Applicable"
},
"SE_22222222": {
"external_id": "SE_22222222", "sequencing_center_id": "SC_11111111",
"is_paired_end": false, "experiment_strategy": "WGS", "platform": "Not Applicable"
},
"SE_33333333": {
"external_id": "SE_33333333", "sequencing_center_id": "SC_11111111",
"is_paired_end": false, "experiment_strategy": "WGS", "platform": "Not Applicable"
}
},
"sequencing-experiment-genomic-files": {
"SG_00000000": {
"sequencing_experiment_id": "SE_11111111",
"genomic_file_id": "GF_00000000"
},
"SG_11111111": {
"sequencing_experiment_id": "SE_11111111",
"genomic_file_id": "GF_11111111"
},
"SG_22222222": {
"sequencing_experiment_id": "SE_22222222",
"genomic_file_id": "GF_22222222"
},
"SG_33333333": {
"sequencing_experiment_id": "SE_33333333",
"genomic_file_id": "GF_33333333"
},
"SG_44444444": {
"sequencing_experiment_id": "SE_33333333",
"genomic_file_id": "GF_44444444"
}
"studies": {
"SD_00000000": {
"data_access_authority": "dbGaP",
"external_id": "phs999999"
}
},
"participants": {
"PT_11111111": {
"study_id": "SD_00000000",
"external_id": "test_subject_1"
},
"PT_22222222": {
"study_id": "SD_00000000",
"external_id": "test_subject_2"
}
},
"sequencing-centers": {
"SC_11111111": {
"name": "test_center"
}
},
"biospecimens": {
"BS_11111111": {
"participant_id": "PT_11111111",
"external_sample_id": "test_sample_1",
"sequencing_center_id": "SC_11111111",
"analyte_type": "DNA"
},
"BS_22222222": {
"participant_id": "PT_22222222",
"external_sample_id": "test_sample_2",
"sequencing_center_id": "SC_11111111",
"analyte_type": "DNA"
},
"BS_33333333": {
"participant_id": "PT_22222222",
"external_sample_id": "test_sample_3",
"sequencing_center_id": "SC_11111111",
"analyte_type": "DNA"
}
},
"diagnoses": {
"DG_11111111": { "participant_id": "PT_11111111" },
"DG_22222222": { "participant_id": "PT_22222222" }
},
"biospecimen-diagnoses": {
"BD_11111111": {
"biospecimen_id": "BS_11111111",
"diagnosis_id": "DG_11111111"
},
"BD_22222222": {
"biospecimen_id": "BS_22222222",
"diagnosis_id": "DG_22222222"
},
"BD_33333333": {
"biospecimen_id": "BS_33333333",
"diagnosis_id": "DG_22222222"
}
},
"genomic-files": {
"GF_00000000": {
"hashes": {},
"size": 1,
"urls": [],
"controlled_access": false
},
"GF_11111111": {
"hashes": {},
"size": 1,
"urls": [],
"controlled_access": true
},
"GF_22222222": {
"hashes": {},
"size": 1,
"urls": [],
"controlled_access": true
},
"GF_33333333": {
"hashes": {},
"size": 1,
"urls": [],
"controlled_access": true
},
"GF_44444444": {
"hashes": {},
"size": 1,
"urls": [],
"controlled_access": false
}
},
"biospecimen-genomic-files": {
"BG_00000000": {
"biospecimen_id": "BS_11111111",
"genomic_file_id": "GF_00000000"
},
"BG_11111111": {
"biospecimen_id": "BS_11111111",
"genomic_file_id": "GF_11111111"
},
"BG_22222222": {
"biospecimen_id": "BS_22222222",
"genomic_file_id": "GF_22222222"
},
"BG_33333333": {
"biospecimen_id": "BS_33333333",
"genomic_file_id": "GF_33333333"
},
"BG_44444444": {
"biospecimen_id": "BS_33333333",
"genomic_file_id": "GF_44444444"
}
},
"sequencing-experiments": {
"SE_11111111": {
"external_id": "SE_11111111",
"sequencing_center_id": "SC_11111111",
"is_paired_end": false,
"experiment_strategy": "WGS",
"platform": "Not Applicable"
},
"SE_22222222": {
"external_id": "SE_22222222",
"sequencing_center_id": "SC_11111111",
"is_paired_end": false,
"experiment_strategy": "WGS",
"platform": "Not Applicable"
},
"SE_33333333": {
"external_id": "SE_33333333",
"sequencing_center_id": "SC_11111111",
"is_paired_end": false,
"experiment_strategy": "WGS",
"platform": "Not Applicable"
}
},
"sequencing-experiment-genomic-files": {
"SG_00000000": {
"sequencing_experiment_id": "SE_11111111",
"genomic_file_id": "GF_00000000"
},
"SG_11111111": {
"sequencing_experiment_id": "SE_11111111",
"genomic_file_id": "GF_11111111"
},
"SG_22222222": {
"sequencing_experiment_id": "SE_22222222",
"genomic_file_id": "GF_22222222"
},
"SG_33333333": {
"sequencing_experiment_id": "SE_33333333",
"genomic_file_id": "GF_33333333"
},
"SG_44444444": {
"sequencing_experiment_id": "SE_33333333",
"genomic_file_id": "GF_44444444"
}
}
}
Loading

0 comments on commit a20adf7

Please sign in to comment.