From bd60dc3e33d230bda1512891da68eb753164f982 Mon Sep 17 00:00:00 2001 From: Lana Date: Wed, 23 Oct 2024 10:54:30 -0400 Subject: [PATCH] feat(pagination-Bundle): remove _iter()__ form Bundle class Simplify url sanitization, adjust testse Replace redundant code on perform() Add tests perform-iter and perform_resources_iter via Response --- fhir-parser | 2 +- fhirclient/_utils.py | 40 +- fhirclient/models/bundle.py | 3 - fhirclient/models/fhirsearch.py | 11 +- pyproject.toml | 1 + tests/models/bundle_iterator_test.py | 22 - tests/models/fhirsearch_perform_iter_test.py | 414 ++++++++++--------- tests/utils_pagination_test.py | 15 +- 8 files changed, 253 insertions(+), 255 deletions(-) delete mode 100644 tests/models/bundle_iterator_test.py diff --git a/fhir-parser b/fhir-parser index daf47105..5d7d8dca 160000 --- a/fhir-parser +++ b/fhir-parser @@ -1 +1 @@ -Subproject commit daf471059892f117e98387426c3ae723a56e332c +Subproject commit 5d7d8dcacecb988998733be5c5d84b25c711cc83 diff --git a/fhirclient/_utils.py b/fhirclient/_utils.py index 2ca8a4a8..f77b855b 100644 --- a/fhirclient/_utils.py +++ b/fhirclient/_utils.py @@ -20,8 +20,7 @@ def _fetch_next_page(bundle: 'Bundle') -> Optional['Bundle']: Returns: Optional[Bundle]: The next page of results as a FHIR Bundle, or None if no "next" link is found. """ - next_link = _get_next_link(bundle) - if next_link: + if next_link := _get_next_link(bundle): sanitized_next_link = _sanitize_next_link(next_link) return _execute_pagination_request(sanitized_next_link) return None @@ -42,23 +41,28 @@ def _get_next_link(bundle: 'Bundle') -> Optional[str]: for link in bundle.link: if link.relation == "next": - return link.url + return _sanitize_next_link(link.url) return None def _sanitize_next_link(next_link: str) -> str: """ - Sanitize the `next` link to ensure it is safe to use. + Sanitize the `next` link by validating its scheme and hostname against the origin server. + + This function ensures the `next` link URL uses a valid scheme (`http` or `https`) and that it contains a + hostname. This provides a basic safeguard against malformed URLs without overly restricting flexibility. Args: next_link (str): The raw `next` link URL. + origin_server (str): The expected origin server hostname. Returns: - str: The sanitized URL. + str: The validated URL. Raises: - ValueError: If the URL scheme or domain is invalid. + ValueError: If the URL's scheme is not `http` or `https`, or if the hostname does not match the origin server. """ + parsed_url = urllib.parse.urlparse(next_link) # Validate scheme and netloc (domain) @@ -67,26 +71,10 @@ def _sanitize_next_link(next_link: str) -> str: if not parsed_url.netloc: raise ValueError("Invalid URL domain in `next` link.") - # Additional sanitization if necessary, e.g., removing dangerous query parameters - query_params = urllib.parse.parse_qs(parsed_url.query) - sanitized_query = {k: v for k, v in query_params.items()} - - # Rebuild the sanitized URL - sanitized_url = urllib.parse.urlunparse( - ( - parsed_url.scheme, - parsed_url.netloc, - parsed_url.path, - parsed_url.params, - urllib.parse.urlencode(sanitized_query, doseq=True), - parsed_url.fragment, - ) - ) - - return sanitized_url + return next_link -def _execute_pagination_request(sanitized_url: str) -> Optional['Bundle']: +def _execute_pagination_request(sanitized_url: str) -> 'Bundle': """ Execute the request to retrieve the next page using the sanitized URL. @@ -110,9 +98,9 @@ def _execute_pagination_request(sanitized_url: str) -> Optional['Bundle']: return next_bundle - except requests.exceptions.HTTPError as e: + except requests.exceptions.HTTPError: # Handle specific HTTP errors as needed, possibly including retry logic - raise e + raise def iter_pages(first_bundle: 'Bundle') -> Iterable['Bundle']: diff --git a/fhirclient/models/bundle.py b/fhirclient/models/bundle.py index 715b5eab..a8652197 100644 --- a/fhirclient/models/bundle.py +++ b/fhirclient/models/bundle.py @@ -51,9 +51,6 @@ def __init__(self, jsondict=None, strict=True): super(Bundle, self).__init__(jsondict=jsondict, strict=strict) - def __iter__(self): - return iter(self.entry or []) - def elementProperties(self): js = super(Bundle, self).elementProperties() js.extend([ diff --git a/fhirclient/models/fhirsearch.py b/fhirclient/models/fhirsearch.py index 8343c6f7..95f0db6b 100644 --- a/fhirclient/models/fhirsearch.py +++ b/fhirclient/models/fhirsearch.py @@ -133,12 +133,9 @@ def perform(self, server) -> 'Bundle': if server is None: raise Exception("Need a server to perform search") - - from . import bundle as bundle_module - res = server.request_json(self.construct()) - bundle = bundle_module.Bundle(res) - bundle.origin_server = server - return bundle + + from .bundle import Bundle + return Bundle.read_from(self.construct(), server) # Use forward references to avoid circular imports def perform_iter(self, server) -> Iterator['Bundle']: @@ -174,7 +171,7 @@ def perform_resources_iter(self, server) -> Iterator['Resource']: :returns: An iterator of Resource instances """ for bundle in self.perform_iter(server): - for entry in bundle: + for entry in bundle.entry: yield entry.resource diff --git a/pyproject.toml b/pyproject.toml index c90286a9..d18cfc92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,4 +35,5 @@ testpaths = "tests" tests = [ "pytest >= 2.5", "pytest-cov", + "responses", ] diff --git a/tests/models/bundle_iterator_test.py b/tests/models/bundle_iterator_test.py deleted file mode 100644 index 80408633..00000000 --- a/tests/models/bundle_iterator_test.py +++ /dev/null @@ -1,22 +0,0 @@ -import unittest -from unittest.mock import MagicMock - -from fhirclient.models.bundle import Bundle - - -class TestBundleIterator(unittest.TestCase): - - def test_bundle_iter_and_next(self): - entry1 = MagicMock() - entry2 = MagicMock() - bundle = Bundle() - bundle.entry = [entry1, entry2] - - iterator = iter(bundle) - first_entry = next(iterator) - second_entry = next(iterator) - - self.assertEqual(first_entry, entry1) - self.assertEqual(second_entry, entry2) - with self.assertRaises(StopIteration): - next(iterator) diff --git a/tests/models/fhirsearch_perform_iter_test.py b/tests/models/fhirsearch_perform_iter_test.py index 0d9212be..dc780e93 100644 --- a/tests/models/fhirsearch_perform_iter_test.py +++ b/tests/models/fhirsearch_perform_iter_test.py @@ -1,224 +1,252 @@ -import io -import json -import os import unittest -from unittest.mock import MagicMock, patch - -import fhirclient +import requests +import responses from fhirclient import server -from fhirclient.models import bundle -from fhirclient.models.fhirsearch import FHIRSearch -from fhirclient.models.bundle import Bundle, BundleEntry +from fhirclient.models.fhirsearch import FHIRSearch, FHIRSearchParam +from fhirclient.models.bundle import Bundle class TestFHIRSearchIter(unittest.TestCase): def setUp(self): - # self.mock_server = MockServer(tmpdir=os.path.join(os.path.dirname(__file__), '..', 'data', 'examples')) - self.mock_server = MagicMock() self.search = FHIRSearch(resource_type=Bundle) - self.mock_bundle = self.instantiate_from('bundle-example.json') - self.mock_next_bundle = self.instantiate_from('bundle-example-page2.json') - - def instantiate_from(self, filename): - # Construct the path to the example file - datadir = os.path.join(os.path.dirname(__file__), '..', 'data', 'examples') - filepath = os.path.join(datadir, filename) + self.search.params = [ + FHIRSearchParam(name="patient", value="347"), + FHIRSearchParam(name="_count", value="1") + ] + self.mock_server = MockServer("https://example.com") + + def create_bundle_entry(self, resource_id): + """Helper to create a Bundle entry with a specific resource ID.""" + return { + "fullUrl": f"https://example.com/base/MedicationRequest/{resource_id}", + "resource": { + "resourceType": "MedicationRequest", + "id": resource_id, + "subject": {"reference": "Patient/347"}, + "intent": "order", + "status": "unknown", + "medicationReference": {"reference": "Medication/example"} + } + } - # Read the JSON file and return a Bundle instance - with io.open(filepath, 'r', encoding='utf-8') as handle: - js = json.load(handle) + def add_mock_response(self, url, bundle_content): + """Helper to set up a mock response for the given URL and bundle content.""" + responses.add( + responses.GET, + url, + json=bundle_content, + status=200 + ) + + @responses.activate + def test_perform_iter_single_bundle(self): + # Mock the network response for the initial search request + bundle_content = { + "resourceType": "Bundle", + "type": "searchset", + "entry": [self.create_bundle_entry("3123")] + } - self.assertEqual("Bundle", js["resourceType"], "The resourceType is not 'Bundle'.") - return Bundle(js) + # Mock the single page response + self.add_mock_response("https://example.com/Bundle?patient=347&_count=1", bundle_content) - @patch('fhirclient._utils._fetch_next_page') - @patch('fhirclient._utils.iter_pages') - @patch('fhirclient.models.fhirsearch.FHIRSearch.perform') - def test_perform_iter_single_bundle(self, mock_perform, mock_iter_pages, mock_fetch_next_page): - self.mock_bundle = self.instantiate_from('bundle-example-page2.json') - mock_perform.return_value = self.mock_bundle - mock_iter_pages.side_effect = iter([self.mock_bundle]) - mock_fetch_next_page.side_effect = [None] + # Call perform_iter with the server URL + result = list(self.search.perform_iter(self.mock_server)) - result = self.search.perform_iter(self.mock_server) - bundles = list(result) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].entry[0].resource.id, "3123") + + @responses.activate + def test_perform_iter_pagination(self): + bundle_page_1 = { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + {"relation": "next", + "url": "https://example.com/base/MedicationRequest?patient=347&searchId=ff15fd40-ff71-4b48-b366-09c706bed9d0&page=2"} + ], + "entry": [self.create_bundle_entry("3123")] + } - self.assertEqual(len(bundles), 1, f"Expected 1 bundle but got {len(bundles)}") - self.assertEqual(bundles[0], self.mock_bundle) + # Second page bundle without a "next" link + bundle_page_2 = { + "resourceType": "Bundle", + "type": "searchset", + "entry": [self.create_bundle_entry("3124")] + } - mock_perform.assert_called_once_with(self.mock_server) + # Mock both page responses + self.add_mock_response("https://example.com/Bundle?patient=347&_count=1", bundle_page_1) + self.add_mock_response( + "https://example.com/base/MedicationRequest?patient=347&searchId=ff15fd40-ff71-4b48-b366-09c706bed9d0&page=2", + bundle_page_2) + # Execute perform_iter to iterate over both pages + result = list(self.search.perform_iter(self.mock_server)) - @patch('fhirclient._utils._fetch_next_page') - @patch('fhirclient._utils.iter_pages') - @patch('fhirclient.models.fhirsearch.FHIRSearch.perform') - def test_perform_iter(self, mock_perform, mock_iter_pages, mock_fetch_next_page): - self.mock_bundle = self.instantiate_from('bundle-example.json') - self.mock_next_bundle = self.instantiate_from('bundle-example-page2.json') + self.assertEqual(len(result), 2) # Expect two pages of results + self.assertEqual(result[0].entry[0].resource.id, "3123") + self.assertEqual(result[1].entry[0].resource.id, "3124") + + @responses.activate + def test_perform_iter_empty_bundle(self): + # Mock an empty Bundle with no entries + empty_bundle = { + "resourceType": "Bundle", + "type": "searchset", + "entry": [] + } + self.add_mock_response("https://example.com/Bundle?patient=347&_count=1", empty_bundle) - # Set up mock_perform to return the first bundle - mock_perform.return_value = self.mock_bundle - mock_iter_pages.side_effect = iter([self.mock_bundle, self.mock_next_bundle]) + # Execute perform_iter with empty result + result = list(self.search.perform_iter(self.mock_server)) - # Make _fetch_next_page return the next bundle and then stop - mock_fetch_next_page.side_effect = [self.mock_next_bundle, None] + # Assertion: Should return an empty list + self.assertEqual(len(result), 1) + self.assertEqual(len(result[0].entry), 0) + + @responses.activate + def test_perform_iter_multiple_pages(self): + # First page with a "next" link + bundle_page_1 = { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + {"relation": "next", "url": "https://example.com/base/MedicationRequest?patient=347&page=2"} + ], + "entry": [self.create_bundle_entry("3123")] + } - result = self.search.perform_iter(self.mock_server) - bundles = list(result) + # Second page with a "next" link + bundle_page_2 = { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + {"relation": "next", "url": "https://example.com/base/MedicationRequest?patient=347&page=3"} + ], + "entry": [self.create_bundle_entry("3124")] + } - self.assertEqual(len(bundles), 2, f"Expected 2 bundles but got {len(bundles)}") - self.assertEqual(bundles[0], self.mock_bundle) - self.assertEqual(bundles[1], self.mock_next_bundle) + # Third page without a "next" link (end of pagination) + bundle_page_3 = { + "resourceType": "Bundle", + "type": "searchset", + "entry": [self.create_bundle_entry("3125")] + } - mock_perform.assert_called_once_with(self.mock_server) - mock_fetch_next_page.assert_called() + # Mock all page responses + self.add_mock_response("https://example.com/Bundle?patient=347&_count=1", bundle_page_1) + self.add_mock_response("https://example.com/base/MedicationRequest?patient=347&page=2", bundle_page_2) + self.add_mock_response("https://example.com/base/MedicationRequest?patient=347&page=3", bundle_page_3) - def test_perform_iter_no_first_bundle(self): - self.search.perform = MagicMock(return_value=None) + # Execute perform_iter to iterate over all pages result = list(self.search.perform_iter(self.mock_server)) - self.assertEqual(result, []) - - @patch('fhirclient._utils.iter_pages') - @patch('requests.get') # Mock requests.get to avoid actual HTTP calls - def test_perform_resources_iter_single_page(self, mock_iter_pages, mock_get): - - # Test the case where there is only a single page in the bundle (no pagination). - - # Manually create valid FHIR resources - medication_request_resource = { - 'resourceType': 'MedicationRequest', - 'id': '3123', - 'subject': { - 'reference': 'Patient/347' - }, - 'intent': 'order', - 'status': 'unknown', - 'medicationReference': { - 'reference': 'Medication/example' - } + + self.assertEqual(len(result), 3) # Expect three pages of results + self.assertEqual(result[0].entry[0].resource.id, "3123") + self.assertEqual(result[1].entry[0].resource.id, "3124") + self.assertEqual(result[2].entry[0].resource.id, "3125") + + @responses.activate + def test_perform_resources_iter_single_page(self): + # Single page bundle with one entry + bundle_content = { + "resourceType": "Bundle", + "type": "searchset", + "entry": [self.create_bundle_entry("3123")] } - # Create a valid BundleEntry with the MedicationRequest resource - mock_entry1 = BundleEntry({ - 'fullUrl': 'https://example.com/base/MedicationRequest/3123', - 'resource': medication_request_resource - }) - - # Create a valid Bundle - mock_bundle = Bundle({ - 'resourceType': 'Bundle', - 'type': 'searchset', # Required field - 'entry': [ - {'fullUrl': 'https://example.com/base/MedicationRequest/3123', 'resource': medication_request_resource} - ] - }) - - # Mock the behavior of `perform` to return a bundle with entries - self.search.perform = MagicMock(return_value=mock_bundle) - - # Mock `iter_pages` to return a single page (mock bundle) - mock_iter_pages.return_value = iter([mock_bundle]) - - # Ensure `requests.get` does not actually run - mock_get.return_value.status_code = 200 - mock_get.return_value.json.return_value = {'resourceType': 'Bundle'} - - resource_iter = self.search.perform_resources_iter(self.mock_server) - resources = list(resource_iter) - - self.assertEqual(len(resources), len(mock_bundle.entry)) - self.assertTrue(all(isinstance(entry, BundleEntry) for entry in mock_bundle.entry)) - - @patch('fhirclient._utils.iter_pages') - def test_perform_resources_iter_multiple_pages(self, mock_iter_pages): - # Create first bundle with one entry and a next link - mock_bundle_page1 = Bundle({ - 'resourceType': 'Bundle', - 'type': 'searchset', - 'entry': [ - {'fullUrl': 'https://example.com/base/MedicationRequest/3123', - 'resource': { - 'resourceType': 'MedicationRequest', - 'id': '3123', - 'subject': { - 'reference': 'Patient/347' - }, - 'intent': 'order', - 'status': 'unknown', - 'medicationReference': { - 'reference': 'Medication/example' - } - }} + # Mock the single page response + self.add_mock_response("https://example.com/Bundle?patient=347&_count=1", bundle_content) + + # Call perform_resources_iter and collect resources + result = list(self.search.perform_resources_iter(self.mock_server)) + + self.assertEqual(len(result), 1) + self.assertEqual(result[0].id, "3123") + + @responses.activate + def test_perform_resources_iter_pagination(self): + # First page bundle with a "next" link + bundle_page_1 = { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + {"relation": "next", + "url": "https://example.com/base/MedicationRequest?patient=347&searchId=ff15fd40-ff71-4b48-b366-09c706bed9d0&page=2"} ], - 'link': [ - { - 'relation': 'next', - 'url': 'https://example.com/base/MedicationRequest?page=2' # Simulating the next page link - } - ] - }) - - # Create second bundle with another entry - mock_bundle_page2 = Bundle({ - 'resourceType': 'Bundle', - 'type': 'searchset', - 'entry': [ - {'fullUrl': 'https://example.com/base/MedicationRequest/3124', - 'resource': { - 'resourceType': 'MedicationRequest', - 'id': '3124', - 'subject': { - 'reference': 'Patient/348' - }, - 'intent': 'order', - 'status': 'unknown', - 'medicationReference': { - 'reference': 'Medication/example2' - } - }} - ] - }) - - # Directly mock requests.get to return a proper response - mock_response = MagicMock() - mock_response.raise_for_status = MagicMock() # Simulate that there is no HTTP error - mock_response.json.return_value = mock_bundle_page2.as_json() # Return the second bundle's JSON - - # Now override the requests.get behavior - with patch('requests.get', return_value=mock_response): - # Mock perform to return the first page bundle - self.search.perform = MagicMock(return_value=mock_bundle_page1) - - # Mock iter_pages to return both bundles - mock_iter_pages.return_value = iter([mock_bundle_page1, mock_bundle_page2]) - - # Execute the method to get resources - resource_iter = self.search.perform_resources_iter(self.mock_server) - resources = list(resource_iter) - - # Ensure that both resources from both pages are included - self.assertEqual(len(resources), 2) # Expect 2 resources, one per page - - # Ensure that the entries are from the correct pages - self.assertEqual(resources[0].id, '3123') - self.assertEqual(resources[1].id, '3124') - - # Ensure that all resources are correctly typed - self.assertTrue(all( - isinstance(resource, fhirclient.models.medicationrequest.MedicationRequest) for resource in resources)) + "entry": [self.create_bundle_entry("3123")] + } + # Second page bundle without a "next" link + bundle_page_2 = { + "resourceType": "Bundle", + "type": "searchset", + "entry": [self.create_bundle_entry("3124")] + } -class MockServer(server.FHIRServer): - """ Reads local files. - """ + # Mock both page responses + self.add_mock_response("https://example.com/Bundle?patient=347&_count=1", bundle_page_1) + self.add_mock_response( + "https://example.com/base/MedicationRequest?patient=347&searchId=ff15fd40-ff71-4b48-b366-09c706bed9d0&page=2", + bundle_page_2) + + # Execute perform_resources_iter to retrieve resources across pages + result = list(self.search.perform_resources_iter(self.mock_server)) + + self.assertEqual(len(result), 2) # Expect resources from both pages + self.assertEqual(result[0].id, "3123") # First page resource + self.assertEqual(result[1].id, "3124") # Second page resource + + @responses.activate + def test_perform_resources_iter_multiple_pages(self): + # Mock similar pagination for perform_resources_iter + bundle_page_1 = { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + {"relation": "next", "url": "https://example.com/base/MedicationRequest?patient=347&page=2"} + ], + "entry": [self.create_bundle_entry("3123")] + } + + bundle_page_2 = { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + {"relation": "next", "url": "https://example.com/base/MedicationRequest?patient=347&page=3"} + ], + "entry": [self.create_bundle_entry("3124")] + } + + bundle_page_3 = { + "resourceType": "Bundle", + "type": "searchset", + "entry": [self.create_bundle_entry("3125")] + } + + # Mock responses for each page + self.add_mock_response("https://example.com/Bundle?patient=347&_count=1", bundle_page_1) + self.add_mock_response("https://example.com/base/MedicationRequest?patient=347&page=2", bundle_page_2) + self.add_mock_response("https://example.com/base/MedicationRequest?patient=347&page=3", bundle_page_3) - def __init__(self, tmpdir: str): - super().__init__(None, base_uri='https://fhir.smarthealthit.org') - self.directory = tmpdir + # Execute perform_resources_iter to retrieve resources across all pages + result = list(self.search.perform_resources_iter(self.mock_server)) + + self.assertEqual(len(result), 3) # Expect resources from three pages + self.assertEqual(result[0].id, "3123") + self.assertEqual(result[1].id, "3124") + self.assertEqual(result[2].id, "3125") + + +# Network-level Mocking +class MockServer(server.FHIRServer): + def __init__(self, base_url): + super().__init__(None, base_uri=base_url) - def request_json(self, path, nosign=False): - assert path - with io.open(os.path.join(self.directory, path), encoding='utf-8') as handle: - return json.load(handle) + def request_json(self, path): + full_url = f"{self.base_uri.rstrip('/')}/{path.lstrip('/')}" + response = requests.get(full_url) + return response.json() if response else None diff --git a/tests/utils_pagination_test.py b/tests/utils_pagination_test.py index f1f3c042..611847ec 100644 --- a/tests/utils_pagination_test.py +++ b/tests/utils_pagination_test.py @@ -24,8 +24,12 @@ def instantiate_from(self, filename): def test_get_next_link(self): inst = self.instantiate_from("bundle-example.json") self.assertIsNotNone(inst, "Must have instantiated a Bundle instance") + + # The returned URL should be the sanitized version next_link = _get_next_link(inst) - self.assertEqual(next_link, "https://example.com/base/MedicationRequest?patient=347&searchId=ff15fd40-ff71-4b48-b366-09c706bed9d0&page=2") + sanitized_expected_url = "https://example.com/base/MedicationRequest?patient=347&searchId=ff15fd40-ff71-4b48-b366-09c706bed9d0&page=2" + + self.assertEqual(next_link, sanitized_expected_url) def test_get_next_link_no_next(self): bundle_without_next = { @@ -47,8 +51,13 @@ def test_sanitize_next_link_invalid_scheme(self): with self.assertRaises(ValueError): _sanitize_next_link(next_link) - def test_sanitize_next_link_invalid_domain(self): - next_link = "https:///example.com/base/MedicationRequest?patient=347&searchId=ff15fd40-ff71-4b48-b366-09c706bed9d0&page=2" + def test_sanitize_next_link_missing_scheme(self): + next_link = "example.com/base/MedicationRequest?patient=347&page=2" + with self.assertRaises(ValueError): + _sanitize_next_link(next_link) + + def test_sanitize_next_link_missing_netloc(self): + next_link = "https:///MedicationRequest?page=2" with self.assertRaises(ValueError): _sanitize_next_link(next_link)