-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DEV-132 add deferred response reader
This allows us to build all of the requests and pass them to the library for aggregation, but defer the actual network request until the library is ready to read the file. By deferring the requests, we don't need to pre-download all of the files before aggregating them.
- Loading branch information
1 parent
5c21680
commit 31cb4e3
Showing
4 changed files
with
157 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import hashlib | ||
import io | ||
from typing import Callable, Optional | ||
|
||
import requests | ||
|
||
ResponseProvider = Callable[[], requests.Response] | ||
|
||
|
||
class DeferredRequestReader(io.BufferedIOBase):
    """Read-only binary stream that defers a request until it is first read.

    The provider is not called when the reader is constructed. It is called
    the first time ``read`` (or the ``response`` property) is used; the
    response body is then checked against the optional md5 digest and served
    from memory on every subsequent read.

    Args:
        provider: A zero-argument callable that returns a response object
            exposing ``raise_for_status()`` and ``content`` (bytes).
        md5sum: An optional md5 digest in hex format. When given, the
            response body must hash to this value.
    """

    # Annotations that name ``ResponseProvider`` / ``requests`` are quoted so
    # they are not evaluated when the class body is executed.
    def __init__(self, provider: "ResponseProvider", md5sum: Optional[str] = None):
        super().__init__()
        self._provider = provider
        self._md5sum = md5sum

        self._response = None
        self._content = b""
        self._content_position = 0
        self._content_length = 0

    def _realize(self):
        """Call the provider once and cache the validated response body.

        Raises:
            ValueError: If an md5 digest was supplied and does not match.
            Exception: Whatever ``raise_for_status()`` raises for a failed
                request is propagated unchanged.
        """
        # Compare against None explicitly: the truthiness of a
        # requests.Response reflects its HTTP status, not its presence.
        if self._response is not None:
            return

        response = self._provider()
        response.raise_for_status()

        # Read the body a single time so validation and later reads are
        # guaranteed to see the same bytes.
        content = response.content
        self._validate_checksum(content)

        self._content = content
        self._content_position = 0
        self._content_length = len(content)
        # Assigned last so a failed validation leaves the reader unrealized.
        self._response = response

    def _validate_checksum(self, content):
        """Raise ValueError if ``content`` does not match the expected md5."""
        if not self._md5sum:
            return

        md5 = hashlib.md5(content).hexdigest()
        if self._md5sum != md5:
            raise ValueError(f"Failed checksum. Expected {self._md5sum}. Got {md5}.")

    @property
    def response(self) -> "requests.Response":
        """The realized response; triggers the request on first access."""
        self._realize()
        return self._response

    def readable(self):
        """Report that this stream supports reading."""
        return True

    def read(self, size=-1):
        """Read up to ``size`` bytes from the response body.

        Args:
            size: Maximum number of bytes to return. ``None`` or any
                negative value reads everything that remains.

        Returns:
            The bytes read, or ``b""`` once the body is exhausted.
        """
        self._realize()

        remaining = self._content_length - self._content_position
        if remaining <= 0:
            return b""

        if size is None or size < 0:
            size = remaining

        start = self._content_position
        # Clamp so the position never runs past the end of the body.
        end = min(start + size, self._content_length)
        self._content_position = end
        return self._content[start:end]

    def read1(self, size=-1):
        """Read up to ``size`` bytes; identical to ``read`` for this stream.

        The inherited ``read1`` is unsupported, which prevents wrappers such
        as ``io.TextIOWrapper`` from reading through this object.
        """
        return self.read(size)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import pytest | ||
import requests | ||
|
||
from gdc_maf_tool import defer | ||
|
||
|
||
def test_deferredrequestreader__read():
    """Iterating the reader yields the response body one line at a time."""

    class StubResponse(requests.Response):
        """Response stand-in with a fixed three-line body and no HTTP error."""

        def raise_for_status(self):
            return

        @property
        def content(self):
            return b"one\ntwo\nthree"

    # The class itself is a zero-argument callable returning a response.
    reader = defer.DeferredRequestReader(lambda: StubResponse())

    expected = [b"one\n", b"two\n", b"three"]
    assert list(reader) == expected
|
||
|
||
def test_deferredrequestreader__failed_request():
    """A failed request surfaces as HTTPError when the reader is read."""

    class FakeResponse(requests.Response):
        """Response stand-in whose status check always fails."""

        def raise_for_status(self):
            raise requests.HTTPError()

    def provider():
        return FakeResponse()

    # Build the reader outside the ``raises`` block: construction must not
    # trigger the request, so the error has to come from ``read`` itself.
    reader = defer.DeferredRequestReader(provider)

    with pytest.raises(requests.HTTPError):
        reader.read()
|
||
|
||
def test_deferredrequestreader__md5_match():
    """A body whose md5 equals the expected digest is returned unchanged."""
    expected_digest = "d8ab26d704d5d89a5356609ec42c2691"

    class StubResponse(requests.Response):
        """Response stand-in with a fixed body and no HTTP error."""

        def raise_for_status(self):
            return

        @property
        def content(self):
            return b"md5_match\n"

    reader = defer.DeferredRequestReader(lambda: StubResponse(), expected_digest)

    body = reader.read()
    assert body == b"md5_match\n"
|
||
|
||
def test_deferredrequestreader__md5_mismatch():
    """A body whose md5 differs from the expected digest fails on read."""

    class FakeResponse(requests.Response):
        """Response stand-in whose body does not hash to the digest below."""

        @property
        def content(self):
            return b"md5_mismatch\n"

        def raise_for_status(self):
            return

    def provider():
        return FakeResponse()

    # Build the reader outside the ``raises`` block: construction must not
    # fetch or validate anything, so the error has to come from ``read``.
    reader = defer.DeferredRequestReader(
        provider, "d8ab26d704d5d89a5356609ec42c2691"
    )

    # Match on the message so an unrelated ValueError cannot pass the test.
    with pytest.raises(ValueError, match="Failed checksum"):
        reader.read()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters