From 2f17a2d9754e8eab1ef37d14ef28ac78d49f2f96 Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Tue, 22 Oct 2019 08:53:44 -0400 Subject: [PATCH 01/16] initial allow for multiple servers --- refgenconf/const.py | 2 +- refgenconf/refgenconf.py | 277 ++++++++++++++++++++++++--------------- 2 files changed, 174 insertions(+), 105 deletions(-) diff --git a/refgenconf/const.py b/refgenconf/const.py index 72388771..ed396571 100644 --- a/refgenconf/const.py +++ b/refgenconf/const.py @@ -9,7 +9,7 @@ CFG_NAME = "genome configuration" CFG_ENV_VARS = ["REFGENIE"] CFG_CONST = ["CFG_NAME", "CFG_ENV_VARS"] -DEFAULT_SERVER = "http://refgenomes.databio.org" +DEFAULT_SERVER = ["http://refgenomes.databio.org"] API_VERSION = "v2" DEFAULT_TAG = "default" diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 6db34528..33be6187 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -97,7 +97,15 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10): else: _LOGGER.debug("Config version is compliant: {}".format(version)) try: - self[CFG_SERVER_KEY] = self[CFG_SERVER_KEY].rstrip("/") + #_LOGGER.info("JPS self[CFG_SERVER_KEY]: {}".format(self[CFG_SERVER_KEY])) # JPS DEBUG + if isinstance(self[CFG_SERVER_KEY], list): + #_LOGGER.info("JPS is list()") # JPS DEBUG + tmp_list = [server_url.rstrip("/") for server_url in self[CFG_SERVER_KEY]] + self[CFG_SERVER_KEY] = tmp_list + #_LOGGER.info("JPS rstrip self[CFG_SERVER_KEY]: {}".format(self[CFG_SERVER_KEY])) # JPS DEBUG + else: + self[CFG_SERVER_KEY] = self[CFG_SERVER_KEY].rstrip("/") + self[CFG_SERVER_KEY] = [self[CFG_SERVER_KEY]] except KeyError: raise MissingConfigDataError(CFG_SERVER_KEY) @@ -344,6 +352,7 @@ def list_remote(self, genome=None, order=None, get_url=lambda server, id: constr genomes, assets = _list_remote(url, genome, order) return genomes, assets + def tag_asset(self, genome, asset, tag, new_tag): """ Retags the asset selected by the tag with the new_tag. @@ -466,97 +475,121 @@ def pull_asset(self, genome, asset, tag, unpack=True, force=None, def raise_unpack_error(): raise NotImplementedError("Option to not extract tarballs is not yet supported.") - tag = _download_json(get_json_url(self.genome_server, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ - if tag is None else tag - _LOGGER.debug("Determined tag: '{}'".format(tag)) - unpack or raise_unpack_error() - - url_attrs = get_json_url(self.genome_server, API_ID_ASSET_ATTRS).format(genome=genome, asset=asset) - url_archive = get_json_url(self.genome_server, API_ID_ARCHIVE).format(genome=genome, asset=asset) - archive_data = _download_json(url_attrs, params={"tag": tag}) - - if sys.version_info[0] == 2: - archive_data = asciify_json_dict(archive_data) - gat = [genome, asset, tag] - # local directory that the asset data will be stored in - tag_dir = os.path.dirname(self.filepath(*gat)) - # local directory the downloaded archive will be temporarily saved in - genome_dir_path = os.path.join(self[CFG_FOLDER_KEY], genome) - # local path to the temporarily saved archive - filepath = os.path.join(genome_dir_path, asset + "__" + tag + ".tgz") - # check if the genome/asset:tag exists and get request user decision - if os.path.exists(tag_dir): - def preserve(): - _LOGGER.debug("Preserving existing: {}".format(tag_dir)) - return gat, None + # Expect list for following logic JPS DEBUG + #if not isinstance(self.genome_server, list): + # self.genome_server = [self.genome_server] + # _LOGGER.info("Should no longer arrive here?") # JPS DEBUG - def msg_overwrite(): - _LOGGER.debug("Overwriting: {}".format(tag_dir)) - shutil.rmtree(tag_dir) - if force is False: - return preserve() - elif force is None: - if not query_yes_no("Replace existing ({})?".format(tag_dir), "no"): + num_servers = 0 + for server_url in self.genome_server: + num_servers += 1 + #_LOGGER.info("self.genome_server: {}".format(self.genome_server)) # JPS DEBUG + #_LOGGER.info("server_url: {}".format(server_url)) # JPS DEBUG + try: + tag = _download_json(get_json_url(server_url, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ + if tag is None else tag + _LOGGER.debug("Determined tag: '{}'".format(tag)) + #_LOGGER.info("Determined tag: '{}'".format(tag)) # JPS DEBUG + unpack or raise_unpack_error() + #_LOGGER.info("self.genome_server before resetting it: {}".format(self.genome_server)) # JPS DEBUG + self.genome_server = server_url + #_LOGGER.info("self.genome_server after resetting it: {}".format(self.genome_server)) # JPS DEBUG + except DownloadJsonError: + _LOGGER.warning("Could not retrieve json from {}".format(server_url)) # JPS DEBUG + if num_servers == len(self.genome_server): + _LOGGER.error("No servers [{}] responded to request for json".format(self.genome_server)) # JPS DEBUG + return [genome, asset, tag], None + else: + continue + + url_attrs = get_json_url(self.genome_server, API_ID_ASSET_ATTRS).format(genome=genome, asset=asset) + url_archive = get_json_url(self.genome_server, API_ID_ARCHIVE).format(genome=genome, asset=asset) + archive_data = _download_json(url_attrs, params={"tag": tag}) + + if sys.version_info[0] == 2: + archive_data = asciify_json_dict(archive_data) + gat = [genome, asset, tag] + # local directory that the asset data will be stored in + #_LOGGER.info("gat: '{}'".format(gat)) # JPS DEBUG + #_LOGGER.info("tag_dir: '{}'".format(os.path.dirname(self.filepath(*gat)))) # JPS DEBUG + tag_dir = os.path.dirname(self.filepath(*gat)) + # local directory the downloaded archive will be temporarily saved in + genome_dir_path = os.path.join(self[CFG_FOLDER_KEY], genome) + # local path to the temporarily saved archive + filepath = os.path.join(genome_dir_path, asset + "__" + tag + ".tgz") + # check if the genome/asset:tag exists and get request user decision + if os.path.exists(tag_dir): + def preserve(): + _LOGGER.debug("Preserving existing: {}".format(tag_dir)) + return gat, None + + def msg_overwrite(): + _LOGGER.debug("Overwriting: {}".format(tag_dir)) + shutil.rmtree(tag_dir) + if force is False: return preserve() + elif force is None: + if not query_yes_no("Replace existing ({})?".format(tag_dir), "no"): + return preserve() + else: + msg_overwrite() else: msg_overwrite() - else: - msg_overwrite() - # check asset digests local-server match for each parent - [self._chk_digest_if_avail(genome, x) for x in archive_data[CFG_ASSET_PARENTS_KEY] if CFG_ASSET_PARENTS_KEY in archive_data] + # check asset digests local-server match for each parent + [self._chk_digest_if_avail(genome, x) for x in archive_data[CFG_ASSET_PARENTS_KEY] if CFG_ASSET_PARENTS_KEY in archive_data] - bundle_name = '{}/{}:{}'.format(*gat) - archsize = archive_data[CFG_ARCHIVE_SIZE_KEY] - _LOGGER.debug("'{}' archive size: {}".format(bundle_name, archsize)) - if _is_large_archive(archsize) and not query_yes_no("Are you sure you want to download this large archive?"): - _LOGGER.info("pull action aborted by user") - return gat, None + bundle_name = '{}/{}:{}'.format(*gat) + archsize = archive_data[CFG_ARCHIVE_SIZE_KEY] + _LOGGER.debug("'{}' archive size: {}".format(bundle_name, archsize)) + if _is_large_archive(archsize) and not query_yes_no("Are you sure you want to download this large archive?"): + _LOGGER.info("pull action aborted by user") + return gat, None - if not os.path.exists(genome_dir_path): - _LOGGER.debug("Creating directory: {}".format(genome_dir_path)) - os.makedirs(genome_dir_path) + if not os.path.exists(genome_dir_path): + _LOGGER.debug("Creating directory: {}".format(genome_dir_path)) + os.makedirs(genome_dir_path) - # Download the file from `url` and save it locally under `filepath`: - _LOGGER.info("Downloading URL: {}".format(url_archive)) - try: - signal.signal(signal.SIGINT, build_signal_handler(filepath)) - _download_url_progress(url_archive, filepath, bundle_name, params={"tag": tag}) - except HTTPError as e: - _LOGGER.error("File not found on server: {}".format(e)) - return gat, None - except ConnectionRefusedError as e: - _LOGGER.error(str(e)) - _LOGGER.error("Server {}/{} refused download. Check your internet settings".format(self.genome_server, - API_VERSION)) - return gat, None - except ContentTooShortError as e: - _LOGGER.error(str(e)) - _LOGGER.error("'{}' download incomplete".format(bundle_name)) - return gat, None - else: - _LOGGER.info("Download complete: {}".format(filepath)) + # Download the file from `url` and save it locally under `filepath`: + _LOGGER.info("Downloading URL: {}".format(url_archive)) + try: + signal.signal(signal.SIGINT, build_signal_handler(filepath)) + _download_url_progress(url_archive, filepath, bundle_name, params={"tag": tag}) + except HTTPError as e: + _LOGGER.error("File not found on server: {}".format(e)) # JPS Add messaging here? and reboot loop + return gat, None + except ConnectionRefusedError as e: + _LOGGER.error(str(e)) + _LOGGER.error("Server {}/{} refused download. Check your internet settings".format(server_url, + API_VERSION)) + return gat, None + except ContentTooShortError as e: + _LOGGER.error(str(e)) + _LOGGER.error("'{}' download incomplete".format(bundle_name)) + return gat, None + else: + _LOGGER.info("Download complete: {}".format(filepath)) - new_checksum = checksum(filepath) - old_checksum = archive_data and archive_data.get(CFG_ARCHIVE_CHECKSUM_KEY) - if old_checksum and new_checksum != old_checksum: - _LOGGER.error("Checksum mismatch: ({}, {})".format(new_checksum, old_checksum)) - return gat, None - else: - _LOGGER.debug("Matched checksum: '{}'".format(old_checksum)) - import tempfile - # successfully downloaded and moved tarball; untar it - if unpack and filepath.endswith(".tgz"): - _LOGGER.info("Extracting asset tarball and saving to: {}".format(tag_dir)) - tmpdir = tempfile.mkdtemp(dir=genome_dir_path) # TODO: use context manager here when we drop support for py2 - untar(filepath, tmpdir) - # here we suspect the unarchived asset to be an asset-named directory with the asset data inside - # and we transfer it to the tag-named subdirectory - shutil.move(os.path.join(tmpdir, asset), tag_dir) - shutil.rmtree(tmpdir) - if os.path.isfile(filepath): - os.remove(filepath) - return gat, archive_data + new_checksum = checksum(filepath) + old_checksum = archive_data and archive_data.get(CFG_ARCHIVE_CHECKSUM_KEY) + if old_checksum and new_checksum != old_checksum: + _LOGGER.error("Checksum mismatch: ({}, {})".format(new_checksum, old_checksum)) + return gat, None + else: + _LOGGER.debug("Matched checksum: '{}'".format(old_checksum)) + import tempfile + # successfully downloaded and moved tarball; untar it + if unpack and filepath.endswith(".tgz"): + _LOGGER.info("Extracting asset tarball and saving to: {}".format(tag_dir)) + tmpdir = tempfile.mkdtemp(dir=genome_dir_path) # TODO: use context manager here when we drop support for py2 + untar(filepath, tmpdir) + # here we suspect the unarchived asset to be an asset-named directory with the asset data inside + # and we transfer it to the tag-named subdirectory + shutil.move(os.path.join(tmpdir, asset), tag_dir) + shutil.rmtree(tmpdir) + if os.path.isfile(filepath): + os.remove(filepath) + return gat, archive_data def update_relatives_assets(self, genome, asset, tag=None, data=None, children=False): """ @@ -808,27 +841,50 @@ def chk_digest_update_child(self, genome, remote_asset_name, child_name): remote_asset_data = prp(remote_asset_name) asset = remote_asset_data["item"] tag = remote_asset_data["tag"] - asset_digest_url = construct_request_url(self.genome_server, API_ID_DIGEST).\ - format(genome=genome, asset=asset, tag=tag) - try: - remote_digest = _download_json(asset_digest_url) - except DownloadJsonError: - return - try: - # we need to allow for missing seek_keys section so that the digest is respected even from the previously - # populated 'incomplete asset' from the server - _assert_gat_exists(self[CFG_GENOMES_KEY], genome, asset, tag, - allow_incomplete=not self.is_asset_complete(genome, asset, tag)) - except (KeyError, MissingAssetError, MissingGenomeError, MissingSeekKeyError): - self.update_tags(genome, asset, tag, {CFG_ASSET_CHECKSUM_KEY: remote_digest}) - _LOGGER.info("Could not find '{}/{}:{}' digest. Populating with server data".format(genome, asset, tag)) - else: - local_digest = self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY]\ - [tag][CFG_ASSET_CHECKSUM_KEY] - if remote_digest != local_digest: - raise RemoteDigestMismatchError(asset, local_digest, remote_digest) - finally: - self.update_relatives_assets(genome, asset, tag, [child_name], children=True) + #_LOGGER.info("self.genome_server in chk_digest_update_child: {}".format(self.genome_server)) # JPS DEBUG + # Expect list for following logic JPS DEBUG + #if not isinstance(self.genome_server, list): + # self.genome_server = [self.genome_server] + + num_servers = 0 + bad_servers = [] + for server_url in self.genome_server: + num_servers += 1 + try: + asset_digest_url = construct_request_url(server_url, API_ID_DIGEST).\ + format(genome=genome, asset=asset, tag=tag) + #_LOGGER.info("self.genome_server before resetting it: {}".format(self.genome_server)) # JPS DEBUG + #self.genome_server = server_url + #_LOGGER.info("self.genome_server after resetting it: {}".format(self.genome_server)) # JPS DEBUG + # JPS TODO: Messaging for successful update? + except DownloadJsonError: + bad_servers.append(server_url) + #_LOGGER.warning("Could not update child digest from {}".format(server_url)) # JPS DEBUG + if num_servers == len(self.genome_server): + _LOGGER.error("Could not download json file from the following server(s), {}".format(bad_servers)) # JPS DEBUG + return + else: + continue + + try: + remote_digest = _download_json(asset_digest_url) + except DownloadJsonError: + return + try: + # we need to allow for missing seek_keys section so that the digest is respected even from the previously + # populated 'incomplete asset' from the server + _assert_gat_exists(self[CFG_GENOMES_KEY], genome, asset, tag, + allow_incomplete=not self.is_asset_complete(genome, asset, tag)) + except (KeyError, MissingAssetError, MissingGenomeError, MissingSeekKeyError): + self.update_tags(genome, asset, tag, {CFG_ASSET_CHECKSUM_KEY: remote_digest}) + _LOGGER.info("Could not find '{}/{}:{}' digest. Populating with server data".format(genome, asset, tag)) + else: + local_digest = self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY]\ + [tag][CFG_ASSET_CHECKSUM_KEY] + if remote_digest != local_digest: + raise RemoteDigestMismatchError(asset, local_digest, remote_digest) + finally: + self.update_relatives_assets(genome, asset, tag, [child_name], children=True) class DownloadProgressBar(tqdm): @@ -858,9 +914,19 @@ def _download_json(url, params=None): """ import requests _LOGGER.debug("Downloading JSON data; querying URL: '{}'".format(url)) + #_LOGGER.info("Downloading JSON data; querying URL: '{}'".format(url)) # JPS DEBUG resp = requests.get(url, params=params) + #_LOGGER.info("resp.content: '{}'".format(resp.content)) # JPS DEBUG + #_LOGGER.info("resp.text: '{}'".format(resp.text)) # JPS DEBUG + #_LOGGER.info("resp.status_code: '{}'".format(resp.status_code)) # JPS DEBUG + #resp_default = requests.get("http://staging.refgenomes.databio.org", params=params) # DEBUG + #if resp_default.ok: + # _LOGGER.info("resp_default: '{}'".format(resp_default)) # JPS DEBUG + # _LOGGER.info("resp_default.json(): '{}'".format(resp_default.json())) # JPS DEBUG if resp.ok: return resp.json() + elif resp.status_code == 404: + resp = None raise DownloadJsonError(resp) @@ -1101,6 +1167,9 @@ def construct_request_url(server_url, operation_id): :param str operation_id: the operationId of the endpoint :return str: a complete URL for the request """ + #mapping_value = _get_server_endpoints_mapping(server_url)[operation_id] # JPS DEBUG + #_LOGGER.info("[operation_id] : {}".format(operation_id)) # JPS DEBUG + #_LOGGER.info("_get_server_endpoints_mapping(server_url)[operation_id] : {}".format(mapping_value)) # JPS DEBUG try: return server_url + _get_server_endpoints_mapping(server_url)[operation_id] except KeyError as e: From 6577e8adc9217215549a03a114b7759a81cdcec1 Mon Sep 17 00:00:00 2001 From: Jason Smith Date: Tue, 22 Oct 2019 16:15:18 -0400 Subject: [PATCH 02/16] update pull_assets to handle checking multiple servers for an asset --- refgenconf/refgenconf.py | 57 +++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index edb36238..a01ae20d 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -103,7 +103,7 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10): tmp_list = [server_url.rstrip("/") for server_url in self[CFG_SERVER_KEY]] self[CFG_SERVER_KEY] = tmp_list #_LOGGER.info("JPS rstrip self[CFG_SERVER_KEY]: {}".format(self[CFG_SERVER_KEY])) # JPS DEBUG - else: + else: # Logic in pull_asset expects a list, even for a single server self[CFG_SERVER_KEY] = self[CFG_SERVER_KEY].rstrip("/") self[CFG_SERVER_KEY] = [self[CFG_SERVER_KEY]] except KeyError: @@ -479,7 +479,11 @@ def raise_unpack_error(): # self.genome_server = [self.genome_server] # _LOGGER.info("Should no longer arrive here?") # JPS DEBUG + # JPS Do I pull this logic out into separate function that checks for good json? num_servers = 0 + bad_servers = [] + no_asset_json = [] + server_list = self.genome_server for server_url in self.genome_server: num_servers += 1 #_LOGGER.info("self.genome_server: {}".format(self.genome_server)) # JPS DEBUG @@ -488,23 +492,52 @@ def raise_unpack_error(): tag = _download_json(get_json_url(server_url, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ if tag is None else tag _LOGGER.debug("Determined tag: '{}'".format(tag)) - #_LOGGER.info("Determined tag: '{}'".format(tag)) # JPS DEBUG unpack or raise_unpack_error() #_LOGGER.info("self.genome_server before resetting it: {}".format(self.genome_server)) # JPS DEBUG self.genome_server = server_url #_LOGGER.info("self.genome_server after resetting it: {}".format(self.genome_server)) # JPS DEBUG except DownloadJsonError: _LOGGER.warning("Could not retrieve json from {}".format(server_url)) # JPS DEBUG - if num_servers == len(self.genome_server): - _LOGGER.error("No servers [{}] responded to request for json".format(self.genome_server)) # JPS DEBUG - return [genome, asset, tag], None - else: - continue + # if num_servers == len(self.genome_server): + # _LOGGER.error("Could not retrieve json file from the following server(s): {}".format(self.genome_server)) # JPS DEBUG + # return [genome, asset, tag], None + # else: + # continue + bad_servers.append(server_url) + #_LOGGER.warning("Could not list assets from {}".format(server_url)) # JPS DEBUG + continue + #_LOGGER.info("server_url: {}".format(server_url)) # JPS DEBUG + #_LOGGER.info("len(server_list): {}".format(len(server_list))) # JPS DEBUG + #_LOGGER.info("server_list: {}".format(server_list)) # JPS DEBUG + #_LOGGER.info("num_servers: {}".format(num_servers)) # JPS DEBUG + #if num_servers == len(server_list): + # _LOGGER.error("Could not retrieve json file from the following server(s): {}".format(bad_servers)) # JPS DEBUG + #return [genome, asset, tag], None + # if len(bad_servers) == len(self.genome_server): + # return [genome, asset, tag], None url_attrs = get_json_url(self.genome_server, API_ID_ASSET_ATTRS).format(genome=genome, asset=asset) url_archive = get_json_url(self.genome_server, API_ID_ARCHIVE).format(genome=genome, asset=asset) - archive_data = _download_json(url_attrs, params={"tag": tag}) + #_LOGGER.info("url_attrs: '{}'".format(url_attrs)) # JPS DEBUG + try: + archive_data = _download_json(url_attrs, params={"tag": tag}) + except DownloadJsonError: + no_asset_json.append(server_url) + #_LOGGER.warning("Could not retrieve {} {} from the following server: {}".format(genome, asset, server_url)) + #_LOGGER.info("len(server_list): {}".format(len(server_list))) # JPS DEBUG + #_LOGGER.info("server_list: {}".format(server_list)) # JPS DEBUG + #_LOGGER.info("num_servers: {}".format(num_servers)) # JPS DEBUG + #if num_servers < len(server_list): + # _LOGGER.warning("Checking next refgenieserver...") + #else: + if num_servers == len(server_list): + _LOGGER.error("Could not retrieve {} {}'s json file from the following server(s): {}".format(genome, asset, no_asset_json)) # JPS DEBUG + return [genome, asset, tag], None + continue + + #_LOGGER.info("archive_data: {}".format(archive_data)) # JPS DEBUG + #if archive_data is not None: if sys.version_info[0] == 2: archive_data = asciify_json_dict(archive_data) gat = [genome, asset, tag] @@ -917,15 +950,19 @@ def _download_json(url, params=None): resp = requests.get(url, params=params) #_LOGGER.info("resp.content: '{}'".format(resp.content)) # JPS DEBUG #_LOGGER.info("resp.text: '{}'".format(resp.text)) # JPS DEBUG - #_LOGGER.info("resp.status_code: '{}'".format(resp.status_code)) # JPS DEBUG + #_LOGGER.info("resp.status_code, '{}', for {}".format(resp.status_code, url)) # JPS DEBUG + #_LOGGER.info("resp.url: '{}'".format(url)) # JPS DEBUG #resp_default = requests.get("http://staging.refgenomes.databio.org", params=params) # DEBUG #if resp_default.ok: # _LOGGER.info("resp_default: '{}'".format(resp_default)) # JPS DEBUG - # _LOGGER.info("resp_default.json(): '{}'".format(resp_default.json())) # JPS DEBUG + #_LOGGER.info("resp.json(): '{}'".format(resp.json())) # JPS DEBUG if resp.ok: return resp.json() elif resp.status_code == 404: + #return None resp = None + # So right now, when this happens, it raises the error and that's that for the program. + # Instead, we want it to look for the asset on the next server... JPS raise DownloadJsonError(resp) From d8f2562d1be2b5e34f210c4c81e9cbf8ef19f21d Mon Sep 17 00:00:00 2001 From: Jason Smith Date: Wed, 23 Oct 2019 07:55:30 -0400 Subject: [PATCH 03/16] clean up notations --- refgenconf/refgenconf.py | 72 +++------------------------------------- 1 file changed, 4 insertions(+), 68 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index a01ae20d..0d197105 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -97,12 +97,9 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10): else: _LOGGER.debug("Config version is compliant: {}".format(version)) try: - #_LOGGER.info("JPS self[CFG_SERVER_KEY]: {}".format(self[CFG_SERVER_KEY])) # JPS DEBUG if isinstance(self[CFG_SERVER_KEY], list): - #_LOGGER.info("JPS is list()") # JPS DEBUG tmp_list = [server_url.rstrip("/") for server_url in self[CFG_SERVER_KEY]] self[CFG_SERVER_KEY] = tmp_list - #_LOGGER.info("JPS rstrip self[CFG_SERVER_KEY]: {}".format(self[CFG_SERVER_KEY])) # JPS DEBUG else: # Logic in pull_asset expects a list, even for a single server self[CFG_SERVER_KEY] = self[CFG_SERVER_KEY].rstrip("/") self[CFG_SERVER_KEY] = [self[CFG_SERVER_KEY]] @@ -474,76 +471,39 @@ def pull_asset(self, genome, asset, tag, unpack=True, force=None, def raise_unpack_error(): raise NotImplementedError("Option to not extract tarballs is not yet supported.") - # Expect list for following logic JPS DEBUG - #if not isinstance(self.genome_server, list): - # self.genome_server = [self.genome_server] - # _LOGGER.info("Should no longer arrive here?") # JPS DEBUG - - # JPS Do I pull this logic out into separate function that checks for good json? num_servers = 0 bad_servers = [] no_asset_json = [] server_list = self.genome_server for server_url in self.genome_server: num_servers += 1 - #_LOGGER.info("self.genome_server: {}".format(self.genome_server)) # JPS DEBUG - #_LOGGER.info("server_url: {}".format(server_url)) # JPS DEBUG try: tag = _download_json(get_json_url(server_url, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ if tag is None else tag _LOGGER.debug("Determined tag: '{}'".format(tag)) unpack or raise_unpack_error() - #_LOGGER.info("self.genome_server before resetting it: {}".format(self.genome_server)) # JPS DEBUG self.genome_server = server_url - #_LOGGER.info("self.genome_server after resetting it: {}".format(self.genome_server)) # JPS DEBUG except DownloadJsonError: - _LOGGER.warning("Could not retrieve json from {}".format(server_url)) # JPS DEBUG - # if num_servers == len(self.genome_server): - # _LOGGER.error("Could not retrieve json file from the following server(s): {}".format(self.genome_server)) # JPS DEBUG - # return [genome, asset, tag], None - # else: - # continue + _LOGGER.warning("Could not retrieve json from {}".format(server_url)) bad_servers.append(server_url) - #_LOGGER.warning("Could not list assets from {}".format(server_url)) # JPS DEBUG continue - #_LOGGER.info("server_url: {}".format(server_url)) # JPS DEBUG - #_LOGGER.info("len(server_list): {}".format(len(server_list))) # JPS DEBUG - #_LOGGER.info("server_list: {}".format(server_list)) # JPS DEBUG - #_LOGGER.info("num_servers: {}".format(num_servers)) # JPS DEBUG - #if num_servers == len(server_list): - # _LOGGER.error("Could not retrieve json file from the following server(s): {}".format(bad_servers)) # JPS DEBUG - #return [genome, asset, tag], None - # if len(bad_servers) == len(self.genome_server): - # return [genome, asset, tag], None url_attrs = get_json_url(self.genome_server, API_ID_ASSET_ATTRS).format(genome=genome, asset=asset) url_archive = get_json_url(self.genome_server, API_ID_ARCHIVE).format(genome=genome, asset=asset) - #_LOGGER.info("url_attrs: '{}'".format(url_attrs)) # JPS DEBUG try: archive_data = _download_json(url_attrs, params={"tag": tag}) except DownloadJsonError: no_asset_json.append(server_url) - #_LOGGER.warning("Could not retrieve {} {} from the following server: {}".format(genome, asset, server_url)) - #_LOGGER.info("len(server_list): {}".format(len(server_list))) # JPS DEBUG - #_LOGGER.info("server_list: {}".format(server_list)) # JPS DEBUG - #_LOGGER.info("num_servers: {}".format(num_servers)) # JPS DEBUG - #if num_servers < len(server_list): - # _LOGGER.warning("Checking next refgenieserver...") - #else: if num_servers == len(server_list): - _LOGGER.error("Could not retrieve {} {}'s json file from the following server(s): {}".format(genome, asset, no_asset_json)) # JPS DEBUG + _LOGGER.error("Could not retrieve {} {}'s json file from the following server(s): {}".format(genome, asset, no_asset_json)) return [genome, asset, tag], None continue - #_LOGGER.info("archive_data: {}".format(archive_data)) # JPS DEBUG - #if archive_data is not None: if sys.version_info[0] == 2: archive_data = asciify_json_dict(archive_data) gat = [genome, asset, tag] # local directory that the asset data will be stored in - #_LOGGER.info("gat: '{}'".format(gat)) # JPS DEBUG - #_LOGGER.info("tag_dir: '{}'".format(os.path.dirname(self.filepath(*gat)))) # JPS DEBUG tag_dir = os.path.dirname(self.filepath(*gat)) # local directory the downloaded archive will be temporarily saved in genome_dir_path = os.path.join(self[CFG_FOLDER_KEY], genome) @@ -588,7 +548,7 @@ def msg_overwrite(): signal.signal(signal.SIGINT, build_signal_handler(filepath)) _download_url_progress(url_archive, filepath, bundle_name, params={"tag": tag}) except HTTPError as e: - _LOGGER.error("File not found on server: {}".format(e)) # JPS Add messaging here? and reboot loop + _LOGGER.error("File not found on server: {}".format(e)) return gat, None except ConnectionRefusedError as e: _LOGGER.error(str(e)) @@ -873,10 +833,6 @@ def chk_digest_update_child(self, genome, remote_asset_name, child_name): remote_asset_data = prp(remote_asset_name) asset = remote_asset_data["item"] tag = remote_asset_data["tag"] - #_LOGGER.info("self.genome_server in chk_digest_update_child: {}".format(self.genome_server)) # JPS DEBUG - # Expect list for following logic JPS DEBUG - #if not isinstance(self.genome_server, list): - # self.genome_server = [self.genome_server] num_servers = 0 bad_servers = [] @@ -885,15 +841,10 @@ def chk_digest_update_child(self, genome, remote_asset_name, child_name): try: asset_digest_url = construct_request_url(server_url, API_ID_DIGEST).\ format(genome=genome, asset=asset, tag=tag) - #_LOGGER.info("self.genome_server before resetting it: {}".format(self.genome_server)) # JPS DEBUG - #self.genome_server = server_url - #_LOGGER.info("self.genome_server after resetting it: {}".format(self.genome_server)) # JPS DEBUG - # JPS TODO: Messaging for successful update? except DownloadJsonError: bad_servers.append(server_url) - #_LOGGER.warning("Could not update child digest from {}".format(server_url)) # JPS DEBUG if num_servers == len(self.genome_server): - _LOGGER.error("Could not download json file from the following server(s), {}".format(bad_servers)) # JPS DEBUG + _LOGGER.error("Could not download json file from the following server(s), {}".format(bad_servers)) return else: continue @@ -946,23 +897,11 @@ def _download_json(url, params=None): """ import requests _LOGGER.debug("Downloading JSON data; querying URL: '{}'".format(url)) - #_LOGGER.info("Downloading JSON data; querying URL: '{}'".format(url)) # JPS DEBUG resp = requests.get(url, params=params) - #_LOGGER.info("resp.content: '{}'".format(resp.content)) # JPS DEBUG - #_LOGGER.info("resp.text: '{}'".format(resp.text)) # JPS DEBUG - #_LOGGER.info("resp.status_code, '{}', for {}".format(resp.status_code, url)) # JPS DEBUG - #_LOGGER.info("resp.url: '{}'".format(url)) # JPS DEBUG - #resp_default = requests.get("http://staging.refgenomes.databio.org", params=params) # DEBUG - #if resp_default.ok: - # _LOGGER.info("resp_default: '{}'".format(resp_default)) # JPS DEBUG - #_LOGGER.info("resp.json(): '{}'".format(resp.json())) # JPS DEBUG if resp.ok: return resp.json() elif resp.status_code == 404: - #return None resp = None - # So right now, when this happens, it raises the error and that's that for the program. - # Instead, we want it to look for the asset on the next server... JPS raise DownloadJsonError(resp) @@ -1203,9 +1142,6 @@ def construct_request_url(server_url, operation_id): :param str operation_id: the operationId of the endpoint :return str: a complete URL for the request """ - #mapping_value = _get_server_endpoints_mapping(server_url)[operation_id] # JPS DEBUG - #_LOGGER.info("[operation_id] : {}".format(operation_id)) # JPS DEBUG - #_LOGGER.info("_get_server_endpoints_mapping(server_url)[operation_id] : {}".format(mapping_value)) # JPS DEBUG try: return server_url + _get_server_endpoints_mapping(server_url)[operation_id] except KeyError as e: From 704995f15fce0bac1012692c779a47aba9053bd3 Mon Sep 17 00:00:00 2001 From: Jason Smith Date: Wed, 23 Oct 2019 07:59:02 -0400 Subject: [PATCH 04/16] update version and changes to allow for multiple refgenieservers --- docs/changelog.md | 5 +++++ refgenconf/_version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index efe355a0..5ebfa1e1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.5.3] - 2019-10-23 + +### Changed +- enable multiple refgenieservers for pull_asset and digest_update_child + ## [0.5.2] - 2019-10-22 ### Changed diff --git a/refgenconf/_version.py b/refgenconf/_version.py index 72251527..43a1e95b 100644 --- a/refgenconf/_version.py +++ b/refgenconf/_version.py @@ -1 +1 @@ -__version__ = "0.5.2" +__version__ = "0.5.3" From b9294efb1d65f02fcd4d6b263800251c6e5b7baa Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 23 Oct 2019 08:41:33 -0400 Subject: [PATCH 05/16] more strict remote assets by genome listing; see https://github.com/databio/refgenie/issues/135 --- refgenconf/refgenconf.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 0d197105..3db62452 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -328,7 +328,9 @@ def list_local(self, genome=None, order=None): :return str, str: text reps of locally available genomes and assets """ if genome is not None: - _assert_gat_exists(self[CFG_GENOMES_KEY], gname=genome) + if isinstance(genome, str): + genome = [genome] + [_assert_gat_exists(self[CFG_GENOMES_KEY], gname=g) for g in genome] genomes_str = self.genomes_str(order=order) if genome is None \ else ", ".join(_select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome)) return genomes_str, self.assets_str(genome=genome, order=order) @@ -984,7 +986,7 @@ def _assert_gat_exists(genomes, gname, aname=None, tname=None, allow_incomplete= try: genome = genomes[gname] except KeyError: - raise MissingGenomeError("Your genomes do not include {}".format(gname)) + raise MissingGenomeError("Your genomes do not include '{}'".format(gname)) if aname is not None: try: asset_data = genome[CFG_ASSETS_KEY][aname] @@ -1025,7 +1027,9 @@ def _list_remote(url, genome, order=None): :return str, str: text reps of remotely available genomes and assets """ genomes_data = _read_remote_data(url) - refgens = _select_genomes(sorted(genomes_data.keys(), key=order), genome) + refgens = _select_genomes(sorted(genomes_data.keys(), key=order), genome, strict=True) + if not refgens: + sys.exit(0) filtered_genomes_data = {refgen: genomes_data[refgen] for refgen in refgens} asset_texts = ["{}/ {}".format(g.rjust(20), ", ".join(a)) for g, a in filtered_genomes_data.items()] return ", ".join(refgens), "\n".join(asset_texts) @@ -1095,11 +1099,13 @@ def _check_insert_data(obj, datatype, name): return True -def _select_genomes(genomes, genome=None): +def _select_genomes(genomes, genome=None, strict=False): """ Safely select a subset of genomes :param list[str] | str genome: genomes that the assets should be found for + :param bool strict: whether a non-existent genome should lead to a warning. + Specific genome request is disregarded otherwise :raise TypeError: if genome argument type is not a list or str :return list: selected subset of genomes """ @@ -1108,6 +1114,11 @@ def _select_genomes(genomes, genome=None): genome = [genome] elif not isinstance(genome, list) or not all(isinstance(i, str) for i in genome): raise TypeError("genome has to be a list[str] or a str, got '{}'".format(genome.__class__.__name__)) + if strict: + for g in genome: + if g not in genomes: + _LOGGER.warning("Genomes do not include '{}'".format(g)) + return return genomes if (genome is None or not all(x in genomes for x in genome)) else genome From ed37920af8c49a24bee6970fb5735211aa9d5848 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 23 Oct 2019 08:56:11 -0400 Subject: [PATCH 06/16] record all missing requsted genomes --- refgenconf/refgenconf.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 3db62452..930e3a14 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -351,7 +351,6 @@ def list_remote(self, genome=None, order=None, get_url=lambda server, id: constr genomes, assets = _list_remote(url, genome, order) return genomes, assets - def tag_asset(self, genome, asset, tag, new_tag): """ Retags the asset selected by the tag with the new_tag. @@ -1029,7 +1028,7 @@ def _list_remote(url, genome, order=None): genomes_data = _read_remote_data(url) refgens = _select_genomes(sorted(genomes_data.keys(), key=order), genome, strict=True) if not refgens: - sys.exit(0) + return None, None filtered_genomes_data = {refgen: genomes_data[refgen] for refgen in refgens} asset_texts = ["{}/ {}".format(g.rjust(20), ", ".join(a)) for g, a in filtered_genomes_data.items()] return ", ".join(refgens), "\n".join(asset_texts) @@ -1115,10 +1114,13 @@ def _select_genomes(genomes, genome=None, strict=False): elif not isinstance(genome, list) or not all(isinstance(i, str) for i in genome): raise TypeError("genome has to be a list[str] or a str, got '{}'".format(genome.__class__.__name__)) if strict: + missing = [] for g in genome: if g not in genomes: - _LOGGER.warning("Genomes do not include '{}'".format(g)) - return + missing.append(g) + if missing: + _LOGGER.warning("Genomes do not include: {}".format(", ".join(missing))) + return return genomes if (genome is None or not all(x in genomes for x in genome)) else genome From edee83787c0826e76c80e3e38b149582f608a22e Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 23 Oct 2019 09:35:36 -0400 Subject: [PATCH 07/16] do not stop selecting after missing genome --- refgenconf/refgenconf.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 930e3a14..3893ab4a 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -1115,12 +1115,15 @@ def _select_genomes(genomes, genome=None, strict=False): raise TypeError("genome has to be a list[str] or a str, got '{}'".format(genome.__class__.__name__)) if strict: missing = [] + filtered = [] for g in genome: - if g not in genomes: + if g in genomes: + filtered.append(g) + else: missing.append(g) if missing: _LOGGER.warning("Genomes do not include: {}".format(", ".join(missing))) - return + return None if not filtered else filtered return genomes if (genome is None or not all(x in genomes for x in genome)) else genome From 5c8487f33010f647981522148f0beda153a8b391 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 23 Oct 2019 09:45:52 -0400 Subject: [PATCH 08/16] return entire collection if no particular genome was specified --- refgenconf/refgenconf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 3893ab4a..a3bbd2d2 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -1113,6 +1113,8 @@ def _select_genomes(genomes, genome=None, strict=False): genome = [genome] elif not isinstance(genome, list) or not all(isinstance(i, str) for i in genome): raise TypeError("genome has to be a list[str] or a str, got '{}'".format(genome.__class__.__name__)) + else: + return genomes if strict: missing = [] filtered = [] @@ -1124,7 +1126,7 @@ def _select_genomes(genomes, genome=None, strict=False): if missing: _LOGGER.warning("Genomes do not include: {}".format(", ".join(missing))) return None if not filtered else filtered - return genomes if (genome is None or not all(x in genomes for x in genome)) else genome + return genomes if not all(x in genomes for x in genome) else genome def get_asset_tags(asset): From 99e461c7efeb88ec9e60ca3532f108dc5d1a9e59 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 25 Oct 2019 14:47:20 -0400 Subject: [PATCH 09/16] muliple changes: - do not change RGC attributes in pull method; #69 - return server URL used in pull; #70 --- refgenconf/refgenconf.py | 106 +++++++++++++++++--------------------- tests/test_1pull_asset.py | 2 +- 2 files changed, 49 insertions(+), 59 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index a3bbd2d2..33cc76e7 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -475,35 +475,36 @@ def raise_unpack_error(): num_servers = 0 bad_servers = [] no_asset_json = [] - server_list = self.genome_server for server_url in self.genome_server: num_servers += 1 try: tag = _download_json(get_json_url(server_url, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ if tag is None else tag - _LOGGER.debug("Determined tag: '{}'".format(tag)) - unpack or raise_unpack_error() - self.genome_server = server_url except DownloadJsonError: - _LOGGER.warning("Could not retrieve json from {}".format(server_url)) + _LOGGER.warning("Could not retrieve JSON from: {}".format(server_url)) bad_servers.append(server_url) continue - - url_attrs = get_json_url(self.genome_server, API_ID_ASSET_ATTRS).format(genome=genome, asset=asset) - url_archive = get_json_url(self.genome_server, API_ID_ARCHIVE).format(genome=genome, asset=asset) + else: + _LOGGER.debug("Determined tag: {}".format(tag)) + unpack or raise_unpack_error() + gat = [genome, asset, tag] + url_attrs = get_json_url(server_url, API_ID_ASSET_ATTRS).format(genome=genome, asset=asset) + url_archive = get_json_url(server_url, API_ID_ARCHIVE).format(genome=genome, asset=asset) try: archive_data = _download_json(url_attrs, params={"tag": tag}) + _LOGGER.debug("Determined server URL: {}".format(server_url)) except DownloadJsonError: no_asset_json.append(server_url) - if num_servers == len(server_list): - _LOGGER.error("Could not retrieve {} {}'s json file from the following server(s): {}".format(genome, asset, no_asset_json)) - return [genome, asset, tag], None + if num_servers == len(self.genome_server): + _LOGGER.error("Asset '{}/{}:{}' not available on any of the following servers: {}". + format(*gat, ", ".join(no_asset_json))) + return gat, None, None continue if sys.version_info[0] == 2: archive_data = asciify_json_dict(archive_data) - gat = [genome, asset, tag] + # local directory that the asset data will be stored in tag_dir = os.path.dirname(self.filepath(*gat)) # local directory the downloaded archive will be temporarily saved in @@ -514,7 +515,7 @@ def raise_unpack_error(): if os.path.exists(tag_dir): def preserve(): _LOGGER.debug("Preserving existing: {}".format(tag_dir)) - return gat, None + return gat, None, None def msg_overwrite(): _LOGGER.debug("Overwriting: {}".format(tag_dir)) @@ -530,14 +531,15 @@ def msg_overwrite(): msg_overwrite() # check asset digests local-server match for each parent - [self._chk_digest_if_avail(genome, x) for x in archive_data[CFG_ASSET_PARENTS_KEY] if CFG_ASSET_PARENTS_KEY in archive_data] + [self._chk_digest_if_avail(genome, x, server_url) + for x in archive_data[CFG_ASSET_PARENTS_KEY] if CFG_ASSET_PARENTS_KEY in archive_data] bundle_name = '{}/{}:{}'.format(*gat) archsize = archive_data[CFG_ARCHIVE_SIZE_KEY] _LOGGER.debug("'{}' archive size: {}".format(bundle_name, archsize)) if _is_large_archive(archsize) and not query_yes_no("Are you sure you want to download this large archive?"): _LOGGER.info("pull action aborted by user") - return gat, None + return gat, None, None if not os.path.exists(genome_dir_path): _LOGGER.debug("Creating directory: {}".format(genome_dir_path)) @@ -550,16 +552,16 @@ def msg_overwrite(): _download_url_progress(url_archive, filepath, bundle_name, params={"tag": tag}) except HTTPError as e: _LOGGER.error("File not found on server: {}".format(e)) - return gat, None + return gat, None, None except ConnectionRefusedError as e: _LOGGER.error(str(e)) _LOGGER.error("Server {}/{} refused download. Check your internet settings".format(server_url, API_VERSION)) - return gat, None + return gat, None, None except ContentTooShortError as e: _LOGGER.error(str(e)) _LOGGER.error("'{}' download incomplete".format(bundle_name)) - return gat, None + return gat, None, None else: _LOGGER.info("Download complete: {}".format(filepath)) @@ -567,7 +569,7 @@ def msg_overwrite(): old_checksum = archive_data and archive_data.get(CFG_ARCHIVE_CHECKSUM_KEY) if old_checksum and new_checksum != old_checksum: _LOGGER.error("Checksum mismatch: ({}, {})".format(new_checksum, old_checksum)) - return gat, None + return gat, None, None else: _LOGGER.debug("Matched checksum: '{}'".format(old_checksum)) import tempfile @@ -582,7 +584,7 @@ def msg_overwrite(): shutil.rmtree(tmpdir) if os.path.isfile(filepath): os.remove(filepath) - return gat, archive_data + return gat, archive_data, server_url def update_relatives_assets(self, genome, asset, tag=None, data=None, children=False): """ @@ -789,7 +791,7 @@ def _invert_genomes(self, order=None): assets = sorted(genomes.keys(), key=order) return OrderedDict([(a, sorted(genomes[a], key=order)) for a in assets]) - def _chk_digest_if_avail(self, genome, remote_asset_name): + def _chk_digest_if_avail(self, genome, remote_asset_name, server_url): """ Check local asset digest against the remote one and populate children of the asset with the provided asset:tag. @@ -797,12 +799,13 @@ def _chk_digest_if_avail(self, genome, remote_asset_name): :param str genome: name of the genome to check the asset digests for :param str remote_asset_name: asset and tag names, formatted like: asset:tag + :param str server_url: addres of the server to query for the digests :raise RefgenconfError: if the local digest does not match its remote counterpart """ remote_asset_data = prp(remote_asset_name) asset = remote_asset_data["item"] tag = remote_asset_data["tag"] - asset_digest_url = construct_request_url(self.genome_server, API_ID_DIGEST).\ + asset_digest_url = construct_request_url(server_url, API_ID_DIGEST).\ format(genome=genome, asset=asset, tag=tag) try: remote_digest = _download_json(asset_digest_url) @@ -820,7 +823,7 @@ def _chk_digest_if_avail(self, genome, remote_asset_name): "with the server one after the pull".format(genome, asset, tag)) return - def chk_digest_update_child(self, genome, remote_asset_name, child_name): + def chk_digest_update_child(self, genome, remote_asset_name, child_name, server_url): """ Check local asset digest against the remote one and populate children of the asset with the provided asset:tag. @@ -829,46 +832,33 @@ def chk_digest_update_child(self, genome, remote_asset_name, child_name): :param str genome: name of the genome to check the asset digests for :param str remote_asset_name: asset and tag names, formatted like: asset:tag :param str child_name: name to be appended to the children of the parent + :param str server_url: addres of the server to query for the digests :raise RefgenconfError: if the local digest does not match its remote counterpart """ remote_asset_data = prp(remote_asset_name) asset = remote_asset_data["item"] tag = remote_asset_data["tag"] - - num_servers = 0 - bad_servers = [] - for server_url in self.genome_server: - num_servers += 1 - try: - asset_digest_url = construct_request_url(server_url, API_ID_DIGEST).\ - format(genome=genome, asset=asset, tag=tag) - except DownloadJsonError: - bad_servers.append(server_url) - if num_servers == len(self.genome_server): - _LOGGER.error("Could not download json file from the following server(s), {}".format(bad_servers)) - return - else: - continue - - try: - remote_digest = _download_json(asset_digest_url) - except DownloadJsonError: - return - try: - # we need to allow for missing seek_keys section so that the digest is respected even from the previously - # populated 'incomplete asset' from the server - _assert_gat_exists(self[CFG_GENOMES_KEY], genome, asset, tag, - allow_incomplete=not self.is_asset_complete(genome, asset, tag)) - except (KeyError, MissingAssetError, MissingGenomeError, MissingSeekKeyError): - self.update_tags(genome, asset, tag, {CFG_ASSET_CHECKSUM_KEY: remote_digest}) - _LOGGER.info("Could not find '{}/{}:{}' digest. Populating with server data".format(genome, asset, tag)) - else: - local_digest = self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY]\ - [tag][CFG_ASSET_CHECKSUM_KEY] - if remote_digest != local_digest: - raise RemoteDigestMismatchError(asset, local_digest, remote_digest) - finally: - self.update_relatives_assets(genome, asset, tag, [child_name], children=True) + asset_digest_url = construct_request_url(server_url, API_ID_DIGEST).\ + format(genome=genome, asset=asset, tag=tag) + try: + remote_digest = _download_json(asset_digest_url) + except DownloadJsonError: + return + try: + # we need to allow for missing seek_keys section so that the digest is respected even from the previously + # populated 'incomplete asset' from the server + _assert_gat_exists(self[CFG_GENOMES_KEY], genome, asset, tag, + allow_incomplete=not self.is_asset_complete(genome, asset, tag)) + except (KeyError, MissingAssetError, MissingGenomeError, MissingSeekKeyError): + self.update_tags(genome, asset, tag, {CFG_ASSET_CHECKSUM_KEY: remote_digest}) + _LOGGER.info("Could not find '{}/{}:{}' digest. Populating with server data".format(genome, asset, tag)) + else: + local_digest = self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY] \ + [tag][CFG_ASSET_CHECKSUM_KEY] + if remote_digest != local_digest: + raise RemoteDigestMismatchError(asset, local_digest, remote_digest) + finally: + self.update_relatives_assets(genome, asset, tag, [child_name], children=True) class DownloadProgressBar(tqdm): diff --git a/tests/test_1pull_asset.py b/tests/test_1pull_asset.py index 22e8f92e..35e8f964 100644 --- a/tests/test_1pull_asset.py +++ b/tests/test_1pull_asset.py @@ -40,7 +40,7 @@ def test_negative_response_to_large_download_prompt(rgc, gname, aname, tname): """ Test responsiveness to user abortion of pull request. """ with mock.patch("refgenconf.refgenconf._is_large_archive", return_value=True), \ mock.patch("refgenconf.refgenconf.query_yes_no", return_value=False): - gat, archive_dict = rgc.pull_asset(gname, aname, tname) + gat, archive_dict, server_url = rgc.pull_asset(gname, aname, tname) assert gat == [gname, aname, tname] From 16723136c5d8f1c913a8c1fca2a088f28b376226 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 25 Oct 2019 14:52:50 -0400 Subject: [PATCH 10/16] py2 compat --- refgenconf/refgenconf.py | 2 +- tests/test_1pull_asset.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 33cc76e7..a99ed6a7 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -498,7 +498,7 @@ def raise_unpack_error(): no_asset_json.append(server_url) if num_servers == len(self.genome_server): _LOGGER.error("Asset '{}/{}:{}' not available on any of the following servers: {}". - format(*gat, ", ".join(no_asset_json))) + format(genome, asset, tag, ", ".join(no_asset_json))) return gat, None, None continue diff --git a/tests/test_1pull_asset.py b/tests/test_1pull_asset.py index 35e8f964..469c5b72 100644 --- a/tests/test_1pull_asset.py +++ b/tests/test_1pull_asset.py @@ -28,7 +28,7 @@ def test_no_unpack(rgc, genome, asset, tag): rgc.pull_asset(genome, asset, tag, unpack=False) -@pytest.mark.parametrize(["gname", "aname"],[("human_repeats", 1), ("mouse_chrM2x", None)]) +@pytest.mark.parametrize(["gname", "aname"], [("human_repeats", 1), ("mouse_chrM2x", None)]) def test_pull_asset_illegal_asset_name(rgc, gname, aname): """ TypeError occurs if asset argument is not iterable. """ with pytest.raises(TypeError): @@ -50,6 +50,7 @@ def test_download_interruption(my_rgc, gname, aname, tname, caplog): """ Download interruption provides appropriate warning message and halts. """ import signal print("filepath: " + my_rgc._file_path) + def kill_download(*args, **kwargs): os.kill(os.getpid(), signal.SIGINT) From db5da6b324dda494fee0e98dcbb2ff776e53aea5 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 25 Oct 2019 15:55:59 -0400 Subject: [PATCH 11/16] Deal with HTTPError error; #71 --- refgenconf/refgenconf.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index a99ed6a7..4209d4a8 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -478,27 +478,27 @@ def raise_unpack_error(): for server_url in self.genome_server: num_servers += 1 try: - tag = _download_json(get_json_url(server_url, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ + determined_tag = _download_json(get_json_url(server_url, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ if tag is None else tag except DownloadJsonError: _LOGGER.warning("Could not retrieve JSON from: {}".format(server_url)) bad_servers.append(server_url) continue else: - _LOGGER.debug("Determined tag: {}".format(tag)) + _LOGGER.debug("Determined tag: {}".format(determined_tag)) unpack or raise_unpack_error() - gat = [genome, asset, tag] + gat = [genome, asset, determined_tag] url_attrs = get_json_url(server_url, API_ID_ASSET_ATTRS).format(genome=genome, asset=asset) url_archive = get_json_url(server_url, API_ID_ARCHIVE).format(genome=genome, asset=asset) try: - archive_data = _download_json(url_attrs, params={"tag": tag}) + archive_data = _download_json(url_attrs, params={"tag": determined_tag}) _LOGGER.debug("Determined server URL: {}".format(server_url)) except DownloadJsonError: no_asset_json.append(server_url) if num_servers == len(self.genome_server): _LOGGER.error("Asset '{}/{}:{}' not available on any of the following servers: {}". - format(genome, asset, tag, ", ".join(no_asset_json))) + format(genome, asset, determined_tag, ", ".join(no_asset_json))) return gat, None, None continue @@ -510,7 +510,7 @@ def raise_unpack_error(): # local directory the downloaded archive will be temporarily saved in genome_dir_path = os.path.join(self[CFG_FOLDER_KEY], genome) # local path to the temporarily saved archive - filepath = os.path.join(genome_dir_path, asset + "__" + tag + ".tgz") + filepath = os.path.join(genome_dir_path, asset + "__" + determined_tag + ".tgz") # check if the genome/asset:tag exists and get request user decision if os.path.exists(tag_dir): def preserve(): @@ -549,10 +549,17 @@ def msg_overwrite(): _LOGGER.info("Downloading URL: {}".format(url_archive)) try: signal.signal(signal.SIGINT, build_signal_handler(filepath)) - _download_url_progress(url_archive, filepath, bundle_name, params={"tag": tag}) - except HTTPError as e: - _LOGGER.error("File not found on server: {}".format(e)) - return gat, None, None + _download_url_progress(url_archive, filepath, bundle_name, params={"tag": determined_tag}) + except HTTPError: + _LOGGER.error("Asset archive '{}/{}:{}' is missing on the server: {s}".format(*gat, s=server_url)) + if server_url == self.genome_server[-1]: + # it this was the last server on the list, return + return gat, None, None + else: + _LOGGER.info("Trying next server") + # set the tag value back to what user requested + determined_tag = tag + continue except ConnectionRefusedError as e: _LOGGER.error(str(e)) _LOGGER.error("Server {}/{} refused download. Check your internet settings".format(server_url, From f40a7dacf7dd5a5bbb82ad06a7094cb67420631d Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 28 Oct 2019 09:32:06 -0400 Subject: [PATCH 12/16] Update changelog.md --- docs/changelog.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 5ebfa1e1..988e7890 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,10 +2,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.5.3] - 2019-10-23 +## [0.5.3] - unreleased ### Changed -- enable multiple refgenieservers for pull_asset and digest_update_child +- enable multiple refgenieservers for `pull_asset` method +- `_chk_digest_if_avail` and `_chk_digest_update_child` require server URL argument to get the asset information from ## [0.5.2] - 2019-10-22 From fe3d2b7aa2b230e52c86ee211fcc4a813b3de2df Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 28 Oct 2019 17:04:54 -0400 Subject: [PATCH 13/16] accumulate failures in list_local and raise an aggregated exception for all missging genomes https://github.com/databio/refgenconf/pull/68#discussion_r339621314 --- refgenconf/refgenconf.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 4209d4a8..880b4486 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -327,10 +327,17 @@ def list_local(self, genome=None, order=None): names for sort :return str, str: text reps of locally available genomes and assets """ + exceptions = [] if genome is not None: if isinstance(genome, str): genome = [genome] - [_assert_gat_exists(self[CFG_GENOMES_KEY], gname=g) for g in genome] + for g in genome: + try: + _assert_gat_exists(self[CFG_GENOMES_KEY], g) + except MissingGenomeError as e: + exceptions.append(e) + if exceptions: + raise MissingGenomeError(", ".join(map(str, exceptions))) genomes_str = self.genomes_str(order=order) if genome is None \ else ", ".join(_select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome)) return genomes_str, self.assets_str(genome=genome, order=order) From 9857a6071518321fd30b0d94ff5410fa8072b6e5 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 29 Oct 2019 09:07:09 -0400 Subject: [PATCH 14/16] change server config key https://github.com/databio/refgenconf/pull/68#discussion_r340049926 --- refgenconf/const.py | 9 +++++---- refgenconf/refgenconf.py | 27 ++++++++++++++++----------- tests/conftest.py | 2 +- tests/data/genomes.yaml | 2 +- tests/test_config_constructor.py | 10 +++++----- tests/test_list_remote.py | 6 +++--- 6 files changed, 31 insertions(+), 25 deletions(-) diff --git a/refgenconf/const.py b/refgenconf/const.py index ed396571..51fb20f6 100644 --- a/refgenconf/const.py +++ b/refgenconf/const.py @@ -9,7 +9,7 @@ CFG_NAME = "genome configuration" CFG_ENV_VARS = ["REFGENIE"] CFG_CONST = ["CFG_NAME", "CFG_ENV_VARS"] -DEFAULT_SERVER = ["http://refgenomes.databio.org"] +DEFAULT_SERVER = "http://refgenomes.databio.org" API_VERSION = "v2" DEFAULT_TAG = "default" @@ -36,6 +36,7 @@ "API_ID_RECIPE", "API_ID_LOG"] CFG_FOLDER_KEY = "genome_folder" +CFG_SERVERS_KEY = "genome_servers" CFG_SERVER_KEY = "genome_server" CFG_ARCHIVE_KEY = "genome_archive" CFG_VERSION_KEY = "config_version" @@ -59,7 +60,7 @@ CFG_TAG_DESC_KEY = "tag_description" CFG_TOP_LEVEL_KEYS = [ - CFG_FOLDER_KEY, CFG_SERVER_KEY, CFG_ARCHIVE_KEY, CFG_GENOMES_KEY, CFG_VERSION_KEY] + CFG_FOLDER_KEY, CFG_SERVER_KEY, CFG_SERVERS_KEY, CFG_ARCHIVE_KEY, CFG_GENOMES_KEY, CFG_VERSION_KEY] CFG_GENOME_KEYS = [ CFG_GENOME_DESC_KEY, CFG_ASSETS_KEY, CFG_CHECKSUM_KEY] CFG_GENOME_ATTRS_KEYS = [CFG_GENOME_DESC_KEY, CFG_CHECKSUM_KEY] @@ -67,7 +68,7 @@ CFG_ARCHIVE_CHECKSUM_KEY, CFG_SEEK_KEYS_KEY] CFG_KEY_NAMES = [ - "CFG_FOLDER_KEY", "CFG_SERVER_KEY", "CFG_GENOMES_KEY", + "CFG_FOLDER_KEY", "CFG_SERVER_KEY", "CFG_SERVERS_KEY", "CFG_GENOMES_KEY", "CFG_ASSET_PATH_KEY", "CFG_ASSET_DESC_KEY", "CFG_ARCHIVE_KEY", "CFG_ARCHIVE_SIZE_KEY", "CFG_SEEK_KEYS_KEY", "CFG_ASSET_SIZE_KEY", "CFG_CHECKSUM_KEY", "CFG_ARCHIVE_CHECKSUM_KEY", "CFG_VERSION_KEY", "CFG_ASSET_PARENTS_KEY", "CFG_ASSET_CHILDREN_KEY", "CFG_TAG_DESC_KEY", "CFG_ASSET_CHECKSUM_KEY", "CFG_ASSET_TAGS_KEY"] @@ -113,7 +114,7 @@ fasta: hg38.fa.gz fai: hg38.fa.fai chrom_sizes: sizes.txt -""".format(folder=CFG_FOLDER_KEY, server=CFG_SERVER_KEY, version=CFG_VERSION_KEY, assets=CFG_ASSETS_KEY, +""".format(folder=CFG_FOLDER_KEY, server=CFG_SERVERS_KEY, version=CFG_VERSION_KEY, assets=CFG_ASSETS_KEY, archive=CFG_ARCHIVE_KEY, digest=CFG_CHECKSUM_KEY, genomes=CFG_GENOMES_KEY, desc_genome=CFG_GENOME_DESC_KEY, asset_path=CFG_ASSET_PATH_KEY, desc_asset=CFG_ASSET_DESC_KEY, archive_digest=CFG_ARCHIVE_CHECKSUM_KEY, asset_size=CFG_ASSET_SIZE_KEY, archive_size=CFG_ARCHIVE_SIZE_KEY, diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 880b4486..215fd517 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -96,18 +96,23 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10): raise ConfigNotCompliantError(msg) else: _LOGGER.debug("Config version is compliant: {}".format(version)) + if CFG_SERVERS_KEY not in self and CFG_SERVER_KEY in self: + # backwards compatibility after server config key change + self[CFG_SERVERS_KEY] = self[CFG_SERVER_KEY] + del self[CFG_SERVER_KEY] + _LOGGER.debug("Moved servers list from '{}' to '{}'".format(CFG_SERVER_KEY, CFG_SERVERS_KEY)) try: - if isinstance(self[CFG_SERVER_KEY], list): - tmp_list = [server_url.rstrip("/") for server_url in self[CFG_SERVER_KEY]] - self[CFG_SERVER_KEY] = tmp_list + if isinstance(self[CFG_SERVERS_KEY], list): + tmp_list = [server_url.rstrip("/") for server_url in self[CFG_SERVERS_KEY]] + self[CFG_SERVERS_KEY] = tmp_list else: # Logic in pull_asset expects a list, even for a single server - self[CFG_SERVER_KEY] = self[CFG_SERVER_KEY].rstrip("/") - self[CFG_SERVER_KEY] = [self[CFG_SERVER_KEY]] + self[CFG_SERVERS_KEY] = self[CFG_SERVERS_KEY].rstrip("/") + self[CFG_SERVERS_KEY] = [self[CFG_SERVERS_KEY]] except KeyError: raise MissingConfigDataError(CFG_SERVER_KEY) def __bool__(self): - minkeys = set(self.keys()) == {CFG_SERVER_KEY, CFG_FOLDER_KEY, CFG_GENOMES_KEY} + minkeys = set(self.keys()) == {CFG_SERVERS_KEY, CFG_FOLDER_KEY, CFG_GENOMES_KEY} return not minkeys or bool(self[CFG_GENOMES_KEY]) __nonzero__ = __bool__ @@ -353,7 +358,7 @@ def list_remote(self, genome=None, order=None, get_url=lambda server, id: constr names for sort :return str, str: text reps of remotely available genomes and assets """ - url = get_url(self.genome_server, API_ID_ASSETS) + url = get_url(self[CFG_SERVERS_KEY], API_ID_ASSETS) _LOGGER.info("Querying available assets from server: {}".format(url)) genomes, assets = _list_remote(url, genome, order) return genomes, assets @@ -472,7 +477,7 @@ def pull_asset(self, genome, asset, tag, unpack=True, force=None, :raise refgenconf.UnboundEnvironmentVariablesError: if genome folder path contains any env. var. that's unbound """ - missing_vars = unbound_env_vars(self.genome_folder) + missing_vars = unbound_env_vars(self[CFG_FOLDER_KEY]) if missing_vars: raise UnboundEnvironmentVariablesError(", ".join(missing_vars)) @@ -482,7 +487,7 @@ def raise_unpack_error(): num_servers = 0 bad_servers = [] no_asset_json = [] - for server_url in self.genome_server: + for server_url in self[CFG_SERVERS_KEY]: num_servers += 1 try: determined_tag = _download_json(get_json_url(server_url, API_ID_DEFAULT_TAG).format(genome=genome, asset=asset)) \ @@ -503,7 +508,7 @@ def raise_unpack_error(): _LOGGER.debug("Determined server URL: {}".format(server_url)) except DownloadJsonError: no_asset_json.append(server_url) - if num_servers == len(self.genome_server): + if num_servers == len(self[CFG_SERVERS_KEY]): _LOGGER.error("Asset '{}/{}:{}' not available on any of the following servers: {}". format(genome, asset, determined_tag, ", ".join(no_asset_json))) return gat, None, None @@ -559,7 +564,7 @@ def msg_overwrite(): _download_url_progress(url_archive, filepath, bundle_name, params={"tag": determined_tag}) except HTTPError: _LOGGER.error("Asset archive '{}/{}:{}' is missing on the server: {s}".format(*gat, s=server_url)) - if server_url == self.genome_server[-1]: + if server_url == self[CFG_SERVERS_KEY][-1]: # it this was the last server on the list, return return gat, None, None else: diff --git a/tests/conftest.py b/tests/conftest.py index 2e3f8ca7..ea5601c3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -120,7 +120,7 @@ def made_genome_config_file(temp_genome_config_file): """ Make the test session's genome config file. """ genome_folder = os.path.dirname(temp_genome_config_file) extra_kv_lines = ["{}: {}".format(CFG_FOLDER_KEY, genome_folder), - "{}: {}".format(CFG_SERVER_KEY, "http://staging.refgenomes.databio.org/"), + "{}: {}".format(CFG_SERVERS_KEY, "http://staging.refgenomes.databio.org/"), "{}: {}".format(CFG_VERSION_KEY, package_version), "{}:".format(CFG_GENOMES_KEY)] gen_data_lines = PathExAttMap(CONF_DATA).get_yaml_lines() diff --git a/tests/data/genomes.yaml b/tests/data/genomes.yaml index 9cf92145..5e6cb98c 100644 --- a/tests/data/genomes.yaml +++ b/tests/data/genomes.yaml @@ -1,6 +1,6 @@ config_version: 0.3 genome_folder: /tmp -genome_server: http://staging.refgenomes.databio.org +genome_servers: http://staging.refgenomes.databio.org genomes: rCRSd: assets: diff --git a/tests/test_config_constructor.py b/tests/test_config_constructor.py index bdeeba8a..a73cf3c9 100644 --- a/tests/test_config_constructor.py +++ b/tests/test_config_constructor.py @@ -4,7 +4,7 @@ import pytest from attmap import PathExAttMap from refgenconf import RefGenConf, MissingConfigDataError, ConfigNotCompliantError -from refgenconf.const import CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_SERVER_KEY, \ +from refgenconf.const import CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_SERVERS_KEY, \ DEFAULT_SERVER __author__ = "Vince Reuter" @@ -37,16 +37,16 @@ def test_genome_folder_is_value_from_config_file_if_key_present(self, tmpdir_fac fout.write("{}: {}\n".format(CFG_FOLDER_KEY, expected)) else: fout.write(l) - if l.startswith(CFG_SERVER_KEY): + if l.startswith(CFG_SERVERS_KEY): found = True if not found: - fout.write("{}: {}".format(CFG_SERVER_KEY, DEFAULT_SERVER)) + fout.write("{}: {}".format(CFG_SERVERS_KEY, DEFAULT_SERVER)) rgc = RefGenConf(filepath=conf_file) assert expected != os.path.dirname(conf_file) assert expected == rgc[CFG_FOLDER_KEY] def test_empty_rgc_is_false(self): - assert bool(RefGenConf(entries={CFG_SERVER_KEY: DEFAULT_SERVER})) is False + assert bool(RefGenConf(entries={CFG_SERVERS_KEY: DEFAULT_SERVER})) is False def test_nonempty_rgc_is_true(self, rgc): assert bool(rgc) is True @@ -56,7 +56,7 @@ def test_illegal_genomes_mapping_type_gets_converted_to_empty_mapping(self, geno rgc = RefGenConf(entries={ CFG_FOLDER_KEY: tmpdir.strpath, CFG_GENOMES_KEY: genomes, - CFG_SERVER_KEY: DEFAULT_SERVER + CFG_SERVERS_KEY: DEFAULT_SERVER }) res = rgc[CFG_GENOMES_KEY] assert isinstance(res, PathExAttMap) diff --git a/tests/test_list_remote.py b/tests/test_list_remote.py index 2078ad2b..41be2e53 100644 --- a/tests/test_list_remote.py +++ b/tests/test_list_remote.py @@ -2,7 +2,7 @@ import mock from refgenconf import RefGenConf, CFG_FOLDER_KEY, CFG_GENOMES_KEY, \ - CFG_SERVER_KEY, DEFAULT_SERVER + CFG_SERVERS_KEY, DEFAULT_SERVER __author__ = "Vince Reuter" __email__ = "vreuter@virginia.edu" @@ -11,9 +11,9 @@ def test_list_remote(rgc, tmpdir): """ Verify expected behavior of remote genome/asset listing. """ new_rgc = RefGenConf(entries={CFG_FOLDER_KEY: tmpdir.strpath, - CFG_SERVER_KEY: DEFAULT_SERVER, + CFG_SERVERS_KEY: DEFAULT_SERVER, CFG_GENOMES_KEY: rgc[CFG_GENOMES_KEY]}) - new_rgc.genome_server = "http://staging.refgenomes.databio.org" + new_rgc[CFG_SERVERS_KEY] = "http://staging.refgenomes.databio.org" print("NEW RGC KEYS: {}".format(list(new_rgc.keys()))) with mock.patch("refgenconf.refgenconf._read_remote_data", return_value=rgc.genomes): From 7682e8e4d78b00a48216e4f8f5f47b08ccde8fe8 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 29 Oct 2019 11:11:05 -0400 Subject: [PATCH 15/16] changelog --- docs/changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changelog.md b/docs/changelog.md index 988e7890..c819c818 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [0.5.3] - unreleased ### Changed +- `genome_server` config key to `genome_servers` - enable multiple refgenieservers for `pull_asset` method - `_chk_digest_if_avail` and `_chk_digest_update_child` require server URL argument to get the asset information from From a1e44445ca8b13844ddad20145b2d5fc6b29a2d4 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 29 Oct 2019 16:37:26 -0400 Subject: [PATCH 16/16] release date --- docs/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index c819c818..51715b78 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,7 +2,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.5.3] - unreleased +## [0.5.3] - 2019-10-29 ### Changed - `genome_server` config key to `genome_servers`