From a25127e992f3da8e225724f1dc45ad177cc58585 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 13 Sep 2024 12:09:43 +0200 Subject: [PATCH] Model: Improve implementation of `AddressPair.navigate()` - Do not use the fundamental `.navigate()` method, as it needs too many workarounds. - Do not store and copy query parameters, because the implementation does not use `.navigate()` any longer. - Manipulate the `.path` property directly instead, computing it using the canonical `urljoin` function. - Adjustments about missing trailing slashes still need to take place. --- cratedb_toolkit/model.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/cratedb_toolkit/model.py b/cratedb_toolkit/model.py index e90d71b..1fe3a91 100644 --- a/cratedb_toolkit/model.py +++ b/cratedb_toolkit/model.py @@ -1,7 +1,7 @@ import dataclasses import typing as t from copy import deepcopy -from pathlib import Path +from urllib.parse import urljoin from attr import Factory from attrs import define @@ -141,26 +141,21 @@ class AddressPair: __SERVER_SCHEMES__ = ["http", "https", "mongodb", "mongodb+srv"] def navigate(self, source_path: str, target_path: str) -> "AddressPair": - source_url_query_parameters = self.source_url.query_params - target_url_query_parameters = self.target_url.query_params - source_url = deepcopy(self.source_url) target_url = deepcopy(self.target_url) # Q: What the hack? - # A: It makes subsequent `.navigate()` operations work. - if ( - source_url.scheme in self.__SERVER_SCHEMES__ - and Path(source_url.path).is_absolute() - and source_url.path[-1] != "/" - ): + # A: Adjustments about missing trailing slashes, business as usual. + # It makes subsequent `.navigate()` operations work. + # Remark: It is not applicable for filesystem paths including wildcards, + # like `./datasets/*.ndjson`. In this case, `.navigate()` should + # strip the `*.ndjson` part, and replace it by the designated label. + if source_url.scheme in self.__SERVER_SCHEMES__ and source_url.path[-1] != "/": source_url.path += "/" if target_url.path[-1] != "/": target_url.path += "/" - source_url = source_url.navigate(f"./{source_path}") - source_url.query_params = source_url_query_parameters - target_url = target_url.navigate(f"./{target_path}") - target_url.query_params = target_url_query_parameters + source_url.path = urljoin(source_url.path, source_path) + target_url.path = urljoin(target_url.path, target_path) return AddressPair(source_url, target_url)