Skip to content

Commit

Permalink
Issue #115 CrossBackendSplitter: internalize backend_for_collection caching
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Sep 4, 2023
1 parent bba6215 commit b4fc95a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
1 change: 0 additions & 1 deletion scripts/crossbackend-processing-poc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def main():
with TimingLogger(title=f"Connecting to {backend_url}", logger=_log):
connection = openeo.connect(url=backend_url).authenticate_oidc()

@functools.lru_cache(maxsize=100)
def backend_for_collection(collection_id) -> str:
metadata = connection.describe_collection(collection_id)
return metadata["summaries"][STAC_PROPERTY_FEDERATION_BACKENDS][0]
Expand Down
18 changes: 9 additions & 9 deletions src/openeo_aggregator/partitionedjobs/crossbackend.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ def split(
process_graph = process["process_graph"]

# Extract necessary back-ends from `load_collection` usage
backend_usage = collections.Counter(
self.backend_for_collection(node["arguments"]["id"])
for node in process_graph.values()
if node["process_id"] == "load_collection"
)
_log.info(
f"Extracted backend usage from `load_collection` nodes: {backend_usage}"
)
backend_per_collection: Dict[str, str] = {
cid: self.backend_for_collection(cid)
for cid in (
node["arguments"]["id"] for node in process_graph.values() if node["process_id"] == "load_collection"
)
}
backend_usage = collections.Counter(backend_per_collection.values())
_log.info(f"Extracted backend usage from `load_collection` nodes: {backend_usage=} {backend_per_collection=}")

primary_backend = backend_usage.most_common(1)[0][0] if backend_usage else None
secondary_backends = {b for b in backend_usage if b != primary_backend}
Expand All @@ -70,7 +70,7 @@ def split(

for node_id, node in process_graph.items():
if node["process_id"] == "load_collection":
bid = self.backend_for_collection(node["arguments"]["id"])
bid = backend_per_collection[node["arguments"]["id"]]
if bid == primary_backend and not (
self._always_split and primary_has_load_collection
):
Expand Down

0 comments on commit b4fc95a

Please sign in to comment.