Skip to content

Commit

Permalink
Use CTE instead of temporary table in get_collection_stats
Browse files Browse the repository at this point in the history
Creation of temporary tables is not allowed in read-only db instances.
We support the use of read-only db instances.
  • Loading branch information
candleindark committed May 9, 2024
1 parent 3e837ac commit 9b46f36
Showing 1 changed file with 5 additions and 9 deletions.
14 changes: 5 additions & 9 deletions datalad_registry/blueprints/api/dataset_urls/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,23 +218,19 @@ def get_collection_stats(select_stmt: Select) -> CollectionStats:
Note: The execution of this function requires the Flask app's context
"""

# Cache the result of the select statement to a temporary table
tmp_tb = cache_result_to_tmp_tb(select_stmt, "tmp_tb")
base_cte = select_stmt.cte("base_cte")

# base_q = select_stmt.subquery("base_q")
base_q = select(tmp_tb).subquery("base_q")

datalad_ds_stats = get_dl_ds_collection_stats(base_q)
datalad_ds_stats = get_dl_ds_collection_stats(base_cte)

# Total number of datasets, as individual repos, without any deduplication
ds_count = db.session.execute(
select(func.count().label("ds_count")).select_from(base_q)
select(func.count().label("ds_count")).select_from(base_cte)
).scalar_one()

return CollectionStats(
datalad_ds_stats=datalad_ds_stats,
pure_annex_ds_stats=get_pure_annex_ds_collection_stats(base_q),
non_annex_ds_stats=get_non_annex_ds_collection_stats(base_q),
pure_annex_ds_stats=get_pure_annex_ds_collection_stats(base_cte),
non_annex_ds_stats=get_non_annex_ds_collection_stats(base_cte),
summary=StatsSummary(
unique_ds_count=datalad_ds_stats.unique_ds_stats.ds_count, ds_count=ds_count
),
Expand Down

0 comments on commit 9b46f36

Please sign in to comment.