Skip to content

Commit

Permalink
Limit max number of items in dataproviders
Browse files (browse the repository at this point in the history)
that support pagination to 10000. Clients can always request more data via offset
requests, but since this is not a streaming API, we need to build the entire
response in memory before we can start sending it.
  • Loading branch information
mvdbeek committed Sep 24, 2024
1 parent 2a7954c commit c0e3ed9
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 12 deletions.
8 changes: 6 additions & 2 deletions client/src/api/schema/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ export interface paths {
"/api/datasets/{dataset_id}": {
/**
* Displays information about and/or content of a dataset.
* @description **Note**: Due to the multipurpose nature of this endpoint, which can receive a wild variety of parameters
* @description **Note**: Due to the multipurpose nature of this endpoint, which can receive a wide variety of parameters
* and return different kinds of responses, the documentation here will be limited.
* To get more information please check the source code.
*/
Expand Down Expand Up @@ -14366,18 +14366,22 @@ export interface operations {
show_api_datasets__dataset_id__get: {
/**
* Displays information about and/or content of a dataset.
* @description **Note**: Due to the multipurpose nature of this endpoint, which can receive a wild variety of parameters
* @description **Note**: Due to the multipurpose nature of this endpoint, which can receive a wide variety of parameters
* and return different kinds of responses, the documentation here will be limited.
* To get more information please check the source code.
*/
parameters: {
/** @description The type of information about the dataset to be requested. */
/** @description The type of information about the dataset to be requested. Each of these values may require additional parameters in the request and may return different responses. */
/** @description Maximum number of items to return. Currently only applies to `data_type=raw_data` requests */
/** @description Starts at the beginning skip the first ( offset - 1 ) items and begin returning at the Nth item. Currently only applies to `data_type=raw_data` requests */
/** @description View to be passed to the serializer */
/** @description Comma-separated list of keys to be passed to the serializer */
query?: {
hda_ldda?: components["schemas"]["DatasetSourceType"];
data_type?: components["schemas"]["RequestDataType"] | null;
limit?: number | null;
offset?: number | null;
view?: string | null;
keys?: string | null;
};
Expand Down
15 changes: 8 additions & 7 deletions lib/galaxy/datatypes/dataproviders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from collections import deque
from typing import Dict

from galaxy.exceptions import RequestParameterInvalidException
from . import exceptions

log = logging.getLogger(__name__)
Expand All @@ -36,6 +37,7 @@ def stop( self ): self.endpoint = source.tell(); raise StopIteration()
Building a giant list by sweeping all possible dprov classes doesn't make sense
For now - I'm burying them in the class __init__s - but I don't like that
"""
MAX_LIMIT = 10000


# ----------------------------------------------------------------------------- base classes
Expand Down Expand Up @@ -233,21 +235,20 @@ class LimitedOffsetDataProvider(FilteredDataProvider):
settings = {"limit": "int", "offset": "int"}

# TODO: may want to squash this into DataProvider
def __init__(self, source, offset=0, limit=None, **kwargs):
def __init__(self, source, offset=0, limit=MAX_LIMIT, **kwargs):
"""
:param offset: the number of data to skip before providing.
:param limit: the final number of data to provide.
"""
super().__init__(source, **kwargs)

# how many valid data to skip before we start outputing data - must be positive
# (diff to support neg. indeces - must be pos.)
self.offset = max(offset, 0)
# how many valid data to skip before we start outputting data - must be positive
self.offset = offset

# how many valid data to return - must be positive (None indicates no limit)
# how many valid data to return - must be positive
if limit is None:
limit = MAX_LIMIT
self.limit = limit
if self.limit is not None:
self.limit = max(self.limit, 0)

def __iter__(self):
"""
Expand Down
27 changes: 24 additions & 3 deletions lib/galaxy/webapps/galaxy/api/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@
RequestDataType,
UpdateObjectStoreIdPayload,
)
from .common import (
LimitQueryParam,
OffsetQueryParam,
)

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -432,18 +436,35 @@ def show(
"may return different responses."
),
),
limit: Annotated[
Optional[int],
Query(
ge=1,
le=10000,
description="Maximum number of items to return. Currently only applies to `data_type=raw_data` requests",
),
] = None,
offset: Annotated[
Optional[int],
Query(
ge=0,
description="Starts at the beginning skip the first ( offset - 1 ) items and begin returning at the Nth item. Currently only applies to `data_type=raw_data` requests",
),
] = 0,
serialization_params: SerializationParams = Depends(query_serialization_params),
):
"""
**Note**: Due to the multipurpose nature of this endpoint, which can receive a wild variety of parameters
**Note**: Due to the multipurpose nature of this endpoint, which can receive a wide variety of parameters
and return different kinds of responses, the documentation here will be limited.
To get more information please check the source code.
"""
exclude_params = {"hda_ldda", "data_type"}
exclude_params = {"hda_ldda", "data_type", "limit", "offset"}
exclude_params.update(SerializationParams.model_fields.keys())
extra_params = get_query_parameters_from_request_excluding(request, exclude_params)

return self.service.show(trans, dataset_id, hda_ldda, serialization_params, data_type, **extra_params)
return self.service.show(
trans, dataset_id, hda_ldda, serialization_params, data_type, limit=limit, offset=offset, **extra_params
)

@router.get(
"/api/datasets/{dataset_id}/content/{content_type}",
Expand Down

0 comments on commit c0e3ed9

Please sign in to comment.