Skip to content

Commit

Permalink
Small improvements: stricter file upload validation, credentials cast…
Browse files Browse the repository at this point in the history
…ed as secrets, ...
  • Loading branch information
Etienne Jodry authored and Etienne Jodry committed Nov 5, 2024
1 parent dc652eb commit 2bc530e
Show file tree
Hide file tree
Showing 13 changed files with 174 additions and 65 deletions.
18 changes: 16 additions & 2 deletions docs/developer_manual/s3conf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ can be done by overloading ``key_salt`` with a ``hybrid_property`` in the follow
session = self.__dict__.pop('session')
# Use session to fetch what you need.
await session.refresh(self, ['dataset'])
await session.refresh(self.dataset, ['project'])
# Build your custom prefix.
return f"{self.dataset.project.name}_{self.dataset.name}"
return f"{self.dataset.name}"
__table_args__ = (
UniqueConstraint(
"filename",
"extension",
"dataset_id",
name="uc_file_in_dataset"
),
)
.. note::

The ``uuid4`` prefix guarantees us key uniqueness in all probable cases.
In case you are replacing it by one of your own like above, you should
also add a unique constraint on the table for that purpose.
5 changes: 4 additions & 1 deletion src/biodm/components/controllers/s3controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def routes(self, **_) -> List[Mount | Route] | List[Mount] | List[BaseRoute]:
Route(f'{prefix}complete_multipart', self.complete_multipart, methods=[HttpMethod.PUT]),
PublicRoute(f'{prefix}post_success', self.post_success, methods=[HttpMethod.GET]),
]
self.post_upload_callback = Path(file_routes[-1].path)
self.svc.post_upload_callback = Path(file_routes[-1].path)

return file_routes + super().routes()

Expand Down Expand Up @@ -94,6 +94,9 @@ async def post_success(self, request: Request):
"""
await self.svc.post_success(
pk_val=self._extract_pk_val(request),
bucket=request.query_params.get('bucket', ''),
key=request.query_params.get('key', ''),
etag=request.query_params.get('etag', '').strip('"'),
)

return json_response("Uploaded.", status_code=201)
Expand Down
4 changes: 2 additions & 2 deletions src/biodm/components/services/dbservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from biodm.component import ApiService
from biodm.components import Base
from biodm.exceptions import (
DataError, EndpointError, FailedRead, FailedDelete, ReleaseVersionError, UpdateVersionedError, UnauthorizedError
DataError, EndpointError, FailedCreate, FailedRead, FailedDelete, ReleaseVersionError, UpdateVersionedError, UnauthorizedError
)
from biodm.managers import DatabaseManager
from biodm.tables import ListGroup, Group
Expand Down Expand Up @@ -59,7 +59,7 @@ async def _insert(
raise UpdateVersionedError(
"Attempt at updating versioned resources."
)
raise
raise FailedCreate(str(ie))

@DatabaseManager.in_session
async def _insert_list(
Expand Down
81 changes: 62 additions & 19 deletions src/biodm/components/services/s3service.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from asyncio import iscoroutine
from math import ceil
from pathlib import Path
from typing import List, Any, Sequence, Dict

from sqlalchemy import Insert
from sqlalchemy.ext.asyncio import AsyncSession

from biodm.components.table import Base, S3File
from biodm.exceptions import FileNotUploadedError, FileTooLargeError
from biodm.exceptions import FileNotUploadedError, FileTooLargeError, FileUploadSuccessError
from biodm.managers import DatabaseManager, S3Manager
from biodm.tables import Upload, UploadPart
from biodm.utils.utils import utcnow, classproperty
from biodm.utils.utils import utcnow, classproperty, check_hash
from biodm.utils.security import UserInfo
from .dbservice import CompositeEntityService

Expand All @@ -19,6 +20,8 @@

class S3Service(CompositeEntityService):
"""Class for automatic management of S3 bucket transactions for file resources."""
post_upload_callback: Path # set by S3Controller.

@classproperty
def s3(cls) -> S3Manager:
return cls.app.s3
Expand All @@ -29,21 +32,34 @@ def post_callback(self, item) -> str:
for key in self.table.pk
}

route = str(self.table.ctrl.post_upload_callback) # TODO: svc argument ?
route = str(self.post_upload_callback)
for key, val in mapping.items():
route = route.replace("{" + f"{key}" + "}", str(val))

srv = self.app.server_endpoint.strip('/')
return f"{srv}{route}"

async def gen_key(self, item, session: AsyncSession):
"""Generate a unique bucket key from file elements."""
await session.refresh(item, ['filename', 'extension'])
version = ""
if self.table.is_versioned:
await session.refresh(item, ['version'])
version = "_v" + str(item.version)
@property
def key_fields(self) -> List[str]:
"""List of fields used to compute the key."""
return ['filename', 'extension'] + (['version'] if self.table.is_versioned else [])

async def gen_key(self, item: S3File, session: AsyncSession, refresh=True) -> str:
"""Generate a unique bucket key from file elements.
:param item: file
:type item: S3File
:param session: current sqla session
:type session: AsyncSession
:param refresh: if key fields should be fetched - can save a db request, defaults to True
:type refresh: bool, optional
:return: unique s3 bucket file key
:rtype: str
"""
if refresh:
await session.refresh(item, self.key_fields)

version = "_v" + str(item.version) if self.table.is_versioned else ""
key_salt = await getattr(item.awaitable_attrs, 'key_salt')
if iscoroutine(key_salt):
item.__dict__['session'] = session
Expand All @@ -69,7 +85,7 @@ async def gen_upload_form(self, file: S3File, session: AsyncSession):
await session.flush()
parts = await getattr(file.upload.awaitable_attrs, 'parts')

key = await self.gen_key(file, session=session)
key = await self.gen_key(file, session=session, refresh=False)
n_chunks = ceil(file.size / CHUNK_SIZE)

if n_chunks > 1:
Expand Down Expand Up @@ -127,8 +143,31 @@ async def _insert_list(
return files

@DatabaseManager.in_session
async def post_success(self, pk_val: List[Any], session: AsyncSession):
file = await self.read(pk_val, fields=['ready', 'upload'], session=session)
async def post_success(
self,
pk_val: List[Any],
bucket: str,
key: str,
etag: str,
session: AsyncSession
):
file = await self.read(
pk_val,
fields=['ready', 'upload'] + self.key_fields,
session=session
)

# Weak check. TODO: [prio not urgent] can be improved ?
indb_key = await self.gen_key(file, session=session, refresh=False)
if (
not check_hash(etag) or
bucket != self.s3.bucket_name or
indb_key != key
):
raise FileUploadSuccessError(
"Critical: Manual attempt at validating file detected !"
)

file.validated_at = utcnow()
file.ready = True
file.upload_id, file.upload = None, None
Expand All @@ -141,10 +180,14 @@ async def complete_multipart(
session: AsyncSession
):
# parts should take the form of [{'PartNumber': part_number, 'ETag': etag}, ...]
file = await self.read(pk_val, fields=['ready', 'upload'], session=session)
file = await self.read(
pk_val,
fields=['ready', 'upload'] + self.key_fields,
session=session
)
upload = await getattr(file.awaitable_attrs, 'upload')
complete = self.s3.complete_multipart_upload(
object_name=await self.gen_key(file, session=session),
object_name=await self.gen_key(file, session=session, refresh=False),
upload_id=upload.s3_uploadId,
parts=parts
)
Expand Down Expand Up @@ -176,7 +219,7 @@ async def download(
:rtype: str
"""
# File management.
fields = ['filename', 'extension', 'dl_count', 'ready']
fields = ['dl_count', 'ready'] + self.key_fields
# Also fetch foreign keys, as some may be necessary for permission check.
fields += list(c.name for c in self.table.__table__.columns if c.foreign_keys)
file = await self.read(pk_val, fields=fields, session=session)
Expand All @@ -188,23 +231,23 @@ async def download(
if not file.ready:
raise FileNotUploadedError("File exists but has not been uploaded yet.")

url = self.s3.create_presigned_download_url(await self.gen_key(file, session=session))
url = self.s3.create_presigned_download_url(
await self.gen_key(file, session=session, refresh=False)
)
file.dl_count += 1
return url

@DatabaseManager.in_session
async def release(
self,
pk_val: List[Any],
fields: List[str],
update: Dict[str, Any],
session: AsyncSession,
user_info: UserInfo | None = None,
) -> Base:
# Bumps version.
file = await super().release(
pk_val=pk_val,
fields=fields,
update=update,
session=session,
user_info=user_info
Expand Down
47 changes: 24 additions & 23 deletions src/biodm/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from starlette.config import Config
from starlette.datastructures import Secret
from databases import DatabaseURL

try:
Expand Down Expand Up @@ -27,33 +28,33 @@
CACHE_MAX_AGE = config('CACHE_MAX_AGE', cast=int, default=600)

# DB.
DATABASE_URL = config("DATABASE_URL", cast=str, default="sqlite:///:memory:")
DATABASE_URL = config("DATABASE_URL", cast=DatabaseURL, default="sqlite:///:memory:")

# S3 Bucket.
S3_ENDPOINT_URL = config('S3_ENDPOINT_URL', cast=str, default=None)
S3_BUCKET_NAME = config('S3_BUCKET_NAME', cast=str, default=None)
S3_ACCESS_KEY_ID = config('S3_ACCESS_KEY_ID', cast=str, default=None)
S3_SECRET_ACCESS_KEY = config('S3_SECRET_ACCESS_KEY', cast=str, default=None)
S3_URL_EXPIRATION = config('S3_URL_EXPIRATION', cast=int, default=3600)
S3_PENDING_EXPIRATION = config('S3_PENDING_EXPIRATION', cast=int, default=3600 * 24)
S3_REGION_NAME = config('S3_REGION_NAME', cast=str, default="us-east-1")
S3_FILE_SIZE_LIMIT = config('S3_FILE_SIZE_LIMIT', cast=int, default=100)
S3_ENDPOINT_URL = config('S3_ENDPOINT_URL', cast=str, default=None)
S3_BUCKET_NAME = config('S3_BUCKET_NAME', cast=str, default=None)
S3_ACCESS_KEY_ID = config('S3_ACCESS_KEY_ID', cast=Secret, default=None)
S3_SECRET_ACCESS_KEY = config('S3_SECRET_ACCESS_KEY', cast=Secret, default=None)
S3_URL_EXPIRATION = config('S3_URL_EXPIRATION', cast=int, default=3600)
S3_PENDING_EXPIRATION = config('S3_PENDING_EXPIRATION', cast=int, default=3600 * 24)
S3_REGION_NAME = config('S3_REGION_NAME', cast=str, default="us-east-1")
S3_FILE_SIZE_LIMIT = config('S3_FILE_SIZE_LIMIT', cast=int, default=100)

# Keycloak.
KC_HOST = config("KC_HOST", cast=str, default=None)
KC_REALM = config("KC_REALM", cast=str, default=None)
KC_PUBLIC_KEY = config("KC_PUBLIC_KEY", cast=str, default=None)
KC_ADMIN = config("KC_ADMIN", cast=str, default=None)
KC_ADMIN_PASSWORD = config("KC_ADMIN_PASSWORD", cast=str, default=None)
KC_CLIENT_ID = config("KC_CLIENT_ID", cast=str, default=None)
KC_CLIENT_SECRET = config("KC_CLIENT_SECRET", cast=str, default=None)
KC_JWT_OPTIONS = config("KC_JWT_OPTIONS", cast=dict, default={'verify_exp': False,
KC_HOST = config("KC_HOST", cast=str, default=None)
KC_REALM = config("KC_REALM", cast=str, default=None)
KC_PUBLIC_KEY = config("KC_PUBLIC_KEY", cast=str, default=None)
KC_ADMIN = config("KC_ADMIN", cast=str, default=None)
KC_ADMIN_PASSWORD = config("KC_ADMIN_PASSWORD", cast=Secret, default=None)
KC_CLIENT_ID = config("KC_CLIENT_ID", cast=str, default=None)
KC_CLIENT_SECRET = config("KC_CLIENT_SECRET", cast=Secret, default=None)
KC_JWT_OPTIONS = config("KC_JWT_OPTIONS", cast=dict, default={'verify_exp': False,
'verify_aud': False})

# Kubernetes.
K8_IP = config("K8_IP", cast=str, default=None)
K8_PORT = config("K8_PORT", cast=str, default="8443")
K8_HOST = config("K8_HOST", cast=str, default=None)
K8_CERT = config("K8_CERT", cast=str, default=None)
K8_TOKEN = config("K8_TOKEN", cast=str, default=None)
K8_NAMESPACE = config("K8_NAMESPACE", cast=str, default="default")
K8_IP = config("K8_IP", cast=str, default=None)
K8_PORT = config("K8_PORT", cast=str, default="8443")
K8_HOST = config("K8_HOST", cast=str, default=None)
K8_CERT = config("K8_CERT", cast=str, default=None)
K8_TOKEN = config("K8_TOKEN", cast=Secret, default=None)
K8_NAMESPACE = config("K8_NAMESPACE", cast=str, default="default")
3 changes: 2 additions & 1 deletion src/biodm/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .exceptions import (
EndpointError,
FailedUpdate,
FileUploadSuccessError,
PayloadJSONDecodingError,
RequestError,
FailedDelete,
Expand Down Expand Up @@ -50,7 +51,7 @@ async def onerror(_, exc):
)

match exc:
case FileTooLargeError():
case FileTooLargeError() | FileUploadSuccessError():
status = 400
case DataError() | EndpointError() | PayloadJSONDecodingError():
status = 400
Expand Down
4 changes: 4 additions & 0 deletions src/biodm/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ class FileTooLargeError(RequestError):
"""Raised when trying to create a too large file."""


class FileUploadSuccessError(RequestError):
    """Raised when the file upload success route is visited with wrong information."""


class DataError(RequestError):
"""Raised when input data is incorrect."""

Expand Down
2 changes: 1 addition & 1 deletion src/biodm/managers/k8smanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(

def authenticate(self, token, host, cert):
"""Set configuration with the credentials and certificate"""
self._config.api_key["authorization"] = token
self._config.api_key["authorization"] = str(token)
self._config.api_key_prefix['authorization'] = 'Bearer'
self._config.host = host
self._config.ssl_ca_cert = cert
Expand Down
9 changes: 5 additions & 4 deletions src/biodm/managers/kcmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from keycloak.openid_connection import KeycloakOpenIDConnection
from keycloak.keycloak_openid import KeycloakOpenID
from keycloak.exceptions import KeycloakError, KeycloakDeleteError, KeycloakGetError
from starlette.datastructures import Secret

from biodm.component import ApiManager
from biodm.exceptions import (
Expand All @@ -26,9 +27,9 @@ def __init__(
realm: str,
public_key: str,
admin: str,
admin_password: str,
admin_password: Secret,
client_id: str,
client_secret: str,
client_secret: Secret,
jwt_options: dict
) -> None:
super().__init__(app=app)
Expand All @@ -44,14 +45,14 @@ def __init__(
user_realm_name="master",
realm_name=realm,
username=admin,
password=admin_password,
password=str(admin_password),
verify=True,
)
self._openid = KeycloakOpenID(
server_url=host,
realm_name=realm,
client_id=client_id,
client_secret_key=client_secret,
client_secret_key=str(client_secret),
)
except KeycloakError as e:
raise KeycloakUnavailableError(
Expand Down
9 changes: 5 additions & 4 deletions src/biodm/managers/s3manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from boto3 import client
from botocore.config import Config
from botocore.exceptions import ClientError
from starlette.datastructures import Secret

from biodm.component import ApiManager
from biodm.utils.utils import utcnow
Expand All @@ -19,8 +20,8 @@ def __init__(
app: Api,
endpoint_url: str,
bucket_name: str,
access_key_id: str,
secret_access_key: str,
access_key_id: Secret,
secret_access_key: Secret,
url_expiration: int,
pending_expiration: int,
region_name: str,
Expand All @@ -36,8 +37,8 @@ def __init__(
self.s3_client = client(
's3',
endpoint_url=endpoint_url,
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
aws_access_key_id=str(access_key_id),
aws_secret_access_key=str(secret_access_key),
region_name=self.region_name,
config=Config(
signature_version='s3'
Expand Down
Loading

0 comments on commit 2bc530e

Please sign in to comment.