diff --git a/packages/openneuro-app/src/scripts/datalad/mutations/delete.jsx b/packages/openneuro-app/src/scripts/datalad/mutations/delete.jsx
index 3e04ace74..fb616b3c2 100644
--- a/packages/openneuro-app/src/scripts/datalad/mutations/delete.jsx
+++ b/packages/openneuro-app/src/scripts/datalad/mutations/delete.jsx
@@ -32,7 +32,7 @@ const DeleteDataset = ({ datasetId, metadata }) => {
           },
         })
         window.location.replace(
-          `${window.location.origin}/dashboard/datasets`,
+          `${window.location.origin}/search`,
         )
       })}
     >
diff --git a/packages/openneuro-app/src/scripts/dataset/mutations/delete.jsx b/packages/openneuro-app/src/scripts/dataset/mutations/delete.jsx
index dcd93335b..bda5066e3 100644
--- a/packages/openneuro-app/src/scripts/dataset/mutations/delete.jsx
+++ b/packages/openneuro-app/src/scripts/dataset/mutations/delete.jsx
@@ -32,7 +32,7 @@ const DeleteDataset = ({ datasetId, metadata }) => {
           },
         })
         window.location.replace(
-          `${window.location.origin}/dashboard/datasets`,
+          `${window.location.origin}/search`,
         )
       })}
     >
diff --git a/services/datalad/datalad_service/handlers/dataset.py b/services/datalad/datalad_service/handlers/dataset.py
index 247b9a4fd..4202938fd 100644
--- a/services/datalad/datalad_service/handlers/dataset.py
+++ b/services/datalad/datalad_service/handlers/dataset.py
@@ -1,6 +1,6 @@
 import asyncio
-import os
+import aiofiles.os
 
 import falcon
 import pygit2
 
@@ -19,7 +19,7 @@ def __init__(self, store):
 
     async def on_get(self, req, resp, dataset):
         ds_path = self.store.get_dataset_path(dataset)
-        if (os.path.isdir(ds_path)):
+        if await aiofiles.os.path.isdir(ds_path):
             dataset_description = {
                 'accession_number': dataset,
             }
@@ -32,7 +32,7 @@ async def on_get(self, req, resp, dataset):
 
     async def on_post(self, req, resp, dataset):
         ds_path = self.store.get_dataset_path(dataset)
-        if (os.path.isdir(ds_path)):
+        if await aiofiles.os.path.isdir(ds_path):
             resp.media = {'error': 'dataset already exists'}
             resp.status = falcon.HTTP_CONFLICT
         else:
@@ -48,15 +48,12 @@ async def on_post(self, req, resp, dataset):
     async def on_delete(self, req, resp, dataset):
         dataset_path = self.store.get_dataset_path(dataset)
 
-        async def async_delete():
-            await delete_siblings(dataset)
-            await delete_dataset(dataset_path)
-        try:
-            # Don't block before responding
-            asyncio.run_task(async_delete())
+        if await aiofiles.os.path.exists(dataset_path):
+            await asyncio.gather(delete_siblings(dataset), delete_dataset(dataset_path))
+            resp.media = {}
             resp.status = falcon.HTTP_OK
-        except:
-            resp.media = {'error': 'dataset not found'}
+        else:
+            resp.media = {'error': 'dataset does not exist'}
             resp.status = falcon.HTTP_NOT_FOUND
 
 
diff --git a/services/datalad/datalad_service/tasks/publish.py b/services/datalad/datalad_service/tasks/publish.py
index 61ae8f43e..2af4fc943 100644
--- a/services/datalad/datalad_service/tasks/publish.py
+++ b/services/datalad/datalad_service/tasks/publish.py
@@ -2,6 +2,7 @@
 import logging
 import os.path
 import re
+from concurrent.futures import ProcessPoolExecutor
 
 import pygit2
 import boto3
@@ -24,6 +25,9 @@
 
 logger = logging.getLogger('datalad_service.' + __name__)
 
+delete_executor = ProcessPoolExecutor(4)
+
+
 def github_sibling(dataset_path, dataset_id):
     """
     Find a GitHub remote or create a new repo and configure the remote.
@@ -110,7 +114,13 @@ def check_remote_has_version(dataset_path, remote, tag):
     return remote_id_A == remote_id_B and tree_id_A == tree_id_B
 
 
-async def delete_s3_sibling(dataset_id):
+def delete_s3_sibling(dataset_id):
+    """Run S3 sibling deletion in another process to avoid blocking any callers"""
+    delete_executor.submit(delete_s3_sibling_executor, dataset_id)
+
+
+def delete_s3_sibling_executor(dataset_id):
+    """Delete all versions of a dataset from S3."""
     try:
         client = boto3.client(
             's3',
@@ -124,8 +134,6 @@ async def delete_s3_sibling(dataset_id):
             versions.extend(response.get('DeleteMarkers', []))
             object_delete_list.extend(
                 [{'VersionId': version['VersionId'], 'Key': version['Key']} for version in versions])
-            # Yield after each request
-            await asyncio.sleep(0)
         for i in range(0, len(object_delete_list), 1000):
             client.delete_objects(
                 Bucket=get_s3_bucket(),
@@ -134,8 +142,6 @@
                     'Quiet': True
                 }
             )
-            # Yield after each request
-            await asyncio.sleep(0)
     except Exception as e:
         raise Exception(
             f'Attempt to delete dataset {dataset_id} from {get_s3_remote()} has failed. ({e})')
@@ -156,15 +162,8 @@ async def delete_github_sibling(dataset_id):
 
 
 async def delete_siblings(dataset_id):
-    try:
-        await delete_s3_sibling(dataset_id)
-    except:
-        pass
-    await asyncio.sleep(0)
-    try:
-        await delete_github_sibling(dataset_id)
-    except:
-        pass
+    delete_s3_sibling(dataset_id)
+    await delete_github_sibling(dataset_id)
 
 
 def monitor_remote_configs(dataset_path):