Merge pull request #3130 from OpenNeuroOrg/3128-delete-fixes
Worker fixes for deletion
nellh authored Aug 28, 2024
2 parents b33cd43 + 021b19d commit 185a08b
Showing 4 changed files with 23 additions and 27 deletions.
@@ -32,7 +32,7 @@ const DeleteDataset = ({ datasetId, metadata }) => {
           },
         })
         window.location.replace(
-          `${window.location.origin}/dashboard/datasets`,
+          `${window.location.origin}/search`,
         )
       })}
     >
@@ -32,7 +32,7 @@ const DeleteDataset = ({ datasetId, metadata }) => {
           },
         })
         window.location.replace(
-          `${window.location.origin}/dashboard/datasets`,
+          `${window.location.origin}/search`,
         )
       })}
     >
19 changes: 8 additions & 11 deletions services/datalad/datalad_service/handlers/dataset.py
@@ -1,6 +1,6 @@
 import asyncio
 import os
 
+import aiofiles.os
 import falcon
 import pygit2
@@ -19,7 +19,7 @@ def __init__(self, store):
 
     async def on_get(self, req, resp, dataset):
         ds_path = self.store.get_dataset_path(dataset)
-        if (os.path.isdir(ds_path)):
+        if await aiofiles.os.path.isdir(ds_path):
             dataset_description = {
                 'accession_number': dataset,
             }
@@ -32,7 +32,7 @@ async def on_get(self, req, resp, dataset):
 
     async def on_post(self, req, resp, dataset):
         ds_path = self.store.get_dataset_path(dataset)
-        if (os.path.isdir(ds_path)):
+        if await aiofiles.os.path.isdir(ds_path):
             resp.media = {'error': 'dataset already exists'}
             resp.status = falcon.HTTP_CONFLICT
         else:
@@ -48,15 +48,12 @@ async def on_post(self, req, resp, dataset):
 
     async def on_delete(self, req, resp, dataset):
         dataset_path = self.store.get_dataset_path(dataset)
-        async def async_delete():
-            await delete_siblings(dataset)
-            await delete_dataset(dataset_path)
-
-        try:
-            # Don't block before responding
-            asyncio.run_task(async_delete())
+        if await aiofiles.os.path.exists(dataset_path):
+            await asyncio.gather(delete_siblings(dataset), delete_dataset(dataset_path))
+
             resp.media = {}
             resp.status = falcon.HTTP_OK
-        except:
-            resp.media = {'error': 'dataset not found'}
+        else:
+            resp.media = {'error': 'dataset does not exist'}
             resp.status = falcon.HTTP_NOT_FOUND
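
The rewritten on_delete replaces a call to asyncio.run_task, which is not part of the asyncio API (so the old handler always raised and fell into the bare except), with an existence check via aiofiles.os, which runs the blocking os call in a thread, followed by asyncio.gather, which awaits both deletion coroutines concurrently. A minimal runnable sketch of that pattern, with hypothetical stand-ins for delete_siblings and delete_dataset:

    import asyncio

    import aiofiles.os


    async def fake_delete_siblings(dataset_id):
        # Hypothetical stand-in for delete_siblings(): remote cleanup.
        await asyncio.sleep(0.1)


    async def fake_delete_dataset(path):
        # Hypothetical stand-in for delete_dataset(): local cleanup.
        await asyncio.sleep(0.1)


    async def on_delete(dataset_id, dataset_path):
        # aiofiles.os runs os.path.exists in a thread, keeping the loop free.
        if await aiofiles.os.path.exists(dataset_path):
            # gather() runs both coroutines concurrently; if either raises,
            # the exception propagates instead of being silently swallowed.
            await asyncio.gather(
                fake_delete_siblings(dataset_id),
                fake_delete_dataset(dataset_path),
            )
            return {}
        return {'error': 'dataset does not exist'}


    if __name__ == '__main__':
        print(asyncio.run(on_delete('ds000001', '/tmp')))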
27 changes: 13 additions & 14 deletions services/datalad/datalad_service/tasks/publish.py
@@ -2,6 +2,7 @@
 import logging
 import os.path
 import re
+from concurrent.futures import ProcessPoolExecutor
 
 import pygit2
 import boto3
@@ -24,6 +25,9 @@
 logger = logging.getLogger('datalad_service.' + __name__)
 
 
+delete_executor = ProcessPoolExecutor(4)
+
+
 def github_sibling(dataset_path, dataset_id):
     """
     Find a GitHub remote or create a new repo and configure the remote.
@@ -110,7 +114,13 @@ def check_remote_has_version(dataset_path, remote, tag):
     return remote_id_A == remote_id_B and tree_id_A == tree_id_B
 
 
-async def delete_s3_sibling(dataset_id):
+def delete_s3_sibling(dataset_id):
+    """Run S3 sibling deletion in another process to avoid blocking any callers"""
+    delete_executor.submit(delete_s3_sibling_executor, dataset_id)
+
+
+def delete_s3_sibling_executor(dataset_id):
+    """Delete all versions of a dataset from S3."""
     try:
         client = boto3.client(
             's3',
@@ -124,8 +134,6 @@ async def delete_s3_sibling(dataset_id):
             versions.extend(response.get('DeleteMarkers', []))
             object_delete_list.extend(
                 [{'VersionId': version['VersionId'], 'Key': version['Key']} for version in versions])
-            # Yield after each request
-            await asyncio.sleep(0)
         for i in range(0, len(object_delete_list), 1000):
             client.delete_objects(
                 Bucket=get_s3_bucket(),
@@ -134,8 +142,6 @@ async def delete_s3_sibling(dataset_id):
                     'Quiet': True
                 }
             )
-            # Yield after each request
-            await asyncio.sleep(0)
     except Exception as e:
         raise Exception(
             f'Attempt to delete dataset {dataset_id} from {get_s3_remote()} has failed. ({e})')
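
The loop above steps through object_delete_list in slices of 1000 because the S3 DeleteObjects API accepts at most 1000 keys per request. The batching idiom in isolation, with a hypothetical delete_batch standing in for client.delete_objects:

    # Hypothetical stand-in for client.delete_objects(Bucket=..., Delete=...).
    def delete_batch(batch):
        print(f'deleting {len(batch)} objects')


    object_delete_list = [
        {'Key': f'obj-{n}', 'VersionId': 'null'} for n in range(2500)
    ]

    # Walk the list in fixed-size slices; the final slice may be shorter.
    for i in range(0, len(object_delete_list), 1000):
        delete_batch(object_delete_list[i:i + 1000])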
@@ -156,15 +162,8 @@ async def delete_github_sibling(dataset_id):
 
 
 async def delete_siblings(dataset_id):
-    try:
-        await delete_s3_sibling(dataset_id)
-    except:
-        pass
-    await asyncio.sleep(0)
-    try:
-        await delete_github_sibling(dataset_id)
-    except:
-        pass
+    delete_s3_sibling(dataset_id)
+    await delete_github_sibling(dataset_id)
 
 
 def monitor_remote_configs(dataset_path):
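
With this change, delete_s3_sibling hands the slow boto3 work to a four-process ProcessPoolExecutor and returns immediately, so neither the event loop nor the HTTP response waits on S3; the per-request asyncio.sleep(0) yield points become unnecessary. A minimal sketch of that fire-and-forget pattern, with a hypothetical slow_cleanup in place of delete_s3_sibling_executor:

    import time
    from concurrent.futures import ProcessPoolExecutor


    def slow_cleanup(dataset_id):
        # Hypothetical stand-in for delete_s3_sibling_executor(); it runs in
        # a worker process, so it may block without stalling the caller.
        time.sleep(1)
        return f'{dataset_id} cleaned up'


    def request_cleanup(executor, dataset_id):
        # submit() returns a Future immediately; the caller does not wait.
        return executor.submit(slow_cleanup, dataset_id)


    if __name__ == '__main__':
        # Mirrors delete_executor = ProcessPoolExecutor(4) in the diff above.
        executor = ProcessPoolExecutor(4)
        future = request_cleanup(executor, 'ds000001')
        print('caller returned immediately')
        print(future.result())  # demo only; the real code never blocks on this

One trade-off of discarding the Future, as the real code does: an exception raised in the worker is stored on the dropped Future and never surfaces, so logging inside the worker function (or attaching a done-callback) is the way to observe failures.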
