Drop ElasticSearch features from worker (APM and logging) #3115

Merged · 2 commits · Aug 7, 2024
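In short, this PR removes the elastic-apm middleware and the Elasticsearch-backed `ValidationLogger` from the worker, falling back to Python's standard `logging` module. The sketch below illustrates the replacement pattern; `decode_validator_output` is a hypothetical helper name for illustration only, since the real change lives inside `run_and_decode` in `services/datalad/datalad_service/tasks/validator.py` (diff below).

```python
import json
import logging

# Module-level logger, mirroring the name used in validator.py.
logger = logging.getLogger('datalad_service.' + __name__)


def decode_validator_output(stdout: bytes, stderr: bytes):
    """Hypothetical helper: parse validator JSON output, logging failures
    locally instead of shipping them to Elasticsearch."""
    try:
        return json.loads(stdout.decode('utf-8'))
    except json.decoder.JSONDecodeError as err:
        # Inside an except block, logger.exception also records the traceback.
        logger.exception(err)
        logger.info(stdout)
        logger.error(stderr)
```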
3 changes: 0 additions & 3 deletions services/datalad/Pipfile
@@ -20,10 +20,7 @@ requests = "*"
 GitPython = "*"
 PyJWT = ">=2"
 gunicorn = "*"
-elastic-apm = "*"
-falcon-elastic-apm = "*"
 boto3 = "*"
-elasticsearch = "*"
 pygit2 = "==1.15.1"
 pygithub = "==2.3.0"
 greenlet = "*"
56 changes: 5 additions & 51 deletions services/datalad/Pipfile.lock

Some generated files are not rendered by default.

4 changes: 0 additions & 4 deletions services/datalad/datalad_service/app.py
@@ -1,6 +1,5 @@
 import falcon
 import falcon.asgi
-from falcon_elastic_apm import ElasticApmMiddleware

 import datalad_service.config
 from datalad_service.tasks.audit import audit_datasets
@@ -40,9 +39,6 @@ def create_app():
         raise Exception("Required DATALAD_DATASET_PATH environment variable is not defined")

     middleware = [AuthenticateMiddleware(), CustomErrorHandlerMiddleware()]
-    if datalad_service.config.ELASTIC_APM_SERVER_URL:
-        middleware.append(ElasticApmMiddleware(service_name='datalad-service',
-                                               server_url=datalad_service.config.ELASTIC_APM_SERVER_URL))

     app = falcon.asgi.App(middleware=middleware)
     app.router_options.converters['path'] = PathConverter
21 changes: 0 additions & 21 deletions services/datalad/datalad_service/common/elasticsearch.py

This file was deleted.

6 changes: 0 additions & 6 deletions services/datalad/datalad_service/config.py
@@ -31,11 +31,5 @@
 # Redis Host
 REDIS_HOST = os.getenv('REDIS_HOST', 'redis')

-# The path to connect to Elastic APM server
-ELASTIC_APM_SERVER_URL = os.getenv('ELASTIC_APM_SERVER_URL')
-
-# Elasticsearch URL
-ELASTICSEARCH_CONNECTION = os.getenv('ELASTICSEARCH_CONNECTION')
-
 # Site URL
 CRN_SERVER_URL = os.getenv('CRN_SERVER_URL')
5 changes: 0 additions & 5 deletions services/datalad/datalad_service/tasks/publish.py
@@ -3,7 +3,6 @@
 import os.path
 import re

-import elasticapm
 import pygit2
 import boto3
 from github import Github
@@ -54,14 +53,12 @@ def create_remotes_and_export(dataset_path, cookies=None):
     export_dataset(dataset_path, cookies)


-@elasticapm.capture_span()
 def create_remotes(dataset_path):
     dataset = os.path.basename(dataset_path)
     s3_sibling(dataset_path)
     github_sibling(dataset_path, dataset)


-@elasticapm.capture_span()
 def export_dataset(dataset_path, cookies=None, s3_export=s3_export, github_export=github_export, update_s3_sibling=update_s3_sibling, github_enabled=DATALAD_GITHUB_EXPORTS_ENABLED):
     """
     Export dataset to S3 and GitHub.
@@ -113,7 +110,6 @@ def check_remote_has_version(dataset_path, remote, tag):
     return remote_id_A == remote_id_B and tree_id_A == tree_id_B


-@elasticapm.capture_span()
 async def delete_s3_sibling(dataset_id):
     try:
         client = boto3.client(
@@ -145,7 +141,6 @@ async def delete_s3_sibling(dataset_id):
             f'Attempt to delete dataset {dataset_id} from {get_s3_remote()} has failed. ({e})')


-@elasticapm.capture_span()
 async def delete_github_sibling(dataset_id):
     ses = Github(DATALAD_GITHUB_LOGIN, DATALAD_GITHUB_PASS)
     org = ses.get_organization(DATALAD_GITHUB_ORG)
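The `@elasticapm.capture_span()` decorators dropped above produced per-function timing spans in APM. The PR removes that instrumentation without a replacement; if span-style timing were still wanted, a minimal stdlib stand-in might look like the sketch below (`log_duration` is hypothetical and not part of this PR, and as written it only handles synchronous functions; the async functions in publish.py would need an async wrapper).

```python
import functools
import logging
import time

logger = logging.getLogger('datalad_service.' + __name__)


def log_duration(func):
    """Log how long a call takes, a lightweight stand-in for an APM span."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.monotonic()
        try:
            return func(*args, **kwargs)
        finally:
            logger.info('%s took %.3fs', func.__name__, time.monotonic() - start)
    return wrapper
```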
25 changes: 14 additions & 11 deletions services/datalad/datalad_service/tasks/validator.py
@@ -1,12 +1,14 @@
 import asyncio
 import json
+import logging
 import os
-import requests
 import re

+import requests
+
 from datalad_service.config import GRAPHQL_ENDPOINT
-from datalad_service.common.elasticsearch import ValidationLogger
+
+logger = logging.getLogger('datalad_service.' + __name__)

 LEGACY_VALIDATOR_VERSION = json.load(
     open('package.json'))['dependencies']['bids-validator']
@@ -34,12 +36,13 @@
     await process.wait()


-async def run_and_decode(args, timeout, esLogger):
+async def run_and_decode(args, timeout, logger):
     """Run a subprocess and return the JSON output."""
     process = await asyncio.create_subprocess_exec(*args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
     try:
         await asyncio.wait_for(process.wait(), timeout=timeout)
     except asyncio.TimeoutError:
+        logger.warning(f'Timed out while running `{" ".join(args)}`')
         process.kill()

     # Retrieve what we can from the process
@@ -50,10 +53,12 @@
     try:
         return json.loads(escape_ansi(stdout.decode('utf-8')))
     except json.decoder.JSONDecodeError as err:
-        esLogger.log(stdout, stderr, err)
+        logger.exception(err)
+        logger.info(stdout)
+        logger.error(stderr)


-async def validate_dataset_call(dataset_path, ref, esLogger):
+async def validate_dataset_call(dataset_path, ref, logger=logger):
     """
     Synchronous dataset validation.

@@ -63,11 +68,11 @@
     return await run_and_decode(
         ['./node_modules/.bin/bids-validator', '--json', '--ignoreSubjectConsistency', dataset_path],
         timeout=300,
-        esLogger=esLogger,
+        logger=logger,
     )


-async def validate_dataset_deno_call(dataset_path, ref, esLogger):
+async def validate_dataset_deno_call(dataset_path, ref):
     """
     Synchronous dataset validation.

@@ -78,7 +83,6 @@
          f'https://deno.land/x/bids_validator@{DENO_VALIDATOR_VERSION}/bids-validator.ts',
          '--json', dataset_path],
         timeout=300,
-        esLogger=esLogger,
     )
@@ -127,8 +131,7 @@


 async def validate_dataset(dataset_id, dataset_path, ref, cookies=None, user=''):
-    esLogger = ValidationLogger(dataset_id, user)
-    validator_output = await validate_dataset_call(dataset_path, ref, esLogger)
+    validator_output = await validate_dataset_call(dataset_path, ref)
     all_issues = validator_output['issues']['warnings'] + \
         validator_output['issues']['errors']
     if validator_output:
@@ -146,7 +149,7 @@
         raise Exception('Validation failed unexpectedly')

     # New schema validator second in case of issues
-    validator_output_deno = await validate_dataset_deno_call(dataset_path, ref, esLogger)
+    validator_output_deno = await validate_dataset_deno_call(dataset_path, ref)
     if validator_output_deno:
         if 'issues' in validator_output_deno:
             r = requests.post(
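With `esLogger` gone, `validate_dataset_call` takes an optional `logger` keyword that defaults to the module-level logger, so production callers pass nothing while tests can inject a double. A minimal usage sketch, assuming a checked-out dataset and the Node bids-validator installed; the dataset path is a placeholder:

```python
import asyncio
import logging

from datalad_service.tasks.validator import validate_dataset_call

# With no logger argument, messages go to the 'datalad_service.*' logger.
logging.basicConfig(level=logging.INFO)


async def main():
    # '/datasets/ds000001' is an illustrative path, not a real fixture.
    output = await validate_dataset_call('/datasets/ds000001', 'HEAD')
    print(output)

asyncio.run(main())
```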
12 changes: 8 additions & 4 deletions services/datalad/tests/test_validator.py
@@ -12,10 +12,12 @@ class MockLogger:

 async def test_validator_error(new_dataset):
     logger = MockLogger()
-    logger.log = Mock()
+    logger.info = Mock()
+    logger.error = Mock()
+    logger.exception = Mock()
     await validate_dataset_call(new_dataset.path, 'HEAD', logger)
     # new_dataset completes validation with errors, should not call logger
-    assert not logger.log.called
+    assert not logger.exception.called


 @pytest.fixture
@@ -35,6 +37,8 @@ async def invalidJson():

 async def test_validator_bad_json(new_dataset, mock_validator_crash):
     logger = MockLogger()
-    logger.log = Mock()
+    logger.info = Mock()
+    logger.error = Mock()
+    logger.exception = Mock()
     await validate_dataset_call(new_dataset.path, 'HEAD', logger)
-    assert logger.log.called
+    assert logger.exception.called
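The `MockLogger` definition sits outside the visible hunk; based on how the tests attach `Mock()` attributes, it is presumably just an empty stand-in. A hedged sketch of that pattern, with `make_mock_logger` as a hypothetical helper that would avoid repeating the three `Mock()` assignments in both tests:

```python
from unittest.mock import Mock


class MockLogger:
    """Minimal logger double: tests attach Mock() methods as needed."""
    pass


def make_mock_logger():
    # Stub exactly the methods run_and_decode calls on failure.
    logger = MockLogger()
    logger.info = Mock()
    logger.error = Mock()
    logger.exception = Mock()
    return logger
```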