From 4dfa81b0bc2a2b7475045366f5f605a336bc0e9e Mon Sep 17 00:00:00 2001 From: Bentley Hensel Date: Tue, 11 Jul 2023 10:37:00 -0400 Subject: [PATCH] Dockerized --- Dockerfile | 34 ++++++++++++++++++++++ app/database/clickhouse/process_tests.py | 7 ++++- app/database/postgres/fetch_unprocessed.py | 3 +- app/database/postgres/process_tests.py | 1 + app/main.py | 7 +++-- 5 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a1cc79d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +# Use an official Python runtime as a parent image +# Use bullseye with Python pre-installed +FROM python:3.9-bullseye + +# Set the working directory to /app +WORKDIR /app + +# Copy all the things... +ADD . /app/ + +# Add essential packages and psycopg2 prerequisites then upgrade pip +RUN apt-get update && apt-get install -y \ + gcc \ + python3-dev \ + libpq-dev \ +&& pip install --upgrade pip + +# Install python packages and remove unnecessary packages +RUN pip install --no-cache-dir -r requirements.txt \ + && apt-get autoremove -y gcc python3-dev \ + && rm -rf /var/lib/apt/lists/* + +# Make Log file +RUN mkdir -p /app/logs + + +# Env Variables +ENV APP_PORT=3000 + +# Expose APP_PORT of the container to the outside +EXPOSE $APP_PORT + +# Run the command to start things... +CMD ["python", "app/main.py"] \ No newline at end of file diff --git a/app/database/clickhouse/process_tests.py b/app/database/clickhouse/process_tests.py index fa8be1c..4fc6a8f 100644 --- a/app/database/clickhouse/process_tests.py +++ b/app/database/clickhouse/process_tests.py @@ -6,6 +6,7 @@ """ from .connect import client as clickhouse_client +import traceback import json import html from datetime import datetime @@ -82,8 +83,12 @@ def insert_axe_into_clickhouse(data): try: client.execute(query) except Exception as e: + # Log the relevant parts of the exception + exception_traceback = traceback.format_exc() logger.error(f'Failed to insert data into ClickHouse. HTML being processed: {html}') - logger.exception("Exception: ") + logger.error(f'Failed Query:\n{query}') + logger.error(f'Exception: {str(e)}') + logger.debug(f'Exception Traceback:\n{exception_traceback}') # close the client connection diff --git a/app/database/postgres/fetch_unprocessed.py b/app/database/postgres/fetch_unprocessed.py index aaf21fa..fe8b7e7 100644 --- a/app/database/postgres/fetch_unprocessed.py +++ b/app/database/postgres/fetch_unprocessed.py @@ -10,7 +10,7 @@ session = SessionLocal() -def fetch_unprocessed_rules(limit=10000): +def fetch_unprocessed_rules(limit=1000): """Fetches all rule_id that are not processed yet.""" result = session.execute(text(""" SELECT id as rule_id @@ -18,6 +18,7 @@ def fetch_unprocessed_rules(limit=10000): WHERE imported = false LIMIT :limit """), {'limit': limit}) + logger.info(f'Importing {limit} unprocessed rules from Postgres') # Fetch all records from the query execution result records = result.fetchall() diff --git a/app/database/postgres/process_tests.py b/app/database/postgres/process_tests.py index 5e121a8..ee62f1a 100644 --- a/app/database/postgres/process_tests.py +++ b/app/database/postgres/process_tests.py @@ -79,5 +79,6 @@ def mark_rule_as_processed(rule_id): session.execute(text(query), {'rule_id': rule_id}) session.commit() + if __name__ == "__main__": main() \ No newline at end of file diff --git a/app/main.py b/app/main.py index eb3ad6d..c5d4f29 100644 --- a/app/main.py +++ b/app/main.py @@ -5,8 +5,11 @@ """ import time -from .processes import execute_axes -from .database import fetch_unprocessed_rules, mark_axe_rule_as_processed +import sys +import os +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +from app.processes import execute_axes +from app.database import fetch_unprocessed_rules, mark_axe_rule_as_processed # rule_id = 15 # 1363