diff --git a/.env-template b/.env-template index 90e8e91..e421162 100644 --- a/.env-template +++ b/.env-template @@ -4,6 +4,23 @@ LOG_LEVEL=INFO LOG_VERBOSE=FALSE +# ------------------------------ +# Databases +# ------------------------------ + +# Postgres +DB_POSTGRES_USER= +DB_POSTGRES_PASSWORD= +DB_POSTGRES_NAME= +DB_POSTGRES_HOST= +DB_POSTGRES_PORT= + +# Clickhouse +DB_CLICKHOUSE_HOST= +DB_CLICKHOUSE_PORT= +DB_CLICKHOUSE_USER= +DB_CLICKHOUSE_PASSWORD= +DB_CLICKHOUSE_NAME= # ------------------------------ # Optional Vars @@ -16,3 +33,7 @@ PYROSCOPE_API_KEY= # Sentry Configuration SENTRY_DSN= + + + + diff --git a/app/__init__.py b/app/__init__.py index c4d22b6..813dc2c 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,15 +1,10 @@ # app/__init__.py from .utils import configure_monitoring, logger from dotenv import load_dotenv - from .database.postgres.connect import test_connection -from .processes import start_processes - def startup(): logger.info('Starting up...') load_dotenv() configure_monitoring() test_connection() - start_processes() - diff --git a/app/database/postgres/queries/clothe_domains.sql b/app/database/postgres/queries/clothe_domains.sql index a4c6da3..6e09445 100644 --- a/app/database/postgres/queries/clothe_domains.sql +++ b/app/database/postgres/queries/clothe_domains.sql @@ -1,6 +1,6 @@ -- app/database/postgres/queries/clothe_domains.sql -- Creates url entry -UPDATE targets.domains d +UPDATE targets.domains SET home_url = :home_url WHERE id = :domain_id RETURNING id; diff --git a/app/database/postgres/queries/upsert_url.sql b/app/database/postgres/queries/upsert_url.sql new file mode 100644 index 0000000..1f0d885 --- /dev/null +++ b/app/database/postgres/queries/upsert_url.sql @@ -0,0 +1,5 @@ +-- app/database/postgres/queries/upsert_url.sql +INSERT INTO targets.urls (url, domain_id) +VALUES (:home_url, :domain_id) +ON CONFLICT (url) DO UPDATE SET url = :home_url, domain_id = :domain_id +RETURNING id; diff --git a/app/processes/__init__.py b/app/processes/__init__.py index 689ef34..0f9e555 100644 --- a/app/processes/__init__.py +++ b/app/processes/__init__.py @@ -11,7 +11,6 @@ def process_loop(process_func, sleep_time): if not process_func(): # If there is no data to process time.sleep(sleep_time) # Wait for the specified amount of time - def start_processes(): logger.info('Starting processes...') diff --git a/app/processes/naked_urls.py b/app/processes/naked_urls.py index 44380d0..8988e6c 100644 --- a/app/processes/naked_urls.py +++ b/app/processes/naked_urls.py @@ -19,6 +19,9 @@ def find_nakies(): home_url = get_home_url(domain) record_home_url(domain_id, home_url) + if home_url != "BADDIE": + upsert_url(domain_id, home_url) + if home_url == "BADDIE": logger.debug(f'We got a BADDIE for %s', domain) else: @@ -51,3 +54,8 @@ def record_home_url(domain_id, home_url): variables = {"domain_id": domain_id, "home_url": home_url} result = run_query(query_name, variables) +def upsert_url(domain_id, home_url): + logger.debug('Upserting url for domain_id: %s', domain_id) + query_name = "upsert_url" + variables = {"domain_id": domain_id, "home_url": home_url} + result = run_query(query_name, variables) diff --git a/run.py b/run.py index 4603a24..b1cb1ab 100644 --- a/run.py +++ b/run.py @@ -1,5 +1,10 @@ # run.py from app import startup +from app.processes import start_processes +def main(): + startup() + start_processes() -startup() +if __name__ == '__main__': + main()