Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Home URL Adder #2

Merged
merged 12 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .env-template
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# ------------------------------
# Environment Variables
# ------------------------------
LOG_LEVEL=INFO
LOG_VERBOSE=FALSE

# ------------------------------
# Databases
# ------------------------------

# Postgres
DB_POSTGRES_USER=
DB_POSTGRES_PASSWORD=
DB_POSTGRES_NAME=
DB_POSTGRES_HOST=
DB_POSTGRES_PORT=

# Clickhouse
DB_CLICKHOUSE_HOST=
DB_CLICKHOUSE_PORT=
DB_CLICKHOUSE_USER=
DB_CLICKHOUSE_PASSWORD=
DB_CLICKHOUSE_NAME=

# ------------------------------
# Optional Vars
# ------------------------------

# Pyroscope Configuration
PYROSCOPE_SERVER=
PYROSCOPE_APPLICATION_NAME=
PYROSCOPE_API_KEY=

# Sentry Configuration
SENTRY_DSN=




4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
.nova
.DS_Store
.env
venv
__pycache__
BUILDER.md
logs*
logs*
Binary file added .nova/Artwork
Binary file not shown.
5 changes: 5 additions & 0 deletions .nova/Configuration.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"workspace.art_style" : 1,
"workspace.color" : 0,
"workspace.name" : "Rabbit Run"
}
14 changes: 10 additions & 4 deletions app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
# __init__.py
# Relative Path: app/__init__.py
from .utils import logger
from .processes import preprocess_data
# app/__init__.py
from .utils import configure_monitoring, logger
from dotenv import load_dotenv
from .database.postgres.connect import test_connection

def startup():
    """Run the application's start-up sequence.

    Logs a start-up message, loads variables from a ``.env`` file, configures
    the optional monitoring extras, and finally checks the Postgres
    connection.
    """
    logger.info('Starting up...')
    # Order matters: the environment is loaded before the steps that read it.
    for step in (load_dotenv, configure_monitoring, test_connection):
        step()
11 changes: 6 additions & 5 deletions app/database/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# __init__.py
# Relative Path: app/database/__init__.py
from app.utils import logger
# app/database/__init__.py
from ..utils import logger

#from app.utils import logger
# Import from Postgres directory
from .postgres import axe_postgres, fetch_unprocessed_rules, mark_rule_as_processed as mark_axe_rule_as_processed
#from .postgres import axe_postgres, fetch_unprocessed_rules, mark_rule_as_processed as mark_axe_rule_as_processed
# Import from ClickHouse directory
from .clickhouse import axe_clickhouse
#from .clickhouse import axe_clickhouse
7 changes: 4 additions & 3 deletions app/database/postgres/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# __init__.py
# Relative Path: app/database/postgres/__init__.py
# app/database/postgres/__init__.py


from .process_tests import select_rules_data as axe_postgres, mark_rule_as_processed
from .fetch_unprocessed import fetch_unprocessed_rules
from .fetch_unprocessed import fetch_unprocessed_rules
26 changes: 14 additions & 12 deletions app/database/postgres/connect.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# connect.py
# Relative Path: app/database/postgres/connect.py
# app/database/postgres/connect.py
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, registry
import os
from .. import logger
from app import logger

DB_USER = os.getenv("DB_POSTGRES_USER")
if not DB_USER:
raise ValueError("Environment variable DB_POSTGRES_USER is not set!")

# load .env variables
load_dotenv()

# Retrieving environment variables
DB_USER = os.getenv("DB_POSTGRES_USER")
Expand All @@ -20,7 +21,7 @@
SQLALCHEMY_DATABASE_URL = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

engine = create_engine(
SQLALCHEMY_DATABASE_URL
SQLALCHEMY_DATABASE_URL, future=True
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Expand All @@ -31,20 +32,21 @@
mapper_registry = registry()
Base = mapper_registry.generate_base()

postgres_conn = SessionLocal

# Opens a connection from the module-level engine, reports success or failure,
# and closes the connection again if one was opened.
# NOTE(review): this span appears to interleave removed and added diff lines
# (each logger.* call is followed by an equivalent print()) — confirm which
# set is current before editing the code itself.
def test_connection():
connection = None
try:
connection = engine.connect()
logger.debug("Connected to 🐘")
print("Connected to 🐘")
# Any failure while connecting is reported rather than propagated.
except Exception as e:
logger.error(f"Unable to connect to PostgreSQL: {str(e)}")
print(f"Unable to connect to PostgreSQL: {str(e)}")
finally:
# Ensure the connection object is not None before trying to close it
if connection:
connection.close()
logger.debug("🐘 Connection closed")

print("🐘 Connection closed")


test_connection()
if __name__ == "__main__":
test_connection()
6 changes: 6 additions & 0 deletions app/database/postgres/queries/clothe_domains.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- app/database/postgres/queries/clothe_domains.sql
-- Sets the home_url for the given domain
UPDATE targets.domains
SET home_url = :home_url
WHERE id = :domain_id
RETURNING id;
10 changes: 10 additions & 0 deletions app/database/postgres/queries/get_naked_domains.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- app/database/postgres/queries/get_naked_domains.sql
SELECT
id AS "domain_id",
"domain"
FROM targets.domains d
WHERE (home_url IS NULL OR home_url = '')
AND active = TRUE
AND "valid" = TRUE
LIMIT 1;

1 change: 1 addition & 0 deletions app/database/postgres/queries/get_rule_data.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- app/database/postgres/queries/get_rule_data.sql
7 changes: 7 additions & 0 deletions app/database/postgres/queries/get_unprocessed_rules.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- app/database/postgres/queries/get_unprocessed_rules.sql

SELECT id as rule_id
FROM axe.rules
WHERE imported = false
ORDER BY id
LIMIT %s OFFSET %s
5 changes: 5 additions & 0 deletions app/database/postgres/queries/upsert_url.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- app/database/postgres/queries/upsert_url.sql
INSERT INTO targets.urls (url, domain_id)
VALUES (:home_url, :domain_id)
ON CONFLICT (url) DO UPDATE SET url = :home_url, domain_id = :domain_id
RETURNING id;
37 changes: 37 additions & 0 deletions app/database/postgres/run_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# app/database/postgres/run_query.py
import os
import re
from app.database.postgres.connect import postgres_conn as conn
from app import logger
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import text

QUERIES_DIRECTORY = os.path.join(os.path.dirname(__file__), "queries")

def run_query(query_name, vars=None):
    """Execute a named SQL file from the queries directory and return its rows.

    Args:
        query_name: Base name (without ``.sql``) of a file in
            ``QUERIES_DIRECTORY``.
        vars: Optional mapping of bind parameters for the statement.

    Returns:
        The list of fetched rows, an empty list when the statement produces
        no rows, or ``None`` when a ``SQLAlchemyError`` was raised (in which
        case the transaction is rolled back).
    """
    query_file = os.path.join(QUERIES_DIRECTORY, f"{query_name}.sql")

    with open(query_file, encoding="utf-8") as file:
        sql_content = file.read()

    logger.info("Running query: %s", query_name)
    # Log the statement before executing so it is visible even if it fails.
    logger.debug('Formatted SQL to Run:\n %s', sql_content)

    session = conn()

    try:
        result = session.execute(text(sql_content), vars)

        # Read the rows while the transaction is still open: committing
        # releases the connection, after which the result may be closed.
        # Statements without a result set yield an empty list instead of
        # raising after the work has already been committed.
        rows = result.fetchall() if result.returns_rows else []

        session.commit()
        logger.debug("Result rows: %s", rows)

        return rows

    except SQLAlchemyError as e:
        session.rollback()
        logger.error("Error while running query %s: %s", query_name, e)
        return None
    finally:
        session.close()
23 changes: 0 additions & 23 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,24 +1 @@
# app/main.py
import time
import sys
import os
from .utils import logger
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from app.processes import execute_axes
from app.database import fetch_unprocessed_rules, mark_axe_rule_as_processed

def yeet_axes():
    """Poll forever for unprocessed rules and process each one.

    Every rule id returned by ``fetch_unprocessed_rules`` is passed to
    ``execute_axes`` and then to ``mark_axe_rule_as_processed``. When nothing
    is returned, the loop sleeps for 10 seconds before checking again.
    """
    while True:
        pending = fetch_unprocessed_rules()
        if not pending:
            # Nothing left to process; wait before polling again.
            time.sleep(10)
            continue

        for rule_id in pending:
            execute_axes(rule_id)  # Inserts into ClickHouse
            mark_axe_rule_as_processed(rule_id)  # Marks as processed in Postgres


if __name__ == "__main__":
yeet_axes()
28 changes: 24 additions & 4 deletions app/processes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
# __init__.py
# Relative Path: app/processes/__init__.py
# app/processes/__init__.py
import time
from multiprocessing import Process
from app import logger

from .axe import get_axes, execute_axes
from .preprocess_tests import preprocess_data
# Process Imports
from .naked_urls import find_nakies

def process_loop(process_func, sleep_time):
    """Call ``process_func`` forever, pausing whenever it reports no work.

    Args:
        process_func: Zero-argument callable; a falsy return value means
            there was no data to process.
        sleep_time: Seconds to sleep after a falsy result.
    """
    while True:
        had_work = process_func()
        if had_work:
            continue  # Data was processed; poll again immediately
        time.sleep(sleep_time)  # Nothing to do; wait before the next poll

def start_processes():
    """Spawn one worker process per registered polling function.

    Each worker runs ``process_loop`` with the function and the number of
    seconds to sleep when that function reports no data.
    """
    logger.info('Starting processes...')

    # (function, seconds to sleep when it has nothing to do)
    schedule = [
        (find_nakies, 30)
        #(fix_axe, 60)
    ]

    for worker_func, pause in schedule:
        Process(target=process_loop, args=(worker_func, pause)).start()
61 changes: 61 additions & 0 deletions app/processes/naked_urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from app import logger
from app.database.postgres.run_query import run_query
import requests

def find_nakies():
    """Resolve and store the home URL for one domain that lacks it.

    Runs the ``get_naked_domains`` query, looks up the home URL of the first
    returned domain, records it on the domain, and upserts it as a URL unless
    the lookup yielded the ``"BADDIE"`` sentinel.

    Returns:
        True if a domain was processed, False if the query returned nothing.
    """
    logger.info('Starting to find naked domains...')
    result = run_query("get_naked_domains")

    logger.debug('SQL result: %s', result)

    if not result:
        logger.info('No naked domains found.')
        return False  # No data to process

    # ``result`` is non-empty here, so the first row always exists.
    domain_id, domain = result[0]

    home_url = get_home_url(domain)
    # The sentinel is recorded as well, so the domain no longer has an empty
    # home_url and is not selected by the naked-domains query again.
    record_home_url(domain_id, home_url)

    if home_url == "BADDIE":
        logger.debug('We got a BADDIE for %s', domain)
    else:
        upsert_url(domain_id, home_url)
        logger.debug('%s\'s home url is: %s', domain, home_url)

    return True  # There is data to process


def get_home_url(domain, timeout=5):
    """Return the final URL reached when requesting ``http://<domain>``.

    Args:
        domain: Host name to request.
        timeout: Seconds passed to ``requests.get`` as its timeout
            (defaults to 5, the previously hard-coded value).

    Returns:
        The response URL after redirects when the status code is 200;
        otherwise the string ``"BADDIE"``, which is also returned when the
        request raises a ``requests`` exception.
    """
    logger.debug('Getting home url for %s', domain)

    # Only the request itself can raise RequestException, so keep the try
    # body limited to that call.
    try:
        response = requests.get(
            f'http://{domain}', timeout=timeout, allow_redirects=True
        )
    except requests.exceptions.RequestException as e:
        logger.error("Error while getting home URL for %s: %s", domain, e)
        return "BADDIE"

    return response.url if response.status_code == 200 else "BADDIE"

def record_home_url(domain_id, home_url):
    """Store ``home_url`` on the domain row via the ``clothe_domains`` query.

    Args:
        domain_id: Id of the domain to update.
        home_url: Value to write to the domain's ``home_url`` column.
    """
    logger.debug('Fixing home_url for domain_id: %s', domain_id)
    variables = {"domain_id": domain_id, "home_url": home_url}
    # The returned rows are not needed here, so the result is discarded.
    run_query("clothe_domains", variables)

def upsert_url(domain_id, home_url):
    """Insert or update the URL row for a domain via the ``upsert_url`` query.

    Args:
        domain_id: Id of the domain the URL belongs to.
        home_url: URL to insert, or to update on conflict.
    """
    logger.debug('Upserting url for domain_id: %s', domain_id)
    variables = {"domain_id": domain_id, "home_url": home_url}
    # The returned rows are not needed here, so the result is discarded.
    run_query("upsert_url", variables)
5 changes: 2 additions & 3 deletions app/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# __init__.py
# Relative Path: app/utils/__init__.py
from .logger import logger
# app/utils/__init__.py
from .monitoring import which_extras as configure_monitoring, logger
3 changes: 0 additions & 3 deletions app/utils/logger/__init__.py

This file was deleted.

18 changes: 18 additions & 0 deletions app/utils/monitoring/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# app/utils/monitoring/__init__.py
import os
from .sentry import configure_sentry
from .pyroscope import configure_pyroscope
from .logging import logger

def which_extras():
    """Enable the optional monitoring integrations that are configured.

    Sentry is configured when ``SENTRY_DSN`` has a non-empty value, and
    Pyroscope when ``PYROSCOPE_API_KEY`` does; each is logged once configured.
    """
    # (environment variable, configure function, log message), checked in order
    extras = (
        ("SENTRY_DSN", configure_sentry, 'Sentry Configured'),
        ("PYROSCOPE_API_KEY", configure_pyroscope, 'Pyroscope Configured'),
    )

    for env_var, configure, message in extras:
        if not os.getenv(env_var):
            continue  # Variable unset or empty: leave this integration off
        configure()
        logger.info(message)


Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# app/utils/logger/logger.py
# app/utils/monitoring/logger.py
import logging
import os
import time
Expand All @@ -8,7 +8,8 @@
# Logger Name and Level
LOGGER_NAME = "LoggyMcLogFace"
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()
LOG_VERBOSE = True if LOG_LEVEL == "DEBUG" else os.environ.get("LOG_VERBOSE", "False").lower() == "true"
LOG_VERBOSE = os.environ.get("LOG_VERBOSE", "False").lower() == "true"


if LOG_VERBOSE:
FMT_STREAM = "%(asctime)s.%(msecs)03d %(levelname)-8s [%(filename)s:%(funcName)s:%(lineno)d] %(message)s"
Expand Down Expand Up @@ -86,3 +87,4 @@ def log_exception(exc_type, exc_value, exc_traceback):
logger.warning("This is a warning message")
logger.error("This is an error message")
logger.critical("This is a critical message")

Loading
Loading