Skip to content

Commit

Permalink
IN-928 Restructure the config module to use a 'Config' class (#121)
Browse files Browse the repository at this point in the history
* IN-928 Restructure the config module to use a 'Config' class

Why these changes are being introduced:
* Refactor configuration process and improve code readability

How this addresses that need:
* Create Config class
* Update dependent modules to use Config
* Update unit tests
* Simplify error logging for failed connection tests
* Add .dockerignore file

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/IN-928
  • Loading branch information
jonavellecuerdo authored Oct 3, 2023
1 parent fbbfaa6 commit 9fd0a56
Show file tree
Hide file tree
Showing 11 changed files with 903 additions and 648 deletions.
156 changes: 156 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject the date and other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
coverage/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, when collaborating, if there are platform-specific dependencies or dependencies
# without cross-platform support, pipenv may install dependencies that don't work, or fail to
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# IDEs
.idea/
.vscode/

# Local directories
output/

# MacOS files
.DS_Store
973 changes: 493 additions & 480 deletions Pipfile.lock

Large diffs are not rendered by default.

96 changes: 76 additions & 20 deletions carbon/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import os
import threading
from ftplib import FTP, FTP_TLS # nosec
from ftplib import FTP, FTP_TLS, error_perm # nosec
from typing import IO, TYPE_CHECKING

from carbon.feed import ArticlesXmlFeed, PeopleXmlFeed
Expand All @@ -12,6 +12,7 @@
from collections.abc import Callable
from socket import socket

from carbon.config import Config
from carbon.database import DatabaseEngine

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -42,7 +43,7 @@ def ntransfercmd(self, cmd: str, rest: str | int | None = None) -> tuple[socket,
conn, size = FTP.ntransfercmd(self, cmd, rest)
if self._prot_p: # type: ignore[attr-defined]
conn = self.context.wrap_socket(
conn, server_hostname=self.host, session=self.sock.session # type: ignore[union-attr] # noqa: E501
conn, server_hostname=self.host, session=self.sock.session # type: ignore[union-attr]
)
return conn, size

Expand All @@ -56,6 +57,8 @@ class FileWriter:
Attributes:
output_file: A file-like object (stream) into which normalized XML
strings are written.
engine: A configured carbon.database.DatabaseEngine that can connect to the
Data Warehouse.
"""

def __init__(self, engine: DatabaseEngine, output_file: IO):
Expand Down Expand Up @@ -85,10 +88,12 @@ class ConcurrentFtpFileWriter(FileWriter):
This class is intended to provide a buffered read/write connecter.
Attributes:
output_file: A file-like object (stream) into which normalized XML
strings are written.
input_file: A file-like object (stream) into which normalized XML
strings are written. This stream is passed into FileWriter.output_file
and provides the contents that are ultimately written to an XML file
on the Symplectic Elements FTP server.
ftp_output_file: A file-like object (stream) that reads data from
PipeWriter().output_file and writes its contents to an XML file
the ConcurrentFtpFileWriter.input_file and writes its contents to an XML file
on the Symplectic Elements FTP server.
"""

Expand Down Expand Up @@ -161,16 +166,25 @@ def __call__(self) -> None:


class DatabaseToFilePipe:
"""A pipe feeding data from the Data Warehouse to a local file."""
"""A pipe feeding data from the Data Warehouse to a local file.
def __init__(self, config: dict, engine: DatabaseEngine, output_file: IO):
Attributes:
config: A carbon.config.Config instance with the required environment variables
for running the feed.
engine: A configured carbon.database.DatabaseEngine that can connect to the
Data Warehouse.
output_file: The full file path to the generated XML file into which normalized
XML strings are written (e.g. "output/people.xml").
"""

def __init__(self, config: Config, engine: DatabaseEngine, output_file: IO):
self.config = config
self.engine = engine
self.output_file = output_file

def run(self) -> None:
FileWriter(engine=self.engine, output_file=self.output_file).write(
feed_type=self.config["FEED_TYPE"]
feed_type=self.config.FEED_TYPE
)


Expand All @@ -189,10 +203,13 @@ class DatabaseToFtpPipe:
'write' file stream into an XML file on the Elements FTP server.
Attributes:
config: A dictionary of required environment variables for running the feed.
config: A carbon.config.Config instance with the required environment variables
for running the feed.
engine: A configured carbon.database.DatabaseEngine that can connect to the
Data Warehouse.
"""

def __init__(self, config: dict, engine: DatabaseEngine):
def __init__(self, config: Config, engine: DatabaseEngine):
self.config = config
self.engine = engine

Expand All @@ -204,15 +221,15 @@ def run(self) -> None:
) as buffered_writer:
ftp_file = FtpFile(
content_feed=buffered_reader,
user=self.config["SYMPLECTIC_FTP_USER"],
password=self.config["SYMPLECTIC_FTP_PASS"],
path=self.config["SYMPLECTIC_FTP_PATH"],
host=self.config["SYMPLECTIC_FTP_HOST"],
port=int(self.config["SYMPLECTIC_FTP_PORT"]),
user=self.config.SYMPLECTIC_FTP_USER,
password=self.config.SYMPLECTIC_FTP_PASS,
path=self.config.SYMPLECTIC_FTP_PATH,
host=self.config.SYMPLECTIC_FTP_HOST,
port=int(self.config.SYMPLECTIC_FTP_PORT),
)
ConcurrentFtpFileWriter(
engine=self.engine, input_file=buffered_writer, ftp_output_file=ftp_file
).write(feed_type=self.config["FEED_TYPE"])
).write(feed_type=self.config.FEED_TYPE)

def run_connection_test(self) -> None:
"""Test connection to the Symplectic Elements FTP server.
Expand All @@ -224,13 +241,19 @@ def run_connection_test(self) -> None:
try:
ftps = CarbonFtpsTls(timeout=30)
ftps.connect(
host=self.config["SYMPLECTIC_FTP_HOST"],
port=int(self.config["SYMPLECTIC_FTP_PORT"]),
host=self.config.SYMPLECTIC_FTP_HOST,
port=int(self.config.SYMPLECTIC_FTP_PORT),
)
ftps.login(
user=self.config["SYMPLECTIC_FTP_USER"],
passwd=self.config["SYMPLECTIC_FTP_PASS"],
user=self.config.SYMPLECTIC_FTP_USER,
passwd=self.config.SYMPLECTIC_FTP_PASS,
)
except error_perm as error:
error_message = (
f"Failed to connect to the Symplectic Elements FTP server: {error}"
)
logger.error(error_message) # noqa: TRY400
raise
except Exception as error:
error_message = (
f"Failed to connect to the Symplectic Elements FTP server: {error}"
Expand All @@ -240,3 +263,36 @@ def run_connection_test(self) -> None:
else:
logger.info("Successfully connected to the Symplectic Elements FTP server")
ftps.quit()


def run_all_connection_tests(
    engine: DatabaseEngine, pipe: DatabaseToFilePipe | DatabaseToFtpPipe
) -> None:
    """Check connectivity to the Data Warehouse and, if relevant, the FTP server.

    The Data Warehouse test always runs first. The Symplectic Elements FTP server
    test runs only when the Data Warehouse test succeeded and 'pipe' is a
    carbon.app.DatabaseToFtpPipe. Any Exception raised by either test is caught
    here and reported with an error-level log message; it is not re-raised.

    Args:
        engine (DatabaseEngine): A configured carbon.database.DatabaseEngine that can
            connect to the Data Warehouse.
        pipe (DatabaseToFilePipe | DatabaseToFtpPipe): The pipe used to run the
            data feed. Only a carbon.app.DatabaseToFtpPipe triggers the Elements
            FTP server connection test.
    """
    # Step 1: the Data Warehouse. A failure here stops all further checks.
    try:
        engine.run_connection_test()
    except Exception:  # noqa: BLE001
        logger.error(  # noqa: TRY400
            "The Data Warehouse connection test failed. The application is exiting."
        )
        return

    # Step 2: the Symplectic Elements FTP server, which only FTP pipes talk to.
    if not isinstance(pipe, DatabaseToFtpPipe):
        return

    try:
        pipe.run_connection_test()
    except Exception:  # noqa: BLE001
        logger.error(  # noqa: TRY400
            "Symplectic Elements FTP server connection test failed. "
            "The application is exiting."
        )
Loading

0 comments on commit 9fd0a56

Please sign in to comment.