PIP: reverted changes from poetry to pip
* Reverted dependency installation from poetry to pip.
* Updated the docker-compose file.
* Updated the Dockerfile to use the official Airflow image.
* Updated the airflow.cfg configuration for Airflow 2.6.0.
ErnestaP committed Jul 27, 2023
1 parent 4239e62 commit f6f1361
Showing 11 changed files with 136 additions and 4,657 deletions.
11 changes: 6 additions & 5 deletions .github/workflows/test-and-build.yml
@@ -34,9 +34,10 @@ jobs:

- name: Install python dependencies
run: |
-python -m pip install --upgrade pip
-pip install poetry
-poetry install
+pip install --upgrade pip
+pip install --no-cache-dir --upgrade setuptools
+pip install --no-cache-dir --upgrade wheel
+pip install -r requirements.txt -r requirements-test.txt -r requirements-airflow.txt
- name: Run services for IT Tests, create buckets
run: make start
@@ -62,9 +63,9 @@ jobs:
-v "$(pwd)"/tests:/opt/airflow/tests
-v "$(pwd)"/data:/opt/airflow/data
-v "$(pwd)"/airflow.cfg:/opt/airflow/airflow.cfg
---entrypoint poetry
+--entrypoint pytest
$REGISTRY/$IMAGE@${{ steps.build.outputs.image-digest }}
-run pytest tests
+tests
--cov=./
--cov-report=xml
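Note on the workflow change above: the test container's entrypoint is now pytest itself rather than poetry, with the arguments tests --cov=./ --cov-report=xml. The same run can be reproduced locally through pytest's Python entry point; this is only a sketch and assumes pytest and pytest-cov are installed, for example from requirements-test.txt.

# Local equivalent of the CI test command (sketch; assumes pytest and
# pytest-cov are installed, e.g. via requirements-test.txt).
import sys

import pytest

if __name__ == "__main__":
    # Mirrors the container invocation: pytest tests --cov=./ --cov-report=xml
    sys.exit(pytest.main(["tests", "--cov=./", "--cov-report=xml"]))
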
18 changes: 6 additions & 12 deletions Dockerfile
@@ -1,20 +1,14 @@
-FROM python:3.10
-
-WORKDIR /opt/airflow/
-ENV AIRFLOW_HOME=/opt/airflow/
+FROM apache/airflow:2.6.0-python3.10
+
ENV PYTHONBUFFERED=0
ENV AIRFLOW_UID=501
ENV PYTHONASYNCIODEBUG=1

-COPY pyproject.toml ./pyproject.toml
-COPY poetry.lock ./poetry.lock
-
+COPY requirements.txt ./requirements.txt
+COPY requirements-test.txt ./requirements-test.txt
COPY dags ./dags
+USER airflow
RUN pip install --upgrade pip &&\
pip install --no-cache-dir --upgrade setuptools==59.1.1 &&\
pip install --no-cache-dir --upgrade wheel &&\
-pip install --no-cache-dir poetry && \
-poetry config virtualenvs.create false --local
-
-RUN poetry install
-ENTRYPOINT ["airflow"]
+pip install --no-cache-dir --user -r requirements.txt -r requirements-test.txt
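The Dockerfile now builds on the official apache/airflow:2.6.0-python3.10 image and installs the project requirements with pip install --user as the airflow user. A quick sanity check (a sketch, not part of this commit) is to confirm inside the built image that the pinned requirements did not replace the base image's Airflow release:

# Run with the Python interpreter inside the built image (sketch for
# verification only; not part of the repository).
import importlib.metadata

airflow_version = importlib.metadata.version("apache-airflow")
assert airflow_version.startswith("2.6."), (
    "requirements should not override the base image's Airflow 2.6.x"
)
print("apache-airflow", airflow_version)
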
16 changes: 9 additions & 7 deletions Makefile
@@ -10,6 +10,8 @@ FLOWER_PID=airflow-flower.pid
init:
pyenv install ${PYTHON_VERSION}
pyenv global $(PYTHON_VERSION)
+pyenv virtualenv ${PYTHON_VERSION} workflows
+pyenv activate workflows
export AIRFLOW_HOME=${PWD}

start: compose sleep airflow
@@ -21,19 +23,19 @@ buckets:
docker-compose up -d create_buckets

airflow:
-poetry run airflow db init
-poetry run airflow webserver -D
-poetry run airflow triggerer -D
-poetry run airflow scheduler -D
-poetry run airflow celery worker -D
-poetry run airflow celery flower -D
+airflow db init
+airflow webserver -D
+airflow triggerer -D
+airflow scheduler -D
+airflow celery worker -D
+airflow celery flower -D
echo -e "\033[0;32m Airflow Started. \033[0m"

compose:
docker-compose up -d redis postgres sftp ftp s3 create_buckets

create_user:
-poetry run airflow users create \
+airflow users create \
--username admin \
--password admin \
--role Admin \
71 changes: 37 additions & 34 deletions airflow.cfg
@@ -23,8 +23,6 @@ default_timezone = utc
# full import path to the class when using a custom executor.
executor = CeleryExecutor

-# The encoding for the databases
-sql_engine_encoding = utf-8

# Collation for ``dag_id``, ``task_id``, ``key`` columns in case they have different encoding.
# By default this collation is the same as the database collation, however for ``mysql`` and ``mariadb``
@@ -33,35 +31,6 @@ sql_engine_encoding = utf-8
# (see https://github.com/apache/airflow/pull/17603#issuecomment-901121618).
# sql_engine_collation_for_ids =

-# If SqlAlchemy should pool database connections.
-sql_alchemy_pool_enabled = True
-
-# The SqlAlchemy pool size is the maximum number of database connections
-# in the pool. 0 indicates no limit.
-sql_alchemy_pool_size = 5
-
-# The maximum overflow size of the pool.
-# When the number of checked-out connections reaches the size set in pool_size,
-# additional connections will be returned up to this limit.
-# When those additional connections are returned to the pool, they are disconnected and discarded.
-# It follows then that the total number of simultaneous connections the pool will allow
-# is pool_size + max_overflow,
-# and the total number of "sleeping" connections the pool will allow is pool_size.
-# max_overflow can be set to ``-1`` to indicate no overflow limit;
-# no limit will be placed on the total number of concurrent connections. Defaults to ``10``.
-sql_alchemy_max_overflow = 10
-
-# The SqlAlchemy pool recycle is the number of seconds a connection
-# can be idle in the pool before it is invalidated. This config does
-# not apply to sqlite. If the number of DB connections is ever exceeded,
-# a lower config value will allow the system to recover faster.
-sql_alchemy_pool_recycle = 1800
-
-# Check connection at the start of each connection pool checkout.
-# Typically, this is a simple statement like "SELECT 1".
-# More information here:
-# https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
-sql_alchemy_pool_pre_ping = True

# Import path for connect args in SqlAlchemy. Defaults to an empty dict.
# This is useful when you want to configure db engine args that SqlAlchemy won't parse
@@ -347,6 +316,39 @@ sql_alchemy_schema =
# Currently it is only used in ``DagFileProcessor.process_file`` to retry ``dagbag.sync_to_db``.
max_db_retries = 3

+# If SqlAlchemy should pool database connections.
+sql_alchemy_pool_enabled = True
+
+# The SqlAlchemy pool size is the maximum number of database connections
+# in the pool. 0 indicates no limit.
+sql_alchemy_pool_size = 5
+
+# The maximum overflow size of the pool.
+# When the number of checked-out connections reaches the size set in pool_size,
+# additional connections will be returned up to this limit.
+# When those additional connections are returned to the pool, they are disconnected and discarded.
+# It follows then that the total number of simultaneous connections the pool will allow
+# is pool_size + max_overflow,
+# and the total number of "sleeping" connections the pool will allow is pool_size.
+# max_overflow can be set to ``-1`` to indicate no overflow limit;
+# no limit will be placed on the total number of concurrent connections. Defaults to ``10``.
+sql_alchemy_max_overflow = 10
+
+# The SqlAlchemy pool recycle is the number of seconds a connection
+# can be idle in the pool before it is invalidated. This config does
+# not apply to sqlite. If the number of DB connections is ever exceeded,
+# a lower config value will allow the system to recover faster.
+sql_alchemy_pool_recycle = 1800
+
+# Check connection at the start of each connection pool checkout.
+# Typically, this is a simple statement like "SELECT 1".
+# More information here:
+# https://docs.sqlalchemy.org/en/13/core/pooling.html#disconnect-handling-pessimistic
+sql_alchemy_pool_pre_ping = True
+
+# The encoding for the databases
+sql_engine_encoding = utf-8
+
[secrets]
# Full class name of secrets backend to enable (will precede env vars and metastore in search path)
# Example: backend = airflow.providers.amazon.aws.secrets.systems_manager.SystemsManagerParameterStoreBackend
@@ -942,6 +944,9 @@ dependency_detector = airflow.serialization.serialized_objects.DependencyDetecto
# How often to check for expired trigger requests that have not run yet.
trigger_timeout_check_interval = 15

+# How often in seconds to check if Pending workers have exceeded their timeouts
+task_queued_timeout_check_interval = 120
+
[triggerer]
# How many triggers a single Triggerer will run at once, by default.
default_capacity = 1000
@@ -999,7 +1004,7 @@ offset_field = offset
use_ssl = False
verify_certs = True

-[kubernetes]
+[kubernetes_executor]
# Path to the YAML pod file that forms the basis for KubernetesExecutor workers.
pod_template_file =

@@ -1082,8 +1087,6 @@ verify_ssl = True
# How long in seconds a worker can be in Pending before it is considered a failure
worker_pods_pending_timeout = 300

-# How often in seconds to check if Pending workers have exceeded their timeouts
-worker_pods_pending_timeout_check_interval = 120

# How often in seconds to check for task instances stuck in "queued" status without a pod
worker_pods_queued_check_interval = 60
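In the airflow.cfg changes above, the SQLAlchemy pool options and sql_engine_encoding move next to the other database settings, [kubernetes] becomes [kubernetes_executor], and the pending-pod check interval is replaced by task_queued_timeout_check_interval under the scheduler settings. A quick way to confirm Airflow 2.6.0 resolves the relocated options is to read them back through airflow.configuration.conf. This is a sketch: it assumes it runs where the airflow package and this airflow.cfg are available (for example inside the webserver container), and that the moved block sits in the [database] section as in the stock 2.6 layout.

# Sanity-check the renamed/relocated options (sketch; section names assume the
# stock Airflow 2.6 configuration layout).
from airflow.configuration import conf

print(conf.get("database", "sql_alchemy_pool_size"))                 # expected: 5
print(conf.get("scheduler", "task_queued_timeout_check_interval"))   # expected: 120
print(conf.get("kubernetes_executor", "pod_template_file"))          # empty by default
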
2 changes: 1 addition & 1 deletion dags/common/sftp_service.py
@@ -14,7 +14,7 @@ def __init__(
host="localhost",
username="airflow",
password="airflow",
-port=2222,
+port=22,
dir="/upload",
):
self.connection = None
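The last change drops the SFTP default port from 2222 to 22, consistent with connecting to the sftp service over the compose network rather than through a host-mapped port. For illustration, a connection with the same defaults could look like the sketch below; the actual library and class used by dags/common/sftp_service.py are not shown in this diff, so paramiko here is an assumption.

# Illustrative only: connect with the defaults from the diff above
# (host "localhost", user/password "airflow", port 22, dir "/upload").
import paramiko

transport = paramiko.Transport(("localhost", 22))
transport.connect(username="airflow", password="airflow")
sftp = paramiko.SFTPClient.from_transport(transport)
try:
    print(sftp.listdir("/upload"))  # list files in the upload directory
finally:
    sftp.close()
    transport.close()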