# Patch summary. These leading lines sit before the first "diff --git" header
# and are commentary only; the patch text below is unchanged.
#
# airflow.Dockerfile
#   - Adds two RUN steps. Each uses `sed -i '34 i ...' /startup.sh` to insert a
#     new command before line 34 of /startup.sh. The inserted commands rewrite
#     SPARK_MASTER_IP=hadoop and SPARK_LOCAL_IP="airflow" in
#     /home/airflow/spark/conf/spark-env.sh to the values of HADOOP_HOST and
#     SPARK_HOST respectively.
#   - Replaces the two existing "hadoop:" substitutions for core-site.xml and
#     yarn-site.xml. The outer quoting changes from double to single quotes and
#     the variable is now written as \$HADOOP_HOST.
#     NOTE(review): presumably this defers expansion of HADOOP_HOST from image
#     build time to the moment /startup.sh runs -- confirm. It relies on sed
#     unescaping \$ and \\" in the one-line `i` text; verify by inspecting the
#     generated lines in /startup.sh of a built image.
#   - All inserts target the same line number (34), so a later RUN step places
#     its command above the ones inserted by earlier RUN steps.
#
# docker-compose.yml
#   - Adds an `environment` list with HADOOP_HOST=hadoop and SPARK_HOST=airflow.
#     The service it belongs to lies outside the visible hunk.
#
# NOTE(review): in this copy the patch is flattened onto a single line, so the
# original line breaks and indentation are not visible. The hunk header
# "@@ -11,9 +11,12 @@" implies one blank context line whose position cannot be
# determined from here; restore the patch from its source before applying it.
diff --git a/airflow.Dockerfile b/airflow.Dockerfile index 85a3506..5d51c83 100644 --- a/airflow.Dockerfile +++ b/airflow.Dockerfile @@ -11,9 +11,12 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.4.jar -P /home/ai RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/plugins /home/airflow/airflow RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/dags /home/airflow/airflow RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/python /home/airflow/airflow + +RUN sed -i '34 i sed -i "s/SPARK_MASTER_IP=hadoop/SPARK_MASTER_IP=\$HADOOP_HOST/" /home/airflow/spark/conf/spark-env.sh' /startup.sh +RUN sed -i '34 i sed -i "s/SPARK_LOCAL_IP=\\"airflow\\"/SPARK_LOCAL_IP=\$SPARK_HOST/" /home/airflow/spark/conf/spark-env.sh' /startup.sh # Pyarrow fails to interperet placeholder, subsitute instead -RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/core-site.xml" /startup.sh -RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/yarn-site.xml" /startup.sh +RUN sed -i '34 i sed -i "s/hadoop:/\$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/core-site.xml' /startup.sh +RUN sed -i '34 i sed -i "s/hadoop:/\$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/yarn-site.xml' /startup.sh # Setting AIRFLOW__WEBSERVER__BASE_URL did not get applied for unknown reasons, update the config file instead RUN sed -i "34 i sed -i 's#base_url = http://localhost:8080#base_url = http://localhost:8080/airflow#' /home/airflow/airflow/airflow.cfg" /startup.sh diff --git a/docker-compose.yml b/docker-compose.yml index 64064c5..e764702 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -42,3 +42,6 @@ services: condition: service_started hadoop: condition: service_healthy + environment: + - HADOOP_HOST=hadoop + - SPARK_HOST=airflow # Define the address under which spark is reachable externally