From 00755e30495b5f32ca9b5eaee74d1ee60c8ccd69 Mon Sep 17 00:00:00 2001
From: Fabian Haas <29468630+hfxbse@users.noreply.github.com>
Date: Tue, 19 Nov 2024 18:10:24 +0100
Subject: [PATCH] Connect to Spark independently of the host name

---
 airflow.Dockerfile | 7 +++++--
 docker-compose.yml | 3 +++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/airflow.Dockerfile b/airflow.Dockerfile
index 85a3506..5d51c83 100644
--- a/airflow.Dockerfile
+++ b/airflow.Dockerfile
@@ -11,9 +11,12 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.4.jar -P /home/ai
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/plugins /home/airflow/airflow
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/dags /home/airflow/airflow
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/python /home/airflow/airflow
+
+RUN sed -i '34 i sed -i "s/SPARK_MASTER_IP=hadoop/SPARK_MASTER_IP=\$HADOOP_HOST/" /home/airflow/spark/conf/spark-env.sh' /startup.sh
+RUN sed -i '34 i sed -i "s/SPARK_LOCAL_IP=\\"airflow\\"/SPARK_LOCAL_IP=\$SPARK_HOST/" /home/airflow/spark/conf/spark-env.sh' /startup.sh
 # Pyarrow fails to interperet placeholder, subsitute instead
-RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/core-site.xml" /startup.sh
-RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/yarn-site.xml" /startup.sh
+RUN sed -i '34 i sed -i "s/hadoop:/\$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/core-site.xml' /startup.sh
+RUN sed -i '34 i sed -i "s/hadoop:/\$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/yarn-site.xml' /startup.sh

 # Setting AIRFLOW__WEBSERVER__BASE_URL did not get applied for unknown reasons, update the config file instead
 RUN sed -i "34 i sed -i 's#base_url = http://localhost:8080#base_url = http://localhost:8080/airflow#' /home/airflow/airflow/airflow.cfg" /startup.sh
diff --git a/docker-compose.yml b/docker-compose.yml
index 64064c5..e764702 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -42,3 +42,6 @@ services:
         condition: service_started
       hadoop:
         condition: service_healthy
+    environment:
+      - HADOOP_HOST=hadoop
+      - SPARK_HOST=airflow # Define the address under which Spark is reachable externally
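
For reference, a rough sketch of how the injected lines are expected to read inside /startup.sh after the image build, assuming GNU sed's one-line `i` syntax resolves the escaping as written (`\\"` becoming `\"`, `\$` becoming `$`); the startup script itself is not part of this patch, and because every instruction inserts at line 34, the lines land in roughly reverse order of the RUN statements:

    # Runs at container start, i.e. once HADOOP_HOST and SPARK_HOST are available
    # from the docker-compose environment; each substitution replaces a hard-coded host name.
    sed -i "s/SPARK_MASTER_IP=hadoop/SPARK_MASTER_IP=$HADOOP_HOST/" /home/airflow/spark/conf/spark-env.sh
    sed -i "s/SPARK_LOCAL_IP=\"airflow\"/SPARK_LOCAL_IP=$SPARK_HOST/" /home/airflow/spark/conf/spark-env.sh
    sed -i "s/hadoop:/$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/core-site.xml
    sed -i "s/hadoop:/$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/yarn-site.xml

With the defaults from docker-compose.yml (HADOOP_HOST=hadoop, SPARK_HOST=airflow) the substituted values match the previously hard-coded ones; pointing the variables at other addresses is what makes the Spark connection independent of the host names.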