From 1a68f32d6b85a970492e4ed20810cedea4ee109a Mon Sep 17 00:00:00 2001
From: Fabian Haas <29468630+hfxbse@users.noreply.github.com>
Date: Tue, 19 Nov 2024 18:10:24 +0100
Subject: [PATCH] Connect to spark independent of the host name

Rename the compose service to hadoop-test and pass its name to the
airflow container through the HADOOP_HOST environment variable.
spark-env.sh now takes SPARK_MASTER_IP from $HADOOP_HOST and uses
localhost for SPARK_LOCAL_IP. The sed commands that rewrite
core-site.xml, yarn-site.xml and airflow.cfg are now inserted at line 26
of /startup.sh instead of line 34.
---
Review notes (ignored by git am):

* The sed commands inserted into /startup.sh for core-site.xml and
  yarn-site.xml use (escaped) double quotes around the sed script. With
  single quotes the shell running /startup.sh would not expand
  HADOOP_HOST and the literal text "$HADOOP_HOST:" would be written into
  the XML files (assuming /startup.sh is run by a POSIX shell - confirm).
* ${HADOOP_HOST:-hadoop} keeps the files unchanged when the variable is
  unset.
* Context-line indentation and blank-line positions were reconstructed
  from the hunk line counts; if they do not match the target files, apply
  with `git apply --ignore-whitespace`.

 airflow.Dockerfile | 9 ++++++---
 docker-compose.yml | 6 ++++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/airflow.Dockerfile b/airflow.Dockerfile
index 85a3506..22d3adb 100644
--- a/airflow.Dockerfile
+++ b/airflow.Dockerfile
@@ -11,11 +11,14 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.4.jar -P /home/ai
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/plugins /home/airflow/airflow
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/dags /home/airflow/airflow
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/python /home/airflow/airflow
+
+RUN sed -i 's/SPARK_MASTER_IP=hadoop/SPARK_MASTER_IP=$HADOOP_HOST/' /home/airflow/spark/conf/spark-env.sh
+RUN sed -i 's/SPARK_LOCAL_IP="airflow"/SPARK_LOCAL_IP="localhost"/' /home/airflow/spark/conf/spark-env.sh
 # Pyarrow fails to interperet placeholder, subsitute instead
-RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/core-site.xml" /startup.sh
-RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/yarn-site.xml" /startup.sh
+RUN sed -i "26 i sed -i \"s/hadoop:/\${HADOOP_HOST:-hadoop}:/g\" /home/airflow/hadoop/etc/hadoop/core-site.xml" /startup.sh
+RUN sed -i "26 i sed -i \"s/hadoop:/\${HADOOP_HOST:-hadoop}:/g\" /home/airflow/hadoop/etc/hadoop/yarn-site.xml" /startup.sh
 # Setting AIRFLOW__WEBSERVER__BASE_URL did not get applied for unknown reasons, update the config file instead
-RUN sed -i "34 i sed -i 's#base_url = http://localhost:8080#base_url = http://localhost:8080/airflow#' /home/airflow/airflow/airflow.cfg" /startup.sh
+RUN sed -i "26 i sed -i 's#base_url = http://localhost:8080#base_url = http://localhost:8080/airflow#' /home/airflow/airflow/airflow.cfg" /startup.sh
 
 COPY airflow/ /home/airflow/airflow/
 COPY spark/ /home/airflow/airflow/python/
diff --git a/docker-compose.yml b/docker-compose.yml
index 64064c5..8a817b0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -27,7 +27,7 @@ services:
     environment:
       - POSTGRES_HOST_AUTH_METHOD=trust
 
-  hadoop:
+  hadoop-test:
     build:
       dockerfile: hadoop.Dockerfile
     ports:
@@ -40,5 +40,7 @@ services:
     depends_on:
       user-db:
         condition: service_started
-      hadoop:
+      hadoop-test:
         condition: service_healthy
+    environment:
+      - HADOOP_HOST=hadoop-test