Patch summary (free text ahead of the first "diff --git" header; patch tools
skip leading text that is not part of a diff):
  * adds three GitHub Actions workflows that, on pushes touching the listed
    paths, build and upload the airflow, frontend and hadoop container images
    with VaultVulp/gp-docker-action@1.6.0;
  * renames Dockerfile.airflow to airflow.Dockerfile and Dockerfile.hadoop to
    hadoop.Dockerfile, and points docker-compose.yml at the new names;
  * removes the hadoop "build" section from docker-compose.dev.yml;
  * adds an org.opencontainers.image.source label to all three images;
  * rewrites the "hadoop:" host prefix in the Hadoop XML configs of the
    airflow and hadoop images.

NOTE(review): the two sed expressions added to airflow.Dockerfile are
single-quoted, so the build shell does not expand ${HADOOP_HOST}; the literal
text "${HADOOP_HOST}:" is written into core-site.xml and yarn-site.xml.
Confirm that whatever reads those files resolves that placeholder.
NOTE(review): this patch was restored from a copy whose line breaks were lost.
Indentation and the position of blank context lines were reconstructed from
the hunk line counts; verify against the target tree before applying.

diff --git a/.github/workflows/build-airflow.yml b/.github/workflows/build-airflow.yml
new file mode 100644
index 0000000..e535d2e
--- /dev/null
+++ b/.github/workflows/build-airflow.yml
@@ -0,0 +1,28 @@
+name: build-airflow
+run-name: "Build Airflow container image"
+on:
+  push:
+    paths:
+      - airflow.Dockerfile
+      - airflow/**
+      - spark/**
+
+jobs:
+  build-container-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      packages: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+        name: "Checkout repository"
+
+      - uses: VaultVulp/gp-docker-action@1.6.0
+        name: "Build and upload Docker image"
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          image-name: airflow
+          build-context: ./
+          dockerfile: airflow.Dockerfile
\ No newline at end of file
diff --git a/.github/workflows/build-frontend.yml b/.github/workflows/build-frontend.yml
new file mode 100644
index 0000000..fa80512
--- /dev/null
+++ b/.github/workflows/build-frontend.yml
@@ -0,0 +1,26 @@
+name: build-frontend
+run-name: "Build frontend container image"
+on:
+  push:
+    paths:
+      - frontend/**
+
+jobs:
+  build-container-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      packages: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+        name: "Checkout repository"
+
+      - uses: VaultVulp/gp-docker-action@1.6.0
+        name: "Build and upload Docker image"
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          image-name: frontend
+          build-context: frontend/
+          dockerfile: frontend/Dockerfile
\ No newline at end of file
diff --git a/.github/workflows/build-hadoop.yml b/.github/workflows/build-hadoop.yml
new file mode 100644
index 0000000..4098ef5
--- /dev/null
+++ b/.github/workflows/build-hadoop.yml
@@ -0,0 +1,26 @@
+name: build-hadoop
+run-name: "Build Hadoop container image"
+on:
+  push:
+    paths:
+      - hadoop.Dockerfile
+
+jobs:
+  build-container-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      packages: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+        name: "Checkout repository"
+
+      - uses: VaultVulp/gp-docker-action@1.6.0
+        name: "Build and upload Docker image"
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          image-name: hadoop
+          build-context: ./
+          dockerfile: hadoop.Dockerfile
\ No newline at end of file
diff --git a/Dockerfile.airflow b/airflow.Dockerfile
similarity index 72%
rename from Dockerfile.airflow
rename to airflow.Dockerfile
index afaa4cb..ff26740 100644
--- a/Dockerfile.airflow
+++ b/airflow.Dockerfile
@@ -1,4 +1,9 @@
 FROM marcelmittelstaedt/airflow:latest
+
+LABEL org.opencontainers.image.source=https://github.com/hfxbse/dhbw-big-data
+
+ENV HADOOP_HOST="hadoop"
+
 RUN sed -i '34,41d' /startup.sh
 RUN sed -i '3 i service ssh start' /startup.sh
 RUN git clone --depth 1 https://github.com/marcelmittelstaedt/BigData.git /tmp/upstream
@@ -6,6 +11,9 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.4.jar -P /home/ai
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/plugins /home/airflow/airflow
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/dags /home/airflow/airflow
 RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/python /home/airflow/airflow
+RUN sed -i 's/hadoop:/${HADOOP_HOST}:/g' /home/airflow/hadoop/etc/hadoop/core-site.xml
+RUN sed -i 's/hadoop:/${HADOOP_HOST}:/g' /home/airflow/hadoop/etc/hadoop/yarn-site.xml
+
 COPY airflow/ /home/airflow/airflow/
 COPY spark/ /home/airflow/airflow/python/
 RUN chown -R airflow /home/airflow/airflow
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index f558d24..e1044bc 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -4,8 +4,6 @@ services:
       - "5432:5432"
 
   hadoop:
-    build:
-      dockerfile: Dockerfile.hadoop
     ports:
       - "9864:9864"
       - "10000:10000"
diff --git a/docker-compose.yml b/docker-compose.yml
index dd93e83..249fc5d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,14 +18,14 @@ services:
 
   hadoop:
     build:
-      dockerfile: Dockerfile.hadoop
+      dockerfile: hadoop.Dockerfile
     ports:
       - "8088:8088"
       - "9870:9870"
 
   airflow:
     build:
-      dockerfile: Dockerfile.airflow
+      dockerfile: airflow.Dockerfile
     depends_on:
       user-db:
         condition: service_started
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index 4e9be67..d43193f 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -1,5 +1,7 @@
 FROM node:20-alpine
 
+LABEL org.opencontainers.image.source=https://github.com/hfxbse/dhbw-big-data
+
 RUN mkdir /app
 WORKDIR /app
 
diff --git a/Dockerfile.hadoop b/hadoop.Dockerfile
similarity index 73%
rename from Dockerfile.hadoop
rename to hadoop.Dockerfile
index 63c3c97..8c81101 100644
--- a/Dockerfile.hadoop
+++ b/hadoop.Dockerfile
@@ -1,8 +1,13 @@
 FROM marcelmittelstaedt/spark_base:latest
+
+LABEL org.opencontainers.image.source=https://github.com/hfxbse/dhbw-big-data
+
 RUN sed -i '44,50 s/^#//' /startup.sh
 RUN head -n -9 /startup.sh > temp.sh ; mv temp.sh /startup.sh
 RUN echo "echo executing hiveserver2; sudo -u hadoop -H sh -c /home/hadoop/hive/bin/hiveserver2" >> /startup.sh
 RUN chmod +x /startup.sh
+RUN sed -i 's/hadoop:/0.0.0.0:/g' /home/hadoop/hadoop/etc/hadoop/core-site.xml
+
 HEALTHCHECK --start-interval=10s --start-period=10s --retries=7 \
     CMD /bin/sh -c "exit $((4 - $(ps aux | grep -c org.apache.hive.service.server.HiveServer2)))"
 