Skip to content

Commit

Permalink
Merge pull request #107 from guardian/pm-media-download-infra
Browse files Browse the repository at this point in the history
Support transcribing a url
  • Loading branch information
philmcmahon authored Oct 29, 2024
2 parents c7b7ff2 + 8875b59 commit 2d19531
Show file tree
Hide file tree
Showing 29 changed files with 1,543 additions and 492 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ on:
workflow_dispatch:
push:
paths:
- 'whisper_container/Dockerfile'
- '.github/workflows/build-whisper-docker.yml'
- 'containers/whisper.Dockerfile'
- '.github/workflows/build-whisper-container.yml'

# Defines custom environment variables for the workflow: the name of the Docker image that this workflow builds, the build number, and the container registry domain.
env:
IMAGE_NAME: ${{ github.repository }}
WHISPER_IMAGE_NAME: ${{ github.repository }}
BUILD_NUMBER: ${{ github.run_number }}
GITHUB_REGISTRY: ghcr.io

Expand All @@ -38,18 +38,6 @@ jobs:
registry: ${{ env.GITHUB_REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
# Note - this step is the thing where we are indicating which repositories we want to push the container to
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.GITHUB_REGISTRY }}/${{ env.IMAGE_NAME }}
${{ secrets.TRANSCRIPTION_SERVICE_ECR_URI }}
tags: |
type=sha
type=raw,value=latest
- uses: aws-actions/configure-aws-credentials@v4
name: Configure AWS credentials for pushing to ECR
with:
Expand All @@ -61,14 +49,14 @@ jobs:
# This step uses the `docker/build-push-action` action to build the image from `containers/whisper.Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages and to ECR.
# It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
# It uses the `tags` parameter to tag the image for both the GitHub container registry and the ECR repository.
- name: Build and push Docker image
- name: Build and push whisper Docker image
uses: docker/build-push-action@v5
with:
context: whisper_container/
context: .
file: containers/whisper.Dockerfile
# to add x86: linux/amd64
platforms: linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ env.GITHUB_REGISTRY }}/${{ env.WHISPER_IMAGE_NAME }},${{ secrets.TRANSCRIPTION_SERVICE_ECR_URI }}:${{ github.ref_name }}
cache-from: type=gha
cache-to: type=gha,mode=max
45 changes: 45 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,47 @@ on:
workflow_dispatch:
push:
jobs:
# Builds the media-download bundle, then builds the media-download container
# image (linux/arm64) and pushes it to ECR.
media-download-ci:
  permissions:
    # id-token: write is presumably needed so configure-aws-credentials can
    # assume the role via OIDC - confirm against the role's trust policy.
    id-token: write
    contents: read
    pull-requests: write
  runs-on: 2core-ubuntu-latest-arm
  steps:
    - name: Checkout
      uses: actions/checkout@v3
    - name: Setup node
      uses: actions/setup-node@v3
      with:
        node-version-file: .nvmrc
        cache: 'npm'
    - name: Install dependencies
      run: npm install
    # The Dockerfile copies packages/media-download/dist/index.js, so the
    # bundle must be built before the image.
    - name: Build media-download
      run: npm run media-download::build

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - uses: aws-actions/configure-aws-credentials@v4
      name: Configure AWS credentials for pushing to ECR
      with:
        role-to-assume: ${{ secrets.INVESTIGATIONS_GITHUB_ACTIONS_ROLE_ARN }}
        aws-region: eu-west-1
    - name: Login to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v2
    # Branch names may contain characters (e.g. '/') that are not valid in a
    # Docker tag. Replace anything outside [A-Za-z0-9_.-] with '-'. Branch
    # names that are already valid tags are left unchanged. The ref is passed
    # through env rather than interpolated into the script.
    - name: Compute Docker tag
      id: docker-tag
      env:
        REF_NAME: ${{ github.ref_name }}
      run: |
        tag="$(printf '%s' "$REF_NAME" | tr -c 'A-Za-z0-9_.-' '-')"
        echo "tag=$tag" >> "$GITHUB_OUTPUT"
    - name: Build and push media-download Docker image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: containers/media-download.Dockerfile
        platforms: linux/arm64
        push: true
        tags: ${{ secrets.MEDIA_DOWNLOAD_ECR_URI }}:${{ steps.docker-tag.outputs.tag }}
        cache-from: type=gha
        cache-to: type=gha,mode=max
        build-args: |
          node_version=20.11.0
ci:
permissions:
id-token: write
Expand Down Expand Up @@ -32,6 +73,10 @@ jobs:
- name: Build worker-capacity-manager
run: |
npm run worker-capacity-manager::build
- name: Build media-download
run: |
npm run media-download::build
- name: Package API/Client/Update
run: |
npm run package
Expand Down
17 changes: 17 additions & 0 deletions containers/media-download.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Media download container: yt-dlp, ffmpeg and a Node.js runtime for the
# bundled media-download app.
FROM python:3.12-bookworm
WORKDIR /opt
LABEL com.theguardian.transcription-service.media-download-container="Media download container with yt-dlp, associated dependencies and media download app"

# Node.js version installed via `n`. The default matches the value the CI
# workflow passes as a build arg, so a plain `docker build` without
# --build-arg still installs a defined version.
ARG node_version=20.11.0

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir yt-dlp

# Run `apt-get update` and `apt-get install` in the same layer so a cached,
# stale package index is never reused, then remove the index.
RUN apt-get update \
    && apt-get install -y ffmpeg git nodejs npm \
    && rm -rf /var/lib/apt/lists/*

# The distro nodejs/npm packages are used to install `n`, which then installs
# the requested Node.js version.
RUN npm install -g n \
    && echo "node version: $node_version" \
    && n "$node_version"

COPY ./packages/media-download/dist/index.js /opt/media-download.js

# Exec form: node runs as PID 1 and receives stop signals directly instead of
# being wrapped in `sh -c`.
CMD ["node", "/opt/media-download.js"]
File renamed without changes.
Loading

0 comments on commit 2d19531

Please sign in to comment.