From 18d8e8183c5b745e8ea2d1f19979c5b9af21d6d5 Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Tue, 11 Jun 2024 12:19:08 -0400 Subject: [PATCH 1/5] trying to pass runtime var to allow for model pull Signed-off-by: Ryan Cook --- .github/workflows/training-e2e.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index 34c59598..66fd2655 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -10,7 +10,7 @@ on: paths: - .github/workflows/training-e2e.yaml - ./training/** - + workflow_dispatch: env: @@ -20,6 +20,7 @@ env: TF_VAR_aws_volume_size: 500 TF_VAR_aws_access_key: ${{ secrets.AWS_ACCESS_KEY_ID }} TF_VAR_aws_secret_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} concurrency: group: ${{ github.workflow }} @@ -69,7 +70,7 @@ jobs: env: TF_VAR_aws_instance_type: ${{ matrix.aws_image_type }} TF_VAR_aws_ami_architecture: ${{ matrix.aws_ami_architecture }} - + - name: Terraform Output id: terraform-output run: | @@ -104,7 +105,7 @@ jobs: # with: # detached: true # limit-access-to-actor: false - + - name: Setup tmate session uses: mxschmitt/action-tmate@v3.18 timeout-minutes: 60 @@ -117,6 +118,7 @@ jobs: ansible-playbook ./main/training/tests/e2e-tests/playbook.yml \ -i terraform-test-environment-module/hosts.ini \ --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ + --extra-vars "HF_TOKEN=${{ secrets.HF_TOKEN }}" \ --extra-vars "image_name=${{ matrix.image_name }}" \ --extra-vars "ssh_public_key='${{ steps.terraform-output.outputs.ssh_public_key }}'" \ --extra-vars "registry_user=${{ secrets.REGISTRY_USER }}" \ @@ -141,7 +143,7 @@ jobs: env: TF_VAR_aws_instance_type: ${{ matrix.aws_image_type }} TF_VAR_aws_ami_architecture: ${{ matrix.aws_ami_architecture }} - + - name: Publish Job Results to Slack id: slack if: always() From 4911ad30d56ac3b4e2023749bed579481c9a29a3 Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Tue, 11 Jun 2024 12:59:03 -0400 Subject: [PATCH 2/5] further testing Signed-off-by: Ryan Cook --- .github/workflows/training-e2e.yaml | 2 +- training/tests/e2e-tests/playbook.yml | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index 66fd2655..9033f119 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -115,7 +115,7 @@ jobs: - name: Run tests run: | - ansible-playbook ./main/training/tests/e2e-tests/playbook.yml \ + ansible-playbook ./main/training/tests/e2e-tests/playbook.yml -vvv \ -i terraform-test-environment-module/hosts.ini \ --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }} \ --extra-vars "HF_TOKEN=${{ secrets.HF_TOKEN }}" \ diff --git a/training/tests/e2e-tests/playbook.yml b/training/tests/e2e-tests/playbook.yml index dd07b765..4d2102d1 100644 --- a/training/tests/e2e-tests/playbook.yml +++ b/training/tests/e2e-tests/playbook.yml @@ -6,12 +6,12 @@ gather_facts: false tasks: - + - name: Wait until the instance is ready ansible.builtin.wait_for_connection: delay: 15 timeout: 180 - + - name: Gather facts for first time ansible.builtin.setup: @@ -20,6 +20,8 @@ url: https://raw.githubusercontent.com/instructlab/instructlab/main/scripts/basic-workflow-tests.sh dest: /tmp/basic-workflow-tests.sh mode: 755 + environment: + HF_TOKEN: "{{ HF_TOKEN }}" # Allow for debugging with tmate # - name: Wait for 15 minutes From bf3be8317df1fa003ddc8f8ea5e407dfed0b29b1 Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Tue, 11 Jun 2024 13:45:43 -0400 Subject: [PATCH 3/5] must make this faster Signed-off-by: Ryan Cook --- .github/workflows/training-e2e.yaml | 3 --- training/tests/provision/playbook.yml | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/training-e2e.yaml b/.github/workflows/training-e2e.yaml index 9033f119..cc505ed3 100644 --- a/.github/workflows/training-e2e.yaml +++ b/.github/workflows/training-e2e.yaml @@ -96,9 +96,6 @@ jobs: env: ANSIBLE_CONFIG: ./main/training/tests/ansible.cfg - - name: Wait for 4 minutes - run: sleep 240 - # - name: Setup tmate session # uses: mxschmitt/action-tmate@v3.18 # timeout-minutes: 20 diff --git a/training/tests/provision/playbook.yml b/training/tests/provision/playbook.yml index 1181e34c..90d0936f 100644 --- a/training/tests/provision/playbook.yml +++ b/training/tests/provision/playbook.yml @@ -6,12 +6,12 @@ gather_facts: false tasks: - + - name: Wait until the instance is ready ansible.builtin.wait_for_connection: delay: 15 timeout: 180 - + - name: Gather facts for first time ansible.builtin.setup: @@ -42,7 +42,7 @@ ansible.builtin.shell: | podman pull "quay.io/ai-lab/{{ image_name }}:latest" \ --authfile=/etc/containers/auth.json \ - --arch amd64 + --arch amd64 # --retry=3 \ # --retry-delay=15 \ @@ -112,6 +112,6 @@ delegate_to: localhost - name: Reboot - ansible.builtin.shell: systemctl reboot - ignore_errors: true - ignore_unreachable: true + ansible.builtin.reboot: + reboot_timeout: 300 + reboot_msg: "Reboot initiated by Ansible" From 643fd9910e47681c2a690ad5c9803d2a5a821441 Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Tue, 11 Jun 2024 14:22:16 -0400 Subject: [PATCH 4/5] remove message? Signed-off-by: Ryan Cook --- training/tests/provision/playbook.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/training/tests/provision/playbook.yml b/training/tests/provision/playbook.yml index 90d0936f..1b0e9f40 100644 --- a/training/tests/provision/playbook.yml +++ b/training/tests/provision/playbook.yml @@ -62,7 +62,7 @@ ansible.builtin.shell: | podman build . \ -f /tmp/Containerfile \ - -t quay.io/ai-lab/derived_image:latest \ + -t quay.io/ai-lab/derived_image:testing \ --build-arg "sshpubkey={{ ssh_public_key }}" \ --authfile=/etc/containers/auth.json \ --pull=never > /tmp/build.log 2>&1 @@ -91,7 +91,7 @@ --rm \ --security-opt label=type:unconfined_t \ -v /:/target \ - -v /var/lib/containers:/var/lib/containers quay.io/ai-lab/derived_image:latest \ + -v /var/lib/containers:/var/lib/containers quay.io/ai-lab/derived_image:testing \ bootc install to-existing-root --karg=console=ttyS0,115200n8 --karg=systemd.journald.forward_to_console=1 # --retry=5 \ @@ -114,4 +114,3 @@ - name: Reboot ansible.builtin.reboot: reboot_timeout: 300 - reboot_msg: "Reboot initiated by Ansible" From 45dc5dd0fc2d52c33146c93236e3ab53efe925b5 Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Tue, 11 Jun 2024 14:35:09 -0400 Subject: [PATCH 5/5] roll back tag Signed-off-by: Ryan Cook --- training/tests/provision/playbook.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/tests/provision/playbook.yml b/training/tests/provision/playbook.yml index 1b0e9f40..66426cf2 100644 --- a/training/tests/provision/playbook.yml +++ b/training/tests/provision/playbook.yml @@ -62,7 +62,7 @@ ansible.builtin.shell: | podman build . \ -f /tmp/Containerfile \ - -t quay.io/ai-lab/derived_image:testing \ + -t quay.io/ai-lab/derived_image:latest \ --build-arg "sshpubkey={{ ssh_public_key }}" \ --authfile=/etc/containers/auth.json \ --pull=never > /tmp/build.log 2>&1 @@ -91,7 +91,7 @@ --rm \ --security-opt label=type:unconfined_t \ -v /:/target \ - -v /var/lib/containers:/var/lib/containers quay.io/ai-lab/derived_image:testing \ + -v /var/lib/containers:/var/lib/containers quay.io/ai-lab/derived_image:latest \ bootc install to-existing-root --karg=console=ttyS0,115200n8 --karg=systemd.journald.forward_to_console=1 # --retry=5 \