Skip to content

Commit

Permalink
[LP#2034080] purge nvidia packages vigorously (#92)
Browse files: browse the repository at this point in the history
* purge nvidia packages vigorously

* have containerd ignore all juju proxy config during testing

* speed up the restart functional test
  • Loading branch information
addyess committed Oct 31, 2023
1 parent efd9520 commit ba0e629
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
needs:
- lint-unit
- call-inclusive-naming-check
timeout-minutes: 75
timeout-minutes: 90
steps:
- name: Check out code
uses: actions/checkout@v3
Expand Down
3 changes: 2 additions & 1 deletion reactive/containerd.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,8 @@ def unconfigure_nvidia(reconfigure=True):
to_purge = apt_packages(nvidia_packages).keys()

if to_purge:
apt_purge(to_purge, fatal=True)
# remove any other nvidia- installed packages
apt_purge(to_purge | {"^nvidia-.*"}, fatal=True)

if os.path.isfile(NVIDIA_SOURCES_FILE):
os.remove(NVIDIA_SOURCES_FILE)
Expand Down
2 changes: 2 additions & 0 deletions tests/data/charm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ applications:
channel: null
resources:
containerd: {{containerd_multiarch|default("0")}}
options:
disable-juju-proxy: "true"
docker-registry:
charm: docker-registry
channel: edge
Expand Down
6 changes: 4 additions & 2 deletions tests/integration/test_containerd_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ async def test_restart_containerd(microbots, ops_test):
any_containerd = containerds.units[0]
try:
[await JujuRun.command(_, "service containerd stop") for _ in containerds.units]
await ops_test.model.wait_for_idle(apps=["containerd"], status="blocked", timeout=6 * 60)
async with ops_test.fast_forward():
await ops_test.model.wait_for_idle(apps=["containerd"], status="blocked", timeout=6 * 60)

nodes = await JujuRun.command(any_containerd, "kubectl --kubeconfig /root/cdk/kubeconfig get nodes")
assert nodes.stdout.count("NotReady") == num_units, "Ensure all nodes aren't ready"
Expand All @@ -294,4 +295,5 @@ async def test_restart_containerd(microbots, ops_test):
await JujuRun.command(any_containerd, f"curl {endpoint}")
finally:
[await JujuRun.command(_, "service containerd start") for _ in containerds.units]
await ops_test.model.wait_for_idle(apps=["containerd"], status="active", timeout=6 * 60)
async with ops_test.fast_forward():
await ops_test.model.wait_for_idle(apps=["containerd"], status="active", timeout=6 * 60)

0 comments on commit ba0e629

Please sign in to comment.