From ba060a0ee38a8de197326134428e2f5e62e575f6 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Sat, 19 Oct 2024 11:16:30 +0200 Subject: [PATCH 1/9] bump: Torch `2.5.0` --- .azure/gpu-benchmarks.yml | 2 +- .azure/gpu-tests-fabric.yml | 2 +- .azure/gpu-tests-pytorch.yml | 2 +- .github/checkgroup.yml | 16 ++++++++++++---- .github/workflows/ci-tests-fabric.yml | 5 ++++- .github/workflows/ci-tests-pytorch.yml | 5 ++++- .github/workflows/docker-build.yml | 16 ++++++++++++---- requirements/fabric/base.txt | 2 +- requirements/fabric/examples.txt | 4 ++-- requirements/fabric/test.txt | 2 +- requirements/pytorch/base.txt | 4 ++-- requirements/pytorch/examples.txt | 4 ++-- requirements/typing.txt | 2 +- 13 files changed, 44 insertions(+), 22 deletions(-) diff --git a/.azure/gpu-benchmarks.yml b/.azure/gpu-benchmarks.yml index 111589945e048..24b78542a798a 100644 --- a/.azure/gpu-benchmarks.yml +++ b/.azure/gpu-benchmarks.yml @@ -46,7 +46,7 @@ jobs: variables: DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' ) container: - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" options: "--gpus=all --shm-size=32g" strategy: matrix: diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index e63641b8ecc7d..ee7fe2e281478 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -60,7 +60,7 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0" PACKAGE_NAME: "fabric" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" PACKAGE_NAME: "lightning" workspace: clean: all diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 56c0ace195ed0..c014d84663c21 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -53,7 +53,7 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0" PACKAGE_NAME: "pytorch" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index fa455da015fce..6919ca1bef759 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -24,16 +24,19 @@ subprojects: - "pl-cpu (macOS-14, lightning, 3.11, 2.2)" - "pl-cpu (macOS-14, lightning, 3.11, 2.3)" - "pl-cpu (macOS-14, lightning, 3.12, 2.4)" + - "pl-cpu (macOS-14, lightning, 3.12, 2.5)" - "pl-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)" - "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.1)" - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.2)" - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.3)" - - "pl-cpu (ubuntu-20.04, lightning, 3.12, 2.4)" + - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.4)" + - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.5)" - "pl-cpu (windows-2022, lightning, 3.9, 2.1, oldest)" - "pl-cpu (windows-2022, lightning, 3.10, 2.1)" - "pl-cpu (windows-2022, lightning, 3.11, 2.2)" - "pl-cpu (windows-2022, lightning, 3.11, 2.3)" - "pl-cpu (windows-2022, lightning, 3.12, 2.4)" + - "pl-cpu (windows-2022, lightning, 3.12, 2.5)" - "pl-cpu (macOS-14, pytorch, 3.9, 2.1)" - "pl-cpu (ubuntu-20.04, pytorch, 3.9, 2.1)" - "pl-cpu (windows-2022, pytorch, 3.9, 2.1)" @@ -141,15 +144,17 @@ subprojects: - "!*.md" - "!**/*.md" checks: - - "build-cuda (3.11, 2.1, 12.1.0)" + - "build-cuda (3.10, 2.1, 12.1.0)" - "build-cuda (3.11, 2.2, 12.1.0)" - "build-cuda (3.11, 2.3, 12.1.0)" - "build-cuda (3.12, 2.4, 12.1.0)" + - "build-cuda (3.12, 2.5, 12.1.0)" #- "build-NGC" - - "build-pl (3.11, 2.1, 12.1.0)" + - "build-pl (3.10, 2.1, 12.1.0)" - "build-pl (3.11, 2.2, 12.1.0)" - "build-pl (3.11, 2.3, 12.1.0)" - "build-pl (3.12, 2.4, 12.1.0)" + - "build-pl (3.12, 2.5, 12.1.0)" # SECTION: lightning_fabric @@ -171,16 +176,19 @@ subprojects: - "fabric-cpu (macOS-14, lightning, 3.11, 2.2)" - "fabric-cpu (macOS-14, lightning, 3.11, 2.3)" - "fabric-cpu (macOS-14, lightning, 3.12, 2.4)" + - "fabric-cpu (macOS-14, lightning, 3.12, 2.5)" - "fabric-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (ubuntu-20.04, lightning, 3.10, 2.1)" - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2)" - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.3)" - - "fabric-cpu (ubuntu-20.04, lightning, 3.12, 2.4)" + - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.4)" + - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.5)" - "fabric-cpu (windows-2022, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (windows-2022, lightning, 3.10, 2.1)" - "fabric-cpu (windows-2022, lightning, 3.11, 2.2)" - "fabric-cpu (windows-2022, lightning, 3.11, 2.3)" - "fabric-cpu (windows-2022, lightning, 3.12, 2.4)" + - "fabric-cpu (windows-2022, lightning, 3.12, 2.5)" - "fabric-cpu (macOS-14, fabric, 3.9, 2.1)" - "fabric-cpu (ubuntu-20.04, fabric, 3.9, 2.1)" - "fabric-cpu (windows-2022, fabric, 3.9, 2.1)" diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml index 7d854bbf7e618..60272ffe6d7cc 100644 --- a/.github/workflows/ci-tests-fabric.yml +++ b/.github/workflows/ci-tests-fabric.yml @@ -50,8 +50,11 @@ jobs: - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5" } # only run PyTorch latest with Python latest, use Fabric scope to limit dependency issues - { os: "macOS-13", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml index a9d7dfdf55578..af32ef78d685d 100644 --- a/.github/workflows/ci-tests-pytorch.yml +++ b/.github/workflows/ci-tests-pytorch.yml @@ -54,8 +54,11 @@ jobs: - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5" } # only run PyTorch latest with Python latest, use PyTorch scope to limit dependency issues - { os: "macOS-13", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 6df2b8cbb73d3..ddc9daffee752 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -43,10 +43,11 @@ jobs: include: # We only release one docker image per PyTorch version. # Make sure the matrix here matches the one below. - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -103,10 +104,11 @@ jobs: include: # These are the base images for PL release docker images. # Make sure the matrix here matches the one above. - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 @@ -115,6 +117,12 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} + + - name: shorten Torch version + run: | + # convert 1.10.2 to 1.10 + pt_version=$(echo ${{ matrix.pytorch }} | cut -d. -f1,2) + echo "PT_VERSION=$pt_version" >> $GITHUB_ENV - uses: docker/build-push-action@v6 with: build-args: | diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 0a99614a46870..42c055e85ca7d 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.5.0 +torch >=2.1.0, <2.6.0 fsspec[http] >=2022.5.0, <2024.4.0 packaging >=20.0, <=23.1 typing-extensions >=4.4.0, <4.10.0 diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt index cb4135da2409a..3352db77d8bd9 100644 --- a/requirements/fabric/examples.txt +++ b/requirements/fabric/examples.txt @@ -1,6 +1,6 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torchvision >=0.16.0, <0.20.0 -torchmetrics >=0.10.0, <1.3.0 +torchvision >=0.16.0, <0.21.0 +torchmetrics >=0.10.0, <1.5.0 lightning-utilities >=0.8.0, <0.12.0 diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 8fb9122051eec..2da6ae8854d64 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -7,4 +7,4 @@ pytest-rerunfailures ==12.0 pytest-random-order ==1.1.0 click ==8.1.7 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute -torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version +torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 6ff628d7edfb5..94aca759c37e2 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -1,11 +1,11 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.5.0 +torch >=2.1.0, <2.6.0 tqdm >=4.57.0, <4.67.0 PyYAML >=5.4, <6.1.0 fsspec[http] >=2022.5.0, <2024.4.0 -torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version +torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version packaging >=20.0, <=23.1 typing-extensions >=4.4.0, <4.10.0 lightning-utilities >=0.10.0, <0.12.0 diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt index 9a6ae7e47dfb8..2e793e0045da9 100644 --- a/requirements/pytorch/examples.txt +++ b/requirements/pytorch/examples.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment requests <2.32.0 -torchvision >=0.16.0, <0.20.0 +torchvision >=0.16.0, <0.21.0 ipython[all] <8.15.0 -torchmetrics >=0.10.0, <1.3.0 +torchmetrics >=0.10.0, <1.5.0 lightning-utilities >=0.8.0, <0.12.0 diff --git a/requirements/typing.txt b/requirements/typing.txt index 0323edfd6098a..e367fa9bd0c5a 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy==1.11.0 -torch==2.4.1 +torch==2.5.0 types-Markdown types-PyYAML From d30eb2d6f21f9dd342e03bc0e744b78cafa2ddae Mon Sep 17 00:00:00 2001 From: Jirka B Date: Sat, 19 Oct 2024 11:36:24 +0200 Subject: [PATCH 2/9] push docker --- .github/workflows/docker-build.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index ddc9daffee752..c107e88ea8c60 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -28,7 +28,7 @@ concurrency: cancel-in-progress: ${{ github.event_name == 'pull_request' }} env: - PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} + PUSH_NIGHTLY: true # fixme: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} jobs: @@ -43,11 +43,11 @@ jobs: include: # We only release one docker image per PyTorch version. # Make sure the matrix here matches the one below. - - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1.0", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5.0", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -104,11 +104,11 @@ jobs: include: # These are the base images for PL release docker images. # Make sure the matrix here matches the one above. - - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1.0", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5.0", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 @@ -121,7 +121,7 @@ jobs: - name: shorten Torch version run: | # convert 1.10.2 to 1.10 - pt_version=$(echo ${{ matrix.pytorch }} | cut -d. -f1,2) + pt_version=$(echo ${{ matrix.pytorch_version }} | cut -d. -f1,2) echo "PT_VERSION=$pt_version" >> $GITHUB_ENV - uses: docker/build-push-action@v6 with: @@ -131,7 +131,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_NIGHTLY }} - tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }}" + tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ env.PT_VERSION }}-cuda${{ matrix.cuda_version }}" timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v2 if: failure() && env.PUSH_NIGHTLY == 'true' From 9971db45ae1858c433a69e9339f5835a2bfed537 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Mon, 21 Oct 2024 08:18:11 +0200 Subject: [PATCH 3/9] docker --- .github/workflows/docker-build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index c107e88ea8c60..bfa8870e4d3dd 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -28,7 +28,7 @@ concurrency: cancel-in-progress: ${{ github.event_name == 'pull_request' }} env: - PUSH_NIGHTLY: true # fixme: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} + PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} jobs: @@ -43,11 +43,11 @@ jobs: include: # We only release one docker image per PyTorch version. # Make sure the matrix here matches the one below. - - { python_version: "3.10", pytorch_version: "2.1.0", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.5.0", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: From 19324fa9067f09a66cbe209d35044559a2977e82 Mon Sep 17 00:00:00 2001 From: Thomas Viehmann Date: Wed, 6 Nov 2024 18:05:12 +0100 Subject: [PATCH 4/9] 2.5.1 and mypy --- .github/workflows/docker-build.yml | 2 +- requirements/typing.txt | 2 +- src/lightning/pytorch/core/module.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index bfa8870e4d3dd..0db6d25d60a8b 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -108,7 +108,7 @@ jobs: - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.5.0", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 diff --git a/requirements/typing.txt b/requirements/typing.txt index e367fa9bd0c5a..71414998dd7f3 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy==1.11.0 -torch==2.5.0 +torch==2.5.1 types-Markdown types-PyYAML diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index 782fc40d928ef..7e3a10fee9275 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -1405,7 +1405,9 @@ def forward(self, x): input_sample = self._apply_batch_transfer_handler(input_sample) file_path = str(file_path) if isinstance(file_path, Path) else file_path - torch.onnx.export(self, input_sample, file_path, **kwargs) + # PyTorch (2.5) declares file_path to be str | PathLike[Any] | None, but + # BytesIO does work, too. + torch.onnx.export(self, input_sample, file_path, **kwargs) # type: ignore self.train(mode) @torch.no_grad() From daa215d946ed67340f9071c2a04d4b50803a8dbc Mon Sep 17 00:00:00 2001 From: Thomas Viehmann Date: Thu, 7 Nov 2024 08:40:18 +0100 Subject: [PATCH 5/9] update USE_DISTRIBUTED=0 test --- tests/tests_fabric/utilities/test_imports.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/tests_fabric/utilities/test_imports.py b/tests/tests_fabric/utilities/test_imports.py index 43ee41a7b3035..85408a4ff83bd 100644 --- a/tests/tests_fabric/utilities/test_imports.py +++ b/tests/tests_fabric/utilities/test_imports.py @@ -23,6 +23,13 @@ def test_import_fabric_with_torch_dist_unavailable(): code = dedent( """ import torch + try: + # PyTorch 2.5 relies on torch,distributed._composable.fsdp not + # existing with USE_DISTRIBUTED=0 + import torch._dynamo.variables.functions + torch._dynamo.variables.functions._fsdp_param_group = None + except ImportError: + pass # pretend torch.distributed not available for name in list(torch.distributed.__dict__.keys()): @@ -31,6 +38,11 @@ def test_import_fabric_with_torch_dist_unavailable(): torch.distributed.is_available = lambda: False + # needed for Dynamo in PT 2.5+ compare the torch.distributed source + class _ProcessGroupStub: + pass + torch.distributed.ProcessGroup = _ProcessGroupStub + import lightning.fabric """ ) From 0d8a55675dcb752e03bc7ce859a6abc0f6fd981b Mon Sep 17 00:00:00 2001 From: Thomas Viehmann Date: Thu, 7 Nov 2024 09:53:27 +0100 Subject: [PATCH 6/9] also for pytorch lightning no distributed --- tests/tests_pytorch/utilities/test_imports.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/tests_pytorch/utilities/test_imports.py b/tests/tests_pytorch/utilities/test_imports.py index 43a3fad916086..56ee326f076dc 100644 --- a/tests/tests_pytorch/utilities/test_imports.py +++ b/tests/tests_pytorch/utilities/test_imports.py @@ -117,6 +117,13 @@ def test_import_pytorch_lightning_with_torch_dist_unavailable(): code = dedent( """ import torch + try: + # PyTorch 2.5 relies on torch,distributed._composable.fsdp not + # existing with USE_DISTRIBUTED=0 + import torch._dynamo.variables.functions + torch._dynamo.variables.functions._fsdp_param_group = None + except ImportError: + pass # pretend torch.distributed not available for name in list(torch.distributed.__dict__.keys()): @@ -125,6 +132,11 @@ def test_import_pytorch_lightning_with_torch_dist_unavailable(): torch.distributed.is_available = lambda: False + # needed for Dynamo in PT 2.5+ compare the torch.distributed source + class _ProcessGroupStub: + pass + torch.distributed.ProcessGroup = _ProcessGroupStub + import lightning.pytorch """ ) From 619cc9128a3de89d73441389d7c4b6ad7fefda1a Mon Sep 17 00:00:00 2001 From: Thomas Viehmann Date: Fri, 8 Nov 2024 12:14:28 +0100 Subject: [PATCH 7/9] set USE_LIBUV=0 on windows --- src/lightning/fabric/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/lightning/fabric/__init__.py b/src/lightning/fabric/__init__.py index 921d3d61e60fe..d675b21e5d1d2 100644 --- a/src/lightning/fabric/__init__.py +++ b/src/lightning/fabric/__init__.py @@ -2,6 +2,7 @@ import logging import os +import sys from lightning_utilities.core.imports import package_available @@ -26,6 +27,10 @@ # https://github.com/pytorch/pytorch/issues/83973 os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] = "1" +# see https://github.com/pytorch/pytorch/issues/139990 +if sys.platform == "win32": + os.environ["USE_LIBUV"] = "0" + from lightning.fabric.fabric import Fabric # noqa: E402 from lightning.fabric.utilities.seed import seed_everything # noqa: E402 From 7f5b1e71c83a211e1d66a98a409a941c333ea18c Mon Sep 17 00:00:00 2001 From: Jirka B Date: Thu, 7 Nov 2024 12:00:20 +0000 Subject: [PATCH 8/9] try drop pickle warning --- tests/tests_pytorch/callbacks/test_early_stopping.py | 6 ++---- tests/tests_pytorch/checkpointing/test_model_checkpoint.py | 6 ++---- tests/tests_pytorch/core/test_metric_result_integration.py | 3 +-- tests/tests_pytorch/helpers/test_datasets.py | 6 ++---- tests/tests_pytorch/loggers/test_all.py | 7 +------ tests/tests_pytorch/loggers/test_logger.py | 3 +-- tests/tests_pytorch/loggers/test_wandb.py | 3 +-- 7 files changed, 10 insertions(+), 24 deletions(-) diff --git a/tests/tests_pytorch/callbacks/test_early_stopping.py b/tests/tests_pytorch/callbacks/test_early_stopping.py index b7e52ee549bcc..221718425d7eb 100644 --- a/tests/tests_pytorch/callbacks/test_early_stopping.py +++ b/tests/tests_pytorch/callbacks/test_early_stopping.py @@ -193,13 +193,11 @@ def test_pickling(): early_stopping = EarlyStopping(monitor="foo") early_stopping_pickled = pickle.dumps(early_stopping) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - early_stopping_loaded = pickle.loads(early_stopping_pickled) + early_stopping_loaded = pickle.loads(early_stopping_pickled) assert vars(early_stopping) == vars(early_stopping_loaded) early_stopping_pickled = cloudpickle.dumps(early_stopping) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - early_stopping_loaded = cloudpickle.loads(early_stopping_pickled) + early_stopping_loaded = cloudpickle.loads(early_stopping_pickled) assert vars(early_stopping) == vars(early_stopping_loaded) diff --git a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py index 97d8d3c4d0e4a..31f6db8b98272 100644 --- a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py +++ b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py @@ -352,13 +352,11 @@ def test_pickling(tmp_path): ckpt = ModelCheckpoint(dirpath=tmp_path) ckpt_pickled = pickle.dumps(ckpt) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - ckpt_loaded = pickle.loads(ckpt_pickled) + ckpt_loaded = pickle.loads(ckpt_pickled) assert vars(ckpt) == vars(ckpt_loaded) ckpt_pickled = cloudpickle.dumps(ckpt) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - ckpt_loaded = cloudpickle.loads(ckpt_pickled) + ckpt_loaded = cloudpickle.loads(ckpt_pickled) assert vars(ckpt) == vars(ckpt_loaded) diff --git a/tests/tests_pytorch/core/test_metric_result_integration.py b/tests/tests_pytorch/core/test_metric_result_integration.py index ef340d1e17ea9..6e7fa7310e115 100644 --- a/tests/tests_pytorch/core/test_metric_result_integration.py +++ b/tests/tests_pytorch/core/test_metric_result_integration.py @@ -254,8 +254,7 @@ def lightning_log(fx, *args, **kwargs): } # make sure can be pickled - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - pickle.loads(pickle.dumps(result)) + pickle.loads(pickle.dumps(result)) # make sure can be torch.loaded filepath = str(tmp_path / "result") torch.save(result, filepath) diff --git a/tests/tests_pytorch/helpers/test_datasets.py b/tests/tests_pytorch/helpers/test_datasets.py index 98d77a6d9a8ad..f6d7fae4c86c5 100644 --- a/tests/tests_pytorch/helpers/test_datasets.py +++ b/tests/tests_pytorch/helpers/test_datasets.py @@ -44,9 +44,7 @@ def test_pickling_dataset_mnist(dataset_cls, args): mnist = dataset_cls(**args) mnist_pickled = pickle.dumps(mnist) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - pickle.loads(mnist_pickled) + pickle.loads(mnist_pickled) mnist_pickled = cloudpickle.dumps(mnist) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - cloudpickle.loads(mnist_pickled) + cloudpickle.loads(mnist_pickled) diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index c5b07562afb0a..480df336af6ea 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -184,12 +184,7 @@ def _test_loggers_pickle(tmp_path, monkeypatch, logger_class: Logger): trainer = Trainer(max_epochs=1, logger=logger) pkl_bytes = pickle.dumps(trainer) - with ( - pytest.warns(FutureWarning, match="`weights_only=False`") - if _TORCH_EQUAL_2_4_0 or (_TORCH_GREATER_EQUAL_2_4_1 and logger_class not in (CSVLogger, TensorBoardLogger)) - else nullcontext() - ): - trainer2 = pickle.loads(pkl_bytes) + trainer2 = pickle.loads(pkl_bytes) trainer2.logger.log_metrics({"acc": 1.0}) # make sure we restored properly diff --git a/tests/tests_pytorch/loggers/test_logger.py b/tests/tests_pytorch/loggers/test_logger.py index de0028000cd9f..3732a45c5e81c 100644 --- a/tests/tests_pytorch/loggers/test_logger.py +++ b/tests/tests_pytorch/loggers/test_logger.py @@ -124,8 +124,7 @@ def test_multiple_loggers_pickle(tmp_path): trainer = Trainer(logger=[logger1, logger2]) pkl_bytes = pickle.dumps(trainer) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - trainer2 = pickle.loads(pkl_bytes) + trainer2 = pickle.loads(pkl_bytes) for logger in trainer2.loggers: logger.log_metrics({"acc": 1.0}, 0) diff --git a/tests/tests_pytorch/loggers/test_wandb.py b/tests/tests_pytorch/loggers/test_wandb.py index 4e3fbb287a1f9..ddaa289172844 100644 --- a/tests/tests_pytorch/loggers/test_wandb.py +++ b/tests/tests_pytorch/loggers/test_wandb.py @@ -162,8 +162,7 @@ def name(self): assert trainer.logger.experiment, "missing experiment" assert trainer.log_dir == logger.save_dir pkl_bytes = pickle.dumps(trainer) - with pytest.warns(FutureWarning, match="`weights_only=False`") if _TORCH_EQUAL_2_4_0 else nullcontext(): - trainer2 = pickle.loads(pkl_bytes) + trainer2 = pickle.loads(pkl_bytes) assert os.environ["WANDB_MODE"] == "dryrun" assert trainer2.logger.__class__.__name__ == WandbLogger.__name__ From 76b00d83b63e10ddf2ef3352f27fe44ce382755c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 8 Nov 2024 12:55:03 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/tests_pytorch/callbacks/test_early_stopping.py | 2 -- tests/tests_pytorch/checkpointing/test_model_checkpoint.py | 2 -- tests/tests_pytorch/core/test_metric_result_integration.py | 1 - tests/tests_pytorch/helpers/test_datasets.py | 2 -- tests/tests_pytorch/loggers/test_all.py | 2 -- tests/tests_pytorch/loggers/test_logger.py | 2 -- tests/tests_pytorch/loggers/test_wandb.py | 2 -- 7 files changed, 13 deletions(-) diff --git a/tests/tests_pytorch/callbacks/test_early_stopping.py b/tests/tests_pytorch/callbacks/test_early_stopping.py index 221718425d7eb..75f331a9401c7 100644 --- a/tests/tests_pytorch/callbacks/test_early_stopping.py +++ b/tests/tests_pytorch/callbacks/test_early_stopping.py @@ -15,7 +15,6 @@ import math import os import pickle -from contextlib import nullcontext from typing import List, Optional from unittest import mock from unittest.mock import Mock @@ -23,7 +22,6 @@ import cloudpickle import pytest import torch -from lightning.fabric.utilities.imports import _TORCH_EQUAL_2_4_0 from lightning.pytorch import Trainer, seed_everything from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint from lightning.pytorch.demos.boring_classes import BoringModel diff --git a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py index 31f6db8b98272..d43f07179e7bb 100644 --- a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py +++ b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py @@ -17,7 +17,6 @@ import re import time from argparse import Namespace -from contextlib import nullcontext from datetime import timedelta from inspect import signature from pathlib import Path @@ -32,7 +31,6 @@ import yaml from jsonargparse import ArgumentParser from lightning.fabric.utilities.cloud_io import _load as pl_load -from lightning.fabric.utilities.imports import _TORCH_EQUAL_2_4_0 from lightning.pytorch import Trainer, seed_everything from lightning.pytorch.callbacks import ModelCheckpoint from lightning.pytorch.demos.boring_classes import BoringModel diff --git a/tests/tests_pytorch/core/test_metric_result_integration.py b/tests/tests_pytorch/core/test_metric_result_integration.py index 6e7fa7310e115..004d979fd1b18 100644 --- a/tests/tests_pytorch/core/test_metric_result_integration.py +++ b/tests/tests_pytorch/core/test_metric_result_integration.py @@ -19,7 +19,6 @@ import lightning.pytorch as pl import pytest import torch -from lightning.fabric.utilities.imports import _TORCH_EQUAL_2_4_0 from lightning.fabric.utilities.warnings import PossibleUserWarning from lightning.pytorch import Trainer from lightning.pytorch.callbacks import OnExceptionCheckpoint diff --git a/tests/tests_pytorch/helpers/test_datasets.py b/tests/tests_pytorch/helpers/test_datasets.py index f6d7fae4c86c5..d71ed118fe835 100644 --- a/tests/tests_pytorch/helpers/test_datasets.py +++ b/tests/tests_pytorch/helpers/test_datasets.py @@ -12,12 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import pickle -from contextlib import nullcontext import cloudpickle import pytest import torch -from lightning.fabric.utilities.imports import _TORCH_EQUAL_2_4_0 from tests_pytorch import _PATH_DATASETS from tests_pytorch.helpers.datasets import MNIST, AverageDataset, TrialMNIST diff --git a/tests/tests_pytorch/loggers/test_all.py b/tests/tests_pytorch/loggers/test_all.py index 480df336af6ea..1b845c57ec35d 100644 --- a/tests/tests_pytorch/loggers/test_all.py +++ b/tests/tests_pytorch/loggers/test_all.py @@ -14,13 +14,11 @@ import inspect import os import pickle -from contextlib import nullcontext from unittest import mock from unittest.mock import ANY, Mock import pytest import torch -from lightning.fabric.utilities.imports import _TORCH_EQUAL_2_4_0, _TORCH_GREATER_EQUAL_2_4_1 from lightning.pytorch import Callback, Trainer from lightning.pytorch.demos.boring_classes import BoringModel from lightning.pytorch.loggers import ( diff --git a/tests/tests_pytorch/loggers/test_logger.py b/tests/tests_pytorch/loggers/test_logger.py index 3732a45c5e81c..4d74e046c590f 100644 --- a/tests/tests_pytorch/loggers/test_logger.py +++ b/tests/tests_pytorch/loggers/test_logger.py @@ -13,7 +13,6 @@ # limitations under the License. import pickle from argparse import Namespace -from contextlib import nullcontext from copy import deepcopy from typing import Any, Dict, Optional from unittest.mock import patch @@ -21,7 +20,6 @@ import numpy as np import pytest import torch -from lightning.fabric.utilities.imports import _TORCH_EQUAL_2_4_0 from lightning.fabric.utilities.logger import _convert_params, _sanitize_params from lightning.pytorch import Trainer from lightning.pytorch.demos.boring_classes import BoringDataModule, BoringModel diff --git a/tests/tests_pytorch/loggers/test_wandb.py b/tests/tests_pytorch/loggers/test_wandb.py index ddaa289172844..a8e70bfb6589d 100644 --- a/tests/tests_pytorch/loggers/test_wandb.py +++ b/tests/tests_pytorch/loggers/test_wandb.py @@ -13,13 +13,11 @@ # limitations under the License. import os import pickle -from contextlib import nullcontext from pathlib import Path from unittest import mock import pytest import yaml -from lightning.fabric.utilities.imports import _TORCH_EQUAL_2_4_0 from lightning.pytorch import Trainer from lightning.pytorch.callbacks import ModelCheckpoint from lightning.pytorch.cli import LightningCLI