2024-11-19 nightly release (04f6fcd)
pytorchbot committed Nov 19, 2024
1 parent 7059b27 commit 1768668
Showing 99 changed files with 2,780 additions and 749 deletions.
52 changes: 45 additions & 7 deletions .ci/docker/common/install_cache.sh
@@ -12,6 +12,26 @@ set -ex
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

install_ubuntu() {
echo "Preparing to build sccache from source"
apt-get update
# libssl-dev will not work, as it is upgraded to libssl3 in Ubuntu 22.04.
# Instead, use the library and headers from OpenSSL 1.1 installed by `install_openssl.sh`.
apt-get install -y cargo
echo "Checking out sccache repo"
git clone https://github.com/mozilla/sccache -b v0.8.2

cd sccache
echo "Building sccache"
cargo build --release
cp target/release/sccache /opt/cache/bin
echo "Cleaning up"
cd ..
rm -rf sccache
apt-get remove -y cargo rustc
apt-get autoclean && apt-get clean
}

install_binary() {
echo "Downloading sccache binary from S3 repo"
curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
@@ -22,15 +42,33 @@ mkdir -p /opt/cache/bin
sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
export PATH="/opt/cache/bin:$PATH"

# NB: Install the pre-built binary from S3, as building from source
# (https://github.com/pytorch/sccache) has started failing mysteriously:
# the sccache server could not start and reported the following error:
# sccache: error: Invalid argument (os error 22)
install_binary
install_ubuntu

function write_sccache_stub() {
BINARY=$1
printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n exec sccache %s \"\$@\"\nelse\n exec %s \"\$@\"\nfi" "$(which "${BINARY}")" "$(which "${BINARY}")" > "/opt/cache/bin/${BINARY}"
if [ $1 == "gcc" ]; then
# Do not call sccache recursively when dumping preprocessor output (-E).
# For some reason this is very important for the first cached nvcc invocation.
cat >"/opt/cache/bin/$1" <<EOF
#!/bin/sh
if [ "\$1" = "-E" ] || [ "\$2" = "-E" ]; then
exec $(which $1) "\$@"
elif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
exec sccache $(which $1) "\$@"
else
exec $(which $1) "\$@"
fi
EOF
else
cat >"/opt/cache/bin/$1" <<EOF
#!/bin/sh
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
exec sccache $(which $1) "\$@"
else
exec $(which $1) "\$@"
fi
EOF
fi
chmod a+x "/opt/cache/bin/${BINARY}"
}

Expand All @@ -44,7 +82,7 @@ init_sccache() {

# NB: This function is adapted from PyTorch core at
# https://github.com/pytorch/pytorch/blob/main/.ci/pytorch/common-build.sh
as_ci_user sccache --stop-server > /dev/null 2>&1 || true
as_ci_user sccache --stop-server >/dev/null 2>&1 || true
rm -f "${SCCACHE_ERROR_LOG}" || true

# Clear sccache stats before using it
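The write_sccache_stub helper above generates per-compiler wrapper scripts that avoid calling sccache recursively: each stub inspects the name of its parent process and only routes the invocation through sccache when sccache is not already the caller. Purely as an illustration of that guard (not part of this commit), a minimal Python sketch, assuming a Linux /proc filesystem and a hypothetical REAL_COMPILER path, could look like this:

#!/usr/bin/env python3
# Sketch of the recursion guard used by the generated compiler stubs.
# Assumes Linux (/proc/<pid>/comm); REAL_COMPILER is hypothetical -- the real
# stub resolves the compiler path with `which` at stub-generation time.
import os
import sys

REAL_COMPILER = "/usr/bin/gcc"  # hypothetical path, for illustration only

def parent_command_name() -> str:
    """Return the command name of the parent process (like `ps -p $PPID -o comm=`)."""
    with open(f"/proc/{os.getppid()}/comm") as f:
        return f.read().strip()

def main() -> None:
    args = sys.argv[1:]
    if "-E" in args[:2]:
        # Preprocessor-only invocations bypass sccache entirely.
        os.execv(REAL_COMPILER, [REAL_COMPILER, *args])
    elif parent_command_name() != "sccache":
        # Not called by sccache: route the compile through sccache.
        os.execvp("sccache", ["sccache", REAL_COMPILER, *args])
    else:
        # sccache is already the caller: run the real compiler directly.
        os.execv(REAL_COMPILER, [REAL_COMPILER, *args])

if __name__ == "__main__":
    main()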
1 change: 1 addition & 0 deletions .ci/docker/ubuntu/Dockerfile
@@ -57,6 +57,7 @@ COPY ./common/utils.sh utils.sh
RUN bash ./install_cache.sh && rm install_cache.sh utils.sh
ENV SCCACHE_BUCKET ossci-compiler-cache-circleci-v2
ENV SCCACHE_S3_KEY_PREFIX executorch
ENV SCCACHE_REGION us-east-1

ARG TORCH_VERSION
COPY ./common/install_pytorch.sh install_pytorch.sh
62 changes: 62 additions & 0 deletions .github/scripts/check_labels.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Check whether a PR has required labels."""

import sys
from typing import Any

from github_utils import gh_delete_comment, gh_post_pr_comment
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from label_utils import has_required_labels, is_label_err_comment, LABEL_ERR_MSG
from trymerge import GitHubPR


def delete_all_label_err_comments(pr: "GitHubPR") -> None:
for comment in pr.get_comments():
if is_label_err_comment(comment):
gh_delete_comment(pr.org, pr.project, comment.database_id)


def add_label_err_comment(pr: "GitHubPR") -> None:
# Only make a comment if one doesn't exist already
if not any(is_label_err_comment(comment) for comment in pr.get_comments()):
gh_post_pr_comment(pr.org, pr.project, pr.pr_num, LABEL_ERR_MSG)


def parse_args() -> Any:
from argparse import ArgumentParser

parser = ArgumentParser("Check PR labels")
parser.add_argument("pr_num", type=int)
# add a flag to return a non-zero exit code if the PR does not have the required labels
parser.add_argument(
"--exit-non-zero",
action="store_true",
help="Return a non-zero exit code if the PR does not have the required labels",
)

return parser.parse_args()


def main() -> None:
args = parse_args()
repo = GitRepo(get_git_repo_dir(), get_git_remote_name())
org, project = repo.gh_owner_and_name()
pr = GitHubPR(org, project, args.pr_num)

try:
if not has_required_labels(pr):
print(LABEL_ERR_MSG)
add_label_err_comment(pr)
if args.exit_non_zero:
sys.exit(1)
else:
delete_all_label_err_comments(pr)
except Exception as e:
if args.exit_non_zero:
sys.exit(1)

sys.exit(0)


if __name__ == "__main__":
main()
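The actual label policy lives in label_utils.has_required_labels, which is not part of this diff. Purely as a hypothetical illustration of the kind of check such a gate performs, a minimal sketch might require at least one label from an accepted prefix list:

# Hypothetical sketch of a label gate; the real logic lives in label_utils.py.
from typing import Iterable

REQUIRED_LABEL_PREFIXES = ("release notes:", "topic:")  # assumed, for illustration only

def has_any_required_label(labels: Iterable[str]) -> bool:
    """Return True if at least one label starts with an accepted prefix."""
    return any(
        label.startswith(prefix)
        for label in labels
        for prefix in REQUIRED_LABEL_PREFIXES
    )

# Example:
# has_any_required_label(["topic: not user facing"])  -> True
# has_any_required_label(["ciflow/trunk"])            -> False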
54 changes: 54 additions & 0 deletions .github/workflows/check-labels.yml
@@ -0,0 +1,54 @@
name: Check Labels

on:
# We need pull_request_target to be able to post comments on PRs from forks.
# Only allow pull_request_target when merging to main, not some historical branch.
#
# Make sure not to introduce explicit checkout, installation, or execution of
# untrusted user code into this workflow!
pull_request_target:
types: [opened, synchronize, reopened, labeled, unlabeled]
branches: [main]

# To check labels on ghstack PRs.
# Note: as pull_request doesn't trigger on PRs targeting main,
# to test changes to the workflow itself one needs to create
# a PR that targets a gh/**/base branch.
pull_request:
types: [opened, synchronize, reopened, labeled, unlabeled]
branches: [gh/**/base]

workflow_dispatch:
inputs:
pr_number:
description: 'PR number to check labels for'
required: true

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

jobs:
check-labels:
permissions:
contents: read
pull-requests: write
name: Check labels
if: github.repository_owner == 'pytorch'
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: actions/setup-python@v4
with:
python-version: '3.10'
# Not direct dependencies of the check script itself, but required by trymerge, which it imports
- run: pip install pyyaml==6.0 rockset==1.0.3
- name: Check labels
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUM: ${{ github.event.number || github.event.inputs.pr_number }}
run: |
set -ex
python3 .github/scripts/check_labels.py --exit-non-zero "${PR_NUM}"
3 changes: 3 additions & 0 deletions .github/workflows/docker-builds.yml
@@ -26,6 +26,9 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

env:
AWS_DEFAULT_REGION: us-east-1

jobs:
docker-build:
runs-on: [self-hosted, linux.2xlarge]
15 changes: 4 additions & 11 deletions backends/arm/test/misc/test_model_evaluator.py
@@ -4,17 +4,14 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import random
import tempfile
import unittest

import torch
from executorch.backends.arm.util.arm_model_evaluator import GenericModelEvaluator

random.seed(0)

# Create an input that is hard to compress
COMPRESSION_RATIO_TEST = bytearray(random.getrandbits(8) for _ in range(1000000))
COMPRESSION_RATIO_TEST = torch.rand([1024, 1024])


def mocked_model_1(input: torch.Tensor) -> torch.Tensor:
@@ -47,20 +44,16 @@ def test_get_model_error(self):

def test_get_compression_ratio(self):
with tempfile.NamedTemporaryFile(delete=True) as temp_bin:
temp_bin.write(COMPRESSION_RATIO_TEST)

# As the size of the file is quite small we need to call flush()
temp_bin.flush()
temp_bin_name = temp_bin.name
torch.save(COMPRESSION_RATIO_TEST, temp_bin)

example_input = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
evaluator = GenericModelEvaluator(
"dummy_model",
mocked_model_1,
mocked_model_2,
example_input,
temp_bin_name,
temp_bin.name,
)

ratio = evaluator.get_compression_ratio()
self.assertAlmostEqual(ratio, 1.0, places=2)
self.assertAlmostEqual(ratio, 1.1, places=1)
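The rewritten test saves a random float32 tensor with torch.save and loosens the expected compression ratio to roughly 1.1, since random float data barely compresses. The evaluator's get_compression_ratio implementation is not shown in this diff; a minimal sketch of how such a ratio could be computed with zipfile (an assumption, not the project's actual code) is:

# Sketch of a zip-based compression ratio; the evaluator's real implementation
# may differ. Shown only to illustrate why random data yields a ratio near 1.
import os
import tempfile
import zipfile

def compression_ratio(path: str) -> float:
    """Uncompressed size divided by zip-compressed size for a single file."""
    with tempfile.TemporaryDirectory() as tmp:
        zip_path = os.path.join(tmp, "model.zip")
        with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            zf.write(path, arcname=os.path.basename(path))
        return os.path.getsize(path) / os.path.getsize(zip_path)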
108 changes: 106 additions & 2 deletions backends/arm/util/arm_model_evaluator.py
@@ -4,13 +4,25 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
import random
import tempfile
import zipfile

from collections import defaultdict
from typing import Optional, Tuple
from pathlib import Path
from typing import Any, Optional, Tuple

import torch
from torch.nn.modules import Module
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


# Logger for reporting progress during longer-running evaluations
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def flatten_args(args) -> tuple | list:
@@ -28,6 +40,8 @@ def flatten_args(args) -> tuple | list:


class GenericModelEvaluator:
REQUIRES_CONFIG = False

def __init__(
self,
model_name: str,
@@ -90,7 +104,7 @@ def get_compression_ratio(self) -> float:

return compression_ratio

def evaluate(self) -> dict[any]:
def evaluate(self) -> dict[Any]:
model_error_dict = self.get_model_error()

output_metrics = {"name": self.model_name, "metrics": dict(model_error_dict)}
@@ -103,3 +117,93 @@
] = self.get_compression_ratio()

return output_metrics


class MobileNetV2Evaluator(GenericModelEvaluator):
REQUIRES_CONFIG = True

def __init__(
self,
model_name: str,
fp32_model: Module,
int8_model: Module,
example_input: Tuple[torch.Tensor],
tosa_output_path: str | None,
batch_size: int,
validation_dataset_path: str,
) -> None:
super().__init__(
model_name, fp32_model, int8_model, example_input, tosa_output_path
)

self.__batch_size = batch_size
self.__validation_set_path = validation_dataset_path

@staticmethod
def __load_dataset(directory: str) -> datasets.ImageFolder:
directory_path = Path(directory)
if not directory_path.exists():
raise FileNotFoundError(f"Directory: {directory} does not exist.")

transform = transforms.Compose(
[
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.484, 0.454, 0.403], std=[0.225, 0.220, 0.220]
),
]
)
return datasets.ImageFolder(directory_path, transform=transform)

@staticmethod
def get_calibrator(training_dataset_path: str) -> DataLoader:
dataset = MobileNetV2Evaluator.__load_dataset(training_dataset_path)
rand_indices = random.sample(range(len(dataset)), k=1000)

# Return a subset of the dataset to be used for calibration
return torch.utils.data.DataLoader(
torch.utils.data.Subset(dataset, rand_indices),
batch_size=1,
shuffle=False,
)

def __evaluate_mobilenet(self) -> Tuple[float, float]:
dataset = MobileNetV2Evaluator.__load_dataset(self.__validation_set_path)
loaded_dataset = DataLoader(
dataset,
batch_size=self.__batch_size,
shuffle=False,
)

top1_correct = 0
top5_correct = 0

for i, (image, target) in enumerate(loaded_dataset):
prediction = self.int8_model(image)
top1_prediction = torch.topk(prediction, k=1, dim=1).indices
top5_prediction = torch.topk(prediction, k=5, dim=1).indices

top1_correct += (top1_prediction == target.view(-1, 1)).sum().item()
top5_correct += (top5_prediction == target.view(-1, 1)).sum().item()

logger.info("Iteration: {}".format((i + 1) * self.__batch_size))
logger.info(
"Top 1: {}".format(top1_correct / ((i + 1) * self.__batch_size))
)
logger.info(
"Top 5: {}".format(top5_correct / ((i + 1) * self.__batch_size))
)

top1_accuracy = top1_correct / len(dataset)
top5_accuracy = top5_correct / len(dataset)

return top1_accuracy, top5_accuracy

def evaluate(self) -> dict[str, Any]:
top1_correct, top5_correct = self.__evaluate_mobilenet()
output = super().evaluate()

output["metrics"]["accuracy"] = {"top-1": top1_correct, "top-5": top5_correct}
return output
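The top-1/top-5 counting in __evaluate_mobilenet relies on broadcasting: target.view(-1, 1) is compared against each of the k predicted indices per sample, and the resulting boolean matrix is summed. A small self-contained check of that behaviour (illustrative only, not part of this commit):

# Toy demonstration of the broadcast comparison used for top-1/top-5 counting.
import torch

logits = torch.tensor([[0.1, 0.7, 0.2], [0.5, 0.3, 0.2]])  # batch of 2, 3 classes
target = torch.tensor([1, 2])

top2 = torch.topk(logits, k=2, dim=1).indices      # shape [2, 2]
hits = (top2 == target.view(-1, 1)).sum().item()   # target broadcast against each of the k columns

print(top2)  # tensor([[1, 2], [0, 1]])
print(hits)  # 1: sample 0's target (1) is in its top-2; sample 1's target (2) is not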