Compare commits

33 Commits

SHA1 Message Date
c4d369369f Add error handling for self.stack when byte exceeding limit 2025-10-10 02:48:22 +00:00
a13f24980e Fix CI on the max length conversion 2025-10-10 02:48:22 +00:00
6869487ca4 Fix more byte output 2025-10-10 02:48:21 +00:00
5d9105f2ca Add support for byte in logging stream 2025-10-10 02:48:21 +00:00
191e6bb367 Fix comment and CI again 2025-10-10 02:48:21 +00:00
a15a08725b Add linter 2025-10-10 02:48:21 +00:00
756ea14378 Fix linter thank you 2025-10-10 02:48:21 +00:00
d7c5ea03df Fix linter 2025-10-10 02:48:21 +00:00
d11e253ee3 Add linter 2025-10-10 02:48:21 +00:00
01d5211679 Fix more comment and CI 2025-10-10 02:48:21 +00:00
b496a04735 Fix comment and more CI 2025-10-10 02:48:21 +00:00
03be8d227b Fix comment 2025-10-10 02:48:21 +00:00
df1b8c3e41 Fix more CI 2025-10-10 02:48:21 +00:00
94f39d5749 Fix CI 2025-10-10 02:48:21 +00:00
2eb8b70d1b Fix more comments and the case where verbose is true 2025-10-10 02:48:21 +00:00
29680dd928 Fix comments and errors 2025-10-10 02:48:21 +00:00
69bcc97937 Add linter 2025-10-10 02:48:21 +00:00
babac1d561 Fix bytecode log to graph break with queue initialization with new tx 2025-10-10 02:48:21 +00:00
8594b98b0a Add user called graph break python version specific test 2025-10-10 02:48:21 +00:00
b3fc84229e Add user called graph break test on full graph true mode 2025-10-10 02:48:21 +00:00
e409e84a7a Add fullgraph testing for dynamo 2025-10-10 02:48:21 +00:00
9c3742e7a7 Add todo for the logging output of bytecode 2025-10-10 02:48:21 +00:00
664a137dbb Fix comments from github 2025-10-10 02:48:21 +00:00
4f5a0deb83 Revert "Update torch/_dynamo/symbolic_convert.py"
This reverts commit d3d658ba65c1d627076b79bbdbebfdb9fa0ad37c.
2025-10-10 02:48:21 +00:00
4752d8fec9 Revert "Update test/dynamo/test_exc.py"
This reverts commit 7996380dc95141bf855a30b5f9b7e2b21c384f88.
2025-10-10 02:48:21 +00:00
715f0a26d7 Revert "Update test/dynamo/test_error_messages.py"
This reverts commit 1b185d792048e875f48d0a3e0bc67d47a618e5a2.
2025-10-10 02:48:21 +00:00
e9e2553603 Update test/dynamo/test_error_messages.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-10-10 02:48:21 +00:00
43fac7f55d Update test/dynamo/test_exc.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-10-10 02:48:21 +00:00
a875f27482 Update torch/_dynamo/symbolic_convert.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-10-10 02:48:21 +00:00
f34e0a941a [dynamo] Add most recent bytecode to graph break with developer initiation
ghstack-source-id: 8b538f2e1ac703a4538468a758f08db0c89b91a7
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163720

Add most recent bytecode to dynamo graph break called by user
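
As a rough illustration only (the actual PR hooks into dynamo's instruction translator; the helper name and `limit` parameter here are hypothetical), surfacing the "most recent bytecode" in a log message could look like this sketch built on the standard `dis` module:

```python
import dis

def format_latest_bytecode(code_obj, limit=20):
    # Hypothetical sketch: render the last `limit` instructions of a code
    # object, roughly the shape of "most recent bytecode" in a log message.
    lines = [
        f"{ins.offset:>4} {ins.opname} {ins.argrepr}"
        for ins in dis.get_instructions(code_obj)
    ]
    return "\n".join(lines[-limit:])
```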

Fix other user-initiated graph break and issues

Fix linter
2025-10-10 02:48:21 +00:00
81dbeb06f4 CUDA aarch64 12.6 and 12.8 builds fix triton constraints (#165013)
Since we have introduced CUDA aarch64 builds for all CUDA versions, we need to remove this constraint.
This was missed by https://github.com/pytorch/pytorch/pull/162364

The proper constraint on triton should be:
```
Requires-Dist: triton==3.5.0; platform_system == "Linux"
```

not:
```
Requires-Dist: triton==3.5.0; platform_system == "Linux" and platform_machine == "x86_64"
```
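
As an aside (not part of the PR itself), environment markers like these can be checked with the `packaging` library, which is what pip uses to evaluate them:

```python
from packaging.markers import Marker

# The old, overly narrow constraint excluded aarch64 machines.
old = Marker('platform_system == "Linux" and platform_machine == "x86_64"')
new = Marker('platform_system == "Linux"')

env = {"platform_system": "Linux", "platform_machine": "aarch64"}
print(old.evaluate(env))  # False: triton would be skipped on CUDA aarch64
print(new.evaluate(env))  # True: triton is required on all Linux machines
```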

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165013
Approved by: https://github.com/Camyll, https://github.com/nWEIdia, https://github.com/tinglvv
2025-10-09 00:49:28 +00:00
7a1ead755f [DeviceMesh] Add a warning for slicing flattened dim from root mesh and types for _get_slice_mesh_layout (#164993)
As titled, we want to add a deprecation warning for slicing a flattened dim from the root mesh. Also, cosmetic changes add types for `_get_slice_mesh_layout`.
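
A minimal sketch of the kind of warning described (the helper name is hypothetical; the real change lives inside DeviceMesh's slicing logic):

```python
import warnings

def _warn_flattened_dim_slice(dim_name: str) -> None:
    # Hypothetical sketch of the deprecation path described above.
    warnings.warn(
        f"Slicing flattened dim '{dim_name}' from the root mesh is deprecated "
        "and will be removed in a future release.",
        FutureWarning,
        stacklevel=3,
    )
```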

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164993
Approved by: https://github.com/fegin
ghstack dependencies: #164750, #164954
2025-10-09 00:47:08 +00:00
90b4e130d6 [Benchmark] cleanup torchbench models (#164816)
Prune models from the TorchInductor dashboard to reduce CI cost. This PR prunes torchbench models according to the [doc](https://docs.google.com/document/d/1nLPNNAU-_M9Clx9FMrJ1ycdPxe-xRA54olPnsFzdpoU/edit?tab=t.0), removing timm and huggingface models from torchbench since they already have dedicated suites.
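
The pruning rule itself is simple; here is a sketch of the name-based filter implied by the skip lists in this PR (the helper and sample list are illustrative, not the PR's code):

```python
# Drop torchbench entries that duplicate the dedicated HF/timm suites.
def keep_model(name: str) -> bool:
    return not name.startswith(("hf_", "timm_"))

models = ["BERT_pytorch", "hf_T5", "timm_vovnet", "vgg16"]
print([m for m in models if keep_model(m)])  # ['BERT_pytorch', 'vgg16']
```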

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164816
Approved by: https://github.com/anijain2305, https://github.com/seemethere, https://github.com/huydhn, https://github.com/malfet
2025-10-09 00:31:25 +00:00
56 changed files with 374 additions and 3006 deletions

View File

@ -29,9 +29,6 @@ env
# https://github.com/pytorch/pytorch/blob/0b6c0898e6c352c8ea93daec854e704b41485375/.ci/docker/common/install_cache.sh#L97
export PATH="/opt/cache/lib:$PATH"
# Turn off -MD / -MMD compiler flags to increase sccache hit rate
export COMPILE_NO_MD=1
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
# Use jemalloc during compilation to mitigate https://github.com/pytorch/pytorch/issues/116289
export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
@ -292,15 +289,8 @@ else
python -mpip install numpy==2.0.2
WERROR=1 python setup.py clean
sccache --stop-server
export SCCACHE_LOG_LEVEL=debug
export SCCACHE_ERROR_LOG=/tmp/sccache_errors.log
export SCCACHE_LOG=debug
export RUST_LOG=sccache::server=debug
sccache --start-server
WERROR=1 python -m build --wheel --no-isolation
mv /tmp/sccache_errors.log dist/
else
python setup.py clean
if [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then

View File

@ -2,8 +2,6 @@
# Required environment variables:
# $BUILD_ENVIRONMENT (should be set by your Docker image)
set -e -x -o pipefail
if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
# Save the absolute path in case later we chdir (as occurs in the gpu perf test)
script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"
@ -47,14 +45,14 @@ if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
# explicitly
echo "Skipping sccache server initialization, setting environment variables"
export SCCACHE_IDLE_TIMEOUT=0
export SCCACHE_ERROR_LOG=/tmp/sccache_error.log
export RUST_LOG=sccache::server=debug
export SCCACHE_ERROR_LOG=~/sccache_error.log
export RUST_LOG=sccache::server=error
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
SCCACHE_ERROR_LOG=/tmp/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
else
# increasing SCCACHE_IDLE_TIMEOUT so that extension_backend_test.cpp can build after this PR:
# https://github.com/pytorch/pytorch/pull/16645
SCCACHE_ERROR_LOG=/tmp/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 RUST_LOG=sccache::server=error sccache --start-server
SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 RUST_LOG=sccache::server=error sccache --start-server
fi
# Report sccache stats for easier debugging. It's ok if this commands

View File

@ -256,7 +256,7 @@ test_torchbench_smoketest() {
local device=mps
local dtypes=(undefined float16 bfloat16 notset)
local dtype=${dtypes[$1]}
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16)
for backend in eager inductor; do
@ -319,7 +319,7 @@ test_aoti_torchbench_smoketest() {
local device=mps
local dtypes=(undefined float16 bfloat16 notset)
local dtype=${dtypes[$1]}
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16)
echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}"
local dtype_arg="--${dtype}"

View File

@ -838,7 +838,7 @@ test_dynamo_benchmark() {
elif [[ "${suite}" == "timm_models" ]]; then
export TORCHBENCH_ONLY_MODELS="inception_v3"
elif [[ "${suite}" == "torchbench" ]]; then
export TORCHBENCH_ONLY_MODELS="hf_Bert"
export TORCHBENCH_ONLY_MODELS="BERT_pytorch"
fi
fi
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
@ -869,13 +869,13 @@ test_inductor_torchbench_smoketest_perf() {
mkdir -p "$TEST_REPORTS_DIR"
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only BERT_pytorch \
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
# The threshold value needs to be actively maintained to make this check useful
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
# Check memory compression ratio for a few models
for test in hf_Albert timm_vision_transformer; do
for test in BERT_pytorch yolov3; do
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
--disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \
--only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"

View File

@ -71,14 +71,7 @@ export PYTORCH_BUILD_NUMBER=1
# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
# CUDA 12.9/13.0 builds have triton for Linux and Linux aarch64 binaries.
if [[ "$DESIRED_CUDA" == "cu129" ]] || [[ "$DESIRED_CUDA" == "cu130" ]]; then
TRITON_CONSTRAINT="platform_system == 'Linux'"
fi
TRITON_CONSTRAINT="platform_system == 'Linux'"
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"

View File

@ -1,16 +1,16 @@
name: pull
on:
# pull_request:
# branches-ignore:
# - nightly
# push:
# branches:
# - main
# - release/*
# - landchecks/*
# tags:
# - ciflow/pull/*
pull_request:
branches-ignore:
- nightly
push:
branches:
- main
- release/*
- landchecks/*
tags:
- ciflow/pull/*
workflow_dispatch:
schedule:
- cron: 29 8 * * * # about 1:29am PDT

View File

@ -47,6 +47,22 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
libtorch-linux-jammy-cuda12_8-py3_10-gcc11-debug-build:
name: libtorch-linux-jammy-cuda12.8-py3.10-gcc11-debug
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: libtorch-linux-jammy-cuda12.8-py3.10-gcc11
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
build-generates-artifacts: false
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runner: "linux.4xlarge"
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-build:
name: linux-jammy-cuda12.8-py3.10-gcc11
uses: ./.github/workflows/_linux-build.yml
@ -69,3 +85,167 @@ jobs:
{ config: "pr_time_benchmarks", shard: 1, num_shards: 1, runner: "linux.g4dn.metal.nvidia.gpu" },
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-test:
name: linux-jammy-cuda12.8-py3.10-gcc11
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda12_8-py3_10-gcc11-build
- target-determination
with:
timeout-minutes: 360
build-environment: linux-jammy-cuda12.8-py3.10-gcc11
docker-image: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.test-matrix }}
secrets: inherit
# no-ops builds test USE_PER_OPERATOR_HEADERS=0 where ATen/ops is not generated
linux-jammy-cuda12_8-py3_10-gcc11-no-ops-build:
name: linux-jammy-cuda12.8-py3.10-gcc11-no-ops
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda12.8-py3.10-gcc11-no-ops
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
macos-py3-arm64-build:
if: github.repository_owner == 'pytorch'
name: macos-py3-arm64
uses: ./.github/workflows/_mac-build.yml
with:
sync-tag: macos-py3-arm64-build
build-environment: macos-py3-arm64
runner-type: macos-m1-stable
build-generates-artifacts: true
# To match the one pre-installed in the m1 runners
python-version: 3.12.7
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "macos-m1-stable" },
{ config: "default", shard: 2, num_shards: 3, runner: "macos-m1-stable" },
{ config: "default", shard: 3, num_shards: 3, runner: "macos-m1-stable" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-14" },
{ config: "mps", shard: 1, num_shards: 1, runner: "macos-m2-15" },
]}
secrets: inherit
macos-py3-arm64-test:
name: macos-py3-arm64
uses: ./.github/workflows/_mac-test.yml
needs:
- macos-py3-arm64-build
- target-determination
with:
build-environment: macos-py3-arm64
# Same as the build job
python-version: 3.12.7
test-matrix: ${{ needs.macos-py3-arm64-build.outputs.test-matrix }}
disable-monitor: false
secrets: inherit
win-vs2022-cpu-py3-build:
name: win-vs2022-cpu-py3
uses: ./.github/workflows/_win-build.yml
needs: get-label-type
with:
build-environment: win-vs2022-cpu-py3
cuda-version: cpu
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" },
]}
secrets: inherit
win-vs2022-cpu-py3-test:
name: win-vs2022-cpu-py3
uses: ./.github/workflows/_win-test.yml
needs:
- win-vs2022-cpu-py3-build
- target-determination
with:
build-environment: win-vs2022-cpu-py3
cuda-version: cpu
test-matrix: ${{ needs.win-vs2022-cpu-py3-build.outputs.test-matrix }}
disable-monitor: false
secrets: inherit
win-vs2022-cuda12_6-py3-build:
name: win-vs2022-cuda12.6-py3
uses: ./.github/workflows/_win-build.yml
needs: get-label-type
with:
build-environment: win-vs2022-cuda12.6-py3
cuda-version: "12.6"
runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
secrets: inherit
inductor-build:
name: inductor-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: linux-jammy-cuda12.8-py3.12-gcc9-sm80
docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
secrets: inherit
verify-cachebench-cpu-build:
name: verify-cachebench-cpu-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-py3.10-gcc11
docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks
test-matrix: |
{ include: [
{ config: "verify_cachebench", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]}
secrets: inherit
verify-cachebench-cpu-test:
name: verify-cachebench-cpu-test
uses: ./.github/workflows/_linux-test.yml
needs:
- verify-cachebench-cpu-build
- target-determination
with:
build-environment: linux-jammy-py3.10-gcc11
docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }}
test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-py3-clang12-executorch-build:
name: linux-jammy-py3-clang12-executorch
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-py3-clang12-executorch
docker-image-name: ci-image:pytorch-linux-jammy-py3-clang12-executorch
test-matrix: |
{ include: [
{ config: "executorch", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
]}
secrets: inherit
linux-jammy-py3-clang12-executorch-test:
name: linux-jammy-py3-clang12-executorch
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-py3-clang12-executorch-build
with:
build-environment: linux-jammy-py3-clang12-executorch
docker-image: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-py3-clang12-executorch-build.outputs.test-matrix }}
secrets: inherit

View File

@ -420,14 +420,6 @@ if(USE_CCACHE)
endif()
endif()
# Optionally disable -MD / -MMD if COMPILE_NO_MD is set in the environment
if(DEFINED ENV{COMPILE_NO_MD})
message(STATUS "COMPILE_NO_MD is set — disabling compiler dependency file flags (-MD/-MMD)")
foreach(lang C CXX CUDA HIP ASM)
set(CMAKE_DEPFILE_FLAGS_${lang} "")
endforeach()
endif()
# Since TensorPipe does not support Windows, set it to OFF when WIN32 detected
# On Windows platform, if user does not install libuv in build conda env and
# does not set libuv_ROOT environment variable. Set USE_DISTRIBUTED to OFF.
@ -1495,10 +1487,3 @@ else()
]])
endif()
endif()
foreach(lang C CXX CUDA)
foreach(flg "" "_DEBUG" "_RELEASE" "_RELWITHDEBINFO")
string(REPLACE "-MD" "" CMAKE_${lang}_FLAGS${flg} "${CMAKE_${lang}_FLAGS${flg}}")
string(REPLACE "-MMD" "" CMAKE_${lang}_FLAGS${flg} "${CMAKE_${lang}_FLAGS${flg}}")
endforeach()
endforeach()

View File

@ -25,15 +25,6 @@ drq
fambench_dlrm
fambench_xlmr
fastNLP_Bert
hf_Albert
hf_Bart
hf_Bert
hf_BigBird
hf_DistilBert
hf_GPT2
hf_Longformer
hf_Reformer
hf_T5
maml
maml_omniglot
mnasnet1_0
@ -60,13 +51,6 @@ soft_actor_critic
speech_transformer
squeezenet1_1
tacotron2
timm_efficientdet
timm_efficientnet
timm_nfnet
timm_regnet
timm_resnest
timm_vision_transformer
timm_vovnet
tts_angular
vgg16
vision_maskrcnn

View File

@ -23,7 +23,6 @@ TORCHBENCH_MODELS: list[str] = [
"resnet50",
"moco",
"llama",
"hf_T5",
]
HUGGINGFACE_MODELS: list[str] = [
"AllenaiLongformerBase",

View File

@ -11,7 +11,6 @@ import pandas as pd
flaky_models = {
"yolov3",
"detectron2_maskrcnn_r_101_c4",
"timm_efficientnet", # see https://github.com/pytorch/pytorch/issues/148699
"XGLMForCausalLM", # discovered in https://github.com/pytorch/pytorch/pull/128148
"moondream", # discovered in https://github.com/pytorch/pytorch/pull/159291
# discovered in https://github.com/pytorch/pytorch/issues/161419. Its not flaky but really hard to repro, so skipping it
@ -40,13 +39,9 @@ def check_accuracy(actual_csv, expected_csv, expected_filename):
"detectron2_fcos_r_50_fpn",
"doctr_det_predictor",
"doctr_reco_predictor",
"hf_BigBird",
"hf_Longformer",
"hf_Reformer",
"hf_Roberta_base",
"hf_T5",
"hf_T5_base",
"hf_T5_generate",
"dpn107",
"fbnetv3_b",
"levit_128",
"llava",
"microbench_unbacked_tolist_sum",
"mnasnet1_0",
@ -63,12 +58,7 @@ def check_accuracy(actual_csv, expected_csv, expected_filename):
"squeezenet1_1",
"stable_diffusion_text_encoder",
"stable_diffusion_unet",
"timm_efficientdet",
"timm_efficientnet",
"timm_nfnet",
"timm_regnet",
"timm_resnest",
"timm_vovnet",
"swsl_resnext101_32x16d",
"torchrec_dlrm",
"vgg16",
# LLM

View File

@ -36,12 +36,7 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename):
"detectron2_fcos_r_50_fpn",
"doctr_det_predictor",
"doctr_reco_predictor",
"hf_BigBird",
"hf_Longformer",
"hf_Reformer",
"hf_Roberta_base",
"hf_T5",
"hf_T5_base",
"levit_128",
"llava",
"microbench_unbacked_tolist_sum",
"resnet50",
@ -51,7 +46,6 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename):
"stable_diffusion_text_encoder",
"stable_diffusion_unet",
"timm_efficientdet",
"timm_nfnet",
"torchrec_dlrm",
"vgg16",
# LLM

View File

@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,6
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,20
hf_Roberta_base,pass,6
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -250,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,fail_accuracy,7
timm_regnet,pass,7
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -118,62 +118,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,fail_accuracy,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -314,30 +258,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -114,58 +114,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -278,38 +226,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -114,58 +114,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -278,38 +226,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,27
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,27
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,27
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,6
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,20
hf_Roberta_base,pass,6
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -246,30 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,7
timm_regnet,pass,7
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -98,58 +98,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -262,38 +210,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -98,58 +98,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -262,38 +210,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -106,66 +106,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,27
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -286,38 +226,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,25
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,8
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_large,pass_due_to_skip,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,model_fail_to_load,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,3

View File

@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,fail_accuracy,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,6
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,20
hf_Roberta_base,pass,6
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -246,30 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,fail_accuracy,7
timm_regnet,pass,7
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,6
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,20
hf_Roberta_base,pass,6
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -250,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,7
timm_regnet,pass,7
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,fail_accuracy,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,6
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,20
hf_Roberta_base,pass,6
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -250,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,fail_accuracy,7
timm_regnet,pass,7
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,9
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,8
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -345,38 +278,6 @@ stable_diffusion_unet,model_fail_to_load,0
timm_efficientdet,pass,2
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,6
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,25
hf_Roberta_base,pass,6
hf_T5,pass,0
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -258,38 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,pass,2
timm_efficientnet,pass,7
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -118,62 +118,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,fail_accuracy,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -314,34 +258,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,9
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,8
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -345,38 +278,6 @@ stable_diffusion_unet,model_fail_to_load,0
timm_efficientdet,pass,2
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,fail_to_run,3
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,25
hf_Roberta_base,pass,6
hf_T5,pass,0
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -254,38 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,pass,2
timm_efficientnet,pass,7
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -130,74 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,fail_to_run,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,5
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -346,38 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,pass,2
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,fail_to_run,3
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,10
hf_Reformer,pass,20
hf_Roberta_base,pass,6
hf_T5,pass,5
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -254,38 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,pass,8
timm_efficientnet,pass,7
timm_nfnet,pass,6
timm_regnet,pass,0
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,pass,9
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,8
hf_Roberta_base,pass,0
hf_T5,pass,0
hf_T5_base,pass,0
hf_T5_generate,pass,7
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -345,38 +278,6 @@ stable_diffusion_unet,model_fail_to_load,0
timm_efficientdet,pass,2
timm_efficientnet,pass,0
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,15
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Longformer,pass,4
hf_Reformer,pass,25
hf_Roberta_base,pass,6
hf_T5,pass,0
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -258,38 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientdet,pass,2
timm_efficientnet,pass,7
timm_nfnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -130,66 +130,6 @@ functorch_maml_omniglot,pass,0
hf_Albert,pass,0
hf_Bart,pass,0
hf_Bert,pass,0
hf_Bert_large,pass,0
hf_BigBird,fail_accuracy,0
hf_DistilBert,pass,0
hf_GPT2,pass,0
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,8
hf_T5,pass,0
hf_T5_base,eager_fail_to_run,0
hf_T5_generate,pass,11
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,0
hf_distil_whisper,pass,0
lennard_jones,pass,0
@ -334,30 +274,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,0
timm_regnet,pass,0
timm_resnest,pass,0
timm_vision_transformer,pass,0
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,0
torch_multimodal_clip,pass,0

View File

@ -78,58 +78,6 @@ functorch_maml_omniglot,pass,7
hf_Albert,pass,6
hf_Bart,pass,6
hf_Bert,pass,6
hf_Bert_large,pass,6
hf_BigBird,pass,6
hf_DistilBert,pass,6
hf_GPT2,pass,8
hf_GPT2_large,pass_due_to_skip,0
hf_Reformer,pass,25
hf_T5_base,eager_2nd_run_OOM,0
hf_T5_large,pass_due_to_skip,0
hf_Whisper,pass,6
hf_distil_whisper,model_fail_to_load,0
lennard_jones,pass,7
@ -246,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0
timm_efficientnet,pass,7
timm_regnet,pass,7
timm_resnest,pass,6
timm_vision_transformer,pass,6
timm_vision_transformer_large,pass_due_to_skip,0
timm_vovnet,pass,6
torch_multimodal_clip,pass,7

View File

@ -149,7 +149,6 @@ CI_SKIP_DYNAMIC_BATCH_ONLY = {
"detectron2_fasterrcnn_r_50_c4",
"detectron2_fasterrcnn_r_50_dc5",
"detectron2_fasterrcnn_r_50_fpn",
"hf_T5_generate",
"Reformer",
"llama",
}.union(INTERNAL_CI_SKIP_DYNAMIC_BATCH_ONLY)
@ -176,13 +175,7 @@ BENCHMARK_USE_SGD = {
"speech_transformer",
"squeezenet1_1",
"stable_diffusion_text_encoder",
"timm_efficientdet",
"timm_nfnet",
"timm_resnest",
"timm_vision_transformer",
"timm_vovnet",
"vgg16",
"hf_T5", # Fails dynamic https://github.com/pytorch/pytorch/issues/115968
# HF
"AlbertForMaskedLM",
"BartForCausalLM",
@ -216,8 +209,6 @@ CI_USE_SGD = {
"detectron2_maskrcnn_r_101_fpn",
"detectron2_maskrcnn_r_50_c4",
"detectron2_maskrcnn_r_50_fpn",
"hf_T5_base",
"hf_clip",
"llama_v2_7b_16h",
"mobilenet_v2_quantized_qat",
"phi_1_5 resnet50_quantized_qat",
@ -2031,8 +2022,6 @@ class BenchmarkRunner:
from diffusers.models.transformer_2d import Transformer2DModel
from torchbenchmark.models.nanogpt.model import Block
from transformers.models.llama.modeling_llama import LlamaDecoderLayer
from transformers.models.t5.modeling_t5 import T5Block
from transformers.models.whisper.modeling_whisper import WhisperEncoderLayer
from torch.distributed.fsdp.wrap import (
ModuleWrapPolicy,
@ -2042,10 +2031,6 @@ class BenchmarkRunner:
# handcrafted wrap policy
MODEL_FSDP_WRAP = {
"stable_diffusion_unet": (Transformer2DModel,),
"hf_T5": (T5Block,),
"hf_T5_base": (T5Block,),
"hf_T5_large": (T5Block,),
"hf_Whisper": (WhisperEncoderLayer,),
"llama_v2_7b_16h": (LlamaDecoderLayer,),
"nanogpt": (Block,),
}
@ -3810,22 +3795,6 @@ def run(runner, args, original_dir=None):
global synchronize
synchronize = torch.cuda.synchronize if HAS_CUDA else torch.xpu.synchronize
if (
args.devices == ["cuda"]
and torch.cuda.get_device_properties(0).total_memory < 25 * 2**30
):
# OOM errors on an RTX 3090 with 24gb RAM
runner.skip_models.update(
{
# torchbench
"hf_Longformer",
"timm_nfnet",
"timm_efficientdet",
}
)
if args.training:
runner.skip_models.add("hf_T5")
if args.nnc:
torch._C._jit_override_can_fuse_on_cpu(True)
torch._C._jit_override_can_fuse_on_gpu(True)

View File

@ -21,9 +21,6 @@ try:
except ImportError:
from torchbench import setup_torchbench_cwd
from transformers.models.bert.modeling_bert import BertLayer, BertLMPredictionHead
from transformers.models.t5.modeling_t5 import T5Block
def setup(rank, world_size):
os.environ["MASTER_ADDR"] = os.getenv("MASTER_ADDR", "localhost")
@ -128,8 +125,6 @@ def fsdp_checkpointing_base(model, blocks):
MODEL_FSDP_WRAP = {
"toy_model": (MyModule,),
"hf_Bert": (BertLayer, BertLMPredictionHead),
"hf_T5": (T5Block,),
}

View File

@ -158,7 +158,7 @@ if __name__ == "__main__":
model_arg.add_argument(
"--torchbench-model",
"--torchbench_model",
help="name of torchbench model, e.g. hf_Bert",
help="name of torchbench model, e.g. BERT_pytorch",
)
model_arg.add_argument(
"--toy-model", "--toy_model", action="store_true", help="use toy model instead"

View File

@ -12,17 +12,6 @@ cuda,dlrm,1024,1.3421,3.2177,4.9493,1.0009
cuda,drq,1,1.0820,3.8157,8.0732,0.9687
cuda,fastNLP_Bert,6,1.4839,37.9050,32.7583,1.1563
cuda,functorch_dp_cifar10,64,1.5014,6.9596,14.1516,0.4432
cuda,hf_Albert,8,2.2452,30.6134,25.9036,1.3098
cuda,hf_Bart,4,1.7012,34.3999,37.9975,1.0128
cuda,hf_Bert,4,1.9003,23.3435,34.8196,1.0273
cuda,hf_Bert_large,4,1.6346,52.8525,62.3112,1.0726
cuda,hf_BigBird,2,1.9208,105.2672,101.4787,1.1415
cuda,hf_DistilBert,8,1.3988,22.5793,20.2386,1.0232
cuda,hf_GPT2,4,1.8075,27.5184,25.3428,1.1562
cuda,hf_GPT2_large,4,1.7716,118.7404,68.1618,1.1725
cuda,hf_Reformer,4,1.1744,70.4228,15.1152,0.9266
cuda,hf_T5,8,1.8778,93.3134,37.0046,1.2279
cuda,hf_T5_large,2,2.3623,101.5518,143.7982,1.1674
cuda,lennard_jones,1000,1.0649,1.5233,4.1119,0.9998
cuda,mnasnet1_0,32,1.1957,19.1993,27.2302,0.7758
cuda,mobilenet_v2,96,1.4876,32.3311,27.4719,1.1729
@ -42,14 +31,6 @@ cuda,shufflenet_v2_x1_0,128,1.3027,25.7017,27.9875,1.1015
cuda,soft_actor_critic,256,0.9965,2.2580,4.6661,0.9995
cuda,speech_transformer,32,1.8405,35.1645,33.3422,1.0888
cuda,squeezenet1_1,32,1.4191,7.3454,9.4751,1.1148
cuda,timm_efficientdet,1,1.6630,78.2697,150.9620,0.9904
cuda,timm_efficientnet,32,1.2689,28.5348,66.3911,0.9428
cuda,timm_nfnet,128,1.5319,79.5429,32.9961,1.1070
cuda,timm_regnet,32,1.0564,56.9897,53.0027,0.9500
cuda,timm_resnest,32,1.6485,14.3908,56.7240,0.9515
cuda,timm_vision_transformer,8,1.6100,18.7736,36.9495,0.7301
cuda,timm_vision_transformer_large,8,1.0842,170.9849,72.0604,0.9762
cuda,timm_vovnet,32,1.0472,25.4676,24.8428,0.8843
cuda,tts_angular,64,1.0366,6.9889,4.2683,0.9973
cuda,vgg16,64,1.2560,52.7072,7.3733,0.9884
cuda,yolov3,16,1.2600,54.2350,42.4711,1.0108

View File

@ -1,29 +1,16 @@
#name,backend,data_type,shape,wrapper,perf_speedup_target_c7i_metal_24xl
#timm_vision_transformer,inductor,float32,static,default,1.039510755
phlippe_densenet,inductor,float32,static,default,1.46474287
basic_gnn_edgecnn,inductor,float32,dynamic,default,1.30092957
llama_v2_7b_16h,inductor,float32,dynamic,default,1.23234331
resnet50,inductor,float32,dynamic,default,1.67742767
#timm_efficientnet,inductor,float32,static,cpp,
mobilenet_v3_large,inductor,float32,static,cpp,2.63311706
timm_resnest,inductor,float32,dynamic,cpp,1.7321529
functorch_maml_omniglot,inductor,float32,dynamic,cpp,1.126799
#hf_GPT2,inductor,float32,dynamic,cpp,
yolov3,export-aot-inductor,float32,static,default,1.40687424
mobilenet_v2,export-aot-inductor,float32,static,default,2.90375357
resnext50_32x4d,export-aot-inductor,float32,dynamic,default,1.49299689
hf_Albert,export-aot-inductor,float32,dynamic,default,1.261471
resnext50_32x4d,inductor,amp,static,default,1.47023111
vgg16,inductor,amp,static,default,1.2692454
hf_Longformer,inductor,amp,dynamic,default,1.22015225
hf_Bert_large,inductor,amp,dynamic,default,1.18572179
llama,inductor,amp,static,default,1.33157028
timm_regnet,inductor,amp,static,cpp,1.12734073
mnasnet1_0,inductor,amp,static,cpp,2.1296814
#hf_T5_generate,inductor,amp,dynamic,cpp,
timm_vovnet,inductor,amp,dynamic,cpp,1.10851009
#mobilenet_v2,inductor,amp,dynamic,cpp,2.27774577 # https://github.com/pytorch/pytorch/issues/131693
hf_GPT2,export-aot-inductor,amp,static,default,1.4432794
densenet121,export-aot-inductor,amp,static,default,1.25591385
hf_DistilBert,export-aot-inductor,amp,dynamic,default,1.2926442
hf_Bart,export-aot-inductor,amp,dynamic,default,1.19515416

View File

@ -75,29 +75,7 @@ def setup_torchbench_cwd():
return original_dir
def process_hf_reformer_output(out):
assert isinstance(out, list)
# second output is unstable
return [elem for i, elem in enumerate(out) if i != 1]
def process_hf_whisper_output(out):
out_ret = []
for i, elem in enumerate(out):
if i == 0:
if elem is not None:
assert isinstance(elem, dict)
out_ret.append({k: v for k, v in elem.items() if k != "logits"})
elif i != 1:
out_ret.append(elem)
return out_ret
process_train_model_output = {
"hf_Reformer": process_hf_reformer_output,
"hf_Whisper": process_hf_whisper_output,
}
process_train_model_output = {}
class TorchBenchmarkRunner(BenchmarkRunner):
@ -227,12 +205,10 @@ class TorchBenchmarkRunner(BenchmarkRunner):
"drq",
"hf_Reformer",
"DALLE2_pytorch",
"hf_BigBird",
"detectron2_maskrcnn_r_50_fpn",
"detectron2_maskrcnn_r_101_fpn",
"vision_maskrcnn",
"doctr_reco_predictor",
"hf_T5_generate",
}
def load_model(
@ -395,8 +371,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
and hasattr(model.config, "use_cache")
):
model.config.use_cache = False
if model_name == "hf_T5_generate":
model.model.config.use_cache = False
self.validate_model(model, example_inputs)
return device, benchmark.name, model, example_inputs, batch_size

View File

@ -5,8 +5,6 @@ batch_size:
demucs: 4
dlrm: 1024
densenet121: 4
hf_Reformer: 4
hf_T5_base: 4
timm_efficientdet: 1
llama_v2_7b_16h: 1
# reduced from 16 due to cudagraphs OOM in TorchInductor dashboard
@ -30,7 +28,6 @@ tolerance:
- alexnet
- attention_is_all_you_need_pytorch
- densenet121
- hf_Albert
- vgg16
- mobilenet_v3_large
- nvidia_deeprecommender
@ -40,20 +37,16 @@ tolerance:
- soft_actor_critic
- tacotron2
- yolov3
- timm_efficientdet
- timm_efficientnet
- squeezenet1_1
higher_fp16:
- doctr_reco_predictor
- drq
- hf_Whisper
- phlippe_resnet
higher_bf16:
- doctr_reco_predictor
- drq
- hf_Whisper
# These models need higher tolerance for xpu devices with bf16
higher_bf16_xpu:
@ -71,16 +64,9 @@ tolerance:
require_larger_multiplier_for_smaller_tensor:
- yolov3
- timm_efficientnet
# These benchmarks took >600s on an i9-11900K CPU
very_slow: &VERY_SLOW_MODELS
# 3339s
- hf_BigBird
# 3062s
- hf_Longformer
# 930s
- hf_T5
# These benchmarks took >60s on an i9-11900K CPU
@ -92,18 +78,6 @@ slow:
- demucs
# 242s
- fastNLP_Bert
# 221s
- hf_Albert
# 400s
- hf_Bart
# 334s
- hf_Bert
# 187s
- hf_DistilBert
# 470s
- hf_GPT2
# 141s
- hf_Reformer
# 317s
- speech_transformer
# 99s
@ -187,11 +161,36 @@ skip:
- hf_clip
# multi gpu not always available in benchmark runners
- simple_gpt_tp_manual
# skip hf and timm models in torchbench since
# there are already separate benchmarks for them
- hf_Albert
- hf_Bart
- hf_Bert
- hf_BigBird
- hf_DistilBert
- hf_GPT2
- hf_Longformer
- hf_Reformer
- hf_T5
- timm_efficientdet
- timm_efficientnet
- timm_nfnet
- timm_regnet
- timm_resnest
- timm_vision_transformer
- timm_vovnet
- hf_Bert_large
- hf_GPT2_large
- hf_Roberta_base
- hf_T5_base
- hf_T5_generate
- hf_T5_large
- hf_Whisper
- hf_distil_whisper
- timm_vision_transformer_large
device:
cpu:
# OOMs
- hf_T5_generate
# model is CUDA only
- cm3leon_generate
# timeout
@@ -208,16 +207,12 @@ skip:
- torchrec_dlrm
- simple_gpt
# works on cuda, accuracy failure on cpu
- hf_Whisper
- stable_diffusion_text_encoder
- llava
- moco
# Skip these additional models when running on aarch64
cpu_aarch64:
# timeout on aarch64
- timm_regnet
- timm_nfnet
cpu_aarch64: []
cuda: []
@@ -235,7 +230,6 @@ skip:
- sam_fast
# Model's DEFAULT_TRAIN_BSIZE is not implemented
- cm3leon_generate
- hf_T5_generate
- doctr_det_predictor
- doctr_reco_predictor
- moondream
@@ -247,9 +241,6 @@ skip:
- cm3leon_generate
- detectron2_fcos_r_50_fpn
- fastNLP_Bert
- hf_Longformer
- hf_Reformer
- hf_T5_generate
- opacus_cifar10
- speech_transformer
@@ -286,9 +277,6 @@ accuracy:
# Models too large to have eager, dynamo and fp64_numbers simultaneously
# even for a 40 GB machine. We have tested accuracy for smaller versions of
# these models
- hf_GPT2_large
- hf_T5_large
- timm_vision_transformer_large
# accuracy https://github.com/pytorch/pytorch/issues/93847
- maml
- llama_v2_7b_16h
@@ -300,5 +288,4 @@ accuracy:
- pytorch_unet
max_batch_size:
hf_GPT2: 2
pytorch_unet: 2


@@ -4,11 +4,6 @@ LearningToPaint,1024
alexnet,1024
dcgan,1024
densenet121,64
hf_Albert,32
hf_Bart,16
hf_Bert,16
hf_GPT2,16
hf_T5,4
mnasnet1_0,256
mobilenet_v2,128
mobilenet_v3_large,256
@@ -19,10 +14,4 @@ resnet50,128
resnext50_32x4d,128
shufflenet_v2_x1_0,512
squeezenet1_1,512
timm_nfnet,256
timm_efficientnet,128
timm_regnet,128
timm_resnest,256
timm_vision_transformer,256
timm_vovnet,128
vgg16,128


@@ -6,18 +6,6 @@ densenet121,512
dlrm,2048
fastNLP_Bert,8
functorch_dp_cifar10,1024
hf_Albert,8
hf_Bart,8
hf_Bert,8
hf_Bert_large,8
hf_DistilBert,8
hf_GPT2,8
hf_GPT2_large,1
hf_Longformer,4
hf_Reformer,8
hf_T5,4
hf_T5_base,1
hf_T5_large,1
LearningToPaint,96
lennard_jones,1024
mnasnet1_0,32
@@ -35,13 +23,6 @@ shufflenet_v2_x1_0,64
speech_transformer,1024
squeezenet1_1,16
Super_SloMo,1024
timm_efficientnet,64
timm_nfnet,128
timm_regnet,32
timm_resnest,32
timm_vision_transformer,16
timm_vision_transformer_large,8
timm_vovnet,32
tts_angular,1024
vgg16,64
vision_maskrcnn,1


@@ -14,7 +14,7 @@ import torch._dynamo.config
import torch._dynamo.test_case
import torch.utils._pytree as python_pytree
from torch._dynamo.exc import ResumePrologueTracingError, Unsupported
from torch._dynamo.testing import skipIfNotPy312
from torch._dynamo.testing import skipIfNotPy312, skipIfOnlyNotPy312
from torch._dynamo.utils import counters
from torch.testing._internal.common_utils import (
IS_FBCODE,
@@ -1015,6 +1015,7 @@ Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especiall
"<Internal traceback>\n",
msg,
)
self.assertExpectedInline(
msg,
"""\
@@ -1051,7 +1052,6 @@ from user code:
torch.compile(fn, backend="eager")(torch.randn(3))
# check the log for the 2nd torch._dynamo.graph_break()
self.assertExpectedInline(
munge_exc(records[-1].getMessage(), skip=0),
"""\
@@ -1075,6 +1075,104 @@ User code traceback:
""",
)
@torch._dynamo.config.patch(verbose=True)
@make_logging_test(graph_breaks=True)
def test_latest_bytecode_to_graph_break_fullgraph(self, records):
def fn(x):
y = x + 1
z = x + y
torch._dynamo.graph_break()
return z
self.assertExpectedInlineMunged(
Unsupported,
lambda: torch.compile(fn, backend="eager", fullgraph=True)(torch.randn(3)),
"""\
Call to `torch._dynamo.graph_break()`
Explanation: User-inserted graph break. Message: None
Hint: Remove the `torch._dynamo.graph_break()` call.
Developer debug context: Called `torch._dynamo.graph_break()` with args `[]`, kwargs `{}`
For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0025.html
from user code:
File "test_error_messages.py", line N, in fn
torch._dynamo.graph_break()
""",
)
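A minimal repro sketch (illustrative, not part of this diff): with fullgraph=True, a user-inserted graph break surfaces as torch._dynamo.exc.Unsupported at call time instead of being logged.

import torch
from torch._dynamo.exc import Unsupported

@torch.compile(backend="eager", fullgraph=True)
def f(x):
    torch._dynamo.graph_break()  # fullgraph=True forbids graph breaks
    return x + 1

try:
    f(torch.randn(3))
except Unsupported as e:
    print(e)  # starts with: Call to `torch._dynamo.graph_break()`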
@skipIfOnlyNotPy312
@torch._dynamo.config.patch(verbose=True)
@make_logging_test(graph_breaks=True)
def test_latest_bytecode_to_graph_break_python_versioning(self, records):
@torch.compile(backend="eager")
def fn(x):
y = x + 1
z = x + y
torch._dynamo.graph_break()
return z
fn(torch.ones(3))
s = munge_exc(records[0].getMessage(), skip=0)
self.assertExpectedInline(
s,
"""\
Graph break in user code at test_error_messages.py:N
Graph Break Reason: Call to `torch._dynamo.graph_break()`
Explanation: User-inserted graph break. Message: None
Hint: Remove the `torch._dynamo.graph_break()` call.
Developer debug context: Called `torch._dynamo.graph_break()` with args `[]`, kwargs `{}`
For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0025.html
User code traceback:
File "test_error_messages.py", line N, in test_latest_bytecode_to_graph_break_python_versioning
fn(torch.ones(3))
========== most recent `torch.compile` tracing attempt started here ==========
File "test_error_messages.py", line N, in fn
torch._dynamo.graph_break()
NOTE: the most recent `torch.compile` tracing attempt might not be where you applied `torch.compile`! This is due to how graph breaks are implemented - the optimized code object returned by Dynamo will call another Dynamo-generated resume function and tracing is re-enabled by calling the resume function as a normal Python function, which Dynamo intercepts as a top-level frame.
Most recent bytecode instructions traced (max 20):
TRACE RESUME 0 []
TRACE LOAD_FAST 'x' []
TRACE LOAD_CONST 1 [LazyVariableTracker()]
TRACE BINARY_OP 0 [LazyVariableTracker(), ConstantVariable(int: 1)]
TRACE STORE_FAST 'y' [TensorVariable()]
TRACE LOAD_FAST 'x' []
TRACE LOAD_FAST 'y' [TensorVariable()]
TRACE BINARY_OP 0 [TensorVariable(), TensorVariable()]
TRACE STORE_FAST 'z' [TensorVariable()]
TRACE LOAD_GLOBAL 'torch' []
TRACE LOAD_ATTR '_dynamo' [LazyVariableTracker()]
TRACE LOAD_ATTR 'graph_break' [LazyVariableTracker()]
TRACE CALL 0 [NullVariable, LazyVariableTracker()]""",
)
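A usage sketch (illustrative, not part of this diff): the new "Most recent bytecode instructions traced" tail only appears when graph-break logging is enabled (e.g. TORCH_LOGS="graph_breaks") and torch._dynamo.config.verbose is set.

import torch
import torch._dynamo.config as dynamo_config

dynamo_config.verbose = True  # the bytecode tail is gated on verbose mode

@torch.compile(backend="eager")
def f(x):
    y = x + 1
    torch._dynamo.graph_break()
    return y

f(torch.ones(3))  # run with TORCH_LOGS="graph_breaks" to see the TRACE lines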
@torch._dynamo.config.patch(verbose=True)
@make_logging_test(graph_breaks=True)
def test_latest_bytecode_to_graph_break(self, records):
@torch.compile(backend="eager")
def fn(x):
y = x + 1
z = x + y
torch._dynamo.graph_break()
return z
fn(torch.ones(3))
pattern = r"TRACE.*"
s = munge_exc(records[0].getMessage(), skip=0)
matches = re.findall(pattern, s)
self.assertGreater(len(matches), 10)
self.assertLessEqual(len(matches), 20)
self.assertIn("Most recent bytecode instructions traced (max 20):", s)
@torch._dynamo.config.patch(verbose=True)
@make_logging_test(graph_breaks=True)
def test_graph_break_traceback_above_dynamo_shows_user_code(self, records):


@@ -43,6 +43,7 @@ import threading
import traceback
import types
import weakref
from collections import deque
from traceback import StackSummary
from typing import Any, Callable, cast, NoReturn, Optional, TYPE_CHECKING, Union
from typing_extensions import TypeAlias, TypeIs
@@ -544,6 +545,7 @@ def log_graph_break(
reason: str = "",
exc_info: bool = False,
user_stack: Optional[StackSummary] = None,
latest_bytecode_log: Optional[str] = None,
) -> None:
if user_stack is None:
user_stack = torch._guards.TracingContext.extract_stack()
@@ -606,6 +608,10 @@ def log_graph_break(
# This log line MUST contain the string "Graph break in user code";
# it is exercised by
# python test/dynamo/test_exc.py -k test_graph_break_log
if latest_bytecode_log and config.verbose:
user_stack_trace += "Most recent bytecode instructions traced (max 20):\n"
user_stack_trace += latest_bytecode_log
graph_break_log.debug(
user_stack_trace,
)
@@ -933,6 +939,7 @@ def break_graph_if_unsupported(
exc_info=True,
reason=str(excp),
user_stack=excp.real_stack,
latest_bytecode_log="\n".join(self.latest_bytecode_queue),
)
if self.maybe_has_backedge():
@@ -1184,6 +1191,8 @@ class InstructionTranslatorBase(
parent: Optional[InstructionTranslatorBase]
debug_locals: list[tuple[VariableTracker, list[VariableTracker]]]
package: Optional[CompilePackage]
latest_bytecode_queue: deque[str]
# Stores the most recent bytecode instructions traced before a user-initiated graph break
def mark_inconsistent_side_effects(self) -> None:
"""
@@ -1351,6 +1360,17 @@ class InstructionTranslatorBase(
"TRACE %s %s %s", inst.opname, inst.argval, self.stack
)
# Store the latest 20 traced bytecode instructions for this frame;
# repr() is used for the stack, with its length limited to 2048
try:
stack_repr = repr(self.stack)
except ValueError:
# Handle large integers that exceed sys.int_info.str_digits_check_threshold
stack_repr = "<self.stack repr truncated due to large integer>"
self.latest_bytecode_queue.append(
f"TRACE {inst.opname} {repr(inst.argval)} {stack_repr}"
)
self.update_block_stack(inst)
try:
@@ -4083,6 +4103,7 @@ class InstructionTranslatorBase(
self.accept_prefix_inst = True
self.prefix_insts = []
self.exn_vt_stack = exn_vt_stack
self.latest_bytecode_queue = deque(maxlen=20)
# Properties of the input/output code
self.instructions: list[Instruction] = instructions
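A standalone sketch of the bounded trace buffer introduced above (the names trace_buffer and record are illustrative, not from this diff): deque(maxlen=20) evicts the oldest entry automatically, and repr() of a very large int can raise ValueError on CPython 3.11+ once it exceeds the integer-to-string digit limit.

from collections import deque

trace_buffer: deque[str] = deque(maxlen=20)  # oldest TRACE lines are dropped first

def record(opname: str, argval: object, stack: list) -> None:
    try:
        stack_repr = repr(stack)
    except ValueError:
        # int-to-str conversion past sys.get_int_max_str_digits() raises ValueError
        stack_repr = "<stack repr truncated due to large integer>"
    trace_buffer.append(f"TRACE {opname} {argval!r} {stack_repr}")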


@@ -506,6 +506,12 @@ def skipIfNotPy312(fn: Callable[_P, _T]) -> Callable[_P, _T]:
return unittest.skip("Requires Python 3.12+")(fn)
def skipIfOnlyNotPy312(fn: Callable[_P, _T]) -> Callable[_P, _T]:
    """Skip the test unless running on exactly Python 3.12.x."""
if sys.version_info >= (3, 13) or sys.version_info < (3, 12):
return unittest.skip("Requires Python 3.12")(fn)
return fn
def xfailIfPy312(fn: Callable[_P, _T]) -> Callable[_P, _T]:
if sys.version_info >= (3, 12):
return unittest.expectedFailure(fn)
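Why the new test above is gated to exactly 3.12 (a sketch, not part of this diff): the expected TRACE lines name version-specific opcodes; for example, x + 1 compiles to BINARY_OP on CPython 3.11/3.12, while 3.13 changes other parts of the instruction sequence.

import dis
dis.dis(compile("x + 1", "<expr>", "eval"))  # CPython 3.12 prints BINARY_OP 0 (+)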


@@ -239,7 +239,9 @@ else:
)
return not_none(device_mesh.mesh_dim_names.index(mesh_dim_name))
def _get_slice_mesh_layout(self, device_mesh, mesh_dim_names) -> _MeshLayout:
def _get_slice_mesh_layout(
self, device_mesh: "DeviceMesh", mesh_dim_names: tuple[str, ...]
) -> _MeshLayout:
"""
Validate that mesh_dim_names can be used to slice the given device_mesh.
If valid, return the layout of the sliced mesh within the device mesh.
@@ -266,7 +268,7 @@
else {}
)
valid_mesh_dim_names = [
*device_mesh.mesh_dim_names,
*not_none(device_mesh.mesh_dim_names),
*flatten_name_to_root_layout,
]
@@ -281,11 +283,17 @@
layout_sliced = []
for name in mesh_dim_names:
if name in device_mesh.mesh_dim_names:
if name in not_none(device_mesh.mesh_dim_names):
layout_sliced.append(
device_mesh._layout[device_mesh.mesh_dim_names.index(name)]
device_mesh._layout[
not_none(device_mesh.mesh_dim_names).index(name)
]
)
elif name in flatten_name_to_root_layout:
warnings.warn(
"Slicing a flattened dim from root mesh will be deprecated in PT 2.11. "
"Users need to bookkeep the flattened mesh directly. "
)
layout_sliced.append(flatten_name_to_root_layout[name])
sliced_sizes = tuple(l.sizes for l in layout_sliced)
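A hedged sketch of the pattern that now warns (illustrative; assumes a 4-rank job launched with torchrun, and uses the private _flatten API):

from torch.distributed.device_mesh import init_device_mesh

mesh = init_device_mesh("cpu", (2, 2), mesh_dim_names=("dp", "tp"))
flat = mesh["dp", "tp"]._flatten("dp_tp")  # keep this handle; bookkeeping is on the user
sliced = mesh["dp_tp"]  # still works, but now warns: deprecated in PT 2.11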