Compare commits

...

14 Commits

Author SHA1 Message Date
34e28963d0 [no ci] Turn off TORCHINDUCTOR_GRAPH_PARTITION
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-25 21:43:47 -07:00
f0fe616cd5 [no ci] Reinstall detectron2
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-25 13:01:53 -07:00
00d3694059 [no ci] Some models are not fit to run on CI
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-25 11:02:08 -07:00
34e8518bfc [no ci] Rerun with all LLM models
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-24 20:52:25 -07:00
f94ebe2599 Merge branch 'main' into prepare-perf-number-2.9
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-24 20:51:31 -07:00
e0ba333f4f [no ci] RC4
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-23 23:53:11 -07:00
400d1e9777 Skip LLM training and increase the number of H100 shard for HF
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-23 04:19:32 -07:00
52d9bd3c93 [no ci] Silly mistake
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-22 23:03:01 -07:00
aa22b4fa50 2.9 RC3
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-22 17:45:51 -07:00
c969b47090 [no ci] Rebuild torchrec and fbgemm
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-19 22:18:05 -07:00
9218a4716e Does this work?
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-19 03:33:40 -07:00
a22515f573 Add a debug
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-19 03:30:27 -07:00
7e16093692 [no ci ] Merge branch 'main' into prepare-perf-baseline-number-2.8
2025-09-19 03:27:42 -07:00
d4456bde3b [no ci] Run TorchInductor benchmark on PyTorch 2.8
Signed-off-by: Huy Do <huydhn@gmail.com>
2025-09-18 17:18:13 -07:00
5 changed files with 40 additions and 8 deletions

View File

@@ -826,6 +826,24 @@ test_dynamo_benchmark() {
   local shard_id="$1"
   shift

+  ### Perf benchmark 2.9 RC4, need to reinstall detectron2 to avoid crashing when importing it
+  pip_uninstall torch torchvision torchaudio torchrec fbgemm-gpu triton pytorch-triton detectron2
+  pip_install torch==2.9.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
+
+  # Rebuild torchrec and fbgemm because they don't have RC for 2.9 yet
+  if [[ "${TEST_CONFIG}" == *torchbench* ]] && [[ "${TEST_CONFIG}" != *cpu* ]]; then
+    rm -rf dist/torchrec
+    rm -rf dist/fbgemm_gpu
+    install_torchrec_and_fbgemm
+  fi
+
+  # Same pinned commit as used in TorchBench
+  pip_install git+https://github.com/facebookresearch/detectron2.git@0df2d73d0013db7de629602c23cc120219b4f2b8
+  pip freeze
+
+  # Control TORCHINDUCTOR_GRAPH_PARTITION
+  export TORCHINDUCTOR_GRAPH_PARTITION=0
+
   if [[ "${TEST_CONFIG}" == *perf_compare* ]]; then
     test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
   elif [[ "${TEST_CONFIG}" == *perf* ]]; then

View File

@@ -100,11 +100,12 @@ jobs:
       cuda-arch-list: '9.0'
       test-matrix: |
         { include: [
-          { config: "inductor_huggingface_perf_cuda_h100", shard: 1, num_shards: 5, runner: "linux.aws.h100" },
-          { config: "inductor_huggingface_perf_cuda_h100", shard: 2, num_shards: 5, runner: "linux.aws.h100" },
-          { config: "inductor_huggingface_perf_cuda_h100", shard: 3, num_shards: 5, runner: "linux.aws.h100" },
-          { config: "inductor_huggingface_perf_cuda_h100", shard: 4, num_shards: 5, runner: "linux.aws.h100" },
-          { config: "inductor_huggingface_perf_cuda_h100", shard: 5, num_shards: 5, runner: "linux.aws.h100" },
+          { config: "inductor_huggingface_perf_cuda_h100", shard: 1, num_shards: 6, runner: "linux.aws.h100" },
+          { config: "inductor_huggingface_perf_cuda_h100", shard: 2, num_shards: 6, runner: "linux.aws.h100" },
+          { config: "inductor_huggingface_perf_cuda_h100", shard: 3, num_shards: 6, runner: "linux.aws.h100" },
+          { config: "inductor_huggingface_perf_cuda_h100", shard: 4, num_shards: 6, runner: "linux.aws.h100" },
+          { config: "inductor_huggingface_perf_cuda_h100", shard: 5, num_shards: 6, runner: "linux.aws.h100" },
+          { config: "inductor_huggingface_perf_cuda_h100", shard: 6, num_shards: 6, runner: "linux.aws.h100" },
           { config: "inductor_timm_perf_cuda_h100", shard: 1, num_shards: 7, runner: "linux.aws.h100" },
           { config: "inductor_timm_perf_cuda_h100", shard: 2, num_shards: 7, runner: "linux.aws.h100" },
           { config: "inductor_timm_perf_cuda_h100", shard: 3, num_shards: 7, runner: "linux.aws.h100" },

View File

@@ -3355,7 +3355,7 @@ def parse_args(args=None):
     parser.add_argument(
         "--timeout",
         type=int,
-        default=2000,
+        default=3600,
         help="timeout (second) for benchmarking.",
     )

View File

@@ -373,6 +373,10 @@ class HuggingfaceRunner(BenchmarkRunner):
     def skip_models_due_to_control_flow(self):
         return self._skip["control_flow"]

+    @property
+    def skip_not_suitable_for_training_models(self):
+        return self._skip["test"]["training"]
+
     def use_larger_multiplier_for_smaller_tensor(self, name):
         return name in [
             "ElectraForQuestionAnswering",

View File

@@ -9,10 +9,9 @@ skip:
     # Fails with even batch size = 1
     - GPTJForCausalLM
    - GPTJForQuestionAnswering
-    # Model too big
+    # Model too big or the benchmark is taking too long (timeout)
     - google/gemma-3-4b-it
     - openai/gpt-oss-20b
-    - mistralai/Mistral-7B-Instruct-v0.3

   device:
     cpu:
@@ -27,6 +26,16 @@ skip:
   control_flow:
     - AllenaiLongformerBase

+  test:
+    training:
+      - meta-llama/Llama-3.2-1B
+      - google/gemma-2-2b
+      - google/gemma-3-4b-it
+      - openai/whisper-tiny
+      - Qwen/Qwen3-0.6B
+      - mistralai/Mistral-7B-Instruct-v0.3
+      - openai/gpt-oss-20b
+
 batch_size:
   # TODO - Fails even after fake tensors
   divisors:
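To close the loop on the config change, a minimal sketch of reading the new skip -> test -> training list back out; it assumes PyYAML is available, and the path benchmarks/dynamo/huggingface.yaml is an assumption about where this Hugging Face benchmark config lives.

# Minimal read-back sketch, not part of the PR; the file path is an assumption.
import yaml

with open("benchmarks/dynamo/huggingface.yaml") as f:
    cfg = yaml.safe_load(f)

# These are the models the runner reports via skip_not_suitable_for_training_models,
# i.e. they are skipped only when the benchmark runs in training mode.
training_skips = set(cfg["skip"]["test"]["training"])
print(sorted(training_skips))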