diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
index 22ebe175bb17..ff7546080a05 100644
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@@ -158,6 +158,19 @@ function install_torchvision() {
   fi
 }
 
+function install_torchrec_and_fbgemm() {
+  local torchrec_commit
+  torchrec_commit=$(get_pinned_commit torchrec)
+  local fbgemm_commit
+  fbgemm_commit=$(get_pinned_commit fbgemm)
+  pip_uninstall torchrec-nightly
+  pip_uninstall fbgemm-gpu-nightly
+  pip_install setuptools-git-versioning scikit-build pyre-extensions
+  # See https://github.com/pytorch/pytorch/issues/106971
+  CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
+  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
+}
+
 function install_numpy_pytorch_interop() {
   local commit
   commit=$(get_pinned_commit numpy_pytorch_interop)
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
index a4955e3f6859..2c549a603bce 100755
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@@ -1028,6 +1028,11 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
     PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
   else
     checkout_install_torchbench
+    # Do this after checkout_install_torchbench to ensure we clobber any
+    # nightlies that torchbench may pull in
+    if [[ "${TEST_CONFIG}" != *cpu_accuracy* ]]; then
+      install_torchrec_and_fbgemm
+    fi
     PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
   fi
 elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 ]]; then
diff --git a/.github/ci_commit_pins/fbgemm.txt b/.github/ci_commit_pins/fbgemm.txt
new file mode 100644
index 000000000000..ce0b3c180e83
--- /dev/null
+++ b/.github/ci_commit_pins/fbgemm.txt
@@ -0,0 +1 @@
+1b2746f642cc2c99fe9d1a0c34359c0de45341c2
diff --git a/.github/ci_commit_pins/torchbench.txt b/.github/ci_commit_pins/torchbench.txt
index ca74da86169d..2eb84a5bdef0 100644
--- a/.github/ci_commit_pins/torchbench.txt
+++ b/.github/ci_commit_pins/torchbench.txt
@@ -1 +1 @@
-8a0f5e3678bef55148743ab987baa3c89f8dfb5e
+9371b9e13c826f3930e54346b4d619cb59182f68
diff --git a/.github/ci_commit_pins/torchrec.txt b/.github/ci_commit_pins/torchrec.txt
new file mode 100644
index 000000000000..5226bd4cea36
--- /dev/null
+++ b/.github/ci_commit_pins/torchrec.txt
@@ -0,0 +1 @@
+6cd9fd362514d14ebb9ed51314c62ac1e1e2bbf2
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_inference.csv
index c8c9f86a6288..2c775b331c7f 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_inference.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_inference.csv
@@ -12,6 +12,7 @@ basic_gnn_sage,pass,0
 clip,pass,0
 cm3leon_generate,pass,6
 dcgan,pass,0
+dlrm,pass,0
 doctr_det_predictor,pass,2
 doctr_reco_predictor,fail_accuracy,4
 drq,pass,0
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_training.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_training.csv
index 3f222c10ca86..5a7c19715c55 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_training.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_dynamic_training.csv
@@ -1,4 +1,5 @@
 name,accuracy,graph_breaks
+torchrec_dlrm,infra_error,0
 BERT_pytorch,pass,8
 LearningToPaint,pass,8
 Super_SloMo,pass,8
@@ -10,6 +11,7 @@ basic_gnn_gin,pass,8
 basic_gnn_sage,pass,8
 clip,pass,8
 dcgan,pass,8
+dlrm,pass,8
 drq,pass,7
 fastNLP_Bert,pass,12
 functorch_dp_cifar10,pass,8
diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv
index fbc1bf9ee168..f894c18cf0dc 100644
--- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv
+++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv
@@ -1,4 +1,5 @@
 name,accuracy,graph_breaks
+torchrec_dlrm,pass,8
 BERT_pytorch,pass,8
 LearningToPaint,pass,8
 Super_SloMo,pass,8
@@ -10,6 +11,7 @@ basic_gnn_gin,pass,8
 basic_gnn_sage,pass,8
 clip,pass,8
 dcgan,pass,8
+dlrm,pass,8
 drq,pass,7
 fastNLP_Bert,pass,12
 functorch_dp_cifar10,pass,8
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index 73e940869964..c00501c0166e 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -95,8 +95,6 @@ CI_SKIP[CI("eager", training=False)] = [
     "hf_BigBird",  # fail_accuracy
     # TypeError: pad_center() takes 1 positional argument but 2 were given
     "tacotron2",
-    # torchrec_dlrm requires gcc-11, https://github.com/pytorch/benchmark/pull/1427
-    "torchrec_dlrm",
     # Huggingface
     "DebertaV2ForQuestionAnswering",  # OOM
 ]
@@ -170,8 +168,6 @@ CI_SKIP[CI("aot_eager", training=True)] = [
 CI_SKIP[CI("inductor", training=False)] = [
     # TorchBench
     "DALLE2_pytorch",  # AttributeError: text_encodings
-    # torchrec_dlrm requires gcc-11, https://github.com/pytorch/benchmark/pull/1427
-    "torchrec_dlrm",
     "demucs",  # OOM
     "detectron2_fasterrcnn_r_101_c4",
     "detectron2_fasterrcnn_r_101_dc5",
@@ -223,8 +219,6 @@ CI_SKIP[CI("inductor", training=False, device="cpu")] = [
     "pyhpc_turbulent_kinetic_energy",
     "resnet50_quantized_qat",  # Eager model failed to run(Quantize only works on Float Tensor, got Double)
     "sage",  # does not work with fp32
-    # torchrec_dlrm requires gcc-11, https://github.com/pytorch/benchmark/pull/1427
-    "torchrec_dlrm",
     # Huggingface
     "MBartForConditionalGeneration",  # Accuracy https://github.com/pytorch/pytorch/issues/94793
     "PLBartForConditionalGeneration",  # Accuracy https://github.com/pytorch/pytorch/issues/94794
@@ -239,7 +233,6 @@ CI_SKIP[CI("inductor", training=True)] = [
     *CI_SKIP[CI("inductor", training=False)],
     # TorchBench
     "Background_Matting",  # fp64_OOM
-    "dlrm",  # Fails on CI - unable to repro locally
     "hf_T5_base",  # accuracy
     "mobilenet_v3_large",  # accuracy
     "resnet50_quantized_qat",  # Eager model failed to run
@@ -260,7 +253,6 @@ CI_SKIP[CI("aot_eager", training=False, dynamic=True)] = [
     *CI_SKIP[CI("aot_eager", training=False)],
     "vision_maskrcnn",  # accuracy failure on boxes, after https://github.com/pytorch/pytorch/issues/101093
     # https://github.com/pytorch/pytorch/issues/103760
-    "dlrm",
     "hf_T5_generate",
     "hf_Bert",  # Error: RelaxedUnspecConstraint(L['input_ids'].size()[0]) - inferred constant (4)
 ]
@@ -269,6 +261,7 @@ CI_SKIP[CI("aot_eager", training=True, dynamic=True)] = [
     *CI_SKIP[CI("aot_eager", training=True)],
     *CI_SKIP[CI("aot_eager", training=False, dynamic=True)],
     "llama",  # AssertionError: cannot compute free_symbols of True
+    "torchrec_dlrm",  # RuntimeError: mat1 and mat2 must have the same dtype, but got Float and BFloat16
 ]
 
 CI_SKIP[CI("inductor", training=False, dynamic=True)] = [
@@ -296,8 +289,6 @@ CI_SKIP_OPTIMIZER = {
     # TIMM
     "convmixer_768_32",  # accuracy
     "hrnet_w18",  # Stack issue in fx
-    # TorchBench
-    "dlrm",  # symbolic shapes error
     # HF
     "pnasnet5large",  # Stack issue in fx
     "MobileBertForMaskedLM",  # Stack issue in fx
@@ -311,6 +302,7 @@ CI_SKIP_DYNAMIC_BATCH_ONLY = {
     # It iterates over the batch, which is dynamic, and dynamo chokes
     # We should be able to graphbreak there.
     "doctr_det_predictor",
+    "dlrm",
 }
diff --git a/benchmarks/dynamo/torchbench.py b/benchmarks/dynamo/torchbench.py
index 060175c15a25..8ffecb56afff 100755
--- a/benchmarks/dynamo/torchbench.py
+++ b/benchmarks/dynamo/torchbench.py
@@ -90,6 +90,7 @@ SKIP_FOR_CPU = {
     "sam",  # timeout
     "llama_v2_7b_16h",  # model is CUDA only
     "stable_diffusion",  # flaky
+    "torchrec_dlrm",  # requires FBGEMM, CUDA only
 }
 
 SKIP_FOR_CUDA = {
@@ -228,6 +229,11 @@ FORCE_AMP_FOR_FP16_BF16_MODELS = {
     "tts_angular",
 }
 
+# models in canary_models that we should run anyway
+CANARY_MODELS = {
+    "torchrec_dlrm",
+}
+
 
 class TorchBenchmarkRunner(BenchmarkRunner):
     def __init__(self):
@@ -394,9 +400,16 @@ class TorchBenchmarkRunner(BenchmarkRunner):
         return device, benchmark.name, model, example_inputs, batch_size
 
     def iter_model_names(self, args):
-        from torchbenchmark import _list_model_paths
+        from torchbenchmark import _list_canary_model_paths, _list_model_paths
 
         models = _list_model_paths()
+        models += [
+            f
+            for f in _list_canary_model_paths()
+            if os.path.basename(f) in CANARY_MODELS
+        ]
+        models.sort()
+
         start, end = self.get_benchmark_indices(len(models))
         for index, model_path in enumerate(models):
             if index < start or index >= end: