mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Enable inductor CI for huggingface (#86792)
Summary: Unit tests will be enabled after they are fixed in trunk. TorchBench and TIMM need more setup and are coming later. Pull Request resolved: https://github.com/pytorch/pytorch/pull/86792 Approved by: https://github.com/jansel, https://github.com/huydhn
This commit is contained in:
committed by
PyTorch MergeBot
parent
9ba632253a
commit
b1cf377cce
1
.github/ci_commit_pins/huggingface.txt
vendored
Normal file
1
.github/ci_commit_pins/huggingface.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
ebee0a27940adfbb30444d83387b9ea0f1173f40
|
1
.github/ci_commit_pins/timm.txt
vendored
Normal file
1
.github/ci_commit_pins/timm.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
ebee0a27940adfbb30444d83387b9ea0f1173f40
|
1
.github/ci_commit_pins/torchbench.txt
vendored
Normal file
1
.github/ci_commit_pins/torchbench.txt
vendored
Normal file
@ -0,0 +1 @@
|
||||
24b95f2f627bf07a61cefed653419389a7586357
|
5
.github/labeler.yml
vendored
5
.github/labeler.yml
vendored
@ -7,3 +7,8 @@
|
||||
"module: inductor":
|
||||
- torch/_inductor/**
|
||||
- test/inductor/**
|
||||
|
||||
"ciflow/inductor":
|
||||
- torch/_dynamo/**
|
||||
- torch/_inductor/**
|
||||
- benchmarks/dynamo/**
|
||||
|
1
.github/scripts/filter_test_configs.py
vendored
1
.github/scripts/filter_test_configs.py
vendored
@ -22,6 +22,7 @@ VALID_TEST_CONFIG_LABELS = {f"{PREFIX}{label}" for label in {
|
||||
"dynamo",
|
||||
"force_on_cpu",
|
||||
"functorch",
|
||||
"inductor",
|
||||
"jit_legacy",
|
||||
"multigpu",
|
||||
"nogpu_AVX512",
|
||||
|
36
.github/workflows/inductor.yml
vendored
Normal file
36
.github/workflows/inductor.yml
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
name: inductor
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- ciflow/inductor/*
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
linux-bionic-cuda11_6-py3_10-gcc7-inductor-build:
|
||||
name: cuda11.6-py3.10-gcc7-sm86
|
||||
uses: ./.github/workflows/_linux-build.yml
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.6-py3.10-gcc7-sm86
|
||||
docker-image-name: pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7
|
||||
cuda-arch-list: 8.6
|
||||
test-matrix: |
|
||||
{ include: [
|
||||
{ config: "inductor", shard: 1, num_shards: 6, runner: "linux.g5.4xlarge.nvidia.gpu" },
|
||||
{ config: "inductor", shard: 2, num_shards: 6, runner: "linux.g5.4xlarge.nvidia.gpu" },
|
||||
]}
|
||||
|
||||
linux-bionic-cuda11_6-py3_10-gcc7-inductor-test:
|
||||
name: cuda11.6-py3.10-gcc7-sm86
|
||||
uses: ./.github/workflows/_linux-test.yml
|
||||
needs: linux-bionic-cuda11_6-py3_10-gcc7-inductor-build
|
||||
with:
|
||||
build-environment: linux-bionic-cuda11.6-py3.10-gcc7-sm86
|
||||
docker-image: ${{ needs.linux-bionic-cuda11_6-py3_10-gcc7-inductor-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-bionic-cuda11_6-py3_10-gcc7-inductor-build.outputs.test-matrix }}
|
@ -170,6 +170,33 @@ function test_torch_deploy(){
|
||||
popd
|
||||
}
|
||||
|
||||
function install_huggingface() {
  # Install pandas, scipy and the HuggingFace transformers checkout at the
  # commit returned by `get_pinned_commit huggingface`.
  local pinned_commit
  pinned_commit=$(get_pinned_commit huggingface)

  pip_install pandas
  pip_install scipy
  pip_install "git+https://github.com/huggingface/transformers.git@${pinned_commit}#egg=transformers"
}
|
||||
|
||||
function install_timm() {
  # Install pandas, scipy and pytorch-image-models (timm) at the commit
  # returned by `get_pinned_commit timm`.
  local pinned_commit
  pinned_commit=$(get_pinned_commit timm)

  pip_install pandas
  pip_install scipy
  pip_install "git+https://github.com/rwightman/pytorch-image-models@${pinned_commit}"
}
|
||||
|
||||
function checkout_install_torchbench() {
  # Clone pytorch/benchmark into ./torchbench, check out the commit returned
  # by `get_pinned_commit torchbench`, and run its installer from inside the
  # checkout.
  local pinned_commit
  pinned_commit=$(get_pinned_commit torchbench)

  git clone https://github.com/pytorch/benchmark torchbench
  pushd torchbench
  git checkout "${pinned_commit}"
  python install.py
  pip_install gym==0.25.2  # workaround issue in 0.26.0
  popd
}
|
||||
|
||||
function test_functorch() {
  # Run the functorch test selection through the shared test runner.
  python test/run_test.py --functorch --verbose
}
|
||||
|
@ -109,6 +109,10 @@ if [[ "$TEST_CONFIG" == *dynamo* ]]; then
|
||||
export PYTORCH_TEST_WITH_DYNAMO=1
|
||||
fi
|
||||
|
||||
if [[ "$TEST_CONFIG" == *inductor* ]]; then
|
||||
export PYTORCH_TEST_WITH_INDUCTOR=1
|
||||
fi
|
||||
|
||||
# TODO: this condition is never true, need to fix this.
|
||||
if [[ -n "$PR_NUMBER" ]] && [[ -z "$CI_MASTER" || "$CI_MASTER" == "false" ]]; then
|
||||
# skip expensive checks when on PR and CI_MASTER flag is not set
|
||||
@ -249,6 +253,30 @@ test_dynamo_shard() {
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
|
||||
test_inductor() {
  # Placeholder: the inductor unit tests are not enabled yet, so this only
  # prints a reminder. The intended invocations are kept below as comments.
  echo "TODO: enable inductor unit tests"
  # time python test/run_test.py --core --exclude test_autograd --continue-through-error --verbose

  # PYTORCH_TEST_WITH_DYNAMO and PYTORCH_TEST_WITH_INDUCTOR are only needed for
  # PyTorch tests that were not themselves written to use dynamo/inductor. For
  # the dynamo/inductor unit tests, specifying them will trigger an error like
  # "Detected two calls to `torchdynamo.optimize(...)` with a different backend compiler arguments."
  # PYTORCH_TEST_WITH_DYNAMO=0 PYTORCH_TEST_WITH_INDUCTOR=0 pytest test/inductor
}
|
||||
|
||||
test_inductor_huggingface_shard() {
  # Run the HuggingFace inductor training-accuracy benchmark for one partition
  # and then validate the resulting CSV.
  #
  # $1: partition id passed to the benchmark and used in the CSV file name.
  # Requires NUM_TEST_SHARDS to be set; exits with status 1 otherwise.
  if [[ -z "$NUM_TEST_SHARDS" ]]; then
    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
    exit 1
  fi

  # Deliberately not `local`: the original assigns this as a shell global.
  TEST_REPORTS_DIR=/tmp/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  # Both the benchmark and the checker operate on the same report file.
  local report_csv="${TEST_REPORTS_DIR}/inductor_huggingface_${1}.csv"

  python benchmarks/dynamo/huggingface.py --ci --training --accuracy \
    --device cuda --inductor --float32 --total-partitions 1 --partition-id "$1" \
    --output "$report_csv"
  python benchmarks/dynamo/check_csv.py -f "$report_csv"
}
|
||||
|
||||
test_python_gloo_with_tls() {
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
|
||||
assert_git_not_dirty
|
||||
@ -699,6 +727,17 @@ elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHAR
|
||||
install_filelock
|
||||
install_triton
|
||||
test_dynamo_shard 2
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
install_torchvision
|
||||
install_filelock
|
||||
install_triton
|
||||
test_inductor
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
install_torchvision
|
||||
install_filelock
|
||||
install_triton
|
||||
install_huggingface
|
||||
test_inductor_huggingface_shard 0
|
||||
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
test_without_numpy
|
||||
install_torchvision
|
||||
|
40
benchmarks/dynamo/check_csv.py
Normal file
40
benchmarks/dynamo/check_csv.py
Normal file
@ -0,0 +1,40 @@
|
||||
import argparse
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def check_csv(filename):
    """
    Basic accuracy checking.

    Read the results CSV at ``filename``, print one line per row with the
    model name and its accuracy status, and terminate the process with exit
    status 1 if any row's status does not contain the substring "pass".

    Args:
        filename: path of a CSV file with at least ``name`` and ``accuracy``
            columns.

    Raises:
        SystemExit: with code 1 when at least one row is not passing.
    """

    df = pd.read_csv(filename)

    failed = []
    for _, row in df.iterrows():
        # pandas returns an empty cell as a float NaN (and may parse a column
        # as numeric), so coerce to str before the substring test and the
        # join below; a missing status then counts as a failure instead of
        # raising TypeError.
        model_name = str(row["name"])
        status = str(row["accuracy"])
        if "pass" not in status:
            failed.append(model_name)

        print(f"{model_name:34} {status}")

    if failed:
        print(
            textwrap.dedent(
                f"""
                Error {len(failed)} models failed
                    {' '.join(failed)}
                """
            )
        )
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read the CSV path from --file/-f and run the
    # accuracy check on it.
    cli = argparse.ArgumentParser()
    cli.add_argument("--file", "-f", type=str, help="csv file name")
    check_csv(cli.parse_args().file)
|
@ -147,6 +147,10 @@ CI_SKIP_INDUCTOR_TRAINING = [
|
||||
"cait_m36_384", # fp64_OOM
|
||||
"coat_lite_mini", # time out
|
||||
"convit_base", # fp64_OOM
|
||||
"gernet_l", # accuracy
|
||||
"gluon_xception65",
|
||||
"lcnet_0500", # accuracy
|
||||
"levit_128", # levit_128
|
||||
"rexnet_100", # accuracy
|
||||
"swin_base_patch4_window7_224",
|
||||
"twins_pcpvt_base", # time out
|
||||
|
Reference in New Issue
Block a user