Enable inductor CI for huggingface (#86792)

Summary: Unit tests will be enabled once they are fixed in trunk. TorchBench and TIMM need
more setup and are coming later.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/86792
Approved by: https://github.com/jansel, https://github.com/huydhn
This commit is contained in:
Bin Bao
2022-10-20 22:37:07 +00:00
committed by PyTorch MergeBot
parent 9ba632253a
commit b1cf377cce
10 changed files with 155 additions and 0 deletions

View File

@ -0,0 +1 @@
ebee0a27940adfbb30444d83387b9ea0f1173f40

1
.github/ci_commit_pins/timm.txt vendored Normal file
View File

@ -0,0 +1 @@
ebee0a27940adfbb30444d83387b9ea0f1173f40

1
.github/ci_commit_pins/torchbench.txt vendored Normal file
View File

@ -0,0 +1 @@
24b95f2f627bf07a61cefed653419389a7586357

5
.github/labeler.yml vendored
View File

@ -7,3 +7,8 @@
"module: inductor":
- torch/_inductor/**
- test/inductor/**
# Path patterns associated with the ciflow/inductor label.
# NOTE(review): unlike "module: inductor", test/inductor/** is not listed here — confirm that is intentional.
"ciflow/inductor":
- torch/_dynamo/**
- torch/_inductor/**
- benchmarks/dynamo/**

View File

@ -22,6 +22,7 @@ VALID_TEST_CONFIG_LABELS = {f"{PREFIX}{label}" for label in {
"dynamo",
"force_on_cpu",
"functorch",
"inductor",
"jit_legacy",
"multigpu",
"nogpu_AVX512",

36
.github/workflows/inductor.yml vendored Normal file
View File

@ -0,0 +1,36 @@
# Inductor CI workflow: one build job followed by one test job for the
# linux-bionic-cuda11.6-py3.10-gcc7-sm86 build environment.
name: inductor
# Triggers: pushes to master, ciflow/inductor/* tags, and manual dispatch.
on:
push:
branches:
- master
tags:
- ciflow/inductor/*
workflow_dispatch:
# Runs that resolve to the same group cancel the one already in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
# Build job, delegated to the reusable _linux-build.yml workflow.
linux-bionic-cuda11_6-py3_10-gcc7-inductor-build:
name: cuda11.6-py3.10-gcc7-sm86
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-bionic-cuda11.6-py3.10-gcc7-sm86
docker-image-name: pytorch-linux-bionic-cuda11.6-cudnn8-py3-gcc7
cuda-arch-list: 8.6
# NOTE(review): the matrix below declares num_shards: 6 but lists only shards 1 and 2 —
# confirm the remaining shards are intentionally deferred.
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 6, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor", shard: 2, num_shards: 6, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
# Test job: waits for the build job and consumes its docker-image and test-matrix outputs.
linux-bionic-cuda11_6-py3_10-gcc7-inductor-test:
name: cuda11.6-py3.10-gcc7-sm86
uses: ./.github/workflows/_linux-test.yml
needs: linux-bionic-cuda11_6-py3_10-gcc7-inductor-build
with:
build-environment: linux-bionic-cuda11.6-py3.10-gcc7-sm86
docker-image: ${{ needs.linux-bionic-cuda11_6-py3_10-gcc7-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-bionic-cuda11_6-py3_10-gcc7-inductor-build.outputs.test-matrix }}

View File

@ -170,6 +170,33 @@ function test_torch_deploy(){
popd
}
function install_huggingface() {
  # Install pandas and scipy, then HuggingFace transformers from GitHub at the
  # commit returned by `get_pinned_commit huggingface`.
  local pinned_sha
  pinned_sha=$(get_pinned_commit huggingface)

  local dep
  for dep in pandas scipy; do
    pip_install "${dep}"
  done
  pip_install "git+https://github.com/huggingface/transformers.git@${pinned_sha}#egg=transformers"
}
function install_timm() {
  # Install pandas and scipy, then pytorch-image-models (timm) from GitHub at
  # the commit returned by `get_pinned_commit timm`.
  local pinned_sha
  pinned_sha=$(get_pinned_commit timm)

  local dep
  for dep in pandas scipy; do
    pip_install "${dep}"
  done
  pip_install "git+https://github.com/rwightman/pytorch-image-models@${pinned_sha}"
}
function checkout_install_torchbench() {
  # Clone pytorch/benchmark into ./torchbench, check out the commit returned by
  # `get_pinned_commit torchbench`, and run its install.py from inside the clone.
  local pinned_sha
  local repo_dir="torchbench"
  pinned_sha=$(get_pinned_commit torchbench)

  git clone https://github.com/pytorch/benchmark "${repo_dir}"
  pushd "${repo_dir}"
  git checkout "${pinned_sha}"
  python install.py
  # workaround issue in 0.26.0
  pip_install gym==0.25.2
  popd
}
function test_functorch() {
  # Run test/run_test.py with the functorch selection in verbose mode.
  local runner_args=(--functorch --verbose)
  python test/run_test.py "${runner_args[@]}"
}

View File

@ -109,6 +109,10 @@ if [[ "$TEST_CONFIG" == *dynamo* ]]; then
export PYTORCH_TEST_WITH_DYNAMO=1
fi
# Any test config whose name contains "inductor" turns on PYTORCH_TEST_WITH_INDUCTOR.
case "$TEST_CONFIG" in
  *inductor*)
    export PYTORCH_TEST_WITH_INDUCTOR=1
    ;;
esac
# TODO: this condition is never true, need to fix this.
if [[ -n "$PR_NUMBER" ]] && [[ -z "$CI_MASTER" || "$CI_MASTER" == "false" ]]; then
# skip expensive checks when on PR and CI_MASTER flag is not set
@ -249,6 +253,30 @@ test_dynamo_shard() {
assert_git_not_dirty
}
test_inductor() {
  # Placeholder: the inductor unit tests are not run yet, only a notice is printed.
  printf '%s\n' "TODO: enable inductor unit tests"
  # Commands intended to be enabled later:
  #   time python test/run_test.py --core --exclude test_autograd --continue-through-error --verbose
  # PYTORCH_TEST_WITH_DYNAMO and PYTORCH_TEST_WITH_INDUCTOR are only needed for PyTorch tests
  # that were not written using dynamo/inductor. For the dynamo/inductor unit tests,
  # specifying them triggers an error like
  # "Detected two calls to `torchdynamo.optimize(...)` with a different backend compiler arguments."
  # so both are switched off for that invocation:
  #   PYTORCH_TEST_WITH_DYNAMO=0 PYTORCH_TEST_WITH_INDUCTOR=0 pytest test/inductor
}
test_inductor_huggingface_shard() {
  # Run the HuggingFace inductor training-accuracy benchmark for one partition,
  # then run check_csv.py on the CSV report it wrote.
  # Globals:   NUM_TEST_SHARDS (read, must be non-empty)
  #            TEST_REPORTS_DIR (written)
  # Arguments: $1 - partition id, forwarded to --partition-id and used in the
  #                 report file name
  # Exits with status 1 when NUM_TEST_SHARDS or the partition id is missing.
  if [[ -z "${NUM_TEST_SHARDS:-}" ]]; then
    echo "NUM_TEST_SHARDS must be defined to run a Python test shard" >&2
    exit 1
  fi
  # Without this check an empty --partition-id would be passed to the benchmark
  # and the report would be written to inductor_huggingface_.csv.
  if [[ -z "${1:-}" ]]; then
    echo "test_inductor_huggingface_shard requires a partition id argument" >&2
    exit 1
  fi
  local partition_id="$1"

  TEST_REPORTS_DIR=/tmp/test-reports
  mkdir -p "$TEST_REPORTS_DIR"
  local report_csv="${TEST_REPORTS_DIR}/inductor_huggingface_${partition_id}.csv"

  python benchmarks/dynamo/huggingface.py --ci --training --accuracy \
    --device cuda --inductor --float32 --total-partitions 1 --partition-id "${partition_id}" \
    --output "${report_csv}"
  python benchmarks/dynamo/check_csv.py -f "${report_csv}"
}
test_python_gloo_with_tls() {
source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
assert_git_not_dirty
@ -699,6 +727,17 @@ elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHAR
install_filelock
install_triton
test_dynamo_shard 2
elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
install_filelock
install_triton
test_inductor
elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
install_filelock
install_triton
install_huggingface
test_inductor_huggingface_shard 0
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
test_without_numpy
install_torchvision

View File

@ -0,0 +1,40 @@
import argparse
import sys
import textwrap
import pandas as pd
def check_csv(filename):
    """
    Basic accuracy checking.

    Reads the CSV at ``filename``, prints the ``name`` and ``accuracy`` value of
    every row, and exits the process with status 1 if any row's accuracy value
    does not contain the substring "pass".

    An empty or non-string accuracy cell (pandas yields float NaN for an empty
    cell) is reported as a failure instead of raising a TypeError.
    """
    df = pd.read_csv(filename)

    failed = []
    for _, row in df.iterrows():
        # Coerce to str: the substring test and the join below both need
        # strings, but pandas may hand back NaN or numeric values.
        model_name = str(row["name"])
        status = str(row["accuracy"])
        if "pass" not in status:
            failed.append(model_name)

        print(f"{model_name:34} {status}")

    if failed:
        print(
            textwrap.dedent(
                f"""
                Error {len(failed)} models failed
                {' '.join(failed)}
                """
            )
        )
        sys.exit(1)
if __name__ == "__main__":
    # Command-line entry point: run check_csv on the CSV given via --file / -f.
    parser = argparse.ArgumentParser()
    # required=True so a missing file name is reported as a usage error by
    # argparse rather than reaching check_csv as None.
    parser.add_argument(
        "--file", "-f", type=str, required=True, help="csv file name"
    )
    args = parser.parse_args()
    check_csv(args.file)

View File

@ -147,6 +147,10 @@ CI_SKIP_INDUCTOR_TRAINING = [
"cait_m36_384", # fp64_OOM
"coat_lite_mini", # time out
"convit_base", # fp64_OOM
"gernet_l", # accuracy
"gluon_xception65",
"lcnet_0500", # accuracy
"levit_128", # levit_128
"rexnet_100", # accuracy
"swin_base_patch4_window7_224",
"twins_pcpvt_base", # time out