[inductor] Control the cpp_wrapper mode with an env variable (#116615)

Summary: also add one model test for the cpp_wrapper mode on CI

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116615
Approved by: https://github.com/angelayi
This commit is contained in:
Bin Bao
2024-01-02 08:08:55 -08:00
committed by PyTorch MergeBot
parent 295bdaafb7
commit 640d46f823
3 changed files with 10 additions and 7 deletions

View File

@ -390,8 +390,8 @@ test_perf_for_dashboard() {
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs --cpp-wrapper "$@" \
TORCHINDUCTOR_CPP_WRAPPER=1 python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *freezing_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
@ -492,6 +492,13 @@ test_inductor_torchbench_smoketest_perf() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
# smoke test the cpp_wrapper mode
TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy --bfloat16 \
--inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_smoketest.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_smoketest.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"

View File

@ -2884,9 +2884,6 @@ def parse_args(args=None):
"--batch-size-file", type=str, help="String to load batch size from"
)
parser.add_argument("--cosine", action="store_true", help="use cosine similarity")
parser.add_argument(
"--cpp-wrapper", action="store_true", help="turn on cpp/cuda wrapper codegen"
)
parser.add_argument(
"--freezing", action="store_true", help="turn on freezing", default=False
)
@ -3659,7 +3656,6 @@ def run(runner, args, original_dir=None):
)
inductor_config.split_reductions = not args.disable_split_reductions
inductor_config.triton.divisible_by_16 = not args.disable_divisible_by_16
inductor_config.cpp_wrapper = args.cpp_wrapper
if args.inference:
inductor_config.freezing = args.freezing

View File

@ -25,7 +25,7 @@ verbose_progress = False
fx_graph_cache = os.environ.get("TORCHINDUCTOR_FX_GRAPH_CACHE") == "1"
# use cpp wrapper instead of python wrapper
cpp_wrapper = False
cpp_wrapper = os.environ.get("TORCHINDUCTOR_CPP_WRAPPER", "0") == "1"
# dead code elimination
dce = False