Update base for Update on "[WIP] Add a dedicated registration API to support torch.compile-based aten implementation"

This PR is a follow-up of RFC https://github.com/pytorch/pytorch/issues/115545.

In this PR, we provide a registration mode that lets a single aten operation be implemented on top of `torch.compile` and then registered back to aten.

Today, a Python-based aten kernel implementation assumes hermetic Python objects. A `torch.compile`-based aten kernel implementation breaks that assumption, because:

> While HermeticPyObject was enabled, we attempted to create a tensor subclass with __torch_dispatch__.  This violates the invariant that operations in HermeticPyObject have equivalent C++ implementations.
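For illustration, here is a minimal sketch of the kind of registration this PR targets, written against today's `torch.library` API rather than the new dedicated one; the choice of `aten::sin` and the cosine-based identity are hypothetical:

```python
import torch

# Hypothetical sketch: implement an aten op in Python, wrap it with
# torch.compile, and route the aten CPU kernel to the compiled function.
@torch.compile
def sin_via_cos(x):
    # sin(x) == cos(x - pi/2); avoids calling aten::sin and recursing into
    # the very kernel being registered.
    return torch.cos(x - torch.pi / 2)

lib = torch.library.Library("aten", "IMPL")  # open the existing aten namespace
lib.impl("sin", sin_via_cos, "CPU")          # override aten::sin for CPU tensors

print(torch.sin(torch.tensor([0.0, 1.0])))  # now dispatches to sin_via_cos
```

A registration like this is exactly where the hermetic-PyObject invariant above can be violated: the compiled region may create tensor subclasses with `__torch_dispatch__`, which is what the dedicated registration mode in this PR is meant to handle.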




cc voznesenskym penguinwu jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx peterbell10 ipiszy yf225 chenyang78 kadeng muchulee8 aakhundov ColinPeppler

[ghstack-poisoned]
Wang, Eikan
2024-03-06 06:04:37 +00:00
7946 changed files with 82979 additions and 43546 deletions

View File

@ -204,7 +204,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=5.6
ROCM_VERSION=5.7
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
@ -215,7 +215,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=5.7
ROCM_VERSION=6.0
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
@ -277,6 +277,7 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
DOCS=yes
UNINSTALL_DILL=yes
;;
pytorch-linux-jammy-py3-clang12-executorch)
ANACONDA_PYTHON_VERSION=3.10
@ -349,7 +350,7 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
fi
# Build image
docker build \
DOCKER_BUILDKIT=1 docker build \
--no-cache \
--progress=plain \
--build-arg "BUILD_ENVIRONMENT=${image}" \

View File

@ -1 +1 @@
58a82f5e72a9ec0263a59d5f5d36a6769d12e230
566528fd7bf00badb72d2d9966ba6e301674217d

View File

@ -1 +1 @@
6c26faa159b79a42d7fa46cb66e2d21523351987
243e186efbf7fb93328dd6b34927a4e8c8f24395

View File

@ -1 +1 @@
dafe1459823b9549417ed95e9720f1b594fab329
0a22a91d04c2b4a029a69a198eac390089c3e891

View File

@ -1 +1 @@
e28a256d71f3cf2bcc7b69d6bda73a9b855e385e
901819d2b67bcb4543aa2645b729a9ff8ec32661

View File

@ -153,7 +153,7 @@ wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
cd valgrind-${VALGRIND_VERSION}
./configure --prefix=/usr/local
make -j6
make -j$[$(nproc) - 2]
sudo make install
cd ../../
rm -rf valgrind_build

View File

@ -52,9 +52,10 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ]; then
conda_install numpy=1.23.5 ${CONDA_COMMON_DEPS}
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then
conda_install numpy=1.26.0 ${CONDA_COMMON_DEPS}
else
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
fi

View File

@ -29,10 +29,11 @@ pip_install \
transformers==4.36.2
pip_install coloredlogs packaging
retry pip_install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --no-cache-dir --no-input ort-nightly==1.17.0.dev20231005006
pip_install -i https://test.pypi.org/simple/ onnx==1.15.0rc2
pip_install onnxscript==0.1.0.dev20240117 --no-deps
pip_install onnxruntime==1.17.0
pip_install onnx==1.15.0
# pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@1d6362db06706c13447e590ecf5ac3238efc1880" --no-deps
pip_install onnxscript==0.1.0.dev20240216 --no-deps
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/

View File

@ -9,7 +9,8 @@ tar xf "${OPENSSL}.tar.gz"
cd "${OPENSSL}"
./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
# NOTE: openssl install errors out when built with the -j option
make -j6; make install_sw
NPROC=$[$(nproc) - 2]
make -j${NPROC}; make install_sw
# Link the ssl libraries to the /usr/lib folder.
sudo ln -s /opt/openssl/lib/lib* /usr/lib
cd ..

View File

@ -2,55 +2,17 @@
set -ex
# This function installs protobuf 3.17
install_protobuf_317() {
pb_dir="/usr/temp_pb_install_dir"
mkdir -p $pb_dir
pb_dir="/usr/temp_pb_install_dir"
mkdir -p $pb_dir
# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
# else it will fail with
# g++: error: ./../lib64/crti.o: No such file or directory
ln -s /usr/lib64 "$pb_dir/lib64"
# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
# else it will fail with
# g++: error: ./../lib64/crti.o: No such file or directory
ln -s /usr/lib64 "$pb_dir/lib64"
curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
# -j6 to balance memory usage and speed.
# naked `-j` seems to use too much memory.
pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
popd
rm -rf $pb_dir
}
install_ubuntu() {
# Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
# install cmake3 here and use cmake3.
apt-get update
if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
apt-get install -y --no-install-recommends cmake3
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
install_protobuf_317
}
install_centos() {
install_protobuf_317
}
# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
NPROC=$[$(nproc) - 2]
pushd "$pb_dir" && ./configure && make -j${NPROC} && make -j${NPROC} check && sudo make -j${NRPOC} install && sudo ldconfig
popd
rm -rf $pb_dir

View File

@ -80,6 +80,14 @@ install_ubuntu() {
fi
fi
# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
done
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
@ -151,6 +159,14 @@ install_centos() {
fi
fi
# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
for kdb in /opt/rocm/share/miopen/db/*.kdb
do
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
done
fi
# Cleanup
yum clean all
rm -rf /var/cache/yum

View File

@ -7,7 +7,7 @@ git clone https://bitbucket.org/icl/magma.git
pushd magma
# Version 2.7.2 + ROCm related updates
git checkout 823531632140d0edcb7e77c3edc0e837421471c5
git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc

View File

@ -64,5 +64,6 @@ if [ -n "${CONDA_CMAKE}" ]; then
# latest numpy version, which fails ASAN tests with the following import error: Numba
# needs NumPy 1.20 or less.
conda_reinstall cmake="${CMAKE_VERSION}"
conda_reinstall numpy="${NUMPY_VERSION}"
# Note that we install numpy with pip as conda might not have the version we want
pip_install --force-reinstall numpy=="${NUMPY_VERSION}"
fi

View File

@ -36,7 +36,12 @@ function install_ucc() {
git submodule update --init --recursive
./autogen.sh
./configure --prefix=$UCC_HOME --with-ucx=$UCX_HOME --with-cuda=$with_cuda
# We only run distributed tests on Tesla M60 and A10G
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
./configure --prefix=$UCC_HOME \
--with-ucx=$UCX_HOME \
--with-cuda=$with_cuda \
--with-nvcc-gencode="${NVCC_GENCODE}"
time make -j
sudo make install

View File

@ -15,7 +15,7 @@ click
#Pinned versions:
#test that import:
coremltools==5.0b5
coremltools==5.0b5 ; python_version < "3.12"
#Description: Apple framework for ML integration
#Pinned versions: 5.0b5
#test that import:
@ -25,6 +25,11 @@ coremltools==5.0b5
#Pinned versions:
#test that import:
dill==0.3.7
#Description: dill extends pickle with serializing and de-serializing for most built-ins
#Pinned versions: 0.3.7
#test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py
expecttest==0.1.6
#Description: method for writing tests where test framework auto populates
# the expected output based on previous runs
@ -47,6 +52,11 @@ junitparser==2.1.1
#Pinned versions: 2.1.1
#test that import:
lark==0.12.0
#Description: parser
#Pinned versions: 0.12.0
#test that import:
librosa>=0.6.2 ; python_version < "3.11"
#Description: A python package for music and audio analysis
#Pinned versions: >=0.6.2
@ -66,7 +76,7 @@ librosa>=0.6.2 ; python_version < "3.11"
#Description: A testing library that allows you to replace parts of your
#system under test with mock objects
#Pinned versions:
#test that import: test_module_init.py, test_modules.py, test_nn.py,
#test that import: test_modules.py, test_nn.py,
#test_testing.py
#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8
@ -75,10 +85,10 @@ librosa>=0.6.2 ; python_version < "3.11"
#Pinned versions:
#test that import:
mypy==1.7.0
mypy==1.8.0
# Pin MyPy version because new errors are likely to appear with each release
#Description: linter
#Pinned versions: 1.7.0
#Pinned versions: 1.8.0
#test that import: test_typing.py, test_type_hints.py
networkx==2.8.8
@ -137,9 +147,9 @@ optree==0.9.1
#test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
#test_fake_tensor.py, test_mps.py
pillow==10.0.1
pillow==10.2.0
#Description: Python Imaging Library fork
#Pinned versions: 10.0.1
#Pinned versions: 10.2.0
#test that import:
protobuf==3.20.2
@ -162,11 +172,6 @@ pytest-xdist==3.3.1
#Pinned versions:
#test that import:
pytest-shard==0.1.2
#Description: plugin splitting up tests in pytest
#Pinned versions:
#test that import:
pytest-flakefinder==1.1.0
#Description: plugin for rerunning tests a fixed number of times in pytest
#Pinned versions: 1.1.0
@ -268,14 +273,14 @@ rockset==1.0.3
#Pinned versions: 1.0.3
#test that import:
ghstack==0.7.1
ghstack==0.8.0
#Description: ghstack tool
#Pinned versions: 0.7.1
#Pinned versions: 0.8.0
#test that import:
jinja2==3.1.2
jinja2==3.1.3
#Description: jinja2 template engine
#Pinned versions: 3.1.2
#Pinned versions: 3.1.3
#test that import:
pytest-cpp==2.3.0
@ -293,7 +298,8 @@ tensorboard==2.13.0
#Pinned versions:
#test that import: test_tensorboard
pywavelets==1.4.1
pywavelets==1.4.1 ; python_version < "3.12"
pywavelets==1.5.0 ; python_version >= "3.12"
#Description: This is a requirement of scikit-image, we need to pin
# it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
#Pinned versions: 1.4.1

View File

@ -1 +1 @@
2.2.0
3.0.0

View File

@ -37,6 +37,7 @@ COPY requirements-ci.txt requirements-docs.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
RUN if [ -n "${UNINSTALL_DILL}" ]; then pip uninstall -y dill; fi
# Install gcc
ARG GCC_VERSION

View File

@ -82,6 +82,13 @@ if ! which conda; then
fi
else
export CMAKE_PREFIX_PATH=/opt/conda
# Workaround required for MKL library linkage
# https://github.com/pytorch/pytorch/issues/119557
if [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then
export CMAKE_LIBRARY_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/lib/"
export CMAKE_INCLUDE_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/include/"
fi
fi
if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then

View File

@ -9,7 +9,7 @@ sysctl -a | grep machdep.cpu
# These are required for both the build job and the test job.
# In the latter to test cpp extensions.
export MACOSX_DEPLOYMENT_TARGET=11.0
export MACOSX_DEPLOYMENT_TARGET=11.1
export CXX=clang++
export CC=clang

View File

@ -34,7 +34,6 @@ time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test
# functional collective tests
time python test/run_test.py --verbose -i distributed/test_functional_api
# DTensor tests
time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops
time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compile
@ -49,6 +48,7 @@ time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_ex
# Other tests
time python test/run_test.py --verbose -i test_cuda_primary_ctx
time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
assert_git_not_dirty

View File

@ -130,6 +130,8 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm* ]]; then
export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda"
elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
# setting PYTHON_TEST_EXTRA_OPTION
export PYTHON_TEST_EXTRA_OPTION="--xpu"
fi
if [[ "$TEST_CONFIG" == *crossref* ]]; then
@ -137,6 +139,8 @@ if [[ "$TEST_CONFIG" == *crossref* ]]; then
fi
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
# regression in ROCm 6.0 on MI50 CI runners due to hipblaslt; remove in 6.1
export VALGRIND=OFF
# Print GPU info
rocminfo
rocminfo | grep -E 'Name:.*\sgfx|Marketing'
@ -250,14 +254,14 @@ test_python_shard() {
# Bare --include flag is not supported and quoting for lint ends up with flag not being interpreted correctly
# shellcheck disable=SC2086
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION
assert_git_not_dirty
}
test_python() {
# shellcheck disable=SC2086
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose $PYTHON_TEST_EXTRA_OPTION
assert_git_not_dirty
}
@ -274,6 +278,7 @@ test_dynamo_shard() {
--exclude-inductor-tests \
--exclude-jit-executor \
--exclude-distributed-tests \
--exclude-torch-export-tests \
--shard "$1" "$NUM_TEST_SHARDS" \
--verbose
assert_git_not_dirty
@ -285,8 +290,16 @@ test_inductor_distributed() {
pytest test/inductor/test_torchinductor.py -k test_multi_gpu
pytest test/inductor/test_aot_inductor.py -k test_non_default_cuda_device
pytest test/inductor/test_aot_inductor.py -k test_replicate_on_devices
pytest test/distributed/test_c10d_functional_native.py
pytest test/distributed/_tensor/test_dtensor_compile.py
pytest test/distributed/tensor/parallel/test_fsdp_2d_parallel.py
pytest test/distributed/_composable/fsdp/test_fully_shard_comm.py
pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group
pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing
pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_mlp
pytest test/distributed/_composable/fsdp/test_fully_shard_frozen.py
pytest test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype
pytest test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype
# this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
# with if required # gpus aren't available
@ -400,7 +413,7 @@ test_perf_for_dashboard() {
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_autotune_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *aotinductor-true* ]] && [[ "$mode" == "inference" ]]; then
python "benchmarks/dynamo/$suite.py" \
TORCHINDUCTOR_ABI_COMPATIBLE=1 python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
@ -444,6 +457,11 @@ test_single_dynamo_benchmark() {
test_perf_for_dashboard "$suite" \
"${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}"
else
if [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
# Test AOTInductor with the ABI-compatible mode on CI
# This can be removed once the ABI-compatible mode becomes default.
export TORCHINDUCTOR_ABI_COMPATIBLE=1
fi
python "benchmarks/dynamo/$suite.py" \
--ci --accuracy --timing --explain \
"${DYNAMO_BENCHMARK_FLAGS[@]}" \
@ -500,7 +518,7 @@ test_inductor_torchbench_smoketest_perf() {
# The threshold value needs to be actively maintained to make this check useful
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
--export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
# The threshold value needs to be actively maintained to make this check useful
# The perf number of nanogpt seems not very stable, e.g.
@ -521,6 +539,50 @@ test_inductor_torchbench_smoketest_perf() {
done
}
test_inductor_torchbench_cpu_smoketest_perf(){
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
#set jemalloc
JEMALLOC_LIB="/usr/lib/x86_64-linux-gnu/libjemalloc.so.2"
IOMP_LIB="$(dirname "$(which python)")/../lib/libiomp5.so"
export LD_PRELOAD="$JEMALLOC_LIB":"$IOMP_LIB":"$LD_PRELOAD"
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"
export KMP_AFFINITY=granularity=fine,compact,1,0
export KMP_BLOCKTIME=1
CORES=$(lscpu | grep Core | awk '{print $4}')
export OMP_NUM_THREADS=$CORES
end_core=$(( CORES-1 ))
MODELS_SPEEDUP_TARGET=benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv
grep -v '^ *#' < "$MODELS_SPEEDUP_TARGET" | while IFS=',' read -r -a model_cfg
do
local model_name=${model_cfg[0]}
local data_type=${model_cfg[1]}
local speedup_target=${model_cfg[4]}
if [[ ${model_cfg[3]} == "cpp" ]]; then
export TORCHINDUCTOR_CPP_WRAPPER=1
else
unset TORCHINDUCTOR_CPP_WRAPPER
fi
local output_name="$TEST_REPORTS_DIR/inductor_inference_${model_cfg[0]}_${model_cfg[1]}_${model_cfg[2]}_${model_cfg[3]}_cpu_smoketest.csv"
if [[ ${model_cfg[2]} == "dynamic" ]]; then
taskset -c 0-"$end_core" python benchmarks/dynamo/torchbench.py \
--inference --performance --"$data_type" -dcpu -n50 --only "$model_name" --dynamic-shapes \
--dynamic-batch-only --freezing --timeout 9000 --backend=inductor --output "$output_name"
else
taskset -c 0-"$end_core" python benchmarks/dynamo/torchbench.py \
--inference --performance --"$data_type" -dcpu -n50 --only "$model_name" \
--freezing --timeout 9000 --backend=inductor --output "$output_name"
fi
cat "$output_name"
# The threshold value needs to be actively maintained to make this check useful.
python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target"
done
}
test_python_gloo_with_tls() {
source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
assert_git_not_dirty
@ -920,7 +982,8 @@ test_bazel() {
tools/bazel test --config=cpu-only --test_timeout=480 --test_output=all --test_tag_filters=-gpu-required --test_filter=-*CUDA :all_tests
else
tools/bazel test --test_output=errors \
# Increase the test timeout to 480 like CPU tests because modules_test frequently timeout
tools/bazel test --test_timeout=480 --test_output=errors \
//:any_test \
//:autograd_test \
//:dataloader_test \
@ -1094,6 +1157,11 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_gcn \
llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
shufflenet_v2_x1_0 hf_GPT2
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
else
checkout_install_torchbench
# Do this after checkout_install_torchbench to ensure we clobber any

View File

@ -16,11 +16,6 @@ set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocol
set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
call %INSTALLER_DIR%\install_mkl.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
call %INSTALLER_DIR%\install_magma.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
@ -35,6 +30,10 @@ call %INSTALLER_DIR%\activate_miniconda3.bat
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
call pip install mkl-include==2021.4.0 mkl-devel==2021.4.0
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
:: Override VS env here
pushd .
if "%VC_VERSION%" == "" (
@ -89,8 +88,8 @@ set SCCACHE_IGNORE_SERVER_IO_ERROR=1
sccache --stop-server
sccache --start-server
sccache --zero-stats
set CC=sccache-cl
set CXX=sccache-cl
set CMAKE_C_COMPILER_LAUNCHER=sccache
set CMAKE_CXX_COMPILER_LAUNCHER=sccache
set CMAKE_GENERATOR=Ninja

View File

@ -1,14 +0,0 @@
if "%REBUILD%"=="" (
if "%BUILD_ENVIRONMENT%"=="" (
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z --output %TMP_DIR_WIN%\mkl.7z
) else (
aws s3 cp s3://ossci-windows/mkl_2020.2.254.7z %TMP_DIR_WIN%\mkl.7z --quiet
)
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
7z x -aoa %TMP_DIR_WIN%\mkl.7z -o%TMP_DIR_WIN%\mkl
if errorlevel 1 exit /b
if not errorlevel 0 exit /b
)
set CMAKE_INCLUDE_PATH=%TMP_DIR_WIN%\mkl\include
set LIB=%TMP_DIR_WIN%\mkl\lib;%LIB%

View File

@ -1,18 +1,13 @@
mkdir %TMP_DIR_WIN%\bin
if "%REBUILD%"=="" (
:check_sccache
%TMP_DIR_WIN%\bin\sccache.exe --show-stats || (
IF EXIST %TMP_DIR_WIN%\bin\sccache.exe (
taskkill /im sccache.exe /f /t || ver > nul
del %TMP_DIR_WIN%\bin\sccache.exe || ver > nul
del %TMP_DIR_WIN%\bin\sccache-cl.exe || ver > nul
if "%BUILD_ENVIRONMENT%"=="" (
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %TMP_DIR_WIN%\bin\sccache.exe
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output %TMP_DIR_WIN%\bin\sccache-cl.exe
) else (
aws s3 cp s3://ossci-windows/sccache.exe %TMP_DIR_WIN%\bin\sccache.exe
aws s3 cp s3://ossci-windows/sccache-cl.exe %TMP_DIR_WIN%\bin\sccache-cl.exe
)
goto :check_sccache
)
)
if "%BUILD_ENVIRONMENT%"=="" (
curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache-v0.7.4.exe --output %TMP_DIR_WIN%\bin\sccache.exe
) else (
aws s3 cp s3://ossci-windows/sccache-v0.7.4.exe %TMP_DIR_WIN%\bin\sccache.exe
)
)

View File

@ -1,468 +1,4 @@
Warning
=======
Contents may be out of date. Our CircleCI workflows are gradually being migrated to Github actions.
Structure of CI
===============
setup job:
1. Does a git checkout
2. Persists CircleCI scripts (everything in `.circleci`) into a workspace. Why?
We don't always do a Git checkout on all subjobs, but we usually
still want to be able to call scripts one way or another in a subjob.
Persisting files this way lets us have access to them without doing a
checkout. This workspace is conventionally mounted on `~/workspace`
(this is distinguished from `~/project`, which is the conventional
working directory that CircleCI will default to starting your jobs
in.)
3. Write out the commit message to `.circleci/COMMIT_MSG`. This is so
we can determine in subjobs if we should actually run the jobs or
not, even if there isn't a Git checkout.
CircleCI configuration generator
================================
One may no longer make changes to the `.circleci/config.yml` file directly.
Instead, one must edit these Python scripts or files in the `verbatim-sources/` directory.
Usage
----------
1. Make changes to these scripts.
2. Run the `regenerate.sh` script in this directory and commit the script changes and the resulting change to `config.yml`.
You'll see a build failure on GitHub if the scripts don't agree with the checked-in version.
Motivation
----------
These scripts establish a single, authoritative source of documentation for the CircleCI configuration matrix.
The documentation, in the form of diagrams, is automatically generated and cannot drift out of sync with the YAML content.
Furthermore, consistency is enforced within the YAML config itself, by using a single source of data to generate
multiple parts of the file.
* Facilitates one-off culling/enabling of CI configs for testing PRs on special targets
Also see https://github.com/pytorch/pytorch/issues/17038
Future direction
----------------
### Declaring sparse config subsets
See comment [here](https://github.com/pytorch/pytorch/pull/17323#pullrequestreview-206945747):
In contrast with a full recursive tree traversal of configuration dimensions,
> in the future I think we actually want to decrease our matrix somewhat and have only a few mostly-orthogonal builds that taste as many different features as possible on PRs, plus a more complete suite on every PR and maybe an almost full suite nightly/weekly (we don't have this yet). Specifying PR jobs in the future might be easier to read with an explicit list when we come to this.
----------------
----------------
# How do the binaries / nightlies / releases work?
### What is a binary?
A binary or package (used interchangeably) is a pre-built collection of c++ libraries, header files, python bits, and other files. We build these and distribute them so that users do not need to install from source.
A **binary configuration** is a collection of
* release or nightly
* releases are stable, nightlies are beta and built every night
* python version
* linux: 3.7m (mu is wide unicode or something like that. It usually doesn't matter but you should know that it exists)
* macos: 3.7, 3.8
* windows: 3.7, 3.8
* cpu version
* cpu, cuda 9.0, cuda 10.0
* The supported cuda versions occasionally change
* operating system
* Linux - these are all built on CentOS. There haven't been any problems in the past building on CentOS and using on Ubuntu
* MacOS
* Windows - these are built on Azure pipelines
* devtoolset version (gcc compiler version)
* This only matters on Linux because only Linux uses gcc. The tl;dr is that gcc made a backwards incompatible change from gcc 4.8 to gcc 5, because it had to change how it implemented std::vector and std::string
### Where are the binaries?
The binaries are built in CircleCI. There are nightly binaries built every night at 9pm PST (midnight EST) and release binaries corresponding to Pytorch releases, usually every few months.
We have 3 types of binary packages
* pip packages - nightlies are stored on s3 (pip install -f \<a s3 url\>). releases are stored in a pip repo (pip install torch) (ask Soumith about this)
* conda packages - nightlies and releases are both stored in a conda repo. Nightly packages have a '_nightly' suffix
* libtorch packages - these are zips of all the c++ libraries, header files, and sometimes dependencies. These are c++ only
* shared with dependencies (the only supported option for Windows)
* static with dependencies
* shared without dependencies
* static without dependencies
All binaries are built in CircleCI workflows except Windows. There are checked-in workflows (committed into the .circleci/config.yml) to build the nightlies every night. Releases are built by manually pushing a PR that builds the suite of release binaries (overwrite the config.yml to build the release)
# CircleCI structure of the binaries
Some quick vocab:
* A \**workflow** is a CircleCI concept; it is a DAG of '**jobs**'. ctrl-f 'workflows' on https://github.com/pytorch/pytorch/blob/main/.circleci/config.yml to see the workflows.
* **jobs** are a sequence of '**steps**'
* **steps** are usually just a bash script or a builtin CircleCI command. *All steps run in new environments, environment variables declared in one script DO NOT persist to following steps*
* CircleCI has a **workspace**, which is essentially a cache between steps of the *same job* in which you can store artifacts between steps.
## How are the workflows structured?
The nightly binaries have 3 workflows. We have one job (actually 3 jobs: build, test, and upload) per binary configuration
1. binary_builds
1. every day midnight EST
2. linux: https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/linux-binary-build-defaults.yml
3. macos: https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/macos-binary-build-defaults.yml
4. For each binary configuration, e.g. linux_conda_3.7_cpu there is a
1. binary_linux_conda_3.7_cpu_build
1. Builds the build. On linux jobs this uses the 'docker executor'.
2. Persists the package to the workspace
2. binary_linux_conda_3.7_cpu_test
1. Loads the package to the workspace
2. Spins up a docker image (on Linux), mapping the package and code repos into the docker
3. Runs some smoke tests in the docker
4. (Actually, for macos this is a step rather than a separate job)
3. binary_linux_conda_3.7_cpu_upload
1. Logs in to aws/conda
2. Uploads the package
2. update_s3_htmls
1. every day 5am EST
2. https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/binary_update_htmls.yml
3. See below for what these are for and why they're needed
4. Three jobs that each examine the current contents of aws and the conda repo and update some html files in s3
3. binarysmoketests
1. every day
2. https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/nightly-build-smoke-tests-defaults.yml
3. For each binary configuration, e.g. linux_conda_3.7_cpu there is a
1. smoke_linux_conda_3.7_cpu
1. Downloads the package from the cloud, e.g. using the official pip or conda instructions
2. Runs the smoke tests
## How are the jobs structured?
The jobs are in https://github.com/pytorch/pytorch/tree/main/.circleci/verbatim-sources. Jobs are made of multiple steps. There are some shared steps used by all the binaries/smokes. Steps of these jobs are all delegated to scripts in https://github.com/pytorch/pytorch/tree/main/.circleci/scripts .
* Linux jobs: https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/linux-binary-build-defaults.yml
* binary_linux_build.sh
* binary_linux_test.sh
* binary_linux_upload.sh
* MacOS jobs: https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/macos-binary-build-defaults.yml
* binary_macos_build.sh
* binary_macos_test.sh
* binary_macos_upload.sh
* Update html jobs: https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/binary_update_htmls.yml
* These delegate from the pytorch/builder repo
* https://github.com/pytorch/builder/blob/main/cron/update_s3_htmls.sh
* https://github.com/pytorch/builder/blob/main/cron/upload_binary_sizes.sh
* Smoke jobs (both linux and macos): https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/nightly-build-smoke-tests-defaults.yml
* These delegate from the pytorch/builder repo
* https://github.com/pytorch/builder/blob/main/run_tests.sh
* https://github.com/pytorch/builder/blob/main/smoke_test.sh
* https://github.com/pytorch/builder/blob/main/check_binary.sh
* Common shared code (shared across linux and macos): https://github.com/pytorch/pytorch/blob/main/.circleci/verbatim-sources/nightly-binary-build-defaults.yml
* binary_checkout.sh - checks out pytorch/builder repo. Right now this also checks out pytorch/pytorch, but it shouldn't. pytorch/pytorch should just be shared through the workspace. This can handle being run before binary_populate_env.sh
* binary_populate_env.sh - parses BUILD_ENVIRONMENT into the separate env variables that make up a binary configuration. Also sets lots of default values, the date, the version strings, the location of folders in s3, all sorts of things. This generally has to be run before other steps.
* binary_install_miniconda.sh - Installs miniconda, cross platform. Also hacks this for the update_binary_sizes job that doesn't have the right env variables
* binary_run_in_docker.sh - Takes a bash script file (the actual test code) from a hardcoded location, spins up a docker image, and runs the script inside the docker image
### **Why do the steps all refer to scripts?**
CircleCI creates a final yaml file by inlining every <<* segment, so if we were to keep all the code in the config.yml itself then the config size would go over 4 MB and cause infra problems.
### **What is binary_run_in_docker for?**
So, CircleCI has several executor types: macos, machine, and docker are the ones we use. The 'machine' executor gives you two cores on some linux vm. The 'docker' executor gives you considerably more cores (nproc was 32 instead of 2 back when I tried in February). Since the dockers are faster, we try to run everything that we can in dockers. Thus
* linux build jobs use the docker executor. Running them on the docker executor was at least 2x faster than running them on the machine executor
* linux test jobs use the machine executor in order for them to properly interface with GPUs since docker executors cannot execute with attached GPUs
* linux upload jobs use the machine executor. The upload jobs are so short that it doesn't really matter what they use
* linux smoke test jobs use the machine executor for the same reason as the linux test jobs
binary_run_in_docker.sh is a way to share the docker start-up code between the binary test jobs and the binary smoke test jobs
### **Why does binary_checkout also checkout pytorch? Why shouldn't it?**
We want all the nightly binary jobs to run on the exact same git commit, so we wrote our own checkout logic to ensure that the same commit was always picked. Later circleci changed that to use a single pytorch checkout and persist it through the workspace (they did this because our config file was too big, so they wanted to take a lot of the setup code into scripts, but the scripts needed the code repo to exist to be called, so they added a prereq step called 'setup' to checkout the code and persist the needed scripts to the workspace). The changes to the binary jobs were not properly tested, so they all broke from missing pytorch code no longer existing. We hotfixed the problem by adding the pytorch checkout back to binary_checkout, so now there's two checkouts of pytorch on the binary jobs. This problem still needs to be fixed, but it takes careful tracing of which code is being called where.
# Code structure of the binaries (circleci agnostic)
## Overview
The code that runs the binaries lives in two places, in the normal [github.com/pytorch/pytorch](http://github.com/pytorch/pytorch), but also in [github.com/pytorch/builder](http://github.com/pytorch/builder), which is a repo that defines how all the binaries are built. The relevant code is
```
# All code needed to set-up environments for build code to run in,
# but only code that is specific to the current CI system
pytorch/pytorch
- .circleci/ # Folder that holds all circleci related stuff
- config.yml # GENERATED file that actually controls all circleci behavior
- verbatim-sources # Used to generate job/workflow sections in ^
- scripts/ # Code needed to prepare circleci environments for binary build scripts
- setup.py # Builds pytorch. This is wrapped in pytorch/builder
- cmake files # used in normal building of pytorch
# All code needed to prepare a binary build, given an environment
# with all the right variables/packages/paths.
pytorch/builder
# Given an installed binary and a proper python env, runs some checks
# to make sure the binary was built the proper way. Checks things like
# the library dependencies, symbols present, etc.
- check_binary.sh
# Given an installed binary, runs python tests to make sure everything
# is in order. These should be de-duped. Right now they both run smoke
# tests, but are called from different places. Usually just call some
# import statements, but also has overlap with check_binary.sh above
- run_tests.sh
- smoke_test.sh
# Folders that govern how packages are built. See paragraphs below
- conda/
- build_pytorch.sh # Entrypoint. Delegates to proper conda build folder
- switch_cuda_version.sh # Switches activate CUDA installation in Docker
- pytorch-nightly/ # Build-folder
- manywheel/
- build_cpu.sh # Entrypoint for cpu builds
- build.sh # Entrypoint for CUDA builds
- build_common.sh # Actual build script that ^^ call into
- wheel/
- build_wheel.sh # Entrypoint for wheel builds
- windows/
- build_pytorch.bat # Entrypoint for wheel builds on Windows
```
Every type of package has an entrypoint build script that handles all the important logic.
## Conda
Linux, MacOS and Windows use the same code flow for the conda builds.
Conda packages are built with conda-build, see https://conda.io/projects/conda-build/en/latest/resources/commands/conda-build.html
Basically, you pass `conda build` a build folder (pytorch-nightly/ above) that contains a build script and a meta.yaml. The meta.yaml specifies what python environment to build the package in and what dependencies the resulting package should have, and the build script gets called in the env to build the thing.
tl;dr on conda-build is
1. Creates a brand new conda environment, based off of deps in the meta.yaml
1. Note that environment variables do not get passed into this build env unless they are specified in the meta.yaml
2. If the build fails this environment will stick around. You can activate it for much easier debugging. The “General Python” section below explains what exactly a python “environment” is.
2. Calls build.sh in the environment
3. Copies the finished package to a new conda env, also specified by the meta.yaml
4. Runs some simple import tests (if specified in the meta.yaml)
5. Saves the finished package as a tarball
The build.sh we use is essentially a wrapper around `python setup.py build`, but it also manually copies in some of our dependent libraries into the resulting tarball and messes with some rpaths.
The entrypoint file `builder/conda/build_conda.sh` is complicated because
* It works for Linux, MacOS and Windows
* The mac builds used to create their own environments, since they all used to be on the same machine. There's now a lot of extra logic to handle conda envs. This extra machinery could be removed
* It used to handle testing too, which adds more logic messing with python environments too. This extra machinery could be removed.
## Manywheels (linux pip and libtorch packages)
Manywheels are pip packages for linux distros. Note that these manywheels are not actually manylinux compliant.
`builder/manywheel/build_cpu.sh` and `builder/manywheel/build.sh` (for CUDA builds) just set different env vars and then call into `builder/manywheel/build_common.sh`
The entrypoint file `builder/manywheel/build_common.sh` is really really complicated because
* This used to handle building for several different python versions at the same time. The loops have been removed, but there's still unnecessary folders and movements here and there.
* The script is never used this way anymore. This extra machinery could be removed.
* This used to handle testing the pip packages too. This is why there's testing code at the end that messes with python installations and stuff
* The script is never used this way anymore. This extra machinery could be removed.
* This also builds libtorch packages
* This should really be separate. libtorch packages are c++ only and have no python. They should not share infra with all the python specific stuff in this file.
* There is a lot of messing with rpaths. This is necessary, but could be made much much simpler if the above issues were fixed.
## Wheels (MacOS pip and libtorch packages)
The entrypoint file `builder/wheel/build_wheel.sh` is complicated because
* The mac builds used to all run on one machine (we didn't have autoscaling mac machines till circleci). So this script handled siloing itself by setting up and tearing down its build env and siloing itself into its own build directory.
* The script is never used this way anymore. This extra machinery could be removed.
* This also builds libtorch packages
* Ditto the comment above. This should definitely be separated out.
Note that the MacOS Python wheels are still built in conda environments. Some of the dependencies present during build also come from conda.
## Windows Wheels (Windows pip and libtorch packages)
The entrypoint file `builder/windows/build_pytorch.bat` is complicated because
* This used to handle building for several different python versions at the same time. This is why there are loops everywhere
* The script is never used this way anymore. This extra machinery could be removed.
* This used to handle testing the pip packages too. This is why there's testing code at the end that messes with python installations and stuff
* The script is never used this way anymore. This extra machinery could be removed.
* This also builds libtorch packages
* This should really be separate. libtorch packages are c++ only and have no python. They should not share infra with all the python specific stuff in this file.
Note that the Windows Python wheels are still built in conda environments. Some of the dependencies present during build also come from conda.
## General notes
### Note on run_tests.sh, smoke_test.sh, and check_binary.sh
* These should all be consolidated
* These must run on all OS types: MacOS, Linux, and Windows
* These all run smoke tests at the moment. They inspect the packages some, maybe run a few import statements. They DO NOT run the python tests nor the cpp tests. The idea is that python tests on main and PR merges will catch all breakages. All these tests have to do is make sure the special binary machinery didn't mess anything up.
* There are separate run_tests.sh and smoke_test.sh because one used to be called by the smoke jobs and one used to be called by the binary test jobs (see circleci structure section above). This is still true actually, but these could be united into a single script that runs these checks, given an installed pytorch package.
### Note on libtorch
Libtorch packages are built in the wheel build scripts: manywheel/build_*.sh for linux and build_wheel.sh for mac. There are several things wrong with this
* It's confusing. Most of those scripts deal with python specifics.
* The extra conditionals everywhere severely complicate the wheel build scripts
* The process for building libtorch is different from the official instructions (a plain call to cmake, or a call to a script)
### Note on docker images / Dockerfiles
All linux builds occur in docker images. The docker images are
* pytorch/conda-cuda
* Has ALL CUDA versions installed. The script pytorch/builder/conda/switch_cuda_version.sh sets /usr/local/cuda to a symlink to e.g. /usr/local/cuda-10.0 to enable different CUDA builds
* Also used for cpu builds
* pytorch/manylinux-cuda90
* pytorch/manylinux-cuda100
* Also used for cpu builds
The Dockerfiles are available in pytorch/builder, but there is no circleci job or script to build these docker images, and they cannot be run locally (unless you have the correct local packages/paths). Only Soumith can build them right now.
### General Python
* This is still a good explanation of python installations https://caffe2.ai/docs/faq.html#why-do-i-get-import-errors-in-python-when-i-try-to-use-caffe2
# How to manually rebuild the binaries
tl;dr make a PR that looks like https://github.com/pytorch/pytorch/pull/21159
Sometimes we want to push a change to main and then rebuild all of today's binaries after that change. As of May 30, 2019 there isn't a way to manually run a workflow in the UI. You can manually re-run a workflow, but it will use the exact same git commits as the first run and will not include any changes. So we have to make a PR and then force circleci to run the binary workflow instead of the normal tests. The above PR is an example of how to do this; essentially you copy-paste the binarybuilds workflow steps into the default workflow steps. If you need to point the builder repo to a different commit then you'd need to change https://github.com/pytorch/pytorch/blob/main/.circleci/scripts/binary_checkout.sh#L42-L45 to checkout what you want.
## How to test changes to the binaries via .circleci
Writing PRs that test the binaries is annoying, since the default circleci jobs that run on PRs are not the jobs that you want to run. Likely, changes to the binaries will touch something under .circleci/ and require that .circleci/config.yml be regenerated (.circleci/config.yml controls all .circleci behavior, and is generated using `.circleci/regenerate.sh` in python 3.7). But you also need to manually hardcode the binary jobs that you want to test into the .circleci/config.yml workflow, so you should actually make at least two commits, one for your changes and one to temporarily hardcode jobs. See https://github.com/pytorch/pytorch/pull/22928 as an example of how to do this.
```sh
# Make your changes
touch .circleci/verbatim-sources/nightly-binary-build-defaults.yml
# Regenerate the yaml, has to be in python 3.7
.circleci/regenerate.sh
# Make a commit
git add .circleci *
git commit -m "My real changes"
git push origin my_branch
# Now hardcode the jobs that you want in the .circleci/config.yml workflows section
# Also eliminate ensure-consistency and should_run_job checks
# e.g. https://github.com/pytorch/pytorch/commit/2b3344bfed8772fe86e5210cc4ee915dee42b32d
# Make a commit you won't keep
git add .circleci
git commit -m "[DO NOT LAND] testing binaries for above changes"
git push origin my_branch
# Now you need to make some changes to the first commit.
git rebase -i HEAD~2 # mark the first commit as 'edit'
# Make the changes
touch .circleci/verbatim-sources/nightly-binary-build-defaults.yml
.circleci/regenerate.sh
# Amend the commit and continue the rebase
git add .circleci
git commit --amend
git rebase --continue
# Update the PR, need to force since the commits are different now
git push origin my_branch --force
```
The advantage of this flow is that you can make new changes to the base commit and regenerate the .circleci without having to re-write which binary jobs you want to test on. The downside is that all updates will be force pushes.
## How to build a binary locally
### Linux
You can build Linux binaries locally easily using docker.
```sh
# Run the docker
# Use the correct docker image, pytorch/conda-cuda used here as an example
#
# -v path/to/foo:path/to/bar makes path/to/foo on your local machine (the
# machine that you're running the command on) accessible to the docker
# container at path/to/bar. So if you then run `touch path/to/bar/baz`
# in the docker container then you will see path/to/foo/baz on your local
# machine. You could also clone the pytorch and builder repos in the docker.
#
# If you know how, add ccache as a volume too and speed up everything
docker run \
-v your/pytorch/repo:/pytorch \
-v your/builder/repo:/builder \
-v where/you/want/packages/to/appear:/final_pkgs \
-it pytorch/conda-cuda /bin/bash
# Export whatever variables are important to you. All variables that you'd
# possibly need are in .circleci/scripts/binary_populate_env.sh
# You should probably always export at least these 3 variables
export PACKAGE_TYPE=conda
export DESIRED_PYTHON=3.7
export DESIRED_CUDA=cpu
# Call the entrypoint
# `|& tee foo.log` just copies all stdout and stderr output to foo.log
# The builds generate lots of output so you probably need this when
# building locally.
/builder/conda/build_pytorch.sh |& tee build_output.log
```
**Building CUDA binaries on docker**
You can build CUDA binaries on CPU-only machines, but you can only run CUDA binaries on CUDA machines. This means that you can build a CUDA binary in a docker container on your laptop if you so choose (though it's gonna take a long time).
For Facebook employees, ask about beefy machines that have docker support and use those instead of your laptop; it will be 5x as fast.
### MacOS
There's no easy way to generate reproducible hermetic MacOS environments. If you have a Mac laptop then you can try emulating the .circleci environments as much as possible, but you probably have packages in /usr/local/, possibly installed by brew, that will probably interfere with the build. If you're trying to repro an error on a Mac build in .circleci and you can't seem to repro locally, then my best advice is actually to iterate on .circleci :/
But if you want to try, then I'd recommend
```sh
# Create a new terminal
# Clear your LD_LIBRARY_PATH and trim as much out of your PATH as you
# know how to do
# Install a new miniconda
# First remove any other python or conda installation from your PATH
# Always install miniconda 3, even if building for Python <3
new_conda="~/my_new_conda"
conda_sh="$new_conda/install_miniconda.sh"
curl -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
chmod +x "$conda_sh"
"$conda_sh" -b -p "$MINICONDA_ROOT"
rm -f "$conda_sh"
export PATH="~/my_new_conda/bin:$PATH"
# Create a clean python env
# All MacOS builds use conda to manage the python env and dependencies
# that are built with, even the pip packages
conda create -yn binary python=2.7
conda activate binary
# Export whatever variables are important to you. All variables that you'd
# possibly need are in .circleci/scripts/binary_populate_env.sh
# You should probably always export at least these 3 variables
export PACKAGE_TYPE=conda
export DESIRED_PYTHON=3.7
export DESIRED_CUDA=cpu
# Call the entrypoint you want
path/to/builder/wheel/build_wheel.sh
```
N.B. installing a brand new miniconda is important. This has to do with how conda installations work. See the “General Python” section above, but the tl;dr is that
1. You make the conda command accessible by prepending `path/to/conda_root/bin` to your PATH.
2. You make a new env and activate it, which then also gets prepended to your PATH. Now you have `path/to/conda_root/envs/new_env/bin:path/to/conda_root/bin:$PATH`
3. Now say you (or some code that you ran) call python executable `foo`
1. if you installed `foo` in `new_env`, then `path/to/conda_root/envs/new_env/bin/foo` will get called, as expected.
2. But if you forgot to install `foo` in `new_env` but happened to have previously installed it in your root conda env (called base), then unix/linux will still find `path/to/conda_root/bin/foo` . This is dangerous, since `foo` can be a different version than you want; `foo` can even be for an incompatible python version!
Newer conda versions and proper python hygiene can prevent this, but just install a new miniconda to be safe.
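The lookup order above can be sketched concretely (hypothetical paths and a hypothetical executable name `foo`):

```python
import os
import shutil

# Hypothetical layout: new_env's bin dir shadows the root conda bin dir.
os.environ["PATH"] = (
    "path/to/conda_root/envs/new_env/bin:path/to/conda_root/bin:"
    + os.environ["PATH"]
)

# Resolves to new_env's foo if installed there; otherwise the search silently
# falls through to path/to/conda_root/bin/foo -- the dangerous fallback above.
print(shutil.which("foo"))
```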
### Windows
TODO: fill in
The PyTorch migration from CircleCI to GitHub Actions has been completed. All continuous integration & deployment workflows are defined in the `.github/workflows` folder.

View File

@ -42,7 +42,6 @@ misc-*,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers,
modernize-*,
-modernize-concat-nested-namespaces,
-modernize-macro-to-enum,
-modernize-return-braced-init-list,
-modernize-use-auto,

.flake8
View File

@ -2,7 +2,7 @@
# NOTE: **Mirror any changes** to this file the [tool.ruff] config in pyproject.toml
# before we can fully move to use ruff
enable-extensions = G
select = B,C,E,F,G,P,SIM1,T4,W,B9,TOR0,TOR1,TOR2
select = B,C,E,F,G,P,SIM1,T4,W,B9,TOR0,TOR1,TOR2,TOR9
max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
@ -27,6 +27,9 @@ ignore =
# TODO(kit1980): fix all TOR102 issues
# `torch.load` without `weights_only` parameter is unsafe
TOR102,
# TODO(kit1980): resolve all TOR003 issues
# pass `use_reentrant` explicitly to `checkpoint`.
TOR003
per-file-ignores =
__init__.py: F401
test/**: F821
@ -34,6 +37,23 @@ per-file-ignores =
torch/utils/cpp_extension.py: B950
torchgen/api/types/__init__.py: F401,F403
torchgen/executorch/api/types/__init__.py: F401,F403
test/dynamo/test_higher_order_ops.py: B950
torch/testing/_internal/dynamo_test_failures.py: B950
# TOR901 is only for test, we want to ignore it for everything else.
# It's not easy to configure this without affecting other per-file-ignores,
# so we explicitly list every file where it's violated outside of test.
torch/__init__.py: F401,TOR901
torch/_custom_op/impl.py: TOR901
torch/_export/serde/upgrade.py: TOR901
torch/_functorch/vmap.py: TOR901
torch/_inductor/test_operators.py: TOR901
torch/_library/abstract_impl.py: TOR901
torch/_meta_registrations.py: TOR901
torch/_prims/__init__.py: F401,TOR901
torch/_prims/rng_prims.py: TOR901
torch/ao/quantization/fx/_decomposed.py: TOR901
torch/distributed/_functional_collectives.py: TOR901
torch/distributed/_spmd/data_parallel.py: TOR901
optional-ascii-coding = True
exclude =
./.git,

View File

@ -20,6 +20,7 @@ self-hosted-runner:
- bm-runner
- linux.rocm.gpu
- macos-m1-12
- macos-m1-stable
- macos-m1-13
- macos-12-xl
- macos-12

View File

@ -26,11 +26,20 @@ outputs:
description: True if the filtered test configs matrix is empty. False otherwise.
value: ${{ steps.filter.outputs.is-test-matrix-empty }}
keep-going:
description: True if keep-going label was on PR.
description: True if keep-going label was on PR or [keep-going] in PR body.
value: ${{ steps.filter.outputs.keep-going }}
reenabled-issues:
description: Comma separated list of issue numbers that should correspond to disable test issues that the PR fixes
value: ${{ steps.filter.outputs.reenabled-issues }}
ci-verbose-test-logs:
description: True if ci-verbose-test-logs label was on PR or [ci-verbose-test-logs] in PR body.
value: ${{ steps.filter.outputs.ci-verbose-test-logs }}
ci-no-test-timeout:
description: True if ci-no-test-timeout label was on PR or [ci-no-test-timeout] in PR body.
value: ${{ steps.filter.outputs.ci-no-test-timeout }}
ci-no-td:
description: True if ci-no-td label was on PR or [ci-no-td] in PR body.
value: ${{ steps.filter.outputs.ci-no-td }}
runs:
using: composite

View File

@ -6,7 +6,6 @@ reviewers:
- albanD
- miladm
- bdhirsh
- voznesenskym
per_author:
symbolic-shapes:

View File

@ -1 +1 @@
b2d9c3e315405f2b5cfdfa5b93f849d5b27a4109
5286f9f60d8647fb4a490cdf22eac39a54e63a80

View File

@ -1 +1 @@
a00a72b1ee41483407717379fb5cafe992de2f82
a52607ece94aedbe41107617ace22a8da91efc25

View File

@ -1 +1 @@
2990cb38c17e06d0dbe25437674ca40130d76a8f
fba464b199559f61faa720de8bf64cf955cfdce7

View File

@ -4,6 +4,6 @@ mkl-include=2022.1.0
ninja=1.10.2
numpy=1.23.3
pyyaml=6.0
requests=2.28.1
setuptools=65.5.0
requests=2.31.0
setuptools=68.2.2
typing-extensions=4.3.0

View File

@ -3,6 +3,6 @@ cmake=3.22.1
ninja=1.10.2
numpy=1.23.3
pyyaml=6.0
requests=2.28.1
setuptools=63.4.1
requests=2.31.0
setuptools=68.2.2
typing-extensions=4.3.0

View File

@ -16,7 +16,6 @@ pytest==7.3.2
pytest-xdist==3.3.1
pytest-rerunfailures==10.3
pytest-flakefinder==1.1.0
pytest-shard==0.1.2
scipy==1.10.1
sympy==1.11.1
unittest-xml-reporting<=3.2.0,>=2.0.0

View File

@ -109,7 +109,7 @@ def build_triton(
print("source:\n path: .\n", file=meta)
print(
"build:\n string: py{{py}}\n number: 1\n script: cd python; "
"python setup.py install --single-version-externally-managed --record=record.txt\n",
"python setup.py install --record=record.txt\n",
" script_env:\n - MAX_JOBS\n",
file=meta,
)

.github/scripts/cherry_pick.py vendored Executable file
View File

@ -0,0 +1,223 @@
#!/usr/bin/env python3
import json
import os
import re
from typing import Any, Optional
from urllib.error import HTTPError
from github_utils import gh_fetch_url, gh_post_pr_comment
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from trymerge import get_pr_commit_sha, GitHubPR
# This is only a suggestion for now, not a strict requirement
REQUIRES_ISSUE = {
"regression",
"critical",
"fixnewfeature",
}
def parse_args() -> Any:
from argparse import ArgumentParser
parser = ArgumentParser("cherry pick a landed PR onto a release branch")
parser.add_argument(
"--onto-branch", type=str, required=True, help="the target release branch"
)
parser.add_argument(
"--github-actor", type=str, required=True, help="all the worlds a stage"
)
parser.add_argument(
"--classification",
choices=["regression", "critical", "fixnewfeature", "docs", "release"],
required=True,
help="the cherry pick category",
)
parser.add_argument("pr_num", type=int)
parser.add_argument(
"--fixes",
type=str,
default="",
help="the GitHub issue that the cherry pick fixes",
)
parser.add_argument("--dry-run", action="store_true")
return parser.parse_args()
def get_merge_commit_sha(repo: GitRepo, pr: GitHubPR) -> Optional[str]:
"""
Return the merge commit SHA iff the PR has been merged. For simplicity, we
will only cherry pick PRs that have been merged into main
"""
commit_sha = get_pr_commit_sha(repo, pr)
return commit_sha if pr.is_closed() else None
def cherry_pick(
github_actor: str,
repo: GitRepo,
pr: GitHubPR,
commit_sha: str,
onto_branch: str,
classification: str,
fixes: str,
dry_run: bool = False,
) -> None:
"""
Create a local branch to cherry pick the commit and submit it as a pull request
"""
current_branch = repo.current_branch()
cherry_pick_branch = create_cherry_pick_branch(
github_actor, repo, pr, commit_sha, onto_branch
)
try:
if not dry_run:
org, project = repo.gh_owner_and_name()
cherry_pick_pr = submit_pr(repo, pr, cherry_pick_branch, onto_branch)
msg = f"The cherry pick PR is at {cherry_pick_pr}"
if fixes:
msg += f" and it is linked with issue {fixes}"
elif classification in REQUIRES_ISSUE:
msg += f" and it is recommended to link a {classification} cherry pick PR with an issue"
post_comment(org, project, pr.pr_num, msg)
finally:
if current_branch:
repo.checkout(branch=current_branch)
def create_cherry_pick_branch(
github_actor: str, repo: GitRepo, pr: GitHubPR, commit_sha: str, onto_branch: str
) -> str:
"""
Create a local branch and cherry pick the commit. Return the name of the local
cherry picking branch.
"""
repo.checkout(branch=onto_branch)
repo._run_git("submodule", "update", "--init", "--recursive")
# Remove all special characters if we want to include the actor in the branch name
github_actor = re.sub("[^0-9a-zA-Z]+", "_", github_actor)
cherry_pick_branch = f"cherry-pick-{pr.pr_num}-by-{github_actor}"
repo.create_branch_and_checkout(branch=cherry_pick_branch)
# We might want to support ghstack later
repo._run_git("cherry-pick", "-x", "-X", "theirs", commit_sha)
repo.push(branch=cherry_pick_branch, dry_run=False)
return cherry_pick_branch
def submit_pr(
repo: GitRepo,
pr: GitHubPR,
cherry_pick_branch: str,
onto_branch: str,
) -> str:
"""
Submit the cherry pick PR and return the link to the PR
"""
org, project = repo.gh_owner_and_name()
default_msg = f"Cherry pick #{pr.pr_num} onto {onto_branch} branch"
title = pr.info.get("title", default_msg)
body = pr.info.get("body", default_msg)
try:
response = gh_fetch_url(
f"https://api.github.com/repos/{org}/{project}/pulls",
method="POST",
data={
"title": title,
"body": body,
"head": cherry_pick_branch,
"base": onto_branch,
},
headers={"Accept": "application/vnd.github.v3+json"},
reader=json.load,
)
cherry_pick_pr = response.get("html_url", "")
if not cherry_pick_pr:
raise RuntimeError(
f"Fail to find the cherry pick PR: {json.dumps(response)}"
)
return str(cherry_pick_pr)
except HTTPError as error:
msg = f"Fail to submit the cherry pick PR: {error}"
raise RuntimeError(msg) from error
def post_comment(org: str, project: str, pr_num: int, msg: str) -> None:
"""
Post a comment on the original PR pointing to the cherry-pick PR on success,
or surface the error on failure
"""
internal_debugging = ""
run_url = os.getenv("GH_RUN_URL")
# Post a comment to tell folks that the PR is being cherry picked
if run_url is not None:
internal_debugging = "\n".join(
line
for line in (
"<details><summary>Details for Dev Infra team</summary>",
f'Raised by <a href="{run_url}">workflow job</a>\n',
"</details>",
)
if line
)
comment = "\n".join(
(f"### Cherry picking #{pr_num}", f"{msg}", "", f"{internal_debugging}")
)
gh_post_pr_comment(org, project, pr_num, comment)
def main() -> None:
args = parse_args()
pr_num = args.pr_num
repo = GitRepo(get_git_repo_dir(), get_git_remote_name())
org, project = repo.gh_owner_and_name()
pr = GitHubPR(org, project, pr_num)
try:
commit_sha = get_merge_commit_sha(repo, pr)
if not commit_sha:
raise RuntimeError(
f"Refuse to cherry pick #{pr_num} because it hasn't been merged yet"
)
cherry_pick(
args.github_actor,
repo,
pr,
commit_sha,
args.onto_branch,
args.classification,
args.fixes,
args.dry_run,
)
except RuntimeError as error:
if not args.dry_run:
post_comment(org, project, pr_num, str(error))
else:
raise error
if __name__ == "__main__":
main()
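# Example invocation (the PR number and release branch below are hypothetical):
#   python3 .github/scripts/cherry_pick.py --onto-branch release/2.2 \
#       --classification regression --github-actor "$GITHUB_ACTOR" 12345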

274
.github/scripts/delete_old_branches.py vendored Normal file
View File

@ -0,0 +1,274 @@
# Delete old branches
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Set
from github_utils import gh_fetch_json_dict, gh_graphql
from gitutils import GitRepo
SEC_IN_DAY = 24 * 60 * 60
CLOSED_PR_RETENTION = 30 * SEC_IN_DAY
NO_PR_RETENTION = 1.5 * 365 * SEC_IN_DAY
PR_WINDOW = 90 * SEC_IN_DAY # Set to None to look at all PRs (may take a lot of tokens)
REPO_OWNER = "pytorch"
REPO_NAME = "pytorch"
ESTIMATED_TOKENS = [0]
TOKEN = os.environ["GITHUB_TOKEN"]
if not TOKEN:
raise Exception("GITHUB_TOKEN is not set")
REPO_ROOT = Path(__file__).parent.parent.parent
# Query for all PRs instead of just closed/merged because it's faster
GRAPHQL_ALL_PRS_BY_UPDATED_AT = """
query ($owner: String!, $repo: String!, $cursor: String) {
repository(owner: $owner, name: $repo) {
pullRequests(
first: 100
after: $cursor
orderBy: {field: UPDATED_AT, direction: DESC}
) {
totalCount
pageInfo {
hasNextPage
endCursor
}
nodes {
headRefName
number
updatedAt
state
}
}
}
}
"""
GRAPHQL_OPEN_PRS = """
query ($owner: String!, $repo: String!, $cursor: String) {
repository(owner: $owner, name: $repo) {
pullRequests(
first: 100
after: $cursor
states: [OPEN]
) {
totalCount
pageInfo {
hasNextPage
endCursor
}
nodes {
headRefName
number
updatedAt
state
}
}
}
}
"""
GRAPHQL_NO_DELETE_BRANCH_LABEL = """
query ($owner: String!, $repo: String!, $cursor: String) {
repository(owner: $owner, name: $repo) {
label(name: "no-delete-branch") {
pullRequests(first: 100, after: $cursor) {
totalCount
pageInfo {
hasNextPage
endCursor
}
nodes {
headRefName
number
updatedAt
state
}
}
}
}
}
"""
def is_protected(branch: str) -> bool:
try:
ESTIMATED_TOKENS[0] += 1
res = gh_fetch_json_dict(
f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/branches/{branch}"
)
return bool(res["protected"])
except Exception as e:
print(f"[{branch}] Failed to fetch branch protections: {e}")
return True
def convert_gh_timestamp(date: str) -> float:
return datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ").timestamp()
def get_branches(repo: GitRepo) -> Dict[str, Any]:
# Query locally for branches, group by branch base name (e.g. gh/blah/base -> gh/blah), and get the most recent branch
git_response = repo._run_git(
"for-each-ref",
"--sort=creatordate",
"--format=%(refname) %(committerdate:iso-strict)",
"refs/remotes/origin",
)
branches_by_base_name: Dict[str, Any] = {}
for line in git_response.splitlines():
branch, date = line.split(" ")
re_branch = re.match(r"refs/remotes/origin/(.*)", branch)
assert re_branch
branch = branch_base_name = re_branch.group(1)
if x := re.match(r"(gh\/.+)\/(head|base|orig)", branch):
branch_base_name = x.group(1)
date = datetime.fromisoformat(date).timestamp()
if branch_base_name not in branches_by_base_name:
branches_by_base_name[branch_base_name] = [date, [branch]]
else:
branches_by_base_name[branch_base_name][1].append(branch)
if date > branches_by_base_name[branch_base_name][0]:
branches_by_base_name[branch_base_name][0] = date
return branches_by_base_name
def paginate_graphql(
query: str,
kwargs: Dict[str, Any],
termination_func: Callable[[List[Dict[str, Any]]], bool],
get_data: Callable[[Dict[str, Any]], List[Dict[str, Any]]],
get_page_info: Callable[[Dict[str, Any]], Dict[str, Any]],
) -> List[Any]:
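# Follow GraphQL cursor-based pagination: keep requesting pages until the API
# reports no next page or termination_func decides enough data has been fetched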
hasNextPage = True
endCursor = None
data: List[Dict[str, Any]] = []
while hasNextPage:
ESTIMATED_TOKENS[0] += 1
res = gh_graphql(query, cursor=endCursor, **kwargs)
data.extend(get_data(res))
hasNextPage = get_page_info(res)["hasNextPage"]
endCursor = get_page_info(res)["endCursor"]
if termination_func(data):
break
return data
def get_recent_prs() -> Dict[str, Any]:
now = datetime.now().timestamp()
# Grab all PRs updated within the last PR_WINDOW days
pr_infos: List[Dict[str, Any]] = paginate_graphql(
GRAPHQL_ALL_PRS_BY_UPDATED_AT,
{"owner": "pytorch", "repo": "pytorch"},
lambda data: (
PR_WINDOW is not None
and (now - convert_gh_timestamp(data[-1]["updatedAt"]) > PR_WINDOW)
),
lambda res: res["data"]["repository"]["pullRequests"]["nodes"],
lambda res: res["data"]["repository"]["pullRequests"]["pageInfo"],
)
# Get the most recent PR for each branch base (group gh together)
prs_by_branch_base = {}
for pr in pr_infos:
pr["updatedAt"] = convert_gh_timestamp(pr["updatedAt"])
branch_base_name = pr["headRefName"]
if x := re.match(r"(gh\/.+)\/(head|base|orig)", branch_base_name):
branch_base_name = x.group(1)
if branch_base_name not in prs_by_branch_base:
prs_by_branch_base[branch_base_name] = pr
else:
if pr["updatedAt"] > prs_by_branch_base[branch_base_name]["updatedAt"]:
prs_by_branch_base[branch_base_name] = pr
return prs_by_branch_base
def get_branches_with_magic_label_or_open_pr() -> Set[str]:
pr_infos: List[Dict[str, Any]] = paginate_graphql(
GRAPHQL_NO_DELETE_BRANCH_LABEL,
{"owner": "pytorch", "repo": "pytorch"},
lambda data: False,
lambda res: res["data"]["repository"]["label"]["pullRequests"]["nodes"],
lambda res: res["data"]["repository"]["label"]["pullRequests"]["pageInfo"],
)
pr_infos.extend(
paginate_graphql(
GRAPHQL_OPEN_PRS,
{"owner": "pytorch", "repo": "pytorch"},
lambda data: False,
lambda res: res["data"]["repository"]["pullRequests"]["nodes"],
lambda res: res["data"]["repository"]["pullRequests"]["pageInfo"],
)
)
# Collect the branch base of every matching PR (group ghstack branches together)
branch_bases = set()
for pr in pr_infos:
branch_base_name = pr["headRefName"]
if x := re.match(r"(gh\/.+)\/(head|base|orig)", branch_base_name):
branch_base_name = x.group(1)
branch_bases.add(branch_base_name)
return branch_bases
def delete_branch(repo: GitRepo, branch: str) -> None:
repo._run_git("push", "origin", "-d", branch)
def delete_branches() -> None:
now = datetime.now().timestamp()
git_repo = GitRepo(str(REPO_ROOT), "origin", debug=True)
branches = get_branches(git_repo)
prs_by_branch = get_recent_prs()
keep_branches = get_branches_with_magic_label_or_open_pr()
delete = []
# Do not delete if:
# * associated PR is open, closed but updated recently, or has the no-delete-branch label
# * no associated PR and branch was updated in last 1.5 years
# * is protected
# Setting different values of PR_WINDOW will change how branches with closed
# PRs are treated depending on how old the branch is. The default value of
# 90 will allow branches with closed PRs to be deleted if the PR hasn't been
# updated in 90 days and the branch hasn't been updated in 1.5 years
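# Example: a closed PR last updated 45 days ago makes its branch deletable
# (45 > 30-day retention, provided the branch itself is also 30+ days old),
# while one last updated 120 days ago falls outside PR_WINDOW, is treated as
# having no PR, and keeps its branch until the branch is 1.5 years old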
for base_branch, (date, sub_branches) in branches.items():
print(f"[{base_branch}] Updated {(now - date) / SEC_IN_DAY} days ago")
if base_branch in keep_branches:
print(f"[{base_branch}] Has magic label or open PR, skipping")
continue
pr = prs_by_branch.get(base_branch)
if pr:
print(
f"[{base_branch}] Has PR {pr['number']}: {pr['state']}, updated {(now - pr['updatedAt']) / SEC_IN_DAY} days ago"
)
if (
now - pr["updatedAt"] < CLOSED_PR_RETENTION
or (now - date) < CLOSED_PR_RETENTION
):
continue
elif now - date < NO_PR_RETENTION:
continue
print(f"[{base_branch}] Checking for branch protections")
if any(is_protected(sub_branch) for sub_branch in sub_branches):
print(f"[{base_branch}] Is protected")
continue
for sub_branch in sub_branches:
print(f"[{base_branch}] Deleting {sub_branch}")
delete.append(sub_branch)
if ESTIMATED_TOKENS[0] > 400:
print("Estimated tokens exceeded, exiting")
break
print(f"To delete ({len(delete)}):")
for branch in delete:
print(f"About to delete branch {branch}")
delete_branch(git_repo, branch)
if __name__ == "__main__":
delete_branches()

View File

@ -1,139 +0,0 @@
import os
import re
import sys
from typing import Any, cast, Dict, List, NamedTuple, Tuple
import rockset # type: ignore[import]
from gitutils import _check_output
def eprint(msg: str) -> None:
print(msg, file=sys.stderr)
class WorkflowCheck(NamedTuple):
workflowName: str
name: str
jobName: str
conclusion: str
def get_latest_commits() -> List[str]:
latest_viable_commit = _check_output(
[
"git",
"log",
"-n",
"1",
"--pretty=format:%H",
"origin/viable/strict",
],
encoding="ascii",
)
commits = _check_output(
[
"git",
"rev-list",
f"{latest_viable_commit}^..HEAD",
"--remotes=*origin/main",
],
encoding="ascii",
).splitlines()
return commits
def query_commits(commits: List[str]) -> List[Dict[str, Any]]:
rs = rockset.RocksetClient(
host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
)
params = [{"name": "shas", "type": "string", "value": ",".join(commits)}]
res = rs.QueryLambdas.execute_query_lambda(
# https://console.rockset.com/lambdas/details/commons.commit_jobs_batch_query
query_lambda="commit_jobs_batch_query",
version="19c74e10819104f9",
workspace="commons",
parameters=params,
)
return cast(List[Dict[str, Any]], res.results)
def print_commit_status(commit: str, results: Dict[str, Any]) -> None:
print(commit)
for check in results["results"]:
if check["sha"] == commit:
print(f"\t{check['conclusion']:>10}: {check['name']}")
def get_commit_results(
commit: str, results: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
workflow_checks = []
for check in results:
if check["sha"] == commit:
workflow_checks.append(
WorkflowCheck(
workflowName=check["workflowName"],
name=check["name"],
jobName=check["jobName"],
conclusion=check["conclusion"],
)._asdict()
)
return workflow_checks
def isGreen(commit: str, results: List[Dict[str, Any]]) -> Tuple[bool, str]:
workflow_checks = get_commit_results(commit, results)
regex = {
"pull": False,
"trunk": False,
"lint": False,
"linux-binary": False,
}
for check in workflow_checks:
jobName = check["jobName"]
# Ignore result from unstable job, be it success or failure
if "unstable" in jobName:
continue
workflowName = check["workflowName"]
conclusion = check["conclusion"]
for required_check in regex:
if re.match(required_check, workflowName, flags=re.IGNORECASE):
if conclusion not in ["success", "skipped"]:
return (False, workflowName + " checks were not successful")
else:
regex[required_check] = True
missing_workflows = [x for x in regex.keys() if not regex[x]]
if len(missing_workflows) > 0:
return (False, "missing required workflows: " + ", ".join(missing_workflows))
return (True, "")
def get_latest_green_commit(commits: List[str], results: List[Dict[str, Any]]) -> Any:
for commit in commits:
eprint(f"Checking {commit}")
is_green, msg = isGreen(commit, results)
if is_green:
eprint("GREEN")
return commit
else:
eprint("RED: " + msg)
return None
def main() -> None:
commits = get_latest_commits()
results = query_commits(commits)
latest_viable_commit = get_latest_green_commit(commits, results)
print(latest_viable_commit)
if __name__ == "__main__":
main()

View File

@ -474,6 +474,10 @@ def get_reenabled_issues(pr_body: str = "") -> List[str]:
return parse_reenabled_issues(pr_body) + parse_reenabled_issues(commit_messages)
def check_for_setting(labels: Set[str], body: str, setting: str) -> bool:
return setting in labels or f"[{setting}]" in body
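# For example, check_for_setting({"keep-going"}, "", "keep-going") is True, and
# a PR body containing the literal text "[keep-going]" also enables the setting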
def perform_misc_tasks(
labels: Set[str], test_matrix: Dict[str, List[Any]], job_name: str, pr_body: str
) -> None:
@ -481,7 +485,15 @@ def perform_misc_tasks(
In addition to applying the filter logic, the script also does the following
misc tasks to set keep-going and is-unstable variables
"""
set_output("keep-going", "keep-going" in labels)
set_output("keep-going", check_for_setting(labels, pr_body, "keep-going"))
set_output(
"ci-verbose-test-logs",
check_for_setting(labels, pr_body, "ci-verbose-test-logs"),
)
set_output(
"ci-no-test-timeout", check_for_setting(labels, pr_body, "ci-no-test-timeout")
)
set_output("ci-no-td", check_for_setting(labels, pr_body, "ci-no-td"))
# Obviously, if the job name includes unstable, then this is an unstable job
is_unstable = job_name and IssueType.UNSTABLE.value in job_name
@ -577,7 +589,7 @@ def main() -> None:
labels=labels,
test_matrix=filtered_test_matrix,
job_name=args.job_name,
pr_body=pr_body,
pr_body=pr_body if pr_body else "",
)
# Set the filtered test matrix as the output

View File

@ -4,7 +4,7 @@
Will output a condensed version of the matrix. Will include the following:
* CUDA version short
* CUDA full verison
* CUDA full version
* CUDNN version short
* Image type either runtime or devel
* Platform linux/arm64,linux/amd64

View File

@ -119,6 +119,19 @@ def gh_fetch_json_dict(
return cast(Dict[str, Any], _gh_fetch_json_any(url, params, data))
def gh_graphql(query: str, **kwargs: Any) -> Dict[str, Any]:
rc = gh_fetch_url(
"https://api.github.com/graphql",
data={"query": query, "variables": kwargs},
reader=json.load,
)
if "errors" in rc:
raise RuntimeError(
f"GraphQL query {query}, args {kwargs} failed: {rc['errors']}"
)
return cast(Dict[str, Any], rc)
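# For example, gh_graphql(GRAPHQL_OPEN_PRS, owner="pytorch", repo="pytorch")
# sends kwargs as GraphQL variables and raises RuntimeError on GraphQL errors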
def _gh_post_comment(
url: str, comment: str, dry_run: bool = False
) -> List[Dict[str, Any]]:

View File

@ -155,12 +155,19 @@ class GitRepo:
)
return [x.strip() for x in rc.split("\n") if x.strip()] if len(rc) > 0 else []
def current_branch(self) -> str:
return self._run_git("symbolic-ref", "--short", "HEAD").strip()
def current_branch(self) -> Optional[str]:
try:
return self._run_git("symbolic-ref", "--short", "HEAD").strip()
except RuntimeError:
# we are in detached HEAD state
return None
def checkout(self, branch: str) -> None:
self._run_git("checkout", branch)
def create_branch_and_checkout(self, branch: str) -> None:
self._run_git("checkout", "-b", branch)
def fetch(self, ref: Optional[str] = None, branch: Optional[str] = None) -> None:
if branch is None and ref is None:
self._run_git("fetch", self.remote)
@ -273,6 +280,7 @@ class GitRepo:
def cherry_pick_commits(self, from_branch: str, to_branch: str) -> None:
orig_branch = self.current_branch()
assert orig_branch is not None, "Must be on a branch"
self.checkout(to_branch)
from_commits, to_commits = self.compute_branch_diffs(from_branch, to_branch)
if len(from_commits) == 0:

Binary file not shown.

View File

@ -74,15 +74,23 @@ def gh_get_labels(org: str, repo: str) -> List[str]:
def gh_add_labels(
org: str, repo: str, pr_num: int, labels: Union[str, List[str]]
org: str, repo: str, pr_num: int, labels: Union[str, List[str]], dry_run: bool
) -> None:
if dry_run:
print(f"Dryrun: Adding labels {labels} to PR {pr_num}")
return
gh_fetch_url_and_headers(
url=f"https://api.github.com/repos/{org}/{repo}/issues/{pr_num}/labels",
data={"labels": labels},
)
def gh_remove_label(org: str, repo: str, pr_num: int, label: str) -> None:
def gh_remove_label(
org: str, repo: str, pr_num: int, label: str, dry_run: bool
) -> None:
if dry_run:
print(f"Dryrun: Removing {label} from PR {pr_num}")
return
gh_fetch_url_and_headers(
url=f"https://api.github.com/repos/{org}/{repo}/issues/{pr_num}/labels/{label}",
method="DELETE",

43
.github/scripts/lintrunner.sh vendored Executable file
View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
set -x
# The generic Linux job chooses to use base env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
CACHE_DIRECTORY="/tmp/.lintbin"
# Try to recover the cached binaries
if [[ -d "${CACHE_DIRECTORY}" ]]; then
# It's ok to fail this as lintrunner init would download these binaries
# again if they do not exist
cp -r "${CACHE_DIRECTORY}" . || true
fi
# This has already been cached in the docker image
lintrunner init 2> /dev/null
# Do build steps necessary for linters
if [[ "${CLANG}" == "1" ]]; then
python3 -m tools.linter.clang_tidy.generate_build_files
fi
python3 -m tools.generate_torch_version --is_debug=false
python3 -m tools.pyi.gen_pyi \
--native-functions-path aten/src/ATen/native/native_functions.yaml \
--tags-path aten/src/ATen/native/tags.yaml \
--deprecated-functions-path "tools/autograd/deprecated.yaml"
RC=0
# Run lintrunner on all files
if ! lintrunner --force-color --all-files --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
echo ""
echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"
RC=1
fi
# Use jq to massage the JSON lint output into GitHub Actions workflow commands.
jq --raw-output \
'"::\(if .severity == "advice" or .severity == "disabled" then "warning" else .severity end) file=\(.path),line=\(.line),col=\(.char),title=\(.code) \(.name)::" + (.description | gsub("\\n"; "%0A"))' \
lint.json || true
exit $RC

51
.github/scripts/s390x-ci/README.md vendored Normal file
View File

@ -0,0 +1,51 @@
# Configuring the builder.
## Install prerequisites.
```
$ sudo dnf install docker
```
## Add services.
```
$ sudo cp self-hosted-builder/*.service /etc/systemd/system/
$ sudo systemctl daemon-reload
```
## Download qemu-user-static image
```
$ sudo docker pull docker.io/iiilinuxibmcom/qemu-user-static:6.1.0-1
```
## Autostart the x86_64 emulation support.
```
$ sudo systemctl enable --now qemu-user-static
```
## Rebuild the image
In order to build or update the `iiilinuxibmcom/actions-runner` image, e.g. to get the
latest OS security fixes, use the following commands:
```
$ cd self-hosted-builder
$ sudo docker build \
--build-arg repo=<owner>/<name> \
--build-arg token=<***> \
--pull \
-f actions-runner.Dockerfile \
-t iiilinuxibmcom/actions-runner \
.
```
If it fails, ensure that SELinux doesn't prevent it from working.
In the worst case, SELinux enforcement can be disabled with `setenforce 0`.
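For example, to check the current SELinux mode and relax enforcement until the next reboot:
```
$ getenforce
$ sudo setenforce 0
```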
## Autostart the runner.
```
$ sudo systemctl enable --now actions-runner@$NAME
```
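Here `$NAME` is a label of your choice for the runner instance; for a hypothetical runner named `runner1`:
```
$ sudo systemctl enable --now actions-runner@runner1
```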

View File

@ -0,0 +1,66 @@
# Self-Hosted IBM Z Github Actions Runner.
# Temporary image: amd64 dependencies.
FROM docker.io/amd64/ubuntu:22.04 as ld-prefix
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install ca-certificates libicu70 libssl3
# Main image.
FROM docker.io/s390x/ubuntu:22.04
# Packages for pytorch building and testing.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install \
cmake \
curl \
gcc \
git \
jq \
libxml2-dev \
libxslt-dev \
ninja-build \
python-is-python3 \
python3 \
python3-dev \
python3-pip \
pybind11-dev \
python3-numpy \
libopenblas-dev \
liblapack-dev \
libgloo-dev \
python3-yaml \
python3-scipy \
virtualenv
# amd64 dependencies.
COPY --from=ld-prefix / /usr/x86_64-linux-gnu/
RUN ln -fs ../lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /usr/x86_64-linux-gnu/lib64/
RUN ln -fs /etc/resolv.conf /usr/x86_64-linux-gnu/etc/
ENV QEMU_LD_PREFIX=/usr/x86_64-linux-gnu
# Scripts.
COPY fs/ /
RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint
# amd64 Github Actions Runner.
RUN useradd -m actions-runner
USER actions-runner
WORKDIR /home/actions-runner
RUN curl -L https://github.com/actions/runner/releases/download/v2.309.0/actions-runner-linux-x64-2.309.0.tar.gz | tar -xz
# repository
ARG repo
# repository token
ARG token
RUN ./config.sh \
--unattended \
--url "https://github.com/${repo}" \
--token "${token}" \
--no-default-labels \
--labels self-hosted,linux.s390x
ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"]

View File

@ -0,0 +1,22 @@
[Unit]
Description=Self-Hosted IBM Z Github Actions Runner
Wants=qemu-user-static
After=qemu-user-static
StartLimitIntervalSec=0
[Service]
Type=simple
Restart=always
ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i
ExecStart=/usr/bin/docker run \
--init \
--interactive \
--name=actions-runner.%i \
--rm \
iiilinuxibmcom/actions-runner
ExecStop=/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1"
ExecStop=/bin/sh -c "docker wait actions-runner.%i"
ExecStop=/bin/sh -c "docker rm actions-runner.%i"
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -e -u
# Run one job.
./run.sh --once

View File

@ -0,0 +1,30 @@
#!/usr/bin/env bash
#
# Container entrypoint that waits for all spawned processes.
#
set -e -u
# Create a FIFO and start reading from its read end.
tempdir=$(mktemp -d "/tmp/done.XXXXXXXXXX")
trap 'rm -r "$tempdir"' EXIT
done="$tempdir/pipe"
mkfifo "$done"
cat "$done" & waiter=$!
# Start the workload. Its descendants will inherit the FIFO's write end.
status=0
if [ "$#" -eq 0 ]; then
bash 9>"$done" || status=$?
else
"$@" 9>"$done" || status=$?
fi
# When the workload and all of its descendants exit, the FIFO's write end will
# be closed and `cat "$done"` will exit. Wait until it happens. This is needed
# in order to handle SelfUpdater, which the workload may start in background
# before exiting.
wait "$waiter"
exit "$status"

View File

@ -0,0 +1,11 @@
[Unit]
Description=Support for transparent execution of non-native binaries with QEMU user emulation
[Service]
Type=oneshot
# The source code for iiilinuxibmcom/qemu-user-static is at https://github.com/iii-i/qemu-user-static/tree/v6.1.0-1
# TODO: replace it with multiarch/qemu-user-static once version >6.1 is available
ExecStart=/usr/bin/docker run --rm --interactive --privileged docker.io/iiilinuxibmcom/qemu-user-static:6.1.0-1 --reset -p yes
[Install]
WantedBy=multi-user.target

View File

@ -1,148 +0,0 @@
from typing import Any, Dict, List
from unittest import main, mock, TestCase
from fetch_latest_green_commit import isGreen, WorkflowCheck
workflowNames = [
"pull",
"trunk",
"Lint",
"linux-binary-libtorch-pre-cxx11",
"android-tests",
"windows-binary-wheel",
"periodic",
"docker-release-builds",
"nightly",
"pr-labels",
"Close stale pull requests",
"Update S3 HTML indices for download.pytorch.org",
"Create Release",
]
def set_workflow_job_status(
workflow: List[Dict[str, Any]], name: str, status: str
) -> List[Dict[str, Any]]:
for check in workflow:
if check["workflowName"] == name:
check["conclusion"] = status
return workflow
class TestChecks:
def make_test_checks(self) -> List[Dict[str, Any]]:
workflow_checks = []
for i in range(len(workflowNames)):
workflow_checks.append(
WorkflowCheck(
workflowName=workflowNames[i],
name="test/job",
jobName="job",
conclusion="success",
)._asdict()
)
return workflow_checks
class TestPrintCommits(TestCase):
@mock.patch(
"fetch_latest_green_commit.get_commit_results",
return_value=TestChecks().make_test_checks(),
)
def test_all_successful(self, mock_get_commit_results: Any) -> None:
"Test with workflows are successful"
workflow_checks = mock_get_commit_results()
self.assertTrue(isGreen("sha", workflow_checks)[0])
@mock.patch(
"fetch_latest_green_commit.get_commit_results",
return_value=TestChecks().make_test_checks(),
)
def test_necessary_successful(self, mock_get_commit_results: Any) -> None:
"Test with necessary workflows are successful"
workflow_checks = mock_get_commit_results()
workflow_checks = set_workflow_job_status(
workflow_checks, workflowNames[8], "failed"
)
workflow_checks = set_workflow_job_status(
workflow_checks, workflowNames[9], "failed"
)
workflow_checks = set_workflow_job_status(
workflow_checks, workflowNames[10], "failed"
)
workflow_checks = set_workflow_job_status(
workflow_checks, workflowNames[11], "failed"
)
workflow_checks = set_workflow_job_status(
workflow_checks, workflowNames[12], "failed"
)
self.assertTrue(isGreen("sha", workflow_checks)[0])
@mock.patch(
"fetch_latest_green_commit.get_commit_results",
return_value=TestChecks().make_test_checks(),
)
def test_necessary_skipped(self, mock_get_commit_results: Any) -> None:
"Test with necessary job (ex: pull) skipped"
workflow_checks = mock_get_commit_results()
workflow_checks = set_workflow_job_status(workflow_checks, "pull", "skipped")
result = isGreen("sha", workflow_checks)
self.assertTrue(result[0])
@mock.patch(
"fetch_latest_green_commit.get_commit_results",
return_value=TestChecks().make_test_checks(),
)
def test_skippable_skipped(self, mock_get_commit_results: Any) -> None:
"Test with skippable jobs (periodic and docker-release-builds skipped"
workflow_checks = mock_get_commit_results()
workflow_checks = set_workflow_job_status(
workflow_checks, "periodic", "skipped"
)
workflow_checks = set_workflow_job_status(
workflow_checks, "docker-release-builds", "skipped"
)
self.assertTrue(isGreen("sha", workflow_checks))
@mock.patch(
"fetch_latest_green_commit.get_commit_results",
return_value=TestChecks().make_test_checks(),
)
def test_necessary_failed(self, mock_get_commit_results: Any) -> None:
"Test with necessary job (ex: Lint) failed"
workflow_checks = mock_get_commit_results()
workflow_checks = set_workflow_job_status(workflow_checks, "Lint", "failed")
result = isGreen("sha", workflow_checks)
self.assertFalse(result[0])
self.assertEqual(result[1], "Lint checks were not successful")
@mock.patch(
"fetch_latest_green_commit.get_commit_results",
return_value=TestChecks().make_test_checks(),
)
def test_skippable_failed(self, mock_get_commit_results: Any) -> None:
"Test with failing skippable jobs (ex: docker-release-builds) should pass"
workflow_checks = mock_get_commit_results()
workflow_checks = set_workflow_job_status(
workflow_checks, "periodic", "skipped"
)
workflow_checks = set_workflow_job_status(
workflow_checks, "docker-release-builds", "failed"
)
result = isGreen("sha", workflow_checks)
self.assertTrue(result[0])
@mock.patch("fetch_latest_green_commit.get_commit_results", return_value={})
def test_no_workflows(self, mock_get_commit_results: Any) -> None:
"Test with missing workflows"
workflow_checks = mock_get_commit_results()
result = isGreen("sha", workflow_checks)
self.assertFalse(result[0])
self.assertEqual(
result[1],
"missing required workflows: pull, trunk, lint, linux-binary",
)
if __name__ == "__main__":
main()

View File

@ -636,55 +636,108 @@ class TestConfigFilter(TestCase):
@mock.patch("subprocess.check_output")
def test_perform_misc_tasks(self, mocked_subprocess: Any) -> None:
def _gen_expected_string(
keep_going: bool = False,
ci_verbose_test_logs: bool = False,
ci_no_test_timeout: bool = False,
ci_no_td: bool = False,
is_unstable: bool = False,
reenabled_issues: str = "",
) -> str:
return (
f"keep-going={keep_going}\n"
f"ci-verbose-test-logs={ci_verbose_test_logs}\n"
f"ci-no-test-timeout={ci_no_test_timeout}\n"
f"ci-no-td={ci_no_td}\n"
f"is-unstable={is_unstable}\n"
f"reenabled-issues={reenabled_issues}\n"
)
mocked_subprocess.return_value = b""
testcases: List[Dict[str, Any]] = [
{
"labels": {},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "A job name",
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n",
"expected": _gen_expected_string(),
"description": "No keep-going, no is-unstable",
},
{
"labels": {"keep-going"},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "A job name",
"expected": "keep-going=True\nis-unstable=False\nreenabled-issues=\n",
"expected": _gen_expected_string(keep_going=True),
"description": "Has keep-going, no is-unstable",
},
{
"labels": {},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "A job name",
"pr_body": "[keep-going]",
"expected": _gen_expected_string(keep_going=True),
"description": "Keep-going in PR body",
},
{
"labels": {"ci-verbose-test-logs"},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "A job name",
"pr_body": "[ci-no-test-timeout]",
"expected": _gen_expected_string(
ci_verbose_test_logs=True, ci_no_test_timeout=True
),
"description": "No pipe logs label and no test timeout in PR body",
},
{
"labels": {"ci-no-test-timeout"},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "A job name",
"pr_body": "[ci-verbose-test-logs]",
"expected": _gen_expected_string(
ci_verbose_test_logs=True, ci_no_test_timeout=True
),
"description": "No pipe logs in PR body and no test timeout in label (same as the above but swapped)",
},
{
"labels": {"ci-no-td"},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "A job name",
"pr_body": "",
"expected": _gen_expected_string(ci_no_td=True),
"description": "No pipe logs in PR body and no test timeout in label (same as the above but swapped)",
},
{
"labels": {},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": None,
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n",
"expected": _gen_expected_string(),
"description": "No job name",
},
{
"labels": {},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-12, unstable)",
"expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n",
"job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-stable, unstable)",
"expected": _gen_expected_string(is_unstable=True),
"description": "Unstable job",
},
{
"labels": {},
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-12, unstable)",
"expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n",
"job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-stable, unstable)",
"expected": _gen_expected_string(is_unstable=True),
"description": "Unstable job",
},
{
"labels": {},
"test_matrix": '{include: [{config: "1", unstable: "unstable"}, {config: "2", unstable: "unstable"}]}',
"job_name": "macos-12-py3-arm64 / build",
"expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n",
"expected": _gen_expected_string(is_unstable=True),
"description": "All configs are unstable",
},
{
"labels": {},
"test_matrix": '{include: [{config: "1", unstable: "unstable"}, {config: "2"}]}',
"job_name": "macos-12-py3-arm64 / build",
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n",
"expected": _gen_expected_string(is_unstable=False),
"description": "Only mark some configs as unstable",
},
{
@ -692,7 +745,7 @@ class TestConfigFilter(TestCase):
"test_matrix": '{include: [{config: "default"}]}',
"job_name": "A job name",
"pr_body": "resolves #123 fixes #234",
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=123,234\n",
"expected": _gen_expected_string(reenabled_issues="123,234"),
"description": "Reenable some issues",
},
]

View File

@ -16,6 +16,8 @@ from typing import Any, Dict, List, Optional
from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError
from github_utils import gh_graphql
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from trymerge import (
@ -26,7 +28,6 @@ from trymerge import (
get_drci_classifications,
get_rockset_results,
gh_get_team_members,
gh_graphql,
GitHubPR,
JobCheckState,
main as trymerge_main,
@ -140,11 +141,14 @@ def mock_parse_args(revert: bool = False, force: bool = False) -> Any:
self.comment_id = 0
self.reason = "this is for testing"
self.ignore_current = False
self.check_mergeability = False
return Object()
def mock_remove_label(org: str, repo: str, pr_num: str, label: str) -> None:
def mock_remove_label(
org: str, repo: str, pr_num: str, label: str, dry_run: bool
) -> None:
pass
@ -431,6 +435,13 @@ class TestTryMerge(TestCase):
assert pr._reviews is not None # to pacify mypy
self.assertGreater(len(pr._reviews), 100)
def test_get_co_authors(self, *args: Any) -> None:
"""Tests that co-authors are recognized"""
pr = GitHubPR("pytorch", "pytorch", 118347)
authors = pr.get_authors()
self.assertIn("kit1980", authors)
self.assertIn("Co-authored-by:", pr.gen_commit_message())
def test_get_checkruns_many_runs(self, *args: Any) -> None:
"""Tests that all checkruns can be fetched"""
pr = GitHubPR("pytorch", "pytorch", 105260)

View File

@ -39,6 +39,7 @@ from github_utils import (
gh_fetch_json_list,
gh_fetch_merge_base,
gh_fetch_url,
gh_graphql,
gh_post_commit_comment,
gh_post_pr_comment,
gh_update_pr_state,
@ -152,12 +153,14 @@ GH_COMMIT_AUTHORS_FRAGMENT = """
fragment CommitAuthors on PullRequestCommitConnection {
nodes {
commit {
author {
user {
login
authors(first: 2) {
nodes {
user {
login
}
email
name
}
email
name
}
oid
}
@ -458,19 +461,6 @@ HAS_NO_CONNECTED_DIFF_TITLE = (
IGNORABLE_FAILED_CHECKS_THESHOLD = 10
def gh_graphql(query: str, **kwargs: Any) -> Dict[str, Any]:
rc = gh_fetch_url(
"https://api.github.com/graphql",
data={"query": query, "variables": kwargs},
reader=json.load,
)
if "errors" in rc:
raise RuntimeError(
f"GraphQL query {query}, args {kwargs} failed: {rc['errors']}"
)
return cast(Dict[str, Any], rc)
def gh_get_pr_info(org: str, proj: str, pr_no: int) -> Any:
rc = gh_graphql(GH_GET_PR_INFO_QUERY, name=proj, owner=org, number=pr_no)
return rc["data"]["repository"]["pullRequest"]
@ -608,6 +598,7 @@ def parse_args() -> Any:
parser.add_argument("--revert", action="store_true")
parser.add_argument("--force", action="store_true")
parser.add_argument("--ignore-current", action="store_true")
parser.add_argument("--check-mergeability", action="store_true")
parser.add_argument("--comment-id", type=int)
parser.add_argument("--reason", type=str)
parser.add_argument("pr_num", type=int)
@ -745,7 +736,7 @@ class GitHubPR:
# work for ghstack where the base is the custom branch, i.e. gh/USER/ID/base,
# so let's just use main instead
self.merge_base = gh_fetch_merge_base(
self.org, self.project, last_commit_oid, "main"
self.org, self.project, last_commit_oid, self.default_branch()
)
# Fallback to baseRefOid if the API call fails, i.e. rate limit. Note that baseRefOid
@ -845,14 +836,14 @@ class GitHubPR:
def add_authors(info: Dict[str, Any]) -> None:
for node in info["commits_with_authors"]["nodes"]:
author_node = node["commit"]["author"]
user_node = author_node["user"]
author = f"{author_node['name']} <{author_node['email']}>"
if user_node is None:
# If author is not github user, user node will be null
authors.append(("", author))
else:
authors.append((cast(str, user_node["login"]), author))
for author_node in node["commit"]["authors"]["nodes"]:
user_node = author_node["user"]
author = f"{author_node['name']} <{author_node['email']}>"
if user_node is None:
# If author is not github user, user node will be null
authors.append(("", author))
else:
authors.append((cast(str, user_node["login"]), author))
info = self.info
for _ in range(100):
@ -948,11 +939,6 @@ class GitHubPR:
def get_authors(self) -> Dict[str, str]:
rc = {}
# TODO: replace with `self.get_commit_count()` when GraphQL pagination can be used
# to fetch all commits, see https://gist.github.com/malfet/4f35321b0c9315bcd7116c7b54d83372
# and https://support.github.com/ticket/enterprise/1642/1659119
if self.get_commit_count() <= 250:
assert len(self._fetch_authors()) == self.get_commit_count()
for idx in range(len(self._fetch_authors())):
rc[self.get_committer_login(idx)] = self.get_committer_author(idx)
@ -1068,6 +1054,7 @@ class GitHubPR:
repo: GitRepo,
skip_mandatory_checks: bool,
comment_id: Optional[int] = None,
skip_all_rule_checks: bool = False,
) -> List["GitHubPR"]:
assert self.is_ghstack_pr()
ghstack_prs = get_ghstack_prs(
@ -1082,7 +1069,7 @@ class GitHubPR:
commit_msg = pr.gen_commit_message(
filter_ghstack=True, ghstack_deps=pr_dependencies
)
if pr.pr_num != self.pr_num:
if pr.pr_num != self.pr_num and not skip_all_rule_checks:
# Raises exception if matching rule is not found
find_matching_merge_rule(
pr,
@ -1113,13 +1100,19 @@ class GitHubPR:
msg_body = re.sub(RE_GHSTACK_DESC, "", msg_body)
msg = self.get_title() + f" (#{self.pr_num})\n\n"
msg += msg_body
# Mention PR co-authors
for author_login, author_name in self.get_authors().items():
if author_login != self.get_pr_creator_login():
msg += f"\nCo-authored-by: {author_name}"
msg += f"\nPull Request resolved: {self.get_pr_url()}\n"
msg += f"Approved by: {approved_by_urls}\n"
if ghstack_deps:
msg += f"ghstack dependencies: {', '.join([f'#{pr.pr_num}' for pr in ghstack_deps])}\n"
return msg
def add_numbered_label(self, label_base: str) -> None:
def add_numbered_label(self, label_base: str, dry_run: bool) -> None:
labels = self.get_labels() if self.labels is not None else []
full_label = label_base
count = 0
@ -1127,7 +1120,7 @@ class GitHubPR:
if label_base in label:
count += 1
full_label = f"{label_base}X{count}"
gh_add_labels(self.org, self.project, self.pr_num, [full_label])
gh_add_labels(self.org, self.project, self.pr_num, [full_label], dry_run)
def merge_into(
self,
@ -1157,9 +1150,9 @@ class GitHubPR:
repo.push(self.default_branch(), dry_run)
if not dry_run:
self.add_numbered_label(MERGE_COMPLETE_LABEL)
self.add_numbered_label(MERGE_COMPLETE_LABEL, dry_run)
for pr in additional_merged_prs:
pr.add_numbered_label(MERGE_COMPLETE_LABEL)
pr.add_numbered_label(MERGE_COMPLETE_LABEL, dry_run)
if comment_id and self.pr_num:
# When the merge process reaches this part, we can assume that the commit
@ -1199,7 +1192,11 @@ class GitHubPR:
skip_mandatory_checks: bool = False,
comment_id: Optional[int] = None,
branch: Optional[str] = None,
skip_all_rule_checks: bool = False,
) -> List["GitHubPR"]:
"""
:param skip_all_rule_checks: If true, skips all rule checks, useful for dry-running merge locally
"""
branch_to_merge_into = self.default_branch() if branch is None else branch
if repo.current_branch() != branch_to_merge_into:
repo.checkout(branch_to_merge_into)
@ -1215,6 +1212,7 @@ class GitHubPR:
repo,
skip_mandatory_checks,
comment_id=comment_id,
skip_all_rule_checks=skip_all_rule_checks,
)
@ -1669,7 +1667,19 @@ def get_classifications(
# going forward. It's preferable to try calling Dr.CI API directly first
# to get the latest results as well as update Dr.CI PR comment
drci_classifications = get_drci_classifications(pr_num=pr_num, project=project)
print(f"From Dr.CI API: {json.dumps(drci_classifications)}")
def get_readable_drci_results(drci_classifications: Any) -> str:
try:
s = f"From Dr.CI API ({pr_num}):\n"
for classification, jobs in drci_classifications.items():
s += f" {classification}: \n"
for job in jobs:
s += f" {job['id']} {job['name']}\n"
return s
except Exception:
return f"From Dr.CI API: {json.dumps(drci_classifications)}"
print(get_readable_drci_results(drci_classifications))
# NB: if the latest results from Dr.CI is not available, i.e. when calling from
# SandCastle, we fallback to any results we can find on Dr.CI check run summary
@ -1882,8 +1892,8 @@ def do_revert_prs(
pr.org, pr.project, pr.pr_num, revert_message, dry_run=dry_run
)
pr.add_numbered_label("reverted", dry_run)
if not dry_run:
pr.add_numbered_label("reverted")
gh_post_commit_comment(pr.org, pr.project, commit_sha, revert_msg)
gh_update_pr_state(pr.org, pr.project, pr.pr_num)
@ -2053,7 +2063,7 @@ def merge(
print(f"Attempting merge of {initial_commit_sha} ({pr_link})")
if MERGE_IN_PROGRESS_LABEL not in pr.get_labels():
gh_add_labels(pr.org, pr.project, pr.pr_num, [MERGE_IN_PROGRESS_LABEL])
gh_add_labels(pr.org, pr.project, pr.pr_num, [MERGE_IN_PROGRESS_LABEL], dry_run)
explainer = TryMergeExplainer(
skip_mandatory_checks,
@ -2073,8 +2083,7 @@ def merge(
check_for_sev(pr.org, pr.project, skip_mandatory_checks)
if skip_mandatory_checks or can_skip_internal_checks(pr, comment_id):
# do not wait for any pending signals if PR is closed as part of co-development process
if skip_mandatory_checks:
gh_post_pr_comment(
pr.org,
pr.project,
@ -2201,8 +2210,7 @@ def merge(
# Finally report timeout back
msg = f"Merged timed out after {timeout_minutes} minutes. Please contact the pytorch_dev_infra team."
msg += f"The last exception was: {last_exception}"
if not dry_run:
gh_add_labels(pr.org, pr.project, pr.pr_num, ["land-failed"])
gh_add_labels(pr.org, pr.project, pr.pr_num, ["land-failed"], dry_run)
raise RuntimeError(msg)
@ -2281,6 +2289,16 @@ def main() -> None:
)
return
if args.check_mergeability:
if pr.is_ghstack_pr():
get_ghstack_prs(repo, pr) # raises error if out of sync
pr.merge_changes(
repo,
skip_mandatory_checks=True,
skip_all_rule_checks=True,
)
return
if not args.force and pr.has_invalid_submodule_updates():
message = (
f"This PR updates submodules {', '.join(pr.get_changed_submodules())}\n"
@ -2329,7 +2347,10 @@ def main() -> None:
else:
print("Missing comment ID or PR number, couldn't upload to Rockset")
finally:
gh_remove_label(org, project, args.pr_num, MERGE_IN_PROGRESS_LABEL)
if not args.check_mergeability:
gh_remove_label(
org, project, args.pr_num, MERGE_IN_PROGRESS_LABEL, args.dry_run
)
if __name__ == "__main__":

View File

@ -169,6 +169,9 @@ jobs:
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
@ -218,6 +221,9 @@ jobs:
-e NUM_TEST_SHARDS \
-e REENABLED_ISSUES \
-e CONTINUE_THROUGH_ERROR \
-e VERBOSE_TEST_LOGS \
-e NO_TEST_TIMEOUT \
-e NO_TD \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \

View File

@ -34,12 +34,14 @@ jobs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }}
keep-going: ${{ steps.filter.outputs.keep-going }}
ci-verbose-test-logs: ${{ steps.filter.outputs.ci-verbose-test-logs }}
ci-no-test-timeout: ${{ steps.filter.outputs.ci-no-test-timeout }}
ci-no-td: ${{ steps.filter.outputs.ci-no-td }}
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
fetch-depth: 1
submodules: false
- name: Select all requested test configurations
@ -95,6 +97,9 @@ jobs:
PY_VERS: 3.9
PR_BODY: ${{ github.event.pull_request.body }}
CONTINUE_THROUGH_ERROR: ${{ needs.filter.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ needs.filter.outputs.ci-verbose-test-logs }}
NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }}
NO_TD: ${{ needs.filter.outputs.ci-no-td }}
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
run: |

View File

@ -148,6 +148,9 @@ jobs:
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}

View File

@ -71,6 +71,7 @@ jobs:
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
role-duration-seconds: 18000
- name: Login to Amazon ECR
id: login-ecr
@ -148,6 +149,9 @@ jobs:
BRANCH: ${{ steps.parse-ref.outputs.branch }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
@ -196,6 +200,9 @@ jobs:
-e NUM_TEST_SHARDS \
-e REENABLED_ISSUES \
-e CONTINUE_THROUGH_ERROR \
-e VERBOSE_TEST_LOGS \
-e NO_TEST_TIMEOUT \
-e NO_TD \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \

View File

@ -128,6 +128,7 @@ jobs:
PYTHON_VERSION: "3.8"
SCCACHE_BUCKET: "ossci-compiler-cache"
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
SCCACHE_REGION: us-east-1
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
VC_YEAR: "2019"

View File

@ -25,7 +25,7 @@ on:
timeout-minutes:
required: false
type: number
default: 300
default: 240
description: |
Set the maximum time (in minutes) the workflow should take to finish
@ -132,14 +132,26 @@ jobs:
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Set Test step time
id: test-timeout
shell: bash
env:
JOB_TIMEOUT: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
run: |
echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
- name: Test
id: test
shell: bash
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
env:
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: 3.8
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
VS_VERSION: "16.8.6"

View File

@ -143,6 +143,9 @@ jobs:
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
@ -185,6 +188,9 @@ jobs:
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
-e CONTINUE_THROUGH_ERROR \
-e VERBOSE_TEST_LOGS \
-e NO_TEST_TIMEOUT \
-e NO_TD \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \

View File

@ -1,29 +1,84 @@
name: Check mergeability and dependencies for ghstack prs
name: Check mergeability of ghstack PR
on:
pull_request:
types: [opened, synchronize, reopened, edited]
types: [opened, synchronize, reopened]
branches: [gh/**/base]
jobs:
check-regex:
ghstack-mergeability-check:
runs-on: ubuntu-latest
outputs:
regex-match: ${{ steps.regex-match.outputs.match }}
steps:
- uses: actions/checkout@v4
- id: regex-match
uses: actions-ecosystem/action-regex-match@d50fd2e7a37d0e617aea3d7ada663bd56862b9cc
with:
text: ${{ github.head_ref }}
regex: '^(gh/[^/]+/[0-9]+/)head$'
fetch-depth: 0
- name: Setup git
shell: bash
run: |
git config --global user.email "pytorchmergebot@users.noreply.github.com"
git config --global user.name "PyTorch MergeBot"
git fetch origin main:main
- name: Wait for orig branch
shell: bash
run: |
BRANCH="${{ github.base_ref }}"
echo "$BRANCH"
BRANCH="${BRANCH%/base}/orig"
echo "$BRANCH"
WAIT_SECONDS=300
END_WAIT=$((SECONDS+WAIT_SECONDS))
BRANCH_EXISTS=0
while [ $SECONDS -lt $END_WAIT ]; do
git fetch --prune origin "${BRANCH}" || true
if git rev-parse --verify "origin/${BRANCH}"; then
BRANCH_EXISTS=1
break
fi
echo "Waiting for branch ${BRANCH} to exist..."
sleep 30 # Wait for 30 seconds before retrying
done
if [ $BRANCH_EXISTS -eq 0 ]; then
echo "Branch ${BRANCH} not found after ${WAIT_SECONDS} seconds."
echo "Mergeability check failed for infrastructure reasons."
exit 1
fi
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
cache: pip
architecture: x64
- run: pip install pyyaml==6.0 rockset==1.0.3
shell: bash
- name: Verify mergeability
shell: bash
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUM: ${{ github.event.pull_request.number }}
run: |
set -ex
python3 .github/scripts/trymerge.py --check-mergeability "${PR_NUM}"
- name: Print debug info
if: failure()
shell: bash
env:
PR_NUM: ${{ github.event.pull_request.number }}
run: |
{
echo "# PR $PR_NUM is not mergeable into main"
echo "To debug, run the diagnostic workflow:"
echo "https://github.com/pytorch/test-infra/actions/workflows/pr-dependencies-check.yml"
} >> "$GITHUB_STEP_SUMMARY"
pr-dependencies-check:
needs: check-regex
if: ${{ needs.check-regex.outputs.regex-match != '' }}
uses: pytorch/test-infra/.github/workflows/pr-dependencies-check.yml@main
with:
pr_number: ${{ github.event.pull_request.number }}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

57
.github/workflows/cherry-pick.yml vendored Normal file
View File

@ -0,0 +1,57 @@
name: Create a cherry pick from a PR
on:
repository_dispatch:
types: [try-cherry-pick]
jobs:
cherry-pick:
name: cherry-pick-pr-${{ github.event.client_payload.pr_num }}
runs-on: ubuntu-latest
environment: cherry-pick-bot
env:
GH_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
steps:
- name: Checkout repo
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
token: ${{ secrets.GH_PYTORCHBOT_CHERRY_PICK_TOKEN }}
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
cache: pip
# Not direct dependencies of this script, but the trymerge module it imports needs them
- run: pip install pyyaml==6.0 rockset==1.0.3
- name: Setup committer id
run: |
git config --global user.name "PyTorch Bot"
git config --global user.email "pytorchbot@users.noreply.github.com"
- name: Cherry pick the PR
shell: bash
env:
PR_NUM: ${{ github.event.client_payload.pr_num }}
BRANCH: ${{ github.event.client_payload.branch }}
CLASSIFICATION: ${{ github.event.client_payload.classification }}
FIXES: ${{ github.event.client_payload.fixes || '' }}
ACTOR: ${{ github.actor }}
GITHUB_TOKEN: ${{ secrets.GH_PYTORCHBOT_CHERRY_PICK_TOKEN }}
run: |
set -ex
python .github/scripts/cherry_pick.py \
--onto-branch "${BRANCH}" \
--classification "${CLASSIFICATION}" \
--fixes "${FIXES}" \
--github-actor "${ACTOR}" \
"${PR_NUM}"
concurrency:
group: cherry-pick-pr-${{ github.event.client_payload.pr_num }}
cancel-in-progress: true

View File

@ -15,6 +15,9 @@ jobs:
if: ${{ github.repository == 'pytorch/pytorch' }}
name: Create Release
runs-on: ubuntu-latest
# https://github.com/softprops/action-gh-release?tab=readme-ov-file#permissions
permissions:
contents: write
steps:
- uses: malfet/checkout@silent-checkout
with:

View File

@ -0,0 +1,39 @@
# A workflow that deletes branches of closed PRs
name: Delete old branches
on:
schedule:
# Run daily.
- cron: 30 1 * * *
workflow_dispatch:
concurrency:
group: delete-old-branches
cancel-in-progress: true
permissions:
contents: write
jobs:
delete:
if: ${{ github.repository == 'pytorch/pytorch' }}
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
architecture: x64
check-latest: false
- name: Delete old branches
run: python .github/scripts/delete_old_branches.py
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@ -31,7 +31,7 @@ permissions: read-all
jobs:
docker-build:
runs-on: [self-hosted, linux.2xlarge]
runs-on: [self-hosted, linux.12xlarge]
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
timeout-minutes: 240
strategy:
@ -43,6 +43,7 @@ jobs:
- docker-image-name: pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9
- docker-image-name: pytorch-linux-focal-py3.8-clang10
- docker-image-name: pytorch-linux-focal-py3.11-clang10
- docker-image-name: pytorch-linux-focal-py3.12-clang10
- docker-image-name: pytorch-linux-focal-rocm-n-1-py3
- docker-image-name: pytorch-linux-focal-rocm-n-py3
- docker-image-name: pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12

View File

@ -16,28 +16,28 @@ concurrency:
permissions: read-all
jobs:
linux-focal-rocm5_7-py3_8-inductor-build:
name: rocm5.7-py3.8-inductor
linux-focal-rocm6_0-py3_8-inductor-build:
name: rocm6.0-py3.8-inductor
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.7-py3.8
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
{ config: "inductor", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.2" },
]}
linux-focal-rocm5_7-py3_8-inductor-test:
linux-focal-rocm6_0-py3_8-inductor-test:
permissions:
id-token: write
contents: read
name: rocm5.7-py3.8-inductor
name: rocm6.0-py3.8-inductor
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_7-py3_8-inductor-build
needs: linux-focal-rocm6_0-py3_8-inductor-build
with:
build-environment: linux-focal-rocm5.7-py3.8
docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_7-py3_8-inductor-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-inductor-build.outputs.test-matrix }}
linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
name: cuda12.1-py3.10-gcc9-sm86
@ -124,6 +124,7 @@ jobs:
{ config: "dynamic_cpu_inductor_timm", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}


@ -6,8 +6,6 @@ on:
- opened
- synchronize
- reopened
- labeled
- unlabeled
branches-ignore:
- nightly
workflow_dispatch:


@ -15,53 +15,36 @@ permissions: read-all
# The names of steps that actually test the code should be suffixed with `(nonretryable)`.
# When any other step fails, its job will be retried once by retryBot.
jobs:
lintrunner:
lintrunner-clang:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
with:
timeout: 120
runner: linux.2xlarge
docker-image: pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
fetch-depth: 0
submodules: true
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
# The generic Linux job chooses to use base env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
export ADDITIONAL_LINTRUNNER_ARGS="--take CLANGTIDY,CLANGFORMAT"
export CLANG=1
.github/scripts/lintrunner.sh
CACHE_DIRECTORY="/tmp/.lintbin"
# Try to recover the cached binaries
if [[ -d "${CACHE_DIRECTORY}" ]]; then
# It's ok to fail this as lintrunner init would download these binaries
# again if they do not exist
cp -r "${CACHE_DIRECTORY}" . || true
fi
# This has already been cached in the docker image
lintrunner init 2> /dev/null
# Do build steps necessary for linters
python3 -m tools.linter.clang_tidy.generate_build_files
python3 -m tools.generate_torch_version --is_debug=false
python3 -m tools.pyi.gen_pyi \
--native-functions-path aten/src/ATen/native/native_functions.yaml \
--tags-path aten/src/ATen/native/tags.yaml \
--deprecated-functions-path "tools/autograd/deprecated.yaml"
RC=0
# Run lintrunner on all files
if ! lintrunner --force-color --all-files --tee-json=lint.json 2> /dev/null; then
echo ""
echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"
RC=1
fi
# Use jq to massage the JSON lint output into GitHub Actions workflow commands.
jq --raw-output \
'"::\(if .severity == "advice" or .severity == "disabled" then "warning" else .severity end) file=\(.path),line=\(.line),col=\(.char),title=\(.code) \(.name)::" + (.description | gsub("\\n"; "%0A"))' \
lint.json || true
exit $RC
lintrunner-noclang:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
with:
timeout: 120
runner: linux.2xlarge
docker-image: pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter
# NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout
# to run git rev-parse HEAD~:.ci/docker when a new image is needed
fetch-depth: 0
submodules: true
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
export ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT"
.github/scripts/lintrunner.sh
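The inline script removed here now lives in `.github/scripts/lintrunner.sh`. For reference, the jq step above that turns lint messages into GitHub Actions annotations corresponds to roughly this Python (an illustrative equivalent, not the actual script):

```python
import json

# lintrunner --tee-json writes one JSON message per line.
SEVERITY_MAP = {"advice": "warning", "disabled": "warning"}

with open("lint.json") as f:
    for line in f:
        if not line.strip():
            continue
        msg = json.loads(line)
        severity = SEVERITY_MAP.get(msg["severity"], msg["severity"])
        description = msg["description"].replace("\n", "%0A")
        print(
            f"::{severity} file={msg['path']},line={msg['line']},"
            f"col={msg['char']},title={msg['code']} {msg['name']}::{description}"
        )
```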
quick-checks:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@ -225,7 +208,7 @@ jobs:
cache: pip
- name: Install dependencies
run: |
pip install pytest-rerunfailures==11.1.* pytest-shard==0.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.*
pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
- name: Run run_test.py (nonretryable)
run: |


@ -19,7 +19,7 @@ jobs:
with:
sync-tag: macos-12-py3-arm64-build
build-environment: macos-12-py3-arm64
runner-type: macos-m1-12
runner-type: macos-m1-stable
build-generates-artifacts: true
# To match the one pre-installed in the m1 runners
python-version: 3.9.12


@ -196,11 +196,11 @@ jobs:
docker-image: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.test-matrix }}
linux-focal-rocm5_7-py3_8-build:
name: linux-focal-rocm5.7-py3.8
linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.7-py3.8
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
@ -208,14 +208,14 @@ jobs:
{ config: "distributed", shard: 2, num_shards: 2, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm5_7-py3_8-test:
linux-focal-rocm6_0-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm5.7-py3.8
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_7-py3_8-build
needs: linux-focal-rocm6_0-py3_8-build
with:
build-environment: linux-focal-rocm5.7-py3.8
docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}


@ -179,6 +179,29 @@ jobs:
docker-image: ${{ needs.linux-focal-py3_11-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-py3_11-clang10-build.outputs.test-matrix }}
linux-focal-py3_12-clang10-build:
name: linux-focal-py3.12-clang10
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-py3.12-clang10
docker-image-name: pytorch-linux-focal-py3.12-clang10
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
{ config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
]}
linux-focal-py3_12-clang10-test:
name: linux-focal-py3.12-clang10
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-py3_12-clang10-build
with:
build-environment: linux-focal-py3.12-clang10
docker-image: ${{ needs.linux-focal-py3_12-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-py3_12-clang10-build.outputs.test-matrix }}
timeout-minutes: 600
linux-focal-cuda11_8-py3_10-gcc9-build:
name: linux-focal-cuda11.8-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
@ -233,7 +256,7 @@ jobs:
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-py3-clang12-mobile-build
docker-image-name: pytorch-linux-jammy-py3-clang12-asan
docker-image-name: pytorch-linux-jammy-py3-clang15-asan
build-generates-artifacts: false
test-matrix: |
{ include: [
@ -357,13 +380,13 @@ jobs:
{ config: "default", shard: 1, num_shards: 1 },
]}
linux-focal-rocm5_7-py3_8-build:
linux-focal-rocm6_0-py3_8-build:
# don't run build twice on main
if: github.event_name == 'pull_request'
name: linux-focal-rocm5.7-py3.8
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.7-py3.8
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |


@ -18,11 +18,11 @@ concurrency:
permissions: read-all
jobs:
linux-focal-rocm5_7-py3_8-build:
name: linux-focal-rocm5.7-py3.8
linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.7-py3.8
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
@ -35,14 +35,14 @@ jobs:
{ config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.2" },
]}
linux-focal-rocm5_7-py3_8-test:
linux-focal-rocm6_0-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm5.7-py3.8
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_7-py3_8-build
needs: linux-focal-rocm6_0-py3_8-build
with:
build-environment: linux-focal-rocm5.7-py3.8
docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}


@ -88,28 +88,28 @@ jobs:
docker-image: ${{ needs.linux-focal-py3_8-clang10-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-py3_8-clang10-build.outputs.test-matrix }}
linux-focal-rocm5_6-py3_8-build:
name: linux-focal-rocm5.6-py3.8
linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.6-py3.8
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm5_6-py3_8-test:
linux-focal-rocm6_0-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm5.6-py3.8
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_6-py3_8-build
needs: linux-focal-rocm6_0-py3_8-build
with:
build-environment: linux-focal-rocm5.6-py3.8
docker-image: ${{ needs.linux-focal-rocm5_6-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_6-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}
linux-jammy-py3_10-clang15-asan-build:
name: linux-jammy-py3.10-clang15-asan


@ -0,0 +1,152 @@
name: Index PyTorch Tests for Target Determination
on:
workflow_dispatch:
# TODO: Trigger every few hours
permissions:
id-token: write
contents: read
jobs:
index:
runs-on: linux.g5.4xlarge.nvidia.gpu # 1 GPU A10G 24GB each
environment: target-determinator-env
steps:
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9
- name: Use following to pull public copy of the image
id: print-ghcr-mirror
env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash
run: |
tag=${ECR_DOCKER_IMAGE##*/}
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
- name: Clone PyTorch
uses: actions/checkout@v3
with:
path: pytorch
- name: Clone CodeLlama
uses: actions/checkout@v3
with:
repository: osalpekar/codellama
ref: main
path: codellama
- name: Clone Target Determination Code
uses: actions/checkout@v3
with:
repository: osalpekar/llm-target-determinator
ref: v0.0.1
path: llm-target-determinator
- name: Install Requirements
shell: bash -l {0}
run: |
set -euxo pipefail
conda create \
--yes \
--quiet \
--name "tdenv" \
"python=3.9"
conda activate tdenv
cd "${GITHUB_WORKSPACE}"
pwd
cd llm-target-determinator
pip install -r requirements.txt
cd ../codellama
pip install -e .
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_target_determinator_s3_read_write
aws-region: us-east-1
- name: Fetch CodeLlama Checkpoint
shell: bash -l {0}
run: |
set -euxo pipefail
conda activate tdenv
pip install awscli==1.32.18
cd codellama/
mkdir "CodeLlama-7b-Python"
aws s3 cp \
"s3://target-determinator-assets/CodeLlama-7b-Python" \
"CodeLlama-7b-Python" \
--recursive
- name: Run Indexer
id: indexer
shell: bash -l {0}
run: |
set -euxo pipefail
conda activate tdenv
cd "${GITHUB_WORKSPACE}"/llm-target-determinator
python create_filelist.py
torchrun \
--standalone \
--nnodes=1 \
--nproc-per-node=1 \
indexer.py \
--experiment-name indexer-files
- name: Upload Index to S3
shell: bash -l {0}
if: ${{ steps.indexer.outcome == 'success' }}
run: |
set -euxo pipefail
conda activate tdenv
cd "${GITHUB_WORKSPACE}"/llm-target-determinator/assets
TIMESTAMP=$(date -Iseconds)
ZIP_NAME="indexer-files-${TIMESTAMP}.zip"
# Create a zipfile with all the generated indices
zip -r "${ZIP_NAME}" indexer-files
# Move the old index into the archived/ folder
aws s3 mv \
"s3://target-determinator-assets/indexes/latest/" \
"s3://target-determinator-assets/indexes/archived/" \
--recursive
# Move the new index into the latest/ folder
aws s3 cp \
"${ZIP_NAME}" \
"s3://target-determinator-assets/indexes/latest/${ZIP_NAME}"
# Note that because the above 2 operations are not atomic, there will
# be a period of a few seconds between these where there is no index
# present in the latest/ folder. To account for this, the retriever
# should have some retry logic with backoff to ensure fetching the
# index doesn't fail.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
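Because the two copies above are not atomic, a retriever following this advice might fetch the index along these lines (an illustrative sketch under the comment's assumptions; the retriever is not part of this diff):

```python
import random
import time

import boto3

BUCKET = "target-determinator-assets"


def fetch_latest_index(dest: str, attempts: int = 5) -> None:
    # latest/ can be briefly empty while a new index is swapped in,
    # so retry with exponential backoff plus jitter.
    s3 = boto3.client("s3")
    for attempt in range(attempts):
        objs = s3.list_objects_v2(Bucket=BUCKET, Prefix="indexes/latest/")
        contents = objs.get("Contents", [])
        if contents:
            key = contents[0]["Key"]
            s3.download_file(BUCKET, key, dest)
            return
        time.sleep(2 ** attempt + random.random())
    raise RuntimeError("No index found in indexes/latest/ after retries")
```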


@ -95,7 +95,7 @@ jobs:
with:
sync-tag: macos-12-py3-arm64-build
build-environment: macos-12-py3-arm64
runner-type: macos-m1-12
runner-type: macos-m1-stable
build-generates-artifacts: true
# To match the one pre-installed in the m1 runners
python-version: 3.9.12
@ -177,11 +177,11 @@ jobs:
{ config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge.nonephemeral" },
]}
linux-focal-rocm5_7-py3_8-build:
name: linux-focal-rocm5.7-py3.8
linux-focal-rocm6_0-py3_8-build:
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-focal-rocm5.7-py3.8
build-environment: linux-focal-rocm6.0-py3.8
docker-image-name: pytorch-linux-focal-rocm-n-py3
sync-tag: rocm-build
test-matrix: |
@ -189,15 +189,15 @@ jobs:
{ config: "default", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" },
]}
linux-focal-rocm5_7-py3_8-test:
linux-focal-rocm6_0-py3_8-test:
permissions:
id-token: write
contents: read
name: linux-focal-rocm5.7-py3.8
name: linux-focal-rocm6.0-py3.8
uses: ./.github/workflows/_rocm-test.yml
needs: linux-focal-rocm5_7-py3_8-build
needs: linux-focal-rocm6_0-py3_8-build
with:
build-environment: linux-focal-rocm5.7-py3.8
docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.test-matrix }}
build-environment: linux-focal-rocm6.0-py3.8
docker-image: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-rocm6_0-py3_8-build.outputs.test-matrix }}
tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor"


@ -13,46 +13,13 @@ jobs:
do_update_viablestrict:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ubuntu-20.04
environment: mergebot
environment: ${{ (github.event_name == 'schedule') && 'mergebot' || '' }}
steps:
- name: Checkout repo
uses: actions/checkout@v3
- name: Update viable/strict
uses: pytorch/test-infra/.github/actions/update-viablestrict@main
with:
fetch-depth: 0
token: ${{ secrets.MERGEBOT_TOKEN }}
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
architecture: x64
check-latest: false
cache: pip
cache-dependency-path: |
**/.ci/docker/requirements-ci.txt
**/.github/requirements-gha-cache.txt
- name: Install Python Packages
run: |
pip3 install rockset==1.0.3
pip3 install boto3==1.19.12
- name: Get latest viable commit
env:
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
run: |
output=$(python3 .github/scripts/fetch_latest_green_commit.py)
echo "latest_viable_sha=$output" >> "${GITHUB_OUTPUT}"
id: get-latest-commit
- name: Push SHA to viable/strict branch
if: steps.get-latest-commit.outputs.latest_viable_sha != 'None'
env:
GITHUB_TOKEN: ${{ secrets.MERGEBOT_TOKEN }}
run: |
git config --global user.email "pytorchmergebot@users.noreply.github.com"
git config --global user.name "PyTorch MergeBot"
echo "Set the latest sha variable to be ${{ steps.get-latest-commit.outputs.latest_viable_sha }}"
# Pushing an older green commit here will fail because it's non-fast-forward, which is ok
# to ignore because we already have the later green commit in viable/strict
git push origin "${{ steps.get-latest-commit.outputs.latest_viable_sha }}":viable/strict || true
repository: pytorch/pytorch
stable-branch: viable/strict
requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\"]'
secret-bot-token: ${{ secrets.MERGEBOT_TOKEN }}
rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}

.gitignore

@ -126,6 +126,7 @@ env
.circleci/scripts/COMMIT_MSG
scripts/release_notes/*.json
sccache-stats*.json
lint.json
# These files get copied over on invoking setup.py
torchgen/packaged/*


@ -1,5 +1,3 @@
merge_base_with = "origin/main"
[[linter]]
code = 'FLAKE8'
include_patterns = ['**/*.py']
@ -48,7 +46,7 @@ init_command = [
'mccabe==0.7.0',
'pycodestyle==2.11.1',
'pyflakes==3.1.0',
'torchfix==0.2.0',
'torchfix==0.4.0',
]
@ -66,6 +64,8 @@ include_patterns = [
'aten/src/ATen/native/**/Foreach*.*',
'aten/src/ATen/native/cuda/fused*.*',
'aten/src/ATen/native/cuda/Fused*.cu',
'aten/src/ATen/native/cudnn/*.h',
'aten/src/ATen/native/cudnn/*.cpp',
'c10/**/*.h',
'c10/**/*.cpp',
'torch/csrc/**/*.h',
@ -120,39 +120,6 @@ include_patterns = [
]
exclude_patterns = [
'**/fb/**',
'torch/include/**',
'torch/csrc/**',
'torch/_dynamo/**/*.py',
'torch/_inductor/**/*.py',
'torch/_numpy/**/*.py',
'torch/_functorch/aot_autograd.py',
'torch/_functorch/benchmark_utils.py',
'torch/_functorch/compile_utils.py',
'torch/_functorch/compilers.py',
'torch/_functorch/eager_transforms.py',
'torch/_functorch/fx_minifier.py',
'torch/_functorch/partitioners.py',
'torch/_functorch/top_operators_github_usage.py',
'torch/_functorch/vmap.py',
'torch/_subclasses/schema_check_mode.py',
'torch/distributed/elastic/agent/server/api.py',
'torch/testing/_internal/**',
'torch/distributed/fsdp/fully_sharded_data_parallel.py',
# TODO(suo): these exclusions were added just to get lint clean on master.
# Follow up to do more target suppressions and remove them.
'torch/ao/quantization/fx/convert.py',
'torch/ao/quantization/_dbr/function_fusion.py',
'test/test_datapipe.py',
'caffe2/contrib/fakelowp/test/test_batchmatmul_nnpi_fp16.py',
'test/test_numpy_interop.py',
'torch/torch_version.py',
'torch/fx/proxy.py',
'torch/fx/passes/shape_prop.py',
'torch/fx/node.py',
'torch/fx/experimental/symbolic_shapes.py',
'torch/fx/experimental/proxy_tensor.py',
'torch/_subclasses/fake_utils.py',
'torch/_subclasses/fake_tensor.py',
]
command = [
'python3',
@ -168,47 +135,22 @@ init_command = [
'numpy==1.24.3 ; python_version == "3.8"',
'numpy==1.26.0 ; python_version >= "3.9"',
'expecttest==0.1.6',
'mypy==1.7.0',
'mypy==1.8.0',
'sympy==1.11.1',
'types-requests==2.27.25',
'types-PyYAML==6.0.7',
'types-tabulate==0.8.8',
'types-protobuf==3.19.18',
'types-pkg-resources==0.1.3',
'types-Jinja2==2.11.9',
'types-colorama==0.4.6',
'filelock==3.13.1',
'junitparser==2.1.1',
'rich==10.9.0',
'pyyaml==6.0',
'optree==0.10.0',
]
[[linter]]
code = 'MYPYINDUCTOR'
include_patterns = [
'torch/_dynamo/**/*.py',
'torch/_inductor/**/*.py',
]
exclude_patterns = [
'**/fb/**',
'torch/_dynamo/backends/**/*.py',
'torch/_dynamo/variables/**/*.py',
'torch/_dynamo/polyfill.py',
'torch/_inductor/fx_passes/serialized_patterns/**',
]
command = [
'python3',
'tools/linter/adapters/mypy_linter.py',
'--config=mypy-inductor.ini',
'--code=MYPYINDUCTOR',
'--',
'@{{PATHSFILE}}'
]
init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'types-colorama==0.4.6',
]
[[linter]]
code = 'MYPYSTRICT'
include_patterns = [
@ -249,7 +191,8 @@ include_patterns = [
'c10/**/*.h',
# Enable coverage of headers in torch/csrc and excluding sub-directories for now.
'torch/csrc/*.h',
'torch/csrc/autograd/**/*.h',
'torch/csrc/*.cpp',
'torch/csrc/**/*.h',
'torch/csrc/**/*.cpp',
]
exclude_patterns = [
@ -258,6 +201,8 @@ exclude_patterns = [
# CUDA files are also excluded.
'**/fb/**',
'**/*pb.h',
'c10/**/cuda/*pp',
'aten/**/cuda/*pp',
'**/cuda/*pp',
'**/*XPU*',
'**/xpu/*pp',
@ -277,8 +222,6 @@ exclude_patterns = [
'third_party/**/*',
'torch/csrc/api/**',
'torch/csrc/autograd/generated/**',
'torch/csrc/autograd/profiler_legacy.cpp',
'torch/csrc/cuda/**',
'torch/csrc/dynamo/*',
'torch/csrc/distributed/**/*',
'torch/csrc/inductor/**/*',
@ -329,6 +272,26 @@ command = [
'@{{PATHSFILE}}'
]
[[linter]]
code = 'TYPENOSKIP'
include_patterns = ['mypy.ini']
command = [
'python3',
'tools/linter/adapters/grep_linter.py',
'--pattern=follow_imports\s*=\s*skip',
'--linter-name=TYPENOSKIP',
'--error-name=use of follow_imports = skip',
"""--error-description=\
follow_imports = skip is forbidden from mypy.ini configuration as it \
is extremely easy to accidentally turn off type checking unintentionally. If \
you need to suppress type errors, use a top level # mypy: ignore-errors. \
Do not rely on automatic Any substitution; instead, manually # type: ignore \
at use sites or define a pyi type stub with more relaxed types. \
""",
'--',
'@{{PATHSFILE}}'
]
[[linter]]
code = 'NOQA'
include_patterns = ['**/*.py', '**/*.pyi']
@ -1392,6 +1355,7 @@ exclude_patterns = [
'test/nn/test_embedding.py',
'test/nn/test_init.py',
'test/nn/test_lazy_modules.py',
'test/nn/test_load_state_dict.py',
'test/nn/test_module_hooks.py',
'test/nn/test_multihead_attention.py',
'test/nn/test_packed_sequence.py',
@ -1586,7 +1550,6 @@ exclude_patterns = [
'test/test_mkldnn_verbose.py',
'test/test_mobile_optimizer.py',
'test/test_model_dump.py',
'test/test_module_init.py',
'test/test_modules.py',
'test/test_monitor.py',
'test/test_mps.py',
@ -2689,7 +2652,7 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
'ruff==0.1.11',
'ruff==0.2.2',
]
is_formatter = true


@ -228,6 +228,7 @@ filegroup(
[
"aten/src/ATen/cuda/*.cpp",
"aten/src/ATen/cuda/detail/*.cpp",
"aten/src/ATen/cuda/tunable/*.cpp",
"aten/src/ATen/cudnn/*.cpp",
"aten/src/ATen/native/cuda/*.cpp",
"aten/src/ATen/native/cuda/linalg/*.cpp",


@ -43,11 +43,15 @@ set(CMAKE_C_STANDARD 11 CACHE STRING "The C standard whose features are reques
# ---[ Utils
include(cmake/public/utils.cmake)
# --- [ Check that minimal gcc version is 9.4+
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.4)
message(FATAL "GCC-9.4 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}")
# --- [ Check that minimal gcc version is 9.3+
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.3)
message(FATAL_ERROR "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}")
endif()
# This define is needed to preserve behavior given anticipated changes to cccl/thrust
# https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html
string(APPEND CMAKE_CUDA_FLAGS " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS")
if(LINUX)
include(cmake/CheckAbi.cmake)
string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
@ -347,6 +351,8 @@ cmake_dependent_option(
"NOT INTERN_BUILD_MOBILE" OFF)
cmake_dependent_option(
BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
cmake_dependent_option(
BUILD_BUNDLE_PTXAS "Bundle PTXAS into torch/bin folder" OFF "USE_CUDA" OFF)
option(USE_MIMALLOC "Use mimalloc" OFF)
# Enable third party mimalloc library to improve memory allocation performance on Windows.
@ -1237,3 +1243,12 @@ if(DEFINED USE_CUSTOM_DEBINFO)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -g")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -g")
endif()
# Bundle PTXAS if needed
if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
if(NOT EXISTS "${PROJECT_SOURCE_DIR}/build/bin/ptxas")
message(STATUS "Copying PTXAS into the bin folder")
file(COPY "${CUDAToolkit_BIN_DIR}/ptxas" DESTINATION "${PROJECT_BINARY_DIR}")
endif()
install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" DESTINATION "${CMAKE_INSTALL_BINDIR}")
endif()


@ -97,9 +97,9 @@ test/functorch/test_ops.py @zou3519 @chillee @kshitij12345
test/functorch/test_vmap.py @zou3519 @chillee @kshitij12345
# torch MPS
test/test_mps.py @kulinseth
aten/src/ATen/mps/ @kulinseth
aten/src/ATen/native/mps/ @kulinseth
test/test_mps.py @kulinseth @malfet
aten/src/ATen/mps/ @kulinseth @malfet
aten/src/ATen/native/mps/ @kulinseth @malfet
# Profiler
torch/csrc/autograd/profiler* @aaronenyeshi
@ -130,3 +130,12 @@ caffe2/utils/hip @jeffdaily @jithunnair-amd
# torch.export
/torch/export/ @avikchaudhuri @gmagogsfm @tugsbayasgalan @zhxchen17
/torch/_export/ @avikchaudhuri @gmagogsfm @tugsbayasgalan @zhxchen17
# serialization-related files
/aten/src/ATen/MapAllocator* @mikaylagawarecki
/caffe2/serialize/ @mikaylagawarecki
/torch/serialization.py @mikaylagawarecki
/torch/storage.py @mikaylagawarecki
/torch/csrc/Storage* @mikaylagawarecki
# subscribing for PyTorchFileWriter/PyTorchFileReader changes
/torch/csrc/jit/python/init.cpp @mikaylagawarecki


@ -158,13 +158,13 @@ They require JetPack 4.2 and above, and [@dusty-nv](https://github.com/dusty-nv)
#### Prerequisites
If you are installing from source, you will need:
- Python 3.8 or later (for Linux, Python 3.8.1+ is needed)
- A compiler that fully supports C++17, such as clang or gcc (especially for aarch64, gcc 9.4.0 or newer is required)
- A compiler that fully supports C++17, such as clang or gcc (gcc 9.4.0 or newer is required)
We highly recommend installing an [Anaconda](https://www.anaconda.com/download) environment. You will get a high-quality BLAS library (MKL) and you get controlled dependency versions regardless of your Linux distro.
If you want to compile with CUDA support, [select a supported version of CUDA from our support matrix](https://pytorch.org/get-started/locally/), then install the following:
- [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
- [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v7 or above
- [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v8.5 or above
- [Compiler](https://gist.github.com/ax3l/9489132) compatible with CUDA
Note: You could refer to the [cuDNN Support Matrix](https://docs.nvidia.com/deeplearning/cudnn/pdf/cuDNN-Support-Matrix.pdf) for cuDNN versions with the various supported CUDA, CUDA driver and NVIDIA hardware


@ -49,7 +49,7 @@ Following is the Release Compatibility Matrix for PyTorch releases:
| PyTorch version | Python | Stable CUDA | Experimental CUDA |
| --- | --- | --- | --- |
| 2.2 | >=3.8, <=3.11 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 |
| 2.2 | >=3.8, <=3.11, (3.12 experimental) | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 |
| 2.1 | >=3.8, <=3.11 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 |
| 2.0 | >=3.8, <=3.11 | CUDA 11.7, CUDNN 8.5.0.96 | CUDA 11.8, CUDNN 8.7.0.84 |
| 1.13 | >=3.7, <=3.10 | CUDA 11.6, CUDNN 8.3.2.44 | CUDA 11.7, CUDNN 8.5.0.96 |


@ -125,6 +125,15 @@ class Test(torch.jit.ScriptModule):
r = r.contiguous()
return r
@torch.jit.script_method
def conv3d(self, x: Tensor, w: Tensor, toChannelsLast: bool) -> Tensor:
r = torch.nn.functional.conv3d(x, w)
if toChannelsLast:
r = r.contiguous(memory_format=torch.channels_last_3d)
else:
r = r.contiguous()
return r
@torch.jit.script_method
def contiguous(self, x: Tensor) -> Tensor:
return x.contiguous()


@ -348,15 +348,32 @@ public abstract class PytorchTestBase {
@Test
public void testChannelsLastConv2d() throws IOException {
long[] inputShape = new long[] {1, 3, 2, 2};
long[] dataNCHW = new long[] {1, 2, 3, 4, 11, 12, 13, 14, 101, 102, 103, 104};
Tensor inputNCHW = Tensor.fromBlob(dataNCHW, inputShape, MemoryFormat.CONTIGUOUS);
long[] dataNHWC = new long[] {1, 11, 101, 2, 12, 102, 3, 13, 103, 4, 14, 104};
Tensor inputNHWC = Tensor.fromBlob(dataNHWC, inputShape, MemoryFormat.CHANNELS_LAST);
long[] dataNCHW = new long[] {
111, 112,
121, 122,
211, 212,
221, 222,
311, 312,
321, 322};
Tensor inputNCHW = Tensor.fromBlob(dataNCHW, inputShape, MemoryFormat.CONTIGUOUS);
long[] dataNHWC = new long[] {
111, 211, 311, 112, 212, 312,
121, 221, 321, 122, 222, 322};
Tensor inputNHWC = Tensor.fromBlob(dataNHWC, inputShape, MemoryFormat.CHANNELS_LAST);
long[] weightShape = new long[] {3, 3, 1, 1};
long[] dataWeightOIHW = new long[] {2, 0, 0, 0, 1, 0, 0, 0, -1};
long[] dataWeightOIHW = new long[] {
2, 0, 0,
0, 1, 0,
0, 0, -1};
Tensor wNCHW = Tensor.fromBlob(dataWeightOIHW, weightShape, MemoryFormat.CONTIGUOUS);
long[] dataWeightOHWI = new long[] {2, 0, 0, 0, 1, 0, 0, 0, -1};
long[] dataWeightOHWI = new long[] {
2, 0, 0,
0, 1, 0,
0, 0, -1};
Tensor wNHWC = Tensor.fromBlob(dataWeightOHWI, weightShape, MemoryFormat.CHANNELS_LAST);
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
@ -367,7 +384,15 @@ public abstract class PytorchTestBase {
outputNCHW,
MemoryFormat.CONTIGUOUS,
new long[] {1, 3, 2, 2},
new long[] {2, 4, 6, 8, 11, 12, 13, 14, -101, -102, -103, -104});
new long[] {
2*111, 2*112,
2*121, 2*122,
211, 212,
221, 222,
-311, -312,
-321, -322});
final IValue outputNHWC =
module.runMethod("conv2d", IValue.from(inputNHWC), IValue.from(wNHWC), IValue.from(true));
@ -375,7 +400,89 @@ public abstract class PytorchTestBase {
outputNHWC,
MemoryFormat.CHANNELS_LAST,
new long[] {1, 3, 2, 2},
new long[] {2, 11, -101, 4, 12, -102, 6, 13, -103, 8, 14, -104});
new long[] {
2*111, 211, -311, 2*112, 212, -312,
2*121, 221, -321, 2*122, 222, -322});
}
@Test
public void testChannelsLastConv3d() throws IOException {
long[] inputShape = new long[] {1, 3, 2, 2, 2};
long[] dataNCDHW = new long[] {
1111, 1112,
1121, 1122,
1211, 1212,
1221, 1222,
2111, 2112,
2121, 2122,
2211, 2212,
2221, 2222,
3111, 3112,
3121, 3122,
3211, 3212,
3221, 3222};
Tensor inputNCDHW = Tensor.fromBlob(dataNCDHW, inputShape, MemoryFormat.CONTIGUOUS);
long[] dataNDHWC = new long[] {
1111, 2111, 3111,
1112, 2112, 3112,
1121, 2121, 3121,
1122, 2122, 3122,
1211, 2211, 3211,
1212, 2212, 3212,
1221, 2221, 3221,
1222, 2222, 3222};
Tensor inputNDHWC = Tensor.fromBlob(dataNDHWC, inputShape, MemoryFormat.CHANNELS_LAST_3D);
long[] weightShape = new long[] {3, 3, 1, 1, 1};
long[] dataWeightOIDHW = new long[] {
2, 0, 0,
0, 1, 0,
0, 0, -1,
};
Tensor wNCDHW = Tensor.fromBlob(dataWeightOIDHW, weightShape, MemoryFormat.CONTIGUOUS);
long[] dataWeightODHWI = new long[] {
2, 0, 0,
0, 1, 0,
0, 0, -1,
};
Tensor wNDHWC = Tensor.fromBlob(dataWeightODHWI, weightShape, MemoryFormat.CHANNELS_LAST_3D);
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
final IValue outputNCDHW =
module.runMethod("conv3d", IValue.from(inputNCDHW), IValue.from(wNCDHW), IValue.from(false));
assertIValueTensor(
outputNCDHW,
MemoryFormat.CONTIGUOUS,
new long[] {1, 3, 2, 2, 2},
new long[] {
2*1111, 2*1112, 2*1121, 2*1122,
2*1211, 2*1212, 2*1221, 2*1222,
2111, 2112, 2121, 2122,
2211, 2212, 2221, 2222,
-3111, -3112, -3121, -3122,
-3211, -3212, -3221, -3222});
final IValue outputNDHWC =
module.runMethod("conv3d", IValue.from(inputNDHWC), IValue.from(wNDHWC), IValue.from(true));
assertIValueTensor(
outputNDHWC,
MemoryFormat.CHANNELS_LAST_3D,
new long[] {1, 3, 2, 2, 2},
new long[] {
2*1111, 2111, -3111, 2*1112, 2112, -3112,
2*1121, 2121, -3121, 2*1122, 2122, -3122,
2*1211, 2211, -3211, 2*1212, 2212, -3212,
2*1221, 2221, -3221, 2*1222, 2222, -3222});
}
@Test

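The NHWC arrays in these tests are simply the NCHW data in channels-last physical order; the correspondence is easy to check from Python:

```python
import torch

# Shape (1, 3, 2, 2), same values as dataNCHW in the test above.
x = torch.tensor([111, 112, 121, 122,
                  211, 212, 221, 222,
                  311, 312, 321, 322]).reshape(1, 3, 2, 2)

# A channels_last tensor stores its values physically in NHWC order, so
# permuting to NHWC and flattening reproduces the dataNHWC array.
print(x.permute(0, 2, 3, 1).flatten().tolist())
# [111, 211, 311, 112, 212, 312, 121, 221, 321, 122, 222, 322]

# contiguous(memory_format=...) changes only the physical layout; the
# logical values (and hence the conv result) are unchanged.
x_cl = x.contiguous(memory_format=torch.channels_last)
assert torch.equal(x_cl, x)
```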

@ -84,6 +84,15 @@ def conv2d(self, x: Tensor, w: Tensor, toChannelsLast: bool) -> Tensor:
r = r.contiguous()
return r
def conv3d(self, x: Tensor, w: Tensor, toChannelsLast: bool) -> Tensor:
r = torch.conv3d(x, w)
if (toChannelsLast):
# memory_format=torch.channels_last_3d (== 3 in the MemoryFormat enum;
# 2 is channels_last, which only applies to 4D tensors)
r = r.contiguous(memory_format=3)
else:
r = r.contiguous()
return r
def contiguous(self, x: Tensor) -> Tensor:
return x.contiguous()


@ -4,7 +4,9 @@
#include <c10/core/ScalarType.h>
#include <c10/util/BFloat16.h>
#include <c10/util/Float8_e4m3fn.h>
#include <c10/util/Float8_e4m3fnuz.h>
#include <c10/util/Float8_e5m2.h>
#include <c10/util/Float8_e5m2fnuz.h>
#include <c10/util/Half.h>
// Defines the accumulation type for a scalar type.
@ -87,6 +89,8 @@ MPS_ACC_TYPE(BFloat16, float);
MPS_ACC_TYPE(Half, float);
MPS_ACC_TYPE(Float8_e5m2, float);
MPS_ACC_TYPE(Float8_e4m3fn, float);
MPS_ACC_TYPE(Float8_e5m2fnuz, float);
MPS_ACC_TYPE(Float8_e4m3fnuz, float);
MPS_ACC_TYPE(float, float);
MPS_ACC_TYPE(double, float);
MPS_ACC_TYPE(int8_t, int64_t);
@ -107,6 +111,8 @@ CUDA_ACC_TYPE(BFloat16, float);
CUDA_ACC_TYPE(Half, float);
CUDA_ACC_TYPE(Float8_e5m2, float);
CUDA_ACC_TYPE(Float8_e4m3fn, float);
CUDA_ACC_TYPE(Float8_e5m2fnuz, float);
CUDA_ACC_TYPE(Float8_e4m3fnuz, float);
CUDA_ACC_TYPE(float, float);
CUDA_ACC_TYPE(double, double);
CUDA_ACC_TYPE(int8_t, int64_t);
@ -123,8 +129,8 @@ CUDA_ACC_TYPE(c10::complex<double>, c10::complex<double>);
CPU_ACC_TYPE(BFloat16, float);
CPU_ACC_TYPE(Half, float);
CPU_ACC_TYPE(Float8_e5m2, float);
CPU_ACC_TYPE(Float8_e5m2fnuz, float);
CPU_ACC_TYPE(Float8_e4m3fn, float);
CPU_ACC_TYPE(Float8_e5m2fnuz, float);
CPU_ACC_TYPE(Float8_e4m3fnuz, float);
CPU_ACC_TYPE(float, double);
CPU_ACC_TYPE(double, double);


@ -60,11 +60,11 @@ endif()
file(GLOB base_h "*.h" "detail/*.h" "cpu/*.h" "cpu/vec/vec512/*.h" "cpu/vec/vec256/*.h" "cpu/vec/vec256/vsx/*.h" "cpu/vec/vec256/zarch/*.h" "cpu/vec/*.h" "quantized/*.h" "functorch/*.h")
file(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp" "functorch/*.cpp")
file(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh")
file(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp")
file(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh" "cuda/tunable/*.cuh" "cuda/tunable/*.h")
file(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp" "cuda/tunable/*.cpp")
file(GLOB cuda_nvrtc_stub_h "cuda/nvrtc_stub/*.h")
file(GLOB cuda_nvrtc_stub_cpp "cuda/nvrtc_stub/*.cpp")
file(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu")
file(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu" "cuda/tunable/*.cu")
file(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh")
file(GLOB cudnn_cpp "cudnn/*.cpp")
file(GLOB ops_h "ops/*.h")
@ -72,10 +72,10 @@ file(GLOB ops_h "ops/*.h")
file(GLOB xpu_h "xpu/*.h" "xpu/detail/*.h")
file(GLOB xpu_cpp "xpu/*.cpp" "xpu/detail/*.cpp")
file(GLOB hip_h "hip/*.h" "hip/detail/*.h" "hip/*.cuh" "hip/detail/*.cuh" "hip/impl/*.h")
file(GLOB hip_cpp "hip/*.cpp" "hip/detail/*.cpp" "hip/impl/*.cpp")
file(GLOB hip_h "hip/*.h" "hip/detail/*.h" "hip/*.cuh" "hip/detail/*.cuh" "hip/impl/*.h" "hip/tunable/*.cuh" "hip/tunable/*.h")
file(GLOB hip_cpp "hip/*.cpp" "hip/detail/*.cpp" "hip/impl/*.cpp" "hip/tunable/*.cpp")
list(REMOVE_ITEM hip_cpp "${CMAKE_CURRENT_SOURCE_DIR}/hip/detail/LazyNVRTC.cpp")
file(GLOB hip_hip "hip/*.hip" "hip/detail/*.hip" "hip/impl/*.hip")
file(GLOB hip_hip "hip/*.hip" "hip/detail/*.hip" "hip/impl/*.hip" "hip/tunable/*.hip")
file(GLOB hip_nvrtc_stub_h "hip/nvrtc_stub/*.h")
file(GLOB hip_nvrtc_stub_cpp "hip/nvrtc_stub/*.cpp")
file(GLOB miopen_h "miopen/*.h")
@ -141,6 +141,7 @@ file(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp")
file(GLOB native_quantized_cuda_cu "native/quantized/cuda/*.cu")
file(GLOB native_quantized_cuda_cpp "native/quantized/cuda/*.cpp")
file(GLOB native_quantized_cudnn_cpp "native/quantized/cudnn/*.cpp")
file(GLOB native_nested_h "native/nested/*.h")
file(GLOB native_nested_cuda_cu "native/nested/cuda/*.cu")
file(GLOB native_nested_cuda_cpp "native/nested/cuda/*.cpp")
@ -449,19 +450,7 @@ if(NOT MSVC AND NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
set(DISABLE_SVE ON CACHE BOOL "Xcode's clang-12.5 crashes while trying to compile SVE code" FORCE)
endif()
endif()
if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND
CMAKE_C_COMPILER_VERSION VERSION_GREATER 6.9 AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8)
set(GCC_7 True)
else()
set(GCC_7 False)
endif()
if(GCC_7)
set(CMAKE_BUILD_TYPE Release) # Always build Sleef as a Release build to work around a gcc-7 bug
endif()
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/sleef" ${CMAKE_BINARY_DIR}/sleef)
if(GCC_7)
set(CMAKE_BUILD_TYPE ${OLD_CMAKE_BUILD_TYPE})
endif()
set_property(TARGET sleef PROPERTY FOLDER "dependencies")
list(APPEND ATen_THIRD_PARTY_INCLUDE ${CMAKE_BINARY_DIR}/include)
link_directories(${CMAKE_BINARY_DIR}/sleef/lib)
@ -585,7 +574,7 @@ configure_file(ATenConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/AT
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake"
DESTINATION "${AT_INSTALL_SHARE_DIR}/cmake/ATen")
set(INSTALL_HEADERS ${base_h} ${ATen_CORE_HEADERS})
set(INSTALL_HEADERS ${base_h} ${ATen_CORE_HEADERS} ${native_nested_h})
if(NOT INTERN_BUILD_MOBILE)
list(APPEND INSTALL_HEADERS ${native_h} ${native_cpu_h} ${native_ao_sparse_h} ${native_quantized_h} ${cuda_h} ${native_cuda_h} ${native_hip_h} ${cudnn_h} ${hip_h} ${xpu_h} ${mps_h} ${native_mps_h} ${native_utils_h} ${miopen_h})
# Metal


@ -133,6 +133,15 @@ void Context::setSDPUseMath(bool e) {
enabled_mathSDP = e;
}
bool Context::userEnabledCuDNNSDP() const {
return enabled_cudnnSDP;
}
void Context::setSDPUseCuDNN(bool e) {
enabled_cudnnSDP = e;
}
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
static const char cublas_config_var_name[] = "CUBLAS_WORKSPACE_CONFIG";
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)


@ -1,11 +1,13 @@
#pragma once
#include <ATen/CPUGeneratorImpl.h>
#include <ATen/DeviceAccelerator.h>
#include <ATen/LinalgBackend.h>
#include <ATen/core/ATenGeneral.h>
#include <ATen/core/DeprecatedTypeProperties.h>
#include <ATen/core/Generator.h>
#include <ATen/core/LegacyTypeDispatch.h>
#include <ATen/detail/AcceleratorHooksInterface.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <ATen/detail/HIPHooksInterface.h>
#include <ATen/detail/IPUHooksInterface.h>
@ -56,9 +58,26 @@ class TORCH_API Context {
AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
}
}
const AcceleratorHooksInterface& getAcceleratorHooksInterface(
c10::optional<c10::DeviceType> opt_device_type = c10::nullopt) {
c10::DeviceType device_type = opt_device_type.has_value()
? opt_device_type.value()
: at::getAccelerator(true).value();
if (device_type == at::kCUDA) {
return at::detail::getCUDAHooks();
} else if (device_type == at::kMPS) {
return at::detail::getMPSHooks();
} else if (device_type == at::kPrivateUse1) {
return at::detail::getPrivateUse1Hooks();
} else {
AT_ERROR(
c10::DeviceTypeName(device_type), " device type not an accelerator.");
}
}
Device getDeviceFromPtr(void* data, c10::DeviceType device_type) {
initCUDAIfNeeded(device_type);
initHIPIfNeeded(device_type);
initXPUIfNeeded(device_type);
if (device_type == at::kCPU) {
return c10::DeviceType::CPU;
} else if (device_type == at::kCUDA) {
@ -131,6 +150,9 @@ class TORCH_API Context {
void lazyInitHIP() {
c10::call_once(thh_init, [&] { detail::getHIPHooks().initHIP(); });
}
void lazyInitXPU() {
c10::call_once(thx_init, [&] { detail::getXPUHooks().initXPU(); });
}
void lazyInitPrivateUse1() {
c10::call_once(thp_init, [&] {
if (isPrivateUse1HooksRegistered()) {
@ -181,6 +203,9 @@ class TORCH_API Context {
void setSDPUseMath(bool);
bool userEnabledMathSDP() const;
void setSDPUseCuDNN(bool);
bool userEnabledCuDNNSDP() const;
at::LinalgBackend linalgPreferredBackend() const;
void setLinalgPreferredBackend(at::LinalgBackend);
@ -307,9 +332,15 @@ class TORCH_API Context {
lazyInitHIP();
}
}
void initXPUIfNeeded(c10::DeviceType p) {
if (p == c10::DeviceType::XPU) {
lazyInitXPU();
}
}
static bool checkCuBLASConfigDeterministic();
c10::once_flag thc_init;
c10::once_flag thh_init;
c10::once_flag thx_init;
c10::once_flag thp_init;
bool enabled_cudnn = true;
bool deterministic_cudnn = false;
@ -319,6 +350,7 @@ class TORCH_API Context {
bool enabled_flashSDP = true;
bool enabled_mem_efficientSDP = true;
bool enabled_mathSDP = true;
bool enabled_cudnnSDP = false;
#ifdef USE_ROCM
bool benchmark_cudnn = true;
#else

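These flags back the SDPA backend toggles exposed to Python. By analogy with the three existing ones, usage looks roughly like this (a sketch; how the new cuDNN flag is surfaced to Python is not shown in this diff):

```python
import torch
import torch.nn.functional as F

q = k = v = torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.float16)

# enable_flash / enable_math / enable_mem_efficient map to enabled_flashSDP,
# enabled_mathSDP and enabled_mem_efficientSDP above; enabled_cudnnSDP would
# presumably get a fourth, similarly named knob.
with torch.backends.cuda.sdp_kernel(
    enable_flash=True, enable_math=True, enable_mem_efficient=True
):
    out = F.scaled_dot_product_attention(q, k, v)
```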

@ -0,0 +1,31 @@
#include <ATen/DeviceAccelerator.h>
#include <ATen/Context.h>
namespace at {
C10_API std::optional<DeviceType> getAccelerator(bool checked) {
#define CHECK_NO_CUDA \
TORCH_CHECK(!at::hasCUDA(), "Cannot have both CUDA and PrivateUse1");
#define CHECK_NO_PU1 \
TORCH_CHECK(!is_privateuse1_backend_registered(), "Cannot have both CUDA and PrivateUse1");
if (is_privateuse1_backend_registered()) {
// We explicitly allow PrivateUse1 and another device at the same time
// as we use this for testing.
// Whenever a PrivateUse1 device is registered, use it first.
return kPrivateUse1;
} else if (at::hasCUDA()) {
CHECK_NO_PU1
return kCUDA;
} else {
TORCH_CHECK(!checked, "Cannot access accelerator device when none is available.")
return std::nullopt;
}
#undef CHECK_NO_CUDA
#undef CHECK_NO_PU1
}
} // namespace at


@ -0,0 +1,27 @@
#pragma once
#include <c10/core/DeviceType.h>
#include <c10/macros/Macros.h>
#include <ATen/detail/MTIAHooksInterface.h>
#include <optional>
// This file defines the top level Accelerator concept for PyTorch.
// A device is an accelerator per the definition here if:
// - It is mutually exclusive with all other accelerators
// - It performs asynchronous compute via a Stream/Event system
// - It provides a set of common APIs as defined by AcceleratorHooksInterface
//
// As of today, accelerator devices are (in no particular order):
// CUDA, MTIA, PrivateUse1
// We want to add once all the proper APIs are supported and tested:
// HIP, MPS, XPU
namespace at {
// Ensures that only one accelerator is available (at
// compile time if possible) and return it.
// When checked is true, the returned optional always has a value.
TORCH_API std::optional<c10::DeviceType> getAccelerator(bool checked = false);
} // namespace at
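Restating the selection logic above in Python terms (a hypothetical sketch, not a real PyTorch API; the PrivateUse1 check is a heuristic stand-in for `is_privateuse1_backend_registered()`):

```python
from typing import Optional

import torch


def get_accelerator(checked: bool = False) -> Optional[str]:
    # PrivateUse1 wins whenever a backend is registered under it, since it is
    # explicitly allowed to coexist with other devices for testing.
    renamed = torch._C._get_privateuse1_backend_name()
    if renamed != "privateuseone":  # heuristic: the backend renamed itself
        return "privateuse1"
    if torch.cuda.is_available():
        return "cuda"
    if checked:
        raise RuntimeError("Cannot access accelerator device when none is available.")
    return None
```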


@ -145,7 +145,7 @@ SymInt computeStorageNbytes(
// of the last element according to stride
SymInt size = 1;
for (const auto i : c10::irange(sizes.size())) {
if (sizes[i] == 0) {
if (TORCH_GUARD_SIZE_OBLIVIOUS(sizes[i].sym_eq(0))) {
return 0;
}
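The Python side has the analogous helper `guard_size_oblivious`; a small sketch of the same pattern on plain ints or SymInts:

```python
from torch.fx.experimental.symbolic_shapes import guard_size_oblivious


def storage_numel(sizes) -> int:
    # Mirrors the C++ change: instead of guarding on sizes[i] == 0 (which would
    # specialize an unbacked SymInt), ask the size-oblivious question, which
    # assumes an unbacked size is not 0 without inserting a guard.
    numel = 1
    for s in sizes:
        if guard_size_oblivious(s == 0):
            return 0
        numel *= s
    return numel
```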


@ -352,6 +352,41 @@ const char* FunctionalTensorWrapper::tensorimpl_type_name() const {
return "FunctionalTensorWrapper";
}
void FunctionalTensorWrapper::copy_tensor_metadata(
const FunctionalTensorWrapper* src_impl,
FunctionalTensorWrapper* dest_impl,
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change) {
TensorImpl::copy_tensor_metadata(
src_impl,
dest_impl,
version_counter,
allow_tensor_metadata_change);
// FunctionalTensorWrapper-specific fields.
dest_impl->value_ = src_impl->value_;
dest_impl->level_ = src_impl->level_;
dest_impl->mutation_counter_ = src_impl->mutation_counter_;
dest_impl->mutation_hidden_from_autograd_counter_ = src_impl->mutation_hidden_from_autograd_counter_;
dest_impl->mutation_during_no_grad_or_inference_mode_ = src_impl->mutation_during_no_grad_or_inference_mode_;
dest_impl->has_metadata_mutation_ = src_impl->has_metadata_mutation_;
dest_impl->is_multi_output_view_ = src_impl->is_multi_output_view_;
dest_impl->was_storage_changed_ = src_impl->was_storage_changed_;
dest_impl->generation_ = src_impl->generation_;
dest_impl->view_metas_ = src_impl->view_metas_;
}
void FunctionalTensorWrapper::copy_tensor_metadata_and_refresh(
const FunctionalTensorWrapper* src_impl,
FunctionalTensorWrapper* dest_impl,
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change) const {
copy_tensor_metadata(src_impl, dest_impl, version_counter, allow_tensor_metadata_change);
dest_impl->refresh_numel();
dest_impl->refresh_contiguous();
}
template <typename VariableVersion>
c10::intrusive_ptr<TensorImpl> FunctionalTensorWrapper::shallow_copy_and_detach_core(
VariableVersion&& version_counter,
@ -367,16 +402,11 @@ c10::intrusive_ptr<TensorImpl> FunctionalTensorWrapper::shallow_copy_and_detach_
}
auto impl = c10::make_intrusive<FunctionalTensorWrapper>(value_);
copy_tensor_metadata(
copy_tensor_metadata_and_refresh(
/*src_impl=*/this,
/*dest_impl=*/impl.get(),
/*version_counter=*/std::forward<VariableVersion>(version_counter),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
impl->level_ = level_;
impl->generation_ = generation_;
impl->view_metas_ = view_metas_;
impl->refresh_numel();
impl->refresh_contiguous();
return impl;
}
@ -394,6 +424,18 @@ c10::intrusive_ptr<TensorImpl> FunctionalTensorWrapper::shallow_copy_and_detach(
std::move(version_counter), allow_tensor_metadata_change);
}
void FunctionalTensorWrapper::shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) {
AT_ASSERT(has_compatible_shallow_copy_type(impl->key_set()));
auto functional_impl =
static_cast<FunctionalTensorWrapper*>(impl.get());
copy_tensor_metadata_and_refresh(
/*src_impl=*/functional_impl,
/*dest_impl=*/this,
/*version_counter=*/version_counter(),
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change());
}
c10::Device FunctionalTensorWrapper::device_custom() const {
return value_.unsafeGetTensorImpl()->device();
}


@ -211,6 +211,13 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
VariableVersion&& version_counter,
bool allow_tensor_metadata_change) const;
void shallow_copy_from(const c10::intrusive_ptr<TensorImpl>& impl) override;
void copy_tensor_metadata_and_refresh(
const FunctionalTensorWrapper* src_impl,
FunctionalTensorWrapper* dest_impl,
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change) const;
// Note that value is not taken by reference: internally, the wrapper will
// change the value tensor that it points to over time.
Tensor value_;
@ -230,6 +237,13 @@ struct TORCH_API FunctionalTensorWrapper : public c10::TensorImpl {
size_t generation_ = 0;
std::vector<at::functionalization::ViewMeta> view_metas_;
protected:
static void copy_tensor_metadata(
const FunctionalTensorWrapper* src_impl,
FunctionalTensorWrapper* dest_impl,
const c10::VariableVersion& version_counter,
bool allow_tensor_metadata_change);
};
// Utility functions for the functionalization pass.

Some files were not shown because too many files have changed in this diff.