pytorch/.ci/pytorch/check_binary.sh

#!/bin/bash

# shellcheck disable=SC2086,SC2006,SC2207,SC2076,SC2155,SC2046,SC1091,SC2143
# TODO: Re-enable shellchecks above

set -eux -o pipefail

# This script checks the following things on binaries
# 1. The gcc abi matches DESIRED_DEVTOOLSET
# 2. MacOS binaries do not link against OpenBLAS
# 3. There are no protobuf symbols of any sort anywhere (turned off, because
#    this is currently not true)
# 4. Standard Python imports work
# 5. MKL is available everywhere except for MacOS wheels
# 6. XNNPACK is available everywhere except for MacOS wheels
# 7. CUDA is setup correctly and does not hang
# 8. Magma is available for CUDA builds
# 9. CuDNN is available for CUDA builds
#
# This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA,
# DESIRED_DEVTOOLSET and PACKAGE_TYPE
#
# This script expects PyTorch to be installed into the active Python (the
# Python returned by `which python`). Or, if this is testing a libtorch
# Pythonless binary, then it expects to be in the root folder of the unzipped
# libtorch package.


if [[ -z ${DESIRED_PYTHON:-} ]]; then
  export DESIRED_PYTHON=${MATRIX_PYTHON_VERSION:-}
fi
if [[ -z ${DESIRED_CUDA:-} ]]; then
  export DESIRED_CUDA=${MATRIX_DESIRED_CUDA:-}
fi
if [[ -z ${DESIRED_DEVTOOLSET:-} ]]; then
  export DESIRED_DEVTOOLSET=${MATRIX_DESIRED_DEVTOOLSET:-}
fi
if [[ -z ${PACKAGE_TYPE:-} ]]; then
  export PACKAGE_TYPE=${MATRIX_PACKAGE_TYPE:-}
fi

# The install root depends on both the package type and the os
# All MacOS packages use conda, even for the wheel packages.
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
  # NOTE: Only $PWD works on both CentOS and Ubuntu
  export install_root="$PWD"
else

  if [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then
    # For python that is maj.mint keep original version
    py_dot="$DESIRED_PYTHON"
  elif [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+) ]];  then
    # Strip everything but major.minor from DESIRED_PYTHON version
    py_dot="${BASH_REMATCH[0]}"
  else
    echo "Unexpected ${DESIRED_PYTHON} format"
    exit 1
  fi
  export install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/"
fi

###############################################################################
# Check GCC ABI
###############################################################################

# NOTE: As of https://github.com/pytorch/pytorch/issues/126551 we only produce
#       wheels with cxx11-abi

echo "Checking that the gcc ABI is what we expect"
if [[ "$(uname)" != 'Darwin' ]]; then
  # We also check that there are cxx11 symbols in libtorch
  #
  echo "Checking that symbols in libtorch.so have the right gcc abi"
  python3 "$(dirname ${BASH_SOURCE[0]})/smoke_test/check_binary_symbols.py"

  echo "cxx11 symbols seem to be in order"
fi # if on Darwin

###############################################################################
# Check for no OpenBLAS
# TODO Check for no Protobuf symbols (not finished)
# Print *all* runtime dependencies
###############################################################################
# We have to loop through all shared libraries for this
if [[ "$(uname)" == 'Darwin' ]]; then
  all_dylibs=($(find "$install_root" -name '*.dylib'))
  for dylib in "${all_dylibs[@]}"; do
    echo "All dependencies of $dylib are $(otool -L $dylib) with rpath $(otool -l $dylib | grep LC_RPATH -A2)"

    # Check that OpenBlas is not linked to on Macs
    echo "Checking the OpenBLAS is not linked to"
    if [[ -n "$(otool -L $dylib | grep -i openblas)" ]]; then
      echo "ERROR: Found openblas as a dependency of $dylib"
      echo "Full dependencies is: $(otool -L $dylib)"
      exit 1
    fi

    # Check for protobuf symbols
    #proto_symbols="$(nm $dylib | grep protobuf)" || true
    #if [[ -n "$proto_symbols" ]]; then
    #  echo "ERROR: Detected protobuf symbols in $dylib"
    #  echo "Symbols are $proto_symbols"
    #  exit 1
    #fi
  done
else
  all_libs=($(find "$install_root" -name '*.so'))
  for lib in "${all_libs[@]}"; do
    echo "All dependencies of $lib are $(ldd $lib) with runpath $(objdump -p $lib | grep RUNPATH)"

    # Check for protobuf symbols
    #proto_symbols=$(nm $lib | grep protobuf) || true
    #if [[ -n "$proto_symbols" ]]; then
    #  echo "ERROR: Detected protobuf symbols in $lib"
    #  echo "Symbols are $proto_symbols"
    #  exit 1
    #fi
  done
fi

setup_link_flags () {
  REF_LIB="-Wl,-R${install_root}/lib"
  if [[ "$(uname)" == 'Darwin' ]]; then
    REF_LIB="-Wl,-rpath ${install_root}/lib"
  fi
  ADDITIONAL_LINKER_FLAGS=""
  if [[ "$(uname)" == 'Linux' ]]; then
    ADDITIONAL_LINKER_FLAGS="-Wl,--no-as-needed"
  fi
  C10_LINK_FLAGS=""
  if [ -f "${install_root}/lib/libc10.so" ] || [ -f "${install_root}/lib/libc10.dylib" ]; then
    C10_LINK_FLAGS="-lc10"
  fi
  TORCH_CPU_LINK_FLAGS=""
  if [ -f "${install_root}/lib/libtorch_cpu.so" ] || [ -f "${install_root}/lib/libtorch_cpu.dylib" ]; then
    TORCH_CPU_LINK_FLAGS="-ltorch_cpu"
  fi
  TORCH_CUDA_LINK_FLAGS=""
  if [ -f "${install_root}/lib/libtorch_cuda.so" ] || [ -f "${install_root}/lib/libtorch_cuda.dylib" ]; then
    TORCH_CUDA_LINK_FLAGS="-ltorch_cuda"
  elif [ -f "${install_root}/lib/libtorch_cuda_cpp.so" ] && [ -f "${install_root}/lib/libtorch_cuda_cpp.so" ] || \
    [ -f "${install_root}/lib/libtorch_cuda_cu.dylib" ] && [ -f "${install_root}/lib/libtorch_cuda_cu.dylib" ]; then
    TORCH_CUDA_LINK_FLAGS="-ltorch_cuda_cpp -ltorch_cuda_cu"
  fi
}

TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code"
build_and_run_example_cpp () {
  setup_link_flags
  g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
  ./$1
}

###############################################################################
# Check simple Python/C++ calls
###############################################################################
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
  # NS: Set LD_LIBRARY_PATH for CUDA builds, but perhaps it should be removed
  if [[ "$DESIRED_CUDA" == "cu"* ]]; then
    export LD_LIBRARY_PATH=/usr/local/cuda/lib64
  fi
  build_and_run_example_cpp simple-torch-test
else
  pushd /tmp
  python -c 'import torch'
  popd
fi

###############################################################################
# Check torch.git_version
###############################################################################
if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then
  pushd /tmp
  python -c 'import torch; assert torch.version.git_version != "Unknown"'
  python -c 'import torch; assert torch.version.git_version != None'
  popd
fi


###############################################################################
# Check for MKL
###############################################################################

if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
  echo "Checking that MKL is available"
  build_and_run_example_cpp check-torch-mkl
elif [[ "$(uname -m)" != "arm64" && "$(uname -m)" != "s390x" ]]; then
  if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then
    if [[ "$(uname -m)" == "aarch64" ]]; then
      echo "Checking that MKLDNN is available on aarch64"
      pushd /tmp
      python -c 'import torch; exit(0 if torch.backends.mkldnn.is_available() else 1)'
      popd
    else
      echo "Checking that MKL is available"
      pushd /tmp
      python -c 'import torch; exit(0 if torch.backends.mkl.is_available() else 1)'
      popd
    fi
  fi
fi

###############################################################################
# Check for XNNPACK
###############################################################################

if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
  echo "Checking that XNNPACK is available"
  build_and_run_example_cpp check-torch-xnnpack
else
  if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]] && [[ "$(uname -m)" != "s390x"  ]]; then
    echo "Checking that XNNPACK is available"
    pushd /tmp
    python -c 'import torch.backends.xnnpack; exit(0 if torch.backends.xnnpack.enabled else 1)'
    popd
  fi
fi

###############################################################################
# Check XPU configured correctly
###############################################################################
if [[ "$DESIRED_CUDA" == 'xpu' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
  echo "Checking that xpu is compiled"
  python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'
fi

###############################################################################
# Check CUDA configured correctly
###############################################################################
# Skip these for Windows machines without GPUs
if [[ "$OSTYPE" == "msys" ]]; then
    GPUS=$(wmic path win32_VideoController get name)
    if [[ ! "$GPUS" == *NVIDIA* ]]; then
        echo "Skip CUDA tests for machines without a Nvidia GPU card"
        exit 0
    fi
fi

# Test that CUDA builds are setup correctly
if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* && "$(uname -m)" != "s390x" ]]; then
  if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
    build_and_run_example_cpp check-torch-cuda
  else
    pushd /tmp
    echo "Checking that CUDA archs are setup correctly"
    timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'

    # These have to run after CUDA is initialized

    echo "Checking that magma is available"
    python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)'

    echo "Checking that CuDNN is available"
    python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)'

    # Validates builds is free of linker regressions reported in https://github.com/pytorch/pytorch/issues/57744
    echo "Checking that exception handling works"
    python -c "import torch; from unittest import TestCase;TestCase().assertRaises(RuntimeError, lambda:torch.eye(7, 7, device='cuda:7'))"

    echo "Checking that basic RNN works"
    python ${TEST_CODE_DIR}/rnn_smoke.py

    echo "Checking that basic CNN works"
    python "${TEST_CODE_DIR}/cnn_smoke.py"

    echo "Test that linalg works"
    python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))"

    popd
  fi # if libtorch
fi # if cuda

##########################
# Run parts of smoke tests
##########################
if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then
  pushd "$(dirname ${BASH_SOURCE[0]})/smoke_test"
  python -c "from smoke_test import test_linalg; test_linalg()"
  if [[ "$DESIRED_CUDA" == *cuda* ]]; then
    python -c "from smoke_test import test_linalg; test_linalg('cuda')"
  fi
  popd
fi

###############################################################################
# Check PyTorch supports TCP_TLS gloo transport
###############################################################################

if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
  GLOO_CHECK="import torch.distributed as dist
try:
    dist.init_process_group('gloo', rank=0, world_size=1)
except RuntimeError as e:
    print(e)
"
  RESULT=`GLOO_DEVICE_TRANSPORT=TCP_TLS MASTER_ADDR=localhost MASTER_PORT=63945 python -c "$GLOO_CHECK"`
  GLOO_TRANSPORT_IS_NOT_SUPPORTED='gloo transport is not supported'
  if [[ "$RESULT" =~ "$GLOO_TRANSPORT_IS_NOT_SUPPORTED" ]]; then
    echo "PyTorch doesn't support TLS_TCP transport, please build with USE_GLOO_WITH_OPENSSL=1"
    exit 1
  fi
fi