Compare commits

...

19 Commits

Author SHA1 Message Date
d69c22dd61 [docs] Add torch.package documentation for beta release (#59886)
**Summary**
This commit adds documentation for the `torch.package` module to
accompany its beta release in 1.9.

**Test Plan**
Continuous integration.
2021-06-11 13:43:27 -07:00
4ad4f6db7f hold references to storages during TorchScript serialization (#59672)
Fixes a serialization problem caused by using the memory addresses of storages as identifiers for mobile and torch.package models.

 - https://github.com/pytorch/pytorch/pull/59642 hold references to storages during TorchScript serialization

Uses StorageContext to hold a reference to every storage seen during TorchScript serialization, so that tensors can be created and destroyed while serialization is in progress. Keeping the storages alive solves the ABA memory problem (a freed storage's address being reused by a new, distinct storage).
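A rough sketch of the ABA hazard (my own illustration, not from the PR; whether the address is actually reused depends on the allocator):
```python
import torch

a = torch.empty(1024).storage()
addr = a.data_ptr()   # memory address previously used as the storage's serialization key
del a                 # storage freed while serialization is still in flight
b = torch.empty(1024).storage()
# The allocator may hand b the same address: keyed by address alone,
# b would be mistaken for the already-serialized a (the "ABA" problem).
print(addr == b.data_ptr())  # may print True
```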
2021-06-11 13:42:58 -07:00
90e67738b1 [Release/1.9] Link whole CuDNN for CUDA-11.1 (#59873)
* Move cublas dependency after CuDNN (#58287)

Summary:
Library linking order matters during static linking
Not sure whether it's a bug or a feature, but if cublas is referenced
before CuDNN, it will be partially statically linked into the library,
even if it is not used

Pull Request resolved: https://github.com/pytorch/pytorch/pull/58287

Reviewed By: janeyx99

Differential Revision: D28433165

Pulled By: malfet

fbshipit-source-id: 8dffa0533075126dc383428f838f7d048074205c

* [CMake] Split caffe2::cudnn into public and private (#59721)

Summary:
This is only important for builds where cuDNN is linked statically into libtorch_cpu.
Before this PR, PyTorch wheels often accidentally contained several partial copies of the cudnn_static library.
Splitting the interface into header-only (cudnn-public) and library+headers (cudnn-private) targets prevents that from happening.
A preliminary step towards enabling optional whole-library linking of cudnn to work around the issue reported in https://github.com/pytorch/pytorch/issues/50153

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59721

Reviewed By: ngimel

Differential Revision: D29000967

Pulled By: malfet

fbshipit-source-id: f054df92b265e9494076ab16c247427b39da9336

* Add USE_WHOLE_CUDNN option (#59744)

Summary:
It is only enabled if USE_STATIC_CUDNN is enabled.

The next step after https://github.com/pytorch/pytorch/pull/59721 towards resolving the fast-kernel stripping reported in https://github.com/pytorch/pytorch/issues/50153

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59744

Reviewed By: seemethere, ngimel

Differential Revision: D29007314

Pulled By: malfet

fbshipit-source-id: 7091e299c0c6cc2a8aa82fbf49312cecf3bb861a

* [Binary] Link whole CuDNN for CUDA-11.1 (#59802)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/50153

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59802

Reviewed By: driazati, seemethere

Differential Revision: D29033537

Pulled By: malfet

fbshipit-source-id: e816fc71f273ae0b4ba8a0621d5368a2078561a1
2021-06-11 10:38:31 -07:00
43c581aa62 Make detach return an alias even under inference mode (#59633) (#59757)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59633

Fixes #59614

This fix isn't 100% correct, but it appears to stem the bleeding.
A better fix would be to understand how to detect when function
implementations don't uphold required invariants, leading to
refcount disaster.
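In effect, under inference mode `detach` now returns a fresh tensor that aliases the input's storage instead of returning `self`. A minimal sketch of the user-visible behavior (my own illustration, not from the PR):
```python
import torch

with torch.inference_mode():
    x = torch.ones(3)
    y = x.detach()

# detach returns a distinct Tensor object...
assert y is not x
# ...that still aliases the same underlying storage.
assert y.data_ptr() == x.data_ptr()
```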

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Test Plan: Imported from OSS

Reviewed By: gchanan

Differential Revision: D28962183

Pulled By: ezyang

fbshipit-source-id: 6ec71994666289dadef47bac363e6902df90b094
2021-06-11 10:04:14 -07:00
bc446f6a54 Fix test_randperm_device_compatibility for 1 GPU (#59484) (#59502)
Summary:
Do not try to create tensors on the 2nd device if device_count() == 1.
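A sketch of the guard pattern the fix applies (my own illustration):
```python
import torch

# Only exercise a second CUDA device when the machine actually has one.
if torch.cuda.device_count() > 1:
    torch.randperm(10, device="cuda:1")
```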

Fixes #{issue number}

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59484

Reviewed By: ngimel

Differential Revision: D28910673

Pulled By: malfet

fbshipit-source-id: e3517f31a463dd049ce8a5155409b7b716c8df18
2021-06-04 20:01:02 -07:00
abe996a7fb Move CUDA async warning to suffix (#59467) (#59501)
Summary:
After the change, async error warnings look as follows:
```
$ python -c "import torch;torch.eye(3,3,device='cuda:777')"
Traceback (most recent call last):
  File "<string>", line 1, in <module>
RuntimeError: CUDA error: invalid device ordinal
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59467

Reviewed By: ngimel

Differential Revision: D28904360

Pulled By: malfet

fbshipit-source-id: 2a8fa5affed5b4ffcaa602c8ab2669061cde7db0
2021-06-04 20:00:55 -07:00
795df76568 Do not use gold linker for CUDA builds (#59490) (#59500)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/59490

Reviewed By: agolynski, seemethere

Differential Revision: D28913160

Pulled By: malfet

fbshipit-source-id: d27092c252fc86424028abe146cf5f33a2f74544
2021-06-04 20:00:45 -07:00
3b9cd08901 Prefer accurate reciprocal on ARMv8 (#59361) (#59470)
Summary:
The default NEON-accelerated implementation of reciprocal uses vrecpeq_f32, which yields a Newton-Raphson approximation rather than the actual value.
Use regular NEON-accelerated division for the reciprocal and reciprocal-square-root operations.

This fixes `test_reference_numerics_hard_frac_cpu_float32`, `test_reference_numerics_normal_rsqrt_cpu_float32`, etc.
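For reference, `vrecpeq_f32` supplies only a coarse estimate of 1/a, and each `vrecpsq_f32`/`vmulq_f32` pair in the old code performed one Newton-Raphson refinement step. A plain-Python sketch of why a fixed number of steps stays approximate:
```python
# One Newton-Raphson step for f(x) = 1/x - a is x = x * (2 - a * x);
# vrecpsq_f32 computes the (2 - a * x) factor on NEON.
a = 3.0
x = 0.3  # stand-in for the coarse hardware estimate of 1/a
for _ in range(2):  # the old code ran two refinement steps
    x = x * (2.0 - a * x)
print(x, 1.0 / a)  # ~0.3333 vs 0.3333333333333333 -- close, but not exact
```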

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59361

Reviewed By: mruberry

Differential Revision: D28870456

Pulled By: malfet

fbshipit-source-id: e634b0887cce7efb046ea1fd9b74424e0eceb164
2021-06-04 18:34:39 -07:00
226c274f70 Search for static OpenBLAS compiled with OpenMP (#59428) (#59463)
Summary:
Before this change, only a dynamically linked OpenBLAS compiled with OpenMP could
be found.

Also gets rid of the hardcoded path to libgfortran.a in FindLAPACK.cmake.

Only affects aarch64 linux builds

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59428

Reviewed By: agolynski

Differential Revision: D28891314

Pulled By: malfet

fbshipit-source-id: 5af55a14c85ac66551ad2805c5716bbefe8d55b2
2021-06-04 11:15:58 -07:00
ce24cab257 Fix torch.randperm for CUDA (#59352) (#59452)
Summary:
Context https://github.com/pytorch/pytorch/issues/58545

The logic is that we are going to keep it consistent for both
torch.randperm and torch.randint:

1. Generators can have either a fully-specified or a non-fully-specified device
2. As long as the device type matches the result's, we don't error out (see the sketch below)
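A hypothetical session illustrating both rules (assumes at least one CUDA device):
```python
import torch

g_full = torch.Generator(device="cuda:0")  # fully-specified device
g_type = torch.Generator(device="cuda")    # device type only

# Accepted: the generator's device *type* matches the result's.
torch.randperm(5, generator=g_full, device="cuda:0")
torch.randperm(5, generator=g_type, device="cuda:0")

# Rejected: mismatched device types error out.
# torch.randperm(5, generator=g_full, device="cpu")  # RuntimeError
```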

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59352

Test Plan:
```
python test/test_tensor_creation_ops.py -k TestRandomTensorCreation
```

Reviewed By: ngimel

Differential Revision: D28855920

Pulled By: zhouzhuojie

fbshipit-source-id: f8141a2c4b2f177e1aa7baec6999b65916cba02c
2021-06-04 10:23:29 -07:00
d98d113810 .circleci: Disable USE_GOLD_LINKER for CUDA 10.2 (#59413) (#59462)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59413

For CUDA 10.2 builds linked with the gold linker, we were observing
crashes when exceptions were raised

Signed-off-by: Eli Uriegas <eliuriegas@fb.com>

Test Plan: Imported from OSS

Reviewed By: malfet

Differential Revision: D28888054

Pulled By: seemethere

fbshipit-source-id: f9b38147591721803ed3cac607510fe5bbc49d6d
(cherry picked from commit c7a3a13baba0d547c5c20579328b0b3d83b94656)
Signed-off-by: Eli Uriegas <eliuriegas@fb.com>
2021-06-04 10:22:51 -07:00
17a44c2bb5 Added missing namespaces for C++ API (#45736) (#59367)
Summary:
Hello,

Depending on the build environment, you may encounter
```c++
error: reference to 'optional' is ambiguous
```
when using the Torch C++ API.

This PR adds `c10::` to avoid possible ambiguities with **std::optional** and does not introduce any functional change.

Fixes https://discuss.pytorch.org/t/linker-failed-with-ambiguous-references/36255 .

Pull Request resolved: https://github.com/pytorch/pytorch/pull/45736

Reviewed By: dzhulgakov

Differential Revision: D24125123

Pulled By: VitalyFedyunin

fbshipit-source-id: df21420f0a2d0270227c28976a7a4218315cc107

Co-authored-by: Johannes Czech <QueensGambit@users.noreply.github.com>
2021-06-03 10:39:51 -07:00
26e6fa380e [vulkan] Remove constant duplication for Vulkan optimize_for_mobile (#59341)
ghstack-source-id: bb809586d27d1285660d1db2c3561b46d158f499
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59276
2021-06-03 09:45:56 -07:00
bf16699cc8 [Release-1.9] Disable failing ROCM-4.2 tests (#59339)
* [ROCm] disable test test_Conv2d_groups_nobias for ROCm (#59158)

Summary:
Disabling the test since it's failing on ROCm 4.2

Signed-off-by: Jagadish Krishnamoorthy <jagdish.krishna@gmail.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/59158

Reviewed By: mruberry

Differential Revision: D28808953

Pulled By: ngimel

fbshipit-source-id: 134f147ead6dc559d2cde49cf8343cd976e6c224

* [ROCm] disable test test_Conv2d_groups_nobias_v2 for ROCm (#58701)

Summary:
Disable the test_Conv2d_groups_nobias_v2 test because it is failing on ROCm 4.2

Pull Request resolved: https://github.com/pytorch/pytorch/pull/58701

Reviewed By: ngimel

Differential Revision: D28626651

Pulled By: mruberry

fbshipit-source-id: a74bdf45335ae2afee0aa5e3bece6e208e75a63f

Co-authored-by: Jagadish Krishnamoorthy <jagdish.krishna@gmail.com>
Co-authored-by: Kyle Chen <kylechen@amd.com>
2021-06-02 15:07:06 -07:00
6d4fe05502 Build with USE_GLOO_WITH_OPENSSL=1 (#59274)
Needed for https://github.com/pytorch/builder/pull/779

Co-authored-by: Your Name <driazati@users.noreply.github.com>
2021-06-02 08:18:25 -07:00
b046542f8a Add breakpad + debug builds (#59275)
This is the combination of #59236 and #58685, which will enable <insert builder PR here> to land on the release branch. This enables breakpad for minidump collection (which is still opt-in) and debug builds for the release.

Co-authored-by: Your Name <driazati@users.noreply.github.com>
2021-06-01 23:32:08 -07:00
5d57b9392c [pkg] Catch exceptions where dependency resolution gets invalid imports (#58573) (#59272)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/58573

Users can create invalid imports, like:
```
HG: in a top-level package
if False:
  from .. import foo
```

Since this code is never executed, it will not cause the module to fail to
load. But our dependency analysis walks every `import` statement in the AST,
and will attempt to resolve the (incorrectly formed) import, throwing an exception.

For posterity, the code that triggered this: https://git.io/JsCgM

Differential Revision: D28543980

Test Plan: Added a unit test

Reviewed By: Chillee

Pulled By: suo

fbshipit-source-id: 03b7e274633945b186500fab6f974973ef8c7c7d

Co-authored-by: Michael Suo <suo@fb.com>
2021-06-01 15:51:38 -07:00
f6a9351776 [pkg] simplifications to broken dependency handling (#58572) (#59273)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/58572

Right now, we have three categories of error (broken, denied, unhandled). This
PR unifies them into a single "error" field in the node, with optional context.
It also generalizes how formatting of the error in PackagingError occurs.

Differential Revision: D28543982

Test Plan: sandcastle

Reviewed By: Chillee

Pulled By: suo

fbshipit-source-id: d99d37699ec2e172e3798763e60aafe9a66ed6f4

Co-authored-by: Michael Suo <suo@fb.com>
2021-06-01 15:51:30 -07:00
3071601491 [c10d] Fix monitored_barrier with wait_all_ranks (#58702) (#59266)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/58702

Off-by-one error when determining whether some ranks failed with
`wait_all_ranks=True`. This wasn't caught by tests because the tests only
covered failure scenarios, not success scenarios with `wait_all_ranks=True`.
ghstack-source-id: 129559840

Test Plan: CI

Reviewed By: zhaojuanmao

Differential Revision: D28583235

fbshipit-source-id: a8f376efb13a3f36c788667acab86543c80aff59
2021-06-01 15:45:16 -07:00
42 changed files with 1313 additions and 313 deletions

View File

@ -88,6 +88,7 @@ case "$image" in
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3.6-gcc7.2)
ANACONDA_PYTHON_VERSION=3.6
@ -100,6 +101,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7)
CUDA_VERSION=10.0
@ -109,6 +111,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7)
CUDA_VERSION=10.1
@ -119,6 +122,7 @@ case "$image" in
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7)
CUDA_VERSION=10.2
@ -129,6 +133,7 @@ case "$image" in
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7)
CUDA_VERSION=11.1
@ -139,6 +144,7 @@ case "$image" in
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7)
CUDA_VERSION=11.3.0 # Deviating from major.minor to conform to nvidia's Docker image names
@ -149,6 +155,7 @@ case "$image" in
DB=yes
VISION=yes
KATEX=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3-clang5-asan)
ANACONDA_PYTHON_VERSION=3.6
@ -156,6 +163,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3-clang7-onnx)
ANACONDA_PYTHON_VERSION=3.6
@ -163,6 +171,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-xenial-py3-clang5-android-ndk-r19c)
ANACONDA_PYTHON_VERSION=3.6
@ -181,6 +190,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-py3.6-clang9)
ANACONDA_PYTHON_VERSION=3.6
@ -188,6 +198,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
VULKAN_SDK_VERSION=1.2.162.1
SWIFTSHADER=yes
;;
@ -198,6 +209,7 @@ case "$image" in
DB=yes
VISION=yes
BREAKPAD=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9)
CUDA_VERSION=10.2
@ -207,6 +219,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda10.2-cudnn7-py3.8-gcc9)
CUDA_VERSION=10.2
@ -216,6 +229,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7)
CUDA_VERSION=10.2
@ -225,6 +239,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
;;
pytorch-linux-bionic-cuda11.0-cudnn8-py3.6-gcc9)
CUDA_VERSION=11.0
@ -234,6 +249,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=3.9
;;
pytorch-linux-bionic-rocm4.0.1-py3.6)
@ -242,6 +258,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=4.0.1
;;
pytorch-linux-bionic-rocm4.1-py3.6)
@ -250,6 +267,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=4.1
;;
pytorch-linux-bionic-rocm4.2-py3.6)
@ -258,6 +276,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
ROCM_VERSION=4.2
;;
*)
@ -265,6 +284,7 @@ case "$image" in
PROTOBUF=yes
DB=yes
VISION=yes
BREAKPAD=yes
echo "image '$image' did not match an existing build configuration"
if [[ "$image" == *py* ]]; then
extract_version_from_image_name py ANACONDA_PYTHON_VERSION

View File

@ -2,12 +2,24 @@
set -ex
git clone https://github.com/google/breakpad.git
cd breakpad
git clone https://github.com/driazati/breakpad.git
pushd breakpad
# breakpad has no actual releases, so this is pinned to the top commit from
# main when this was forked (including the one patch commit). This uses a fork
# of the breakpad mainline that automatically daisy-chains out to any previously
# installed signal handlers (instead of overwriting them).
git checkout 5485e473ed46d065e05489e50dfc59d90dfd7e22
git clone https://chromium.googlesource.com/linux-syscall-support src/third_party/lss
pushd src/third_party/lss
# same as with breakpad, there are no real releases for this repo so use a
# commit as the pin
git checkout e1e7b0ad8ee99a875b272c8e33e308472e897660
popd
./configure
make
make install
cd ..
popd
rm -rf breakpad

View File

@ -38,6 +38,10 @@ if [[ "$DESIRED_CUDA" == "cu112" ]]; then
EXTRA_CONDA_FLAGS="-c=conda-forge"
fi
# Move debug wheels out of the package dir so they don't get installed
mkdir -p /tmp/debug_final_pkgs
mv /final_pkgs/debug-*.zip /tmp/debug_final_pkgs || echo "no debug packages to move"
# Install the package
# These network calls should not have 'retry's because they are installing
# locally and aren't actually network calls

View File

@ -68,6 +68,18 @@ if [[ -z "$DOCKER_IMAGE" ]]; then
fi
fi
USE_GOLD_LINKER="OFF"
# GOLD linker can not be used if CUPTI is statically linked into PyTorch, see https://github.com/pytorch/pytorch/issues/57744
if [[ ${DESIRED_CUDA} == "cpu" ]]; then
USE_GOLD_LINKER="ON"
fi
USE_WHOLE_CUDNN="OFF"
# Link whole cuDNN for CUDA-11.1 to include fp16 fast kernels
if [[ "$(uname)" == "Linux" && "${DESIRED_CUDA}" == "cu111" ]]; then
USE_WHOLE_CUDNN="ON"
fi
# Default to nightly, since that's where this normally uploads to
PIP_UPLOAD_FOLDER='nightly/'
# We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
@ -169,7 +181,9 @@ export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
export CIRCLE_BRANCH="$CIRCLE_BRANCH"
export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
export USE_GOLD_LINKER=1
export USE_GOLD_LINKER="${USE_GOLD_LINKER}"
export USE_GLOO_WITH_OPENSSL="ON"
export USE_WHOLE_CUDNN="${USE_WHOLE_CUDNN}"
# =================== The above code will be executed inside Docker container ===================
EOL

View File

@ -194,6 +194,9 @@ cmake_dependent_option(
cmake_dependent_option(
USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
"USE_CUDNN" OFF)
cmake_dependent_option(
USE_WHOLE_CUDNN "Use whole-library linking for cuDNN" OFF
"USE_STATIC_CUDNN" OFF)
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
option(USE_KINETO "Use Kineto profiling library" ON)
option(USE_CUPTI_SO "Use CUPTI as a shared library" OFF)

View File

@ -14,11 +14,11 @@ namespace at {
// OptionalDeviceGuard guard(device_of(tensor));
/// Return the Device of a Tensor, if the Tensor is defined.
inline optional<Device> device_of(const Tensor& t) {
inline c10::optional<Device> device_of(const Tensor& t) {
if (t.defined()) {
return make_optional(t.device());
return c10::make_optional(t.device());
} else {
return nullopt;
return c10::nullopt;
}
}
@ -29,11 +29,11 @@ inline optional<Device> device_of(const optional<Tensor>& t) {
/// Return the Device of a TensorList, if the list is non-empty and
/// the first Tensor is defined. (This function implicitly assumes
/// that all tensors in the list have the same device.)
inline optional<Device> device_of(TensorList t) {
inline c10::optional<Device> device_of(TensorList t) {
if (!t.empty()) {
return device_of(t.front());
} else {
return nullopt;
return c10::nullopt;
}
}

View File

@ -133,7 +133,6 @@ TORCH_API DimnameList get_names(const TensorImpl* impl);
// tensor is constructed with names=None.
TORCH_API c10::optional<DimnameList> get_opt_names(const TensorImpl* impl);
} // namespace impl
} // namespace at

View File

@ -479,23 +479,12 @@ public:
vsqrtq_f32(values.val[1]));
}
Vec256<float> reciprocal() const {
float32x4_t r0 = vrecpeq_f32(values.val[0]);
float32x4_t r1 = vrecpeq_f32(values.val[1]);
// Run two more Netwon's method iterations to get more accurate results
r0 = vmulq_f32(vrecpsq_f32(values.val[0], r0), r0);
r0 = vmulq_f32(vrecpsq_f32(values.val[0], r0), r0);
r1 = vmulq_f32(vrecpsq_f32(values.val[1], r1), r1);
r1 = vmulq_f32(vrecpsq_f32(values.val[1], r1), r1);
auto r0 = vdivq_f32(vdupq_n_f32(1.0f), values.val[0]);
auto r1 = vdivq_f32(vdupq_n_f32(1.0f), values.val[1]);
return Vec256<float>(r0, r1);
}
Vec256<float> rsqrt() const {
float32x4_t r0 = vrsqrteq_f32(values.val[0]);
float32x4_t r1 = vrsqrteq_f32(values.val[1]);
r0 = vmulq_f32(vrsqrtsq_f32(vmulq_f32(values.val[0], r0), r0), r0);
r0 = vmulq_f32(vrsqrtsq_f32(vmulq_f32(values.val[0], r0), r0), r0);
r1 = vmulq_f32(vrsqrtsq_f32(vmulq_f32(values.val[1], r1), r1), r1);
r1 = vmulq_f32(vrsqrtsq_f32(vmulq_f32(values.val[1], r1), r1), r1);
return Vec256<float>(r0, r1);
return this->sqrt().reciprocal();
}
Vec256<float> pow(const Vec256<float> &exp) const {
__at_align32__ float tmp[size()];

View File

@ -47,12 +47,6 @@ bool cudnn_is_acceptable(const Tensor& self) {
return true;
}
Tensor detach(const Tensor& self) {
// this just exists to give us a hook in VariableType and an entry in Declarations.yaml
//AT_ERROR("detach is not implemented for Tensor");
return self;
}
Tensor & detach_(Tensor & self) {
// this just exists to give us a hook in VariableType and an entry in Declarations.yaml
//AT_ERROR("detach_ is not implemented for Tensor");

View File

@ -2170,6 +2170,12 @@ Tensor alias(const Tensor& self) {
return alias_with_sizes_and_strides(self, self.sizes(), self.strides());
}
Tensor detach(const Tensor& self) {
// this just exists to give us a hook in VariableType and an entry in Declarations.yaml
//AT_ERROR("detach is not implemented for Tensor");
return native::alias(self);
}
Tensor unfold(const Tensor& self, int64_t dimension, int64_t size, int64_t step) {
// some special handling to deal with allow dimension == 0 when self.dim() == 0
dimension = at::maybe_wrap_dim(dimension, self.dim(), /*wrap_scalar=*/true);

View File

@ -47,9 +47,9 @@ template <int N> struct alignas(N) OpaqueType { char data[N]; };
Tensor& randperm_out_cuda(int64_t n, c10::optional<Generator> generator, Tensor& result) {
TORCH_CHECK(n >= 0, "n must be non-negative, got", n);
TORCH_CHECK(!generator.has_value() || (generator.has_value() && result.device() == generator->device()), "Expected a '", result.device(), "' generator device but found '", generator->device(), "'");
TORCH_CHECK(n <= std::numeric_limits<int>::max(),
"randperm of tensors larger than INT_MAX is not supported yet in pytorch");
check_supported_max_int_with_precision(n, result);
result.resize_({n});
@ -73,13 +73,15 @@ Tensor& randperm_out_cuda(int64_t n, c10::optional<Generator> generator, Tensor&
const double log_threshold_12 = std::log(0.9) * 12;
double nd = static_cast<double>(n);
constexpr bool is_reduced_bits = true;
int bits = std::min(64,
static_cast<int>(std::ceil(std::log2(nd - (6 * nd * nd + 1) / log_threshold_12))));
if (n == 0) {
return result;
} else if (bits <= 32) {
// For asserting device type match of the generator and result,
// we delegate that to the 'random_' function below.
auto keys = at::empty(result.sizes(), opt.dtype(kInt)).random_(
std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), generator);
auto keys_tmp = at::empty_like(keys);

View File

@ -33,17 +33,21 @@ class C10_CUDA_API CUDAError : public c10::Error {
} \
} while (0)
#else
#define C10_CUDA_CHECK(EXPR) \
do { \
cudaError_t __err = EXPR; \
if (__err != cudaSuccess) { \
auto error_unused C10_UNUSED = cudaGetLastError(); \
auto _cuda_check_prefix = c10::cuda::get_cuda_check_prefix(); \
throw c10::CUDAError( \
{__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
TORCH_CHECK_MSG( \
false, "", _cuda_check_prefix, cudaGetErrorString(__err))); \
} \
#define C10_CUDA_CHECK(EXPR) \
do { \
cudaError_t __err = EXPR; \
if (__err != cudaSuccess) { \
auto error_unused C10_UNUSED = cudaGetLastError(); \
auto _cuda_check_suffix = c10::cuda::get_cuda_check_suffix(); \
throw c10::CUDAError( \
{__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
TORCH_CHECK_MSG( \
false, \
"", \
"CUDA error: ", \
cudaGetErrorString(__err), \
_cuda_check_suffix)); \
} \
} while (0)
#endif

View File

@ -141,17 +141,16 @@ void device_synchronize() {
C10_CUDA_CHECK(cudaDeviceSynchronize());
}
const char* get_cuda_check_prefix() noexcept {
const char* get_cuda_check_suffix() noexcept {
static char* device_blocking_flag = getenv("CUDA_LAUNCH_BLOCKING");
static bool blocking_enabled =
(device_blocking_flag && atoi(device_blocking_flag));
if (blocking_enabled) {
return "CUDA error: ";
return "";
} else {
return "CUDA kernel errors might be "
"asynchronously reported at some other API call,so the "
"stacktrace below might be incorrect. For debugging "
"consider passing CUDA_LAUNCH_BLOCKING=1. CUDA error: ";
return "\nCUDA kernel errors might be asynchronously reported at some"
" other API call,so the stacktrace below might be incorrect."
"\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.";
}
}

View File

@ -30,7 +30,7 @@ C10_CUDA_API void set_device(DeviceIndex device);
C10_CUDA_API void device_synchronize();
C10_CUDA_API const char* get_cuda_check_prefix() noexcept;
C10_CUDA_API const char* get_cuda_check_suffix() noexcept;
} // namespace cuda
} // namespace c10

View File

@ -738,6 +738,7 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
${TORCH_SRC_DIR}/csrc/api/src/optim/schedulers/step_lr.cpp
${TORCH_SRC_DIR}/csrc/api/src/serialize/input-archive.cpp
${TORCH_SRC_DIR}/csrc/api/src/serialize/output-archive.cpp
${TORCH_SRC_DIR}/csrc/utils/crash_handler.cpp
)
endif()
@ -1020,9 +1021,10 @@ endif()
if(LINUX)
find_library(BREAKPAD_LIB breakpad_client)
find_path(BREAKPAD_INCLUDE_DIR breakpad)
if(BREAKPAD_LIB_FOUND AND BREAKPAD_INCLUDE_DIR_FOUND)
target_link_libraries(torch_cpu PRIVATE ${BREAKPAD_LIB})
add_compile_definitions(ADD_BREAKPAD_SIGNAL_HANDLER)
if(BREAKPAD_LIB AND BREAKPAD_INCLUDE_DIR)
message(STATUS "found breakpad library")
target_link_libraries(torch_cpu PUBLIC ${BREAKPAD_LIB})
target_compile_definitions(torch_cpu PRIVATE ADD_BREAKPAD_SIGNAL_HANDLER)
target_include_directories(torch_cpu PUBLIC ${BREAKPAD_INCLUDE_DIR}/breakpad)
else()
message(STATUS "breakpad library not found")
@ -1466,6 +1468,10 @@ if(BUILD_SPLIT_CUDA)
target_link_libraries(
torch_cuda_cpp PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
target_link_libraries(torch_cuda_cu PRIVATE torch_cuda_cpp)
if(USE_CUDNN)
target_link_libraries(
torch_cuda_cpp PRIVATE caffe2::cudnn-private)
endif()
# These public dependencies must go after the previous dependencies, as the
# order of the libraries in the linker call matters here when statically
@ -1482,6 +1488,10 @@ elseif(USE_CUDA)
torch_cuda PRIVATE ${Caffe2_GPU_INCLUDE})
target_link_libraries(
torch_cuda PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
if(USE_CUDNN)
target_link_libraries(
torch_cuda PRIVATE caffe2::cudnn-private)
endif()
# These public dependencies must go after the previous dependencies, as the
# order of the libraries in the linker call matters here when statically

View File

@ -1165,7 +1165,7 @@ if(USE_CUDA)
caffe2_update_option(USE_NVRTC OFF)
endif()
if(CAFFE2_USE_CUDNN)
list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn)
list(APPEND Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS caffe2::cudnn-public)
else()
caffe2_update_option(USE_CUDNN OFF)
endif()

View File

@ -67,7 +67,7 @@ MACRO(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list)
else ( APPLE )
find_library(${_prefix}_${_library}_LIBRARY
NAMES ${_library}
PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 /opt/OpenBLAS/lib /usr/lib/aarch64-linux-gnu
PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 /opt/OpenBLAS/lib /usr/lib/aarch64-linux-gnu ${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}
ENV LD_LIBRARY_PATH )
endif( APPLE )
mark_as_advanced(${_prefix}_${_library}_LIBRARY)
@ -178,6 +178,19 @@ if((NOT BLAS_LIBRARIES)
endif(BLAS_LIBRARIES)
endif()
if((NOT BLAS_LIBRARIES)
AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open")))
check_fortran_libraries(
BLAS_LIBRARIES
BLAS
sgemm
""
"openblas;pthread;m;gomp")
if(BLAS_LIBRARIES)
set(BLAS_INFO "open")
endif(BLAS_LIBRARIES)
endif()
if((NOT BLAS_LIBRARIES) AND (WIN32)
AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL "open")))
check_fortran_libraries(

View File

@ -128,13 +128,7 @@ if(BLAS_FOUND)
if(NOT LAPACK_CGESDD_WORKS)
find_library(GFORTRAN_LIBRARY
NAMES libgfortran.a gfortran
PATHS /usr/lib/gcc/aarch64-linux-gnu/9/
/usr/lib/gcc/x86_64-redhat-linux/9/
/usr/lib/gcc/aarch64-linux-gnu/8/
/usr/lib/gcc/x86_64-redhat-linux/8/
/usr/lib/gcc/aarch64-linux-gnu/7/
/usr/lib/gcc/x86_64-redhat-linux/7/
)
PATHS ${CMAKE_C_IMPLICIT_LINK_DIRECTORIES})
list(APPEND CMAKE_REQUIRED_LIBRARIES "${GFORTRAN_LIBRARY}")
unset(LAPACK_CGESDD_WORKS CACHE)
check_function_exists("cgesdd_" LAPACK_CGESDD_WORKS)

View File

@ -90,8 +90,12 @@ function(caffe2_print_configuration_summary)
get_target_property(__tmp caffe2::curand IMPORTED_LOCATION)
message(STATUS " curand library : ${__tmp}")
if(${USE_CUDNN})
get_target_property(__tmp caffe2::cudnn IMPORTED_LOCATION)
get_target_property(__tmp caffe2::cudnn-public INTERFACE_LINK_LIBRARIES)
message(STATUS " cuDNN library : ${__tmp}")
if(${CUDNN_STATIC})
get_target_property(__tmp caffe2::cudnn-private INTERFACE_LINK_LIBRARIES)
message(STATUS " cuDNN static library: ${__tmp}")
endif()
endif()
get_target_property(__tmp caffe2::nvrtc IMPORTED_LOCATION)
message(STATUS " nvrtc : ${__tmp}")

View File

@ -272,20 +272,66 @@ else()
${LIBNVTOOLSEXT})
endif()
# cudnn
# static linking is handled by USE_STATIC_CUDNN environment variable
if(CAFFE2_USE_CUDNN)
add_library(caffe2::cudnn UNKNOWN IMPORTED)
set_property(
TARGET caffe2::cudnn PROPERTY IMPORTED_LOCATION
${CUDNN_LIBRARY_PATH})
set_property(
TARGET caffe2::cudnn PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDNN_INCLUDE_PATH})
if(CUDNN_STATIC AND NOT WIN32)
# cublas. CUDA_CUBLAS_LIBRARIES is actually a list, so we will make an
# interface library similar to cudart.
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::cudnn PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a")
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.1)
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a")
# Add explicit dependency to cudart_static to fix
# libcublasLt_static.a.o): undefined reference to symbol 'cudaStreamWaitEvent'
# error adding symbols: DSO missing from command line
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_cudart_static_LIBRARY}" rt dl)
endif()
else()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
${CUDA_CUBLAS_LIBRARIES})
endif()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDA_INCLUDE_DIRS})
# cudnn public and private interfaces
# static linking is handled by USE_STATIC_CUDNN environment variable
# If the library is linked dynamically, then the private interface is a no-op
# If library is linked statically:
# - public interface would only reference headers
# - private interface will contain the actual link instructions
if(CAFFE2_USE_CUDNN)
add_library(caffe2::cudnn-public INTERFACE IMPORTED)
set_property(
TARGET caffe2::cudnn-public PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDNN_INCLUDE_PATH})
add_library(caffe2::cudnn-private INTERFACE IMPORTED)
set_property(
TARGET caffe2::cudnn-private PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDNN_INCLUDE_PATH})
if(CUDNN_STATIC AND NOT WIN32)
if(USE_WHOLE_CUDNN)
set_property(
TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES
"-Wl,--whole-archive,\"${CUDNN_LIBRARY_PATH}\" -Wl,--no-whole-archive")
else()
set_property(
TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES
${CUDNN_LIBRARY_PATH})
endif()
set_property(
TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
# Add explicit dependency on cublas to cudnn
get_target_property(__tmp caffe2::cublas INTERFACE_LINK_LIBRARIES)
set_property(
TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${__tmp}")
# Lines below use target_link_libraries because we support cmake 3.5+.
# For cmake 3.13+, target_link_options to set INTERFACE_LINK_OPTIONS would be better.
# https://cmake.org/cmake/help/v3.5/command/target_link_libraries.html warns
@ -295,8 +341,12 @@ if(CAFFE2_USE_CUDNN)
# link items that will not propagate to dependents."
# Propagating to a dependent (torch_cuda) is exactly what we want here, so we are
# flouting the warning, but I can't think of a better (3.5+ compatible) way.
target_link_libraries(caffe2::cudnn INTERFACE
target_link_libraries(caffe2::cudnn-private INTERFACE
"-Wl,--exclude-libs,libcudnn_static.a")
else()
set_property(
TARGET caffe2::cudnn-public PROPERTY INTERFACE_LINK_LIBRARIES
${CUDNN_LIBRARY_PATH})
endif()
endif()
@ -346,33 +396,6 @@ if(CAFFE2_USE_TENSORRT)
${TENSORRT_INCLUDE_DIR})
endif()
# cublas. CUDA_CUBLAS_LIBRARIES is actually a list, so we will make an
# interface library similar to cudart.
add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a")
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.1)
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a")
# Add explicit dependency to cudart_static to fix
# libcublasLt_static.a.o): undefined reference to symbol 'cudaStreamWaitEvent'
# error adding symbols: DSO missing from command line
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_cudart_static_LIBRARY}" rt dl)
endif()
else()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
${CUDA_CUBLAS_LIBRARIES})
endif()
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_INCLUDE_DIRECTORIES
${CUDA_INCLUDE_DIRS})
# nvrtc
add_library(caffe2::nvrtc UNKNOWN IMPORTED)
set_property(

View File

@ -7,6 +7,778 @@ torch.package
This module is experimental and has not yet been publicly released.
``torch.package`` adds support for creating hermetic packages containing arbitrary
PyTorch code. These packages can be saved, shared, used to load and execute models
at a later date or on a different machine, and can even be deployed to production using
``torch::deploy``.
This document contains tutorials, how-to guides, explanations, and an API reference that
will help you learn more about ``torch.package`` and how to use it.
.. contents:: :local:
:depth: 2
Tutorials
---------
Packaging your first model
^^^^^^^^^^^^^^^^^^^^^^^^^^
A tutorial that guides you through packaging and unpackaging a simple model is available
`on Colab <https://colab.research.google.com/drive/1dWATcDir22kgRQqBg2X_Lsh5UPfC7UTK?usp=sharing>`_.
After completing this exercise, you will be familiar with the basic API for creating and using
Torch packages.
How do I...
-----------
See what is inside a package?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Treat the package like a ZIP archive
""""""""""""""""""""""""""""""""""""
The container format for a ``torch.package`` is ZIP, so any tools that work with standard ZIP files should
work for exploring the contents. Some common ways to interact with ZIP files:
* ``unzip my_package.pt`` will unzip the ``torch.package`` archive to disk, where you can freely inspect its contents.
::
$ unzip my_package.pt && tree my_package
my_package
├── .data
│ ├── 94304870911616.storage
│ ├── 94304900784016.storage
│ ├── extern_modules
│ └── version
├── models
│ └── model_1.pkl
└── torchvision
└── models
├── resnet.py
└── utils.py
~ cd my_package && cat torchvision/models/resnet.py
...
* The Python ``zipfile`` module provides a standard way to read and write ZIP archive contents.
::
from zipfile import ZipFile
with ZipFile("my_package.pt") as myzip:
    file_bytes = myzip.read("torchvision/models/resnet.py")
    # edit file_bytes in some way
    myzip.writestr("torchvision/models/resnet.py", new_file_bytes)
* vim can natively read ZIP archives. You can even edit files and ``:write`` them back into the archive!
::
# add this to your .vimrc to treat `*.pt` files as zip files
au BufReadCmd *.pt call zip#Browse(expand("<amatch>"))
~ vi my_package.pt
Use the ``file_structure()`` API
""""""""""""""""""""""""""""""""
:class:`PackageImporter` and :class:`PackageExporter` provide a ``file_structure()`` method, which will return a printable
and queryable ``Folder`` object. The ``Folder`` object is a simple directory structure that you can use to explore the
current contents of a ``torch.package``.
The ``Folder`` object itself is directly printable and will print out a file tree representation. To filter what is returned,
use the glob-style ``include`` and ``exclude`` filtering arguments.
::
with PackageExporter('my_package.pt', verbose=False) as pe:
    pe.save_pickle('models', 'model_1.pkl', mod)
    # can limit printed items with include/exclude args
    print(pe.file_structure(include=["**/utils.py", "**/*.pkl"], exclude="**/*.storages"))
importer = PackageImporter('my_package.pt')
print(importer.file_structure()) # will print out all files
Output:
::
# filtered with glob pattern:
# include=["**/utils.py", "**/*.pkl"], exclude="**/*.storages"
─── my_package.pt
├── models
│ └── model_1.pkl
└── torchvision
└── models
└── utils.py
# all files
─── my_package.pt
├── .data
│ ├── 94304870911616.storage
│ ├── 94304900784016.storage
│ ├── extern_modules
│ └── version
├── models
│ └── model_1.pkl
└── torchvision
└── models
├── resnet.py
└── utils.py
You can also query ``Folder`` objects with the ``has_file()`` method.
::
exporter_file_structure = exporter.file_structure()
found: bool = exporter_file_structure.has_file("package_a/subpackage.py")
Include arbitrary resources with my package and access them later?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
:class:`PackageExporter` exposes three methods, ``save_pickle``, ``save_text`` and ``save_binary`` that allow you to save
Python objects, text, and binary data to a package.
::
with torch.package.PackageExporter("package.pt") as exporter:
    # Pickles the object and saves to `my_resources/tensor.pkl` in the archive.
    exporter.save_pickle("my_resources", "tensor.pkl", torch.randn(4))
    exporter.save_text("config_stuff", "words.txt", "a sample string")
    exporter.save_binary("raw_data", "binary", my_bytes)
:class:`PackageImporter` exposes complementary methods named ``load_pickle``, ``load_text`` and ``load_binary`` that allow you to load
Python objects, text and binary data from a package.
::
importer = torch.package.PackageImporter("package.pt")
my_tensor = importer.load_pickle("my_resources", "tensor.pkl")
text = importer.load_text("config_stuff", "words.txt")
binary = importer.load_binary("raw_data", "binary")
Customize how a class is packaged?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``torch.package`` allows for the customization of how classes are packaged. This behavior is accessed through defining the method
``__reduce_package__`` on a class and by defining a corresponding de-packaging function. This is similar to defining ``__reduce__`` for
Python's normal pickling process.
Steps:
1. Define the method ``__reduce_package__(self, exporter: PackageExporter)`` on the target class. This method should do the work to save the class instance inside of the package, and should return a tuple of the corresponding de-packaging function with the arguments needed to invoke the de-packaging function. This method is called by the ``PackageExporter`` when it encounters an instance of the target class.
2. Define a de-packaging function for the class. This de-packaging function should do the work to reconstruct and return an instance of the class. The function signature's first parameter should be a ``PackageImporter`` instance, and the rest of the parameters are user-defined.
::
# foo.py [Example of customizing how class Foo is packaged]
from torch.package import PackageExporter, PackageImporter
import time

class Foo:
    def __init__(self, my_string: str):
        super().__init__()
        self.my_string = my_string
        self.time_imported = 0
        self.time_exported = 0

    def __reduce_package__(self, exporter: PackageExporter):
        """
        Called by ``torch.package.PackageExporter``'s Pickler's ``persistent_id`` when
        saving an instance of this object. This method should do the work to save this
        object inside of the ``torch.package`` archive.

        Returns function w/ arguments to load the object from a
        ``torch.package.PackageImporter``'s Pickler's ``persistent_load`` function.
        """
        # use this pattern to ensure no naming conflicts with normal dependencies,
        # anything saved under this module name shouldn't conflict with other
        # items in the package
        generated_module_name = f"foo-generated._{exporter.get_unique_id()}"
        exporter.save_text(
            generated_module_name,
            "foo.txt",
            self.my_string + ", with exporter modification!",
        )
        time_exported = time.clock_gettime(1)

        # returns de-packaging function w/ arguments to invoke with
        return (unpackage_foo, (generated_module_name, time_exported,))


def unpackage_foo(
    importer: PackageImporter, generated_module_name: str, time_exported: float
) -> Foo:
    """
    Called by ``torch.package.PackageImporter``'s Pickler's ``persistent_load`` function
    when depickling a Foo object.

    Performs work of loading and returning a Foo instance from a ``torch.package`` archive.
    """
    time_imported = time.clock_gettime(1)
    foo = Foo(importer.load_text(generated_module_name, "foo.txt"))
    foo.time_imported = time_imported
    foo.time_exported = time_exported
    return foo
::
# example of saving instances of class Foo
import torch
from torch.package import PackageImporter, PackageExporter
import foo
foo_1 = foo.Foo("foo_1 initial string")
foo_2 = foo.Foo("foo_2 initial string")
with PackageExporter('foo_package.pt', verbose=False) as pe:
    # save as normal, no extra work necessary
    pe.save_pickle('foo_collection', 'foo1.pkl', foo_1)
    pe.save_pickle('foo_collection', 'foo2.pkl', foo_2)
    print(pe.file_structure())
pi = PackageImporter('foo_package.pt')
imported_foo = pi.load_pickle('foo_collection', 'foo1.pkl')
print(f"foo_1 string: '{imported_foo.my_string}'")
print(f"foo_1 export time: {imported_foo.time_exported}")
print(f"foo_1 import time: {imported_foo.time_imported}")
::
# output of running above script
─── foo_package
├── foo-generated
│ ├── _0
│ │ └── foo.txt
│ └── _1
│ └── foo.txt
├── foo_collection
│ ├── foo1.pkl
│ └── foo2.pkl
└── foo.py
foo_1 string: 'foo_1 initial string, with exporter modification!'
foo_1 export time: 9857706.650140837
foo_1 import time: 9857706.652698385
Test in my source code whether or not it is executing inside a package?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
A :class:`PackageImporter` will add the attribute ``__torch_package__`` to every module that it initializes. Your code can check for the
presence of this attribute to determine whether it is executing in a packaged context or not.
::
# In foo/bar.py:
if "__torch_package__" in dir(): # true if the code is being loaded from a package
def is_in_package():
return True
UserException = Exception
else:
def is_in_package():
return False
UserException = UnpackageableException
Now, the code will behave differently depending on whether it's imported normally through your Python environment or imported from a
``torch.package``.
::
from foo.bar import is_in_package
print(is_in_package()) # False
loaded_module = PackageImporter(my_package).import_module("foo.bar")
loaded_module.is_in_package() # True
**Warning**: in general, it's bad practice to have code that behaves differently depending on whether it's packaged or not. This can lead to
hard-to-debug issues that are sensitive to how you imported your code. If your package is intended to be heavily used, consider restructuring
your code so that it behaves the same way no matter how it was loaded.
Patch code into a package?
^^^^^^^^^^^^^^^^^^^^^^^^^^
:class:`PackageExporter` offers a ``save_source_string()`` method that allows one to save arbitrary Python source code to a module of your choosing.
::
with PackageExporter(f) as exporter:
    # Save the my_module.foo available in your current Python environment.
    exporter.save_module("my_module.foo")

    # This saves the provided string to my_module/foo.py in the package archive.
    # It will override the my_module.foo that was previously saved.
    exporter.save_source_string("my_module.foo", textwrap.dedent(
        """\
        def my_function():
            print('hello world')
        """
    ))

    # If you want to treat my_module.bar as a package
    # (e.g. save to `my_module/bar/__init__.py` instead of `my_module/bar.py`)
    # pass is_package=True,
    exporter.save_source_string("my_module.bar",
                                "def foo(): print('hello')\n",
                                is_package=True)
importer = PackageImporter(f)
importer.import_module("my_module.foo").my_function() # prints 'hello world'
Access package contents from packaged code?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
:class:`PackageImporter` implements the
`importlib.resources <https://docs.python.org/3/library/importlib.html#module-importlib.resources>`_
API for accessing resources from inside a package.
::
with PackageExporter(f) as exporter:
    # saves text to my_resource/a.txt in the archive
    exporter.save_text("my_resource", "a.txt", "hello world!")
    # saves the tensor to my_pickle/obj.pkl
    exporter.save_pickle("my_pickle", "obj.pkl", torch.ones(2, 2))
    # see below for module contents
    exporter.save_module("foo")
    exporter.save_module("bar")
The ``importlib.resources`` API allows access to resources from within packaged code.
::
# foo.py:
import importlib.resources
import my_resource
# returns "hello world!"
def get_my_resource():
    return importlib.resources.read_text(my_resource, "a.txt")
Using ``importlib.resources`` is the recommended way to access package contents from within packaged code, since it complies
with the Python standard. However, it is also possible to access the parent :class:`PackageImporter` instance itself from within
packaged code.
::
# bar.py:
import torch_package_importer # this is the PackageImporter that imported this module.
# Prints "hello world!", equivalient to importlib.resources.read_text
def get_my_resource():
return torch_package_importer.load_text("my_resource", "a.txt")
# You also do things that the importlib.resources API does not support, like loading
# a pickled object from the package.
def get_my_pickle():
return torch_package_importer.load_pickle("my_pickle", "obj.pkl")
Distinguish between packaged code and non-packaged code?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To tell if an object's code is from a ``torch.package``, use the ``torch.package.is_from_package()`` function.
Note: if an object is from a package but its definition is from a module marked ``extern`` or from ``stdlib``,
this check will return ``False``.
::
importer = PackageImporter(f)
mod = importer.import_module('foo')
obj = importer.load_pickle('model', 'model.pkl')
txt = importer.load_text('text', 'my_test.txt')
assert is_from_package(mod)
assert is_from_package(obj)
assert not is_from_package(txt) # str is from stdlib, so this will return False
Re-export an imported object?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To re-export an object that was previously imported by a :class:`PackageImporter`, you must make the new :class:`PackageExporter`
aware of the original :class:`PackageImporter` so that it can find source code for your object's dependencies.
::
importer = PackageImporter(f)
obj = importer.load_pickle("model", "model.pkl")
# re-export obj in a new package
with PackageExporter(f2, importer=(importer, sys_importer)) as exporter:
    exporter.save_pickle("model", "model.pkl", obj)
Package a TorchScript module?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To package a TorchScript model, use the same ``save_pickle`` and ``load_pickle`` APIs as you would with any other object.
Saving TorchScript objects that are attributes or submodules is supported as well with no extra work.
::
# save TorchScript just like any other object
with PackageExporter(file_name, verbose=True) as e:
    e.save_pickle("res", "script_model.pkl", scripted_model)
    e.save_pickle("res", "mixed_model.pkl", python_model_with_scripted_submodule)

# load as normal
importer = PackageImporter(file_name)
loaded_script = importer.load_pickle("res", "script_model.pkl")
loaded_mixed = importer.load_pickle("res", "mixed_model.pkl")
Explanation
-----------
``torch.package`` Format Overview
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
A ``torch.package`` file is a ZIP archive which conventionally uses the ``.pt`` extension. Inside the ZIP archive, there are two kinds of files:
* Framework files, which are placed in the ``.data/`` directory.
* User files, which are everything else.
As an example, this is what a fully packaged ResNet model from ``torchvision`` looks like:
::
resnet
├── .data # All framework-specific data is stored here.
│ │ # It's named to avoid conflicts with user-serialized code.
│ ├── 94286146172688.storage # tensor data
│ ├── 94286146172784.storage
│ ├── extern_modules # text file with names of extern modules (e.g. 'torch')
│ ├── version # version metadata
│ ├── ...
├── model # the pickled model
│ └── model.pkl
└── torchvision # all code dependencies are captured as source files
└── models
├── resnet.py
└── utils.py
Framework files
"""""""""""""""
The ``.data/`` directory is owned by torch.package, and its contents are considered to be a private implementation detail.
The ``torch.package`` format makes no guarantees about the contents of ``.data/``, but any changes made will be backward compatible
(that is, newer versions of PyTorch will always be able to load older ``torch.packages``).
Currently, the ``.data/`` directory contains the following items:
* ``version``: a version number for the serialized format, so that the ``torch.package`` import infrastructure knows how to load this package.
* ``extern_modules``: a list of modules that are considered ``extern``. ``extern`` modules will be imported using the loading environment's system importer.
* ``*.storage``: serialized tensor data.
::
.data
├── 94286146172688.storage
├── 94286146172784.storage
├── extern_modules
├── version
├── ...
User files
""""""""""
All other files in the archive were put there by a user. The layout is identical to a Python
`regular package <https://docs.python.org/3/reference/import.html#regular-packages>`_. For a deeper dive in how Python packaging works,
please consult `this essay <https://www.python.org/doc/essays/packages/>`_ (it's slightly out of date, so double-check implementation details
with the `Python reference documentation <https://docs.python.org/3/library/importlib.html>`_).
::
<package root>
├── model # the pickled model
│ └── model.pkl
├── another_package
│ ├── __init__.py
│ ├── foo.txt # a resource file, see importlib.resources
│ └── ...
└── torchvision
└── models
├── resnet.py # torchvision.models.resnet
└── utils.py # torchvision.models.utils
How ``torch.package`` finds your code's dependencies
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Analyzing an object's dependencies
""""""""""""""""""""""""""""""""""
When you issue a ``save_pickle(obj, ...)`` call, :class:`PackageExporter` will pickle the object normally. Then, it uses the
``pickletools`` standard library module to parse the pickle bytecode.
In a pickle, an object is saved along with a ``GLOBAL`` opcode that describes where to find the implementation of the object's type, like:
::
GLOBAL 'torchvision.models.resnet Resnet'
The dependency resolver will gather up all ``GLOBAL`` ops and mark them as dependencies of your pickled object.
For more information about pickling and the pickle format, please consult `the Python docs <https://docs.python.org/3/library/pickle.html>`_.
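You can inspect these opcodes yourself with the ``pickletools`` standard library module. A rough sketch of the idea (illustrative only, not the actual resolver):
::

    import pickle, pickletools
    import torch

    # Protocol 2 emits GLOBAL opcodes with an inline "module name" argument.
    data = pickle.dumps(torch.nn.Linear(2, 2), protocol=2)
    for opcode, arg, _ in pickletools.genops(data):
        if opcode.name == "GLOBAL":
            print(arg)  # e.g. "torch.nn.modules.linear Linear"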
Analyzing a module's dependencies
"""""""""""""""""""""""""""""""""
When a Python module is identified as a dependency, ``torch.package`` walks the module's Python AST representation and looks for import statements, with
full support for the standard forms: ``from x import y``, ``import z``, ``from w import v as u``, etc. When one of these import statements is
encountered, ``torch.package`` registers the imported modules as dependencies, which are then themselves parsed in the same way.
**Note**: AST parsing has limited support for the ``__import__(...)`` syntax and does not support ``importlib.import_module`` calls. In general, you should
not expect dynamic imports to be detected by ``torch.package``.
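A simplified sketch of such an AST walk (illustrative only, not the actual implementation):
::

    import ast

    source = "import z\nfrom x import y\nfrom w import v as u\n"
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            print("import:", [alias.name for alias in node.names])  # ['z']
        elif isinstance(node, ast.ImportFrom):
            print("from:", node.module, [alias.name for alias in node.names])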
Dependency Management
^^^^^^^^^^^^^^^^^^^^^
``torch.package`` automatically finds the Python modules that your code and objects depend on. This process is called dependency resolution.
For each module that the dependency resolver finds, you must specify an *action* to take.
The allowed actions are:
* ``intern``: put this module into the package.
* ``extern``: declare this module as an external dependency of the package.
* ``mock``: stub out this module.
* ``deny``: depending on this module will raise an error during package export.
Finally, there is one more important action that is not technically part of ``torch.package``:
* Refactoring: remove or change the dependencies in your code.
Note that actions are only defined on entire Python modules. There is no way to package “just” a function or class from a module and leave the rest out.
This is by design. Python does not offer clean boundaries between objects defined in a module. The only defined unit of dependency organization is a
module, so that's what ``torch.package`` uses.
Actions are applied to modules using patterns. Patterns can either be module names (``"foo.bar"``) or globs (like ``"foo.**"``). You associate a pattern
with an action using methods on :class:`PackageExporter`, e.g.
::
my_exporter.intern("torchvision.**")
my_exporter.extern("numpy")
If a module matches a pattern, the corresponding action is applied to it. For a given module, patterns will be checked in the order that they were defined,
and the first action will be taken.
``intern``
""""""""""
If a module is ``intern``-ed, it will be placed into the package.
This is the action for your model code, or any related code you want to package. For example, if you are trying to package a ResNet from ``torchvision``,
you will need to ``intern`` the module ``torchvision.models.resnet``.
On package import, when your packaged code tries to import an ``intern``-ed module, PackageImporter will look inside your package for that module.
If it can't find that module, an error will be raised. This ensures that each :class:`PackageImporter` is isolated from the loading environment: even
if you have ``my_interned_module`` available in both your package and the loading environment, :class:`PackageImporter` will only use the version in your
package.
**Note**: Only Python source modules can be ``intern``-ed. Other kinds of modules, like C extension modules and bytecode modules, will raise an error if
you attempt to ``intern`` them. These kinds of modules need to be ``mock``-ed or ``extern``-ed.
``extern``
""""""""""
If a module is ``extern``-ed, it will not be packaged. Instead, it will be added to a list of external dependencies for this package. You can find this
list on ``package_exporter.extern_modules``.
On package import, when the packaged code tries to import an ``extern``-ed module, :class:`PackageImporter` will use the default Python importer to find
that module, as if you did ``importlib.import_module("my_externed_module")``. If it can't find that module, an error will be raised.
In this way, you can depend on third-party libraries like ``numpy`` and ``scipy`` from within your package without having to package them too.
**Warning**: If any external library changes in a backwards-incompatible way, your package may fail to load. If you need long-term reproducibility
for your package, try to limit your use of ``extern``.
``mock``
""""""""
If a module is ``mock``-ed, it will not be packaged. Instead, a stub module will be packaged in its place. The stub module will allow you to retrieve
objects from it (so that ``from my_mocked_module import foo`` will not error), but any use of that object will raise a ``NotImplementedError``.
``mock`` should be used for code that you “know” will not be needed in the loaded package, but that you still want available for use in non-packaged contexts.
For example, initialization/configuration code, or code only used for debugging/training.
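A hypothetical session (``my_model`` and ``make_dataframe`` are made-up names):
::

    with PackageExporter(f) as exporter:
        exporter.intern("my_model.**")
        exporter.mock("pandas.**")  # my_model imports pandas, but only uses it for training
        exporter.save_module("my_model")

    mod = PackageImporter(f).import_module("my_model")  # import succeeds; pandas is a stub
    mod.make_dataframe()  # raises NotImplementedError when the mocked object is used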
**Warning**: In general, ``mock`` should be used as a last resort. It introduces behavioral differences between packaged code and non-packaged code,
which may lead to later confusion. Prefer instead to refactor your code to remove unwanted dependencies.
Refactoring
"""""""""""
The best way to manage dependencies is to not have dependencies at all! Often, code can be refactored to remove unnecessary dependencies. Here are some
guidelines for writing code with clean dependencies (which are also generally good practices!):
**Include only what you use**. Do not leave unused imports in your code. The dependency resolver is not smart enough to tell that they are indeed unused,
and will try to process them.
**Qualify your imports**. For example, instead of writing ``import foo`` and later using ``foo.bar.baz``, prefer to write ``from foo.bar import baz``. This more
precisely specifies your real dependency (``foo.bar``) and lets the dependency resolver know you don't need all of ``foo``.
**Split up large files with unrelated functionality into smaller ones**. If your ``utils`` module contains a hodge-podge of unrelated functionality, any module
that depends on ``utils`` will need to pull in lots of unrelated dependencies, even if you only needed a small part of it. Prefer instead to define
single-purpose modules that can be packaged independently of one another.
Patterns
""""""""
Patterns allow you to specify groups of modules with a convenient syntax. The syntax and behavior of patterns follow that of Bazel/Buck
`glob() <https://docs.bazel.build/versions/master/be/functions.html#glob>`_.
A module that we are trying to match against a pattern is called a candidate. A candidate is composed of a list of segments separated by a
separator string, e.g. ``foo.bar.baz``.
A pattern contains one or more segments. Segments can be:
* A literal string (e.g. ``foo``), which matches exactly.
* A string containing a wildcard (e.g. ``torch*``, or ``foo*baz*``). The wildcard matches any string, including the empty string.
* A double wildcard (``**``). This matches against zero or more complete segments.
Examples:
* ``torch.**``: matches ``torch`` and all its submodules, e.g. ``torch.nn`` and ``torch.nn.functional``.
* ``torch.*``: matches ``torch.nn`` or ``torch.functional``, but not ``torch.nn.functional`` or ``torch``.
* ``torch*.**``: matches ``torch``, ``torchvision``, and all of their submodules.
When specifying actions, you can pass multiple patterns, e.g.
::

    exporter.intern(["torchvision.models.**", "torchvision.utils.**"])
A module will match against this action if it matches any of the patterns.
You can also specify patterns to exclude, e.g.
::

    exporter.mock("**", exclude=["torchvision.**"])
A module will not match against this action if it matches any of the exclude patterns. In this example, we are mocking all modules except
``torchvision`` and its submodules.
When a module could potentially match against multiple actions, the first action defined will be taken.
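For example, in the following sketch ``numpy`` matches both patterns, but because the ``extern`` action was defined first, ``numpy`` will be externed
rather than mocked:

::

    exporter.extern("numpy.**")
    exporter.mock("**")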
``torch.package`` sharp edges
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Avoid global state in your modules
""""""""""""""""""""""""""""""""""
Python makes it really easy to bind objects and run code at module-level scope. This is generally fine—after all, functions and classes are bound to
names this way. However, things become more complicated when you define an object at module scope with the intention of mutating it, introducing mutable
global state.
Mutable global state is quite useful—it can reduce boilerplate, allow for open registration into tables, etc. But unless employed very carefully, it can
cause complications when used with ``torch.package``.
Every :class:`PackageImporter` creates an independent environment for its contents. This is nice because it means we can load multiple packages and ensure
they are isolated from each other, but when modules are written in a way that assumes shared mutable global state, this behavior can create hard-to-debug
errors.
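As a minimal sketch of the hazard, consider a hypothetical ``registry`` module that is interned in a package:

::

    # registry.py
    HANDLERS = {}

    def register(name, fn):
        HANDLERS[name] = fn

Each :class:`PackageImporter` gets its own copy of ``HANDLERS``, so registrations made in the loading environment are invisible to the packaged code,
and vice versa.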
Types are not shared between packages and the loading environment
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Any class that you import from a :class:`PackageImporter` will be a version of the class specific to that importer. For example:
::

    from foo import MyClass

    my_class_instance = MyClass()

    with PackageExporter(f) as exporter:
        exporter.save_module("foo")

    importer = PackageImporter(f)
    imported_MyClass = importer.import_module("foo").MyClass

    assert isinstance(my_class_instance, MyClass)  # works
    assert isinstance(my_class_instance, imported_MyClass)  # ERROR!
In this example, ``MyClass`` and ``imported_MyClass`` are *not the same type*. In this specific example, ``MyClass`` and ``imported_MyClass`` have exactly the
same implementation, so you might think it's okay to consider them the same class. But consider the situation where ``imported_MyClass`` is coming from an
older package with an entirely different implementation of ``MyClass``; in that case, it's unsafe to consider them the same class.
Under the hood, each importer has a prefix that allows it to uniquely identify classes:
::

    print(MyClass.__name__)           # prints "foo.MyClass"
    print(imported_MyClass.__name__)  # prints "<torch_package_0>.foo.MyClass"
That means you should not expect ``isinstance`` checks to work when one of the arguments is from a package and the other is not. If you need this
functionality, consider the following options:
* Do duck typing (just use the object instead of explicitly checking that it is of a given type).
* Make the typing relationship an explicit part of the class contract. For example, you can add an attribute tag ``self.handler = "handle_me_this_way"`` and have client code check for the value of ``handler`` instead of checking the type directly.
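A sketch of the second option (the attribute name, tag value, and helper are purely illustrative):

::

    class MyHandler:
        handler = "handle_me_this_way"

    def process(obj):
        # Check the contract attribute rather than the concrete type;
        # this works even across package boundaries.
        if getattr(obj, "handler", None) == "handle_me_this_way":
            handle(obj)  # hypothetical handling function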
How ``torch.package`` keeps packages isolated from each other
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Each :class:`PackageImporter` instance creates an independent, isolated environment for its modules and objects. Modules in a package can only import
other packaged modules, or modules marked ``extern``. If you use multiple :class:`PackageImporter` instances to load a single package, you will get
multiple independent environments that do not interact.
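For example (a sketch, assuming ``f`` names a file containing a saved package with a module ``foo``):

::

    importer1 = PackageImporter(f)
    importer2 = PackageImporter(f)

    mod1 = importer1.import_module("foo")
    mod2 = importer2.import_module("foo")

    assert mod1 is not mod2  # each importer creates its own module object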
This is achieved by extending Python's import infrastructure with a custom importer. :class:`PackageImporter` provides the same core API as the
``importlib`` importer; namely, it implements the ``import_module`` and ``__import__`` methods.
When you invoke :meth:`PackageImporter.import_module`, :class:`PackageImporter` will construct and return a new module, much as the system importer does.
However, :class:`PackageImporter` patches the returned module to use ``self`` (i.e. that :class:`PackageImporter` instance) to fulfill future import
requests by looking in the package rather than searching the user's Python environment.
Mangling
""""""""
To avoid confusion (“is this ``foo.bar`` object the one from my package, or the one from my Python environment?”), :class:`PackageImporter` mangles the
``__name__`` and ``__file__`` of all imported modules, by adding a *mangle prefix* to them.
For ``__name__``, a name like ``torchvision.models.resnet18`` becomes ``<torch_package_0>.torchvision.models.resnet18``.
For ``__file__``, a name like ``torchvision/models/resnet18.py`` becomes ``<torch_package_0>.torchvision/models/resnet18.py``.
Name mangling helps avoid inadvertent punning of module names between different packages, and helps you debug by making stack traces and print
statements more clearly show whether they are referring to packaged code or not. For developer-facing details about mangling, consult
``mangling.md`` in ``torch/package/``.
API Reference
-------------
.. autoclass:: torch.package.PackagingError

View File

@ -6,7 +6,7 @@ from unittest import skipIf
from torch.package import EmptyMatchError, Importer, PackageExporter, PackageImporter
from torch.package.package_exporter import PackagingError
from torch.testing._internal.common_utils import run_tests
from torch.testing._internal.common_utils import IS_WINDOWS, run_tests
try:
from .common import PackageTestCase
@ -224,19 +224,27 @@ class TestDependencyAPI(PackageTestCase):
buffer = BytesIO()
try:
with self.assertRaises(PackagingError) as e:
with PackageExporter(buffer, verbose=False) as he:
he.save_pickle("obj", "obj.pkl", obj2)
except PackagingError as e:
self.assertEqual(e.unhandled, set(["package_a", "package_a.subpackage"]))
else:
self.fail("PackagingError should have been raised")
self.assertEqual(
str(e.exception),
dedent(
"""
* Module did not match against any action pattern. Extern, mock, or intern it.
package_a
package_a.subpackage
"""
),
)
# Interning all dependencies should work
with PackageExporter(buffer, verbose=False) as he:
he.intern(["package_a", "package_a.subpackage"])
he.save_pickle("obj", "obj.pkl", obj2)
@skipIf(IS_WINDOWS, "extension modules have a different file extension on windows")
def test_broken_dependency(self):
"""A unpackageable dependency should raise a PackagingError."""
@ -262,16 +270,42 @@ class TestDependencyAPI(PackageTestCase):
buffer = BytesIO()
try:
with self.assertRaises(PackagingError) as e:
with PackageExporter(
buffer, verbose=False, importer=BrokenImporter()
) as exporter:
exporter.intern(["foo", "bar"])
exporter.save_source_string("my_module", "import foo; import bar")
except PackagingError as e:
self.assertEqual(set(e.broken.keys()), set(["foo", "bar"]))
else:
self.fail("PackagingError should have been raised")
self.assertEqual(
str(e.exception),
dedent(
"""
* Module is a C extension module. torch.package supports Python modules only.
foo
bar
"""
),
)
def test_invalid_import(self):
"""An incorrectly-formed import should raise a PackagingError."""
buffer = BytesIO()
with self.assertRaises(PackagingError) as e:
with PackageExporter(buffer, verbose=False) as exporter:
# This import will fail to load.
exporter.save_source_string("foo", "from ........ import lol")
self.assertEqual(
str(e.exception),
dedent(
"""
* Dependency resolution failed.
foo
Context: attempted relative import beyond top-level package
"""
),
)
if __name__ == "__main__":

View File

@ -886,7 +886,7 @@ class TestCppExtensionJIT(common.TestCase):
#include <torch/torch.h>
int fail() {{
torch::crash_handler::_enable_minidump_collection("{destination}");
torch::crash_handler::enable_minidumps("{destination}");
volatile int* bad = nullptr;
return *bad;

View File

@ -5467,6 +5467,7 @@ class TestNN(NNTestCase):
# For https://github.com/pytorch/pytorch/pull/1273
# Almost identical to the above `test_Conv2d_naive_groups`
@skipIfRocm
def test_Conv2d_groups_nobias(self):
dev_dtypes = [("cpu", torch.float)]
if TEST_CUDA:
@ -5504,6 +5505,7 @@ class TestNN(NNTestCase):
# Covering special case when group > 1, input-channel / group < 16 and output-channel is multiple of 16
# See also https://github.com/pytorch/pytorch/pull/18463#issuecomment-476563686
# and https://github.com/pytorch/pytorch/pull/18463#issuecomment-477001024
@skipIfRocm
def test_Conv2d_groups_nobias_v2(self):
torch.manual_seed(123)
dev_dtypes = [("cpu", torch.float)]

View File

@ -3327,8 +3327,28 @@ class TestRandomTensorCreation(TestCase):
def test_randperm_device_compatibility(self, device):
cuda_gen = torch.Generator(device='cuda')
cpu_gen = torch.Generator(device='cpu')
for n in (0, 3, 100, 30000):
regex = 'Expected a .* generator device but found .*'
# n=0 is a special case in which the generator is not used, so no error is
# raised even if the device and generator don't match
torch.randperm(0, device='cuda:0', generator=torch.Generator(device='cuda:1'))
if torch.cuda.device_count() > 1:
torch.randperm(0, device='cuda:1', generator=torch.Generator(device='cuda:0'))
torch.randperm(0, device='cuda', generator=torch.Generator(device='cpu'))
torch.randperm(0, device='cpu', generator=torch.Generator(device='cuda'))
for n in (1, 3, 100, 30000):
torch.randperm(n, device='cuda', generator=torch.Generator(device='cuda:0'))
torch.randperm(n, device='cuda:0', generator=torch.Generator(device='cuda'))
# cuda:0 is allowed to match cuda:1: we match on device type only, which is
# consistent with torch.randint. Longer term, the generator should ignore
# the device ordinal, since it is not used anyway.
torch.randint(low=0, high=n + 1, size=(1,), device="cuda:0", generator=torch.Generator(device='cuda:1'))
torch.randperm(n, device='cuda:0', generator=torch.Generator(device='cuda:1'))
if torch.cuda.device_count() > 1:
torch.randint(low=0, high=n + 1, size=(1,), device="cuda:1", generator=torch.Generator(device='cuda:0'))
torch.randperm(n, device='cuda:1', generator=torch.Generator(device='cuda:0'))
regex = 'Expected a .* device type for generator but found .*'
cuda_t = torch.tensor(n, device='cuda')
self.assertRaisesRegex(RuntimeError, regex, lambda: torch.randperm(n, device='cuda', generator=cpu_gen))
self.assertRaisesRegex(RuntimeError, regex, lambda: torch.randperm(n, device='cuda', generator=cpu_gen, out=cuda_t))

View File

@ -751,7 +751,8 @@ class TestCrashHandler(TestCase):
@unittest.skipIf(not HAS_BREAKPAD, "Crash handler lib was not linked in")
def test_python_exception_writing(self):
with tempfile.TemporaryDirectory() as temp_dir:
torch.utils._crash_handler.enable_minidump_collection(temp_dir)
torch.utils._crash_handler.enable_minidumps(temp_dir)
torch.utils._crash_handler.enable_minidumps_on_exceptions()
files = os.listdir(temp_dir)
self.assertEqual(len(files), 0)
@ -768,7 +769,7 @@ class TestCrashHandler(TestCase):
files = os.listdir(temp_dir)
self.assertEqual(len(files), 1)
self.assertTrue(files[0].endswith(".dmp"))
torch.utils._crash_handler.disable_minidump_collection()
torch.utils._crash_handler.disable_minidumps()
@unittest.skipIf(IS_SANDCASTLE, "cpp_extension is OSS only")

View File

@ -629,7 +629,6 @@ libtorch_python_core_sources = [
"torch/csrc/utils.cpp",
"torch/csrc/utils/cuda_lazy_init.cpp",
"torch/csrc/utils/invalid_arguments.cpp",
"torch/csrc/utils/crash_handler.cpp",
"torch/csrc/utils/object_ptr.cpp",
"torch/csrc/utils/python_arg_parser.cpp",
"torch/csrc/utils/python_dispatch.cpp",

View File

@ -1028,3 +1028,7 @@ def _c10d_init() -> _bool: ...
# Defined in torch/csrc/distributed/rpc/testing/init.cpp
def _faulty_agent_init() -> _bool: ...
def _enable_minidumps(directory: str) -> None: ...
def _disable_minidumps() -> None: ...
def _enable_minidumps_on_exceptions() -> None: ...

View File

@ -933,7 +933,7 @@ PyObject* initModule() {
// Automatically translate errors thrown from pybind11 functions
py::register_exception_translator([](std::exception_ptr e) { // NOLINT
if (torch::crash_handler::is_enabled()) {
if (torch::crash_handler::is_enabled_on_exceptions()) {
torch::crash_handler::write_minidump();
}

View File

@ -15,6 +15,7 @@
#include <torch/csrc/jit/passes/remove_mutation.h>
#include <torch/csrc/jit/passes/subgraph_rewrite.h>
#include <torch/csrc/jit/passes/vulkan_rewrite.h>
#include <torch/csrc/jit/runtime/graph_executor_impl.h>
namespace torch {
namespace jit {
@ -213,6 +214,14 @@ void vulkanRemoveMutation(script::Module& module) {
RemoveTensorMutation(graph);
}
void vulkanRunCanonicalOptimizations(script::Module& module) {
auto graph = module.get_method("forward").graph();
for (const auto& method : module.get_methods()) {
auto graph = method.graph();
runOptimization(graph, false /* no loop unrolling */);
}
}
script::Module vulkanOptimizeForMobile(
const script::Module& m,
const std::vector<std::string>& preserved_methods) {
@ -225,6 +234,9 @@ script::Module vulkanOptimizeForMobile(
vulkanFoldPrePackingOps(cloned_module);
removeDropout(cloned_module);
vulkanRemoveMutation(cloned_module);
// remove duplicated constants
vulkanRunCanonicalOptimizations(cloned_module);
cloned_module.register_attribute(
"optimized_for_vulkan", BoolType::get(), true);
return cloned_module;

View File

@ -3,6 +3,7 @@
#include <caffe2/serialize/inline_container.h>
#include <torch/csrc/jit/api/module.h>
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/serialization/import.h>
#include <torch/csrc/jit/serialization/pickler.h>
#include <torch/csrc/jit/serialization/python_print.h>
#include <torch/csrc/jit/serialization/type_name_uniquer.h>
@ -97,6 +98,10 @@ class TORCH_API ScriptModuleSerializer {
// qualifier, e.g. '__torch__.Bar' -> PythonPrint for the file that will be
// created
OrderedDict<std::string, PythonPrint> file_streams_;
// Used to keep references to storages around during serialization, solving
// the ABA memory-reuse problem hit when storages are created/destroyed
// during the serialization process.
StorageContext storage_context_;
// Uniquely identifies a SourceRange in a model.
// SourceRanges are associated with Nodes of Graphs.

View File

@ -411,10 +411,11 @@ void ScriptModuleSerializer::writeArchive(
[&](const at::Tensor& tensor) {
// returns a string to use in pickler.cpp as the storage obj key
if (tensor_cdata_naming_scheme) {
tensor_names.push_back(
std::string string_id =
std::to_string(reinterpret_cast<std::intptr_t>(
tensor.storage().unsafeGetStorageImpl())) +
".storage");
tensor.storage().unsafeGetStorageImpl()));
tensor_names.push_back(string_id + ".storage");
storage_context_.addStorage(string_id, tensor.storage());
} else {
tensor_names.push_back(std::to_string(tensor_names.size()));
}

View File

@ -1,7 +1,10 @@
#include <cstdlib>
#include <cstring>
#include <iostream>
#ifdef ADD_BREAKPAD_SIGNAL_HANDLER
#include <breakpad/client/linux/handler/exception_handler.h>
#include <csignal>
#endif
#include <c10/util/Exception.h>
@ -12,7 +15,11 @@ namespace crash_handler {
#ifdef ADD_BREAKPAD_SIGNAL_HANDLER
bool dumpCallback(
static std::unique_ptr<google_breakpad::ExceptionHandler> handler; // NOLINT
static std::string minidump_directory; // NOLINT
static bool enabled_for_exceptions = false; // NOLINT
bool dump_callback(
const google_breakpad::MinidumpDescriptor& descriptor,
void* context,
bool succeeded) {
@ -22,54 +29,70 @@ bool dumpCallback(
return succeeded;
}
static std::unique_ptr<google_breakpad::ExceptionHandler> handler; // NOLINT
static std::string minidump_directory; // NOLINT
void _enable_minidump_collection(const std::string& dir) {
void enable_minidumps(const std::string& dir) {
minidump_directory = dir;
// The constructor here registers the actual signal handler
handler = std::make_unique<google_breakpad::ExceptionHandler>(
google_breakpad::MinidumpDescriptor(minidump_directory),
nullptr,
dumpCallback,
dump_callback,
nullptr,
true,
-1);
}
void _disable_minidump_collection() {
void disable_minidumps() {
handler.reset();
}
const std::string& _get_minidump_directory() {
const std::string& get_minidump_directory() {
if (handler == nullptr) {
AT_ERROR(
"Minidump handler is uninintialized, make sure to call _enable_minidump_collection first");
"Minidump handler is uninintialized, make sure to call enable_minidumps first");
}
return minidump_directory;
}
bool is_enabled() {
return handler != nullptr;
bool is_enabled_on_exceptions() {
if (handler == nullptr) {
return false;
}
return enabled_for_exceptions;
}
void write_minidump() {
TORCH_CHECK(handler != nullptr,"Minidump handler is uninitialized, make sure to call _enable_minidump_collection first");
TORCH_CHECK(
handler != nullptr,
"Minidump handler is uninintialized, make sure to call enable_minidumps first");
handler->WriteMinidump();
}
void enable_minidumps_on_exceptions() {
if (handler == nullptr) {
AT_ERROR(
"Minidump handler is uninintialized, make sure to call enable_minidumps first");
}
enabled_for_exceptions = true;
}
#else
void _enable_minidump_collection(const std::string& dir) {
// On unsupported systems we can't do anything, so stub out everything.
void enable_minidumps(const std::string& dir) {
AT_ERROR(
"Minidump collection is currently only implemented for Linux platforms");
}
void _disable_minidump_collection() {
void disable_minidumps() {
// Purposefully do nothing
}
const std::string& _get_minidump_directory() {
const std::string& get_minidump_directory() {
AT_ERROR(
"Minidump collection is currently only implemented for Linux platforms");
}
bool is_enabled() {
bool is_enabled_on_exceptions() {
return false;
}
@ -77,6 +100,12 @@ void write_minidump() {
AT_ERROR(
"Minidump collection is currently only implemented for Linux platforms");
}
void enable_minidumps_on_exceptions() {
AT_ERROR(
"Minidump collection is currently only implemented for Linux platforms");
}
#endif
} // namespace crash_handler

View File

@ -2,18 +2,28 @@
#include <torch/csrc/WindowsTorchApiMacro.h>
#include <string>
namespace torch {
namespace crash_handler {
TORCH_API void _enable_minidump_collection(const std::string& dir);
TORCH_API void _disable_minidump_collection();
// Set up a handler that writes minidumps to 'dir' on signals. This is not
// necessary to call unless you want to change 'dir' to something other than
// the default '/tmp/pytorch_crashes'.
TORCH_API void enable_minidumps(const std::string& dir);
TORCH_API const std::string& _get_minidump_directory();
// Enable minidumps when passing exceptions up to Python. By default these don't
// do anything special, but it can be useful to write out a minidump on
// exceptions for debugging purposes. This has no effect in C++.
TORCH_API void enable_minidumps_on_exceptions();
bool is_enabled();
// Disable all minidump writing and un-register the signal handler
TORCH_API void disable_minidumps();
void write_minidump();
// Get the directory that minidumps will be written to
TORCH_API const std::string& get_minidump_directory();
// These are TORCH_API'ed since they are used from libtorch_python.so
TORCH_API bool is_enabled_on_exceptions();
TORCH_API void write_minidump();
} // namespace crash_handler
} // namepsace torch
} // namespace torch

View File

@ -57,9 +57,11 @@ namespace crash_handler {
void initCrashHandlerBindings(PyObject* module) {
auto m = pybind11::handle(module).cast<pybind11::module>();
m.def("_enable_minidump_collection", _enable_minidump_collection)
.def("_disable_minidump_collection", _disable_minidump_collection)
.def("_get_minidump_directory", _get_minidump_directory);
m.def("_enable_minidumps", enable_minidumps)
.def("_is_enabled_on_exceptions", is_enabled_on_exceptions)
.def("_enable_minidumps_on_exceptions", enable_minidumps_on_exceptions)
.def("_disable_minidumps", disable_minidumps)
.def("_get_minidump_directory", get_minidump_directory);
}
} // namespace crash_handler
} // namespace torch

View File

@ -2833,7 +2833,10 @@ void ProcessGroupGloo::monitoredBarrier(
}
// If we are collecting all failed ranks, check if we need to throw if
// some ranks have not responded.
if (waitAllRanks && processedRanks.size() != size_) {
// Ensure all ranks from 1, ... WORLD_SIZE -1 have been successfully
// processed.
auto rankFailure = (processedRanks.size() != size_ - 1);
if (waitAllRanks && rankFailure) {
std::vector<int> failedRanks;
for (int i = 1; i < size_; ++i) {
if (std::find(processedRanks.begin(), processedRanks.end(), i) ==

View File

@ -7,7 +7,7 @@ from .glob_group import GlobPattern, GlobGroup
class Directory:
"""A file structure representation. Organized as Directory nodes that have lists of
their Directory children. Directories for a package are created by calling
:meth:`PackageExporter.file_structure` or :meth:`PackageImporter.file_structure`."""
:meth:`PackageImporter.file_structure`."""
def __init__(self, name: str, is_dir: bool):
self.name = name
@ -43,12 +43,12 @@ class Directory:
dir.children[file] = Directory(file, False)
def has_file(self, filename: str) -> bool:
"""Checks if a file is present in a Directory.
"""Checks if a file is present in a :class:`Directory`.
Args:
filename (str): Path of file to search for.
Returns:
bool: if a Directory contains the specified file.
bool: If a :class:`Directory` contains the specified file.
"""
lineage = filename.split("/", maxsplit=1)
child = lineage[0]

View File

@ -3,10 +3,8 @@ import importlib.machinery
import io
import linecache
import pickletools
import pprint
import textwrap
import types
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
@ -40,7 +38,7 @@ ActionHook = Callable[["PackageExporter", str], None]
class _ModuleProviderAction(Enum):
"""Represents one of the actions that exporter can take on a module.
"""Represents one of the actions that :class:`PackageExporter` can take on a module.
See :meth:`PackageExporter.extern` and friends for a description of what the actions do.
"""
@ -51,6 +49,27 @@ class _ModuleProviderAction(Enum):
DENY = 4
class PackagingErrorReason(Enum):
"""Listing of different reasons a dependency may fail to package.
This enum is used to provide good error messages when
:class:`PackagingError` is raised.
"""
def __repr__(self):
return '<%s.%s>' % (self.__class__.__name__, self.name)
IS_EXTENSION_MODULE = "Module is a C extension module. torch.package supports Python modules only."
NO_DUNDER_FILE = "Module had no __file__ defined."
SOURCE_FILE_NOT_FOUND = (
"Module had a __file__, but we could not find it in your filesystem."
)
DEPENDENCY_RESOLUTION_FAILED = "Dependency resolution failed."
NO_ACTION = (
"Module did not match against any action pattern. Extern, mock, or intern it."
)
DENIED = "Module was denied by a pattern."
@dataclass
class _PatternInfo:
"""Holds :class:`PackageExporter`-specific info about how to execute matches against"""
@ -80,62 +99,34 @@ class PackagingError(Exception):
"""This exception is raised when there is an issue with exporting a package.
``PackageExporter`` will attempt to gather up all the errors and present
them to you at once.
To make error information more understandable, the exception message will
only show modules that you ``intern``'d or direct dependencies of
``intern``'d modules. To see the full list of error modules, consult the
attributes on this exception.
Attributes:
denied (Set[str]): modules that have been marked as denied by the exporter.
broken (Dict[str, str]): modules for which the exporter could not retrieve source info,
along with the reason that retrieving it failed.
unhandled (Set[str]): modules for which there is no user-specified action.
"""
def __init__(
self,
denied: Set[str],
broken: Dict[str, str],
unhandled: Set[str],
include_filter: Set[str],
):
self.denied = denied
self.broken = broken
self.unhandled = unhandled
def __init__(self, dependency_graph: DiGraph):
# Group errors by reason.
broken: Dict[PackagingErrorReason, List[str]] = defaultdict(list)
for module_name, attrs in dependency_graph.nodes.items():
error = attrs.get("error")
if error is None:
continue
if error == PackagingErrorReason.NO_ACTION:
assert "action" not in attrs
broken[error].append(module_name)
self.filtered_denied = {
module for module in self.denied if module in include_filter
}
self.filtered_broken = {
module: reason
for module, reason in self.broken.items()
if module in include_filter
}
self.filtered_unhandled = {
module for module in self.unhandled if module in include_filter
}
message = io.StringIO()
message.write("\n")
message = io.StringIO("Errors raised while packaging:")
for reason, module_names in broken.items():
message.write(f"* {reason.value}\n")
for module_name in module_names:
message.write(f" {module_name}\n")
if self.filtered_denied:
message.write(
"\n\n* The following modules were detected as dependencies but have been denied:\n"
f"{textwrap.indent(pprint.pformat(self.filtered_denied), prefix=' ')}"
)
if self.filtered_broken:
message.write(
"\n\n* The following modules did not have source information. "
"Extern, mock, or refactor to remove the dependency:\n"
f"{textwrap.indent(pprint.pformat(self.filtered_broken), prefix=' ')}"
)
if self.filtered_unhandled:
message.write(
"\n\n* The following modules did not match against any patterns. "
"Intern, extern, or mock them:\n"
f"{textwrap.indent(pprint.pformat(self.filtered_unhandled), prefix=' ')}"
)
# Print additional context if it's provided.
error_context = dependency_graph.nodes[module_name].get("error_context")
if error_context is not None:
message.write(f" Context: {error_context}\n")
# Save the dependency graph so that tooling can get at it.
self.dependency_graph = dependency_graph
super().__init__(message.getvalue())
@ -155,11 +146,11 @@ class PackageExporter:
The importer for packages ensures that code in the module can only be loaded from
within the package, except for modules explicitly listed as external using :meth:`extern`.
The file `extern_modules` in the zip archive lists all the modules that a package externally depends on.
The file ``extern_modules`` in the zip archive lists all the modules that a package externally depends on.
This prevents "implicit" dependencies where the package runs locally because it is importing
a locally-installed package, but then fails when the package is copied to another machine.
When source code is added to the package, the exporter optionally can scan it
When source code is added to the package, the exporter can optionally scan it
for further code dependencies (``dependencies=True``). It looks for import statements,
resolves relative references to qualified module names, and performs an action specified by the user
(See: :meth:`extern`, :meth:`mock`, and :meth:`intern`).
@ -180,7 +171,7 @@ class PackageExporter:
Create an exporter.
Args:
f: The location to export to. Can be a ``string``/``Path`` object containing a filename,
f: The location to export to. Can be a ``string``/``Path`` object containing a filename
or a binary I/O object.
importer: If a single Importer is passed, use that to search for modules.
If a sequence of importers is passed, an ``OrderedImporter`` will be constructed out of them.
@ -253,7 +244,16 @@ class PackageExporter:
package_name = (
module_name if is_package else module_name.rsplit(".", maxsplit=1)[0]
)
dep_pairs = find_files_source_depends_on(src, package_name)
try:
dep_pairs = find_files_source_depends_on(src, package_name)
except Exception as e:
self.dependency_graph.add_node(
module_name,
error=PackagingErrorReason.DEPENDENCY_RESOLUTION_FAILED,
error_context=str(e),
)
return []
# Use a dict to deduplicate dependencies while preserving deterministic order
dependencies = {}
for dep_module_name, dep_module_obj in dep_pairs:
@ -284,14 +284,14 @@ class PackageExporter:
is_package: bool = False,
dependencies: bool = True,
):
"""Adds `src` as the source code for `module_name` in the exported package.
"""Adds ``src`` as the source code for ``module_name`` in the exported package.
Args:
module_name (str): e.g. `my_package.my_subpackage`, code will be saved to provide code for this package.
module_name (str): e.g. ``my_package.my_subpackage``, code will be saved to provide code for this package.
src (str): The Python source code to save for this package.
is_package (bool, optional): If True, this module is treated as a package. Packages are allowed to have submodules
(e.g. my_package.my_subpackage.my_subsubpackage), and resources can be saved inside them. Defaults to ``False``.
dependencies (bool, optional): If True, we scan the source for dependencies.
is_package (bool, optional): If ``True``, this module is treated as a package. Packages are allowed to have submodules
(e.g. ``my_package.my_subpackage.my_subsubpackage``), and resources can be saved inside them. Defaults to ``False``.
dependencies (bool, optional): If ``True``, we scan the source for dependencies.
"""
self.dependency_graph.add_node(
module_name,
@ -394,18 +394,29 @@ node [shape=box];
module_name, action=pattern_info.action, provided=True
)
if pattern_info.action == _ModuleProviderAction.DENY:
# Requiring a denied module just adds an error to the graph.
self.dependency_graph.add_node(
module_name, error=PackagingErrorReason.DENIED
)
# If we are interning this module, we need to retrieve its
# dependencies and package those as well.
if pattern_info.action == _ModuleProviderAction.INTERN:
self._add_module_to_dependency_graph(module_name, dependencies)
return
# No patterns have matched. Explicitly add this as an error.
self.dependency_graph.add_node(
module_name, error=PackagingErrorReason.NO_ACTION
)
def save_module(self, module_name: str, dependencies=True):
"""Save the code for ``module`` into the package. Code for the module is resolved using the ``importers`` path to find the
module object, and then using its ``__file__`` attribute to find the source code.
Args:
module_name (str): e.g. `my_package.my_subpackage`, code will be saved to provide code
module_name (str): e.g. ``my_package.my_subpackage``, code will be saved to provide code
for this package.
dependencies (bool, optional): If ``True``, we scan the source for dependencies.
"""
@ -432,11 +443,20 @@ node [shape=box];
if source is None:
# Couldn't find a source! Add it to our dependency graph as broken
# and continue.
filename = getattr(module_obj, "__file__", None)
error_context = None
if filename is None:
packaging_error = PackagingErrorReason.NO_DUNDER_FILE
elif filename.endswith(tuple(importlib.machinery.EXTENSION_SUFFIXES)):
packaging_error = PackagingErrorReason.IS_EXTENSION_MODULE
else:
packaging_error = PackagingErrorReason.SOURCE_FILE_NOT_FOUND
error_context = f"filename: {filename}"
self.dependency_graph.add_node(
module_name,
is_package=is_package,
broken=True,
filename=getattr(module_obj, "__file__", None),
error=packaging_error,
error_context=error_context,
)
return
@ -455,7 +475,7 @@ node [shape=box];
):
"""Save a python object to the archive using pickle. Equivalent to :func:`torch.save` but saving into
the archive rather than a stand-alone file. Stanard pickle does not save the code, only the objects.
If `dependencies` is true, this method will also scan the pickled objects for which modules are required
If ``dependencies`` is true, this method will also scan the pickled objects for which modules are required
to reconstruct them and save the relevant code.
To be able to save an object where ``type(obj).__name__`` is ``my_module.MyObject``,
@ -464,7 +484,7 @@ node [shape=box];
for this to work.
Args:
package (str): The name of module package this resource should go in (e.g. "my_package.my_subpackage")
package (str): The name of module package this resource should go in (e.g. ``"my_package.my_subpackage"``).
resource (str): A unique name for the resource, used to identify it to load.
obj (Any): The object to save, must be picklable.
dependencies (bool, optional): If ``True``, we scan the source for dependencies.
@ -508,7 +528,7 @@ node [shape=box];
"""Save text data to the package.
Args:
package (str): The name of module package this resource should go in (e.g. "my_package.my_subpackage")
package (str): The name of module package this resource should go in (e.g. ``"my_package.my_subpackage"``).
resource (str): A unique name for the resource, used to identify it to load.
text (str): The contents to save.
"""
@ -518,7 +538,7 @@ node [shape=box];
"""Save raw bytes to the package.
Args:
package (str): The name of module package this resource should go in (e.g. "my_package.my_subpackage")
package (str): The name of module package this resource should go in (e.g. ``"my_package.my_subpackage"``).
resource (str): A unique name for the resource, used to identify it to load.
binary (str): The data to save.
"""
@ -537,8 +557,8 @@ node [shape=box];
Returns:
:class:`torch.utils.hooks.RemovableHandle`:
a handle that can be used to remove the added hook by calling
``handle.remove()``
A handle that can be used to remove the added hook by calling
``handle.remove()``.
"""
handle = RemovableHandle(self._extern_hooks)
self._extern_hooks[handle.id] = hook
@ -556,8 +576,8 @@ node [shape=box];
Returns:
:class:`torch.utils.hooks.RemovableHandle`:
a handle that can be used to remove the added hook by calling
``handle.remove()``
A handle that can be used to remove the added hook by calling
``handle.remove()``.
"""
handle = RemovableHandle(self._mock_hooks)
self._mock_hooks[handle.id] = hook
@ -575,8 +595,8 @@ node [shape=box];
Returns:
:class:`torch.utils.hooks.RemovableHandle`:
a handle that can be used to remove the added hook by calling
``handle.remove()``
A handle that can be used to remove the added hook by calling
``handle.remove()``.
"""
handle = RemovableHandle(self._intern_hooks)
self._intern_hooks[handle.id] = hook
@ -589,7 +609,21 @@ node [shape=box];
exclude: "GlobPattern" = (),
allow_empty: bool = True,
):
"""TODO DOC"""
"""Specify modules that should be packaged. A module must match some ``intern`` pattern in order to be
included in the package and have its dependencies processed recursively.
Args:
include (Union[List[str], str]): A string e.g. "my_package.my_subpackage", or list of strings
for the names of the modules to be interned. This can also be a glob-style pattern, as described in :meth:`mock`.
exclude (Union[List[str], str]): An optional pattern that excludes some patterns that match the include string.
allow_empty (bool): An optional flag that specifies whether the intern modules specified by this call
to the ``intern`` method must be matched to some module during packaging. If an ``intern`` module glob
pattern is added with ``allow_empty=False``, and :meth:`close` is called (either explicitly or via ``__exit__``)
before any modules match that pattern, an exception is thrown. If ``allow_empty=True``, no such exception is thrown.
"""
self.patterns[GlobGroup(include, exclude=exclude)] = _PatternInfo(
_ModuleProviderAction.INTERN, allow_empty
)
@ -608,22 +642,27 @@ node [shape=box];
Use this function to mock this functionality out without having to modify the original code.
Args:
include (Union[List[str], str]): A string e.g. "my_package.my_subpackage", or list of strings
include (Union[List[str], str]): A string e.g. ``"my_package.my_subpackage"``, or list of strings
for the names of the modules to be mocked out. Strings can also be a glob-style pattern
string that may match multiple modules. Any required dependencies that match this pattern
string will be mocked out automatically.
Examples:
'torch.**' -- matches torch and all submodules of torch, e.g. 'torch.nn' and torch.nn.functional'
'torch.*' -- matches 'torch.nn' or 'torch.functional', but not 'torch.nn.functional'
Examples:
``'torch.**'`` -- matches ``torch`` and all submodules of torch, e.g. ``'torch.nn'``
and ``'torch.nn.functional'``
``'torch.*'`` -- matches ``'torch.nn'`` or ``'torch.functional'``, but not
``'torch.nn.functional'``
exclude (Union[List[str], str]): An optional pattern that excludes some patterns that match the include string.
e.g. include='torch.**', exclude='torch.foo' will mock all torch packages except 'torch.foo' Default: []
e.g. ``include='torch.**', exclude='torch.foo'`` will mock all torch packages except ``'torch.foo'``,
Default: ``[]``.
allow_empty (bool): An optional flag that specifies whether the mock implementation(s) specified by this call
to the `mock` method must be matched to some module during packaging. If a mock is added with allow_empty=False,
and `close` is called (either explicitly or via `__exit__`) and the mock has not been matched to a module
used by the package being exported, an exception is thrown. If allow_empty=True, no such exception is thrown.
to the :meth:`mock` method must be matched to some module during packaging. If a mock is added with
``allow_empty=False``, and :meth:`close` is called (either explicitly or via ``__exit__``) and the mock has
not been matched to a module used by the package being exported, an exception is thrown.
If ``allow_empty=True``, no such exception is thrown.
"""
self.patterns[GlobGroup(include, exclude=exclude)] = _PatternInfo(
@ -637,21 +676,24 @@ node [shape=box];
exclude: "GlobPattern" = (),
allow_empty: bool = True,
):
"""Include `module` in the list of external modules the package can import.
"""Include ``module`` in the list of external modules the package can import.
This will prevent dependency discovery from saving
it in the package. The importer will load an external module directly from the standard import system.
Code for extern modules must also exist in the process loading the package.
Args:
include (Union[List[str], str]): A string e.g. "my_package.my_subpackage", or list of strings
for the names of the modules to be externed. This can also be a glob-style pattern, as described in :meth:`mock`
include (Union[List[str], str]): A string e.g. ``"my_package.my_subpackage"``, or list of strings
for the names of the modules to be externed. This can also be a glob-style pattern, as
described in :meth:`mock`.
exclude (Union[List[str], str]): An optional pattern that excludes some patterns that match the include string.
exclude (Union[List[str], str]): An optional pattern that excludes some patterns that match the
include string.
allow_empty (bool): An optional flag that specifies whether the extern modules specified by this call
to the `extern` method must be matched to some module during packaging. If an extern module glob pattern is added
with allow_empty=False, and `close` is called (either explicitly or via `__exit__`) before any modules match that
pattern, an exception is thrown. If allow_empty=True, no such exception is thrown.
to the ``extern`` method must be matched to some module during packaging. If an extern module glob
pattern is added with ``allow_empty=False``, and :meth:`close` is called (either explicitly or via
``__exit__``) before any modules match that pattern, an exception is thrown. If ``allow_empty=True``,
no such exception is thrown.
"""
self.patterns[GlobGroup(include, exclude=exclude)] = _PatternInfo(
@ -663,8 +705,8 @@ node [shape=box];
If a dependency on any matching packages is found, a :class:`PackagingError` is raised.
Args:
include (Union[List[str], str]): A string e.g. "my_package.my_subpackage", or list of strings
for the names of the modules to be externed. This can also be a glob-style pattern, as described in :meth:`mock`
include (Union[List[str], str]): A string e.g. ``"my_package.my_subpackage"``, or list of strings
for the names of the modules to be denied. This can also be a glob-style pattern, as described in :meth:`mock`.
exclude (Union[List[str], str]): An optional pattern that excludes some patterns that match the include string.
"""
@ -723,47 +765,12 @@ node [shape=box];
self.zip_file.write_record(filename, str_or_bytes, len(str_or_bytes))
def _validate_dependency_graph(self):
# 1. No modules should be denied.
# 2. No broken modules (we should have been able to retrieve source for everything interned).
# 3. All modules should have an associated action.
# 4. All patterns for which allow_empty=False have been matched at least once.
denied = set()
broken = {}
unhandled = set()
# 1. Check the graph for any errors inserted during dependency analysis.
for module_name, attrs in self.dependency_graph.nodes.items():
if attrs.get("action") == _ModuleProviderAction.DENY:
denied.add(module_name)
if attrs.get("broken") is True:
filename = attrs.get("filename")
if filename is None:
broken_reason = "Module does not have a __file__ attribute set."
elif filename.endswith(tuple(importlib.machinery.EXTENSION_SUFFIXES)):
broken_reason = (
"Module is an C extension module, which is not supported in packaging. "
"Extern/mock it, or refactor your code to avoid the dependency."
)
else:
broken_reason = f"Source file {filename} not found."
broken[module_name] = broken_reason
if attrs.get("action") is None:
unhandled.add(module_name)
if denied or broken or unhandled:
# build up the filter set
interns = set()
for module_name, attrs in self.dependency_graph.nodes.items():
if attrs.get("action") == _ModuleProviderAction.INTERN:
interns.add(module_name)
include_filter = interns.copy()
for intern in interns:
for dep in self.dependency_graph.successors(intern):
include_filter.add(dep)
raise PackagingError(denied, broken, unhandled, include_filter)
if "error" in attrs:
raise PackagingError(self.dependency_graph)
# 2. Check that all patterns for which allow_empty=False have been matched at least once.
for pattern, pattern_info in self.patterns.items():
if not pattern_info.allow_empty and not pattern_info.was_matched:
raise EmptyMatchError(

View File

@ -29,7 +29,7 @@ from .importer import Importer
class PackageImporter(Importer):
"""Importers allow you to load code written to packages by PackageExporter.
"""Importers allow you to load code written to packages by :class:`PackageExporter`.
Code is loaded in a hermetic way, using files from the package
rather than the normal python import system. This allows
for the packaging of PyTorch model code and data so that it can be run
@ -37,12 +37,12 @@ class PackageImporter(Importer):
The importer for packages ensures that code in the module can only be loaded from
within the package, except for modules explicitly listed as external during export.
The file `extern_modules` in the zip archive lists all the modules that a package externally depends on.
The file ``extern_modules`` in the zip archive lists all the modules that a package externally depends on.
This prevents "implicit" dependencies where the package runs locally because it is importing
a locally-installed package, but then fails when the package is copied to another machine.
"""
"""The dictionary of already loaded modules from this package, equivalent to `sys.modules` but
"""The dictionary of already loaded modules from this package, equivalent to ``sys.modules`` but
local to this importer.
"""
modules: Dict[str, types.ModuleType]
@ -52,12 +52,12 @@ class PackageImporter(Importer):
file_or_buffer: Union[str, torch._C.PyTorchFileReader, Path, BinaryIO],
module_allowed: Callable[[str], bool] = lambda module_name: True,
):
"""Open `file_or_buffer` for importing. This checks that the imported package only requires modules
allowed by `module_allowed`
"""Open ``file_or_buffer`` for importing. This checks that the imported package only requires modules
allowed by ``module_allowed``.
Args:
file_or_buffer: a file-like object (has to implement :meth:`read`, :meth:`readline`, :meth:`tell`, and :meth:`seek`),
or a string or os.PathLike object containing a file name.
a string, or an ``os.PathLike`` object containing a filename.
module_allowed (Callable[[str], bool], optional): A method to determine if an externally provided module
should be allowed. Can be used to ensure packages loaded do not depend on modules that the server
does not support. Defaults to allowing anything.
@ -111,14 +111,14 @@ class PackageImporter(Importer):
def import_module(self, name: str, package=None):
"""Load a module from the package if it hasn't already been loaded, and then return
the module. Modules are loaded locally
to the importer and will appear in `self.modules` rather than `sys.modules`
to the importer and will appear in ``self.modules`` rather than ``sys.modules``.
Args:
name (str): Fully qualified name of the module to load.
package ([type], optional): Unused, but present to match the signature of importlib.import_module. Defaults to None.
package ([type], optional): Unused, but present to match the signature of importlib.import_module. Defaults to ``None``.
Returns:
types.ModuleType: the (possibly already) loaded module.
types.ModuleType: The (possibly already) loaded module.
"""
return self._gcd_import(name)
@ -126,7 +126,7 @@ class PackageImporter(Importer):
"""Load raw bytes.
Args:
package (str): The name of module package (e.g. "my_package.my_subpackage")
package (str): The name of module package (e.g. ``"my_package.my_subpackage"``).
resource (str): The unique name for the resource.
Returns:
@ -146,10 +146,10 @@ class PackageImporter(Importer):
"""Load a string.
Args:
package (str): The name of module package (e.g. "my_package.my_subpackage")
package (str): The name of module package (e.g. ``"my_package.my_subpackage"``).
resource (str): The unique name for the resource.
encoding (str, optional): Passed to `decode`. Defaults to 'utf-8'.
errors (str, optional): Passed to `decode`. Defaults to 'strict'.
encoding (str, optional): Passed to ``decode``. Defaults to ``'utf-8'``.
errors (str, optional): Passed to ``decode``. Defaults to ``'strict'``.
Returns:
str: The loaded text.
@ -159,15 +159,15 @@ class PackageImporter(Importer):
def load_pickle(self, package: str, resource: str, map_location=None) -> Any:
"""Unpickles the resource from the package, loading any modules that are needed to construct the objects
using :meth:`import_module`
using :meth:`import_module`.
Args:
package (str): The name of module package (e.g. "my_package.my_subpackage")
package (str): The name of module package (e.g. ``"my_package.my_subpackage"``).
resource (str): The unique name for the resource.
map_location: Passed to `torch.load` to determine how tensors are mapped to devices. Defaults to None.
map_location: Passed to `torch.load` to determine how tensors are mapped to devices. Defaults to ``None``.
Returns:
Any: the unpickled object.
Any: The unpickled object.
"""
pickle_file = self._zipfile_path(package, resource)
restore_location = _get_restore_location(map_location)
@ -244,7 +244,7 @@ class PackageImporter(Importer):
def id(self):
"""
Returns internal identifier that torch.package uses to distinguish PackageImporter instances.
Returns internal identifier that torch.package uses to distinguish :class:`PackageImporter` instances.
Looks like::
<torch_package_0>
@ -257,7 +257,7 @@ class PackageImporter(Importer):
"""Returns a file structure representation of package's zipfile.
Args:
include (Union[List[str], str]): An optional string e.g. "my_package.my_subpackage", or optional list of strings
include (Union[List[str], str]): An optional string e.g. ``"my_package.my_subpackage"``, or optional list of strings
for the names of the files to be included in the zipfile representation. This can also be
a glob-style pattern, as described in :meth:`PackageExporter.mock`.

View File

@ -5948,9 +5948,13 @@ class DistributedTest:
# Kick off some allreduce work on all ranks
for _ in range(10):
dist.all_reduce(torch.cat(tensors))
# Run monitored barrier
# Run monitored barrier and ensure it passes
timeout = timedelta(seconds=2)
dist.monitored_barrier(timeout=timeout)
# Check monitored_barrier success with wait_all_ranks=True
for _ in range(10):
dist.all_reduce(torch.cat(tensors))
dist.monitored_barrier(timeout=timeout, wait_all_ranks=True)
# All ranks besides 1 call into barrier, rank 0 should report failure
# while others report gloo error.
failed_rank = 1

View File

@ -2,7 +2,7 @@ import os.path as _osp
import sys
from .throughput_benchmark import ThroughputBenchmark
from ._crash_handler import enable_minidump_collection, disable_minidump_collection
from ._crash_handler import enable_minidumps, disable_minidumps, enable_minidumps_on_exceptions
# Set the module for a given object for nicer printing
def set_module(obj, mod):

View File

@ -6,7 +6,7 @@ import torch
DEFAULT_MINIDUMP_DIR = "/tmp/pytorch_crashes"
def enable_minidump_collection(directory=DEFAULT_MINIDUMP_DIR):
def enable_minidumps(directory=DEFAULT_MINIDUMP_DIR):
if sys.platform != "linux":
raise RuntimeError("Minidump collection is currently only implemented for Linux platforms")
@ -15,7 +15,12 @@ def enable_minidump_collection(directory=DEFAULT_MINIDUMP_DIR):
elif not os.path.exists(directory):
raise RuntimeError(f"Directory does not exist: {directory}")
torch._C._enable_minidump_collection(directory) # type: ignore[attr-defined]
torch._C._enable_minidumps(directory)
def disable_minidump_collection():
torch._C._disable_minidump_collection() # type: ignore[attr-defined]
def enable_minidumps_on_exceptions():
torch._C._enable_minidumps_on_exceptions()
def disable_minidumps():
torch._C._disable_minidumps()