remove aliasMultinomial decode from TH and THC (#52585)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/52585

Test Plan: Imported from OSS

Reviewed By: ailzhang

Differential Revision: D26654125

Pulled By: albanD

fbshipit-source-id: 6a745080021623a2472dae7862cde91b949983ee
This commit is contained in:
XiaobingSuper
2021-03-16 09:40:49 -07:00
committed by Facebook GitHub Bot
parent e8e570e9c5
commit bea3cb7069
14 changed files with 0 additions and 454 deletions

View File

@ -332,7 +332,6 @@ filegroup(
"aten/src/TH/THTensorLapack.cpp",
"aten/src/TH/THTensorMath.cpp",
"aten/src/TH/THTensorMoreMath.cpp",
"aten/src/TH/THTensorRandom.cpp",
],
)
@ -389,7 +388,6 @@ filegroup(
"aten/src/THC/THCTensorMathReduce.cu.cc",
"aten/src/THC/THCTensorMathScan.cu.cc",
"aten/src/THC/THCTensorMode.cu.cc",
"aten/src/THC/THCTensorRandom.cu.cc",
"aten/src/THC/THCTensorScatterGather.cu.cc",
"aten/src/THC/THCTensorSort.cu.cc",
"aten/src/THC/THCTensorTopK.cu.cc",

View File

@ -9,7 +9,6 @@ set(ATen_TH_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/THAllocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THStorageFunctions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensorRandom.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensorMath.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensorMoreMath.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THTensorEvenMoreMath.cpp
@ -94,8 +93,6 @@ install(FILES
generic/THTensorLapack.h
generic/THTensorMath.cpp
generic/THTensorMath.h
generic/THTensorRandom.cpp
generic/THTensorRandom.h
generic/THVector.h
# See Note [TH abstraction violation]
generic/THTensorFastGetSet.hpp

View File

@ -22,13 +22,6 @@
#include <TH/generic/THTensor.h>
#include <TH/THGenerateBFloat16Type.h>
/* random numbers */
#include <TH/generic/THTensorRandom.h>
#include <TH/THGenerateAllTypes.h>
#include <TH/generic/THTensorRandom.h>
#include <TH/THGenerateBoolType.h>
/* maths */
#include <TH/generic/THTensorMath.h>
#include <TH/THGenerateAllTypes.h>

View File

@ -1,8 +0,0 @@
#include <TH/THTensor.hpp>
#include <TH/THVector.h>
#include <TH/generic/THTensorRandom.cpp>
#include <TH/THGenerateAllTypes.h>
#include <TH/generic/THTensorRandom.cpp>
#include <TH/THGenerateBoolType.h>

View File

@ -1,151 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorRandom.cpp"
#else
#include <cmath>
#include <cpuinfo.h>
#include <array>
#include <iterator>
#include <algorithm>
#include <type_traits>
#include <ATen/Utils.h>
#include <ATen/core/DistributionsHelper.h>
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
#if defined(TH_REAL_IS_FLOAT)
#define TH_REAL_MIN FLT_MIN
#elif defined(TH_REAL_IS_DOUBLE)
#define TH_REAL_MIN DBL_MIN
#endif
#undef TH_REAL_MIN
/*
 * Builds the alias-method lookup tables for a 1-D probability tensor.
 *
 * probs : 1-D tensor of outcome probabilities (checked; reported as argument 1).
 * J     : resized to probs' element count. J[i] receives the alias outcome
 *         paired with column i, or stays -1 when no alias was assigned.
 * q     : resized to the same length. q[i] starts as inputsize * probs[i] and
 *         is adjusted below into the threshold for keeping column i.
 *
 * Raises a TH argument error when probs is not 1-D, or when an adjusted q
 * entry ends up negative ("q_min is less than 0").
 * An empty probs tensor yields empty J and q tables.
 */
void THTensor_(multinomialAliasSetup)(THTensor *probs, THLongTensor *J, THTensor *q)
{
  int64_t inputsize = THTensor_(nElement)(probs);
  // dim() is cast because the message is formatted with %d.
  THArgCheck(probs->dim() == 1, 1,
             "expected 1-D probability tensor, got %d-D probability tensor instead",
             static_cast<int>(probs->dim()));

  THLongTensor_resize1d(J, inputsize);
  THTensor_(resize1d)(q, inputsize);

  // Nothing to build for an empty distribution. Returning here also keeps the
  // min/max scan below from reading element inputsize-1 == -1.
  if (inputsize == 0)
  {
    return;
  }

  // Scratch stacks holding the indices whose scaled probability is below 1
  // ("smaller") or at least 1 ("larger").
  THLongTensor *smaller = THLongTensor_newWithSize1d(inputsize);
  THLongTensor *larger = THLongTensor_newWithSize1d(inputsize);
  int64_t small_c = 0;
  int64_t large_c = 0;

  scalar_t *q_data = q->data<scalar_t>();
  const int64_t q_stride = q->stride(0);

  for (int64_t i = 0; i < inputsize; i++)
  {
    THLongTensor_fastSet1d(J, i, -1L);
    scalar_t val = THTensor_(fastGet1d)(probs, i);
    THTensor_(fastSet1d)(q, i, inputsize*val);

    if (inputsize * val < 1.0)
    {
      THLongTensor_fastSet1d(smaller, small_c, i);
      small_c += 1;
    }
    else
    {
      THLongTensor_fastSet1d(larger, large_c, i);
      large_c += 1;
    }
  }

  // Loop through and create little binary mixtures that
  // appropriately allocate the larger outcomes over the
  // overall uniform mixture.
  while (small_c > 0 && large_c > 0)
  {
    const int64_t large = THLongTensor_fastGet1d(larger, large_c-1);
    const int64_t small = THLongTensor_fastGet1d(smaller, small_c-1);

    THLongTensor_fastSet1d(J, small, large);
    q_data[large * q_stride] -= 1.0 - THTensor_(fastGet1d)(q, small);

    if (q_data[large * q_stride] < 1.0)
    {
      // The donor dropped below 1: it replaces the consumed small entry.
      THLongTensor_fastSet1d(smaller, small_c-1, large);
      large_c -= 1;
    }
    else
    {
      // The donor still has mass to give: keep it on the large stack.
      THLongTensor_fastSet1d(larger, large_c-1, large);
      small_c -= 1;
    }
  }

  // Find the extremes of q for validation and normalisation.
  scalar_t q_min = THTensor_(fastGet1d)(q, inputsize-1);
  scalar_t q_max = q_min;
  for (int64_t i = 0; i < inputsize; i++)
  {
    const scalar_t q_temp = THTensor_(fastGet1d)(q, i);
    if (q_temp < q_min)
      q_min = q_temp;
    else if (q_temp > q_max)
      q_max = q_temp;
  }

  // The scratch tensors are released before the error is raised.
  THArgCheckWithCleanup((q_min >= 0),
                        THCleanup(THLongTensor_free(smaller); THLongTensor_free(larger);), 2,
                        "q_min is less than 0");

  if (q_max > 1)
  {
    for (int64_t i = 0; i < inputsize; i++)
    {
      q_data[i * q_stride] /= q_max;
    }
  }

  for (int64_t i = 0; i < inputsize; i++)
  {
    // Sometimes a large index isn't added to J.
    // Fix it by making the probability 1 so that J isn't indexed.
    // Stride-aware accessors are used here, like everywhere else in this
    // function, so non-contiguous J / q are handled consistently.
    if (THLongTensor_fastGet1d(J, i) < 0)
      THTensor_(fastSet1d)(q, i, 1.0);
  }

  THLongTensor_free(smaller);
  THLongTensor_free(larger);
}
/*
 * Draws n_sample outcome indices from previously built alias tables.
 *
 * self       : resized to n_sample and filled with the sampled indices.
 * q          : 1-D table of keep thresholds (checked; argument 1).
 * J          : 1-D table of alias indices (checked; argument 2).
 * n_sample   : number of draws; must be > 0 (argument 3).
 * _generator : optional generator; the default CPU generator is used when it
 *              is not supplied. Its mutex is held for the whole sampling loop.
 */
void THTensor_(multinomialAliasDraw)(THLongTensor *self, THTensor *q, THLongTensor *J, int n_sample, c10::optional<at::Generator> _generator)
{
  THArgCheck(q->dim() == 1, 1,
             "expected 1-D probability table, got %d-D probability table instead",
             q->dim());
  THArgCheck(J->dim() == 1, 2,
             "expected 1-D alias table, got %d-D alias table instead",
             J->dim());
  THArgCheck(n_sample > 0, 3, "cannot sample <= 0 samples");

  const int64_t num_outcomes = THLongTensor_nElement(J);
  THLongTensor_resize1d(self, n_sample);

  auto gen = at::get_generator_or_default<at::CPUGeneratorImpl>(_generator, at::detail::getDefaultCPUGenerator());
  // See Note [Acquire lock when using random generators]
  std::lock_guard<std::mutex> lock(gen->mutex_);

  for (int64_t draw = 0; draw < n_sample; ++draw)
  {
    // Pick a column: a real value from the (0, num_outcomes) distribution,
    // converted to an integer index.
    at::uniform_real_distribution<double> pick_column(0, num_outcomes);
    const int64_t column = pick_column(gen);

    // Decide whether to keep the column itself or fall through to its alias.
    const scalar_t keep_prob = THTensor_(fastGet1d)(q, column);
    at::bernoulli_distribution<double> keep_column(keep_prob);
    const int64_t keep = static_cast<int64_t>(keep_column(gen));

    const int64_t alias = THLongTensor_fastGet1d(J, column);
    // Arithmetic select: the alias when keep == 0, the column when keep == 1.
    THLongTensor_fastSet1d(self, draw, alias * (1 - keep) + column * keep);
  }
}
#endif
#endif

View File

@ -1,12 +0,0 @@
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "TH/generic/THTensorRandom.h"
#else
#include <ATen/core/Generator.h>
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
// Alias-method multinomial sampling, CPU. Only declared for float and double
// (see the enclosing #if).
//
// Builds the alias tables for the 1-D probability tensor `prob_dist`: `J`
// (alias indices) and `q` (keep thresholds) are resized to the number of
// elements in `prob_dist` and filled in.
TH_API void THTensor_(multinomialAliasSetup)(THTensor *prob_dist, THLongTensor *J, THTensor *q);
// Resizes `self` to `n_sample` and fills it with indices sampled using the 1-D
// tables `q` and `J`. `n_sample` must be > 0. `_generator` is optional; the
// implementation falls back to the default CPU generator when it is absent.
TH_API void THTensor_(multinomialAliasDraw)(THLongTensor *self, THTensor *q, THLongTensor *J, int n_sample, c10::optional<at::Generator> _generator);
#endif
#endif

View File

@ -51,7 +51,6 @@ set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorMathPairwise.cu
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorMathReduce.cu
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorIndex.cu
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorRandom.cu
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorScatterGather.cu
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorSort.cu
${CMAKE_CURRENT_SOURCE_DIR}/THCSortUtils.cu
@ -72,7 +71,6 @@ install(FILES
THCTensor.h
THCTensorCopy.h
THCTensorCopy.hpp
THCTensorRandom.h
THCTensorMath.h
THCApply.cuh
THCReduce.cuh
@ -111,7 +109,6 @@ install(FILES
THCTensorInfo.cuh
THCTensorMathPointwise.cuh
THCTensorTypeUtils.cuh
THCTensorRandom.cuh
THCTensorMathMagma.cuh
THCThrustAllocator.cuh
THCTensorMode.cuh
@ -150,8 +147,6 @@ install(FILES
generic/THCTensorIndex.cu
generic/THCTensorSort.h
generic/THCTensorSort.cu
generic/THCTensorRandom.h
generic/THCTensorRandom.cu
generic/THCTensorMode.h
generic/THCTensorMode.cu
generic/THCTensorTopK.h

View File

@ -11,7 +11,6 @@
#include <THC/THCTensor.h>
#include <THC/THCTensorCopy.h>
#include <THC/THCTensorRandom.h>
#include <THC/THCTensorMath.h>
#endif

View File

@ -1,19 +0,0 @@
#include <THC/THCTensorRandom.h>
#include <THC/THCDeviceUtils.cuh>
#include <THC/THCGeneral.h>
#include <THC/THCTensorCopy.h>
#include <THC/THCTensorMath.h>
#include <THC/THCReduceApplyUtils.cuh>
#include <THC/THCTensorRandom.cuh>
#include <ATen/Config.h>
#include <thrust/functional.h>
#define MAX_NUM_BLOCKS 200
#define BLOCK_SIZE 256
#include <THC/generic/THCTensorRandom.cu>
#include <THC/THCGenerateAllTypes.h>
#include <THC/generic/THCTensorRandom.cu>
#include <THC/THCGenerateBoolType.h>

View File

@ -1,90 +0,0 @@
#ifndef THC_TENSOR_RANDOM_CUH
#define THC_TENSOR_RANDOM_CUH
#include <THC/THCNumerics.cuh>
#include <THC/THCReduceApplyUtils.cuh>
#include <THC/THCTensorMathReduce.cuh>
#include <curand_kernel.h>
#define MAX_NUM_BLOCKS 200
#define BLOCK_SIZE 256
// One thread per output sample: converts uniform[tid] to a column index, then
// writes either that column or its alias J[column] to output[tid], depending
// on whether bernoulli[tid] is below q[column].
// `K` is accepted but not read by this kernel.
template <typename T>
__global__ void
multinomialAliasDrawKernel(int size, int64_t *output, int64_t *J, T *q, int64_t K, T *uniform, T *bernoulli){
  const int64_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
  if (tid >= size) {
    return;
  }

  const int64_t column = ScalarConvert<T, int64_t>::to(uniform[tid]);
  const T coin = bernoulli[tid];
  const int keep = static_cast<int>(THCNumerics<T>::lt(coin, q[column]));

  // Arithmetic select: the alias when keep == 0, the column when keep == 1.
  output[tid] = J[column] * (1 - keep) + column * keep;
}
// One thread per outcome: stores q[tid] = probs[tid] * inputsize, resets
// J_data[tid] to -1, zeroes both "*_short" scratch entries, and tags the
// outcome as small or large by writing tid + 1 into exactly one of
// smaller / larger (the other receives 0).
template <typename T>
__global__ void
aliasMultinomialFilter(T *q, T *probs, int64_t *smaller, int64_t *larger, int64_t *J_data, int64_t *larger_short_data, int64_t *smaller_short_data, T one, int64_t inputsize){
  const int64_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
  if (tid >= inputsize) {
    return;
  }

  larger_short_data[tid] = 0;
  smaller_short_data[tid] = 0;
  J_data[tid] = -1;

  const T scaled = THCNumerics<T>::mul(probs[tid], ScalarConvert<int64_t, T>::to(inputsize));
  const bool is_small = THCNumerics<T>::lt(scaled, one);

  // The stored tag is 1-based so that 0 can mean "not in this set".
  smaller[tid] = is_small ? tid + 1 : 0;
  larger[tid] = is_small ? 0 : tid + 1;

  q[tid] = scaled;
}
// One thread per outcome: entries with no alias (J[tid] < 0) get q[tid] = 1;
// every other entry is divided by q_max, but only when q_max is greater than 1.
template <typename T>
__global__ void
condDiv(T *q, int64_t *J, int64_t inputsize, T q_max) {
  const int64_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
  if (tid >= inputsize) {
    return;
  }

  const T one = ScalarConvert<int, T>::to(1);
  if (J[tid] < 0) {
    q[tid] = one;
    return;
  }

  if (THCNumerics<T>::gt(q_max, one)) {
    q[tid] = THCNumerics<T>::div(q[tid], q_max);
  }
}
#undef MAX_NUM_BLOCKS
#undef BLOCK_SIZE
// Pairing loop of the alias construction. The body has no thread indexing, so
// every launched thread would run the whole loop.
// smaller / larger are used as stacks of indices with small_c / large_c live
// entries. `inputsize` is accepted but not read here.
template <typename T>
__global__ void
aliasMultinomialSetup(int64_t *J, T*q, int64_t inputsize, int64_t * smaller, int64_t *larger, int small_c, int large_c) {
  const T one = ScalarConvert<int64_t, T>::to(1);

  // Create little binary mixtures that appropriately allocate the larger
  // outcomes over the overall uniform mixture.
  while (small_c > 0 && large_c > 0) {
    const int64_t donor = larger[large_c - 1];
    const int64_t receiver = smaller[small_c - 1];

    J[receiver] = donor;
    // q[donor] = (q[donor] + q[receiver]) - 1
    q[donor] = THCNumerics<T>::sub(THCNumerics<T>::add(q[donor], q[receiver]), one);

    if (THCNumerics<T>::lt(q[donor], one)) {
      // The donor dropped below 1: it takes over the consumed small slot.
      smaller[small_c - 1] = donor;
      --large_c;
    } else {
      // The donor still has mass to give: it stays on top of the large stack.
      larger[large_c - 1] = donor;
      --small_c;
    }
  }
}
#endif // THC_TENSOR_RANDOM_CUH

View File

@ -1,12 +0,0 @@
#ifndef TH_CUDA_TENSOR_RANDOM_INC
#define TH_CUDA_TENSOR_RANDOM_INC
#include <THC/THCTensor.h>
#include <THC/generic/THCTensorRandom.h>
#include <THC/THCGenerateAllTypes.h>
#include <THC/generic/THCTensorRandom.h>
#include <THC/THCGenerateBoolType.h>
#endif

View File

@ -1,120 +0,0 @@
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCTensorRandom.cu"
#else
#include <ATen/cuda/CUDAContext.h>
#include <ATen/CUDAGeneratorImpl.h>
#include <ATen/Utils.h>
#include <c10/cuda/CUDAException.h>
#include <utility>
#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
// Builds the alias-method tables on the GPU for the 1-D probability tensor
// `_probs`.
//   _J : resized to the element count of _probs; receives alias indices
//        (-1 where none was assigned). Must be contiguous.
//   _q : resized to the same length; receives the keep thresholds.
//        Must be contiguous.
// All kernels are launched on the current CUDA stream.
void THCTensor_(multinomialAliasSetup)(THCState *state, THCTensor *_probs, THCudaLongTensor *_J, THCTensor *_q){
THArgCheck(_probs->dim() == 1, 1,
"expected 1-D probability tensor, got %d-D probability tensor instead",
_probs->dim());
THAssert(THCTensor_(isContiguous)(state, _q));
THAssert(THCudaLongTensor_isContiguous(state, _J));
// Work on a contiguous version of the input; released at the end of the function.
THCTensor *probs = THCTensor_(newContiguous)(state, _probs);
THAssert(THCTensor_(isContiguous)(state, probs));
int64_t inputsize = THCTensor_(nElement)(state, probs);
// Scratch: `smaller`/`larger` receive per-element tags from the filter kernel,
// the `*_short` tensors receive the compacted index lists.
THCudaLongTensor *smaller = THCudaLongTensor_newWithSize1d(state, inputsize);
THCudaLongTensor *larger = THCudaLongTensor_newWithSize1d(state, inputsize);
THCudaLongTensor *smaller_short = THCudaLongTensor_newWithSize1d(state, inputsize);
THCudaLongTensor *larger_short = THCudaLongTensor_newWithSize1d(state, inputsize);
THCudaLongTensor_resize1d(state, _J, inputsize);
THCTensor_(resize1d)(state, _q, inputsize);
scalar_t one = ScalarConvert<int64_t, scalar_t>::to(1);
// NOTE(review): inputsize is narrowed to int here. Also, if THCCeilDiv already
// rounds up, adding BLOCK_SIZE - 1 first launches one more block than needed;
// the kernels guard on idx < inputsize, so that would be harmless - confirm.
int inputBlockDim = THCCeilDiv((int)inputsize + BLOCK_SIZE - 1, BLOCK_SIZE);
// Step 1: scale the probabilities into _q, reset _J to -1 and tag each outcome
// as small or large.
aliasMultinomialFilter
<<<inputBlockDim, BLOCK_SIZE, 0, c10::cuda::getCurrentCUDAStream() >>>(
THCTensor_(data)(state, _q),
THCTensor_(data)(state, probs),
THCudaLongTensor_data(state, smaller),
THCudaLongTensor_data(state, larger),
THCudaLongTensor_data(state, _J),
// NOTE(review): the kernel declares these two parameters in the order
// (larger_short_data, smaller_short_data); they are passed swapped here.
// The kernel only zero-fills both, so the result is the same.
THCudaLongTensor_data(state, smaller_short),
THCudaLongTensor_data(state, larger_short),
one, inputsize
);
C10_CUDA_KERNEL_LAUNCH_CHECK();
// Step 2: compact the tags into index lists via nonzero_out on the wrapped
// tensors.
at::Tensor smaller_short_wrapped = THTensor_wrap(smaller_short);
at::Tensor smaller_wrapped = THTensor_wrap(smaller);
at::Tensor larger_short_wrapped = THTensor_wrap(larger_short);
at::Tensor larger_wrapped = THTensor_wrap(larger);
at::nonzero_out(smaller_short_wrapped, smaller_wrapped);
at::nonzero_out(larger_short_wrapped, larger_wrapped);
// Number of "large" outcomes; the "small" count is derived below as
// inputsize - h_large_c.
// NOTE(review): the int64_t element count is narrowed to int.
int h_large_c = THCudaLongTensor_nElement(state, larger_short);
// Restore the 1-D, inputsize-long shape before handing raw pointers to the
// setup kernel.
// presumably the resize keeps the indices nonzero_out wrote - TODO confirm.
THCudaLongTensor_resize1d(state, smaller_short, inputsize);
THCudaLongTensor_resize1d(state, larger_short, inputsize);
// Step 3: pair small and large outcomes. Launched with a single block of a
// single thread, so the pairing loop runs serially on the device.
aliasMultinomialSetup
<<<1, 1, 0, c10::cuda::getCurrentCUDAStream()>>>(
THCudaLongTensor_data(state, _J),
THCTensor_(data)(state, _q),
inputsize,
THCudaLongTensor_data(state, smaller_short),
THCudaLongTensor_data(state, larger_short),
inputsize - h_large_c, h_large_c
);
C10_CUDA_KERNEL_LAUNCH_CHECK();
// Step 4: normalise. The maximum of _q is read back into a host scalar and
// passed to condDiv, which divides by it when it exceeds 1 and forces
// un-aliased entries to 1.
scalar_t q_max = at::max(THTensor_wrap(_q)).item<scalar_t>();
condDiv<<<
inputBlockDim, BLOCK_SIZE, 0, c10::cuda::getCurrentCUDAStream()>>>(
THCTensor_(data)(state, _q),
THCudaLongTensor_data(state, _J),
inputsize, q_max
);
C10_CUDA_KERNEL_LAUNCH_CHECK();
// Release the scratch tensors and the contiguous input.
THCudaLongTensor_free(state, smaller);
THCudaLongTensor_free(state, larger);
THCudaLongTensor_free(state, smaller_short);
THCudaLongTensor_free(state, larger_short);
THCTensor_free(state, probs);
}
// Draws n_sample outcome indices on the GPU from previously built alias tables.
//   self     : resized to n_sample and filled with the sampled indices.
//   _q       : contiguous 1-D table of keep thresholds (argument 1).
//   _J       : contiguous 1-D table of alias indices (argument 2).
//   n_sample : number of draws; must be > 0 (argument 3).
//   gen_     : optional generator forwarded to both uniform_ fills.
void THCTensor_(multinomialAliasDraw)(THCState *state, THCudaLongTensor *self, THCTensor *_q, THCudaLongTensor *_J, int n_sample, c10::optional<at::Generator> gen_){
  THArgCheck(_q->dim() == 1, 1,
             "expected 1-D probability table, got %d-D probability table instead",
             _q->dim());
  THArgCheck(_J->dim() == 1, 2,
             "expected 1-D alias table, got %d-D alias table instead",
             _J->dim());
  THArgCheck(n_sample > 0, 3, "cannot sample <= 0 samples");
  THAssert(THCTensor_(isContiguous)(state, _q));
  THAssert(THCudaLongTensor_isContiguous(state, _J));

  const int64_t num_outcomes = THCudaLongTensor_nElement(state, _J);
  THCudaLongTensor_resize1d(state, self, n_sample);
  const ptrdiff_t num_draws = THCudaLongTensor_nElement(state, self);

  // Two scratch buffers of random numbers, one entry per draw: the first
  // selects the column, the second is compared against q[column] in the kernel.
  THCTensor *column_noise = THCTensor_(newWithSize1d)(state, n_sample);
  THCTensor *keep_noise = THCTensor_(newWithSize1d)(state, n_sample);
  auto column_noise_wrapped = THTensor_wrap(column_noise);
  auto keep_noise_wrapped = THTensor_wrap(keep_noise);
  at::native::uniform_(column_noise_wrapped, 0, num_outcomes, gen_);
  at::native::uniform_(keep_noise_wrapped, 0, 1, gen_);

  const auto num_blocks = THCCeilDiv((int)n_sample+BLOCK_SIZE-1, BLOCK_SIZE);
  multinomialAliasDrawKernel
    <<<num_blocks, BLOCK_SIZE, 0, c10::cuda::getCurrentCUDAStream()>>>(
      num_draws,
      THCudaLongTensor_data(state, self),
      THCudaLongTensor_data(state, _J),
      THCTensor_(data)(state, _q),
      num_outcomes,
      THCTensor_(data)(state, column_noise),
      THCTensor_(data)(state, keep_noise)
    );
  C10_CUDA_KERNEL_LAUNCH_CHECK();

  THCTensor_(free)(state, column_noise);
  THCTensor_(free)(state, keep_noise);
}
#endif
#endif

View File

@ -1,23 +0,0 @@
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCTensorRandom.h"
#else
#include <ATen/core/Generator.h>
#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
// Alias-method multinomial sampling, CUDA. Only declared for float, double and
// half (see the enclosing #if).
//
// Builds the alias tables for the 1-D probability tensor `probs`: `J` (alias
// indices) and `q` (keep thresholds) are resized to the element count of
// `probs` and filled in. The implementation asserts that `J` and `q` are
// contiguous.
TORCH_CUDA_CU_API void THCTensor_(multinomialAliasSetup)(
struct THCState* state,
THCTensor* probs,
THCudaLongTensor* J,
THCTensor* q);
// Resizes `self` to `n_sample` and fills it with indices sampled using the
// contiguous 1-D tables `_q` and `_J`. `n_sample` must be > 0. `gen_` is an
// optional generator used for the random draws.
TORCH_CUDA_CU_API void THCTensor_(multinomialAliasDraw)(
THCState* state,
THCudaLongTensor* self,
THCTensor* _q,
THCudaLongTensor* _J,
int n_sample,
c10::optional<at::Generator> gen_);
#endif
#endif

View File

@ -973,7 +973,6 @@ aten_native_source_non_codegen_list = [
"aten/src/TH/THTensorLapack.cpp",
"aten/src/TH/THTensorMath.cpp",
"aten/src/TH/THTensorMoreMath.cpp",
"aten/src/TH/THTensorRandom.cpp",
"aten/src/ATen/native/utils/Factory.cpp",
"aten/src/ATen/native/xnnpack/ChannelShuffle.cpp",
"aten/src/ATen/native/xnnpack/Convolution.cpp",