mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
remove aliasMultinomial decode from TH and THC (#52585)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/52585 Test Plan: Imported from OSS Reviewed By: ailzhang Differential Revision: D26654125 Pulled By: albanD fbshipit-source-id: 6a745080021623a2472dae7862cde91b949983ee
This commit is contained in:
committed by
Facebook GitHub Bot
parent
e8e570e9c5
commit
bea3cb7069
@ -332,7 +332,6 @@ filegroup(
|
||||
"aten/src/TH/THTensorLapack.cpp",
|
||||
"aten/src/TH/THTensorMath.cpp",
|
||||
"aten/src/TH/THTensorMoreMath.cpp",
|
||||
"aten/src/TH/THTensorRandom.cpp",
|
||||
],
|
||||
)
|
||||
|
||||
@ -389,7 +388,6 @@ filegroup(
|
||||
"aten/src/THC/THCTensorMathReduce.cu.cc",
|
||||
"aten/src/THC/THCTensorMathScan.cu.cc",
|
||||
"aten/src/THC/THCTensorMode.cu.cc",
|
||||
"aten/src/THC/THCTensorRandom.cu.cc",
|
||||
"aten/src/THC/THCTensorScatterGather.cu.cc",
|
||||
"aten/src/THC/THCTensorSort.cu.cc",
|
||||
"aten/src/THC/THCTensorTopK.cu.cc",
|
||||
|
@ -9,7 +9,6 @@ set(ATen_TH_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THAllocator.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THStorageFunctions.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THTensor.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THTensorRandom.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THTensorMath.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THTensorMoreMath.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THTensorEvenMoreMath.cpp
|
||||
@ -94,8 +93,6 @@ install(FILES
|
||||
generic/THTensorLapack.h
|
||||
generic/THTensorMath.cpp
|
||||
generic/THTensorMath.h
|
||||
generic/THTensorRandom.cpp
|
||||
generic/THTensorRandom.h
|
||||
generic/THVector.h
|
||||
# See Note [TH abstraction violation]
|
||||
generic/THTensorFastGetSet.hpp
|
||||
|
@ -22,13 +22,6 @@
|
||||
#include <TH/generic/THTensor.h>
|
||||
#include <TH/THGenerateBFloat16Type.h>
|
||||
|
||||
/* random numbers */
|
||||
#include <TH/generic/THTensorRandom.h>
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include <TH/generic/THTensorRandom.h>
|
||||
#include <TH/THGenerateBoolType.h>
|
||||
|
||||
/* maths */
|
||||
#include <TH/generic/THTensorMath.h>
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
@ -1,8 +0,0 @@
|
||||
#include <TH/THTensor.hpp>
|
||||
#include <TH/THVector.h>
|
||||
|
||||
#include <TH/generic/THTensorRandom.cpp>
|
||||
#include <TH/THGenerateAllTypes.h>
|
||||
|
||||
#include <TH/generic/THTensorRandom.cpp>
|
||||
#include <TH/THGenerateBoolType.h>
|
@ -1,151 +0,0 @@
|
||||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "TH/generic/THTensorRandom.cpp"
|
||||
#else
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <array>
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include <ATen/Utils.h>
|
||||
#include <ATen/core/DistributionsHelper.h>
|
||||
|
||||
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
|
||||
|
||||
#if defined(TH_REAL_IS_FLOAT)
|
||||
#define TH_REAL_MIN FLT_MIN
|
||||
#elif defined(TH_REAL_IS_DOUBLE)
|
||||
#define TH_REAL_MIN DBL_MIN
|
||||
#endif
|
||||
|
||||
|
||||
#undef TH_REAL_MIN
|
||||
|
||||
// Builds the two alias-method tables for a 1-D probability vector.
//
// probs: 1-D, non-empty tensor of probabilities (only read).
// J:     resized to probs' element count. J[i] receives the index of the
//        outcome paired with bucket i, or stays -1 if no pairing was made.
// q:     resized to probs' element count. q[i] starts as inputsize * probs[i]
//        and is adjusted while buckets are paired; buckets left without an
//        alias end up with q[i] == 1.
//
// Raises through THArgCheck when probs is not 1-D or is empty, and through
// THArgCheckWithCleanup when the smallest entry of q is negative.
void THTensor_(multinomialAliasSetup)(THTensor *probs, THLongTensor *J, THTensor *q)
{
  int64_t inputsize = THTensor_(nElement)(probs);
  THArgCheck(probs->dim() == 1, 1,
             "expected 1-D probability tensor, got %d-D probability tensor instead",
             probs->dim());
  // The min/max scan further down seeds itself from element inputsize-1,
  // which does not exist for an empty tensor. Reject that case before any
  // scratch tensor is allocated so nothing needs to be cleaned up.
  THArgCheck(inputsize > 0, 1, "expected a non-empty probability tensor");

  int64_t i = 0;
  // Scratch stacks of bucket indices: `smaller` holds buckets whose scaled
  // probability is below 1, `larger` holds the rest.
  THLongTensor *smaller = THLongTensor_newWithSize1d(inputsize);
  THLongTensor *larger = THLongTensor_newWithSize1d(inputsize);
  int64_t small_c = 0;
  int64_t large_c = 0;
  THLongTensor_resize1d(J, inputsize);
  THTensor_(resize1d)(q, inputsize);
  scalar_t *q_data = q->data<scalar_t>();
  const int64_t q_stride = q->stride(0);

  for (i = 0; i < inputsize; i++)
  {
    THLongTensor_fastSet1d(J, i, -1L);
    scalar_t val = THTensor_(fastGet1d)(probs, i);
    THTensor_(fastSet1d)(q, i, inputsize*val);

    if (inputsize * val < 1.0)
    {
      THLongTensor_fastSet1d(smaller, small_c, i);
      small_c += 1;
    }
    else
    {
      THLongTensor_fastSet1d(larger, large_c, i);
      large_c += 1;
    }
  }

  // Loop through and create little binary mixtures that
  // appropriately allocate the larger outcomes over the
  // overall uniform mixture.
  while (small_c > 0 && large_c > 0)
  {
    const int64_t large = THLongTensor_fastGet1d(larger, large_c-1);
    const int64_t small = THLongTensor_fastGet1d(smaller, small_c-1);

    THLongTensor_fastSet1d(J, small, large);
    q_data[large * q_stride] -= 1.0 - THTensor_(fastGet1d)(q, small);

    if (q_data[large * q_stride] < 1.0)
    {
      // `large` fell below 1: it replaces the small bucket that was just
      // paired, and leaves the larger stack.
      THLongTensor_fastSet1d(smaller, small_c-1, large);
      large_c -= 1;
    }
    else
    {
      // `large` is still the top entry of the larger stack (it was read from
      // that slot above), so only the small bucket is popped.
      small_c -= 1;
    }
  }

  scalar_t q_min = THTensor_(fastGet1d)(q, inputsize-1);
  scalar_t q_max = q_min;
  scalar_t q_temp;
  for (i = 0; i < inputsize; i++)
  {
    q_temp = THTensor_(fastGet1d)(q, i);
    if (q_temp < q_min)
      q_min = q_temp;
    else if (q_temp > q_max)
      q_max = q_temp;
  }
  THArgCheckWithCleanup((q_min >= 0),
                        THCleanup(THLongTensor_free(smaller); THLongTensor_free(larger);), 2,
                        "q_min is less than 0");

  if (q_max > 1)
  {
    for (i = 0; i < inputsize; i++)
    {
      q_data[i * q_stride] /= q_max;
    }
  }
  for (i = 0; i < inputsize; i++)
  {
    // sometimes a large index isn't added to J.
    // fix it by making the probability 1 so that J isn't indexed.
    // Both tensors are accessed through the stride-aware accessors here, the
    // same way the rest of this function accesses them.
    if (THLongTensor_fastGet1d(J, i) < 0)
      THTensor_(fastSet1d)(q, i, 1.0);
  }
  THLongTensor_free(smaller);
  THLongTensor_free(larger);
}
|
||||
// Draws n_sample outcome indices into `self` using the alias tables q and J.
//
// self:       resized to n_sample elements and filled with the drawn indices.
// q:          1-D table; q[b] is used as the Bernoulli probability of keeping
//             bucket b.
// J:          1-D table; J[b] is the index written when bucket b is not kept.
//             Its element count is the number of buckets.
// n_sample:   number of draws, must be > 0.
// _generator: optional generator; the default CPU generator is used otherwise.
//
// The generator's mutex is held for the whole sampling loop.
void THTensor_(multinomialAliasDraw)(THLongTensor *self, THTensor *q, THLongTensor *J, int n_sample, c10::optional<at::Generator> _generator)
{
  THArgCheck(q->dim() == 1, 1,
             "expected 1-D probability table, got %d-D probability table instead",
             q->dim());
  THArgCheck(J->dim() == 1, 2,
             "expected 1-D alias table, got %d-D alias table instead",
             J->dim());
  THArgCheck(n_sample > 0, 3, "cannot sample <= 0 samples");

  const int64_t num_buckets = THLongTensor_nElement(J);
  THLongTensor_resize1d(self, n_sample);

  auto gen = at::get_generator_or_default<at::CPUGeneratorImpl>(_generator, at::detail::getDefaultCPUGenerator());
  // See Note [Acquire lock when using random generators]
  std::lock_guard<std::mutex> lock(gen->mutex_);

  // The bucket distribution depends only on the table size, so one instance
  // serves every draw.
  at::uniform_real_distribution<double> pick_bucket(0, num_buckets);

  for (int64_t draw = 0; draw < n_sample; ++draw)
  {
    // Real-valued draw converted to an integer bucket index.
    const int64_t bucket = pick_bucket(gen);

    at::bernoulli_distribution<double> keep_bucket(THTensor_(fastGet1d)(q, bucket));
    const bool keep = static_cast<int64_t>(keep_bucket(gen)) != 0;

    const int64_t alias = THLongTensor_fastGet1d(J, bucket);

    THLongTensor_fastSet1d(self, draw, keep ? bucket : alias);
  }
}
|
||||
#endif
|
||||
#endif
|
@ -1,12 +0,0 @@
|
||||
#ifndef TH_GENERIC_FILE
|
||||
#define TH_GENERIC_FILE "TH/generic/THTensorRandom.h"
|
||||
#else
|
||||
|
||||
#include <ATen/core/Generator.h>
|
||||
|
||||
#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
|
||||
TH_API void THTensor_(multinomialAliasSetup)(THTensor *prob_dist, THLongTensor *J, THTensor *q);
|
||||
TH_API void THTensor_(multinomialAliasDraw)(THLongTensor *self, THTensor *q, THLongTensor *J, int n_sample, c10::optional<at::Generator> _generator);
|
||||
#endif
|
||||
|
||||
#endif
|
@ -51,7 +51,6 @@ set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorMathPairwise.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorMathReduce.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorIndex.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorRandom.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorScatterGather.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorSort.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCSortUtils.cu
|
||||
@ -72,7 +71,6 @@ install(FILES
|
||||
THCTensor.h
|
||||
THCTensorCopy.h
|
||||
THCTensorCopy.hpp
|
||||
THCTensorRandom.h
|
||||
THCTensorMath.h
|
||||
THCApply.cuh
|
||||
THCReduce.cuh
|
||||
@ -111,7 +109,6 @@ install(FILES
|
||||
THCTensorInfo.cuh
|
||||
THCTensorMathPointwise.cuh
|
||||
THCTensorTypeUtils.cuh
|
||||
THCTensorRandom.cuh
|
||||
THCTensorMathMagma.cuh
|
||||
THCThrustAllocator.cuh
|
||||
THCTensorMode.cuh
|
||||
@ -150,8 +147,6 @@ install(FILES
|
||||
generic/THCTensorIndex.cu
|
||||
generic/THCTensorSort.h
|
||||
generic/THCTensorSort.cu
|
||||
generic/THCTensorRandom.h
|
||||
generic/THCTensorRandom.cu
|
||||
generic/THCTensorMode.h
|
||||
generic/THCTensorMode.cu
|
||||
generic/THCTensorTopK.h
|
||||
|
@ -11,7 +11,6 @@
|
||||
|
||||
#include <THC/THCTensor.h>
|
||||
#include <THC/THCTensorCopy.h>
|
||||
#include <THC/THCTensorRandom.h>
|
||||
#include <THC/THCTensorMath.h>
|
||||
|
||||
#endif
|
||||
|
@ -1,19 +0,0 @@
|
||||
#include <THC/THCTensorRandom.h>
|
||||
#include <THC/THCDeviceUtils.cuh>
|
||||
#include <THC/THCGeneral.h>
|
||||
#include <THC/THCTensorCopy.h>
|
||||
#include <THC/THCTensorMath.h>
|
||||
#include <THC/THCReduceApplyUtils.cuh>
|
||||
#include <THC/THCTensorRandom.cuh>
|
||||
#include <ATen/Config.h>
|
||||
|
||||
#include <thrust/functional.h>
|
||||
|
||||
#define MAX_NUM_BLOCKS 200
|
||||
#define BLOCK_SIZE 256
|
||||
|
||||
#include <THC/generic/THCTensorRandom.cu>
|
||||
#include <THC/THCGenerateAllTypes.h>
|
||||
|
||||
#include <THC/generic/THCTensorRandom.cu>
|
||||
#include <THC/THCGenerateBoolType.h>
|
@ -1,90 +0,0 @@
|
||||
#ifndef THC_TENSOR_RANDOM_CUH
|
||||
#define THC_TENSOR_RANDOM_CUH
|
||||
|
||||
#include <THC/THCNumerics.cuh>
|
||||
#include <THC/THCReduceApplyUtils.cuh>
|
||||
#include <THC/THCTensorMathReduce.cuh>
|
||||
|
||||
#include <curand_kernel.h>
|
||||
|
||||
#define MAX_NUM_BLOCKS 200
|
||||
#define BLOCK_SIZE 256
|
||||
|
||||
// Alias-method draw: one output element per thread.
//
// size:      number of samples to write; threads with idx >= size do nothing.
// output:    receives the drawn index for each sample.
// J, q:      alias table and threshold table, both indexed by bucket.
// K:         number of buckets, used to bound the bucket index.
// uniform:   per-sample value converted to the bucket index.
// bernoulli: per-sample value compared against q[bucket]; the bucket itself
//            is kept when the value is strictly less, otherwise J[bucket] is
//            written.
template <typename T>
__global__ void
multinomialAliasDrawKernel(int size, int64_t *output, int64_t *J, T *q, int64_t K, T *uniform, T *bernoulli){
  int64_t idx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
  if (idx < size) {
    int64_t rand_ind = ScalarConvert<T, int64_t>::to(uniform[idx]);
    // Bound the bucket index to [0, K-1] before it is used to index q and J.
    // NOTE(review): presumably uniform[idx] can land exactly on K when T has
    // low precision and the draw rounds up - confirm against the generator.
    if (rand_ind >= K) {
      rand_ind = K - 1;
    }
    if (rand_ind < 0) {
      rand_ind = 0;
    }
    T bern_uniform = bernoulli[idx];
    int _mask = (int) THCNumerics<T>::lt(bern_uniform, q[rand_ind]);
    output[idx] = J[rand_ind]*(1 -_mask) + rand_ind * _mask;
  }
}
|
||||
|
||||
// Per-element initialisation for the alias tables: one element per thread.
//
// For each idx < inputsize:
//   - zeroes larger_short_data[idx] and smaller_short_data[idx],
//   - sets J_data[idx] to -1,
//   - stores probs[idx] * inputsize in q[idx],
//   - writes idx+1 into `smaller` (and 0 into `larger`) when that scaled
//     value is less than `one`, otherwise idx+1 into `larger` (and 0 into
//     `smaller`).
template <typename T>
__global__ void
aliasMultinomialFilter(T *q, T *probs, int64_t *smaller, int64_t *larger, int64_t *J_data, int64_t *larger_short_data, int64_t *smaller_short_data, T one, int64_t inputsize){
  const int64_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
  if (tid >= inputsize) {
    return;
  }

  larger_short_data[tid] = 0;
  smaller_short_data[tid] = 0;
  J_data[tid] = -1;

  const T scaled = THCNumerics<T>::mul(probs[tid], ScalarConvert<int64_t, T>::to(inputsize));
  const bool below_one = THCNumerics<T>::lt(scaled, one);

  // Exactly one of the two flag arrays gets the non-zero marker idx+1.
  smaller[tid] = below_one ? tid + 1 : 0;
  larger[tid] = below_one ? 0 : tid + 1;

  q[tid] = scaled;
}
|
||||
|
||||
// Final pass over q: one element per thread.
//
// For each idx < inputsize:
//   - if J[idx] is negative, q[idx] is set to one;
//   - otherwise, if q_max is greater than one, q[idx] is divided by q_max;
//   - otherwise q[idx] is left untouched.
template <typename T>
__global__ void
condDiv(T *q, int64_t *J, int64_t inputsize, T q_max) {
  const int64_t tid = blockIdx.x * BLOCK_SIZE + threadIdx.x;
  if (tid >= inputsize) {
    return;
  }

  const T one = ScalarConvert<int, T>::to(1);
  if (J[tid] < 0) {
    q[tid] = one;
  } else if (THCNumerics<T>::gt(q_max, one)) {
    q[tid] = THCNumerics<T>::div(q[tid], q_max);
  }
}
|
||||
|
||||
|
||||
#undef MAX_NUM_BLOCKS
|
||||
#undef BLOCK_SIZE
|
||||
|
||||
// Pairing loop of the alias-table construction.
//
// J:        alias table; J[small] is set to the large bucket it is paired with.
// q:        threshold table, updated in place as q[large] + q[small] - 1.
// smaller / larger: stacks of bucket indices with small_c / large_c live
//           entries respectively.
//
// The body uses no thread or block index, so every launched thread would run
// the same sequential loop over the same data.
template <typename T>
__global__ void
aliasMultinomialSetup(int64_t *J, T*q, int64_t inputsize, int64_t * smaller, int64_t *larger, int small_c, int large_c) {
  const T one = ScalarConvert<int64_t, T>::to(1);

  // Create binary mixtures that hand the excess of the larger outcomes over
  // to the smaller ones until one of the stacks runs empty.
  while (small_c > 0 && large_c > 0) {
    const int64_t donor = larger[large_c - 1];
    const int64_t receiver = smaller[small_c - 1];

    J[receiver] = donor;
    q[donor] = THCNumerics<T>::sub(THCNumerics<T>::add(q[donor], q[receiver]), one);

    if (THCNumerics<T>::lt(q[donor], one)) {
      // The donor dropped below one: it takes the receiver's slot on the
      // smaller stack and leaves the larger stack.
      smaller[small_c - 1] = donor;
      --large_c;
    } else {
      // The donor stays on top of the larger stack; the receiver is popped.
      larger[large_c - 1] = donor;
      --small_c;
    }
  }
}
|
||||
|
||||
#endif // THC_TENSOR_RANDOM_CUH
|
@ -1,12 +0,0 @@
|
||||
#ifndef TH_CUDA_TENSOR_RANDOM_INC
|
||||
#define TH_CUDA_TENSOR_RANDOM_INC
|
||||
|
||||
#include <THC/THCTensor.h>
|
||||
|
||||
#include <THC/generic/THCTensorRandom.h>
|
||||
#include <THC/THCGenerateAllTypes.h>
|
||||
|
||||
#include <THC/generic/THCTensorRandom.h>
|
||||
#include <THC/THCGenerateBoolType.h>
|
||||
|
||||
#endif
|
@ -1,120 +0,0 @@
|
||||
#ifndef THC_GENERIC_FILE
|
||||
#define THC_GENERIC_FILE "THC/generic/THCTensorRandom.cu"
|
||||
#else
|
||||
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <ATen/CUDAGeneratorImpl.h>
|
||||
#include <ATen/Utils.h>
|
||||
#include <c10/cuda/CUDAException.h>
|
||||
#include <utility>
|
||||
|
||||
#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
|
||||
|
||||
// Host-side driver that fills the alias table _J and threshold table _q for
// the 1-D probability tensor _probs.
//
// _probs: 1-D probability tensor; a contiguous copy/view is used internally.
// _J, _q: must already be contiguous (asserted); both are resized to the
//         element count of _probs.
//
// Three kernels are launched on the current CUDA stream, in order:
// aliasMultinomialFilter, aliasMultinomialSetup (as a single thread) and
// condDiv. Each launch is followed by C10_CUDA_KERNEL_LAUNCH_CHECK().
void THCTensor_(multinomialAliasSetup)(THCState *state, THCTensor *_probs, THCudaLongTensor *_J, THCTensor *_q){
  THArgCheck(_probs->dim() == 1, 1,
             "expected 1-D probability tensor, got %d-D probability tensor instead",
             _probs->dim());
  THAssert(THCTensor_(isContiguous)(state, _q));
  THAssert(THCudaLongTensor_isContiguous(state, _J));

  THCTensor *probs = THCTensor_(newContiguous)(state, _probs);
  THAssert(THCTensor_(isContiguous)(state, probs));
  const int64_t inputsize = THCTensor_(nElement)(state, probs);

  // Scratch tensors: per-element flags and the compacted index lists.
  THCudaLongTensor *small_flags = THCudaLongTensor_newWithSize1d(state, inputsize);
  THCudaLongTensor *large_flags = THCudaLongTensor_newWithSize1d(state, inputsize);
  THCudaLongTensor *small_list = THCudaLongTensor_newWithSize1d(state, inputsize);
  THCudaLongTensor *large_list = THCudaLongTensor_newWithSize1d(state, inputsize);

  THCudaLongTensor_resize1d(state, _J, inputsize);
  THCTensor_(resize1d)(state, _q, inputsize);

  const scalar_t one = ScalarConvert<int64_t, scalar_t>::to(1);
  const int inputBlockDim = THCCeilDiv((int)inputsize + BLOCK_SIZE - 1, BLOCK_SIZE);

  // Step 1: scale the probabilities into _q, reset _J and flag each bucket.
  aliasMultinomialFilter
    <<<inputBlockDim, BLOCK_SIZE, 0, c10::cuda::getCurrentCUDAStream() >>>(
      THCTensor_(data)(state, _q),
      THCTensor_(data)(state, probs),
      THCudaLongTensor_data(state, small_flags),
      THCudaLongTensor_data(state, large_flags),
      THCudaLongTensor_data(state, _J),
      THCudaLongTensor_data(state, small_list),
      THCudaLongTensor_data(state, large_list),
      one, inputsize
    );
  C10_CUDA_KERNEL_LAUNCH_CHECK();

  // Step 2: compact the flag arrays into index lists via nonzero_out.
  at::Tensor small_list_t = THTensor_wrap(small_list);
  at::Tensor small_flags_t = THTensor_wrap(small_flags);
  at::Tensor large_list_t = THTensor_wrap(large_list);
  at::Tensor large_flags_t = THTensor_wrap(large_flags);
  at::nonzero_out(small_list_t, small_flags_t);
  at::nonzero_out(large_list_t, large_flags_t);

  // Element count of the compacted "larger" list; the "smaller" count is
  // derived from it as the remainder of inputsize.
  const int h_large_c = THCudaLongTensor_nElement(state, large_list);
  THCudaLongTensor_resize1d(state, small_list, inputsize);
  THCudaLongTensor_resize1d(state, large_list, inputsize);

  // Step 3: sequential pairing loop, launched as one block of one thread.
  aliasMultinomialSetup
    <<<1, 1, 0, c10::cuda::getCurrentCUDAStream()>>>(
      THCudaLongTensor_data(state, _J),
      THCTensor_(data)(state, _q),
      inputsize,
      THCudaLongTensor_data(state, small_list),
      THCudaLongTensor_data(state, large_list),
      inputsize - h_large_c, h_large_c
    );
  C10_CUDA_KERNEL_LAUNCH_CHECK();

  // Step 4: fix up _q using its maximum and the entries of _J.
  const scalar_t q_max = at::max(THTensor_wrap(_q)).item<scalar_t>();
  condDiv<<<
    inputBlockDim, BLOCK_SIZE, 0, c10::cuda::getCurrentCUDAStream()>>>(
      THCTensor_(data)(state, _q),
      THCudaLongTensor_data(state, _J),
      inputsize, q_max
    );
  C10_CUDA_KERNEL_LAUNCH_CHECK();

  THCudaLongTensor_free(state, small_flags);
  THCudaLongTensor_free(state, large_flags);
  THCudaLongTensor_free(state, small_list);
  THCudaLongTensor_free(state, large_list);
  THCTensor_free(state, probs);
}
|
||||
|
||||
// Host-side driver that draws n_sample indices into `self` from the alias
// tables _q and _J.
//
// self:     resized to n_sample elements; receives the drawn indices.
// _q, _J:   1-D, contiguous tables (checked / asserted). The element count of
//           _J is used as the number of buckets.
// n_sample: number of draws, must be > 0.
// gen_:     optional generator forwarded to both uniform_ fills.
//
// Two temporary tensors of n_sample values are filled with uniform_ (one over
// [0, K], one over [0, 1] as passed to uniform_), consumed by
// multinomialAliasDrawKernel on the current CUDA stream, and then freed.
void THCTensor_(multinomialAliasDraw)(THCState *state, THCudaLongTensor *self, THCTensor *_q, THCudaLongTensor *_J, int n_sample, c10::optional<at::Generator> gen_){
  THArgCheck(_q->dim() == 1, 1,
             "expected 1-D probability table, got %d-D probability table instead",
             _q->dim());
  THArgCheck(_J->dim() == 1, 2,
             "expected 1-D alias table, got %d-D alias table instead",
             _J->dim());
  THArgCheck(n_sample > 0, 3, "cannot sample <= 0 samples");
  THAssert(THCTensor_(isContiguous)(state, _q));
  THAssert(THCudaLongTensor_isContiguous(state, _J));

  const int64_t K = THCudaLongTensor_nElement(state, _J);
  THCudaLongTensor_resize1d(state, self, n_sample);
  const ptrdiff_t size = THCudaLongTensor_nElement(state, self);

  // Per-sample random inputs for the kernel.
  THCTensor *bucket_draws = THCTensor_(newWithSize1d)(state, n_sample);
  THCTensor *accept_draws = THCTensor_(newWithSize1d)(state, n_sample);

  auto bucket_draws_t = THTensor_wrap(bucket_draws);
  auto accept_draws_t = THTensor_wrap(accept_draws);
  at::native::uniform_(bucket_draws_t, 0, K, gen_);
  at::native::uniform_(accept_draws_t, 0, 1, gen_);

  const int num_blocks = THCCeilDiv((int)n_sample+BLOCK_SIZE-1, BLOCK_SIZE);
  multinomialAliasDrawKernel
    <<<num_blocks, BLOCK_SIZE, 0, c10::cuda::getCurrentCUDAStream()>>>(
      size,
      THCudaLongTensor_data(state, self),
      THCudaLongTensor_data(state, _J),
      THCTensor_(data)(state, _q),
      K,
      THCTensor_(data)(state, bucket_draws),
      THCTensor_(data)(state, accept_draws)
    );
  C10_CUDA_KERNEL_LAUNCH_CHECK();

  THCTensor_(free)(state, bucket_draws);
  THCTensor_(free)(state, accept_draws);
}
|
||||
|
||||
#endif
|
||||
#endif
|
@ -1,23 +0,0 @@
|
||||
#ifndef THC_GENERIC_FILE
|
||||
#define THC_GENERIC_FILE "THC/generic/THCTensorRandom.h"
|
||||
#else
|
||||
|
||||
#include <ATen/core/Generator.h>
|
||||
|
||||
#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
|
||||
|
||||
TORCH_CUDA_CU_API void THCTensor_(multinomialAliasSetup)(
|
||||
struct THCState* state,
|
||||
THCTensor* probs,
|
||||
THCudaLongTensor* J,
|
||||
THCTensor* q);
|
||||
TORCH_CUDA_CU_API void THCTensor_(multinomialAliasDraw)(
|
||||
THCState* state,
|
||||
THCudaLongTensor* self,
|
||||
THCTensor* _q,
|
||||
THCudaLongTensor* _J,
|
||||
int n_sample,
|
||||
c10::optional<at::Generator> gen_);
|
||||
|
||||
#endif
|
||||
#endif
|
@ -973,7 +973,6 @@ aten_native_source_non_codegen_list = [
|
||||
"aten/src/TH/THTensorLapack.cpp",
|
||||
"aten/src/TH/THTensorMath.cpp",
|
||||
"aten/src/TH/THTensorMoreMath.cpp",
|
||||
"aten/src/TH/THTensorRandom.cpp",
|
||||
"aten/src/ATen/native/utils/Factory.cpp",
|
||||
"aten/src/ATen/native/xnnpack/ChannelShuffle.cpp",
|
||||
"aten/src/ATen/native/xnnpack/Convolution.cpp",
|
||||
|
Reference in New Issue
Block a user