Revert "Use random64 in Fischer-Yates algorithm for large N (#143682)"
This reverts commit 7013be0094e8d3ded2ba2f948082f98d63e622bb. Reverted https://github.com/pytorch/pytorch/pull/143682 on behalf of https://github.com/wdvr due to failing Meta internal tests that need to be updated ([comment](https://github.com/pytorch/pytorch/pull/143682#issuecomment-2563487675))
```diff
@@ -1322,7 +1322,6 @@ Tensor randn_like(
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ randperm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 namespace {
 
 template <typename scalar_t>
 void randperm_cpu(Tensor& result, int64_t n, CPUGeneratorImpl* generator) {
   scalar_t* r__data = result.data_ptr<scalar_t>();
@@ -1330,18 +1329,22 @@ void randperm_cpu(Tensor& result, int64_t n, CPUGeneratorImpl* generator) {
   result.resize_({n});
   int64_t r__stride_0 = result.stride(0);
 
-  // we need to pick a number uniformly distributed between 0 and n
-  // when n is of the same order of magnitude as the biggest number returned by
-  // random the % result is not uniformly distributed
-  // so we use random64(), you'd run out of RAM before you
-  // start seeing the skew
-  // use no-initialization Fischer-Yates variant
-  // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_.22inside-out.22_algorithm
-  for (int64_t i = 0; i < n; i++) {
-    int64_t z = generator->random64() % (i + 1);
-    r__data[i * r__stride_0] = i;
-    r__data[i * r__stride_0] = r__data[z * r__stride_0];
-    r__data[z * r__stride_0] = i;
-  }
+  at::parallel_for(
+      0,
+      n,
+      internal::GRAIN_SIZE,
+      [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) {
+        for (const auto i : c10::irange(p_begin, p_end)) {
+          r__data[i * r__stride_0] = static_cast<scalar_t>(i);
+        }
+      });
+
+  for (int64_t i = 0; i < n - 1; i++) {
+    // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.rand)
+    int64_t z = generator->random() % (n - i);
+    scalar_t sav = r__data[i * r__stride_0];
+    r__data[i * r__stride_0] = r__data[(z + i) * r__stride_0];
+    r__data[(z + i) * r__stride_0] = sav;
+  }
 }
 } // namespace
```
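For context: the deleted block is the "inside-out" Fisher-Yates variant driven by random64(), which sidesteps modulo bias when n approaches the range of a 32-bit draw; the restored block is the classic initialize-then-swap form driven by random(). A minimal Python sketch of the two shapes, illustrative only and not the PyTorch internals:

```python
# Illustrative sketch (plain Python, not PyTorch internals) of the two
# shuffle variants this hunk swaps. Both produce a uniform permutation
# as long as randrange() itself is unbiased; the C++ concern is that
# `rand32() % m` is biased once m is near 2**32, which is why the
# reverted code drew 64-bit values instead.
import random

def fisher_yates_inplace(n):
    # Restored variant: fill 0..n-1 first (parallelized in the C++),
    # then swap each slot with a random element of the remaining suffix.
    r = list(range(n))
    for i in range(n - 1):
        z = random.randrange(n - i)
        r[i], r[z + i] = r[z + i], r[i]
    return r

def fisher_yates_inside_out(n):
    # Reverted variant: "inside-out" form, no separate initialization pass.
    r = [0] * n
    for i in range(n):
        z = random.randrange(i + 1)
        r[i] = r[z]
        r[z] = i
    return r
```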
```diff
@@ -246,7 +246,7 @@ class TestDatasetRandomSplit(TestCase):
                     range(10), [3, 7], generator=torch.Generator().manual_seed(1)
                 )
             ],
-            [[8, 4, 2], [0, 7, 5, 3, 6, 9, 1]],
+            [[5, 6, 1], [2, 0, 8, 9, 3, 7, 4]],
         )
         self.assertEqual(
             random_split(
```
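The hard-coded index lists are pinned to the generator's exact bit stream, so any change to the RNG algorithm behind randperm forces these fixtures to be regenerated. A minimal usage sketch (the exact output is deliberately not asserted here, since it depends on the PyTorch version):

```python
# random_split draws its permutation from the supplied generator, so the
# resulting index split is reproducible for a fixed seed and RNG algorithm.
import torch
from torch.utils.data import random_split

splits = random_split(range(10), [3, 7],
                      generator=torch.Generator().manual_seed(1))
print([list(s) for s in splits])  # exact lists depend on the RNG stream
```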
```diff
@@ -1956,7 +1956,7 @@ class TestSparseCSR(TestCase):
     @dtypesIfCUDA(*floating_and_complex_types_and(
         *[torch.half] if SM53OrLater and TEST_CUSPARSE_GENERIC else [],
         *[torch.bfloat16] if SM80OrLater and TEST_CUSPARSE_GENERIC else []))
-    @precisionOverride({torch.bfloat16: 3.5e-2, torch.float16: 1e-2})
+    @precisionOverride({torch.bfloat16: 1e-2, torch.float16: 1e-2})
     def test_sparse_addmm(self, device, dtype):
         def test_shape(m, n, p, nnz, broadcast, index_dtype, alpha_beta=None):
             if alpha_beta is None:
```
```diff
@@ -2617,7 +2617,7 @@ class TestSparseCSR(TestCase):
     @skipIfTorchDynamo()
     @onlyCPU
     @dtypes(torch.float32, torch.float64, torch.bfloat16, torch.float16)
-    @precisionOverride({torch.bfloat16: 0.02, torch.float16: 0.01})
+    @precisionOverride({torch.bfloat16: 0.01, torch.float16: 0.01})
     def test_sparse_mm_reduce(self, device, dtype):
         def run_test(m, n, k, nnz, reduce_type, index_dtype, train):
             csr = self.genSparseCSRTensor((m, n), nnz, dtype=dtype, device=device, index_dtype=index_dtype)
```
```diff
@@ -3594,29 +3594,6 @@ class TestRandomTensorCreation(TestCase):
             self.assertEqual(non_contiguous_tensor, res)
             self.assertEqual(res.sort().values.long(), torch.arange(n, device=device))
 
-
-    @largeTensorTest("10GB", "cpu")
-    @largeTensorTest("40GB", "cuda")
-    @slowTest
-    def test_randperm_large(self, device):
-        # Test even distribution where rand32 might produce skewed "uniform" distribution
-        # n_items is chosen to not evenly divide 2**32 and be sufficiently large
-        # to easily detect skew
-        def decile(index, collection_size):
-            return index // (collection_size // 10)
-
-        n_items = 700_000_000
-        shuffled = torch.randperm(n_items, device=device)
-        interval = 1_000_000
-        shuffled_interval = shuffled[:interval]
-        # histogram implemented for float only
-        deciles = decile(shuffled_interval, shuffled.shape[0]).float().cpu()
-        hist, _ = deciles.histogram(10, range=(0, 10))
-        expected_bin = shuffled_interval.shape[0] / 10
-        expected_error = math.sqrt(expected_bin) / expected_bin * 3
-        error = (hist - expected_bin).abs().max() / expected_bin
-        self.assertTrue(error < expected_error, f"error {error} > {expected_error}")
-
     # Test exceptions when device and generator types are incompatible
     @onlyCUDA
     @unittest.skipIf(IS_FBCODE or IS_SANDCASTLE, "Produces inconsistent errors when run in fbcode.")
```
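The deleted test detects sampling skew statistically: it buckets the first million entries of the permutation into deciles of the full index range and requires every bucket count to stay within roughly three standard deviations (Poisson approximation, so sigma is the square root of the expected count) of the expected count. A scaled-down sketch of the same check, sized to run quickly:

```python
# Scaled-down version of the deleted skew check. At this size it will
# pass for any reasonable RNG; the real test needed ~700M elements to
# make 32-bit modulo bias visible. A 3-sigma bound can still flake
# occasionally, as with any statistical test.
import math
import torch

n_items = 1_000_000
interval = 100_000
head = torch.randperm(n_items)[:interval]
deciles = (head // (n_items // 10)).float()  # histogram needs float input
hist, _ = deciles.histogram(10, range=(0, 10))
expected_bin = interval / 10
tolerance = 3 * math.sqrt(expected_bin) / expected_bin
error = (hist - expected_bin).abs().max() / expected_bin
assert error < tolerance, f"decile skew {error} exceeds {tolerance}"
```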
```diff
@@ -87,7 +87,7 @@ class TestShuffle(TestCase):
     @parametrize("use_numpy", [True, False])
     def test_2d(self, use_numpy):
         # np.shuffle only shuffles the first axis
-        ax = tnp.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+        ax = tnp.asarray([[1, 2, 3], [4, 5, 6]])
         ox = ax.copy()
 
         tnp.random.seed(1234)
```
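The comment in this hunk is the behavior under test: NumPy-style shuffle permutes only along the first axis. A quick illustration with plain NumPy (not the NumPy-compatibility wrapper `tnp` used by the test):

```python
# Shuffle permutes rows (axis 0) in place; the contents of each row are
# left untouched.
import numpy as np

a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
np.random.default_rng(1234).shuffle(a)
print(a)  # row order changes; each row stays [1,2,3] / [4,5,6] / [7,8,9]
```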