mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-11-04 16:04:58 +08:00 
			
		
		
		
	restore rng generation for fbcode (#144819)
Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/144819
Approved by: https://github.com/malfet, https://github.com/kit1980
This commit is contained in:
		
				
					committed by
					
						
						PyTorch MergeBot
					
				
			
			
				
	
			
			
			
						parent
						
							154185dcd0
						
					
				
				
					commit
					2bc18a9055
				
			@ -40,7 +40,15 @@ struct uniform_int_from_to_distribution {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  template <typename RNG>
 | 
					  template <typename RNG>
 | 
				
			||||||
  C10_HOST_DEVICE inline T operator()(RNG generator) {
 | 
					  C10_HOST_DEVICE inline T operator()(RNG generator) {
 | 
				
			||||||
 | 
					#ifdef FBCODE_CAFFE2
 | 
				
			||||||
 | 
					    if ((
 | 
				
			||||||
 | 
					      std::is_same_v<T, int64_t> ||
 | 
				
			||||||
 | 
					      std::is_same_v<T, double> ||
 | 
				
			||||||
 | 
					      std::is_same_v<T, float> ||
 | 
				
			||||||
 | 
					      std::is_same_v<T, at::BFloat16>) && range_ >= 1ULL << 32)
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
    if (range_ >= 1ULL << 28) // allow approx 5% skew in uniform int generation using %
 | 
					    if (range_ >= 1ULL << 28) // allow approx 5% skew in uniform int generation using %
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      return transformation::uniform_int_from_to<T>(generator->random64(), range_, base_);
 | 
					      return transformation::uniform_int_from_to<T>(generator->random64(), range_, base_);
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
				
			|||||||
@ -279,6 +279,41 @@ namespace cuda {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<typename RNG>
 | 
					template<typename RNG>
 | 
				
			||||||
void random_from_to_kernel(TensorIteratorBase& iter, uint64_t range, int64_t base, RNG gen) {
 | 
					void random_from_to_kernel(TensorIteratorBase& iter, uint64_t range, int64_t base, RNG gen) {
 | 
				
			||||||
 | 
					#ifdef FBCODE_CAFFE2
 | 
				
			||||||
 | 
					  AT_DISPATCH_V2(iter.dtype(), "random_from_to_kernel_cuda", AT_WRAP([&] {
 | 
				
			||||||
 | 
					    if ((
 | 
				
			||||||
 | 
					      std::is_same_v<T, int64_t> ||
 | 
				
			||||||
 | 
					      std::is_same_v<T, double> ||
 | 
				
			||||||
 | 
					      std::is_same_v<T, float> ||
 | 
				
			||||||
 | 
					      std::is_same_v<T, at::BFloat16>) && range >= 1ULL << 32)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      // define lambda to mod with range and add base
 | 
				
			||||||
 | 
					      auto random_func = [range, base] __device__ (uint64_t rand) {
 | 
				
			||||||
 | 
					        return transformation::uniform_int_from_to<scalar_t>(rand, range, base);
 | 
				
			||||||
 | 
					      };
 | 
				
			||||||
 | 
					      distribution_nullary_kernel<scalar_t, uint64_t, ulonglong2>(iter,
 | 
				
			||||||
 | 
					        gen,
 | 
				
			||||||
 | 
					        [] __device__ (curandStatePhilox4_32_10_t* state) -> ulonglong2 {
 | 
				
			||||||
 | 
					          ulonglong2 ret;
 | 
				
			||||||
 | 
					          uint4 rand_val = curand4(state);
 | 
				
			||||||
 | 
					          ret.x = (static_cast<uint64_t>(rand_val.x) << 32) | rand_val.y;
 | 
				
			||||||
 | 
					          ret.y = (static_cast<uint64_t>(rand_val.z) << 32) | rand_val.w;
 | 
				
			||||||
 | 
					          return ret;
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        random_func);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      auto random_func = [range, base] __device__ (uint32_t rand) {
 | 
				
			||||||
 | 
					        return transformation::uniform_int_from_to<scalar_t>(rand, range, base);
 | 
				
			||||||
 | 
					      };
 | 
				
			||||||
 | 
					      distribution_nullary_kernel<scalar_t, uint32_t, uint4>(iter,
 | 
				
			||||||
 | 
					        gen,
 | 
				
			||||||
 | 
					        [] __device__ (curandStatePhilox4_32_10_t* state) -> uint4 {
 | 
				
			||||||
 | 
					          return curand4(state);
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        random_func);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					   }), AT_EXPAND(AT_ALL_TYPES), kBool, kHalf, kBFloat16, AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
  AT_DISPATCH_V2(iter.dtype(), "random_from_to_kernel_cuda", AT_WRAP([&] {
 | 
					  AT_DISPATCH_V2(iter.dtype(), "random_from_to_kernel_cuda", AT_WRAP([&] {
 | 
				
			||||||
    if (range >= 1ULL << 28) // allow approx 5% skew in uniform int generation using %
 | 
					    if (range >= 1ULL << 28) // allow approx 5% skew in uniform int generation using %
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
@ -308,6 +343,7 @@ void random_from_to_kernel(TensorIteratorBase& iter, uint64_t range, int64_t bas
 | 
				
			|||||||
        random_func);
 | 
					        random_func);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
   }), AT_EXPAND(AT_ALL_TYPES), kBool, kHalf, kBFloat16, AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
 | 
					   }), AT_EXPAND(AT_ALL_TYPES), kBool, kHalf, kBFloat16, AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// This is the special kernel to handle single specific case:
 | 
					// This is the special kernel to handle single specific case:
 | 
				
			||||||
 | 
				
			|||||||
@ -137,9 +137,13 @@ void test_random_from_to(const at::Device& device) {
 | 
				
			|||||||
              range = static_cast<uint64_t>(max_to) - static_cast<uint64_t>(from) + 1;
 | 
					              range = static_cast<uint64_t>(max_to) - static_cast<uint64_t>(from) + 1;
 | 
				
			||||||
              from_case_covered = true;
 | 
					              from_case_covered = true;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					#ifdef FBCODE_CAFFE2
 | 
				
			||||||
 | 
					            if (range < (1ULL << 32)) {
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
            // this is leaking details of implementation into test
 | 
					            // this is leaking details of implementation into test
 | 
				
			||||||
            // we are starting to use random64() at 2^28 to minimize skew due to %
 | 
					            // we are starting to use random64() at 2^28 to minimize skew due to %
 | 
				
			||||||
            if (range < (1ULL << 28)) {
 | 
					            if (range < (1ULL << 28)) {
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
              exp = static_cast<T>(static_cast<int64_t>((static_cast<uint32_t>(val) % range + from)));
 | 
					              exp = static_cast<T>(static_cast<int64_t>((static_cast<uint32_t>(val) % range + from)));
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
              exp = static_cast<T>(static_cast<int64_t>((val % range + from)));
 | 
					              exp = static_cast<T>(static_cast<int64_t>((val % range + from)));
 | 
				
			||||||
 | 
				
			|||||||
@ -3502,6 +3502,7 @@ class TestRandomTensorCreation(TestCase):
 | 
				
			|||||||
            self.assertTrue((res1 >= 0).all().item())
 | 
					            self.assertTrue((res1 >= 0).all().item())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @unittest.skipIf(IS_FBCODE or IS_SANDCASTLE, "For fb compatibility random not changed in fbcode")
 | 
				
			||||||
    def test_randint_distribution(self, device):
 | 
					    def test_randint_distribution(self, device):
 | 
				
			||||||
        size = 1_000_000
 | 
					        size = 1_000_000
 | 
				
			||||||
        n_max = int(0.75 * 2 ** 32)
 | 
					        n_max = int(0.75 * 2 ** 32)
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user