mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
This series of changes tries to convert C-style casts into C++ alternatives. Pull Request resolved: https://github.com/pytorch/pytorch/pull/165750 Approved by: https://github.com/Skylion007
188 lines
6.4 KiB
C++
188 lines
6.4 KiB
C++
// This file is based on the uint128 implementation of protobuf at
|
|
// https://github.com/protocolbuffers/protobuf/blob/1e88936fce10cf773cb72b44c6a7f48b38c7578b/src/google/protobuf/stubs/int128.cc
|
|
//
|
|
// Protocol Buffers - Google's data interchange format
|
|
// Copyright 2008 Google Inc. All rights reserved.
|
|
// https://developers.google.com/protocol-buffers/
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#include <c10/util/Logging.h>
|
|
#include <c10/util/int128.h>
|
|
#include <iomanip>
|
|
#include <ostream> // NOLINT(readability/streams)
|
|
|
|
namespace c10 {
|
|
|
|
// Largest representable 128-bit unsigned value: both 64-bit members of the
// POD are initialized with every bit set.
const uint128_pod kuint128max = {~uint64_t{0}, ~uint64_t{0}};
|
|
|
|
// Returns the 0-based position of the last set bit (i.e., most significant bit)
// in the given uint64. The argument may not be 0 (the code below yields 0 for
// a zero argument, which is indistinguishable from the answer for n == 1).
//
// For example:
//    Given: 5 (decimal) == 101 (binary)
//    Returns: 2
//
// The search is a binary reduction: at every step, if the value needs more
// than `shift` bits, the low `shift` bits are dropped and `shift` is recorded
// in the result. Each shift amount is a distinct power of two, so OR-ing them
// into `pos` is the same as adding them.
static inline int Fls64(uint64_t n) {
  // GOOGLE_DCHECK_NE(0, n);
  uint64_t pos = 0;

  // Fold the upper 32 bits away first so the remaining steps fit in 32 bits.
  if (n >= (uint64_t{1} << 0x20)) {
    n >>= 0x20;
    pos |= 0x20;
  }

  // n < 2^32 is guaranteed here, so the narrowing conversion is lossless.
  uint32_t n32 = static_cast<uint32_t>(n);

  // Halve the window: 16, 8, then 4 bits.
  for (uint32_t shift = 0x10; shift >= 0x04; shift >>= 1) {
    if (n32 >= (uint32_t{1} << shift)) {
      n32 >>= shift;
      pos |= shift;
    }
  }

  // n32 is now below 16. The constant is a 16-entry table packed one entry
  // per nibble; entry i holds the index of the highest set bit of i.
  return static_cast<int>(
      pos + ((uint64_t{0x3333333322221100u} >> (n32 << 2)) & 0x3));
}
|
|
|
|
// Like Fls64() above, but returns the 0-based position of the last set bit
|
|
// (i.e., most significant bit) in the given uint128. The argument may not be 0.
|
|
static inline int Fls128(uint128 n) {
|
|
if (uint64_t hi = Uint128High64(n)) {
|
|
return Fls64(hi) + 64;
|
|
}
|
|
return Fls64(Uint128Low64(n));
|
|
}
|
|
|
|
// Computes dividend / divisor and dividend % divisor with shift-and-subtract
// long division, storing the results through quotient_ret and remainder_ret.
//
// A zero divisor is reported through LOG(FATAL) and neither output is
// written. The dividend is taken by value, so an output pointer may refer to
// the same object the caller passed as the dividend.
void uint128::DivModImpl(
    uint128 dividend,
    uint128 divisor,
    uint128* quotient_ret,
    uint128* remainder_ret) {
  if (divisor == 0) {
    LOG(FATAL) << "Division or mod by zero: dividend.hi=" << dividend.hi_
               << ", lo=" << dividend.lo_;
    return;
  }

  if (dividend < divisor) {
    // The divisor does not fit even once.
    *quotient_ret = 0;
    *remainder_ret = dividend;
    return;
  }

  // Align the divisor's top bit with the dividend's top bit, then walk the
  // alignment back down to zero, producing one quotient bit per position.
  const int top_bit_gap = Fls128(dividend) - Fls128(divisor);
  uint128 quotient = 0;
  for (int shift = top_bit_gap; shift >= 0; --shift) {
    quotient <<= 1;
    const uint128 aligned_divisor = divisor << shift;
    if (aligned_divisor <= dividend) {
      dividend -= aligned_divisor;
      quotient += 1;
    }
  }

  // Whatever is left of the dividend is the remainder.
  *quotient_ret = quotient;
  *remainder_ret = dividend;
}
|
|
|
|
// Divides this value by `divisor` in place and returns *this.
// The work is delegated to DivModImpl, which reports a zero divisor through
// LOG(FATAL); the remainder it produces is discarded here.
uint128& uint128::operator/=(const uint128& divisor) {
  uint128 div_result = 0;
  uint128 mod_result = 0;
  DivModImpl(*this, divisor, &div_result, &mod_result);
  *this = div_result;
  return *this;
}
|
|
// Replaces this value with its remainder modulo `divisor` and returns *this.
// The work is delegated to DivModImpl, which reports a zero divisor through
// LOG(FATAL); the quotient it produces is discarded here.
uint128& uint128::operator%=(const uint128& divisor) {
  uint128 div_result = 0;
  uint128 mod_result = 0;
  DivModImpl(*this, divisor, &div_result, &mod_result);
  *this = mod_result;
  return *this;
}
|
|
|
|
std::ostream& operator<<(std::ostream& o, const uint128& b) {
|
|
std::ios_base::fmtflags flags = o.flags();
|
|
|
|
// Select a divisor which is the largest power of the base < 2^64.
|
|
uint128 div;
|
|
int div_base_log = 0;
|
|
switch (flags & std::ios::basefield) {
|
|
case std::ios::hex:
|
|
div = static_cast<uint64_t>(0x1000000000000000u); // 16^15
|
|
div_base_log = 15;
|
|
break;
|
|
case std::ios::oct:
|
|
div = static_cast<uint64_t>(01000000000000000000000u); // 8^21
|
|
div_base_log = 21;
|
|
break;
|
|
default: // std::ios::dec
|
|
div = static_cast<uint64_t>(10000000000000000000u); // 10^19
|
|
div_base_log = 19;
|
|
break;
|
|
}
|
|
|
|
// Now piece together the uint128 representation from three chunks of
|
|
// the original value, each less than "div" and therefore representable
|
|
// as a uint64.
|
|
std::ostringstream os;
|
|
std::ios_base::fmtflags copy_mask =
|
|
std::ios::basefield | std::ios::showbase | std::ios::uppercase;
|
|
os.setf(flags & copy_mask, copy_mask);
|
|
uint128 high = b;
|
|
uint128 low;
|
|
uint128::DivModImpl(high, div, &high, &low);
|
|
uint128 mid;
|
|
uint128::DivModImpl(high, div, &high, &mid);
|
|
if (high.lo_ != 0) {
|
|
os << high.lo_;
|
|
os << std::noshowbase << std::setfill('0') << std::setw(div_base_log);
|
|
os << mid.lo_;
|
|
os << std::setw(div_base_log);
|
|
} else if (mid.lo_ != 0) {
|
|
os << mid.lo_;
|
|
os << std::noshowbase << std::setfill('0') << std::setw(div_base_log);
|
|
}
|
|
os << low.lo_;
|
|
std::string rep = os.str();
|
|
|
|
// Add the requisite padding.
|
|
std::streamsize width = o.width(0);
|
|
if (width > static_cast<std::streamsize>(rep.size())) {
|
|
if ((flags & std::ios::adjustfield) == std::ios::left) {
|
|
rep.append(width - rep.size(), o.fill());
|
|
} else {
|
|
rep.insert(
|
|
static_cast<std::string::size_type>(0), width - rep.size(), o.fill());
|
|
}
|
|
}
|
|
|
|
// Stream the final representation in a single "<<" call.
|
|
return o << rep;
|
|
}
|
|
|
|
} // namespace c10
|