Compare commits

...

1 Commit

Author SHA1 Message Date
b5a8214f91 Migrate easy q(u)int/bits stuff to torch/headeronly
ghstack-source-id: 2a30e178fd3675eb8409d380baf85ecd25375a26
Pull Request resolved: https://github.com/pytorch/pytorch/pull/159302
2025-07-28 15:02:48 -07:00
12 changed files with 192 additions and 155 deletions

View File

@ -1,61 +1 @@
#pragma once
#include <cstdint>
#include <c10/macros/Macros.h>
namespace c10 {

/**
 * bits1x8 is an uninterpreted dtype of a tensor with 1 bit (packed to byte
 * boundary), without any semantics defined.
 */
struct alignas(1) bits1x8 {
  using underlying = uint8_t;
  uint8_t val_;
  bits1x8() = default;
  C10_HOST_DEVICE explicit bits1x8(uint8_t val) : val_(val) {}
};

/**
 * bits2x4 is an uninterpreted dtype of a tensor with 2 bits (packed to byte
 * boundary), without any semantics defined.
 */
struct alignas(1) bits2x4 {
  using underlying = uint8_t;
  uint8_t val_;
  bits2x4() = default;
  C10_HOST_DEVICE explicit bits2x4(uint8_t val) : val_(val) {}
};

/**
 * bits4x2 is an uninterpreted dtype of a tensor with 4 bits (packed to byte
 * boundary), without any semantics defined.
 */
struct alignas(1) bits4x2 {
  using underlying = uint8_t;
  uint8_t val_;
  bits4x2() = default;
  C10_HOST_DEVICE explicit bits4x2(uint8_t val) : val_(val) {}
};

/**
 * bits8 is an uninterpreted dtype of a tensor with 8 bits, without any
 * semantics defined.
 */
struct alignas(1) bits8 {
  // NOTE(review): `underlying` added for consistency with the other bits*
  // and q(u)int* types in this file; a member typedef is purely additive,
  // so existing callers are unaffected.
  using underlying = uint8_t;
  uint8_t val_;
  bits8() = default;
  C10_HOST_DEVICE explicit bits8(uint8_t val) : val_(val) {}
};

/**
 * bits16 is an uninterpreted dtype of a tensor with 16 bits, without any
 * semantics defined.
 */
struct alignas(2) bits16 {
  // NOTE(review): `underlying` added for consistency (see bits8 above).
  using underlying = uint16_t;
  uint16_t val_;
  bits16() = default;
  C10_HOST_DEVICE explicit bits16(uint16_t val) : val_(val) {}
};

} // namespace c10
#include <torch/headeronly/util/bits.h>

View File

@ -1,18 +1 @@
#pragma once
#include <cstdint>
#include <c10/macros/Macros.h>
namespace c10 {
/**
* qint32 is for signed 32 bit quantized Tensors
*/
struct alignas(4) qint32 {
using underlying = int32_t;
int32_t val_;
qint32() = default;
C10_HOST_DEVICE explicit qint32(int32_t val) : val_(val) {}
};
} // namespace c10
#include <torch/headeronly/util/qint32.h>

View File

@ -1,20 +1 @@
#pragma once
#include <cstdint>
#include <c10/macros/Macros.h>
namespace c10 {
/**
* This is the data type for quantized Tensors. Right now we only have
* qint8 which is for 8 bit Tensors, and qint32 for 32 bit int Tensors,
* we might have 4 bit, 2 bit or 1 bit data types in the future.
*/
struct alignas(1) qint8 {
using underlying = int8_t;
int8_t val_;
qint8() = default;
C10_HOST_DEVICE explicit qint8(int8_t val) : val_(val) {}
};
} // namespace c10
#include <torch/headeronly/util/qint8.h>

View File

@ -1,19 +1 @@
#pragma once
#include <cstdint>
#include <c10/macros/Macros.h>
namespace c10 {
/**
* quint2x4 is for un-signed 2 bit quantized Tensors that are packed to byte
* boundary.
*/
struct alignas(1) quint2x4 {
using underlying = uint8_t;
uint8_t val_;
quint2x4() = default;
C10_HOST_DEVICE explicit quint2x4(uint8_t val) : val_(val) {}
};
} // namespace c10
#include <torch/headeronly/util/quint2x4.h>

View File

@ -1,19 +1 @@
#pragma once
#include <cstdint>
#include <c10/macros/Macros.h>
namespace c10 {
/**
* quint4x2 is for un-signed 4 bit quantized Tensors that are packed to byte
* boundary.
*/
struct alignas(1) quint4x2 {
using underlying = uint8_t;
uint8_t val_;
quint4x2() = default;
C10_HOST_DEVICE explicit quint4x2(uint8_t val) : val_(val) {}
};
} // namespace c10
#include <torch/headeronly/util/quint4x2.h>

View File

@ -1,18 +1 @@
#pragma once
#include <cstdint>
#include <c10/macros/Macros.h>
namespace c10 {
/**
* quint8 is for unsigned 8 bit quantized Tensors
*/
struct alignas(1) quint8 {
using underlying = uint8_t;
uint8_t val_;
quint8() = default;
C10_HOST_DEVICE explicit quint8(uint8_t val) : val_(val) {}
};
} // namespace c10
#include <torch/headeronly/util/quint8.h>

View File

@ -0,0 +1,71 @@
#pragma once
#include <cstdint>
#include <torch/headeronly/macros/Macros.h>
namespace c10 {

/**
 * bits1x8 is an uninterpreted dtype of a tensor with 1 bit (packed to byte
 * boundary), without any semantics defined.
 */
struct alignas(1) bits1x8 {
  using underlying = uint8_t;
  uint8_t val_;
  bits1x8() = default;
  C10_HOST_DEVICE explicit bits1x8(uint8_t val) : val_(val) {}
};

/**
 * bits2x4 is an uninterpreted dtype of a tensor with 2 bits (packed to byte
 * boundary), without any semantics defined.
 */
struct alignas(1) bits2x4 {
  using underlying = uint8_t;
  uint8_t val_;
  bits2x4() = default;
  C10_HOST_DEVICE explicit bits2x4(uint8_t val) : val_(val) {}
};

/**
 * bits4x2 is an uninterpreted dtype of a tensor with 4 bits (packed to byte
 * boundary), without any semantics defined.
 */
struct alignas(1) bits4x2 {
  using underlying = uint8_t;
  uint8_t val_;
  bits4x2() = default;
  C10_HOST_DEVICE explicit bits4x2(uint8_t val) : val_(val) {}
};

/**
 * bits8 is an uninterpreted dtype of a tensor with 8 bits, without any
 * semantics defined.
 */
struct alignas(1) bits8 {
  // NOTE(review): `underlying` added for consistency with the other bits*
  // and q(u)int* types in this header set; a member typedef is purely
  // additive, so existing callers are unaffected.
  using underlying = uint8_t;
  uint8_t val_;
  bits8() = default;
  C10_HOST_DEVICE explicit bits8(uint8_t val) : val_(val) {}
};

/**
 * bits16 is an uninterpreted dtype of a tensor with 16 bits, without any
 * semantics defined.
 */
struct alignas(2) bits16 {
  // NOTE(review): `underlying` added for consistency (see bits8 above).
  using underlying = uint16_t;
  uint16_t val_;
  bits16() = default;
  C10_HOST_DEVICE explicit bits16(uint16_t val) : val_(val) {}
};

} // namespace c10

// Re-export under the header-only namespace; the canonical definitions
// remain in c10 so existing code keeps working.
namespace torch::headeronly {
using c10::bits1x8;
using c10::bits2x4;
using c10::bits4x2;
using c10::bits8;
using c10::bits16;
} // namespace torch::headeronly

View File

@ -0,0 +1,22 @@
#pragma once
#include <cstdint>
#include <torch/headeronly/macros/Macros.h>
namespace c10 {
/**
* qint32 is for signed 32 bit quantized Tensors
*/
struct alignas(4) qint32 {
using underlying = int32_t;
int32_t val_;
qint32() = default;
C10_HOST_DEVICE explicit qint32(int32_t val) : val_(val) {}
};
} // namespace c10
namespace torch::headeronly {
using c10::qint32;
} // namespace torch::headeronly

View File

@ -0,0 +1,24 @@
#pragma once
#include <cstdint>
#include <torch/headeronly/macros/Macros.h>
namespace c10 {
/**
* This is the data type for quantized Tensors. Right now we only have
* qint8 which is for 8 bit Tensors, and qint32 for 32 bit int Tensors,
* we might have 4 bit, 2 bit or 1 bit data types in the future.
*/
struct alignas(1) qint8 {
using underlying = int8_t;
int8_t val_;
qint8() = default;
C10_HOST_DEVICE explicit qint8(int8_t val) : val_(val) {}
};
} // namespace c10
namespace torch::headeronly {
using c10::qint8;
} // namespace torch::headeronly

View File

@ -0,0 +1,24 @@
#pragma once
#include <cstdint>
#include <torch/headeronly/macros/Macros.h>
namespace c10 {
/**
* quint2x4 is for un-signed 2 bit quantized Tensors that are packed to byte
* boundary.
*/
struct alignas(1) quint2x4 {
using underlying = uint8_t;
uint8_t val_;
quint2x4() = default;
C10_HOST_DEVICE explicit quint2x4(uint8_t val) : val_(val) {}
};
} // namespace c10
namespace torch::headeronly {
using c10::quint2x4;
} // namespace torch::headeronly

View File

@ -0,0 +1,23 @@
#pragma once
#include <cstdint>
#include <torch/headeronly/macros/Macros.h>
namespace c10 {
/**
* quint4x2 is for un-signed 4 bit quantized Tensors that are packed to byte
* boundary.
*/
struct alignas(1) quint4x2 {
using underlying = uint8_t;
uint8_t val_;
quint4x2() = default;
C10_HOST_DEVICE explicit quint4x2(uint8_t val) : val_(val) {}
};
} // namespace c10
namespace torch::headeronly {
using c10::quint4x2;
} // namespace torch::headeronly

View File

@ -0,0 +1,22 @@
#pragma once
#include <cstdint>
#include <torch/headeronly/macros/Macros.h>
namespace c10 {
/**
* quint8 is for unsigned 8 bit quantized Tensors
*/
struct alignas(1) quint8 {
using underlying = uint8_t;
uint8_t val_;
quint8() = default;
C10_HOST_DEVICE explicit quint8(uint8_t val) : val_(val) {}
};
} // namespace c10
namespace torch::headeronly {
using c10::quint8;
} // namespace torch::headeronly