Compare commits


5 Commits

SHA1 Message Date
f78738eead Update triton.txt 2025-11-04 13:21:04 -05:00
7002824759 Update build_triton_wheel.py 2025-11-04 13:20:16 -05:00
2b3af4a4ba Update build_triton_wheel.py 2025-11-04 13:12:32 -05:00
63fb7949e7 Update triton.txt 2025-11-04 12:40:41 -05:00
c50594ff6f Update triton to 3.5.1 release 2025-11-04 12:39:15 -05:00
5 changed files with 20 additions and 6 deletions

View File

@@ -1 +1 @@
-7416ffcb92cdbe98d9f97e4e6f95247e46dfc9fd
+bfeb066872bc1e8b2d2bc0a3b295b99dd77206e7

View File

@@ -1 +1 @@
-3.5.0
+3.5.1
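
As a quick sanity check after this bump, the installed Triton can be verified from Python (a minimal sketch, assuming a wheel built from this pin is installed):

import triton
# Should match the release pinned above.
print(triton.__version__)  # expected: 3.5.1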

View File

@@ -23,6 +23,8 @@ C10_DIAGNOSTIC_POP()
 #endif
 namespace at {
+namespace {
 /*
 These const variables defined the fp32 precisions for different backend
 We have "generic", "cuda", "mkldnn" backend now and we can choose fp32
@@ -39,6 +41,16 @@ namespace at {
 ->rnn
 */
+C10_ALWAYS_INLINE void warn_deprecated_fp32_precision_api(){
+  TORCH_WARN_ONCE(
+      "Please use the new API settings to control TF32 behavior, such as torch.backends.cudnn.conv.fp32_precision = 'tf32' "
+      "or torch.backends.cuda.matmul.fp32_precision = 'ieee'. Old settings, e.g, torch.backends.cuda.matmul.allow_tf32 = True, "
+      "torch.backends.cudnn.allow_tf32 = True, allowTF32CuDNN() and allowTF32CuBLAS() will be deprecated after Pytorch 2.9. Please see "
+      "https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices"
+  );
+}
+} // namespace
 Float32Backend str2backend(const std::string& name) {
   if (name == "generic")
     return Float32Backend::GENERIC;
@@ -194,6 +206,7 @@ bool Context::allowTF32CuDNN(std::optional<Float32Op> op) const {
   } else {
     return float32Precision(Float32Backend::CUDA, op.value()) == Float32Precision::TF32;
   }
+  warn_deprecated_fp32_precision_api();
   return allow_tf32_cudnn;
 }
@@ -201,6 +214,7 @@ void Context::setAllowTF32CuDNN(bool b) {
   setFloat32Precision(Float32Backend::CUDA, Float32Op::RNN, b ? Float32Precision::TF32 : Float32Precision::NONE);
   setFloat32Precision(Float32Backend::CUDA, Float32Op::CONV, b ? Float32Precision::TF32 : Float32Precision::NONE);
   allow_tf32_cudnn = b;
+  warn_deprecated_fp32_precision_api();
 }
 void Context::setSDPPriorityOrder(const std::vector<int64_t>& order) {
@@ -311,6 +325,7 @@ bool Context::allowTF32CuBLAS() const {
       "Current status indicate that you have used mix of the legacy and new APIs to set the TF32 status for cublas matmul. ",
       "We suggest only using the new API to set the TF32 flag. See also: ",
       "https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices");
+  warn_deprecated_fp32_precision_api();
   return allow_tf32_new;
 }
@@ -334,6 +349,7 @@ Float32MatmulPrecision Context::float32MatmulPrecision() const {
       "Current status indicate that you have used mix of the legacy and new APIs to set the matmul precision. ",
       "We suggest only using the new API for matmul precision. See also: ",
      "https://pytorch.org/docs/main/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices");
+  warn_deprecated_fp32_precision_api();
   return float32_matmul_precision;
 }
@@ -361,6 +377,7 @@ Float32Precision Context::float32Precision(Float32Backend backend, Float32Op op)
 void Context::setFloat32MatmulPrecision(const std::string &s) {
   auto match = [this](const std::string & s_) {
+    warn_deprecated_fp32_precision_api();
     // TODO: consider if CuDNN field needs to also be set for potential future CuDNN ops like multi-headed attention
     if (s_ == "highest") {
       float32_matmul_precision = at::Float32MatmulPrecision::HIGHEST;
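
For context, the warning added in this file steers callers from the legacy TF32 toggles to the per-backend, per-op fp32_precision settings. A minimal usage sketch in Python, using only the setting names quoted in the warning string above (the sketch itself is not part of this diff):

import torch

# Legacy toggles (deprecated after PyTorch 2.9, per the warning above):
#   torch.backends.cuda.matmul.allow_tf32 = True
#   torch.backends.cudnn.allow_tf32 = True

# New per-backend, per-op settings named in the warning message:
torch.backends.cudnn.conv.fp32_precision = "tf32"   # allow TF32 for cuDNN convolutions
torch.backends.cuda.matmul.fp32_precision = "ieee"  # keep full fp32 for cuBLAS matmuls

Mixing the old and new styles is what trips the "mix of the legacy and new APIs" warnings emitted by allowTF32CuBLAS() and float32MatmulPrecision() above.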

View File

@@ -179,9 +179,6 @@ def aot_stage1_graph_capture(
         )
     )
-    print(f"in aot_stage1_graph_capture. maybe_subclass_meta.fw_metadata.static_input_indices:{maybe_subclass_meta.fw_metadata.static_input_indices if maybe_subclass_meta is not None and maybe_subclass_meta.fw_metadata is not None else None}")
-    print(f"in aot_stage1_graph_capture. aot_state.fw_metadata.static_input_indices:{aot_state.fw_metadata.static_input_indices}")
     return AOTGraphCapture(
         wrappers=wrappers,
         graph_module=graph,

View File

@@ -2318,7 +2318,7 @@ def compile_fx_forward(
     # force the outputs of invoke_subgraph subgraph to follow the
     # original strides
     _recursive_record_user_visible_output_idxs(gm)
-    print(f"in compile_fx_foward. static_input_idxs:{get_static_input_idxs(fixed)}")
     return inner_compile(
         gm,
         example_inputs,