[TSAN][live speech translation] Fix a data race in caffe2 (#156378)

Summary: Noticed that the Context member quantized_engine is accessed and written from multiple threads, which TSAN reports as a data race.
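For context, the race reduces to an unsynchronized read/write of a non-atomic std::optional member. A minimal standalone sketch (stand-in types, not the actual caffe2 code) that TSAN flags when built with -fsanitize=thread:

#include <optional>
#include <thread>

struct Ctx {
  std::optional<int> quantized_engine;  // stand-in for std::optional<at::QEngine>
  int fallback_engine = 0;

  // Reader: value_or() reads the optional without synchronization.
  int qEngine() const { return quantized_engine.value_or(fallback_engine); }
  // Writer: plain assignment, again without synchronization.
  void setQEngine(int e) { quantized_engine = e; }
};

int main() {
  Ctx ctx;
  std::thread writer([&] { ctx.setQEngine(1); });
  std::thread reader([&] { (void)ctx.qEngine(); });  // TSAN flags this read/write pair
  writer.join();
  reader.join();
}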

Test Plan:
➜  fbsource buck test --flagfile fbcode/mode/dev-tsan //xplat/assistant/integration_test/tests/supernova/speechtranslation:live_speech_translation_en_fr_tests -- --exact 'fbsource//xplat/assistant/integration_test/tests/supernova/speechtranslation:live_speech_translation_en_fr_tests - Translate/LiveSpeechTranslationTests.LiveSpeechTranslationEnFr/silence___fr_en'

Rollback Plan:

Differential Revision: D76921416

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156378
Approved by: https://github.com/jerryzh168, https://github.com/cyyever
Author: Patryk Ozga
Date: 2025-06-29 07:23:20 +00:00
Committed by: PyTorch MergeBot
Parent: 9d677389cb
Commit: e959dd017d
3 changed files with 10 additions and 16 deletions


@@ -218,12 +218,13 @@ bool Context::allowTF32OneDNN() const {
   return allow_tf32_onednn;
 }
-void Context::setAllowTF32OneDNN(bool b){
-#ifdef USE_XPU
+// NOLINTNEXTLINE(clang-diagnostic-unused-parameter)
+void Context::setAllowTF32OneDNN(bool b){
+#ifdef USE_XPU
   allow_tf32_onednn = b;
-#else
+#else
   TORCH_WARN("TF32 acceleration on top of oneDNN is available for Intel GPUs. The current Torch version does not have Intel GPU Support.");
-#endif
+#endif
 }
 bool Context::userEnabledFlashSDP() const {
@@ -669,13 +670,14 @@ at::QEngine Context::qEngine() const {
 #endif
     return qengine;
   }();
-  return quantized_engine.value_or(_quantized_engine);
+  auto qt_engine = quantized_engine.load();
+  return qt_engine == at::QEngine::NoQEngine ? _quantized_engine : qt_engine;
 }
 void Context::setQEngine(at::QEngine e) {
   const auto& qengines = supportedQEngines();
   if (std::find(qengines.begin(), qengines.end(), e) != qengines.end()) {
-    quantized_engine = e;
+    quantized_engine.store(e);
     return;
   }
   TORCH_CHECK(false, "quantized engine ", toString(e), " is not supported");
@@ -687,17 +689,9 @@ const std::vector<at::QEngine>& Context::supportedQEngines() {
   // Engines are listed in priority order: later one wins
   // By default we prefer FBGEMM if we're running on server side
   // QNNPACK on server side has some issue, so we disable it by default.
-#ifdef C10_MOBILE
-  engines.push_back(at::kNoQEngine);
-#ifdef USE_PYTORCH_QNNPACK
-  engines.push_back(at::kQNNPACK);
-#endif
-#else // C10_MOBILE
 #ifdef USE_PYTORCH_QNNPACK
   engines.push_back(at::kQNNPACK);
 #endif
   engines.push_back(at::kNoQEngine);
-#endif // C10_MOBILE
 #if AT_MKLDNN_ENABLED()
   engines.push_back(at::kONEDNN);
@@ -829,6 +823,7 @@ void Context::setAllowFP16ReductionCPU(bool b) {
 #if defined(__aarch64__) && !defined(C10_MOBILE)
   if (!cpuinfo_initialize() || !cpuinfo_has_arm_fp16_arith())
 #else
+  // NOLINTNEXTLINE(facebook-hte-MissingBraces)
   if (true)
 #endif
     TORCH_CHECK(false, "Float16 arithmetic is not supported by the CPU!");
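The new read path loads the atomic exactly once into a local, so the sentinel check and the returned value come from the same observed write; at::QEngine::NoQEngine takes over the "unset" role that std::nullopt played before. A minimal standalone sketch of the same pattern (simplified enum, not the actual at::QEngine):

#include <atomic>

enum class QEngine { NoQEngine, FBGEMM, QNNPACK, ONEDNN };  // stand-in enum

std::atomic<QEngine> quantized_engine{QEngine::NoQEngine};

QEngine qEngine(QEngine default_engine) {
  // Load once: comparing and returning the same local avoids a
  // check-then-act race that a second read of the atomic would reintroduce.
  const QEngine qt_engine = quantized_engine.load();
  return qt_engine == QEngine::NoQEngine ? default_engine : qt_engine;
}

void setQEngine(QEngine e) {
  quantized_engine.store(e);  // default seq_cst ordering
}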


@@ -472,7 +472,7 @@ class TORCH_API Context {
   bool release_original_weights = false;
 #endif
   bool display_vmap_fallback_warnings_ = false;
-  std::optional<at::QEngine> quantized_engine = std::nullopt;
+  std::atomic<at::QEngine> quantized_engine = at::QEngine::NoQEngine;
   bool enable_sparse_tensor_invariant_checks = false;
   bool allow_fp16_reduction_cpu = false;
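Since at::QEngine is an enum that fits in a machine word, std::atomic<at::QEngine> should be lock-free on mainstream platforms, so the new member adds no locking over the old std::optional. A small sketch to verify that assumption (stand-in enum):

#include <atomic>
#include <iostream>

enum class QEngine { NoQEngine, FBGEMM };  // stand-in for at::QEngine

int main() {
  std::atomic<QEngine> e{QEngine::NoQEngine};
  // Expect "lock-free: 1" on common 64-bit targets.
  std::cout << "lock-free: " << e.is_lock_free() << '\n';
}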


@@ -10,7 +10,6 @@ from contextlib import contextmanager
 from torch.testing._internal.common_utils import TEST_WITH_TSAN, IS_PPC, IS_MACOS, IS_WINDOWS
 supported_qengines = torch.backends.quantized.supported_engines
 supported_qengines.remove('none')
 # Note: We currently do not run QNNPACK tests on WINDOWS and MACOS as it is flaky. Issue #29326
 # QNNPACK is not supported on PPC
 if 'qnnpack' in supported_qengines and any([IS_PPC, TEST_WITH_TSAN, IS_MACOS, IS_WINDOWS]):