[quant][core][gpu][improvement] Made plan and run for quantized cudnn conv op conform with Conv_v8.cpp
Summary: The workspace and execution-plan handling in Conv_v8.cpp diverged from Conv.cpp after https://github.com/pytorch/pytorch/pull/60755 landed. This PR brings the two files back in line with respect to workspace allocation and execution-plan caching.

Test Plan:
```
python test/test_quantization.py -k test_qconv2d_cudnn
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/76788
Approved by: https://github.com/jerryzh168
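For orientation before the hunks below: the conformed execute path caches the whole `cudnn_frontend::ExecutionPlan`, asks the plan for its workspace requirement, and routes that allocation through the CUDA caching allocator. A minimal sketch of that shape, using only the calls the diff itself relies on; the helper name, its parameters, and the include paths are our assumptions, not code from the PR:

```cpp
#include <ATen/cudnn/Exceptions.h>  // AT_CUDNN_CHECK (assumed path)
#include <c10/cuda/CUDACachingAllocator.h>
#include <cudnn_frontend.h>

// Illustrative helper, not code from the PR: execute a finalized plan,
// sizing the workspace from the plan itself instead of hard-coding a
// zero-byte buffer as the old code did.
void run_plan(cudnnHandle_t handle,
              const cudnn_frontend::ExecutionPlan& plan,
              cudnn_frontend::VariantPackBuilder& pack_builder) {
  // Each finalized plan reports the scratch space its engine config needs.
  int64_t workspace_size = plan.getWorkspaceSize();
  // Route the allocation through the CUDA caching allocator.
  c10::DataPtr workspace_ptr =
      c10::cuda::CUDACachingAllocator::get()->allocate(workspace_size);
  auto variant_pack = pack_builder
      .setWorkspacePointer(workspace_size ? workspace_ptr.get() : nullptr)
      .build();
  AT_CUDNN_CHECK(cudnnBackendExecute(
      handle, plan.get_raw_desc(), variant_pack.get_raw_desc()));
}
```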
```diff
@@ -17,6 +17,7 @@
 #include <ATen/native/quantized/packed_params.h>
 #include <ATen/native/utils/ParamsHash.h>
 #include <ATen/TensorUtils.h>
+#include <c10/cuda/CUDACachingAllocator.h>
 #include <cudnn_frontend.h>
 #include <torch/library.h>
 
```
```diff
@@ -50,7 +51,7 @@ struct CacheKey {
   int8_t bias_alignment;
   bool kReluFused;
 };
-std::unordered_map<CacheKey, cudnn_frontend::ManagedOpaqueDescriptor, at::native::ParamsHash<CacheKey>, at::native::ParamsEqual<CacheKey>> execution_plan_cache;
+std::unordered_map<CacheKey, cudnn_frontend::ExecutionPlan, at::native::ParamsHash<CacheKey>, at::native::ParamsEqual<CacheKey>> execution_plan_cache;
 }
 // TODO: we can use cudnn_frontend::ExecutionPlanCache when it supports caching
 // multiple operators
```
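Only the mapped type changes in this hunk; the cache machinery is unchanged: a plain `std::unordered_map` keyed by a trivially-copyable `CacheKey` that `at::native::ParamsHash` / `ParamsEqual` hash and compare by raw bytes. A self-contained toy of that byte-hashing pattern (every name below is a stand-in, not ATen's):

```cpp
#include <cstdint>
#include <cstring>
#include <string>
#include <unordered_map>

// Toy stand-ins for at::native::ParamsHash / ParamsEqual: hash and compare
// a trivially-copyable key by its raw bytes (FNV-1a over the struct).
struct ToyKey {
  int64_t input_size[4];
  int8_t alignment;
  bool relu_fused;
};

struct RawBytesHash {
  size_t operator()(const ToyKey& k) const {
    const auto* p = reinterpret_cast<const unsigned char*>(&k);
    size_t h = 14695981039346656037ull;
    for (size_t i = 0; i < sizeof(ToyKey); ++i) {
      h = (h ^ p[i]) * 1099511628211ull;
    }
    return h;
  }
};

struct RawBytesEqual {
  bool operator()(const ToyKey& a, const ToyKey& b) const {
    return std::memcmp(&a, &b, sizeof(ToyKey)) == 0;
  }
};

// std::string stands in for the cached cudnn_frontend::ExecutionPlan.
std::unordered_map<ToyKey, std::string, RawBytesHash, RawBytesEqual> cache;

int main() {
  ToyKey k{};  // zero-init first so padding bytes are deterministic
  k.alignment = 16;
  k.relu_fused = true;
  cache.emplace(k, "finalized-plan");
  return cache.count(k) == 1 ? 0 : 1;
}
```

Because the hash reads raw memory, keys like this are typically zero-initialized before their fields are set, so padding bytes never differ between logically equal keys.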
```diff
@@ -139,9 +140,9 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
   }
   key.kReluFused = kReluFused;
 
-  auto run = [&](cudnn_frontend::ManagedOpaqueDescriptor plan_desc) {
-    auto workspace_size = 0;
-    auto workspace = at::empty({workspace_size}, input.options().dtype(at::kByte));
+  auto run = [&](const cudnn_frontend::ExecutionPlan& plan_desc) {
+    auto workspace_size = plan_desc.getWorkspaceSize();
+    auto workspace_ptr = c10::cuda::CUDACachingAllocator::get()->allocate(workspace_size);
     at::SmallVector<void *, 7> data_ptrs;
     at::SmallVector<int64_t, 7> uids;
     data_ptrs = {input.data_ptr<int8_t>(), maybe_padded_weight_.data_ptr<int8_t>(),
```
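Easy to miss in the new `run` lambda: nothing frees the workspace explicitly, because `allocate()` returns a `c10::DataPtr`, an owning RAII handle. A small sketch of that lifetime (the function name is ours):

```cpp
#include <c10/cuda/CUDACachingAllocator.h>

// Illustrative function: the workspace lives exactly as long as the
// c10::DataPtr that owns it.
void workspace_lifetime(size_t nbytes) {
  c10::DataPtr workspace_ptr =
      c10::cuda::CUDACachingAllocator::get()->allocate(nbytes);
  void* raw = workspace_ptr.get();  // non-owning pointer handed to cudnn
  (void)raw;
}  // workspace_ptr destructs here: the block returns to the allocator's
   // pool (no cudaFree), ready for the next invocation of the run lambda
```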
```diff
@@ -153,17 +154,17 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
       uids.insert(uids.end(), {'b', 'c', 'd'});
     }
     auto variantPack = cudnn_frontend::VariantPackBuilder()
-        .setWorkspacePointer(workspace.data_ptr())
+        .setWorkspacePointer(workspace_size ? workspace_ptr.get() : nullptr)
         .setDataPointers(uids.size(), data_ptrs.data())
         .setUids(uids.size(), uids.data())
         .build();
     auto variant_pack_desc = variantPack.get_raw_desc();
-    AT_CUDNN_CHECK(cudnnBackendExecute(handle, plan_desc->get_backend_descriptor(), variant_pack_desc));
+    AT_CUDNN_CHECK(cudnnBackendExecute(handle, plan_desc.get_raw_desc(), variant_pack_desc));
   };
 
   auto search = execution_plan_cache.find(key);
   if (search != execution_plan_cache.end()) {
-    cudnn_frontend::ManagedOpaqueDescriptor plan_desc = search->second;
+    cudnn_frontend::ExecutionPlan plan_desc = search->second;
     run(plan_desc);
     return;
   }
```
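The variant pack is where device pointers meet the graph: `data_ptrs[i]` is bound to `uids[i]`, and each uid must match the id its tensor descriptor was given at graph-build time (the `'b'`, `'c'`, `'d'` uids above are appended only inside a conditional branch not shown in the hunk). A sketch of that pairing with illustrative uids and an assumed helper name:

```cpp
#include <ATen/cudnn/Exceptions.h>  // AT_CUDNN_CHECK (assumed path)
#include <c10/util/SmallVector.h>
#include <cudnn_frontend.h>

// Illustrative helper. The uids 'x', 'w', 'y' stand for whatever ids the
// tensor descriptors were given via setId() when the operation graph was
// built; they are not the uids used in this file.
void bind_and_execute(cudnnHandle_t handle,
                      const cudnn_frontend::ExecutionPlan& plan,
                      void* x, void* w, void* y,
                      void* workspace, int64_t workspace_size) {
  c10::SmallVector<void*, 7> data_ptrs = {x, w, y};
  c10::SmallVector<int64_t, 7> uids = {'x', 'w', 'y'};
  auto variant_pack = cudnn_frontend::VariantPackBuilder()
      // Pass nullptr rather than a zero-byte buffer when the plan needs
      // no scratch space, mirroring the guard in the hunk above.
      .setWorkspacePointer(workspace_size ? workspace : nullptr)
      .setDataPointers(uids.size(), data_ptrs.data())
      .setUids(uids.size(), uids.data())
      .build();
  AT_CUDNN_CHECK(cudnnBackendExecute(
      handle, plan.get_raw_desc(), variant_pack.get_raw_desc()));
}
```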
```diff
@@ -274,9 +275,8 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
         .setHandle(handle)
         .setEngineConfig(cfg)
         .build();
-      auto plan_desc = plan.get_desc();
-      run(plan_desc);
-      execution_plan_cache[key] = plan_desc;
+      run(plan);
+      execution_plan_cache.emplace(key, plan);
       return;
     } catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
   }
```
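Taken together, the two paths form a lookup-or-build flow: reuse a cached plan on a hit; otherwise build one from an engine config, run it, and cache it, with the catch clauses above treating a failed config as non-fatal. A compact standalone sketch of that control flow (toy types, illustrative names):

```cpp
#include <functional>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>

// Toy stand-ins: Key for CacheKey, Plan for cudnn_frontend::ExecutionPlan.
using Key = int;
using Plan = std::string;

std::unordered_map<Key, Plan> plan_cache;

void run_or_build(const Key& key,
                  const std::function<std::optional<Plan>()>& try_build,
                  const std::function<void(const Plan&)>& run) {
  // Fast path: a previously finalized plan is reused as-is.
  if (auto it = plan_cache.find(key); it != plan_cache.end()) {
    run(it->second);
    return;
  }
  // Slow path: build a plan, execute it once, then cache it. The real
  // file tries candidate engine configs in turn, using the caught
  // cudnn/c10 exceptions as the signal to move on to the next config.
  if (auto plan = try_build()) {
    run(*plan);
    plan_cache.emplace(key, std::move(*plan));
  }
}
```

The switch from `execution_plan_cache[key] = plan_desc` to `emplace` also matters: `operator[]` default-constructs the mapped value before assigning, which the old shared-descriptor type allowed but a builder-only type like `cudnn_frontend::ExecutionPlan` presumably does not.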