[quant][core][gpu][improvement] Made plan and run for quantized cudnn conv op conform with Conv_v8.cpp

Summary:
The workspace and execution-plan handling in Conv_v8.cpp diverged from Conv.cpp after
https://github.com/pytorch/pytorch/pull/60755 landed. This PR brings the two files
back into agreement with respect to the workspace and plan.
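Condensed, the pattern this change adopts from Conv_v8.cpp looks roughly like the sketch below. The calls are the ones visible in the diff; the surrounding names (`handle`, `uids`, `data_ptrs`) are assumed context rather than verbatim source:

```
// Sketch of the conformed plan/workspace pattern; the calls are taken from
// the diff below, while handle/uids/data_ptrs are assumed from context.
auto run = [&](const cudnn_frontend::ExecutionPlan& plan) {
  // Ask the plan itself how much scratch space it needs,
  // instead of hardcoding a zero-sized workspace.
  auto workspace_size = plan.getWorkspaceSize();
  // Allocate from the CUDA caching allocator; the returned DataPtr gives
  // the block back to the pool when it goes out of scope.
  auto workspace_ptr = c10::cuda::CUDACachingAllocator::get()->allocate(workspace_size);
  auto variantPack = cudnn_frontend::VariantPackBuilder()
      .setWorkspacePointer(workspace_size ? workspace_ptr.get() : nullptr)
      .setDataPointers(uids.size(), data_ptrs.data())
      .setUids(uids.size(), uids.data())
      .build();
  AT_CUDNN_CHECK(cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc()));
};
```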

Test Plan:
```
python test/test_quantization.py -k test_qconv2d_cudnn
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/76788

Approved by: https://github.com/jerryzh168
Author: dzdang, 2022-05-24 17:27:30 -04:00
Committed by: PyTorch MergeBot
Parent: 87148f2b59
Commit: 589f40e3ad

```
@@ -17,6 +17,7 @@
 #include <ATen/native/quantized/packed_params.h>
 #include <ATen/native/utils/ParamsHash.h>
 #include <ATen/TensorUtils.h>
+#include <c10/cuda/CUDACachingAllocator.h>
 #include <cudnn_frontend.h>
 #include <torch/library.h>
```

```
@@ -50,7 +51,7 @@ struct CacheKey {
   int8_t bias_alignment;
   bool kReluFused;
 };
-std::unordered_map<CacheKey, cudnn_frontend::ManagedOpaqueDescriptor, at::native::ParamsHash<CacheKey>, at::native::ParamsEqual<CacheKey>> execution_plan_cache;
+std::unordered_map<CacheKey, cudnn_frontend::ExecutionPlan, at::native::ParamsHash<CacheKey>, at::native::ParamsEqual<CacheKey>> execution_plan_cache;
 }
 // TODO: we can use cudnn_frontend::ExecutionPlanCache when it supports caching
 // multiple operators
```
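
For context, at::native::ParamsHash and ParamsEqual treat the POD CacheKey as a raw byte sequence, which is what lets an arbitrary plain struct serve as the cache key. A minimal sketch of the idea (illustrative, not the exact code in ATen/native/utils/ParamsHash.h):

```
#include <cstddef>
#include <cstdint>

// Byte-wise hash over a POD key, in the spirit of at::native::ParamsHash.
template <typename Params>
struct BytewiseHash {
  size_t operator()(const Params& params) const {
    auto ptr = reinterpret_cast<const uint8_t*>(&params);
    uint32_t value = 0;
    for (size_t i = 0; i < sizeof(Params); ++i) {
      value = value * 31 + ptr[i];  // simple polynomial rolling hash
    }
    return value;
  }
};
```

Because the hash covers every byte of the struct, equal keys must be byte-identical, padding included; that is why such key structs are typically zero-initialized before their fields are filled in.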

```
@@ -139,9 +140,9 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
   }
   key.kReluFused = kReluFused;
-  auto run = [&](cudnn_frontend::ManagedOpaqueDescriptor plan_desc) {
-    auto workspace_size = 0;
-    auto workspace = at::empty({workspace_size}, input.options().dtype(at::kByte));
+  auto run = [&](const cudnn_frontend::ExecutionPlan& plan_desc) {
+    auto workspace_size = plan_desc.getWorkspaceSize();
+    auto workspace_ptr = c10::cuda::CUDACachingAllocator::get()->allocate(workspace_size);
     at::SmallVector<void *, 7> data_ptrs;
     at::SmallVector<int64_t, 7> uids;
     data_ptrs = {input.data_ptr<int8_t>(), maybe_padded_weight_.data_ptr<int8_t>(),
```
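
Two things change in this hunk: the lambda now receives the ExecutionPlan itself instead of an opaque descriptor, and the workspace is no longer a zero-byte at::empty tensor but a buffer of the size the plan actually requests, taken from the CUDA caching allocator. The returned c10::DataPtr is an owning handle, so no explicit free is needed; a hypothetical usage sketch:

```
{
  // Hypothetical illustration of the caching allocator's RAII behavior
  // (`bytes` is assumed).
  c10::DataPtr ws = c10::cuda::CUDACachingAllocator::get()->allocate(bytes);
  void* raw = ws.get();  // raw device pointer to hand to cuDNN
  // ... enqueue work that uses `raw` ...
}  // `ws` is destroyed here and the block returns to the allocator's pool
```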

```
@@ -153,17 +154,17 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
       uids.insert(uids.end(), {'b', 'c', 'd'});
     }
     auto variantPack = cudnn_frontend::VariantPackBuilder()
-      .setWorkspacePointer(workspace.data_ptr())
+      .setWorkspacePointer(workspace_size ? workspace_ptr.get() : nullptr)
       .setDataPointers(uids.size(), data_ptrs.data())
       .setUids(uids.size(), uids.data())
       .build();
     auto variant_pack_desc = variantPack.get_raw_desc();
-    AT_CUDNN_CHECK(cudnnBackendExecute(handle, plan_desc->get_backend_descriptor(), variant_pack_desc));
+    AT_CUDNN_CHECK(cudnnBackendExecute(handle, plan_desc.get_raw_desc(), variant_pack_desc));
   };
   auto search = execution_plan_cache.find(key);
   if (search != execution_plan_cache.end()) {
-    cudnn_frontend::ManagedOpaqueDescriptor plan_desc = search->second;
+    cudnn_frontend::ExecutionPlan plan_desc = search->second;
     run(plan_desc);
     return;
   }
```
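
A note on the variant pack: each data pointer is bound to the UID that was assigned to the corresponding tensor when the operation graph was built, so the two arrays must line up one-to-one. A hypothetical example with made-up UIDs:

```
// Hypothetical UID binding ('x', 'w', 'y' are made-up IDs; the real code
// uses the UIDs chosen when the graph was constructed).
at::SmallVector<void*, 7> data_ptrs = {input_ptr, weight_ptr, output_ptr};
at::SmallVector<int64_t, 7> uids = {'x', 'w', 'y'};
auto variantPack = cudnn_frontend::VariantPackBuilder()
    .setWorkspacePointer(workspace_size ? workspace_ptr.get() : nullptr)
    .setDataPointers(uids.size(), data_ptrs.data())
    .setUids(uids.size(), uids.data())
    .build();
```

Note also that when the plan reports a zero-byte workspace, nullptr is passed rather than the pointer from an empty allocation, matching what Conv_v8.cpp does.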

```
@@ -274,9 +275,8 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
         .setHandle(handle)
         .setEngineConfig(cfg)
         .build();
-    auto plan_desc = plan.get_desc();
-    run(plan_desc);
-    execution_plan_cache[key] = plan_desc;
+    run(plan);
+    execution_plan_cache.emplace(key, plan);
     return;
   } catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
 }
```
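
Finally, the switch from operator[] to emplace is not cosmetic. operator[] default-constructs the mapped value before assigning to it, which worked for ManagedOpaqueDescriptor (a shared_ptr alias) but presumably does not compile for cudnn_frontend::ExecutionPlan, which is produced by its builder rather than default-constructed:

```
// operator[] requires the mapped type to be default-constructible;
// emplace copy-constructs the cached plan in place instead.
execution_plan_cache.emplace(key, plan);  // OK
// execution_plan_cache[key] = plan;      // needs a default-constructible value
```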