Remove binaries using caffe2 functionality (#125885)

This PR removed some binaries using deleted or to be deleted Caffe2 functions.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/125885
Approved by: https://github.com/r-barnes, https://github.com/Chillee
This commit is contained in:
cyy
2024-05-10 06:21:07 +00:00
committed by PyTorch MergeBot
parent 013722bcb8
commit 2ed17e0b1e
4 changed files with 0 additions and 323 deletions

View File

@ -37,24 +37,12 @@ endif()
if(USE_CUDA) if(USE_CUDA)
caffe2_binary_target("inspect_gpu.cc") caffe2_binary_target("inspect_gpu.cc")
target_link_libraries(inspect_gpu ${CUDA_LIBRARIES}) target_link_libraries(inspect_gpu ${CUDA_LIBRARIES})
caffe2_binary_target("print_core_object_sizes_gpu.cc")
if(BUILD_TEST)
# Core overhead benchmark
caffe2_binary_target("core_overhead_benchmark_gpu.cc")
target_link_libraries(core_overhead_benchmark_gpu benchmark ${CUDA_curand_LIBRARY})
endif()
endif() endif()
if(USE_ROCM) if(USE_ROCM)
caffe2_hip_binary_target("hip/inspect_gpu.cc") caffe2_hip_binary_target("hip/inspect_gpu.cc")
caffe2_hip_binary_target("hip/print_core_object_sizes_gpu.cc")
if(BUILD_TEST)
# Core overhead benchmark
caffe2_hip_binary_target("hip/core_overhead_benchmark_gpu.cc")
target_link_libraries(core_overhead_benchmark_gpu benchmark)
endif()
endif() endif()
if(USE_MPI) if(USE_MPI)

View File

@ -1,222 +0,0 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "benchmark/benchmark.h"
#include "caffe2/core/context.h"
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/operator.h"
#define CAFFE2_SKIP_IF_NO_GPU \
if (!caffe2::NumCudaDevices()) { \
state.SkipWithError("No CUDA available, skipping benchmark."); \
return; \
}
using namespace caffe2;
static void BM_CUDAContextCreation(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
volatile CUDAContext context_so_we_do_initialization_work;
while (state.KeepRunning()) {
volatile CUDAContext context;
}
}
BENCHMARK(BM_CUDAContextCreation);
static void BM_CUDAContextStreamAccess(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
CUDAContext context;
while (state.KeepRunning()) {
volatile cudaStream_t stream = context.cuda_stream();
}
}
BENCHMARK(BM_CUDAContextStreamAccess);
static void BM_cudaGetDevice(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
int id;
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaGetDevice(&id));
}
}
BENCHMARK(BM_cudaGetDevice);
static void BM_cudaSetDevice(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
int total = NumCudaDevices();
int i = 0;
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaSetDevice((i++) % total));
}
}
BENCHMARK(BM_cudaSetDevice);
static void BM_cudaSetAndGetDevice(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
int total = NumCudaDevices();
int i = 0;
int id;
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaSetDevice((i++) % total));
CUDA_ENFORCE(cudaGetDevice(&id));
}
}
BENCHMARK(BM_cudaSetAndGetDevice);
static void BM_cudaSetSameDevice(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaSetDevice(0));
}
}
BENCHMARK(BM_cudaSetSameDevice);
static void BM_cudaStreamCreateSyncDelete(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
cudaStream_t stream;
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaStreamCreate(&stream));
CUDA_ENFORCE(cudaStreamSynchronize(stream));
CUDA_ENFORCE(cudaStreamDestroy(stream));
}
}
BENCHMARK(BM_cudaStreamCreateSyncDelete);
static void BM_cudaStreamSynchronize(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
cudaStream_t stream;
CUDA_ENFORCE(cudaStreamCreate(&stream));
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaStreamSynchronize(stream));
}
}
BENCHMARK(BM_cudaStreamSynchronize);
static void BM_cudaEventRecord(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
cudaStream_t stream;
cudaEvent_t event;
CUDA_ENFORCE(cudaStreamCreate(&stream));
CUDA_ENFORCE(cudaEventCreateWithFlags(
&event, cudaEventDefault | cudaEventDisableTiming));
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaEventRecord(event, stream));
}
}
BENCHMARK(BM_cudaEventRecord);
static void BM_cudaStreamWaitEventThenStreamSynchronize(
benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
cudaStream_t stream;
cudaEvent_t event;
CUDA_ENFORCE(cudaStreamCreate(&stream));
CUDA_ENFORCE(cudaEventCreateWithFlags(
&event, cudaEventDefault | cudaEventDisableTiming));
CUDA_ENFORCE(cudaEventRecord(event, stream));
CUDA_ENFORCE(cudaStreamWaitEvent(stream, event, 0));
CUDA_ENFORCE(cudaStreamSynchronize(stream));
while (state.KeepRunning()) {
CUDA_ENFORCE(cudaStreamWaitEvent(stream, event, 0));
CUDA_ENFORCE(cudaStreamSynchronize(stream));
}
}
BENCHMARK(BM_cudaStreamWaitEventThenStreamSynchronize);
static void BM_CudaPointerAffinity(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
Tensor tensor(vector<int64_t>{1, 2, 3, 4}, CUDA);
float* ptr = tensor.mutable_data<float>();
while (state.KeepRunning()) {
volatile int id = GetGPUIDForPointer(ptr);
}
}
BENCHMARK(BM_CudaPointerAffinity);
namespace {
template <class Context>
class DummyEmptyOp : public Operator<Context> {
public:
DummyEmptyOp(const OperatorDef& def, Workspace* ws)
: Operator<Context>(def, ws) {}
bool RunOnDevice() final { return true; }
};
REGISTER_CPU_OPERATOR(DummyEmpty, DummyEmptyOp<CPUContext>);
REGISTER_CUDA_OPERATOR(DummyEmpty, DummyEmptyOp<CUDAContext>);
OPERATOR_SCHEMA(DummyEmpty);
} // namespace
static void BM_OperatorCreationCPU(benchmark::State& state) {
std::unique_ptr<OperatorBase> op;
OperatorDef def;
Workspace ws;
def.set_type("DummyEmpty");
def.mutable_device_option()->set_device_type(PROTO_CPU);
while (state.KeepRunning()) {
op = CreateOperator(def, &ws);
}
}
BENCHMARK(BM_OperatorCreationCPU);
static void BM_OperatorCreationCUDA(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
std::unique_ptr<OperatorBase> op;
OperatorDef def;
Workspace ws;
def.set_type("DummyEmpty");
def.mutable_device_option()->set_device_type(PROTO_CUDA);
while (state.KeepRunning()) {
op = CreateOperator(def, &ws);
}
}
BENCHMARK(BM_OperatorCreationCUDA);
static void BM_RawAllocDeallocCPU(benchmark::State& state) {
while (state.KeepRunning()) {
// Allocating only 1 byte in order to measure the overhead.
auto data_ptr = GetCPUAllocator()->allocate(1);
// Deallocated when it's out of scope
}
}
BENCHMARK(BM_RawAllocDeallocCPU);
static void BM_TensorAllocDeallocCPU(benchmark::State& state) {
Tensor tensor(CPU);
// small allocation
tensor.Resize(32, 32);
while (state.KeepRunning()) {
CHECK(tensor.mutable_data<float>());
tensor.FreeMemory();
}
}
BENCHMARK(BM_TensorAllocDeallocCPU);
static void BM_TensorAllocDeallocCUDA(benchmark::State& state) {
CAFFE2_SKIP_IF_NO_GPU;
Tensor tensor(CUDA);
// small allocation
tensor.Resize(32, 32);
while (state.KeepRunning()) {
CHECK(tensor.mutable_data<float>());
tensor.FreeMemory();
}
}
BENCHMARK(BM_TensorAllocDeallocCUDA);
BENCHMARK_MAIN();

View File

@ -1,40 +0,0 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include "caffe2/core/operator.h"
#include "caffe2/core/context.h"
#include "caffe2/core/context_gpu.h"
#include "caffe2/proto/caffe2_pb.h"
#define PRINT_SIZE(cls) \
std::cout << "Size of " #cls ": " << sizeof(cls) << " bytes." \
<< std::endl;
int main(int /* unused */, char** /* unused */) {
PRINT_SIZE(caffe2::Blob);
PRINT_SIZE(caffe2::Tensor);
PRINT_SIZE(caffe2::CPUContext);
PRINT_SIZE(caffe2::CUDAContext);
PRINT_SIZE(caffe2::OperatorBase);
PRINT_SIZE(caffe2::OperatorDef);
PRINT_SIZE(caffe2::Operator<caffe2::CPUContext>);
PRINT_SIZE(caffe2::Operator<caffe2::CUDAContext>);
PRINT_SIZE(caffe2::TypeMeta);
PRINT_SIZE(caffe2::Workspace);
return 0;
}

View File

@ -1,49 +0,0 @@
/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sstream>
#include <string>
#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/db.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"
C10_DEFINE_string(f_in, "", "The input data file name.");
C10_DEFINE_string(f_out, "", "The output data file name.");
int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
std::ifstream f_in(FLAGS_f_in);
std::ofstream f_out(FLAGS_f_out);
std::string line;
caffe2::TensorProtos tensor_protos;
while (std::getline(f_in, line)) {
caffe2::TensorProto* data = tensor_protos.add_protos();
data->set_data_type(caffe2::TensorProto::STRING);
data->add_dims(1);
data->add_string_data(line);
data->set_name("text");
}
f_in.close();
std::string output_str;
tensor_protos.SerializeToString(&output_str);
f_out << output_str;
f_out.close();
return 0;
}