mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Remove binaries using caffe2 functionality (#125885)
This PR removes some binaries that use deleted or to-be-deleted Caffe2 functions. Pull Request resolved: https://github.com/pytorch/pytorch/pull/125885 Approved by: https://github.com/r-barnes, https://github.com/Chillee
This commit is contained in:
@ -37,24 +37,12 @@ endif()
|
||||
# CUDA-only binaries: everything in this block is configured only when
# USE_CUDA is set.
if(USE_CUDA)
|
||||
# inspect_gpu is additionally linked against ${CUDA_LIBRARIES}.
caffe2_binary_target("inspect_gpu.cc")
|
||||
target_link_libraries(inspect_gpu ${CUDA_LIBRARIES})
|
||||
caffe2_binary_target("print_core_object_sizes_gpu.cc")
|
||||
|
||||
# The benchmark binary is only configured when BUILD_TEST is also set.
if(BUILD_TEST)
|
||||
# Core overhead benchmark
|
||||
caffe2_binary_target("core_overhead_benchmark_gpu.cc")
|
||||
# Links the `benchmark` library and ${CUDA_curand_LIBRARY} into the target.
target_link_libraries(core_overhead_benchmark_gpu benchmark ${CUDA_curand_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ROCm-only binaries: built from the sources under hip/ and configured only
# when USE_ROCM is set.
if(USE_ROCM)
|
||||
caffe2_hip_binary_target("hip/inspect_gpu.cc")
|
||||
caffe2_hip_binary_target("hip/print_core_object_sizes_gpu.cc")
|
||||
|
||||
# The benchmark binary is only configured when BUILD_TEST is also set.
if(BUILD_TEST)
|
||||
# Core overhead benchmark
|
||||
caffe2_hip_binary_target("hip/core_overhead_benchmark_gpu.cc")
|
||||
# Only the `benchmark` library is linked explicitly here.
target_link_libraries(core_overhead_benchmark_gpu benchmark)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_MPI)
|
||||
|
@ -1,222 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2016-present, Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/core/operator.h"
|
||||
|
||||
// Early-exits the enclosing benchmark function when no CUDA device is
// reported. Expects a `benchmark::State& state` to be in scope and must be
// used inside a function returning void. Invoke as `CAFFE2_SKIP_IF_NO_GPU;`.
//
// The body is wrapped in do { ... } while (false) so the macro expands to a
// single statement and stays safe inside an unbraced if/else.
#define CAFFE2_SKIP_IF_NO_GPU                                        \
  do {                                                               \
    if (!caffe2::NumCudaDevices()) {                                 \
      state.SkipWithError("No CUDA available, skipping benchmark."); \
      return;                                                        \
    }                                                                \
  } while (false)
|
||||
|
||||
using namespace caffe2;
|
||||
|
||||
static void BM_CUDAContextCreation(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
volatile CUDAContext context_so_we_do_initialization_work;
|
||||
while (state.KeepRunning()) {
|
||||
volatile CUDAContext context;
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_CUDAContextCreation);
|
||||
|
||||
static void BM_CUDAContextStreamAccess(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
CUDAContext context;
|
||||
while (state.KeepRunning()) {
|
||||
volatile cudaStream_t stream = context.cuda_stream();
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_CUDAContextStreamAccess);
|
||||
|
||||
static void BM_cudaGetDevice(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
int id;
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaGetDevice(&id));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaGetDevice);
|
||||
|
||||
static void BM_cudaSetDevice(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
int total = NumCudaDevices();
|
||||
int i = 0;
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaSetDevice((i++) % total));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaSetDevice);
|
||||
|
||||
static void BM_cudaSetAndGetDevice(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
int total = NumCudaDevices();
|
||||
int i = 0;
|
||||
int id;
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaSetDevice((i++) % total));
|
||||
CUDA_ENFORCE(cudaGetDevice(&id));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaSetAndGetDevice);
|
||||
|
||||
static void BM_cudaSetSameDevice(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaSetDevice(0));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaSetSameDevice);
|
||||
|
||||
static void BM_cudaStreamCreateSyncDelete(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
cudaStream_t stream;
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaStreamCreate(&stream));
|
||||
CUDA_ENFORCE(cudaStreamSynchronize(stream));
|
||||
CUDA_ENFORCE(cudaStreamDestroy(stream));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaStreamCreateSyncDelete);
|
||||
|
||||
static void BM_cudaStreamSynchronize(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
cudaStream_t stream;
|
||||
CUDA_ENFORCE(cudaStreamCreate(&stream));
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaStreamSynchronize(stream));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaStreamSynchronize);
|
||||
|
||||
static void BM_cudaEventRecord(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
cudaStream_t stream;
|
||||
cudaEvent_t event;
|
||||
CUDA_ENFORCE(cudaStreamCreate(&stream));
|
||||
CUDA_ENFORCE(cudaEventCreateWithFlags(
|
||||
&event, cudaEventDefault | cudaEventDisableTiming));
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaEventRecord(event, stream));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaEventRecord);
|
||||
|
||||
static void BM_cudaStreamWaitEventThenStreamSynchronize(
|
||||
benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
cudaStream_t stream;
|
||||
cudaEvent_t event;
|
||||
CUDA_ENFORCE(cudaStreamCreate(&stream));
|
||||
CUDA_ENFORCE(cudaEventCreateWithFlags(
|
||||
&event, cudaEventDefault | cudaEventDisableTiming));
|
||||
CUDA_ENFORCE(cudaEventRecord(event, stream));
|
||||
CUDA_ENFORCE(cudaStreamWaitEvent(stream, event, 0));
|
||||
CUDA_ENFORCE(cudaStreamSynchronize(stream));
|
||||
while (state.KeepRunning()) {
|
||||
CUDA_ENFORCE(cudaStreamWaitEvent(stream, event, 0));
|
||||
CUDA_ENFORCE(cudaStreamSynchronize(stream));
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_cudaStreamWaitEventThenStreamSynchronize);
|
||||
|
||||
static void BM_CudaPointerAffinity(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
Tensor tensor(vector<int64_t>{1, 2, 3, 4}, CUDA);
|
||||
float* ptr = tensor.mutable_data<float>();
|
||||
while (state.KeepRunning()) {
|
||||
volatile int id = GetGPUIDForPointer(ptr);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_CudaPointerAffinity);
|
||||
|
||||
namespace {
|
||||
template <class Context>
|
||||
class DummyEmptyOp : public Operator<Context> {
|
||||
public:
|
||||
DummyEmptyOp(const OperatorDef& def, Workspace* ws)
|
||||
: Operator<Context>(def, ws) {}
|
||||
|
||||
bool RunOnDevice() final { return true; }
|
||||
};
|
||||
|
||||
REGISTER_CPU_OPERATOR(DummyEmpty, DummyEmptyOp<CPUContext>);
|
||||
REGISTER_CUDA_OPERATOR(DummyEmpty, DummyEmptyOp<CUDAContext>);
|
||||
OPERATOR_SCHEMA(DummyEmpty);
|
||||
} // namespace
|
||||
|
||||
static void BM_OperatorCreationCPU(benchmark::State& state) {
|
||||
std::unique_ptr<OperatorBase> op;
|
||||
OperatorDef def;
|
||||
Workspace ws;
|
||||
def.set_type("DummyEmpty");
|
||||
def.mutable_device_option()->set_device_type(PROTO_CPU);
|
||||
while (state.KeepRunning()) {
|
||||
op = CreateOperator(def, &ws);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_OperatorCreationCPU);
|
||||
|
||||
static void BM_OperatorCreationCUDA(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
std::unique_ptr<OperatorBase> op;
|
||||
OperatorDef def;
|
||||
Workspace ws;
|
||||
def.set_type("DummyEmpty");
|
||||
def.mutable_device_option()->set_device_type(PROTO_CUDA);
|
||||
while (state.KeepRunning()) {
|
||||
op = CreateOperator(def, &ws);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_OperatorCreationCUDA);
|
||||
|
||||
static void BM_RawAllocDeallocCPU(benchmark::State& state) {
|
||||
while (state.KeepRunning()) {
|
||||
// Allocating only 1 byte in order to measure the overhead.
|
||||
auto data_ptr = GetCPUAllocator()->allocate(1);
|
||||
// Deallocated when it's out of scope
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_RawAllocDeallocCPU);
|
||||
|
||||
static void BM_TensorAllocDeallocCPU(benchmark::State& state) {
|
||||
Tensor tensor(CPU);
|
||||
// small allocation
|
||||
tensor.Resize(32, 32);
|
||||
while (state.KeepRunning()) {
|
||||
CHECK(tensor.mutable_data<float>());
|
||||
tensor.FreeMemory();
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_TensorAllocDeallocCPU);
|
||||
|
||||
static void BM_TensorAllocDeallocCUDA(benchmark::State& state) {
|
||||
CAFFE2_SKIP_IF_NO_GPU;
|
||||
Tensor tensor(CUDA);
|
||||
// small allocation
|
||||
tensor.Resize(32, 32);
|
||||
while (state.KeepRunning()) {
|
||||
CHECK(tensor.mutable_data<float>());
|
||||
tensor.FreeMemory();
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_TensorAllocDeallocCUDA);
|
||||
|
||||
BENCHMARK_MAIN();
|
@ -1,40 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2016-present, Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "caffe2/core/operator.h"
|
||||
#include "caffe2/core/context.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
#include "caffe2/proto/caffe2_pb.h"
|
||||
|
||||
// Writes "Size of <cls>: <sizeof(cls)> bytes." to std::cout and ends the line
// with std::endl. `cls` is stringified for the label and passed to sizeof.
#define PRINT_SIZE(cls)                                      \
  std::cout << "Size of " << #cls << ": " << sizeof(cls)     \
            << " bytes." << std::endl;
|
||||
|
||||
// Prints sizeof() for a fixed list of core caffe2 types, one per line, via
// PRINT_SIZE. Command-line arguments are ignored.
int main(int, char**) {
  // Data containers.
  PRINT_SIZE(caffe2::Blob);
  PRINT_SIZE(caffe2::Tensor);

  // Device contexts.
  PRINT_SIZE(caffe2::CPUContext);
  PRINT_SIZE(caffe2::CUDAContext);

  // Operator machinery.
  PRINT_SIZE(caffe2::OperatorBase);
  PRINT_SIZE(caffe2::OperatorDef);
  PRINT_SIZE(caffe2::Operator<caffe2::CPUContext>);
  PRINT_SIZE(caffe2::Operator<caffe2::CUDAContext>);

  // Miscellaneous.
  PRINT_SIZE(caffe2::TypeMeta);
  PRINT_SIZE(caffe2::Workspace);

  return 0;
}
|
@ -1,49 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2016-present, Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <fstream>
#include <sstream>
#include <string>

#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/db.h"
#include "caffe2/core/init.h"
#include "caffe2/core/logging.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"
|
||||
|
||||
C10_DEFINE_string(f_in, "", "The input data file name.");
|
||||
C10_DEFINE_string(f_out, "", "The output data file name.");
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
caffe2::GlobalInit(&argc, &argv);
|
||||
std::ifstream f_in(FLAGS_f_in);
|
||||
std::ofstream f_out(FLAGS_f_out);
|
||||
std::string line;
|
||||
caffe2::TensorProtos tensor_protos;
|
||||
while (std::getline(f_in, line)) {
|
||||
caffe2::TensorProto* data = tensor_protos.add_protos();
|
||||
data->set_data_type(caffe2::TensorProto::STRING);
|
||||
data->add_dims(1);
|
||||
data->add_string_data(line);
|
||||
data->set_name("text");
|
||||
}
|
||||
f_in.close();
|
||||
std::string output_str;
|
||||
tensor_protos.SerializeToString(&output_str);
|
||||
f_out << output_str;
|
||||
f_out.close();
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user