diff --git a/binaries/CMakeLists.txt b/binaries/CMakeLists.txt index b728cd11de3d..70b235e43e7d 100644 --- a/binaries/CMakeLists.txt +++ b/binaries/CMakeLists.txt @@ -37,24 +37,12 @@ endif() if(USE_CUDA) caffe2_binary_target("inspect_gpu.cc") target_link_libraries(inspect_gpu ${CUDA_LIBRARIES}) - caffe2_binary_target("print_core_object_sizes_gpu.cc") - if(BUILD_TEST) - # Core overhead benchmark - caffe2_binary_target("core_overhead_benchmark_gpu.cc") - target_link_libraries(core_overhead_benchmark_gpu benchmark ${CUDA_curand_LIBRARY}) - endif() endif() if(USE_ROCM) caffe2_hip_binary_target("hip/inspect_gpu.cc") - caffe2_hip_binary_target("hip/print_core_object_sizes_gpu.cc") - if(BUILD_TEST) - # Core overhead benchmark - caffe2_hip_binary_target("hip/core_overhead_benchmark_gpu.cc") - target_link_libraries(core_overhead_benchmark_gpu benchmark) - endif() endif() if(USE_MPI) diff --git a/binaries/core_overhead_benchmark_gpu.cc b/binaries/core_overhead_benchmark_gpu.cc deleted file mode 100644 index b16d99c0bf41..000000000000 --- a/binaries/core_overhead_benchmark_gpu.cc +++ /dev/null @@ -1,222 +0,0 @@ -/** - * Copyright (c) 2016-present, Facebook, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "benchmark/benchmark.h" - -#include "caffe2/core/context.h" -#include "caffe2/core/context_gpu.h" -#include "caffe2/core/operator.h" - -#define CAFFE2_SKIP_IF_NO_GPU \ - if (!caffe2::NumCudaDevices()) { \ - state.SkipWithError("No CUDA available, skipping benchmark."); \ - return; \ - } - -using namespace caffe2; - -static void BM_CUDAContextCreation(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - volatile CUDAContext context_so_we_do_initialization_work; - while (state.KeepRunning()) { - volatile CUDAContext context; - } -} -BENCHMARK(BM_CUDAContextCreation); - -static void BM_CUDAContextStreamAccess(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - CUDAContext context; - while (state.KeepRunning()) { - volatile cudaStream_t stream = context.cuda_stream(); - } -} -BENCHMARK(BM_CUDAContextStreamAccess); - -static void BM_cudaGetDevice(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - int id; - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaGetDevice(&id)); - } -} -BENCHMARK(BM_cudaGetDevice); - -static void BM_cudaSetDevice(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - int total = NumCudaDevices(); - int i = 0; - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaSetDevice((i++) % total)); - } -} -BENCHMARK(BM_cudaSetDevice); - -static void BM_cudaSetAndGetDevice(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - int total = NumCudaDevices(); - int i = 0; - int id; - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaSetDevice((i++) % total)); - CUDA_ENFORCE(cudaGetDevice(&id)); - } -} -BENCHMARK(BM_cudaSetAndGetDevice); - -static void BM_cudaSetSameDevice(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaSetDevice(0)); - } -} -BENCHMARK(BM_cudaSetSameDevice); - -static void BM_cudaStreamCreateSyncDelete(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - cudaStream_t stream; - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaStreamCreate(&stream)); - CUDA_ENFORCE(cudaStreamSynchronize(stream)); - CUDA_ENFORCE(cudaStreamDestroy(stream)); - } -} -BENCHMARK(BM_cudaStreamCreateSyncDelete); - -static void BM_cudaStreamSynchronize(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - cudaStream_t stream; - CUDA_ENFORCE(cudaStreamCreate(&stream)); - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaStreamSynchronize(stream)); - } -} -BENCHMARK(BM_cudaStreamSynchronize); - -static void BM_cudaEventRecord(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - cudaStream_t stream; - cudaEvent_t event; - CUDA_ENFORCE(cudaStreamCreate(&stream)); - CUDA_ENFORCE(cudaEventCreateWithFlags( - &event, cudaEventDefault | cudaEventDisableTiming)); - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaEventRecord(event, stream)); - } -} -BENCHMARK(BM_cudaEventRecord); - -static void BM_cudaStreamWaitEventThenStreamSynchronize( - benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - cudaStream_t stream; - cudaEvent_t event; - CUDA_ENFORCE(cudaStreamCreate(&stream)); - CUDA_ENFORCE(cudaEventCreateWithFlags( - &event, cudaEventDefault | cudaEventDisableTiming)); - CUDA_ENFORCE(cudaEventRecord(event, stream)); - CUDA_ENFORCE(cudaStreamWaitEvent(stream, event, 0)); - CUDA_ENFORCE(cudaStreamSynchronize(stream)); - while (state.KeepRunning()) { - CUDA_ENFORCE(cudaStreamWaitEvent(stream, event, 0)); - CUDA_ENFORCE(cudaStreamSynchronize(stream)); - } -} -BENCHMARK(BM_cudaStreamWaitEventThenStreamSynchronize); - -static void BM_CudaPointerAffinity(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - Tensor tensor(vector{1, 2, 3, 4}, CUDA); - float* ptr = tensor.mutable_data(); - while (state.KeepRunning()) { - volatile int id = GetGPUIDForPointer(ptr); - } -} -BENCHMARK(BM_CudaPointerAffinity); - -namespace { -template -class DummyEmptyOp : public Operator { - public: - DummyEmptyOp(const OperatorDef& def, Workspace* ws) - : Operator(def, ws) {} - - bool RunOnDevice() final { return true; } -}; - -REGISTER_CPU_OPERATOR(DummyEmpty, DummyEmptyOp); -REGISTER_CUDA_OPERATOR(DummyEmpty, DummyEmptyOp); -OPERATOR_SCHEMA(DummyEmpty); -} // namespace - -static void BM_OperatorCreationCPU(benchmark::State& state) { - std::unique_ptr op; - OperatorDef def; - Workspace ws; - def.set_type("DummyEmpty"); - def.mutable_device_option()->set_device_type(PROTO_CPU); - while (state.KeepRunning()) { - op = CreateOperator(def, &ws); - } -} -BENCHMARK(BM_OperatorCreationCPU); - -static void BM_OperatorCreationCUDA(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - std::unique_ptr op; - OperatorDef def; - Workspace ws; - def.set_type("DummyEmpty"); - def.mutable_device_option()->set_device_type(PROTO_CUDA); - while (state.KeepRunning()) { - op = CreateOperator(def, &ws); - } -} -BENCHMARK(BM_OperatorCreationCUDA); - -static void BM_RawAllocDeallocCPU(benchmark::State& state) { - while (state.KeepRunning()) { - // Allocating only 1 byte in order to measure the overhead. - auto data_ptr = GetCPUAllocator()->allocate(1); - // Deallocated when it's out of scope - } -} -BENCHMARK(BM_RawAllocDeallocCPU); - -static void BM_TensorAllocDeallocCPU(benchmark::State& state) { - Tensor tensor(CPU); - // small allocation - tensor.Resize(32, 32); - while (state.KeepRunning()) { - CHECK(tensor.mutable_data()); - tensor.FreeMemory(); - } -} -BENCHMARK(BM_TensorAllocDeallocCPU); - -static void BM_TensorAllocDeallocCUDA(benchmark::State& state) { - CAFFE2_SKIP_IF_NO_GPU; - Tensor tensor(CUDA); - // small allocation - tensor.Resize(32, 32); - while (state.KeepRunning()) { - CHECK(tensor.mutable_data()); - tensor.FreeMemory(); - } -} -BENCHMARK(BM_TensorAllocDeallocCUDA); - -BENCHMARK_MAIN(); diff --git a/binaries/print_core_object_sizes_gpu.cc b/binaries/print_core_object_sizes_gpu.cc deleted file mode 100644 index f091280939d4..000000000000 --- a/binaries/print_core_object_sizes_gpu.cc +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Copyright (c) 2016-present, Facebook, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "caffe2/core/operator.h" -#include "caffe2/core/context.h" -#include "caffe2/core/context_gpu.h" -#include "caffe2/proto/caffe2_pb.h" - -#define PRINT_SIZE(cls) \ - std::cout << "Size of " #cls ": " << sizeof(cls) << " bytes." \ - << std::endl; - -int main(int /* unused */, char** /* unused */) { - PRINT_SIZE(caffe2::Blob); - PRINT_SIZE(caffe2::Tensor); - PRINT_SIZE(caffe2::CPUContext); - PRINT_SIZE(caffe2::CUDAContext); - PRINT_SIZE(caffe2::OperatorBase); - PRINT_SIZE(caffe2::OperatorDef); - PRINT_SIZE(caffe2::Operator); - PRINT_SIZE(caffe2::Operator); - PRINT_SIZE(caffe2::TypeMeta); - PRINT_SIZE(caffe2::Workspace); - return 0; -} diff --git a/binaries/tsv_2_proto.cc b/binaries/tsv_2_proto.cc deleted file mode 100644 index 3cd07f1f7a6b..000000000000 --- a/binaries/tsv_2_proto.cc +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright (c) 2016-present, Facebook, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include "caffe2/core/blob_serialization.h" -#include "caffe2/core/db.h" -#include "caffe2/core/init.h" -#include "caffe2/core/logging.h" -#include "caffe2/proto/caffe2_pb.h" -#include "caffe2/utils/proto_utils.h" - -C10_DEFINE_string(f_in, "", "The input data file name."); -C10_DEFINE_string(f_out, "", "The output data file name."); - -int main(int argc, char** argv) { - caffe2::GlobalInit(&argc, &argv); - std::ifstream f_in(FLAGS_f_in); - std::ofstream f_out(FLAGS_f_out); - std::string line; - caffe2::TensorProtos tensor_protos; - while (std::getline(f_in, line)) { - caffe2::TensorProto* data = tensor_protos.add_protos(); - data->set_data_type(caffe2::TensorProto::STRING); - data->add_dims(1); - data->add_string_data(line); - data->set_name("text"); - } - f_in.close(); - std::string output_str; - tensor_protos.SerializeToString(&output_str); - f_out << output_str; - f_out.close(); - return 0; -}