Files
pytorch/binaries/CMakeLists.txt
Sicheng Stephen Jia be7177751e Add binary to benchmark model load speed (#74700)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/74700

Test Plan:
Imported from OSS

Some results running this benchmark for a quantized CPU xirp14b model on a Pixel 5:

```
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "46749"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19261"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19235"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19396"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19486"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19562"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19566"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19559"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19632"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19938"}
```

Some results running this benchmark for the Vulkan xirp20a model on Pixel 5, after pre-loading the Context:

```
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "38664"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19921"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20316"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20255"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20219"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20329"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20463"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "21072"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20668"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20889"}
```

Without pre-loading Context:

```
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "70850"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19867"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20211"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20039"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20082"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20268"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20363"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "21103"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20511"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20528"}
```

Reviewed By: mrshenli

Differential Revision: D35124881

Pulled By: SS-JIA

fbshipit-source-id: 0f093e4aa45d69c538a4fe2003e0d5617d72b97a
(cherry picked from commit 96f991420ad720300aea51cc0a1a6c0f79d2820b)
2022-03-30 20:22:57 +00:00

119 lines
3.7 KiB
CMake

# ---[ Mobile builds get only the benchmark/comparison binaries.
# Everything after this block is desktop-only, so we return early.
if(INTERN_BUILD_MOBILE)
  if(BUILD_CAFFE2_MOBILE)
    # Legacy caffe2 mobile build.
    caffe2_binary_target("speed_benchmark.cc")
  else()
    # PyTorch mobile build (lite interpreter or full JIT).
    caffe2_binary_target("speed_benchmark_torch.cc")
    caffe2_binary_target("load_benchmark_torch.cc")
    if(NOT BUILD_LITE_INTERPRETER)
      # Model comparison is only built with the full interpreter.
      caffe2_binary_target("compare_models_torch.cc")
    endif()
  endif()
  # Nothing below applies to mobile builds.
  return()
endif()
# ---[ Binaries that depend on the legacy caffe2 framework.
if(BUILD_CAFFE2)
  caffe2_binary_target("convert_caffe_image_db.cc")
  caffe2_binary_target("convert_db.cc")
  caffe2_binary_target("make_cifar_db.cc")
  caffe2_binary_target("make_mnist_db.cc")
  caffe2_binary_target("predictor_verifier.cc")
  caffe2_binary_target("speed_benchmark.cc")
  caffe2_binary_target("split_db.cc")
  caffe2_binary_target("print_registered_core_operators.cc")

  if(USE_OBSERVERS)
    caffe2_binary_target(caffe2_benchmark "caffe2_benchmark.cc" "benchmark_helper.cc")
    # benchmark_helper pulls in headers from the sibling modules/ directory.
    target_include_directories(caffe2_benchmark PUBLIC
        ${CMAKE_CURRENT_SOURCE_DIR}/../modules)
  endif()

  # These two benchmarks need the generated ATen headers from the build tree.
  caffe2_binary_target("at_launch_benchmark.cc")
  target_include_directories(at_launch_benchmark PUBLIC
      ${CMAKE_BINARY_DIR}/aten/src)

  caffe2_binary_target("intra_inter_benchmark.cc")
  target_include_directories(intra_inter_benchmark PUBLIC
      ${CMAKE_BINARY_DIR}/aten/src)

  caffe2_binary_target("run_plan.cc")
  caffe2_binary_target("db_throughput.cc")

  # ---[ tutorials
  caffe2_binary_target("tutorial_blob.cc")
endif()
# ---[ Binaries built for every non-mobile configuration.
caffe2_binary_target("parallel_info.cc")
target_include_directories(parallel_info PUBLIC
    ${CMAKE_BINARY_DIR}/aten/src) # provides "ATen/TypeExtendedInterface.h" to ATen.h

caffe2_binary_target("record_function_benchmark.cc")
target_include_directories(record_function_benchmark PUBLIC
    ${CMAKE_BINARY_DIR}/aten/src)

caffe2_binary_target("speed_benchmark_torch.cc")
caffe2_binary_target("compare_models_torch.cc")
if(BUILD_TEST)
  # Core overhead benchmark (uses the google benchmark library).
  caffe2_binary_target("core_overhead_benchmark.cc")
  # NOTE(review): keyword-less signature kept on purpose — caffe2_binary_target
  # may already link this target with the plain form, and CMake forbids mixing
  # plain and PRIVATE/PUBLIC signatures on one target.
  target_link_libraries(core_overhead_benchmark benchmark)
endif()
# ---[ CUDA-only diagnostic and benchmark binaries.
if(USE_CUDA)
  caffe2_binary_target("inspect_gpu.cc")
  target_link_libraries(inspect_gpu ${CUDA_LIBRARIES})

  caffe2_binary_target("print_core_object_sizes_gpu.cc")

  if(BUILD_TEST)
    # Core overhead benchmark (GPU variant); needs curand for input generation.
    caffe2_binary_target("core_overhead_benchmark_gpu.cc")
    target_link_libraries(core_overhead_benchmark_gpu benchmark ${CUDA_curand_LIBRARY})
  endif()
endif()
# ---[ ROCm (HIP) counterparts of the GPU binaries.
if(USE_ROCM)
  caffe2_hip_binary_target("hip/inspect_gpu.cc")
  caffe2_hip_binary_target("hip/print_core_object_sizes_gpu.cc")

  if(BUILD_TEST)
    # Core overhead benchmark. NOTE(review): the target name matches the CUDA
    # variant — presumably USE_CUDA and USE_ROCM are mutually exclusive here.
    caffe2_hip_binary_target("hip/core_overhead_benchmark_gpu.cc")
    target_link_libraries(core_overhead_benchmark_gpu benchmark)
  endif()
endif()
# ---[ ZeroMQ data feeder.
if(USE_ZMQ)
  caffe2_binary_target("zmq_feeder.cc")
  target_link_libraries(zmq_feeder ${ZMQ_LIBRARIES})
endif()
# ---[ MPI plan runner.
if(USE_MPI)
  caffe2_binary_target("run_plan_mpi.cc")
  target_link_libraries(run_plan_mpi ${MPI_CXX_LIBRARIES})
endif()
# ---[ LevelDB converter; needs both OpenCV (decoding) and LevelDB (storage).
if(USE_OPENCV AND USE_LEVELDB)
  caffe2_binary_target("convert_encoded_to_raw_leveldb.cc")
  target_link_libraries(convert_encoded_to_raw_leveldb
      ${OpenCV_LIBS} ${LevelDB_LIBRARIES} ${Snappy_LIBRARIES})
endif()
# ---[ Image-processing utilities built against OpenCV.
if(USE_OPENCV)
  caffe2_binary_target("make_image_db.cc")
  target_link_libraries(make_image_db ${OpenCV_LIBS})

  caffe2_binary_target("convert_image_to_tensor.cc")
  target_link_libraries(convert_image_to_tensor ${OpenCV_LIBS})
endif()
# ---[ Combined conversion/benchmark tool; needs observers plus OpenCV.
if(USE_OBSERVERS AND USE_OPENCV)
  caffe2_binary_target("convert_and_benchmark.cc")
  target_link_libraries(convert_and_benchmark ${OpenCV_LIBS})
endif()
# ---[ Mobile tooling built on desktop (model inspection / optimization).
caffe2_binary_target("dump_operator_names.cc")
caffe2_binary_target("optimize_for_mobile.cc")

# Explicit target name so we can attach the torch link dependency below.
caffe2_binary_target(aot_model_compiler "aot_model_compiler.cc")
target_link_libraries(aot_model_compiler torch)