mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/74700

Test Plan: Imported from OSS

Some results running this benchmark for a quantized CPU xirp14b model on a Pixel 5:

```
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "46749"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19261"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19235"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19396"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19486"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19562"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19566"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19559"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19632"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19938"}
```

Some results running this benchmark for the Vulkan xirp20a model on a Pixel 5, after pre-loading the Context:

```
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "38664"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19921"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20316"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20255"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20219"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20329"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20463"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "21072"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20668"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20889"}
```

Without pre-loading the Context:

```
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "70850"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "19867"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20211"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20039"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20082"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20268"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20363"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "21103"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20511"}
PyTorchObserver {"type": "NET", "unit": "us", "metric": "latency", "value": "20528"}
```

Reviewed By: mrshenli

Differential Revision: D35124881

Pulled By: SS-JIA

fbshipit-source-id: 0f093e4aa45d69c538a4fe2003e0d5617d72b97a
(cherry picked from commit 96f991420ad720300aea51cc0a1a6c0f79d2820b)
119 lines
3.7 KiB
CMake
119 lines
3.7 KiB
CMake
# Mobile builds register only the benchmark binaries listed in this block;
# the return() at the end stops processing of the rest of this file.
if(INTERN_BUILD_MOBILE)
  if(NOT BUILD_CAFFE2_MOBILE)
    # PyTorch mobile: speed and load benchmarks are always registered.
    foreach(mobile_bench_src IN ITEMS
        speed_benchmark_torch.cc
        load_benchmark_torch.cc)
      caffe2_binary_target("${mobile_bench_src}")
    endforeach()

    # The model comparison tool is skipped when BUILD_LITE_INTERPRETER is on.
    if(NOT BUILD_LITE_INTERPRETER)
      caffe2_binary_target("compare_models_torch.cc")
    endif()
  else()
    # Caffe2 mobile: only the speed benchmark is registered.
    # Disabled: caffe2_binary_target("predictor_verifier.cc")
    caffe2_binary_target("speed_benchmark.cc")
  endif()

  return()
endif()
|
|
|
|
# Caffe2 command-line tools, registered only when BUILD_CAFFE2 is enabled.
if(BUILD_CAFFE2)
  # Single-source tools: each file is handed to caffe2_binary_target() alone.
  foreach(caffe2_tool_src IN ITEMS
      convert_caffe_image_db.cc
      convert_db.cc
      make_cifar_db.cc
      make_mnist_db.cc
      predictor_verifier.cc
      speed_benchmark.cc
      split_db.cc
      print_registered_core_operators.cc)
    caffe2_binary_target("${caffe2_tool_src}")
  endforeach()

  if(USE_OBSERVERS)
    # Here the first argument is the target name (it is used as such on the
    # target_include_directories() call below), followed by two sources.
    caffe2_binary_target(
      caffe2_benchmark
      "caffe2_benchmark.cc"
      "benchmark_helper.cc")
    target_include_directories(
      caffe2_benchmark
      PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../modules)
  endif()

  # Benchmarks that additionally get <build tree>/aten/src on their include
  # path.
  foreach(aten_bench IN ITEMS at_launch_benchmark intra_inter_benchmark)
    caffe2_binary_target("${aten_bench}.cc")
    target_include_directories(
      ${aten_bench}
      PUBLIC ${CMAKE_BINARY_DIR}/aten/src)
  endforeach()

  foreach(plan_tool_src IN ITEMS run_plan.cc db_throughput.cc)
    caffe2_binary_target("${plan_tool_src}")
  endforeach()

  # ---[ tutorials
  caffe2_binary_target("tutorial_blob.cc")
endif()
|
|
|
|
# Tools registered unconditionally at this point in the file. The first two
# also get <build tree>/aten/src added to their include path.
foreach(aten_tool IN ITEMS parallel_info record_function_benchmark)
  caffe2_binary_target("${aten_tool}.cc")
  # Original note (written on parallel_info): this directory provides
  # "ATen/TypeExtendedInterface.h" to ATen.h.
  target_include_directories(
    ${aten_tool}
    PUBLIC ${CMAKE_BINARY_DIR}/aten/src)
endforeach()

# Torch benchmark / comparison tools with no extra include directories.
foreach(torch_tool_src IN ITEMS
    speed_benchmark_torch.cc
    compare_models_torch.cc)
  caffe2_binary_target("${torch_tool_src}")
endforeach()
|
|
|
|
# Core overhead benchmark (CPU); only registered when BUILD_TEST is enabled.
if(BUILD_TEST)
  caffe2_binary_target("core_overhead_benchmark.cc")
  # Plain (keyword-less) link signature kept on purpose: CMake rejects mixing
  # plain and keyword signatures on one target, and caffe2_binary_target()
  # is not visible here -- confirm what it uses before adding PRIVATE.
  target_link_libraries(
    core_overhead_benchmark
    benchmark)
endif()
|
|
|
|
# CUDA-only tools.
if(USE_CUDA)
  # GPU inspection tool, linked against ${CUDA_LIBRARIES}.
  caffe2_binary_target("inspect_gpu.cc")
  target_link_libraries(
    inspect_gpu
    ${CUDA_LIBRARIES})

  caffe2_binary_target("print_core_object_sizes_gpu.cc")

  if(BUILD_TEST)
    # Core overhead benchmark (GPU variant): links `benchmark` plus
    # ${CUDA_curand_LIBRARY}.
    caffe2_binary_target("core_overhead_benchmark_gpu.cc")
    target_link_libraries(
      core_overhead_benchmark_gpu
      benchmark
      ${CUDA_curand_LIBRARY})
  endif()
endif()
|
|
|
|
# ROCm-only tools; sources live under hip/ and are registered through
# caffe2_hip_binary_target().
if(USE_ROCM)
  foreach(hip_tool_src IN ITEMS
      inspect_gpu.cc
      print_core_object_sizes_gpu.cc)
    caffe2_hip_binary_target("hip/${hip_tool_src}")
  endforeach()

  if(BUILD_TEST)
    # Core overhead benchmark (GPU variant), linked against `benchmark`.
    caffe2_hip_binary_target("hip/core_overhead_benchmark_gpu.cc")
    target_link_libraries(
      core_overhead_benchmark_gpu
      benchmark)
  endif()
endif()
|
|
|
|
# ZeroMQ feeder tool; registered only when USE_ZMQ is enabled.
if(USE_ZMQ)
  caffe2_binary_target("zmq_feeder.cc")
  target_link_libraries(
    zmq_feeder
    ${ZMQ_LIBRARIES})
endif()
|
|
|
|
# MPI variant of the plan runner; registered only when USE_MPI is enabled.
if(USE_MPI)
  caffe2_binary_target("run_plan_mpi.cc")
  target_link_libraries(
    run_plan_mpi
    ${MPI_CXX_LIBRARIES})
endif()
|
|
|
|
# Converter that needs both OpenCV and LevelDB; it is also linked against
# ${Snappy_LIBRARIES}.
if(USE_OPENCV AND USE_LEVELDB)
  caffe2_binary_target("convert_encoded_to_raw_leveldb.cc")
  target_link_libraries(
    convert_encoded_to_raw_leveldb
    ${OpenCV_LIBS}
    ${LevelDB_LIBRARIES}
    ${Snappy_LIBRARIES})
endif()
|
|
|
|
# Image tools that only require OpenCV; each one is linked against
# ${OpenCV_LIBS}.
if(USE_OPENCV)
  foreach(opencv_tool IN ITEMS make_image_db convert_image_to_tensor)
    caffe2_binary_target("${opencv_tool}.cc")
    target_link_libraries(${opencv_tool} ${OpenCV_LIBS})
  endforeach()
endif()
|
|
|
|
# Combined convert-and-benchmark tool; requires both USE_OBSERVERS and
# USE_OPENCV.
if(USE_OBSERVERS AND USE_OPENCV)
  caffe2_binary_target("convert_and_benchmark.cc")
  target_link_libraries(
    convert_and_benchmark
    ${OpenCV_LIBS})
endif()
|
|
|
|
# Model tooling registered unconditionally at this point in the file.
foreach(model_tool_src IN ITEMS
    dump_operator_names.cc
    optimize_for_mobile.cc)
  caffe2_binary_target("${model_tool_src}")
endforeach()

# Registered with an explicit target name and additionally linked against
# `torch`.
caffe2_binary_target(
  aot_model_compiler
  "aot_model_compiler.cc")
target_link_libraries(
  aot_model_compiler
  torch)
|