mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Summary: Adding NEON specializations of Vectorized<T> for int8, int16, int32 and int64. Correctness has been checked using test_ops.py and the comprehensive torch test operator_benchmark_test.py has been enhanced by adding cases of bitwise operations, boolean ops and integer ops. The benchmark, which uses the PyTorch API, shows significant enhancements in a wide variety of operations: Before: bitwise xor: 779.882us boolean any: 636.209us boolean all: 538.621us integer mul: 304.457us integer asr: 447.997us After: bitwise xor: 680.221us ---> 15% higher throughput boolean any: 391.468us ---> 63% higher throughput boolean all: 390.189us ---> 38% higher throughput integer mul: 193.532us ---> 57% higher throughput integer asr: 179.929us ---> 149% higher throughput Test Plan: Correctness: buck2 test @mode/opt //caffe2/test:test_ops buck2 test @mode/opt //caffe2/test:torch buck2 test @mode/opt //caffe2/test/distributed/launcher/fb:fb_run_test Performance: buck2 run mode/opt //caffe2/benchmarks/operator_benchmark/fb:operator_benchmark_test Differential Revision: D84424638 Pull Request resolved: https://github.com/pytorch/pytorch/pull/165273 Approved by: https://github.com/malfet
48 lines
856 B
Python
48 lines
856 B
Python
from pt import ( # noqa: F401
|
|
add_test,
|
|
ao_sparsifier_test,
|
|
arange_test,
|
|
as_strided_test,
|
|
batchnorm_test,
|
|
binary_inplace_test,
|
|
binary_test,
|
|
bmm_test,
|
|
boolean_test,
|
|
cat_test,
|
|
channel_shuffle_test,
|
|
chunk_test,
|
|
conv_test,
|
|
diag_test,
|
|
embeddingbag_test,
|
|
fill_test,
|
|
gather_test,
|
|
groupnorm_test,
|
|
hardsigmoid_test,
|
|
hardswish_test,
|
|
index_add__test,
|
|
index_select_test,
|
|
instancenorm_test,
|
|
interpolate_test,
|
|
layernorm_test,
|
|
linear_test,
|
|
matmul_test,
|
|
mm_test,
|
|
nan_to_num_test,
|
|
pool_test,
|
|
remainder_test,
|
|
softmax_test,
|
|
split_test,
|
|
stack_test,
|
|
sum_test,
|
|
tensor_to_test,
|
|
ternary_test,
|
|
topk_test,
|
|
where_test,
|
|
)
|
|
|
|
import operator_benchmark as op_bench
|
|
|
|
|
|
# Script entry point: when this file is executed directly (not imported),
# hand control to the operator_benchmark runner.
# NOTE(review): the `pt.*_test` modules imported at the top of the file are
# unused by name (hence the F401 suppression); presumably importing them is
# what makes their benchmarks available to the runner — confirm.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()
|