From edf76cd9c224e244a07d834eccca15933c0d94ee Mon Sep 17 00:00:00 2001 From: Linbin Yu Date: Wed, 29 Jun 2022 22:40:36 +0000 Subject: [PATCH] Move qnnpack to shared BUCK build (#80260) Differential Revision: D37434340 Pull Request resolved: https://github.com/pytorch/pytorch/pull/80260 Approved by: https://github.com/larryliu0820, https://github.com/malfet --- .github/workflows/_buck-build-test.yml | 4 + .../native/quantized/cpu/qnnpack/BUCK.oss | 145 +--- .../quantized/cpu/qnnpack/buckbuild.bzl | 647 ++++++++++++++++++ buckbuild.bzl | 4 +- third_party/BUCK.oss | 70 ++ tools/build_defs/buck_helpers.bzl | 1 + tools/build_defs/fb_xplat_cxx_test.bzl | 18 + 7 files changed, 746 insertions(+), 143 deletions(-) create mode 100644 aten/src/ATen/native/quantized/cpu/qnnpack/buckbuild.bzl create mode 100644 tools/build_defs/fb_xplat_cxx_test.bzl diff --git a/.github/workflows/_buck-build-test.yml b/.github/workflows/_buck-build-test.yml index b005224650a1..2d1e563ed0ee 100644 --- a/.github/workflows/_buck-build-test.yml +++ b/.github/workflows/_buck-build-test.yml @@ -74,6 +74,10 @@ jobs: run: | buck build aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack + - name: Test QNNPACK + run: | + buck test aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack_test + - name: Build aten_cpu run: | buck build :aten_cpu diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/BUCK.oss b/aten/src/ATen/native/quantized/cpu/qnnpack/BUCK.oss index 85abc6a60916..4580a6f7205b 100644 --- a/aten/src/ATen/native/quantized/cpu/qnnpack/BUCK.oss +++ b/aten/src/ATen/native/quantized/cpu/qnnpack/BUCK.oss @@ -1,143 +1,4 @@ -load("//tools/build_defs:glob_defs.bzl", "subdir_glob") +load("//:buckbuild.bzl", "third_party") +load(":buckbuild.bzl", "define_qnnpack") -cxx_library( - name = "pytorch_qnnpack", - srcs = ['src/add.c', 'src/average-pooling.c', 'src/channel-shuffle.c', 'src/clamp.c', 'src/conv-prepack.cc', 'src/conv-run.cc', 'src/convolution.c', 'src/deconv-run.cc', 
'src/deconvolution.c', 'src/fc-dynamic-run.cc', 'src/fc-prepack.cc', 'src/fc-run.cc', 'src/fully-connected.c', 'src/fully-connected-sparse.c', 'src/global-average-pooling.c', 'src/hardsigmoid.c', 'src/hardswish.c', 'src/indirection.c', 'src/init.c', 'src/leaky-relu.c', 'src/max-pooling.c', 'src/operator-delete.c', 'src/operator-run.c', 'src/pack_block_sparse.cc', 'src/sigmoid.c', 'src/softargmax.c', 'src/tanh.c'], - deps = [':qnnp_interface', ':ukernels_asm', ':ukernels_neon', ':ukernels_psimd', ':ukernels_scalar', ':ukernels_sse2', ':ukernels_sse41', ':ukernels_ssse3', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'], - exported_deps = ['//third_party:cpuinfo'], - compiler_flags = ['-O2', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION', '-Wno-deprecated-declarations'], - preferred_linkage = "static", - exported_headers = subdir_glob([("src", "qnnpack/*.h"),("include", "*.h"),]), - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_compiler_flags = [['armv7', ['-mfpu=neon']], ['^android-armv7$', ['-marm', '-mfloat-abi=softfp']]], - platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "ukernels_ssse3", - srcs = ['wrappers/requantization/gemmlowp-ssse3.c', 'wrappers/requantization/precise-ssse3.c', 'wrappers/requantization/q31-ssse3.c'], - deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'], - exported_deps = [], - compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), 
("src", "requantization/*.h")]), - link_whole = False, - platform_compiler_flags = [['86', ['-mssse3', '-mno-sse4']], ['osmeta', ['-mosmeta-no-restrict-sse']]], - platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "ukernels_psimd", - srcs = ['src/requantization/fp32-psimd.c', 'src/requantization/precise-psimd.c', 'src/sgemm/6x8-psimd.c'], - deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv', '//third_party:psimd'], - exported_deps = [], - compiler_flags = ['-O3', '-ffast-math', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_compiler_flags = [['armv7', ['-mfpu=neon']], ['^android-armv7$', ['-marm', '-mfloat-abi=softfp']]], - platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "ukernels_scalar", - srcs = ['src/requantization/fp32-scalar.c', 'src/requantization/gemmlowp-scalar.c', 'src/requantization/precise-scalar.c', 'src/requantization/q31-scalar.c', 'src/u8lut32norm/scalar.c', 'src/x8lut/scalar.c'], - deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'], - exported_deps = [], - compiler_flags = ['-O2', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_preprocessor_flags = [['windows', 
['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "ukernels_asm", - srcs = ['wrappers/dummy.c', 'wrappers/hgemm/8x8-aarch32-neonfp16arith.S', 'wrappers/q8conv/4x8-aarch32-neon.S', 'wrappers/q8dwconv/up8x9-aarch32-neon.S', 'wrappers/q8dwconv/up8x9-aarch32-neon-per-channel.S', 'wrappers/q8gemm/4x8-aarch32-neon.S', 'wrappers/q8gemm/4x8-dq-aarch32-neon.S', 'wrappers/q8gemm/4x8c2-xzp-aarch32-neon.S', 'wrappers/q8gemm_sparse/4x4-packA-aarch32-neon.S', 'wrappers/q8gemm_sparse/4x8c1x4-dq-packedA-aarch32-neon.S', 'wrappers/q8gemm_sparse/4x8c8x1-dq-packedA-aarch32-neon.S', 'wrappers/q8gemm_sparse/8x4-packA-aarch64-neon.S', 'wrappers/q8gemm_sparse/8x8c1x4-dq-packedA-aarch64-neon.S', 'wrappers/q8gemm_sparse/8x8c8x1-dq-packedA-aarch64-neon.S', 'wrappers/q8conv/8x8-aarch64-neon.S', 'wrappers/q8gemm/8x8-aarch64-neon.S', 'wrappers/q8gemm/8x8-dq-aarch64-neon.S'], - deps = [], - exported_deps = [], - compiler_flags = ['-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_compiler_flags = [['^iphoneos-armv7$', ['-mfpu=neon-vfpv4']], ['osmeta', ['-mfpu=neon-vfpv4']]], - platform_preprocessor_flags = [['android', ['-D__ELF__=1']], ['tizen', ['-D__ELF__=1']], ['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "ukernels_sse41", - srcs = ['wrappers/requantization/gemmlowp-sse4.c', 'wrappers/requantization/precise-sse4.c', 'wrappers/requantization/q31-sse4.c'], - deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'], - exported_deps = [], - 
compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_compiler_flags = [['86', ['-msse4.1', '-mno-sse4.2']], ['osmeta', ['-mosmeta-no-restrict-sse']]], - platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "ukernels_neon", - srcs = ['wrappers/q8avgpool/mp8x9p8q-neon.c', 'wrappers/q8avgpool/up8x9-neon.c', 'wrappers/q8avgpool/up8xm-neon.c', 'wrappers/q8conv/4x8-neon.c', 'wrappers/q8conv/8x8-neon.c', 'wrappers/q8dwconv/mp8x25-neon.c', 'wrappers/q8dwconv/mp8x25-neon-per-channel.c', 'wrappers/q8dwconv/mp8x27-neon.c', 'wrappers/q8dwconv/up8x9-neon.c', 'wrappers/q8dwconv/up8x9-neon-per-channel.c', 'wrappers/q8gavgpool/mp8x7p7q-neon.c', 'wrappers/q8gavgpool/up8x7-neon.c', 'wrappers/q8gavgpool/up8xm-neon.c', 'wrappers/q8gemm/4x-sumrows-neon.c', 'wrappers/q8gemm/4x8-dq-neon.c', 'wrappers/q8gemm/4x8-neon.c', 'wrappers/q8gemm/4x8c2-xzp-neon.c', 'wrappers/q8gemm/6x4-neon.c', 'wrappers/q8gemm/8x8-neon.c', 'wrappers/q8vadd/neon.c', 'wrappers/requantization/fp32-neon.c', 'wrappers/requantization/gemmlowp-neon.c', 'wrappers/requantization/precise-neon.c', 'wrappers/requantization/q31-neon.c', 'wrappers/sgemm/5x8-neon.c', 'wrappers/sgemm/6x8-neon.c', 'wrappers/u8clamp/neon.c', 'wrappers/u8maxpool/16x9p8q-neon.c', 'wrappers/u8maxpool/sub16-neon.c', 'wrappers/u8rmax/neon.c', 'wrappers/x8zip/x2-neon.c', 'wrappers/x8zip/x3-neon.c', 'wrappers/x8zip/x4-neon.c', 'wrappers/x8zip/xm-neon.c'], - deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'], - exported_deps = [], - 
compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_compiler_flags = [['armv7', ['-mfpu=neon']], ['^android-armv7$', ['-marm', '-mfloat-abi=softfp']]], - platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "ukernels_sse2", - srcs = ['wrappers/q8avgpool/mp8x9p8q-sse2.c', 'wrappers/q8avgpool/up8x9-sse2.c', 'wrappers/q8avgpool/up8xm-sse2.c', 'wrappers/q8conv/4x4c2-sse2.c', 'wrappers/q8dwconv/mp8x25-sse2.c', 'wrappers/q8dwconv/mp8x25-sse2-per-channel.c', 'wrappers/q8dwconv/mp8x27-sse2.c', 'wrappers/q8dwconv/up8x9-sse2.c', 'wrappers/q8dwconv/up8x9-sse2-per-channel.c', 'wrappers/q8gavgpool/mp8x7p7q-sse2.c', 'wrappers/q8gavgpool/up8x7-sse2.c', 'wrappers/q8gavgpool/up8xm-sse2.c', 'wrappers/q8gemm/2x4c8-sse2.c', 'wrappers/q8gemm/4x4c2-dq-sse2.c', 'wrappers/q8gemm/4x4c2-sse2.c', 'wrappers/q8gemm_sparse/8x4c1x4-packed-sse2.c', 'wrappers/q8vadd/sse2.c', 'wrappers/requantization/fp32-sse2.c', 'wrappers/requantization/gemmlowp-sse2.c', 'wrappers/requantization/precise-sse2.c', 'wrappers/requantization/q31-sse2.c', 'wrappers/u8clamp/sse2.c', 'wrappers/u8maxpool/16x9p8q-sse2.c', 'wrappers/u8maxpool/sub16-sse2.c', 'wrappers/u8rmax/sse2.c', 'wrappers/x8zip/x2-sse2.c', 'wrappers/x8zip/x3-sse2.c', 'wrappers/x8zip/x4-sse2.c', 'wrappers/x8zip/xm-sse2.c'], - deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'], - exported_deps = [], - compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - 
preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_compiler_flags = [['86', ['-msse2', '-mno-sse3']]], - platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) - - -cxx_library( - name = "qnnp_interface", - srcs = [], - deps = ['//third_party:pthreadpool_header'], - exported_deps = [], - compiler_flags = ['-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'], - preferred_linkage = "static", - header_namespace = "", - headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]), - link_whole = False, - platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]], - visibility = ['PUBLIC'], -) +define_qnnpack(third_party) diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/buckbuild.bzl b/aten/src/ATen/native/quantized/cpu/qnnpack/buckbuild.bzl new file mode 100644 index 000000000000..5c1c316678e1 --- /dev/null +++ b/aten/src/ATen/native/quantized/cpu/qnnpack/buckbuild.bzl @@ -0,0 +1,647 @@ +load("//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library") +load("//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test") +load("//tools/build_defs:glob_defs.bzl", "subdir_glob") +load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX") + +# Shared by internal and OSS BUCK +def define_qnnpack(third_party, labels = []): + fb_xplat_cxx_library( + # @autodeps-skip + name = "ukernels_scalar", + srcs = [ + "src/requantization/fp32-scalar.c", + "src/requantization/gemmlowp-scalar.c", + 
"src/requantization/precise-scalar.c", + "src/requantization/q31-scalar.c", + "src/u8lut32norm/scalar.c", + "src/x8lut/scalar.c", + ], + headers = subdir_glob([ + ("src", "qnnpack/*.h"), + ("src", "requantization/*.h"), + ]), + header_namespace = "", + apple_sdks = (IOS, MACOSX, APPLETVOS), + compiler_flags = [ + "-O2", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = labels, + visibility = ["PUBLIC"], + deps = [ + ":qnnp_interface", + third_party("cpuinfo"), + third_party("FP16"), + third_party("FXdiv"), + ], + ) + + fb_xplat_cxx_library( + # @autodeps-skip + name = "ukernels_sse2", + srcs = [ + "wrappers/q8avgpool/mp8x9p8q-sse2.c", + "wrappers/q8avgpool/up8x9-sse2.c", + "wrappers/q8avgpool/up8xm-sse2.c", + "wrappers/q8conv/4x4c2-sse2.c", + "wrappers/q8dwconv/mp8x25-sse2.c", + "wrappers/q8dwconv/mp8x25-sse2-per-channel.c", + "wrappers/q8dwconv/mp8x27-sse2.c", + "wrappers/q8dwconv/up8x9-sse2.c", + "wrappers/q8dwconv/up8x9-sse2-per-channel.c", + "wrappers/q8gavgpool/mp8x7p7q-sse2.c", + "wrappers/q8gavgpool/up8x7-sse2.c", + "wrappers/q8gavgpool/up8xm-sse2.c", + "wrappers/q8gemm/2x4c8-sse2.c", + "wrappers/q8gemm/4x4c2-dq-sse2.c", + "wrappers/q8gemm/4x4c2-sse2.c", + "wrappers/q8gemm_sparse/8x4c1x4-packed-sse2.c", + "wrappers/q8vadd/sse2.c", + "wrappers/requantization/fp32-sse2.c", + "wrappers/requantization/gemmlowp-sse2.c", + "wrappers/requantization/precise-sse2.c", + "wrappers/requantization/q31-sse2.c", + "wrappers/u8clamp/sse2.c", + "wrappers/u8maxpool/16x9p8q-sse2.c", + "wrappers/u8maxpool/sub16-sse2.c", + "wrappers/u8rmax/sse2.c", + "wrappers/x8zip/x2-sse2.c", + "wrappers/x8zip/x3-sse2.c", + "wrappers/x8zip/x4-sse2.c", + "wrappers/x8zip/xm-sse2.c", + ], + headers = subdir_glob([ + ("src", "**/*.c"), + ("src", "q8gemm_sparse/*.h"), + ("src", "qnnpack/*.h"), + ("src", "requantization/*.h"), + ]), + header_namespace = "", + apple_sdks = (IOS, MACOSX, 
APPLETVOS), + compiler_flags = [ + "-O3", + "-ffast-math", + "-Wno-error=unused-variable", + "-Wno-shadow", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = labels, + platform_compiler_flags = [ + ( + "86", + [ + "-msse2", + "-mno-sse3", + ], + ), + ], + visibility = ["PUBLIC"], + deps = [ + ":qnnp_interface", + third_party("cpuinfo"), + third_party("FP16"), + third_party("FXdiv"), + ], + ) + + fb_xplat_cxx_library( + # @autodeps-skip + name = "ukernels_ssse3", + srcs = [ + "wrappers/requantization/gemmlowp-ssse3.c", + "wrappers/requantization/precise-ssse3.c", + "wrappers/requantization/q31-ssse3.c", + ], + headers = subdir_glob([ + ("src", "**/*.c"), + ("src", "qnnpack/*.h"), + ("src", "requantization/*.h"), + ]), + header_namespace = "", + apple_sdks = (IOS, MACOSX, APPLETVOS), + compiler_flags = [ + "-O3", + "-ffast-math", + "-Wno-error=unused-variable", + "-Wno-shadow", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = labels, + platform_compiler_flags = [ + ( + "86", + [ + "-mssse3", + "-mno-sse4", + ], + ), + ( + # By default, osmeta compiler silently ignores -msseXX flags. + # This flag disables this behavior. 
+ "osmeta", + [ + "-mosmeta-no-restrict-sse", + ], + ), + ], + visibility = ["PUBLIC"], + deps = [ + ":qnnp_interface", + third_party("cpuinfo"), + third_party("FP16"), + third_party("FXdiv"), + ], + ) + + fb_xplat_cxx_library( + # @autodeps-skip + name = "ukernels_sse41", + srcs = [ + "wrappers/requantization/gemmlowp-sse4.c", + "wrappers/requantization/precise-sse4.c", + "wrappers/requantization/q31-sse4.c", + ], + headers = subdir_glob([ + ("src", "**/*.c"), + ("src", "qnnpack/*.h"), + ("src", "requantization/*.h"), + ]), + header_namespace = "", + apple_sdks = (IOS, MACOSX, APPLETVOS), + compiler_flags = [ + "-O3", + "-ffast-math", + "-Wno-error=unused-variable", + "-Wno-shadow", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = labels, + platform_compiler_flags = [ + ( + "86", + [ + "-msse4.1", + "-mno-sse4.2", + ], + ), + ( + # By default, osmeta compiler silently ignores -msseXX flags. + # This flag disables this behavior. 
+ "osmeta", + [ + "-mosmeta-no-restrict-sse", + ], + ), + ], + visibility = ["PUBLIC"], + deps = [ + ":qnnp_interface", + third_party("cpuinfo"), + third_party("FP16"), + third_party("FXdiv"), + ], + ) + + fb_xplat_cxx_library( + # @autodeps-skip + name = "qnnp_interface", + headers = subdir_glob( + [ + ("include", "*.h"), + ("src", "qnnpack/*.h"), + ("src", "requantization/*.h"), + ], + ), + header_namespace = "", + apple_sdks = (IOS, MACOSX, APPLETVOS), + compiler_flags = [ + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + force_static = True, + labels = labels, + visibility = ["PUBLIC"], + deps = [ + third_party("pthreadpool_header"), + ], + ) + + fb_xplat_cxx_library( + # @autodeps-skip + name = "pytorch_qnnpack", + srcs = [ + "src/add.c", + "src/average-pooling.c", + "src/channel-shuffle.c", + "src/clamp.c", + "src/conv-prepack.cc", + "src/conv-run.cc", + "src/convolution.c", + "src/deconv-run.cc", + "src/deconvolution.c", + "src/fc-dynamic-run.cc", + "src/fc-prepack.cc", + "src/fc-run.cc", + "src/fully-connected.c", + "src/fully-connected-sparse.c", + "src/global-average-pooling.c", + "src/hardsigmoid.c", + "src/hardswish.c", + "src/indirection.c", + "src/init.c", + "src/leaky-relu.c", + "src/max-pooling.c", + "src/operator-delete.c", + "src/operator-run.c", + "src/pack_block_sparse.cc", + "src/sigmoid.c", + "src/softargmax.c", + "src/tanh.c", + ], + headers = subdir_glob([ + ("src", "**/*.c"), + ("src", "**/*.h"), + ("src", "qnnpack/*.h"), + ("include", "**/*.h"), + ]), + header_namespace = "", + exported_headers = subdir_glob([ + ("src", "qnnpack/*.h"), + ("include", "*.h"), + ]), + apple_sdks = (IOS, MACOSX, APPLETVOS), + compiler_flags = [ + "-O2", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = [ + "supermodule:android/default/pytorch", + "supermodule:ios/default/public.pytorch", + ], + platform_compiler_flags = [ + ( + "armv7", + [ + 
"-mfpu=neon", + ], + ), + ( + "^android-armv7$", + [ + "-marm", + "-mfloat-abi=softfp", + ], + ), + ], + visibility = ["PUBLIC"], + deps = [ + ":qnnp_interface", + ":ukernels_asm", + ":ukernels_neon", + ":ukernels_psimd", + ":ukernels_scalar", + ":ukernels_sse2", + ":ukernels_sse41", + ":ukernels_ssse3", + third_party("cpuinfo"), + third_party("FP16"), + third_party("FXdiv"), + ], + exported_deps = [ + third_party("cpuinfo"), + ], + ) + + # Only ukernels implemented in C with ARM NEON intrinsics + fb_xplat_cxx_library( + # @autodeps-skip + name = "ukernels_neon", + srcs = [ + "wrappers/q8avgpool/mp8x9p8q-neon.c", + "wrappers/q8avgpool/up8x9-neon.c", + "wrappers/q8avgpool/up8xm-neon.c", + "wrappers/q8conv/4x8-neon.c", + "wrappers/q8conv/8x8-neon.c", + "wrappers/q8dwconv/mp8x25-neon.c", + "wrappers/q8dwconv/mp8x25-neon-per-channel.c", + "wrappers/q8dwconv/mp8x27-neon.c", + "wrappers/q8dwconv/up8x9-neon.c", + "wrappers/q8dwconv/up8x9-neon-per-channel.c", + "wrappers/q8gavgpool/mp8x7p7q-neon.c", + "wrappers/q8gavgpool/up8x7-neon.c", + "wrappers/q8gavgpool/up8xm-neon.c", + "wrappers/q8gemm/4x-sumrows-neon.c", + "wrappers/q8gemm/4x8-dq-neon.c", + "wrappers/q8gemm/4x8-neon.c", + "wrappers/q8gemm/4x8c2-xzp-neon.c", + "wrappers/q8gemm/6x4-neon.c", + "wrappers/q8gemm/8x8-neon.c", + "wrappers/q8vadd/neon.c", + "wrappers/requantization/fp32-neon.c", + "wrappers/requantization/gemmlowp-neon.c", + "wrappers/requantization/precise-neon.c", + "wrappers/requantization/q31-neon.c", + "wrappers/sgemm/5x8-neon.c", + "wrappers/sgemm/6x8-neon.c", + "wrappers/u8clamp/neon.c", + "wrappers/u8maxpool/16x9p8q-neon.c", + "wrappers/u8maxpool/sub16-neon.c", + "wrappers/u8rmax/neon.c", + "wrappers/x8zip/x2-neon.c", + "wrappers/x8zip/x3-neon.c", + "wrappers/x8zip/x4-neon.c", + "wrappers/x8zip/xm-neon.c", + ], + headers = subdir_glob([ + ("src", "**/*.c"), + ("src", "qnnpack/*.h"), + ("src", "requantization/*.h"), + ]), + header_namespace = "", + apple_sdks = (IOS, MACOSX, APPLETVOS), + 
compiler_flags = [ + "-O3", + "-ffast-math", + "-Wno-error=unused-variable", + "-Wno-shadow", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = labels, + platform_compiler_flags = [ + ( + "armv7", + [ + "-mfpu=neon", + ], + ), + ( + "^android-armv7$", + [ + "-marm", + "-mfloat-abi=softfp", + ], + ), + ], + visibility = ["PUBLIC"], + deps = [ + ":qnnp_interface", + third_party("cpuinfo"), + third_party("FP16"), + third_party("FXdiv"), + ], + ) + + fb_xplat_cxx_library( + # @autodeps-skip + name = "ukernels_asm", + srcs = [ + # Dummy empty source file to work around link error on x86-64 Android + # when static library contains no symbols. + "wrappers/dummy.c", + # AArch32 ukernels + "wrappers/hgemm/8x8-aarch32-neonfp16arith.S", + "wrappers/q8conv/4x8-aarch32-neon.S", + "wrappers/q8dwconv/up8x9-aarch32-neon.S", + "wrappers/q8dwconv/up8x9-aarch32-neon-per-channel.S", + "wrappers/q8gemm/4x8-aarch32-neon.S", + "wrappers/q8gemm/4x8-dq-aarch32-neon.S", + "wrappers/q8gemm/4x8c2-xzp-aarch32-neon.S", + "wrappers/q8gemm_sparse/4x4-packA-aarch32-neon.S", + "wrappers/q8gemm_sparse/4x8c1x4-dq-packedA-aarch32-neon.S", + "wrappers/q8gemm_sparse/4x8c8x1-dq-packedA-aarch32-neon.S", + "wrappers/q8gemm_sparse/8x4-packA-aarch64-neon.S", + "wrappers/q8gemm_sparse/8x8c1x4-dq-packedA-aarch64-neon.S", + "wrappers/q8gemm_sparse/8x8c8x1-dq-packedA-aarch64-neon.S", + # AArch64 ukernels + "wrappers/q8conv/8x8-aarch64-neon.S", + "wrappers/q8gemm/8x8-aarch64-neon.S", + "wrappers/q8gemm/8x8-dq-aarch64-neon.S", + ], + headers = subdir_glob([ + ("src", "qnnpack/assembly.h"), + ("src", "**/*.S"), + ("src", "requantization/*.h"), + ]), + header_namespace = "", + apple_sdks = (IOS, MACOSX, APPLETVOS), + compiler_flags = [ + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = labels, + 
platform_compiler_flags = [ + ( + # iOS assembler doesn't let us specify ISA in the assembly file, + # so this must be set to the highest version of ISA of any of the + # assembly functions + "^iphoneos-armv7$", + [ + "-mfpu=neon-vfpv4", + ], + ), + ( + "osmeta", + [ + "-mfpu=neon-vfpv4", + ], + ), + ], + platform_preprocessor_flags = [ + ( + "android", + [ + # Workaround for osmeta-android, which builds for ELF, but hides it + "-D__ELF__=1", + ], + ), + ( + "tizen", + [ + # Workaround for osmeta-tizen, which builds for ELF, but hides it + "-D__ELF__=1", + ], + ), + ], + visibility = ["PUBLIC"], + ) + + fb_xplat_cxx_library( + # @autodeps-skip + name = "ukernels_psimd", + srcs = [ + "src/requantization/fp32-psimd.c", + "src/requantization/precise-psimd.c", + "src/sgemm/6x8-psimd.c", + ], + headers = subdir_glob([ + ("src", "**/*.c"), + ("src", "qnnpack/*.h"), + ]), + header_namespace = "", + apple_sdks = (IOS, MACOSX, APPLETVOS), + compiler_flags = [ + "-O3", + "-ffast-math", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + fbobjc_preprocessor_flags = [ + "-DQNNP_PRIVATE=", + "-DQNNP_INTERNAL=", + ], + force_static = True, + labels = labels, + platform_compiler_flags = [ + ( + "armv7", + [ + "-mfpu=neon", + ], + ), + ( + "^android-armv7$", + [ + "-marm", + "-mfloat-abi=softfp", + ], + ), + ], + visibility = ["PUBLIC"], + deps = [ + ":qnnp_interface", + third_party("cpuinfo"), + third_party("FP16"), + third_party("FXdiv"), + third_party("psimd"), + ], + ) + + fb_xplat_cxx_test( + # @autodeps-skip + fbandroid_use_instrumentation_test = True, + contacts = ["oncall+ai_infra_mobile_platform@xmail.facebook.com"], + platforms = (CXX, APPLE, ANDROID), + apple_sdks = (IOS, MACOSX), + name = "pytorch_qnnpack_test", + srcs = [ + "test/add.cc", + "test/average-pooling.cc", + "test/channel-shuffle.cc", + "test/clamp.cc", + "test/convolution.cc", + "test/deconvolution.cc", + "test/fully-connected.cc", + "test/fully-connected-sparse.cc", + "test/global-average-pooling.cc", + 
"test/hardsigmoid.cc", + "test/hardswish.cc", + "test/leaky-relu.cc", + "test/max-pooling.cc", + "test/q8avgpool.cc", + "test/q8conv.cc", + "test/q8dwconv.cc", + "test/q8gavgpool.cc", + "test/q8gemm_sparse.cc", + "test/q8vadd.cc", + "test/requantization.cc", + "test/sgemm.cc", + "test/sigmoid.cc", + "test/softargmax.cc", + "test/tanh.cc", + "test/u8clamp.cc", + "test/u8lut32norm.cc", + "test/u8maxpool.cc", + "test/u8rmax.cc", + "test/x8lut.cc", + "test/x8zip.cc", + ], + headers = { + "add-operator-tester.h": "test/add-operator-tester.h", + "average-pooling-operator-tester.h": "test/average-pooling-operator-tester.h", + "avgpool-microkernel-tester.h": "test/avgpool-microkernel-tester.h", + "channel-shuffle-operator-tester.h": "test/channel-shuffle-operator-tester.h", + "clamp-microkernel-tester.h": "test/clamp-microkernel-tester.h", + "clamp-operator-tester.h": "test/clamp-operator-tester.h", + "convolution-operator-tester.h": "test/convolution-operator-tester.h", + "deconvolution-operator-tester.h": "test/deconvolution-operator-tester.h", + "dwconv-microkernel-tester.h": "test/dwconv-microkernel-tester.h", + "fully-connected-operator-tester.h": "test/fully-connected-operator-tester.h", + "fully-connected-sparse-operator-tester.h": "test/fully-connected-sparse-operator-tester.h", + "gavgpool-microkernel-tester.h": "test/gavgpool-microkernel-tester.h", + "gemm-block-sparse-microkernel-tester.h": "test/gemm-block-sparse-microkernel-tester.h", + "gemm-microkernel-tester.h": "test/gemm-microkernel-tester.h", + "global-average-pooling-operator-tester.h": "test/global-average-pooling-operator-tester.h", + "hardsigmoid-operator-tester.h": "test/hardsigmoid-operator-tester.h", + "hardswish-operator-tester.h": "test/hardswish-operator-tester.h", + "leaky-relu-operator-tester.h": "test/leaky-relu-operator-tester.h", + "lut-microkernel-tester.h": "test/lut-microkernel-tester.h", + "lut-norm-microkernel-tester.h": "test/lut-norm-microkernel-tester.h", + 
"max-pooling-operator-tester.h": "test/max-pooling-operator-tester.h", + "maxpool-microkernel-tester.h": "test/maxpool-microkernel-tester.h", + "requantization-tester.h": "test/requantization-tester.h", + "rmax-microkernel-tester.h": "test/rmax-microkernel-tester.h", + "sigmoid-operator-tester.h": "test/sigmoid-operator-tester.h", + "softargmax-operator-tester.h": "test/softargmax-operator-tester.h", + "tanh-operator-tester.h": "test/tanh-operator-tester.h", + "test_utils.h": "test/test_utils.h", + "vadd-microkernel-tester.h": "test/vadd-microkernel-tester.h", + "zip-microkernel-tester.h": "test/zip-microkernel-tester.h", + }, + header_namespace = "", + compiler_flags = [ + "-fexceptions", + "-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION", + ], + platform_linker_flags = [ + ( + "^linux.*$", + [ + "-Wl,--no-as-needed", + "-ldl", + "-pthread", + ], + ), + ], + env = { + # These tests fail in sandcastle since they leak memory. Disable LeakSanitizer. + "ASAN_OPTIONS": "detect_leaks=0", + }, + deps = [ + ":pytorch_qnnpack", + third_party("cpuinfo"), + third_party("FP16"), + third_party("pthreadpool"), + ], + ) diff --git a/buckbuild.bzl b/buckbuild.bzl index d302ca42356c..42abc497af77 100644 --- a/buckbuild.bzl +++ b/buckbuild.bzl @@ -131,9 +131,11 @@ THIRD_PARTY_LIBS = { "flatc": ["//third-party/flatbuffers:flatc", "//third_party:flatc"], "fmt": ["//third-party/fmt:fmt", "//third_party:fmt"], "glog": ["//third-party/glog:glog", "//third_party:glog"], + "gmock": ["//xplat/third-party/gmock:gtest", "//third_party:gmock"], + "gtest": ["//xplat/third-party/gmock:gmock", "//third_party:gtest"], "kineto": ["//xplat/kineto/libkineto:libkineto", "//third_party:libkineto"], "omp": ["//xplat/third-party/linker_lib:omp", "//third_party:no-op"], - "psimd": ["//third-party/psimd:psimd", "//third_party:psimd"], + "psimd": ["//xplat/third-party/psimd:psimd", "//third_party:psimd"], "pthreadpool": ["//xplat/third-party/pthreadpool:pthreadpool", "//third_party:pthreadpool"], 
"pthreadpool_header": ["//xplat/third-party/pthreadpool:pthreadpool_header", "//third_party:pthreadpool_header"], "pyyaml": ["//third-party/pyyaml:pyyaml", "//third_party:pyyaml"], diff --git a/third_party/BUCK.oss b/third_party/BUCK.oss index ec1c20e0b3a1..f16c5a9335d3 100644 --- a/third_party/BUCK.oss +++ b/third_party/BUCK.oss @@ -330,3 +330,73 @@ cxx_binary( visibility = ["PUBLIC"], deps = [":flatc_library"], ) + +cxx_library( + name = "gtest_headers", + exported_preprocessor_flags = [ + "-DGTEST_USE_OWN_TR1_TUPLE=0", + "-DGTEST_HAS_TR1_TUPLE=0", + "-D_CRT_DECLARE_NONSTDC_NAMES", + "-D_CRT_NONSTDC_NO_WARNINGS", + "-D_CRT_NONSTDC_NO_DEPRECATE", + ], + include_directories = [ + "googletest/googletest", + ], + public_system_include_directories = [ + "googletest/googletest/include", + ], + raw_headers = glob([ + "googletest/googletest/src/**/*.h", + "googletest/googletest/include/**/*.h", + ]), + visibility = [ + "PUBLIC", + ], +) + +cxx_library( + name = "gtest", + srcs = [ + "googletest/googletest/src/gtest-all.cc", + "googletest/googletest/src/gtest_main.cc", + ], + include_directories = [ + "googletest/googletest", + ], + raw_headers = glob([ + "googletest/googletest/src/**/*.cc", + "googletest/googletest/src/**/*.h", + ]), + visibility = [ + "PUBLIC", + ], + xcode_public_headers_symlinks = True, + exported_deps = [ + ":gtest_headers", + ], +) + +cxx_library( + name = "gmock", + srcs = [ + "googletest/googlemock/src/gmock-all.cc", + ], + include_directories = [ + "googletest/googlemock", + ], + public_system_include_directories = [ + "googletest/googlemock/include", + ], + raw_headers = glob([ + "googletest/googlemock/include/**/*.h", + "googletest/googlemock/src/**/*.cc", + ]), + visibility = ["PUBLIC"], + deps = [ + ":gtest", + ], + exported_deps = [ + ":gtest_headers", + ], +) diff --git a/tools/build_defs/buck_helpers.bzl b/tools/build_defs/buck_helpers.bzl index 521661239802..a084e01eff93 100644 --- a/tools/build_defs/buck_helpers.bzl +++ 
b/tools/build_defs/buck_helpers.bzl
@@ -11,6 +11,7 @@ IGNORED_ATTRIBUTE_PREFIX = [
 IGNORED_ATTRIBUTES = [
     "feature",
     "platforms",
+    "contacts",  # define_qnnpack passes contacts= to fb_xplat_cxx_test; presumably dropped by filter_attributes - confirm
 ]
 
 def filter_attributes(kwgs):
diff --git a/tools/build_defs/fb_xplat_cxx_test.bzl b/tools/build_defs/fb_xplat_cxx_test.bzl
new file mode 100644
index 000000000000..c06176630e08
--- /dev/null
+++ b/tools/build_defs/fb_xplat_cxx_test.bzl
@@ -0,0 +1,18 @@
+# Only used for PyTorch open source BUCK build
+# @lint-ignore-every BUCKRESTRICTEDSYNTAX
+load(":buck_helpers.bzl", "filter_attributes")
+
+def fb_xplat_cxx_test(  # OSS wrapper: declares a cxx_test and appends //third_party:gtest to its deps
+        name,
+        deps = [],
+        **kwgs):  # all other attributes go through filter_attributes before reaching cxx_test
+    if read_config("pt", "is_oss", "0") == "0":  # "0" is the fallback, so an unset pt.is_oss also fails here
+        fail("This file is for open source pytorch build. Do not use it in fbsource!")
+
+    cxx_test(
+        name = name,
+        deps = deps + [  # builds a new list; the caller's deps list is not mutated
+            "//third_party:gtest",
+        ],
+        **filter_attributes(kwgs)
+    )