Move qnnpack to shared BUCK build (#80260)

Differential Revision: D37434340

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80260
Approved by: https://github.com/larryliu0820, https://github.com/malfet
This commit is contained in:
Linbin Yu
2022-06-29 22:40:36 +00:00
committed by PyTorch MergeBot
parent c1fa9fdff9
commit edf76cd9c2
7 changed files with 746 additions and 143 deletions

View File

@ -74,6 +74,10 @@ jobs:
run: |
buck build aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack
- name: Test QNNPACK
run: |
buck test aten/src/ATen/native/quantized/cpu/qnnpack:pytorch_qnnpack_test
- name: Build aten_cpu
run: |
buck build :aten_cpu

View File

@ -1,143 +1,4 @@
load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
load("//:buckbuild.bzl", "third_party")
load(":buckbuild.bzl", "define_qnnpack")
# QNNPACK operator library: compiles the operator sources under src/ into a
# static library, depends on every ukernels_* target declared in this file,
# and re-exports //third_party:cpuinfo to dependents.
cxx_library(
name = "pytorch_qnnpack",
srcs = ['src/add.c', 'src/average-pooling.c', 'src/channel-shuffle.c', 'src/clamp.c', 'src/conv-prepack.cc', 'src/conv-run.cc', 'src/convolution.c', 'src/deconv-run.cc', 'src/deconvolution.c', 'src/fc-dynamic-run.cc', 'src/fc-prepack.cc', 'src/fc-run.cc', 'src/fully-connected.c', 'src/fully-connected-sparse.c', 'src/global-average-pooling.c', 'src/hardsigmoid.c', 'src/hardswish.c', 'src/indirection.c', 'src/init.c', 'src/leaky-relu.c', 'src/max-pooling.c', 'src/operator-delete.c', 'src/operator-run.c', 'src/pack_block_sparse.cc', 'src/sigmoid.c', 'src/softargmax.c', 'src/tanh.c'],
deps = [':qnnp_interface', ':ukernels_asm', ':ukernels_neon', ':ukernels_psimd', ':ukernels_scalar', ':ukernels_sse2', ':ukernels_sse41', ':ukernels_ssse3', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'],
exported_deps = ['//third_party:cpuinfo'],
compiler_flags = ['-O2', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION', '-Wno-deprecated-declarations'],
preferred_linkage = "static",
# Public headers: src/qnnpack/*.h and include/*.h, exposed without a namespace prefix.
exported_headers = subdir_glob([("src", "qnnpack/*.h"),("include", "*.h"),]),
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_compiler_flags = [['armv7', ['-mfpu=neon']], ['^android-armv7$', ['-marm', '-mfloat-abi=softfp']]],
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# SSSE3 requantization kernels. Platforms matching "86" compile these sources
# with -mssse3 -mno-sse4.
cxx_library(
name = "ukernels_ssse3",
srcs = ['wrappers/requantization/gemmlowp-ssse3.c', 'wrappers/requantization/precise-ssse3.c', 'wrappers/requantization/q31-ssse3.c'],
deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'],
exported_deps = [],
compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_compiler_flags = [['86', ['-mssse3', '-mno-sse4']], ['osmeta', ['-mosmeta-no-restrict-sse']]],
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# psimd-based kernels (requantization and sgemm). The only ukernels_* target
# in this file that depends on //third_party:psimd.
cxx_library(
name = "ukernels_psimd",
srcs = ['src/requantization/fp32-psimd.c', 'src/requantization/precise-psimd.c', 'src/sgemm/6x8-psimd.c'],
deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv', '//third_party:psimd'],
exported_deps = [],
compiler_flags = ['-O3', '-ffast-math', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_compiler_flags = [['armv7', ['-mfpu=neon']], ['^android-armv7$', ['-marm', '-mfloat-abi=softfp']]],
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# Scalar kernels (requantization, u8lut32norm, x8lut). Declares no
# platform-specific compiler flags.
cxx_library(
name = "ukernels_scalar",
srcs = ['src/requantization/fp32-scalar.c', 'src/requantization/gemmlowp-scalar.c', 'src/requantization/precise-scalar.c', 'src/requantization/q31-scalar.c', 'src/u8lut32norm/scalar.c', 'src/x8lut/scalar.c'],
deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'],
exported_deps = [],
compiler_flags = ['-O2', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# Assembly (.S) kernels for aarch32 and aarch64 NEON, plus wrappers/dummy.c.
# Declares no deps; android and tizen platforms additionally get -D__ELF__=1.
cxx_library(
name = "ukernels_asm",
srcs = ['wrappers/dummy.c', 'wrappers/hgemm/8x8-aarch32-neonfp16arith.S', 'wrappers/q8conv/4x8-aarch32-neon.S', 'wrappers/q8dwconv/up8x9-aarch32-neon.S', 'wrappers/q8dwconv/up8x9-aarch32-neon-per-channel.S', 'wrappers/q8gemm/4x8-aarch32-neon.S', 'wrappers/q8gemm/4x8-dq-aarch32-neon.S', 'wrappers/q8gemm/4x8c2-xzp-aarch32-neon.S', 'wrappers/q8gemm_sparse/4x4-packA-aarch32-neon.S', 'wrappers/q8gemm_sparse/4x8c1x4-dq-packedA-aarch32-neon.S', 'wrappers/q8gemm_sparse/4x8c8x1-dq-packedA-aarch32-neon.S', 'wrappers/q8gemm_sparse/8x4-packA-aarch64-neon.S', 'wrappers/q8gemm_sparse/8x8c1x4-dq-packedA-aarch64-neon.S', 'wrappers/q8gemm_sparse/8x8c8x1-dq-packedA-aarch64-neon.S', 'wrappers/q8conv/8x8-aarch64-neon.S', 'wrappers/q8gemm/8x8-aarch64-neon.S', 'wrappers/q8gemm/8x8-dq-aarch64-neon.S'],
deps = [],
exported_deps = [],
compiler_flags = ['-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_compiler_flags = [['^iphoneos-armv7$', ['-mfpu=neon-vfpv4']], ['osmeta', ['-mfpu=neon-vfpv4']]],
platform_preprocessor_flags = [['android', ['-D__ELF__=1']], ['tizen', ['-D__ELF__=1']], ['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# SSE4.1 requantization kernels. Platforms matching "86" compile these sources
# with -msse4.1 -mno-sse4.2.
cxx_library(
name = "ukernels_sse41",
srcs = ['wrappers/requantization/gemmlowp-sse4.c', 'wrappers/requantization/precise-sse4.c', 'wrappers/requantization/q31-sse4.c'],
deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'],
exported_deps = [],
compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_compiler_flags = [['86', ['-msse4.1', '-mno-sse4.2']], ['osmeta', ['-mosmeta-no-restrict-sse']]],
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# NEON kernels written in C (sources under wrappers/). Platforms matching
# "armv7" add -mfpu=neon; "^android-armv7$" adds -marm -mfloat-abi=softfp.
cxx_library(
name = "ukernels_neon",
srcs = ['wrappers/q8avgpool/mp8x9p8q-neon.c', 'wrappers/q8avgpool/up8x9-neon.c', 'wrappers/q8avgpool/up8xm-neon.c', 'wrappers/q8conv/4x8-neon.c', 'wrappers/q8conv/8x8-neon.c', 'wrappers/q8dwconv/mp8x25-neon.c', 'wrappers/q8dwconv/mp8x25-neon-per-channel.c', 'wrappers/q8dwconv/mp8x27-neon.c', 'wrappers/q8dwconv/up8x9-neon.c', 'wrappers/q8dwconv/up8x9-neon-per-channel.c', 'wrappers/q8gavgpool/mp8x7p7q-neon.c', 'wrappers/q8gavgpool/up8x7-neon.c', 'wrappers/q8gavgpool/up8xm-neon.c', 'wrappers/q8gemm/4x-sumrows-neon.c', 'wrappers/q8gemm/4x8-dq-neon.c', 'wrappers/q8gemm/4x8-neon.c', 'wrappers/q8gemm/4x8c2-xzp-neon.c', 'wrappers/q8gemm/6x4-neon.c', 'wrappers/q8gemm/8x8-neon.c', 'wrappers/q8vadd/neon.c', 'wrappers/requantization/fp32-neon.c', 'wrappers/requantization/gemmlowp-neon.c', 'wrappers/requantization/precise-neon.c', 'wrappers/requantization/q31-neon.c', 'wrappers/sgemm/5x8-neon.c', 'wrappers/sgemm/6x8-neon.c', 'wrappers/u8clamp/neon.c', 'wrappers/u8maxpool/16x9p8q-neon.c', 'wrappers/u8maxpool/sub16-neon.c', 'wrappers/u8rmax/neon.c', 'wrappers/x8zip/x2-neon.c', 'wrappers/x8zip/x3-neon.c', 'wrappers/x8zip/x4-neon.c', 'wrappers/x8zip/xm-neon.c'],
deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'],
exported_deps = [],
compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_compiler_flags = [['armv7', ['-mfpu=neon']], ['^android-armv7$', ['-marm', '-mfloat-abi=softfp']]],
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# SSE2 kernels (sources under wrappers/). Platforms matching "86" compile
# these sources with -msse2 -mno-sse3.
cxx_library(
name = "ukernels_sse2",
srcs = ['wrappers/q8avgpool/mp8x9p8q-sse2.c', 'wrappers/q8avgpool/up8x9-sse2.c', 'wrappers/q8avgpool/up8xm-sse2.c', 'wrappers/q8conv/4x4c2-sse2.c', 'wrappers/q8dwconv/mp8x25-sse2.c', 'wrappers/q8dwconv/mp8x25-sse2-per-channel.c', 'wrappers/q8dwconv/mp8x27-sse2.c', 'wrappers/q8dwconv/up8x9-sse2.c', 'wrappers/q8dwconv/up8x9-sse2-per-channel.c', 'wrappers/q8gavgpool/mp8x7p7q-sse2.c', 'wrappers/q8gavgpool/up8x7-sse2.c', 'wrappers/q8gavgpool/up8xm-sse2.c', 'wrappers/q8gemm/2x4c8-sse2.c', 'wrappers/q8gemm/4x4c2-dq-sse2.c', 'wrappers/q8gemm/4x4c2-sse2.c', 'wrappers/q8gemm_sparse/8x4c1x4-packed-sse2.c', 'wrappers/q8vadd/sse2.c', 'wrappers/requantization/fp32-sse2.c', 'wrappers/requantization/gemmlowp-sse2.c', 'wrappers/requantization/precise-sse2.c', 'wrappers/requantization/q31-sse2.c', 'wrappers/u8clamp/sse2.c', 'wrappers/u8maxpool/16x9p8q-sse2.c', 'wrappers/u8maxpool/sub16-sse2.c', 'wrappers/u8rmax/sse2.c', 'wrappers/x8zip/x2-sse2.c', 'wrappers/x8zip/x3-sse2.c', 'wrappers/x8zip/x4-sse2.c', 'wrappers/x8zip/xm-sse2.c'],
deps = [':qnnp_interface', '//third_party:cpuinfo', '//third_party:FP16', '//third_party:FXdiv'],
exported_deps = [],
compiler_flags = ['-O3', '-ffast-math', '-Wno-error=unused-variable', '-Wno-shadow', '-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_compiler_flags = [['86', ['-msse2', '-mno-sse3']]],
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
# Source-less target (srcs is empty) that carries the QNNPACK headers and the
# pthreadpool_header dependency; the other targets in this file list it in deps.
cxx_library(
name = "qnnp_interface",
srcs = [],
deps = ['//third_party:pthreadpool_header'],
exported_deps = [],
compiler_flags = ['-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION'],
preferred_linkage = "static",
header_namespace = "",
headers = subdir_glob([("src", "**/*.c"), ("src", "q8gemm_sparse/*.h"), ("src", "qnnpack/*.h"), ("src", "requantization/*.h")]),
link_whole = False,
platform_preprocessor_flags = [['windows', ['-D_WINDOWS', '-D_WIN32', '-DWIN32', '-DNOMINMAX', '-D_CRT_SECURE_NO_WARNINGS', '-D_USE_MATH_DEFINES']], ['windows.*64$', ['-D_WIN64']]],
visibility = ['PUBLIC'],
)
define_qnnpack(third_party)

View File

@ -0,0 +1,647 @@
load("//tools/build_defs:fb_xplat_cxx_library.bzl", "fb_xplat_cxx_library")
load("//tools/build_defs:fb_xplat_cxx_test.bzl", "fb_xplat_cxx_test")
load("//tools/build_defs:glob_defs.bzl", "subdir_glob")
load("//tools/build_defs:platform_defs.bzl", "ANDROID", "APPLE", "APPLETVOS", "CXX", "IOS", "MACOSX")
# Shared by internal and OSS BUCK
# Declares all QNNPACK build targets: the ukernels_* libraries,
# qnnp_interface, pytorch_qnnpack, and pytorch_qnnpack_test.
#
# Args:
#   third_party: callable invoked with a library name (e.g. "cpuinfo") to
#     obtain the dependency target for that third-party library.
#   labels: list forwarded as `labels` to the ukernels_* and qnnp_interface
#     libraries. Defaults to an empty list.
def define_qnnpack(third_party, labels = []):
# Scalar kernels: requantization, u8lut32norm and x8lut sources.
fb_xplat_cxx_library(
# @autodeps-skip
name = "ukernels_scalar",
srcs = [
"src/requantization/fp32-scalar.c",
"src/requantization/gemmlowp-scalar.c",
"src/requantization/precise-scalar.c",
"src/requantization/q31-scalar.c",
"src/u8lut32norm/scalar.c",
"src/x8lut/scalar.c",
],
headers = subdir_glob([
("src", "qnnpack/*.h"),
("src", "requantization/*.h"),
]),
# Empty namespace: headers are included by their path relative to src/.
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O2",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
# Defines QNNP_PRIVATE and QNNP_INTERNAL as empty macros for fbobjc builds.
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
labels = labels,
visibility = ["PUBLIC"],
deps = [
":qnnp_interface",
third_party("cpuinfo"),
third_party("FP16"),
third_party("FXdiv"),
],
)
# SSE2 kernels. Platforms matching "86" compile these sources with
# -msse2 -mno-sse3.
fb_xplat_cxx_library(
# @autodeps-skip
name = "ukernels_sse2",
srcs = [
"wrappers/q8avgpool/mp8x9p8q-sse2.c",
"wrappers/q8avgpool/up8x9-sse2.c",
"wrappers/q8avgpool/up8xm-sse2.c",
"wrappers/q8conv/4x4c2-sse2.c",
"wrappers/q8dwconv/mp8x25-sse2.c",
"wrappers/q8dwconv/mp8x25-sse2-per-channel.c",
"wrappers/q8dwconv/mp8x27-sse2.c",
"wrappers/q8dwconv/up8x9-sse2.c",
"wrappers/q8dwconv/up8x9-sse2-per-channel.c",
"wrappers/q8gavgpool/mp8x7p7q-sse2.c",
"wrappers/q8gavgpool/up8x7-sse2.c",
"wrappers/q8gavgpool/up8xm-sse2.c",
"wrappers/q8gemm/2x4c8-sse2.c",
"wrappers/q8gemm/4x4c2-dq-sse2.c",
"wrappers/q8gemm/4x4c2-sse2.c",
"wrappers/q8gemm_sparse/8x4c1x4-packed-sse2.c",
"wrappers/q8vadd/sse2.c",
"wrappers/requantization/fp32-sse2.c",
"wrappers/requantization/gemmlowp-sse2.c",
"wrappers/requantization/precise-sse2.c",
"wrappers/requantization/q31-sse2.c",
"wrappers/u8clamp/sse2.c",
"wrappers/u8maxpool/16x9p8q-sse2.c",
"wrappers/u8maxpool/sub16-sse2.c",
"wrappers/u8rmax/sse2.c",
"wrappers/x8zip/x2-sse2.c",
"wrappers/x8zip/x3-sse2.c",
"wrappers/x8zip/x4-sse2.c",
"wrappers/x8zip/xm-sse2.c",
],
# The .c files under src/ are exposed as headers here; presumably the
# wrappers/ sources #include them -- TODO confirm.
headers = subdir_glob([
("src", "**/*.c"),
("src", "q8gemm_sparse/*.h"),
("src", "qnnpack/*.h"),
("src", "requantization/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O3",
"-ffast-math",
"-Wno-error=unused-variable",
"-Wno-shadow",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
labels = labels,
platform_compiler_flags = [
(
"86",
[
"-msse2",
"-mno-sse3",
],
),
],
visibility = ["PUBLIC"],
deps = [
":qnnp_interface",
third_party("cpuinfo"),
third_party("FP16"),
third_party("FXdiv"),
],
)
# SSSE3 requantization kernels. Platforms matching "86" compile these sources
# with -mssse3 -mno-sse4.
fb_xplat_cxx_library(
# @autodeps-skip
name = "ukernels_ssse3",
srcs = [
"wrappers/requantization/gemmlowp-ssse3.c",
"wrappers/requantization/precise-ssse3.c",
"wrappers/requantization/q31-ssse3.c",
],
# The .c files under src/ are exposed as headers here; presumably the
# wrappers/ sources #include them -- TODO confirm.
headers = subdir_glob([
("src", "**/*.c"),
("src", "qnnpack/*.h"),
("src", "requantization/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O3",
"-ffast-math",
"-Wno-error=unused-variable",
"-Wno-shadow",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
labels = labels,
platform_compiler_flags = [
(
"86",
[
"-mssse3",
"-mno-sse4",
],
),
(
# By default, osmeta compiler silently ignores -msseXX flags.
# This flag disables this behavior.
"osmeta",
[
"-mosmeta-no-restrict-sse",
],
),
],
visibility = ["PUBLIC"],
deps = [
":qnnp_interface",
third_party("cpuinfo"),
third_party("FP16"),
third_party("FXdiv"),
],
)
# SSE4.1 requantization kernels. Platforms matching "86" compile these sources
# with -msse4.1 -mno-sse4.2.
fb_xplat_cxx_library(
# @autodeps-skip
name = "ukernels_sse41",
srcs = [
"wrappers/requantization/gemmlowp-sse4.c",
"wrappers/requantization/precise-sse4.c",
"wrappers/requantization/q31-sse4.c",
],
# The .c files under src/ are exposed as headers here; presumably the
# wrappers/ sources #include them -- TODO confirm.
headers = subdir_glob([
("src", "**/*.c"),
("src", "qnnpack/*.h"),
("src", "requantization/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O3",
"-ffast-math",
"-Wno-error=unused-variable",
"-Wno-shadow",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
labels = labels,
platform_compiler_flags = [
(
"86",
[
"-msse4.1",
"-mno-sse4.2",
],
),
(
# By default, osmeta compiler silently ignores -msseXX flags.
# This flag disables this behavior.
"osmeta",
[
"-mosmeta-no-restrict-sse",
],
),
],
visibility = ["PUBLIC"],
deps = [
":qnnp_interface",
third_party("cpuinfo"),
third_party("FP16"),
third_party("FXdiv"),
],
)
# Source-less target (no srcs) carrying the QNNPACK headers from include/,
# src/qnnpack/ and src/requantization/, plus the pthreadpool_header dependency.
# The ukernels_* libraries that declare deps, and pytorch_qnnpack, list it.
fb_xplat_cxx_library(
# @autodeps-skip
name = "qnnp_interface",
headers = subdir_glob(
[
("include", "*.h"),
("src", "qnnpack/*.h"),
("src", "requantization/*.h"),
],
),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
force_static = True,
labels = labels,
visibility = ["PUBLIC"],
deps = [
third_party("pthreadpool_header"),
],
)
# Main QNNPACK library: operator implementations from src/, built on top of
# qnnp_interface and every ukernels_* target; re-exports cpuinfo to dependents.
fb_xplat_cxx_library(
# @autodeps-skip
name = "pytorch_qnnpack",
srcs = [
"src/add.c",
"src/average-pooling.c",
"src/channel-shuffle.c",
"src/clamp.c",
"src/conv-prepack.cc",
"src/conv-run.cc",
"src/convolution.c",
"src/deconv-run.cc",
"src/deconvolution.c",
"src/fc-dynamic-run.cc",
"src/fc-prepack.cc",
"src/fc-run.cc",
"src/fully-connected.c",
"src/fully-connected-sparse.c",
"src/global-average-pooling.c",
"src/hardsigmoid.c",
"src/hardswish.c",
"src/indirection.c",
"src/init.c",
"src/leaky-relu.c",
"src/max-pooling.c",
"src/operator-delete.c",
"src/operator-run.c",
"src/pack_block_sparse.cc",
"src/sigmoid.c",
"src/softargmax.c",
"src/tanh.c",
],
# Private headers.
# NOTE(review): ("src", "qnnpack/*.h") is already matched by
# ("src", "**/*.h"); the extra pattern looks redundant -- confirm.
headers = subdir_glob([
("src", "**/*.c"),
("src", "**/*.h"),
("src", "qnnpack/*.h"),
("include", "**/*.h"),
]),
header_namespace = "",
# Public headers made available to dependents.
exported_headers = subdir_glob([
("src", "qnnpack/*.h"),
("include", "*.h"),
]),
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O2",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
# NOTE(review): these labels are fixed; the `labels` argument of
# define_qnnpack is not applied to this target, unlike the other
# libraries -- confirm this is intentional.
labels = [
"supermodule:android/default/pytorch",
"supermodule:ios/default/public.pytorch",
],
platform_compiler_flags = [
(
"armv7",
[
"-mfpu=neon",
],
),
(
"^android-armv7$",
[
"-marm",
"-mfloat-abi=softfp",
],
),
],
visibility = ["PUBLIC"],
deps = [
":qnnp_interface",
":ukernels_asm",
":ukernels_neon",
":ukernels_psimd",
":ukernels_scalar",
":ukernels_sse2",
":ukernels_sse41",
":ukernels_ssse3",
third_party("cpuinfo"),
third_party("FP16"),
third_party("FXdiv"),
],
exported_deps = [
third_party("cpuinfo"),
],
)
# Only ukernels implemented in C with ARM NEON intrinsics
# Platforms matching "armv7" add -mfpu=neon; "^android-armv7$" additionally
# gets -marm -mfloat-abi=softfp.
fb_xplat_cxx_library(
# @autodeps-skip
name = "ukernels_neon",
srcs = [
"wrappers/q8avgpool/mp8x9p8q-neon.c",
"wrappers/q8avgpool/up8x9-neon.c",
"wrappers/q8avgpool/up8xm-neon.c",
"wrappers/q8conv/4x8-neon.c",
"wrappers/q8conv/8x8-neon.c",
"wrappers/q8dwconv/mp8x25-neon.c",
"wrappers/q8dwconv/mp8x25-neon-per-channel.c",
"wrappers/q8dwconv/mp8x27-neon.c",
"wrappers/q8dwconv/up8x9-neon.c",
"wrappers/q8dwconv/up8x9-neon-per-channel.c",
"wrappers/q8gavgpool/mp8x7p7q-neon.c",
"wrappers/q8gavgpool/up8x7-neon.c",
"wrappers/q8gavgpool/up8xm-neon.c",
"wrappers/q8gemm/4x-sumrows-neon.c",
"wrappers/q8gemm/4x8-dq-neon.c",
"wrappers/q8gemm/4x8-neon.c",
"wrappers/q8gemm/4x8c2-xzp-neon.c",
"wrappers/q8gemm/6x4-neon.c",
"wrappers/q8gemm/8x8-neon.c",
"wrappers/q8vadd/neon.c",
"wrappers/requantization/fp32-neon.c",
"wrappers/requantization/gemmlowp-neon.c",
"wrappers/requantization/precise-neon.c",
"wrappers/requantization/q31-neon.c",
"wrappers/sgemm/5x8-neon.c",
"wrappers/sgemm/6x8-neon.c",
"wrappers/u8clamp/neon.c",
"wrappers/u8maxpool/16x9p8q-neon.c",
"wrappers/u8maxpool/sub16-neon.c",
"wrappers/u8rmax/neon.c",
"wrappers/x8zip/x2-neon.c",
"wrappers/x8zip/x3-neon.c",
"wrappers/x8zip/x4-neon.c",
"wrappers/x8zip/xm-neon.c",
],
# The .c files under src/ are exposed as headers here; presumably the
# wrappers/ sources #include them -- TODO confirm.
headers = subdir_glob([
("src", "**/*.c"),
("src", "qnnpack/*.h"),
("src", "requantization/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O3",
"-ffast-math",
"-Wno-error=unused-variable",
"-Wno-shadow",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
labels = labels,
platform_compiler_flags = [
(
"armv7",
[
"-mfpu=neon",
],
),
(
"^android-armv7$",
[
"-marm",
"-mfloat-abi=softfp",
],
),
],
visibility = ["PUBLIC"],
deps = [
":qnnp_interface",
third_party("cpuinfo"),
third_party("FP16"),
third_party("FXdiv"),
],
)
# Hand-written assembly (.S) kernels for aarch32 and aarch64 NEON.
# Declares no deps of its own.
fb_xplat_cxx_library(
# @autodeps-skip
name = "ukernels_asm",
srcs = [
# Dummy empty source file to work around link error on x86-64 Android
# when static library contains no symbols.
"wrappers/dummy.c",
# AArch32 ukernels
"wrappers/hgemm/8x8-aarch32-neonfp16arith.S",
"wrappers/q8conv/4x8-aarch32-neon.S",
"wrappers/q8dwconv/up8x9-aarch32-neon.S",
"wrappers/q8dwconv/up8x9-aarch32-neon-per-channel.S",
"wrappers/q8gemm/4x8-aarch32-neon.S",
"wrappers/q8gemm/4x8-dq-aarch32-neon.S",
"wrappers/q8gemm/4x8c2-xzp-aarch32-neon.S",
"wrappers/q8gemm_sparse/4x4-packA-aarch32-neon.S",
"wrappers/q8gemm_sparse/4x8c1x4-dq-packedA-aarch32-neon.S",
"wrappers/q8gemm_sparse/4x8c8x1-dq-packedA-aarch32-neon.S",
# AArch64 sparse (q8gemm_sparse) ukernels
"wrappers/q8gemm_sparse/8x4-packA-aarch64-neon.S",
"wrappers/q8gemm_sparse/8x8c1x4-dq-packedA-aarch64-neon.S",
"wrappers/q8gemm_sparse/8x8c8x1-dq-packedA-aarch64-neon.S",
# AArch64 ukernels
"wrappers/q8conv/8x8-aarch64-neon.S",
"wrappers/q8gemm/8x8-aarch64-neon.S",
"wrappers/q8gemm/8x8-dq-aarch64-neon.S",
],
# The .S files under src/ are exposed as headers here; presumably the
# wrappers/ sources #include them -- TODO confirm.
headers = subdir_glob([
("src", "qnnpack/assembly.h"),
("src", "**/*.S"),
("src", "requantization/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
labels = labels,
platform_compiler_flags = [
(
# iOS assembler doesn't let us specify ISA in the assembly file,
# so this must be set to the highest version of ISA of any of the
# assembly functions
"^iphoneos-armv7$",
[
"-mfpu=neon-vfpv4",
],
),
(
"osmeta",
[
"-mfpu=neon-vfpv4",
],
),
],
platform_preprocessor_flags = [
(
"android",
[
# Workaround for osmeta-android, which builds for ELF, but hides it
"-D__ELF__=1",
],
),
(
"tizen",
[
# Workaround for osmeta-tizen, which builds for ELF, but hides it
"-D__ELF__=1",
],
),
],
visibility = ["PUBLIC"],
)
# psimd-based kernels (requantization and sgemm). The only ukernels_* target
# that depends on third_party("psimd").
fb_xplat_cxx_library(
# @autodeps-skip
name = "ukernels_psimd",
srcs = [
"src/requantization/fp32-psimd.c",
"src/requantization/precise-psimd.c",
"src/sgemm/6x8-psimd.c",
],
headers = subdir_glob([
("src", "**/*.c"),
("src", "qnnpack/*.h"),
]),
header_namespace = "",
apple_sdks = (IOS, MACOSX, APPLETVOS),
compiler_flags = [
"-O3",
"-ffast-math",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
fbobjc_preprocessor_flags = [
"-DQNNP_PRIVATE=",
"-DQNNP_INTERNAL=",
],
force_static = True,
labels = labels,
platform_compiler_flags = [
(
"armv7",
[
"-mfpu=neon",
],
),
(
"^android-armv7$",
[
"-marm",
"-mfloat-abi=softfp",
],
),
],
visibility = ["PUBLIC"],
deps = [
":qnnp_interface",
third_party("cpuinfo"),
third_party("FP16"),
third_party("FXdiv"),
third_party("psimd"),
],
)
# C++ test target for :pytorch_qnnpack, built from the sources under test/.
fb_xplat_cxx_test(
# @autodeps-skip
fbandroid_use_instrumentation_test = True,
contacts = ["oncall+ai_infra_mobile_platform@xmail.facebook.com"],
platforms = (CXX, APPLE, ANDROID),
apple_sdks = (IOS, MACOSX),
name = "pytorch_qnnpack_test",
srcs = [
"test/add.cc",
"test/average-pooling.cc",
"test/channel-shuffle.cc",
"test/clamp.cc",
"test/convolution.cc",
"test/deconvolution.cc",
"test/fully-connected.cc",
"test/fully-connected-sparse.cc",
"test/global-average-pooling.cc",
"test/hardsigmoid.cc",
"test/hardswish.cc",
"test/leaky-relu.cc",
"test/max-pooling.cc",
"test/q8avgpool.cc",
"test/q8conv.cc",
"test/q8dwconv.cc",
"test/q8gavgpool.cc",
"test/q8gemm_sparse.cc",
"test/q8vadd.cc",
"test/requantization.cc",
"test/sgemm.cc",
"test/sigmoid.cc",
"test/softargmax.cc",
"test/tanh.cc",
"test/u8clamp.cc",
"test/u8lut32norm.cc",
"test/u8maxpool.cc",
"test/u8rmax.cc",
"test/x8lut.cc",
"test/x8zip.cc",
],
# Maps include name -> file path: with the empty header_namespace below,
# each tester header is includable by its bare file name (no "test/" prefix).
headers = {
"add-operator-tester.h": "test/add-operator-tester.h",
"average-pooling-operator-tester.h": "test/average-pooling-operator-tester.h",
"avgpool-microkernel-tester.h": "test/avgpool-microkernel-tester.h",
"channel-shuffle-operator-tester.h": "test/channel-shuffle-operator-tester.h",
"clamp-microkernel-tester.h": "test/clamp-microkernel-tester.h",
"clamp-operator-tester.h": "test/clamp-operator-tester.h",
"convolution-operator-tester.h": "test/convolution-operator-tester.h",
"deconvolution-operator-tester.h": "test/deconvolution-operator-tester.h",
"dwconv-microkernel-tester.h": "test/dwconv-microkernel-tester.h",
"fully-connected-operator-tester.h": "test/fully-connected-operator-tester.h",
"fully-connected-sparse-operator-tester.h": "test/fully-connected-sparse-operator-tester.h",
"gavgpool-microkernel-tester.h": "test/gavgpool-microkernel-tester.h",
"gemm-block-sparse-microkernel-tester.h": "test/gemm-block-sparse-microkernel-tester.h",
"gemm-microkernel-tester.h": "test/gemm-microkernel-tester.h",
"global-average-pooling-operator-tester.h": "test/global-average-pooling-operator-tester.h",
"hardsigmoid-operator-tester.h": "test/hardsigmoid-operator-tester.h",
"hardswish-operator-tester.h": "test/hardswish-operator-tester.h",
"leaky-relu-operator-tester.h": "test/leaky-relu-operator-tester.h",
"lut-microkernel-tester.h": "test/lut-microkernel-tester.h",
"lut-norm-microkernel-tester.h": "test/lut-norm-microkernel-tester.h",
"max-pooling-operator-tester.h": "test/max-pooling-operator-tester.h",
"maxpool-microkernel-tester.h": "test/maxpool-microkernel-tester.h",
"requantization-tester.h": "test/requantization-tester.h",
"rmax-microkernel-tester.h": "test/rmax-microkernel-tester.h",
"sigmoid-operator-tester.h": "test/sigmoid-operator-tester.h",
"softargmax-operator-tester.h": "test/softargmax-operator-tester.h",
"tanh-operator-tester.h": "test/tanh-operator-tester.h",
"test_utils.h": "test/test_utils.h",
"vadd-microkernel-tester.h": "test/vadd-microkernel-tester.h",
"zip-microkernel-tester.h": "test/zip-microkernel-tester.h",
},
header_namespace = "",
compiler_flags = [
"-fexceptions",
"-DPYTORCH_QNNPACK_RUNTIME_QUANTIZATION",
],
# Extra linker flags applied only on platforms matching "^linux.*$".
platform_linker_flags = [
(
"^linux.*$",
[
"-Wl,--no-as-needed",
"-ldl",
"-pthread",
],
),
],
env = {
# These tests fail in sandcastle since they leak memory. Disable LeakSanitizer.
"ASAN_OPTIONS": "detect_leaks=0",
},
deps = [
":pytorch_qnnpack",
third_party("cpuinfo"),
third_party("FP16"),
third_party("pthreadpool"),
],
)

View File

@ -131,9 +131,11 @@ THIRD_PARTY_LIBS = {
"flatc": ["//third-party/flatbuffers:flatc", "//third_party:flatc"],
"fmt": ["//third-party/fmt:fmt", "//third_party:fmt"],
"glog": ["//third-party/glog:glog", "//third_party:glog"],
"gmock": ["//xplat/third-party/gmock:gtest", "//third_party:gmock"],
"gtest": ["//xplat/third-party/gmock:gmock", "//third_party:gtest"],
"kineto": ["//xplat/kineto/libkineto:libkineto", "//third_party:libkineto"],
"omp": ["//xplat/third-party/linker_lib:omp", "//third_party:no-op"],
"psimd": ["//third-party/psimd:psimd", "//third_party:psimd"],
"psimd": ["//xplat/third-party/psimd:psimd", "//third_party:psimd"],
"pthreadpool": ["//xplat/third-party/pthreadpool:pthreadpool", "//third_party:pthreadpool"],
"pthreadpool_header": ["//xplat/third-party/pthreadpool:pthreadpool_header", "//third_party:pthreadpool_header"],
"pyyaml": ["//third-party/pyyaml:pyyaml", "//third_party:pyyaml"],

70
third_party/BUCK.oss vendored
View File

@ -330,3 +330,73 @@ cxx_binary(
visibility = ["PUBLIC"],
deps = [":flatc_library"],
)
# Header-only googletest target (no srcs): exposes
# googletest/googletest/include as a system include directory and exports the
# GTEST_* / _CRT_* preprocessor defines below to every dependent.
cxx_library(
name = "gtest_headers",
exported_preprocessor_flags = [
"-DGTEST_USE_OWN_TR1_TUPLE=0",
"-DGTEST_HAS_TR1_TUPLE=0",
"-D_CRT_DECLARE_NONSTDC_NAMES",
"-D_CRT_NONSTDC_NO_WARNINGS",
"-D_CRT_NONSTDC_NO_DEPRECATE",
],
include_directories = [
"googletest/googletest",
],
public_system_include_directories = [
"googletest/googletest/include",
],
raw_headers = glob([
"googletest/googletest/src/**/*.h",
"googletest/googletest/include/**/*.h",
]),
visibility = [
"PUBLIC",
],
)
# googletest library; re-exports :gtest_headers to dependents.
# NOTE(review): gtest_main.cc is compiled into this library, so dependents
# presumably pick up gtest's main() and must not define their own -- confirm.
cxx_library(
name = "gtest",
srcs = [
"googletest/googletest/src/gtest-all.cc",
"googletest/googletest/src/gtest_main.cc",
],
include_directories = [
"googletest/googletest",
],
# .cc files are listed as raw headers; presumably gtest-all.cc #includes
# the other src/*.cc files -- TODO confirm.
raw_headers = glob([
"googletest/googletest/src/**/*.cc",
"googletest/googletest/src/**/*.h",
]),
visibility = [
"PUBLIC",
],
xcode_public_headers_symlinks = True,
exported_deps = [
":gtest_headers",
],
)
# googlemock library built from gmock-all.cc. Depends on :gtest, exposes
# googletest/googlemock/include as a system include directory, and re-exports
# :gtest_headers to dependents.
cxx_library(
name = "gmock",
srcs = [
"googletest/googlemock/src/gmock-all.cc",
],
include_directories = [
"googletest/googlemock",
],
public_system_include_directories = [
"googletest/googlemock/include",
],
# .cc files are listed as raw headers; presumably gmock-all.cc #includes
# the other src/*.cc files -- TODO confirm.
raw_headers = glob([
"googletest/googlemock/include/**/*.h",
"googletest/googlemock/src/**/*.cc",
]),
visibility = ["PUBLIC"],
deps = [
":gtest",
],
exported_deps = [
":gtest_headers",
],
)

View File

@ -11,6 +11,7 @@ IGNORED_ATTRIBUTE_PREFIX = [
IGNORED_ATTRIBUTES = [
"feature",
"platforms",
"contacts",
]
def filter_attributes(kwgs):

View File

@ -0,0 +1,18 @@
# Only used for PyTorch open source BUCK build
# @lint-ignore-every BUCKRESTRICTEDSYNTAX
load(":buck_helpers.bzl", "filter_attributes")
def fb_xplat_cxx_test(
        name,
        deps = [],
        **kwgs):
    """Open-source stand-in for the `fb_xplat_cxx_test` macro.

    Declares a `cxx_test` target called `name`. Its dependencies are `deps`
    followed by `//third_party:gtest`. Every other keyword argument is passed
    through `filter_attributes` before being forwarded to `cxx_test`.

    Fails the build when the `pt.is_oss` config value is "0", which is also
    the value assumed when the option is not set.

    Args:
      name: name of the test target.
      deps: dependency targets of the test; gtest is appended automatically.
      **kwgs: remaining attributes, filtered and forwarded to `cxx_test`.
    """
    oss_flag = read_config("pt", "is_oss", "0")
    if oss_flag == "0":
        fail("This file is for open source pytorch build. Do not use it in fbsource!")

    # Append gtest after the caller's deps, then filter the remaining attributes.
    test_deps = deps + ["//third_party:gtest"]
    forwarded_attrs = filter_attributes(kwgs)

    cxx_test(
        name = name,
        deps = test_deps,
        **forwarded_attrs
    )