Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[pytorch][mobile] deprecate the LLVM-based static analyzer (#68180)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/68180

Since we've open sourced the tracing-based selective build, we can deprecate the op-dependency-graph-based selective build and the static analyzer tool that produces the dependency graph.

ghstack-source-id: 143108377
Test Plan: CIs
Reviewed By: seemethere
Differential Revision: D32358467
fbshipit-source-id: c61523706b85a49361416da2230ec1b035b8b99c

Committed by: Facebook GitHub Bot
Parent: 301369a774
Commit: 3dc0754c53
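For context, the two selective-build flows differ roughly as follows. This is a hedged sketch based only on the flags that appear in this diff (SELECTED_OP_LIST, OP_DEPENDENCY, TRACING_BASED); the yaml placeholders come from the removed CMake warning text, the traced_ops.yaml name is hypothetical, and the exact tracing step that produces the op list is not part of this commit.

    # Deprecated flow (removed by this PR): a static analysis of libtorch emits an
    # op dependency graph, and the custom build expands the model's root ops
    # through that graph before registering operators.
    LLVM_DIR=/usr ANALYZE_TORCH=1 tools/code_analyzer/build.sh
    scripts/build_android.sh \
      -DSELECTED_OP_LIST=<op_list.yaml> \
      -DOP_DEPENDENCY=<dependency_graph.yaml>

    # Remaining flow: tracing-based selective build. The traced op list is passed
    # straight through (--op_selection_yaml_path in Codegen.cmake); no dependency
    # graph is needed. TRACING_BASED as a -D flag is an assumption here.
    scripts/build_android.sh \
      -DTRACING_BASED=ON \
      -DSELECTED_OP_LIST=traced_ops.yaml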
.flake8 (1 line changed)
@@ -16,7 +16,6 @@ per-file-ignores = __init__.py: F401 torch/utils/cpp_extension.py: B950
 optional-ascii-coding = True
 exclude =
     ./.git,
-    ./build_code_analyzer,
     ./build_test_custom_build,
     ./build,
     ./caffe2,
.github/generated-ciflow-ruleset.json (generated, vendored; 7 lines changed)
@@ -19,8 +19,6 @@
         "linux-vulkan-bionic-py3.6-clang9",
         "linux-xenial-cuda11.3-py3.6-gcc7",
         "linux-xenial-py3-clang5-mobile-build",
-        "linux-xenial-py3-clang5-mobile-code-analysis",
-        "linux-xenial-py3-clang5-mobile-custom-build-dynamic",
         "linux-xenial-py3-clang5-mobile-custom-build-static",
         "linux-xenial-py3.6-clang7-asan",
         "linux-xenial-py3.6-clang7-onnx",
@@ -77,7 +75,6 @@
         "linux-vulkan-bionic-py3.6-clang9",
         "linux-xenial-cuda11.3-py3.6-gcc7",
         "linux-xenial-py3-clang5-mobile-build",
-        "linux-xenial-py3-clang5-mobile-custom-build-dynamic",
         "linux-xenial-py3-clang5-mobile-custom-build-static",
         "linux-xenial-py3.6-clang7-asan",
         "linux-xenial-py3.6-clang7-onnx",
@@ -113,8 +110,6 @@
         "linux-vulkan-bionic-py3.6-clang9",
         "linux-xenial-cuda11.3-py3.6-gcc7",
         "linux-xenial-py3-clang5-mobile-build",
-        "linux-xenial-py3-clang5-mobile-code-analysis",
-        "linux-xenial-py3-clang5-mobile-custom-build-dynamic",
         "linux-xenial-py3-clang5-mobile-custom-build-static",
         "linux-xenial-py3.6-clang7-asan",
         "linux-xenial-py3.6-clang7-onnx",
@@ -143,8 +138,6 @@
     ],
     "ciflow/mobile": [
         "linux-xenial-py3-clang5-mobile-build",
-        "linux-xenial-py3-clang5-mobile-code-analysis",
-        "linux-xenial-py3-clang5-mobile-custom-build-dynamic",
         "linux-xenial-py3-clang5-mobile-custom-build-static"
     ],
     "ciflow/noarch": [
.github/scripts/generate_ci_workflows.py (vendored; 22 lines changed)
@@ -368,17 +368,6 @@ LINUX_WORKFLOWS = [
             labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT},
         ),
     ),
-    CIWorkflow(
-        arch="linux",
-        build_environment="linux-xenial-py3-clang5-mobile-custom-build-dynamic",
-        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
-        test_runner_type=LINUX_CPU_TEST_RUNNER,
-        build_generates_artifacts=False,
-        exclude_test=True,
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT},
-        ),
-    ),
     CIWorkflow(
         arch="linux",
         build_environment="linux-xenial-py3-clang5-mobile-custom-build-static",
@@ -390,17 +379,6 @@ LINUX_WORKFLOWS = [
             labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT},
         ),
     ),
-    CIWorkflow(
-        arch="linux",
-        build_environment="linux-xenial-py3-clang5-mobile-code-analysis",
-        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
-        test_runner_type=LINUX_CPU_TEST_RUNNER,
-        build_generates_artifacts=False,
-        exclude_test=True,
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE},
-        ),
-    ),
     CIWorkflow(
         arch="linux",
         build_environment="linux-xenial-py3.6-clang7-asan",
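The two workflow YAML files deleted below are @generated; per their own headers they come from the Jinja template and generation script named in those files. A plausible way to refresh them after editing the CIWorkflow list above (the exact invocation used by the author is not shown in this commit) would be:

    # Regenerate .github/workflows/generated-*.yml from the CIWorkflow definitions;
    # workflows dropped from LINUX_WORKFLOWS simply stop being emitted, so the two
    # mobile code-analysis / custom-build-dynamic files below disappear.
    python .github/scripts/generate_ci_workflows.py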
.github/workflows/generated-linux-xenial-py3-clang5-mobile-code-analysis.yml (generated, vendored; file deleted, 244 lines)
@ -1,244 +0,0 @@
|
||||
# @generated DO NOT EDIT MANUALLY
|
||||
# Template is at: .github/templates/linux_ci_workflow.yml.j2
|
||||
# Generation script: .github/scripts/generate_ci_workflows.py
|
||||
name: linux-xenial-py3-clang5-mobile-code-analysis
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, unassigned]
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- release/*
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
BUILD_ENVIRONMENT: linux-xenial-py3-clang5-mobile-code-analysis
|
||||
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c
|
||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
||||
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
|
||||
TORCH_CUDA_ARCH_LIST: 5.2
|
||||
IN_CI: 1
|
||||
IS_GHA: 1
|
||||
# This is used for the phase of adding wheel tests only, will be removed once completed
|
||||
IN_WHEEL_TEST: 1
|
||||
# Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
|
||||
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
|
||||
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
|
||||
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
concurrency:
|
||||
group: linux-xenial-py3-clang5-mobile-code-analysis-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
ciflow_should_run:
|
||||
runs-on: ubuntu-18.04
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assigneed.login == 'pytorchbot') }}
|
||||
LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile') }}
|
||||
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
|
||||
if: ${{ (github.repository == 'pytorch/pytorch') && (
|
||||
(github.event_name == 'push') ||
|
||||
(github.event_name == 'schedule') ||
|
||||
(contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile')) ||
|
||||
(false))
|
||||
}}
|
||||
steps:
|
||||
- name: noop
|
||||
run: echo running ciflow_should_run
|
||||
- name: print labels
|
||||
run: echo "${LABELS}"
|
||||
|
||||
build:
|
||||
runs-on: linux.2xlarge
|
||||
needs: [ciflow_should_run]
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: linux-xenial-py3-clang5-mobile-code-analysis-build
|
||||
outputs:
|
||||
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: Log in to ECR
|
||||
env:
|
||||
AWS_RETRY_MODE: standard
|
||||
AWS_MAX_ATTEMPTS: 5
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
|
||||
aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
|
||||
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${ALPINE_IMAGE}"
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE:?}/*"
|
||||
rm -f ~/.ssh/authorized_keys
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Calculate docker image tag
|
||||
id: calculate-tag
|
||||
run: |
|
||||
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
|
||||
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
|
||||
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
|
||||
echo "::set-output name=docker_tag::${DOCKER_TAG}"
|
||||
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
|
||||
- name: Check if image should be built
|
||||
id: check
|
||||
env:
|
||||
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
|
||||
run: |
|
||||
set -x
|
||||
# Check if image already exists, if it does then skip building it
|
||||
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
|
||||
exit 0
|
||||
fi
|
||||
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
|
||||
# if we're on the base branch then use the parent commit
|
||||
MERGE_BASE=$(git rev-parse HEAD~)
|
||||
else
|
||||
# otherwise we're on a PR, so use the most recent base commit
|
||||
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
|
||||
fi
|
||||
# Covers the case where a previous tag doesn't exist for the tree
|
||||
# this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
|
||||
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
|
||||
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
|
||||
exit 1
|
||||
fi
|
||||
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
|
||||
# If no image exists but the hash is the same as the previous hash then we should error out here
|
||||
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
|
||||
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
|
||||
echo " contact the PyTorch team to restore the original images"
|
||||
exit 1
|
||||
fi
|
||||
echo ::set-output name=rebuild::yes
|
||||
- name: Build and push docker image
|
||||
if: ${{ steps.check.outputs.rebuild }}
|
||||
env:
|
||||
DOCKER_SKIP_S3_UPLOAD: 1
|
||||
working-directory: .circleci/docker
|
||||
run: |
|
||||
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
|
||||
./build_docker.sh
|
||||
- name: Pull Docker image
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Build
|
||||
env:
|
||||
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
run: |
|
||||
# detached container should get cleaned up by teardown_ec2_linux
|
||||
container_name=$(docker run \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e JOB_BASE_NAME \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e IS_GHA \
|
||||
-e CIRCLE_PR_NUMBER \
|
||||
-e CIRCLE_SHA1 \
|
||||
-e CIRCLE_BRANCH \
|
||||
-e GITHUB_RUN_ID \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
|
||||
-e SKIP_SCCACHE_INITIALIZATION=1 \
|
||||
-e TORCH_CUDA_ARCH_LIST \
|
||||
-e PR_LABELS \
|
||||
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--tty \
|
||||
--detach \
|
||||
--user jenkins \
|
||||
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
|
||||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
|
||||
- name: Display and upload binary build size statistics (Click Me)
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
|
||||
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
|
||||
run: |
|
||||
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
|
||||
export COMMIT_TIME
|
||||
pip3 install requests==2.26 boto3==1.16.34
|
||||
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Kill containers, clean up images
|
||||
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Clean up docker images
|
||||
if: always()
|
||||
run: |
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
.github/workflows/generated-linux-xenial-py3-clang5-mobile-custom-build-dynamic.yml (generated, vendored; file deleted, 244 lines)
@ -1,244 +0,0 @@
|
||||
# @generated DO NOT EDIT MANUALLY
|
||||
# Template is at: .github/templates/linux_ci_workflow.yml.j2
|
||||
# Generation script: .github/scripts/generate_ci_workflows.py
|
||||
name: linux-xenial-py3-clang5-mobile-custom-build-dynamic
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, unassigned]
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- release/*
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
BUILD_ENVIRONMENT: linux-xenial-py3-clang5-mobile-custom-build-dynamic
|
||||
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c
|
||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
||||
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
|
||||
TORCH_CUDA_ARCH_LIST: 5.2
|
||||
IN_CI: 1
|
||||
IS_GHA: 1
|
||||
# This is used for the phase of adding wheel tests only, will be removed once completed
|
||||
IN_WHEEL_TEST: 1
|
||||
# Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
|
||||
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
|
||||
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
|
||||
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
concurrency:
|
||||
group: linux-xenial-py3-clang5-mobile-custom-build-dynamic-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
|
||||
ciflow_should_run:
|
||||
runs-on: ubuntu-18.04
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assigneed.login == 'pytorchbot') }}
|
||||
LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile') }}
|
||||
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
|
||||
if: ${{ (github.repository == 'pytorch/pytorch') && (
|
||||
(github.event_name == 'push') ||
|
||||
(github.event_name == 'schedule') ||
|
||||
(contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile')) ||
|
||||
((github.event_name == 'pull_request' && github.event.action != 'unassigned') && !contains(join(github.event.pull_request.labels.*.name), 'ciflow/')))
|
||||
}}
|
||||
steps:
|
||||
- name: noop
|
||||
run: echo running ciflow_should_run
|
||||
- name: print labels
|
||||
run: echo "${LABELS}"
|
||||
|
||||
build:
|
||||
runs-on: linux.2xlarge
|
||||
needs: [ciflow_should_run]
|
||||
timeout-minutes: 240
|
||||
env:
|
||||
JOB_BASE_NAME: linux-xenial-py3-clang5-mobile-custom-build-dynamic-build
|
||||
outputs:
|
||||
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
function get_ec2_metadata() {
|
||||
# Pulled from instance metadata endpoint for EC2
|
||||
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
|
||||
category=$1
|
||||
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
|
||||
}
|
||||
echo "ami-id: $(get_ec2_metadata ami-id)"
|
||||
echo "instance-id: $(get_ec2_metadata instance-id)"
|
||||
echo "instance-type: $(get_ec2_metadata instance-type)"
|
||||
- name: Log in to ECR
|
||||
env:
|
||||
AWS_RETRY_MODE: standard
|
||||
AWS_MAX_ATTEMPTS: 5
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
|
||||
aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
|
||||
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${ALPINE_IMAGE}"
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Clean workspace
|
||||
run: |
|
||||
rm -rf "${GITHUB_WORKSPACE:?}/*"
|
||||
rm -f ~/.ssh/authorized_keys
|
||||
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
|
||||
uses: seemethere/add-github-ssh-key@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Preserve github env variables for use in docker
|
||||
run: |
|
||||
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
|
||||
- name: Checkout PyTorch
|
||||
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
|
||||
with:
|
||||
# deep clone, to allow use of git merge-base
|
||||
fetch-depth: 0
|
||||
submodules: recursive
|
||||
- name: Calculate docker image tag
|
||||
id: calculate-tag
|
||||
run: |
|
||||
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
|
||||
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
|
||||
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
|
||||
echo "::set-output name=docker_tag::${DOCKER_TAG}"
|
||||
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
|
||||
- name: Check if image should be built
|
||||
id: check
|
||||
env:
|
||||
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
|
||||
run: |
|
||||
set -x
|
||||
# Check if image already exists, if it does then skip building it
|
||||
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
|
||||
exit 0
|
||||
fi
|
||||
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
|
||||
# if we're on the base branch then use the parent commit
|
||||
MERGE_BASE=$(git rev-parse HEAD~)
|
||||
else
|
||||
# otherwise we're on a PR, so use the most recent base commit
|
||||
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
|
||||
fi
|
||||
# Covers the case where a previous tag doesn't exist for the tree
|
||||
# this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
|
||||
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
|
||||
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
|
||||
exit 1
|
||||
fi
|
||||
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
|
||||
# If no image exists but the hash is the same as the previous hash then we should error out here
|
||||
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
|
||||
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
|
||||
echo " contact the PyTorch team to restore the original images"
|
||||
exit 1
|
||||
fi
|
||||
echo ::set-output name=rebuild::yes
|
||||
- name: Build and push docker image
|
||||
if: ${{ steps.check.outputs.rebuild }}
|
||||
env:
|
||||
DOCKER_SKIP_S3_UPLOAD: 1
|
||||
working-directory: .circleci/docker
|
||||
run: |
|
||||
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
|
||||
./build_docker.sh
|
||||
- name: Pull Docker image
|
||||
run: |
|
||||
retry () {
|
||||
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
|
||||
}
|
||||
retry docker pull "${DOCKER_IMAGE}"
|
||||
- name: Parse ref
|
||||
id: parse-ref
|
||||
run: .github/scripts/parse_ref.py
|
||||
- name: Build
|
||||
env:
|
||||
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
run: |
|
||||
# detached container should get cleaned up by teardown_ec2_linux
|
||||
container_name=$(docker run \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e JOB_BASE_NAME \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e IS_GHA \
|
||||
-e CIRCLE_PR_NUMBER \
|
||||
-e CIRCLE_SHA1 \
|
||||
-e CIRCLE_BRANCH \
|
||||
-e GITHUB_RUN_ID \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
|
||||
-e SKIP_SCCACHE_INITIALIZATION=1 \
|
||||
-e TORCH_CUDA_ARCH_LIST \
|
||||
-e PR_LABELS \
|
||||
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
|
||||
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
|
||||
--security-opt seccomp=unconfined \
|
||||
--cap-add=SYS_PTRACE \
|
||||
--tty \
|
||||
--detach \
|
||||
--user jenkins \
|
||||
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
|
||||
-w /var/lib/jenkins/workspace \
|
||||
"${DOCKER_IMAGE}"
|
||||
)
|
||||
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
|
||||
- name: Display and upload binary build size statistics (Click Me)
|
||||
# temporary hack: set CIRCLE_* vars, until we update
|
||||
# tools/stats/print_test_stats.py to natively support GitHub Actions
|
||||
env:
|
||||
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
|
||||
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
|
||||
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
|
||||
run: |
|
||||
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
|
||||
export COMMIT_TIME
|
||||
pip3 install requests==2.26 boto3==1.16.34
|
||||
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
|
||||
- name: Chown workspace
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Chown workspace
|
||||
if: always()
|
||||
run: |
|
||||
# Ensure the working directory gets chowned back to the current user
|
||||
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
|
||||
- name: Kill containers, clean up images
|
||||
if: always()
|
||||
run: |
|
||||
# ignore expansion of "docker ps -q" since it could be empty
|
||||
# shellcheck disable=SC2046
|
||||
docker stop $(docker ps -q) || true
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
||||
- name: Hold runner for 2 hours or until ssh sessions have drained
|
||||
# Always hold for active ssh sessions
|
||||
if: always()
|
||||
run: .github/scripts/wait_for_ssh_to_drain.sh
|
||||
- name: Clean up docker images
|
||||
if: always()
|
||||
run: |
|
||||
# Prune all of the docker images
|
||||
docker system prune -af
|
@@ -1,21 +0,0 @@ (deleted file: .jenkins/pytorch/build-mobile-code-analysis.sh)
#!/usr/bin/env bash
# DO NOT ADD 'set -x' not to reveal CircleCI secret context environment variables
set -eu -o pipefail

# This script builds and runs code analyzer tool to generate aten op dependency
# graph for custom mobile build.

# shellcheck disable=SC2034
COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}"

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

echo "Clang version:"
clang --version

LLVM_DIR="$(llvm-config-5.0 --prefix)"
export LLVM_DIR
echo "LLVM_DIR: ${LLVM_DIR}"

time ANALYZE_TEST=1 CHECK_RESULT=1 tools/code_analyzer/build.sh
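The deleted CI job above was the main consumer of the analyzer's self-test mode. For reference, a sketch of the analyzer's environment-variable driven entry points as they appear in the scripts elsewhere in this diff (LLVM_DIR must point at an LLVM dev toolchain; output paths are the defaults from the deleted tools/code_analyzer/build.sh):

    # Self-test: analyze the small op-dependency test project under
    # test/mobile/op_deps and compare against expected_deps.yaml.
    LLVM_DIR="$(llvm-config-5.0 --prefix)" ANALYZE_TEST=1 tools/code_analyzer/build.sh

    # Full run: analyze mobile libtorch and emit the op dependency graph
    # at build_code_analyzer/work/torch_result.yaml.
    LLVM_DIR=/usr ANALYZE_TORCH=1 tools/code_analyzer/build.sh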
@@ -26,11 +26,6 @@ retry pip install --pre torch torchvision \
 # binary, and running forward pass with a real model.
 if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
   TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
-elif [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-dynamic* ]]; then
-  LLVM_DIR="$(llvm-config-5.0 --prefix)"
-  export LLVM_DIR
-  echo "LLVM_DIR: ${LLVM_DIR}"
-  TEST_CUSTOM_BUILD_DYNAMIC=1 test/mobile/custom_build/build.sh
 else
   TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
 fi
.jenkins/pytorch/build.sh:
@@ -20,10 +20,6 @@ if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
   exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
 fi

-if [[ "$BUILD_ENVIRONMENT" == *-mobile-code-analysis* ]]; then
-  exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile-code-analysis.sh" "$@"
-fi
-
 if [[ "$BUILD_ENVIRONMENT" == *linux-xenial-cuda11.3* ]]; then
   # Enabling DEPLOY build (embedded torch python interpreter, experimental)
   # only on one config for now, can expand later
@@ -3,7 +3,6 @@ code = 'FLAKE8'
 include_patterns = ['**/*.py']
 exclude_patterns = [
     '.git/**',
-    'build_code_analyzer',
     'build_test_custom_build/**',
     'build/**',
     'caffe2/**',
CMakeLists.txt (top level):
@@ -423,8 +423,6 @@ else()
 endif()
 set(SELECTED_OP_LIST "" CACHE STRING
   "Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default.")
-set(OP_DEPENDENCY "" CACHE STRING
-  "Path to the yaml file that contains the op dependency graph for custom build.")
 set(STATIC_DISPATCH_BACKEND "" CACHE STRING
   "Name of the backend for which static dispatch code is generated, e.g.: CPU.")
 option(
cmake/Codegen.cmake:
@@ -173,37 +173,16 @@
   endif()

   if(SELECTED_OP_LIST)
-    # With static dispatch we can omit the OP_DEPENDENCY flag. It will not calculate the transitive closure
-    # of used ops. It only needs to register used root ops.
     if(TRACING_BASED)
       message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
       list(APPEND CUSTOM_BUILD_FLAGS
         --op_selection_yaml_path ${SELECTED_OP_LIST})
-    elseif(NOT STATIC_DISPATCH_BACKEND AND NOT OP_DEPENDENCY)
+    elseif(NOT STATIC_DISPATCH_BACKEND)
       message(WARNING
-        "For custom build with dynamic dispatch you have to provide the dependency graph of PyTorch operators.\n"
-        "Switching to STATIC_DISPATCH_BACKEND=CPU. If you run into problems with static dispatch and still want"
-        " to use selective build with dynamic dispatch, please try:\n"
-        "1. Run the static analysis tool to generate the dependency graph, e.g.:\n"
-        " LLVM_DIR=/usr ANALYZE_TORCH=1 tools/code_analyzer/build.sh\n"
-        "2. Run the custom build with the OP_DEPENDENCY option pointing to the generated dependency graph, e.g.:\n"
-        " scripts/build_android.sh -DSELECTED_OP_LIST=<op_list.yaml> -DOP_DEPENDENCY=<dependency_graph.yaml>\n"
+        "You have to run tracing-based selective build with dynamic dispatch.\n"
+        "Switching to STATIC_DISPATCH_BACKEND=CPU."
       )
       set(STATIC_DISPATCH_BACKEND CPU)
-    else()
-      execute_process(
-        COMMAND
-        "${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_LIST_DIR}/../tools/code_analyzer/gen_op_registration_allowlist.py
-        --op-dependency "${OP_DEPENDENCY}"
-        --root-ops "${SELECTED_OP_LIST}"
-        OUTPUT_VARIABLE OP_REGISTRATION_WHITELIST
-        OUTPUT_STRIP_TRAILING_WHITESPACE
-      )
-      separate_arguments(OP_REGISTRATION_WHITELIST)
-      message(STATUS "Custom build with op registration whitelist: ${OP_REGISTRATION_WHITELIST}")
-      list(APPEND CUSTOM_BUILD_FLAGS
-        --force_schema_registration
-        --op_registration_whitelist ${OP_REGISTRATION_WHITELIST})
     endif()
   endif()
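For reference, the removed else() branch above simply shelled out to the allowlist generator. A rough command-line equivalent, assuming the dependency graph YAML had already been produced by the deprecated analyzer (both file names below are placeholders):

    # Expand the model's root ops through the op dependency graph into the full
    # registration allowlist (what the removed execute_process() call computed
    # and then passed via --op_registration_whitelist).
    python tools/code_analyzer/gen_op_registration_allowlist.py \
      --op-dependency dependency_graph.yaml \
      --root-ops model_root_ops.yaml
    # e.g. with the test graph deleted later in this PR, a root op _test::DD
    # expands to _test::DD, _test::AA, _test::EE, _test::BB, _test::FF.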
test/mobile/custom_build/build.sh:
@@ -11,13 +11,9 @@
 # toolchain), which doesn't contain autograd function nor backward ops thus is
 # smaller than full LibTorch.
 #
-# 2. `TEST_CUSTOM_BUILD_DYNAMIC=1 ./build.sh` - it further optimizes libtorch
-# size by only including ops used by a specific model.
-# Note that LLVM_DIR environment variable should be set to the location of
-# LLVM-dev toolchain.
-#
-# 3. `TEST_CUSTOM_BUILD_STATIC=1 ./build.sh` - similar as 2) except that it
-# relies on the static dispatch + linker to prune code.
+# 2. `TEST_CUSTOM_BUILD_STATIC=1 ./build.sh` - optimizes libtorch size by only
+# including ops used by a specific model. It relies on the static dispatch +
+# linker to prune code.
 #
 ###############################################################################

@@ -37,18 +33,6 @@ prepare_model_and_dump_root_ops() {
   python "${TEST_SRC_ROOT}/prepare_model.py"
 }

-generate_op_dependency_graph() {
-  # Regular users should get this graph from prebuilt package.
-  ANALYZER_BUILD_ROOT="${BUILD_ROOT}/build_analyzer"
-  OP_DEPENDENCY="${ANALYZER_BUILD_ROOT}/work/torch_result.yaml"
-
-  if [ ! -f "${OP_DEPENDENCY}" ]; then
-    BUILD_ROOT="${ANALYZER_BUILD_ROOT}" \
-    ANALYZE_TORCH=1 \
-    "${SRC_ROOT}/tools/code_analyzer/build.sh"
-  fi
-}
-
 run_default_build() {
   LIBTORCH_BUILD_ROOT="${BUILD_ROOT}/build_default_libtorch"
   LIBTORCH_INSTALL_PREFIX="${LIBTORCH_BUILD_ROOT}/install"
@@ -61,30 +45,18 @@ run_custom_build_with_static_dispatch() {
   LIBTORCH_BUILD_ROOT="${BUILD_ROOT}/build_custom_libtorch_static"
   LIBTORCH_INSTALL_PREFIX="${LIBTORCH_BUILD_ROOT}/install"

-  # Here we omitted the OP_DEPENDENCY flag so it generates registration
-  # code for used ROOT ops only, whose unboxing kernels are still needed
-  # by the JIT runtime. The intermediate ops will be automatically kepted
-  # by the linker as they are statically referenced by the static dispatch
-  # code, for which we can bypass the registration.
+  # Here it generates registration code for used ROOT ops only, whose unboxing
+  # kernels are still needed by the JIT runtime. The intermediate ops will be
+  # automatically kepted by the linker as they are statically referenced by the
+  # static dispatch code, for which we can bypass the registration.
   # We don't set '-DSTATIC_DISPATCH_BACKEND=CPU' explicitly to test automatic
-  # fallback to static dispatch when '-DOP_DEPENDENCY' is omitted.
+  # fallback to static dispatch.
   BUILD_ROOT="${LIBTORCH_BUILD_ROOT}" \
   "${SRC_ROOT}/scripts/build_mobile.sh" \
     -DCMAKE_CXX_FLAGS="-DSTRIP_ERROR_MESSAGES" \
     -DSELECTED_OP_LIST="${ROOT_OPS}"
 }

-run_custom_build_with_dynamic_dispatch() {
-  LIBTORCH_BUILD_ROOT="${BUILD_ROOT}/build_custom_libtorch_dynamic"
-  LIBTORCH_INSTALL_PREFIX="${LIBTORCH_BUILD_ROOT}/install"
-
-  BUILD_ROOT="${LIBTORCH_BUILD_ROOT}" \
-  "${SRC_ROOT}/scripts/build_mobile.sh" \
-    -DCMAKE_CXX_FLAGS="-DSTRIP_ERROR_MESSAGES" \
-    -DSELECTED_OP_LIST="${ROOT_OPS}" \
-    -DOP_DEPENDENCY="${OP_DEPENDENCY}"
-}
-
 build_predictor() {
   PREDICTOR_BUILD_ROOT="${BUILD_ROOT}/predictor"

@@ -125,14 +97,6 @@ test_custom_build_with_static_dispatch() {
   run_predictor
 }

-test_custom_build_with_dynamic_dispatch() {
-  prepare_model_and_dump_root_ops
-  generate_op_dependency_graph
-  run_custom_build_with_dynamic_dispatch
-  build_predictor
-  run_predictor
-}
-
 if [ -n "${TEST_DEFAULT_BUILD}" ]; then
   test_default_build
 fi
@@ -140,7 +104,3 @@ fi
 if [ -n "${TEST_CUSTOM_BUILD_STATIC}" ]; then
   test_custom_build_with_static_dispatch
 fi
-
-if [ -n "${TEST_CUSTOM_BUILD_DYNAMIC}" ]; then
-  test_custom_build_with_dynamic_dispatch
-fi
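The entry points that survive this change can still be exercised locally; a minimal sketch using only the modes kept above (the dynamic-dispatch mode and its dependency-graph step are gone):

    # Baseline: full (non-selective) mobile LibTorch build plus a forward pass
    # with the prepared test model.
    TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh

    # Selective build with static dispatch: registers only the model's root ops
    # and lets the linker prune everything that is not statically referenced.
    TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh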
@@ -1,34 +0,0 @@ (deleted file: test/mobile/op_deps/CMakeLists.txt)
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
|
||||
project(op_deps_project)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
# Find torch library
|
||||
find_package(Torch REQUIRED)
|
||||
|
||||
# Small test op library
|
||||
add_library(OpLib
|
||||
quantized_ops.cpp
|
||||
simple_ops.cpp
|
||||
utils.cpp
|
||||
)
|
||||
target_include_directories(OpLib PUBLIC ${TORCH_INCLUDE_DIRS})
|
||||
|
||||
# Main executable
|
||||
add_executable(main main.cc)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
target_link_libraries(main
|
||||
-Wl,--gc-sections
|
||||
-Wl,--whole-archive
|
||||
OpLib
|
||||
${TORCH_LIBRARIES}
|
||||
-Wl,--no-whole-archive
|
||||
Threads::Threads
|
||||
)
|
||||
|
||||
# Only install core libraries for downstream analysis job.
|
||||
install(TARGETS OpLib DESTINATION lib)
|
||||
install(FILES ${C10_LIBRARY} DESTINATION lib)
|
@@ -1,29 +0,0 @@ (deleted file: test/mobile/op_deps/build.sh)
|
||||
#!/bin/bash
|
||||
##############################################################################
|
||||
# A simple project that uses C10 op registration API to create a bunch of
|
||||
# inter-dependent dummy ops in order to test op dependency analysis script for
|
||||
# mobile custom build workflow.
|
||||
##############################################################################
|
||||
|
||||
set -ex
|
||||
|
||||
SRC_ROOT="$( cd "$(dirname "$0")"/../../.. ; pwd -P)"
|
||||
BUILD_ROOT="${BUILD_ROOT:-${SRC_ROOT}/build_test_op_deps}"
|
||||
INSTALL_PREFIX="${BUILD_ROOT}/install"
|
||||
|
||||
mkdir -p "${BUILD_ROOT}"
|
||||
cd "${BUILD_ROOT}"
|
||||
|
||||
if [ ! -d "${TORCH_INSTALL_PREFIX:=${SRC_ROOT}/build_mobile/install}" ]; then
|
||||
echo "Unable to find torch library in ${TORCH_INSTALL_PREFIX}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cmake "${SRC_ROOT}/test/mobile/op_deps" \
|
||||
-DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_PREFIX_PATH="${TORCH_INSTALL_PREFIX}" \
|
||||
"$@" # Use-specified CMake arguments
|
||||
|
||||
cmake --build . --target install -- "-j${MAX_JOBS}"
|
||||
echo "Installation completed: ${INSTALL_PREFIX}"
|
@@ -1,41 +0,0 @@ (deleted file: test/mobile/op_deps/expected_deps.yaml)
|
||||
- name: __ROOT__
|
||||
depends:
|
||||
- name: _test::AA
|
||||
- name: _test::BB
|
||||
- name: _test::AA
|
||||
depends:
|
||||
- name: _test::BB
|
||||
- name: _test::BB
|
||||
depends:
|
||||
- name: _test::AA
|
||||
- name: _test::CC
|
||||
depends:
|
||||
- name: _test::AA
|
||||
- name: _test::DD
|
||||
depends:
|
||||
- name: _test::AA
|
||||
- name: _test::EE
|
||||
- name: _test::EE
|
||||
depends:
|
||||
- name: _test::FF
|
||||
- name: _test::FF
|
||||
depends:
|
||||
- name: _test::EE
|
||||
- name: _test::GG
|
||||
depends:
|
||||
- name: _test::FF
|
||||
- name: _test::HH
|
||||
- name: quantized::t_add
|
||||
depends:
|
||||
- name: quantized::t_helper1
|
||||
- name: quantized::t_add_relu
|
||||
depends:
|
||||
- name: quantized::t_helper2
|
||||
- name: quantized::t_helper1
|
||||
depends:
|
||||
- name: quantized::t_helper3
|
||||
- name: quantized::t_helper2
|
||||
depends:
|
||||
- name: quantized::t_helper4
|
||||
- name: quantized::t_helper3
|
||||
- name: quantized::t_helper4
|
@@ -1,20 +0,0 @@ (deleted file: test/mobile/op_deps/main.cc)
|
||||
#include <torch/script.h>
|
||||
|
||||
#include "quantized_ops.h"
|
||||
#include "simple_ops.h"
|
||||
|
||||
int main() {
|
||||
c10::InferenceMode guard;
|
||||
auto input = torch::empty({1, 3, 224, 224});
|
||||
at::call_AA_op(input);
|
||||
at::call_BB_op(input);
|
||||
at::call_CC_op(input);
|
||||
at::call_DD_op(input);
|
||||
at::call_EE_op(input);
|
||||
at::call_FF_op(input);
|
||||
const auto t_add = c10::Dispatcher::singleton().findSchemaOrThrow("quantized::t_add", "").typed<at::Tensor(at::Tensor, at::Tensor, double, int64_t)>();
|
||||
const auto t_add_relu = c10::Dispatcher::singleton().findSchemaOrThrow("quantized::t_add_relu", "").typed<at::Tensor (at::Tensor, at::Tensor, double, int64_t)>();
|
||||
t_add.call(input, input, 1.0, 0);
|
||||
t_add_relu.call(input, input, 1.0, 0);
|
||||
return 0;
|
||||
}
|
@@ -1,76 +0,0 @@ (deleted file: test/mobile/op_deps/quantized_ops.cpp)
|
||||
#include "quantized_ops.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <c10/core/TensorOptions.h>
|
||||
#include <ATen/core/dispatch/Dispatcher.h>
|
||||
#include <ATen/core/op_registration/op_registration.h>
|
||||
|
||||
// This file simulates some irregular op registration/invocation patterns for
|
||||
// quantized operators which are not covered by aten codegen.
|
||||
|
||||
namespace at {
|
||||
|
||||
namespace {
|
||||
|
||||
template <bool ReLUFused>
|
||||
Tensor _add_out(Tensor& out, const Tensor& self, const Tensor& other);
|
||||
|
||||
template <>
|
||||
Tensor _add_out<false>(Tensor& out, const Tensor& self, const Tensor& other) {
|
||||
constexpr auto kName = "quantized::t_helper1";
|
||||
static const auto op = c10::Dispatcher::singleton().findSchemaOrThrow(kName, "").typed<Tensor(Tensor)>();;
|
||||
op.call(self);
|
||||
return out;
|
||||
}
|
||||
|
||||
template <>
|
||||
Tensor _add_out<true>(Tensor& out, const Tensor& self, const Tensor& other) {
|
||||
constexpr auto kName = "quantized::t_helper2";
|
||||
static const auto op = c10::Dispatcher::singleton().findSchemaOrThrow(kName, "").typed<Tensor(Tensor)>();
|
||||
op.call(self);
|
||||
return out;
|
||||
}
|
||||
|
||||
template <bool ReLUFused = false>
|
||||
class QAdd final : public c10::OperatorKernel {
|
||||
public:
|
||||
Tensor operator()(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
|
||||
std::cout << "QAdd with ReLUFused = " << ReLUFused << std::endl;
|
||||
return _add_out<ReLUFused>(qa, qa, qb); // hack
|
||||
}
|
||||
};
|
||||
|
||||
template <const char* opName, const char* callOpName>
|
||||
Tensor QHelper(Tensor qa) {
|
||||
std::cout << "Op: " << opName << std::endl;
|
||||
if (callOpName != nullptr) {
|
||||
std::cout << "Call op: " << callOpName << std::endl;
|
||||
static const auto op = c10::Dispatcher::singleton().findSchemaOrThrow(callOpName, "").typed<Tensor(Tensor)>();
|
||||
op.call(qa);
|
||||
}
|
||||
return qa;
|
||||
}
|
||||
|
||||
constexpr char helper1[] = "quantized::t_helper1";
|
||||
constexpr char helper2[] = "quantized::t_helper2";
|
||||
constexpr char helper3[] = "quantized::t_helper3";
|
||||
constexpr char helper4[] = "quantized::t_helper4";
|
||||
|
||||
static auto registry = c10::RegisterOperators()
|
||||
.op("quantized::t_add(Tensor qa, Tensor qb, float scale, int zero_point)"
|
||||
"-> Tensor qc",
|
||||
c10::RegisterOperators::options()
|
||||
.catchAllKernel<QAdd</*ReLUFused=*/false>>())
|
||||
.op("quantized::t_add_relu(Tensor qa, Tensor qb, float scale, int zero_point)"
|
||||
"-> Tensor qc",
|
||||
c10::RegisterOperators::options()
|
||||
.catchAllKernel<QAdd</*ReLUFused=*/true>>())
|
||||
.op("quantized::t_helper1(Tensor qa) -> Tensor", &QHelper<helper1, helper3>)
|
||||
.op("quantized::t_helper2(Tensor qa) -> Tensor", &QHelper<helper2, helper4>)
|
||||
.op("quantized::t_helper3(Tensor qa) -> Tensor", &QHelper<helper3, nullptr>)
|
||||
.op("quantized::t_helper4(Tensor qa) -> Tensor", &QHelper<helper4, nullptr>);
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace at
|
@@ -1,3 +0,0 @@ (deleted file; likely test/mobile/op_deps/quantized_ops.h)
|
||||
#pragma once
|
||||
|
||||
#include <ATen/Tensor.h>
|
@@ -1,117 +0,0 @@ (deleted file: test/mobile/op_deps/simple_ops.cpp)
|
||||
#include "simple_ops.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <c10/core/TensorOptions.h>
|
||||
#include <torch/library.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
namespace at {
|
||||
|
||||
// AA -> BB
|
||||
Tensor AA_op(const Tensor& self) {
|
||||
std::cout << "AA op" << std::endl;
|
||||
if (self.ndimension() >= 4) {
|
||||
return call_BB_op(self);
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
// BB -> AA
|
||||
Tensor BB_op(const Tensor& self) {
|
||||
std::cout << "BB op" << std::endl;
|
||||
if (self.ndimension() < 4) {
|
||||
return global_helper_call_AA_op_1(self);
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
// CC -> (AA -> BB)
|
||||
Tensor CC_op(const Tensor& self) {
|
||||
std::cout << "CC op" << std::endl;
|
||||
return global_helper_call_AA_op_2(self);
|
||||
}
|
||||
|
||||
// DD -> (AA -> BB) / (EE -> FF)
|
||||
Tensor DD_op(const Tensor& self) {
|
||||
std::cout << "DD op" << std::endl;
|
||||
if (self.ndimension() < 4) {
|
||||
return global_helper_call_AA_op_3(self);
|
||||
}
|
||||
return call_EE_op(self);
|
||||
}
|
||||
|
||||
// EE -> FF
|
||||
Tensor EE_op(const Tensor& self) {
|
||||
std::cout << "EE op" << std::endl;
|
||||
if (self.ndimension() >= 4) {
|
||||
return call_FF_op(self);
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
// FF -> EE
|
||||
Tensor FF_op(const Tensor& self) {
|
||||
std::cout << "FF op" << std::endl;
|
||||
if (self.ndimension() < 4) {
|
||||
return call_EE_op(self);
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
// GG -> FF
|
||||
Tensor GG_op(const Tensor& self) {
|
||||
return call_FF_op(self);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// NB: Some of these registrations (AA, EE) are not what you
|
||||
// actually expect to see in practice, but we cover them here
|
||||
// as they are technically "valid" API calls and we want to
|
||||
// make sure the analyzer catches them. (The analyzer is very
|
||||
// generic, so actually there isn't any reason it shouldn't work,
|
||||
// but it's good to test them!)
|
||||
//
|
||||
// Additionally, the code in this file is not really runnable; for
|
||||
// example we are missing schemas for all of the impl registrations
|
||||
// here. The analyzer doesn't really care, as it only really
|
||||
// cares about the name
|
||||
TORCH_LIBRARY(_test, m) {
|
||||
m.def("AA(Tensor self) -> Tensor");
|
||||
m.impl("AA", torch::CppFunction::makeFromUnboxedFunction(AA_op));
|
||||
|
||||
m.def("BB(Tensor self) -> Tensor");
|
||||
m.impl("BB", TORCH_FN(BB_op));
|
||||
|
||||
m.def("CC(Tensor self) -> Tensor", TORCH_FN(CC_op));
|
||||
m.def("DD", TORCH_FN(DD_op));
|
||||
}
|
||||
|
||||
TORCH_LIBRARY_FRAGMENT(_test, m) {
|
||||
m.def("EE(Tensor self) -> Tensor");
|
||||
m.def("FF(Tensor self) -> Tensor");
|
||||
m.def("GG(Tensor self) -> Tensor");
|
||||
m.def("HH(Tensor self) -> Tensor");
|
||||
}
|
||||
|
||||
TORCH_LIBRARY_IMPL(_test, CPU, m) {
|
||||
m.impl("EE", EE_op);
|
||||
m.impl("FF",
|
||||
torch::dispatch(DispatchKey::CPU,
|
||||
torch::CppFunction::makeFromUnboxedFunction(FF_op))
|
||||
);
|
||||
m.impl("GG",
|
||||
torch::dispatch(DispatchKey::CPU,
|
||||
TORCH_FN((GG_op)))
|
||||
);
|
||||
m.impl("HH",
|
||||
[] (Tensor a) -> Tensor {
|
||||
return a;
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
} // namespace at
|
@@ -1,50 +0,0 @@ (deleted file: test/mobile/op_deps/simple_ops.h)
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/dispatch/Dispatcher.h>
|
||||
#include <ATen/Tensor.h>
|
||||
|
||||
namespace at {
|
||||
|
||||
static inline Tensor call_AA_op(const Tensor& self) {
|
||||
static const auto op = c10::Dispatcher::singleton()
|
||||
.findSchemaOrThrow("_test::AA", "")
|
||||
.typed<Tensor(const Tensor&)>();
|
||||
return op.call(self);
|
||||
}
|
||||
|
||||
static inline Tensor call_BB_op(const Tensor& self) {
|
||||
static const auto op = c10::Dispatcher::singleton()
|
||||
.findSchemaOrThrow("_test::BB", "")
|
||||
.typed<Tensor(const Tensor&)>();
|
||||
return op.call(self);
|
||||
}
|
||||
|
||||
static inline Tensor call_CC_op(const Tensor& self) {
|
||||
static const auto op = c10::Dispatcher::singleton()
|
||||
.findSchemaOrThrow("_test::CC", "")
|
||||
.typed<Tensor(const Tensor&)>();
|
||||
return op.call(self);
|
||||
}
|
||||
|
||||
static inline Tensor call_DD_op(const Tensor& self) {
|
||||
static const auto op = c10::Dispatcher::singleton()
|
||||
.findSchemaOrThrow("_test::DD", "")
|
||||
.typed<Tensor(const Tensor&)>();
|
||||
return op.call(self);
|
||||
}
|
||||
|
||||
static inline Tensor call_EE_op(const Tensor& self) {
|
||||
static const auto op = c10::Dispatcher::singleton()
|
||||
.findSchemaOrThrow("_test::EE", "")
|
||||
.typed<Tensor(const Tensor&)>();
|
||||
return op.call(self);
|
||||
}
|
||||
|
||||
static inline Tensor call_FF_op(const Tensor& self) {
|
||||
static const auto op = c10::Dispatcher::singleton()
|
||||
.findSchemaOrThrow("_test::FF", "")
|
||||
.typed<Tensor(const Tensor&)>();
|
||||
return op.call(self);
|
||||
}
|
||||
|
||||
} // namespace at
|
@@ -1,48 +0,0 @@ (deleted file: test/mobile/op_deps/utils.cpp)
|
||||
#include "utils.h"
|
||||
|
||||
#include <c10/core/TensorOptions.h>
|
||||
#include <ATen/core/op_registration/op_registration.h>
|
||||
|
||||
#include "simple_ops.h"
|
||||
using namespace at;
|
||||
|
||||
Tensor global_helper_call_AA_op_1(const Tensor& self) {
|
||||
auto lambda = [&]() {
|
||||
return call_AA_op(self);
|
||||
};
|
||||
return lambda();
|
||||
}
|
||||
|
||||
static std::function<Tensor()> helper(const Tensor& self) {
|
||||
return [&]() {
|
||||
return call_AA_op(self);
|
||||
};
|
||||
}
|
||||
|
||||
Tensor global_helper_call_AA_op_2(const Tensor& self) {
|
||||
return helper(self)();
|
||||
}
|
||||
|
||||
Tensor global_helper_call_AA_op_3(const Tensor& self) {
|
||||
auto lambda = [&]() {
|
||||
static const auto op = c10::Dispatcher::singleton()
|
||||
.findSchemaOrThrow("_test::AA", "")
|
||||
.typed<Tensor (const Tensor&)>();
|
||||
return op.call(self);
|
||||
};
|
||||
return lambda();
|
||||
}
|
||||
|
||||
namespace torch {
|
||||
namespace jit {
|
||||
|
||||
C10_EXPORT Tensor API_Function(const Tensor& self) {
|
||||
return call_AA_op(self);
|
||||
}
|
||||
|
||||
at::Tensor API_Class::API_Method(const at::Tensor& self) {
|
||||
return call_BB_op(self);
|
||||
}
|
||||
|
||||
} // namespace jit
|
||||
} // namespace torch
|
@@ -1,18 +0,0 @@ (deleted file: test/mobile/op_deps/utils.h)
|
||||
#pragma once
|
||||
|
||||
#include <ATen/Tensor.h>
|
||||
|
||||
at::Tensor global_helper_call_AA_op_1(const at::Tensor& self);
|
||||
at::Tensor global_helper_call_AA_op_2(const at::Tensor& self);
|
||||
at::Tensor global_helper_call_AA_op_3(const at::Tensor& self);
|
||||
|
||||
namespace torch {
|
||||
namespace jit {
|
||||
|
||||
class C10_EXPORT API_Class {
|
||||
public:
|
||||
at::Tensor API_Method(const at::Tensor& self);
|
||||
};
|
||||
|
||||
} // namespace jit
|
||||
} // namespace torch
|
@@ -1,22 +0,0 @@ (deleted file: tools/code_analyzer/CMakeLists.txt)
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
|
||||
project(code_analyzer)
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
|
||||
find_package(LLVM REQUIRED CONFIG)
|
||||
add_definitions(${LLVM_DEFINITIONS})
|
||||
include_directories(${LLVM_INCLUDE_DIRS})
|
||||
link_directories(${LLVM_LIBRARY_DIRS})
|
||||
|
||||
# Main executable
|
||||
add_executable(analyzer
|
||||
analyzer.cpp
|
||||
op_deps_pass.cpp
|
||||
)
|
||||
|
||||
set_target_properties(analyzer PROPERTIES
|
||||
COMPILE_FLAGS "-fno-rtti -O3")
|
||||
|
||||
llvm_map_components_to_libnames(llvm_libs core irreader support)
|
||||
|
||||
target_link_libraries(analyzer ${llvm_libs})
|
@@ -1,32 +0,0 @@ (deleted file: tools/code_analyzer/analyzer.cpp)
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IRReader/IRReader.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/PassRegistry.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/SourceMgr.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
cl::opt<std::string> InputFilename(
|
||||
cl::Positional,
|
||||
cl::desc("<input bitcode file>"),
|
||||
cl::init("-"),
|
||||
cl::value_desc("filename"));
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
LLVMContext Context;
|
||||
cl::ParseCommandLineOptions(argc, argv);
|
||||
SMDiagnostic Err;
|
||||
std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
|
||||
|
||||
auto opDependencyPass = PassRegistry::getPassRegistry()
|
||||
->getPassInfo(StringRef("op_dependency"))
|
||||
->createPass();
|
||||
static_cast<ModulePass*>(opDependencyPass)->runOnModule(*M);
|
||||
return 0;
|
||||
}
|
@@ -1,148 +0,0 @@ (deleted file: tools/code_analyzer/build.sh)
|
||||
#!/bin/bash
|
||||
##############################################################################
|
||||
# Build LLVM code analyzer and analyze torch code dependency.
|
||||
##############################################################################
|
||||
#
|
||||
# Example usage:
|
||||
#
|
||||
# 1. Analyze torch and generate yaml file of op dependency transitive closure:
|
||||
# LLVM_DIR=${HOME}/src/llvm8/build/install \
|
||||
# ANALYZE_TORCH=1 tools/code_analyzer/build.sh
|
||||
#
|
||||
# 2. Analyze test project and compare with expected result:
|
||||
# LLVM_DIR=${HOME}/src/llvm8/build/install \
|
||||
# ANALYZE_TEST=1 tools/code_analyzer/build.sh
|
||||
#
|
||||
# 3. Analyze torch and generate yaml file of op dependency with debug path:
|
||||
# LLVM_DIR=${HOME}/src/llvm8/build/install \
|
||||
# ANALYZE_TORCH=1 tools/code_analyzer/build.sh -debug_path=true
|
||||
#
|
||||
# If you're a Facebook employee, chances are you're running on CentOS 8.
|
||||
# If that's the case, you can install all the dependencies you need with:
|
||||
#
|
||||
# sudo dnf install llvm-devel llvm-static clang ncurses-devel
|
||||
#
|
||||
# and then set LLVM_DIR=/usr
|
||||
|
||||
set -ex
|
||||
|
||||
SRC_ROOT="$( cd "$(dirname "$0")"/../.. ; pwd -P)"
|
||||
ANALYZER_SRC_HOME="${SRC_ROOT}/tools/code_analyzer"
|
||||
|
||||
# Clang/LLVM path
|
||||
export LLVM_DIR="${LLVM_DIR:-/usr/lib/llvm-8}"
|
||||
export CC="${LLVM_DIR}/bin/clang"
|
||||
export CXX="${LLVM_DIR}/bin/clang++"
|
||||
EXTRA_ANALYZER_FLAGS=$@
|
||||
|
||||
BUILD_ROOT="${BUILD_ROOT:-${SRC_ROOT}/build_code_analyzer}"
|
||||
WORK_DIR="${BUILD_ROOT}/work"
|
||||
|
||||
rm -rf "${BUILD_ROOT}"
|
||||
mkdir -p "${BUILD_ROOT}"
|
||||
mkdir -p "${WORK_DIR}"
|
||||
cd "${BUILD_ROOT}"
|
||||
|
||||
build_analyzer() {
|
||||
cmake "${ANALYZER_SRC_HOME}" -DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
if [ -z "${MAX_JOBS}" ]; then
|
||||
if [ "$(uname)" == 'Darwin' ]; then
|
||||
MAX_JOBS=$(sysctl -n hw.ncpu)
|
||||
else
|
||||
MAX_JOBS=$(nproc)
|
||||
fi
|
||||
fi
|
||||
|
||||
make "-j${MAX_JOBS}"
|
||||
}
|
||||
|
||||
build_torch_mobile() {
|
||||
TORCH_BUILD_ROOT="${BUILD_ROOT}/build_mobile"
|
||||
TORCH_INSTALL_PREFIX="${TORCH_BUILD_ROOT}/install"
|
||||
|
||||
BUILD_ROOT="${TORCH_BUILD_ROOT}" "${SRC_ROOT}/scripts/build_mobile.sh" \
|
||||
-DCMAKE_CXX_FLAGS="-S -emit-llvm -DSTRIP_ERROR_MESSAGES" \
|
||||
${MOBILE_BUILD_FLAGS}
|
||||
}
|
||||
|
||||
build_test_project() {
|
||||
TEST_SRC_ROOT="${SRC_ROOT}/test/mobile/op_deps"
|
||||
TEST_BUILD_ROOT="${BUILD_ROOT}/build_test"
|
||||
TEST_INSTALL_PREFIX="${TEST_BUILD_ROOT}/install"
|
||||
|
||||
BUILD_ROOT="${TEST_BUILD_ROOT}" \
|
||||
TORCH_INSTALL_PREFIX="${TORCH_INSTALL_PREFIX}" \
|
||||
"${TEST_SRC_ROOT}/build.sh" \
|
||||
-DCMAKE_CXX_FLAGS="-S -emit-llvm -DSTRIP_ERROR_MESSAGES"
|
||||
}
|
||||
|
||||
call_analyzer() {
|
||||
ANALYZER_BIN="${BUILD_ROOT}/analyzer" \
|
||||
INPUT="${INPUT}" OUTPUT="${OUTPUT}" \
|
||||
EXTRA_ANALYZER_FLAGS="${EXTRA_ANALYZER_FLAGS}" \
|
||||
"${ANALYZER_SRC_HOME}/run_analyzer.sh"
|
||||
}
|
||||
|
||||
analyze_torch_mobile() {
|
||||
INPUT="${WORK_DIR}/torch.ll"
|
||||
OUTPUT="${WORK_DIR}/torch_result.yaml"
|
||||
|
||||
if [ ! -f "${INPUT}" ]; then
|
||||
# Link libtorch into a single module
|
||||
# TODO: invoke llvm-link from cmake directly to avoid this hack.
|
||||
# TODO: include *.c.o when there is meaningful fan-out from pure-c code.
|
||||
"${LLVM_DIR}/bin/llvm-link" -S \
|
||||
$(find "${TORCH_BUILD_ROOT}" -name '*.cpp.o' -o -name '*.cc.o') \
|
||||
-o "${INPUT}"
|
||||
fi
|
||||
|
||||
# Analyze dependency
|
||||
call_analyzer
|
||||
}
|
||||
|
||||
print_output_file_path() {
|
||||
echo "Deployed file at: ${OUTPUT}"
|
||||
}
|
||||
|
||||
analyze_test_project() {
|
||||
INPUT="${WORK_DIR}/test.ll"
|
||||
OUTPUT="${WORK_DIR}/test_result.yaml"
|
||||
|
||||
# Link into a single module (only need c10 and OpLib srcs)
|
||||
# TODO: invoke llvm-link from cmake directly to avoid this hack.
|
||||
"${LLVM_DIR}/bin/llvm-link" -S \
|
||||
$(find "${TORCH_BUILD_ROOT}" -path '*/c10*' \( -name '*.cpp.o' -o -name '*.cc.o' \)) \
|
||||
$(find "${TEST_BUILD_ROOT}" -path '*/OpLib*' \( -name '*.cpp.o' -o -name '*.cc.o' \)) \
|
||||
-o "${INPUT}"
|
||||
|
||||
# Analyze dependency
|
||||
call_analyzer
|
||||
}
|
||||
|
||||
check_test_result() {
|
||||
if cmp -s "${OUTPUT}" "${TEST_SRC_ROOT}/expected_deps.yaml"; then
|
||||
echo "Test result is the same as expected."
|
||||
else
|
||||
echo "Test result is DIFFERENT from expected!"
|
||||
diff -u "${TEST_SRC_ROOT}/expected_deps.yaml" "${OUTPUT}"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
build_analyzer
|
||||
|
||||
if [ -n "${ANALYZE_TORCH}" ]; then
|
||||
build_torch_mobile
|
||||
analyze_torch_mobile
|
||||
if [ -n "${DEPLOY}" ]; then
|
||||
print_output_file_path
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "${ANALYZE_TEST}" ]; then
|
||||
build_torch_mobile
|
||||
build_test_project
|
||||
analyze_test_project
|
||||
check_test_result
|
||||
fi
|
@ -1,894 +0,0 @@
|
||||
// This LLVM pass takes LLVM bitcode / assembly as input and generates a
// dependency graph among aten ops. From the set of root ops used by a model, we
// can calculate the transitive closure of all dependent ops, and then produce a
// custom LibTorch library with optimal build size that only registers and
// contains the ops needed by the specific model - unregistered / unused ops can
// be stripped at link time.
|
||||
//
|
||||
// [Approach]
|
||||
// To generate the dependency graph it searches for 3 types of connections in
|
||||
// LLVM bitcode / assembly:
|
||||
// 1) op registration: op name (schema string literal) -> registered function;
|
||||
// 2) regular function call: function -> function;
|
||||
// 3) op invocation: function -> op name (schema string literal)
|
||||
//
|
||||
// For #2 it uses a similar algorithm to llvm::LazyCallGraph - it not only looks
// at call/invoke instructions but also recursively searches for function
// pointers in each instruction's operands.
|
||||
//
|
||||
// For #1 and #3 it searches for connections between operator name string
|
||||
// literals / function pointers and c10 op registration/invocation API calls in
|
||||
// LLVM IR graph via "use" edges (bi-directional):
|
||||
// 1. llvm::Value has a "users()" method to get other llvm::Value nodes that use
// the value;
// 2. most types derive from llvm::User, which has an "operands()" method to get
// other llvm::Value nodes being used by the value;
|
||||
//
|
||||
// [Limitation]
|
||||
// For now the search doesn't go beyond the function boundary because the
// references to op name string literals and to the c10 op registration/invocation
// APIs are almost always in the same function. If we create helper functions
// around the c10 APIs, we can simply add them to the regular expressions used to
// identify the c10 APIs.
|
||||
//
|
||||
// [Example]
|
||||
// In the following example, it finds out:
|
||||
// 1) the registered function for "quantized:add" operator;
|
||||
// 2) one possible call path to at::empty() function;
|
||||
// 3) the called operator name "aten::empty":
|
||||
//
|
||||
// - quantized::add
|
||||
// - c10::detail::wrap_kernel_functor_unboxed_<at::native::(anonymous
|
||||
// namespace)::QAdd<false>, at::Tensor (at::Tensor, at::Tensor, double,
|
||||
// long)>::call(c10::OperatorKernel*, at::Tensor, at::Tensor, double, long)
|
||||
// - at::native::(anonymous namespace)::QAdd<false>::operator()(at::Tensor,
|
||||
// at::Tensor, double, long)
|
||||
// - void at::native::DispatchStub<void (*)(at::Tensor&, at::Tensor const&,
|
||||
// at::Tensor const&), at::native::qadd_stub>::operator()<at::Tensor&,
|
||||
// at::Tensor const&, at::Tensor const&>(c10::DeviceType, at::Tensor&,
|
||||
// at::Tensor const&, at::Tensor const&)
|
||||
// - at::native::DispatchStub<void (*)(at::Tensor&, at::Tensor const&,
|
||||
// at::Tensor const&), at::native::qadd_stub>::choose_cpu_impl()
|
||||
// - void at::native::(anonymous namespace)::qadd_kernel<false>(at::Tensor&,
|
||||
// at::Tensor const&, at::Tensor const&)
|
||||
// - at::TensorIterator::binary_op(at::Tensor&, at::Tensor const&, at::Tensor
|
||||
// const&, bool)
|
||||
// - at::TensorIterator::build()
|
||||
// - at::TensorIterator::fast_set_up()
|
||||
// - at::empty(c10::ArrayRef<long>, c10::TensorOptions const&,
|
||||
// c10::optional<c10::MemoryFormat>)
|
||||
// - aten::empty
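//
// [Output]
// The pass prints the simplified dependency graph to stdout as YAML (see
// printAsYAML below). An illustrative sketch - not taken from a real run - of
// what the entry for the example above could look like:
//
// - name: quantized::add
//   depends:
//   - name: aten::empty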
|
||||
|
||||
#include <deque>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "llvm/Demangle/Demangle.h"
|
||||
#include "llvm/Analysis/LazyCallGraph.h"
|
||||
#if LLVM_VERSION_MAJOR < 8
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#endif
|
||||
#include "llvm/IR/Constant.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Regex.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
struct RegexOpt {
|
||||
std::shared_ptr<Regex> pattern;
|
||||
|
||||
void operator=(const std::string& val) {
|
||||
if (val.empty()) {
|
||||
return;
|
||||
}
|
||||
pattern = std::make_shared<Regex>(val);
|
||||
std::string regexError;
|
||||
if (!pattern->isValid(regexError)) {
|
||||
report_fatal_error(
|
||||
"Invalid regular expression param: '" + val + "' err: " + regexError,
|
||||
false);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
class RegexOptParser : public cl::basic_parser<RegexOpt> {
|
||||
public:
|
||||
RegexOptParser(cl::Option& O) : basic_parser(O) {}
|
||||
virtual ~RegexOptParser() = default;
|
||||
|
||||
// parse - Return true on error.
|
||||
bool parse(cl::Option&, StringRef, StringRef Arg, RegexOpt& Value) {
|
||||
Value = Arg.str();
|
||||
return false;
|
||||
}
|
||||
StringRef getValueName() const override {
|
||||
return "RegexOpt";
|
||||
}
|
||||
};
|
||||
|
||||
RegexOpt FunctionSchemaPatternLoc;
|
||||
cl::opt<RegexOpt, true, cl::parser<std::string>> FunctionSchemaPattern(
|
||||
"op_schema_pattern",
|
||||
cl::desc("Regular expression used to identify aten op schema strings. "
|
||||
"Example: -op_schema_pattern '^(aten|quantized)::[^ ]+'"),
|
||||
cl::location(FunctionSchemaPatternLoc),
|
||||
cl::Required,
|
||||
cl::ValueRequired);
|
||||
|
||||
RegexOpt OpRegistrationPatternLoc;
|
||||
cl::opt<RegexOpt, true, cl::parser<std::string>> OpRegistrationPattern(
|
||||
"op_register_pattern",
|
||||
cl::desc("Regular expression used to identify c10 op registration API. "
|
||||
"Example: -op_register_pattern 'c10::RegisterOperators::op'"),
|
||||
cl::location(OpRegistrationPatternLoc),
|
||||
cl::Required,
|
||||
cl::ValueRequired);
|
||||
|
||||
RegexOpt OpInvocationPatternLoc;
|
||||
cl::opt<RegexOpt, true, cl::parser<std::string>> OpInvocationPattern(
|
||||
"op_invoke_pattern",
|
||||
cl::desc("Regular expression used to identify c10 op invocation API. "
|
||||
"Example: -op_invoke_pattern 'c10::Dispatcher::findSchema'"),
|
||||
cl::location(OpInvocationPatternLoc),
|
||||
cl::Required,
|
||||
cl::ValueRequired);
|
||||
|
||||
// The `root_symbol_pattern` is used to specify the seeds of C++ symbols
|
||||
// from which it searches for transitively reachable ops which need to be
|
||||
// kept for these C++ APIs to be able to run.
|
||||
//
|
||||
// Why not dump ops that are reachable from any visible C++ symbols? Why
|
||||
// limit it to a subset of root symbols?
|
||||
// Because op registration callsites in static initializers are visible root
// symbols, too. That would dump ALL the registered ops without any filtering.
|
||||
//
|
||||
// Can we use some fixed entry point like `main()`?
|
||||
// The target to be analyzed can be a DSO that doesn't have a `main()`. And
|
||||
// sometimes we want to get ops that could (but not yet) be called.
|
||||
//
|
||||
// This temporary flag will be deprecated by better alternatives in the future.
|
||||
RegexOpt RootSymbolPatternLoc;
|
||||
cl::opt<RegexOpt, true, cl::parser<std::string>> RootSymbolPattern(
|
||||
"root_symbol_pattern",
|
||||
cl::desc("Regular expression used to identify root symbols. It will insert "
|
||||
"an entry to the output graph with key = `__ROOT__` and value = "
|
||||
"set of ops reachable from root symbols, if the pattern is set. "
|
||||
"Example: -root_symbol_pattern 'torch::jit'"),
|
||||
cl::location(RootSymbolPatternLoc));
|
||||
|
||||
cl::list<RegexOpt, bool, RegexOptParser> TorchLibraryInitPattern(
|
||||
"torch_library_init_pattern",
|
||||
cl::desc("Regular expression used to identify TorchLibraryInit symbols "
|
||||
"that are generated by `TORCH_LIBRARY` macro. The first capturing "
|
||||
"group is used to extract namespace string. "
|
||||
"Example: -torch_library_init_pattern "
|
||||
"'^.*TORCH_LIBRARY_init_([^(]+)(\\(.*)?$'"),
|
||||
cl::ZeroOrMore);
|
||||
|
||||
cl::opt<int> Verbose(
|
||||
"v",
|
||||
cl::desc("Verbose level"),
|
||||
cl::Hidden,
|
||||
cl::init(0));
|
||||
|
||||
cl::opt<bool> DebugPath(
|
||||
"debug_path",
|
||||
cl::desc("Output path between two nodes."),
|
||||
cl::init(false));
|
||||
|
||||
using SET = std::set<std::string>;
|
||||
using GRAPH = std::unordered_map<std::string, std::set<std::string>>;
|
||||
using VALUE_MAP = std::unordered_map<Value*, Value*>;
|
||||
using VALUE_SET = std::unordered_set<Value*>;
|
||||
|
||||
// SRC -> Inverse "tree" from all reachable destinations back to SRC, e.g.:
|
||||
// (DEST-1 -> PREV_11, PREV_11 -> PREV_12, ..., PREV_1n -> SRC)
|
||||
// (DEST-2 -> PREV_21, PREV_21 -> PREV_22, ..., PREV_2n -> SRC)
|
||||
using PATH = std::unordered_map<std::string,
|
||||
std::unordered_map<std::string, std::string>>;
|
||||
|
||||
inline std::string _name(const Value* V) {
|
||||
return V->getName().str();
|
||||
}
|
||||
|
||||
// Referenced the logic in llvm-cxxfilt.cpp.
|
||||
// Starting from LLVM 9 it provides a `demangle()` API. Here we keep our ad-hoc
|
||||
// version for backward compatibility.
|
||||
std::string _demangle(const std::string& mangled) {
|
||||
int status;
|
||||
const char* decorated = mangled.c_str();
|
||||
size_t decoratedLength = mangled.length();
|
||||
|
||||
char *undecorated = itaniumDemangle(decorated, nullptr, nullptr, &status);
|
||||
|
||||
if (!undecorated &&
|
||||
(decoratedLength > 6 && strncmp(decorated, "__imp_", 6) == 0)) {
|
||||
undecorated = itaniumDemangle(decorated + 6, nullptr, nullptr, &status);
|
||||
}
|
||||
std::string result(undecorated ? undecorated : mangled);
|
||||
free(undecorated);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline bool _isCallSite(Value* V) {
|
||||
#if LLVM_VERSION_MAJOR >= 8
|
||||
return isa<CallBase>(V);
|
||||
#else
|
||||
return !!CallSite(V);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline Function* _getCalledFunction(Value* V) {
|
||||
#if LLVM_VERSION_MAJOR >= 8
|
||||
return dyn_cast<CallBase>(V)->getCalledFunction();
|
||||
#else
|
||||
return CallSite(V).getCalledFunction();
|
||||
#endif
|
||||
}
|
||||
|
||||
// LLVM_DEBUG needs opt to be built with debug support.
|
||||
template<
|
||||
typename T,
|
||||
typename std::enable_if<std::is_base_of<Value, T>::value, int>::type = 0>
|
||||
std::ostream& operator<<(std::ostream& out, T& I) {
|
||||
std::string str;
|
||||
raw_string_ostream O(str);
|
||||
O << I;
|
||||
return out << str;
|
||||
}
|
||||
|
||||
class OpDependency : public ModulePass {
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
|
||||
OpDependency() : ModulePass(ID) {}
|
||||
~OpDependency() = default;
|
||||
|
||||
bool runOnModule(Module& M) override {
|
||||
// Scan all functions and instructions to construct function -> function
|
||||
// dependency graph and to find out:
|
||||
// - visible functions matching `root_symbol_pattern` option;
|
||||
// - instructions that might register or invoke operators, respectively.
|
||||
GRAPH deps;
|
||||
VALUE_SET visibleFuncs, opRegistrationInsts, opInvocationInsts;
|
||||
scanAllFunctions(
|
||||
M, &deps, &visibleFuncs, &opRegistrationInsts, &opInvocationInsts);
|
||||
|
||||
// "Key nodes" are nodes we want to keep in output graph. They are usually
|
||||
// op-schema strings.
|
||||
SET keyNodes;
|
||||
|
||||
// Insert a dummy root node with links to function nodes matching the
|
||||
// "root symbol" regex pattern and with default visibility. The goal is to
|
||||
// find aten ops that are possibly called via torch C++ APIs.
|
||||
insertRoot(visibleFuncs, &deps, &keyNodes);
|
||||
|
||||
// Scan op registration/invocation API calls to construct the link between
|
||||
// op name (a.k.a op schema string) and related functions.
|
||||
// Dump the op-schema -> function and function -> op-schema mappings into
|
||||
// the same `deps` graph with function -> function mappings as they will
|
||||
// be processed together next.
|
||||
scanOpRegistration(opRegistrationInsts, &keyNodes, &deps);
|
||||
scanOpInvocation(opInvocationInsts, &keyNodes, &deps);
|
||||
|
||||
// Shrink the graph by removing intermediate nodes (functions) while
|
||||
// maintaining transitive dependency between operators (schema strings).
|
||||
GRAPH result;
|
||||
std::shared_ptr<PATH> path = DebugPath ? std::make_shared<PATH>() : nullptr;
|
||||
simplifyGraph(deps, keyNodes, &result, path.get());
|
||||
|
||||
printAsYAML(std::cout, keyNodes, result, path.get());
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
static void insertRoot(
|
||||
const VALUE_SET& visibleFuncs, GRAPH* deps, SET* keyNodes) {
|
||||
if (!RootSymbolPatternLoc.pattern) {
|
||||
return;
|
||||
}
|
||||
SET roots;
|
||||
for (const auto& F : visibleFuncs) {
|
||||
std::string name = _name(F);
|
||||
auto demangled = _demangle(name);
|
||||
if (RootSymbolPatternLoc.pattern->match(demangled)) {
|
||||
roots.insert(name);
|
||||
if (Verbose) {
|
||||
std::cerr << "[DEBUG][ROOT_FUNC] " << demangled << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const std::string ROOT_NODE{"__ROOT__"};
|
||||
deps->emplace(ROOT_NODE, std::move(roots));
|
||||
keyNodes->insert(ROOT_NODE);
|
||||
}
|
||||
|
||||
// Scan the entire IR graph to construct function -> function dependency graph
|
||||
// as well as instructions that might register or invoke operators.
|
||||
static void scanAllFunctions(
|
||||
Module& M, GRAPH* deps, VALUE_SET* visibleFuncs,
|
||||
VALUE_SET* opRegistrationInsts, VALUE_SET* opInvocationInsts) {
|
||||
for (Function& F : M) {
|
||||
if (F.hasDefaultVisibility()) {
|
||||
visibleFuncs->insert(&F);
|
||||
}
|
||||
std::string caller = _name(&F);
|
||||
std::string callerDemangled = _demangle(caller);
|
||||
for (BasicBlock& BB : F) {
|
||||
for (Instruction& I : BB) {
|
||||
scanReferredFunctions(I, [&](Function* func) -> void {
|
||||
std::string callee = _name(func);
|
||||
std::string calleeDemangled = _demangle(callee);
|
||||
(*deps)[caller].insert(callee);
|
||||
if (Verbose > 1) {
|
||||
std::cerr << "[DEBUG][FUNC_CALL] " << callerDemangled << " => "
|
||||
<< calleeDemangled << std::endl;
|
||||
}
|
||||
// One registration/invocation API might call another registration/
|
||||
// invocation API in which case we can skip processing the nested
|
||||
// call. This is a simple trick to avoid "cannot find registered/
|
||||
// invoked op" warning and doesn't affect correctness, because
|
||||
// later in scanOpRegistration we'll walk the transitively reachable
|
||||
// IR graph again from each registration instance.
|
||||
if (!OpRegistrationPatternLoc.pattern->match(callerDemangled) &&
|
||||
OpRegistrationPatternLoc.pattern->match(calleeDemangled)) {
|
||||
(*opRegistrationInsts).insert(&I);
|
||||
}
|
||||
if (!OpInvocationPatternLoc.pattern->match(callerDemangled) &&
|
||||
OpInvocationPatternLoc.pattern->match(calleeDemangled)) {
|
||||
(*opInvocationInsts).insert(&I);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// llvm::CallGraph only searches for functions referenced by "CallSites" (i.e.
|
||||
// by call/invoke instructions). However functions can be referenced by
|
||||
// non-call/invoke instructions as well (being passed as function pointer),
|
||||
// e.g.:
|
||||
// ```
|
||||
// store i64 ptrtoint (void (%"class.at::Tensor"*, %"class.at::Tensor"*)*
|
||||
// @at::foo_op(at::Tensor const&) to i64), i64* %14, ...
|
||||
// ```
|
||||
// "@at::foo_op" is a operand of "ptrtoint", which in turn is a constant
|
||||
// operand of "store" instruction. The stored function pointer can be called
|
||||
// indirectly later on.
|
||||
//
|
||||
// Sometimes directly called functions can be in ConstExpr as well, e.g.:
|
||||
// ```
|
||||
// invoke void bitcast (
|
||||
// void (ty1*, ...)* @c10::Dispatcher::findSchema(...) to
|
||||
// void (ty2*, ...)*)(...)
|
||||
// ```
|
||||
// In the above case, "CallSite(I).getCalledFunction()" won't return "findSchema"
|
||||
// as it's nested in "bitcast" instruction.
|
||||
//
|
||||
// To cover these cases this method recursively traverses all operands of the
|
||||
// input instruction "I" to search for directly/indirectly referenced function
|
||||
// pointers by the instruction. The referenced functions might NOT actually be
|
||||
// called (which is fine for our use case). llvm::LazyCallGraph has similar
|
||||
// logic and we reuse its "visitReferences" method to traverse all operands.
|
||||
static void scanReferredFunctions(
|
||||
Instruction& I, const std::function<void(Function*)>& CB) {
|
||||
SmallVector<Constant*, 16> worklist;
|
||||
SmallPtrSet<Constant*, 16> visited;
|
||||
|
||||
if (_isCallSite(&I)) {
|
||||
Function* callee = _getCalledFunction(&I);
|
||||
if (callee && !callee->isIntrinsic() && visited.insert(callee).second) {
|
||||
CB(callee);
|
||||
}
|
||||
}
|
||||
|
||||
for (Value* op : I.operand_values()) {
|
||||
Constant* C = dyn_cast<Constant>(op);
|
||||
if (C && visited.insert(C).second) {
|
||||
worklist.push_back(C);
|
||||
}
|
||||
}
|
||||
|
||||
LazyCallGraph::visitReferences(worklist, visited, [&](Function& F) {
|
||||
if (!F.isIntrinsic()) {
|
||||
CB(&F);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Naive connectivity analysis to find out all nodes that are reachable from a
|
||||
// specific node in the IR graph by following each node's "use" edges (links to its
|
||||
// operands and users).
|
||||
// This is the core algorithm we use to find the connection between op name
|
||||
// string literals and registered/invoked functions - there should be a path
|
||||
// to connect them to the c10 op registration/invocation APIs.
|
||||
// For now the search doesn't go beyond the function boundary because the
// references to op name string literals and to the c10 op registration/invocation
// APIs are almost always in the same function.
|
||||
static void scanConnectedNodes(
|
||||
Value* src,
|
||||
VALUE_SET blocked,
|
||||
const std::function<void(Value*)>& CB, VALUE_MAP* debugPath) {
|
||||
std::deque<Value*> worklist;
|
||||
SmallPtrSet<Value*, 16> visited;
|
||||
|
||||
auto insert = [&](Value* cur, Value* parent) -> void {
|
||||
if (!blocked.count(cur) && visited.insert(cur).second) {
|
||||
worklist.push_back(cur);
|
||||
if (debugPath) {
|
||||
(*debugPath).emplace(cur, parent);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
auto expandOperands = [&](Value* V) -> void {
|
||||
// Stops if it doesn't have operands (!isa<User>) or it is a function.
|
||||
if (!isa<User>(V) || isa<Function>(V)) {
|
||||
return;
|
||||
}
|
||||
auto node = dyn_cast<User>(V);
|
||||
for (auto& O : node->operands()) {
|
||||
insert(O, node);
|
||||
}
|
||||
};
|
||||
|
||||
auto blockSiblingOperands = [&](User* U, Value* V) -> void {
|
||||
// This is to handle a special case that only appears in LLVM 9 (not in 5 - 8
|
||||
// and 10), where it can falsely associate unrelated PyTorch op
|
||||
// registrations.
|
||||
//
|
||||
// If the value `V` is used by a PHI-node `U`, then we should stop
|
||||
// crawling `U`'s operands, i.e. `V`'s siblings in `U`. E.g.:
|
||||
//
|
||||
// 114: ; preds = %111, %109
|
||||
// %115 = phi i32 [ %110, %109 ], [ %112, %111 ]
|
||||
//
|
||||
// `%115` might take the value of `%110` or `%112`, depending on from
|
||||
// which label it comes. Assuming `V` is `%110` and `U` is `%115`, we can
|
||||
// continue to scan `%115` but should not crawl `%112` as it does not
|
||||
// directly pass data from `%110` to `%112` (and vice versa).
|
||||
//
|
||||
// NB: we probably should do the same for other LLVM instructions with
|
||||
// this kind of selective semantics. But for the purpose of analyzing
|
||||
// PyTorch registrations it seems to be sufficient for now.
|
||||
if (isa<PHINode>(U)) {
|
||||
for (auto& S : U->operands()) {
|
||||
blocked.insert(S);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
auto expandUsers = [&](Value* V) -> void {
|
||||
// If the value is not constant, then the user of the value might pass
|
||||
// other value into it, e.g.:
|
||||
// store @.str.15, %10
|
||||
// invoke @c10.reg_op, %10, @foo
|
||||
// The store instruction, which is the user of "%10", passes "@.str.15" to
|
||||
// "%10" which in turn is passed to "@c10.reg_op" API function.
|
||||
// Users of constants are not interesting as they cannot change the state
|
||||
// of the constant. We skip users of functions as well assuming
|
||||
// interesting values (op names and function pointers) are not set via
|
||||
// other invocations of the function.
|
||||
if (!isa<User>(V) || isa<Constant>(V) || isa<Function>(V)) {
|
||||
return;
|
||||
}
|
||||
for (auto U : V->users()) {
|
||||
insert(U, V);
|
||||
blockSiblingOperands(U, V);
|
||||
}
|
||||
};
|
||||
|
||||
auto expand = [&](Value* V) -> void {
|
||||
expandOperands(V);
|
||||
expandUsers(V);
|
||||
};
|
||||
|
||||
expand(src);
|
||||
while (!worklist.empty()) {
|
||||
auto cur = worklist.front();
|
||||
worklist.pop_front();
|
||||
expand(cur);
|
||||
|
||||
if (isa<Function>(cur) || isa<Constant>(cur)) {
|
||||
CB(cur);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate transitive closure and remove intermediate (non-key) nodes.
|
||||
// Note that there are two types of nodes in the dependency graph:
|
||||
// 1) String literals in source files, e.g.:
|
||||
// "aten::cos_(Tensor(a!) self) -> Tensor(a!)", which represents operator
|
||||
// "schema";
|
||||
// 2) Function symbols in object files, e.g.:
|
||||
// "at::CPUType::(anonymous namespace)::cos_(at::Tensor&)";
|
||||
// Both of them are added to the dependency graph as std::string. Ultimately
|
||||
// we only care about #1 as that's what we use to prune registered ops via
|
||||
// codegen, then #2 will be stripped by the linker automatically. So the goal is
|
||||
// to remove #2 from the graph while maintaining the transitive dependency
|
||||
// between #1. #1 is called "key nodes" in this method.
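//
// For instance (an illustrative sketch, not from a real module; the function
// names are hypothetical), given key nodes {"aten::add", "aten::empty"} and
// edges
//   aten::add -> funcA, funcA -> funcB, funcB -> aten::empty
// the simplified graph keeps only the key-node-to-key-node edge
//   aten::add -> aten::empty
// and drops the intermediate function nodes funcA and funcB.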
|
||||
static void simplifyGraph(
|
||||
const GRAPH& input, SET& keyNodes, GRAPH* output, PATH* path) {
|
||||
// Starting from every key node, use BFS to traverse all nodes that are
|
||||
// transitively reachable from the node in the sparse graph.
|
||||
for (auto& key : keyNodes) {
|
||||
std::deque<std::string> queue;
|
||||
SET visited; // has some runtime issue with std::unordered_set
|
||||
auto expand = [&](const std::string& curNode) -> void {
|
||||
auto it = input.find(curNode);
|
||||
if (it == input.end()) {
|
||||
return;
|
||||
}
|
||||
for (const auto& next : it->second) {
|
||||
if (!visited.insert(next).second) {
|
||||
continue;
|
||||
}
|
||||
queue.push_back(next);
|
||||
if (path) {
|
||||
(*path)[key].emplace(next, curNode);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
expand(key);
|
||||
while (!queue.empty()) {
|
||||
auto curNode = queue.front();
|
||||
queue.pop_front();
|
||||
if (keyNodes.count(curNode)) {
|
||||
// Output links between key nodes.
|
||||
(*output)[key].insert(curNode);
|
||||
// Stop expanding key nodes.
|
||||
continue;
|
||||
}
|
||||
expand(curNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find out operator names and function pointers that are transitively
|
||||
// connected to the same 'src' instruction.
|
||||
static void scanOpSchemaStrAndFunction(
|
||||
Instruction* src, const VALUE_SET& blocked,
|
||||
const std::string& contextualNamespace,
|
||||
SET* visitedOps, SET* visitedFunctions) {
|
||||
std::shared_ptr<VALUE_MAP> debugPath =
|
||||
(Verbose > 2 ? std::make_shared<VALUE_MAP>() : nullptr);
|
||||
auto callback = [&](Value* V) -> void {
|
||||
if (auto schemaStr = extractOpSchema(contextualNamespace, V)) {
|
||||
if (visitedOps) {
|
||||
// NB: Some debug string constants might be connected to the
|
||||
// registration instruction, e.g.: "Lambda". Since we have factored
|
||||
// out the namespace from the op schema string, there is no longer a simple
|
||||
// way to identify these fake ops. For now we simply take the first
|
||||
// instance as the real op name is closest to the seed instruction
|
||||
// in BFS order.
|
||||
if (!visitedOps->empty()) {
|
||||
if (Verbose) {
|
||||
std::cerr << "[INFO] ignore extra op schema str: " << *schemaStr
|
||||
<< " in: " << _demangle(_name(src->getFunction()))
|
||||
<< ", because already found valid op schema str: "
|
||||
<< *visitedOps->begin() << std::endl;
|
||||
}
|
||||
} else {
|
||||
(*visitedOps).insert(*schemaStr);
|
||||
}
|
||||
}
|
||||
if (Verbose > 1) {
|
||||
std::cerr << "[DEBUG][OP_SCHEMA] " << *schemaStr << std::endl;
|
||||
printDebugPath(debugPath.get(), src, V);
|
||||
}
|
||||
} else if (auto F = dyn_cast<Function>(V)) {
|
||||
if (F->isIntrinsic()) {
|
||||
return;
|
||||
}
|
||||
if (visitedFunctions) {
|
||||
(*visitedFunctions).insert(_name(F));
|
||||
}
|
||||
if (Verbose > 1) {
|
||||
std::cerr << "[DEBUG][FUNC] " << _demangle(_name(F)) << std::endl;
|
||||
printDebugPath(debugPath.get(), src, V);
|
||||
}
|
||||
}
|
||||
};
|
||||
scanConnectedNodes(src, blocked, callback, debugPath.get());
|
||||
}
|
||||
|
||||
// This method looks for op schema strings and function pointers that connect
|
||||
// to the same c10 op registration API call via "use" edges (bi-directional)
|
||||
// in IR graph - exploring both nodes being used (operands) by the node and
|
||||
// nodes using (users) the node.
|
||||
//
|
||||
// It assumes that the function pointers are needed (registered) for the op.
|
||||
//
|
||||
// For example, from op name "aten::add" to registration API call:
|
||||
// [OP_SCHEMA] aten::add
|
||||
// [PATH][1][CONST] [70 x i8] c"aten::add.Scalar(Tensor self...\00"
|
||||
// [PATH][2][CONST] @.str.55.20575 = private unnamed_addr constant [70 x i8]
|
||||
// c"aten::add.Scalar(Tensor self, ...\00", align 1
|
||||
// [PATH][3][CONST] i8* getelementptr inbounds ([70 x i8], [70 x i8]*
|
||||
// @.str.55.20575, i64 0, i64 0)
|
||||
// [PATH][4][INST] invoke void @std::basic_string<...>::basic_string(...)
|
||||
// (%"class.std::basic_string"* ... %1477,
|
||||
// i8* getelementptr ... @.str.55.20575 ...)
|
||||
// [PATH][5][INST] %1477 = alloca %"class.std::basic_string" ...
|
||||
// [PATH][6][INST] %4086 = invoke ...
|
||||
// @c10::RegisterOperators::Options::schema(... %1477)
|
||||
// [PATH][7][INST] %4088 = invoke ... @...catchAllKernel...(... %4086, ...
|
||||
// @at::TypeDefault::add(at::Tensor const&...))
|
||||
// [PATH][8][INST] %4090 = invoke ...
|
||||
// &&(%"class.c10::RegisterOperators::Options"*... %4088 ...)
|
||||
// [PATH][9][INST] invoke void
|
||||
// @c10::RegisterOperators::checkSchemaAndRegisterOp_(...
|
||||
// %"class.c10::RegisterOperators::Options"* ... %4090)
|
||||
//
|
||||
// From function pointer to registration API call:
|
||||
// [FUNC] at::TypeDefault::add(at::Tensor const&, c10::Scalar, c10::Scalar)
|
||||
// [PATH][1][FUNC] at::TypeDefault::add(at::Tensor const&...)
|
||||
// [PATH][2][INST] %4088 = invoke ... @...catchAllKernel...(... %4086, ...
|
||||
// @at::TypeDefault::add(at::Tensor const&...))
|
||||
// [PATH][3][INST] %4090 = invoke ...
|
||||
// &&(%"class.c10::RegisterOperators::Options"*... %4088 ...)
|
||||
// [PATH][4][INST] invoke void
|
||||
// @c10::RegisterOperators::checkSchemaAndRegisterOp_(...
|
||||
// %"class.c10::RegisterOperators::Options"* ... %4090)
|
||||
static void scanOpRegistration(
|
||||
VALUE_SET& instructions, SET* opSchemaStrs, GRAPH* schemaStrToFunctions) {
|
||||
for (auto V : instructions) {
|
||||
auto I = dyn_cast<Instruction>(V);
|
||||
// We only need to process call/invoke instructions.
|
||||
if (!I || !_isCallSite(I)) {
|
||||
continue;
|
||||
}
|
||||
auto contextualNamespace = inferContextualNamespace(I);
|
||||
if (Verbose && !contextualNamespace.empty()) {
|
||||
std::cerr << "[DEBUG][REG][NAMESPACE] " << contextualNamespace
|
||||
<< std::endl;
|
||||
}
|
||||
if (Verbose > 2) {
|
||||
std::cerr << "[DEBUG][REG][INST] " << *I << std::endl;
|
||||
}
|
||||
SET visitedOps, visitedFunctions;
|
||||
// Pass in "instructions" set as "blocked" set - all operator registration
|
||||
// calls are connected to the global op registry object, so we should avoid
|
||||
// going from one op registration call to another op registration call via
|
||||
// the global registry object.
|
||||
scanOpSchemaStrAndFunction(
|
||||
I, instructions, contextualNamespace, &visitedOps, &visitedFunctions);
|
||||
if (visitedOps.size() != 1) {
|
||||
std::cerr << "[WARNING] found " << visitedOps.size() << " ops ( ";
|
||||
for (auto& op : visitedOps) {
|
||||
std::cerr << op << " ";
|
||||
}
|
||||
std::cerr << ") in a registration call in function: "
|
||||
<< _demangle(_name(I->getFunction()))
|
||||
<< " contextualNamespace: " << contextualNamespace
|
||||
<< std::endl;
|
||||
}
|
||||
for (const auto& op : visitedOps) {
|
||||
opSchemaStrs->insert(op);
|
||||
if (visitedFunctions.empty()) {
|
||||
std::cerr << "[WARNING] could not find registered function for op: "
|
||||
<< op << " in function: "
|
||||
<< _demangle(_name(I->getFunction()))
|
||||
<< " contextualNamespace: " << contextualNamespace
|
||||
<< std::endl;
|
||||
}
|
||||
for (const auto& func : visitedFunctions) {
|
||||
(*schemaStrToFunctions)[op].insert(func);
|
||||
if (Verbose) {
|
||||
std::cerr << "[DEBUG][OP_REG] " << op << " => "
|
||||
<< _demangle(func) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
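// Infers the namespace to prepend to schema strings found inside a
// TORCH_LIBRARY init function: the first capturing group of a matching
// -torch_library_init_pattern is extracted from the demangled name of the
// enclosing function. As an illustrative sketch (hypothetical symbol name,
// assuming the default pattern from run_analyzer.sh), a function named
// "TORCH_LIBRARY_init_quantized(torch::Library&)" would yield "quantized::".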
static std::string inferContextualNamespace(Instruction* I) {
|
||||
auto functionName = _demangle(_name(I->getFunction()));
|
||||
for (auto& pattern : TorchLibraryInitPattern) {
|
||||
if (!pattern.pattern->match(functionName)) {
|
||||
continue;
|
||||
}
|
||||
if (Verbose) {
|
||||
std::cerr << "[DEBUG][REG][INIT_FUNC] " << functionName << std::endl;
|
||||
}
|
||||
return pattern.pattern->sub("\\1", functionName) + "::";
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
// Similar to scanOpRegistration - it searches for op schema strings that
// connect to a c10 op invocation API call and assumes the parent function of
// the API call invokes the operator.
|
||||
//
|
||||
// For example, from op name "aten::empty" to invocation API call:
|
||||
// [OP_SCHEMA] aten::empty
|
||||
// [PATH][1][CONST] [12 x i8] c"aten::empty\00"
|
||||
// [PATH][2][CONST] @.str.69.1990 = private unnamed_addr constant [12 x i8]
|
||||
// c"aten::empty\00", align 1
|
||||
// [PATH][3][CONST] i8* getelementptr inbounds ([12 x i8], [12 x i8]*
|
||||
// @.str.69.1990, i64 0, i64 0)
|
||||
// [PATH][4][INST] invoke void @std::basic_string<...>::basic_string(...
|
||||
// (%"class.std::basic_string"* nonnull %19,
|
||||
// i8* getelementptr inbounds ([12 x i8], [12 x i8]*
|
||||
// @.str.69.1990, i64 0, i64 0) ...
|
||||
// [PATH][5][INST] %19 = alloca %"class.std::basic_string", align 8
|
||||
// [PATH][6][INST] %53 = bitcast %"class.std::basic_string"* %19 to i64*
|
||||
// [PATH][7][INST] %54 = load i64, i64* %53, align 8, !tbaa !4
|
||||
// [PATH][8][INST] store i64 %54, i64* %55, align 8, !tbaa !4
|
||||
// [PATH][9][INST] %55 = bitcast %"struct.c10::OperatorName"* %18 to i64*
|
||||
// [PATH][10][INST] %18 = alloca %"struct.c10::OperatorName", align 8
|
||||
// [PATH][11][INST] invoke void @c10::Dispatcher::findSchema(c10::OperatorName
|
||||
// const&)(%"class.c10::optional.105"* nonnull sret %17,
|
||||
// %"class.c10::Dispatcher.6320"* nonnull %45,
|
||||
// %"struct.c10::OperatorName"* nonnull dereferenceable(16)
|
||||
// %18)
|
||||
static void scanOpInvocation(
|
||||
VALUE_SET& instructions, SET* opSchemaStrs, GRAPH* functionToSchemaStrs) {
|
||||
for (auto V : instructions) {
|
||||
auto I = dyn_cast<Instruction>(V);
|
||||
// We only need to process call/invoke instructions.
|
||||
if (!I || !_isCallSite(I)) {
|
||||
continue;
|
||||
}
|
||||
if (Verbose > 2) {
|
||||
std::cerr << "[DEBUG][CALL][INST] " << *I << std::endl;
|
||||
}
|
||||
std::string caller = _name(I->getFunction());
|
||||
SET visitedOps;
|
||||
scanOpSchemaStrAndFunction(I, {}, {}, &visitedOps, nullptr);
|
||||
if (visitedOps.size() != 1) {
|
||||
std::cerr << "[WARNING] found " << visitedOps.size() << " ops ( ";
|
||||
for (auto& op : visitedOps) {
|
||||
std::cerr << op << " ";
|
||||
}
|
||||
std::cerr << ") in a invocation call in function: "
|
||||
<< _demangle(caller) << std::endl;
|
||||
}
|
||||
for (const auto& op : visitedOps) {
|
||||
opSchemaStrs->insert(op);
|
||||
(*functionToSchemaStrs)[caller].insert(op);
|
||||
if (Verbose) {
|
||||
std::cerr << "[DEBUG][OP_CALL] " << _demangle(caller) << " => "
|
||||
<< op << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void extractStringValue(
|
||||
Value* V, const std::function<void(const std::string&)>& CB) {
|
||||
if (isa<UndefValue>(V)) {
|
||||
// UndefValue inherits from Constant, but doesn't contain any data
|
||||
// See: https://llvm.org/docs/LangRef.html#undefined-values
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto array = dyn_cast<ConstantDataArray>(V)) {
|
||||
// Normal case for C-style string literals and "std::basic_string".
|
||||
if (array->isCString()) {
|
||||
CB(array->getAsCString().str());
|
||||
} else if (array->isString()) {
|
||||
std::cerr << "[WARNING] ignore non-C string: "
|
||||
<< array->getAsString().str() << std::endl;
|
||||
}
|
||||
} else if (auto CI = dyn_cast<ConstantInt>(V)) {
|
||||
// A short string literal might be encoded into a constant integer, e.g.:
|
||||
// "aten::AA" => 4702103508586165345 (0x41413A3A6E657461)
|
||||
// This can be tricky as it depends on consistent endianness/size.
|
||||
// Seen this case for "std::__1::basic_string" ABI.
|
||||
uint64_t intValue = CI->getZExtValue();
|
||||
auto data = reinterpret_cast<const char*>(&intValue);
|
||||
CB({data, data + sizeof(uint64_t)/sizeof(char)});
|
||||
} else if (auto C = dyn_cast<Constant>(V)) {
|
||||
// A short string literal might be in a constant vector, e.g.:
|
||||
// store <2 x i64> <i64 8, i64 4702103508586165345>, <2 x i64>* %25
|
||||
// Recursively extract each element to cover this case.
|
||||
// Seen this case for "std::__cxx11::basic_string" ABI.
|
||||
for (unsigned i = 0; auto elem = C->getAggregateElement(i); ++i) {
|
||||
extractStringValue(elem, CB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static std::shared_ptr<std::string> extractOpSchema(
|
||||
const std::string& contextualNamespace, Value* V) {
|
||||
std::vector<std::string> schemaStrs;
|
||||
extractStringValue(V, [&](const std::string& str) {
|
||||
// NB: some operator names might contain namespace. If this occurs, we
|
||||
// MUST NOT use the contextual namespace. Fortunately, it's easy to tell
|
||||
// if a namespace is included: a double colon will be present.
|
||||
// In particular, this will occur with TORCH_SELECTIVE_NAME.
|
||||
const std::string& schemaStr =
|
||||
(contextualNamespace.empty() || str.find("::") != std::string::npos)
|
||||
? str : contextualNamespace + str;
|
||||
if (FunctionSchemaPatternLoc.pattern->match(schemaStr)) {
|
||||
schemaStrs.push_back(schemaStr);
|
||||
}
|
||||
});
|
||||
if (schemaStrs.empty()) {
|
||||
return {};
|
||||
}
|
||||
if (schemaStrs.size() > 1) {
|
||||
std::cerr << "[WARNING] found " << schemaStrs.size()
|
||||
<< " op schema strings in one value!" << std::endl;
|
||||
}
|
||||
const std::string schemaStr = schemaStrs[0];
|
||||
auto pos = schemaStr.find_first_of(".(");
|
||||
return std::make_shared<std::string>(
|
||||
pos == std::string::npos ? schemaStr : schemaStr.substr(0, pos));
|
||||
}
|
||||
|
||||
static void printDebugPath(
|
||||
const VALUE_MAP* debugPath, Value* src, Value* dest) {
|
||||
if (!debugPath) {
|
||||
return;
|
||||
}
|
||||
int depth = 0;
|
||||
for (auto N = dest; ; N = debugPath->at(N)) {
|
||||
std::cerr << "[DEBUG][PATH][" << ++depth << "]";
|
||||
printDebugValue(N);
|
||||
std::cerr << std::endl;
|
||||
if (N == src) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void printDebugValue(Value* V) {
|
||||
if (auto F = dyn_cast<Function>(V)) {
|
||||
std::cerr << "[FUNC] " << _demangle(_name(F));
|
||||
} else if (isa<Constant>(V)) {
|
||||
std::cerr << "[CONST] " << *V;
|
||||
} else if (isa<Instruction>(V)) {
|
||||
std::cerr << "[INST] " << *V;
|
||||
} else if (V) {
|
||||
std::cerr << "[VALUE] " << *V;
|
||||
} else {
|
||||
std::cerr << "NULL";
|
||||
}
|
||||
}
|
||||
|
||||
static void printAsYAML(
|
||||
std::ostream& out, const SET& keys, const GRAPH& graph,
|
||||
const PATH* path) {
|
||||
for (const auto& K : keys) {
|
||||
out << "- name: " << _demangle(K) << std::endl;
|
||||
auto it = graph.find(K);
|
||||
if (it == graph.end() || it->second.empty()) {
|
||||
continue;
|
||||
}
|
||||
out << " depends:" << std::endl;
|
||||
for (const auto& value : it->second) {
|
||||
out << " - name: " << _demangle(value) << std::endl;
|
||||
if (path) {
|
||||
std::vector<std::string> rpath;
|
||||
for (std::string prev = value;
|
||||
rpath.push_back(prev), prev != K;
|
||||
prev = path->at(K).at(prev));
|
||||
out << " path:" << std::endl;
|
||||
for (auto pit = rpath.rbegin(); pit != rpath.rend(); ++pit) {
|
||||
out << " - " << _demangle(*pit) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
char OpDependency::ID = 0;
|
||||
static RegisterPass<OpDependency> X("op_dependency", "Op Dependency Pass");
|
@ -1,137 +0,0 @@
|
||||
"""
|
||||
This util is used to parse op_deps_pass output (in yaml) and convert it into
|
||||
other formats for downstream use cases. It is not used by the OSS CMake build.
|
||||
|
||||
To run this file by hand from the root of the PyTorch repository, run:
|
||||
|
||||
python -m tools.code_analyzer.op_deps_processor \
|
||||
--op_dependency build_code_analyzer/work/torch_result.yaml \
|
||||
--output pt_deps.bzl
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import yaml
|
||||
from typing import Any, List
|
||||
|
||||
from tools.codegen.code_template import CodeTemplate
|
||||
|
||||
BAZEL_OUTPUT = CodeTemplate("""\
|
||||
TORCH_DEPS = {
|
||||
${ops}
|
||||
}
|
||||
""")
|
||||
|
||||
BAZEL_OP = CodeTemplate("""\
|
||||
"${op_name}": [
|
||||
${op_deps}
|
||||
],
|
||||
""")
|
||||
|
||||
BAZEL_OP_DEP = CodeTemplate("""\
|
||||
"${dep_name}",
|
||||
""")
|
||||
|
||||
DOT_OUTPUT = CodeTemplate("""\
|
||||
digraph {
|
||||
layout="circo";
|
||||
${ops}
|
||||
}
|
||||
""")
|
||||
|
||||
DOT_OP = CodeTemplate("""\
|
||||
${op_deps}
|
||||
""")
|
||||
|
||||
DOT_OP_DEP = CodeTemplate("""\
|
||||
"${op_name}" -> "${dep_name}";
|
||||
""")
|
||||
|
||||
|
||||
def load_op_deps(fname: str) -> Any:
|
||||
with open(fname, 'r') as stream:
|
||||
return yaml.safe_load(stream)
|
||||
|
||||
|
||||
def process_base_ops(graph: Any, base_ops: List[str]) -> None:
|
||||
# remove base ops from all `depends` lists to compress the output graph
|
||||
for op in graph:
|
||||
op['depends'] = [
|
||||
dep for dep in op.get('depends', []) if dep['name'] not in base_ops
|
||||
]
|
||||
|
||||
# add base ops section at the beginning
|
||||
graph.insert(0, {
|
||||
'name': '__BASE__',
|
||||
'depends': [{'name': name} for name in base_ops]})
|
||||
|
||||
|
||||
def convert(
|
||||
fname: str,
|
||||
graph: Any,
|
||||
output_template: CodeTemplate,
|
||||
op_template: CodeTemplate,
|
||||
op_dep_template: CodeTemplate,
|
||||
) -> None:
|
||||
ops = []
|
||||
for op in graph:
|
||||
op_name = op['name']
|
||||
op_deps = []
|
||||
|
||||
for dep in op.get('depends', []):
|
||||
dep_name = dep['name']
|
||||
if dep_name == op_name:
|
||||
# skip self-references
|
||||
continue
|
||||
op_deps.append(
|
||||
op_dep_template.substitute(
|
||||
op_name=op_name,
|
||||
dep_name=dep_name))
|
||||
|
||||
if not op_deps:
|
||||
# skip ops without any fanout
|
||||
continue
|
||||
|
||||
ops.append(
|
||||
op_template.substitute(
|
||||
op_name=op_name,
|
||||
op_deps=op_deps))
|
||||
|
||||
with open(fname, 'w') as out:
|
||||
out.write(output_template.substitute(ops=ops))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Util to parse & convert op_deps_pass output')
|
||||
parser.add_argument(
|
||||
'--op_dependency',
|
||||
required=True,
|
||||
help='input yaml file of op dependency graph produced by op_deps_pass')
|
||||
parser.add_argument(
|
||||
'--format',
|
||||
default='bazel',
|
||||
help='output file format [bazel, dot]')
|
||||
parser.add_argument(
|
||||
'--base_ops',
|
||||
nargs='*',
|
||||
help='optional list of `base` ops that should always be kept in '
|
||||
'custom build, to make the output stable from trivial changes; '
|
||||
'each item is `namespace`::`operator name` without overload; '
|
||||
'e.g.: aten::empty aten::size ...')
|
||||
parser.add_argument(
|
||||
'--output',
|
||||
required=True,
|
||||
help='output file')
|
||||
args = parser.parse_args()
|
||||
|
||||
deps = load_op_deps(args.op_dependency)
|
||||
|
||||
if args.base_ops:
|
||||
process_base_ops(deps, args.base_ops)
|
||||
|
||||
if args.format == 'bazel':
|
||||
convert(args.output, deps, BAZEL_OUTPUT, BAZEL_OP, BAZEL_OP_DEP)
|
||||
elif args.format == 'dot':
|
||||
convert(args.output, deps, DOT_OUTPUT, DOT_OP, DOT_OP_DEP)
|
||||
else:
|
||||
raise Exception("Unknown output format: " + args.format)
|
@ -1,28 +0,0 @@
|
||||
#!/bin/bash
|
||||
##############################################################################
|
||||
# Invoke code analyzer binary with pre-defined parameters for LibTorch.
|
||||
# This script should be called via build.sh. Do NOT use it directly.
|
||||
##############################################################################
|
||||
|
||||
set -exu
|
||||
|
||||
echo "Analyze: ${INPUT}"
|
||||
|
||||
# NB: op_register_pattern actually contains "too" many entries. We only
# need to match symbols that remain after inlining, and most of the
# public API for the registration API disappears after inlining (e.g.,
# only _def and _impl are retained). But the inliner isn't guaranteed
# to run, so for safety we match a more expansive set.
|
||||
"${ANALYZER_BIN}" \
|
||||
-op_schema_pattern="^(_aten|_prim|aten|quantized|_quantized|prepacked|profiler|_test)::[a-zA-Z0-9_.]+(\(.*)?$" \
|
||||
-op_register_pattern="c10::RegisterOperators::(op|checkSchemaAndRegisterOp_)|c10::Module::(_?def|_?impl)|torch::Library::(_?def|_?impl)" \
|
||||
-op_invoke_pattern="c10::Dispatcher::findSchema" \
|
||||
-root_symbol_pattern="torch::jit::[^(]" \
|
||||
-torch_library_init_pattern="^.*TORCH_LIBRARY_init_([^(]+)(\(.*)?$" \
|
||||
-torch_library_init_pattern="^.*TORCH_LIBRARY_FRAGMENT_init_([_]*[^_]+)_[0-9]+(\(.*)?$" \
|
||||
-torch_library_init_pattern="^.*TORCH_LIBRARY_IMPL_init_([_]*[^_]+)_([^_]+)_[0-9]+(\(.*)?$" \
|
||||
${EXTRA_ANALYZER_FLAGS} \
|
||||
"${INPUT}" \
|
||||
> "${OUTPUT}"
|
||||
|
||||
echo "Result: ${OUTPUT}"
|