[pytorch][mobile] deprecate the LLVM-based static analyzer (#68180)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/68180

Since we've open-sourced the tracing-based selective build, we can deprecate the
op-dependency-graph-based selective build and the LLVM-based static analyzer tool
that produces the dependency graph.
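
For reference, a rough before/after sketch of the custom mobile build flow. The commands below are lifted from the removed CMake warning and build scripts in this diff; the exact flag for enabling the tracing-based path (the TRACING_BASED spelling and value) is an assumption rather than a verified invocation.

    # Old flow (removed): build the LLVM analyzer to produce the op dependency
    # graph, then feed that graph to the custom build for dynamic dispatch.
    LLVM_DIR=/usr ANALYZE_TORCH=1 tools/code_analyzer/build.sh
    scripts/build_android.sh -DSELECTED_OP_LIST=<op_list.yaml> -DOP_DEPENDENCY=<dependency_graph.yaml>

    # New flow: the tracing-based selective build consumes the traced operator
    # list directly; no dependency graph and no LLVM toolchain are needed.
    scripts/build_android.sh -DSELECTED_OP_LIST=<traced_op_list.yaml> -DTRACING_BASED=ON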
ghstack-source-id: 143108377

Test Plan: CIs

Reviewed By: seemethere

Differential Revision: D32358467

fbshipit-source-id: c61523706b85a49361416da2230ec1b035b8b99c
Authored by Jiakai Liu on 2021-11-11 16:35:18 -08:00, committed by Facebook GitHub Bot
parent 301369a774, commit 3dc0754c53
28 changed files with 11 additions and 2320 deletions

View File

@@ -16,7 +16,6 @@ per-file-ignores = __init__.py: F401 torch/utils/cpp_extension.py: B950
optional-ascii-coding = True
exclude =
./.git,
./build_code_analyzer,
./build_test_custom_build,
./build,
./caffe2,

View File

@@ -19,8 +19,6 @@
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-code-analysis",
"linux-xenial-py3-clang5-mobile-custom-build-dynamic",
"linux-xenial-py3-clang5-mobile-custom-build-static",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
@@ -77,7 +75,6 @@
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-custom-build-dynamic",
"linux-xenial-py3-clang5-mobile-custom-build-static",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
@@ -113,8 +110,6 @@
"linux-vulkan-bionic-py3.6-clang9",
"linux-xenial-cuda11.3-py3.6-gcc7",
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-code-analysis",
"linux-xenial-py3-clang5-mobile-custom-build-dynamic",
"linux-xenial-py3-clang5-mobile-custom-build-static",
"linux-xenial-py3.6-clang7-asan",
"linux-xenial-py3.6-clang7-onnx",
@@ -143,8 +138,6 @@
],
"ciflow/mobile": [
"linux-xenial-py3-clang5-mobile-build",
"linux-xenial-py3-clang5-mobile-code-analysis",
"linux-xenial-py3-clang5-mobile-custom-build-dynamic",
"linux-xenial-py3-clang5-mobile-custom-build-static"
],
"ciflow/noarch": [

View File

@@ -368,17 +368,6 @@ LINUX_WORKFLOWS = [
labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT},
),
),
CIWorkflow(
arch="linux",
build_environment="linux-xenial-py3-clang5-mobile-custom-build-dynamic",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
test_runner_type=LINUX_CPU_TEST_RUNNER,
build_generates_artifacts=False,
exclude_test=True,
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT},
),
),
CIWorkflow(
arch="linux",
build_environment="linux-xenial-py3-clang5-mobile-custom-build-static",
@@ -390,17 +379,6 @@ LINUX_WORKFLOWS = [
labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE, LABEL_CIFLOW_DEFAULT},
),
),
CIWorkflow(
arch="linux",
build_environment="linux-xenial-py3-clang5-mobile-code-analysis",
docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
test_runner_type=LINUX_CPU_TEST_RUNNER,
build_generates_artifacts=False,
exclude_test=True,
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_LINUX, LABEL_CIFLOW_MOBILE},
),
),
CIWorkflow(
arch="linux",
build_environment="linux-xenial-py3.6-clang7-asan",

View File

@@ -1,244 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-py3-clang5-mobile-code-analysis
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-py3-clang5-mobile-code-analysis
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
IS_GHA: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AWS_DEFAULT_REGION: us-east-1
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
concurrency:
group: linux-xenial-py3-clang5-mobile-code-analysis-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assignee.login == 'pytorchbot') }}
LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile') }}
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
if: ${{ (github.repository == 'pytorch/pytorch') && (
(github.event_name == 'push') ||
(github.event_name == 'schedule') ||
(contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile')) ||
(false))
}}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
build:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
timeout-minutes: 240
env:
JOB_BASE_NAME: linux-xenial-py3-clang5-mobile-code-analysis-build
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_SKIP_S3_UPLOAD: 1
working-directory: .circleci/docker
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
./build_docker.sh
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Build
env:
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e AWS_DEFAULT_REGION \
-e IS_GHA \
-e CIRCLE_PR_NUMBER \
-e CIRCLE_SHA1 \
-e CIRCLE_BRANCH \
-e GITHUB_RUN_ID \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af

View File

@@ -1,244 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_ci_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-xenial-py3-clang5-mobile-custom-build-dynamic
on:
pull_request:
types: [opened, synchronize, reopened, unassigned]
push:
branches:
- master
- release/*
workflow_dispatch:
env:
BUILD_ENVIRONMENT: linux-xenial-py3-clang5-mobile-custom-build-dynamic
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
TORCH_CUDA_ARCH_LIST: 5.2
IN_CI: 1
IS_GHA: 1
# This is used for the phase of adding wheel tests only, will be removed once completed
IN_WHEEL_TEST: 1
# Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AWS_DEFAULT_REGION: us-east-1
CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
concurrency:
group: linux-xenial-py3-clang5-mobile-custom-build-dynamic-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
ciflow_should_run:
runs-on: ubuntu-18.04
timeout-minutes: 240
env:
IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assignee.login == 'pytorchbot') }}
LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile') }}
LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
if: ${{ (github.repository == 'pytorch/pytorch') && (
(github.event_name == 'push') ||
(github.event_name == 'schedule') ||
(contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/mobile')) ||
((github.event_name == 'pull_request' && github.event.action != 'unassigned') && !contains(join(github.event.pull_request.labels.*.name), 'ciflow/')))
}}
steps:
- name: noop
run: echo running ciflow_should_run
- name: print labels
run: echo "${LABELS}"
build:
runs-on: linux.2xlarge
needs: [ciflow_should_run]
timeout-minutes: 240
env:
JOB_BASE_NAME: linux-xenial-py3-clang5-mobile-custom-build-dynamic-build
outputs:
docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
- name: Log in to ECR
env:
AWS_RETRY_MODE: standard
AWS_MAX_ATTEMPTS: 5
run: |
AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
--password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE:?}/*"
rm -f ~/.ssh/authorized_keys
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Preserve github env variables for use in docker
run: |
env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
# deep clone, to allow use of git merge-base
fetch-depth: 0
submodules: recursive
- name: Calculate docker image tag
id: calculate-tag
run: |
DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
echo "::set-output name=docker_tag::${DOCKER_TAG}"
echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
- name: Check if image should be built
id: check
env:
BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
run: |
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
exit 0
fi
if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
# if we're on the base branch then use the parent commit
MERGE_BASE=$(git rev-parse HEAD~)
else
# otherwise we're on a PR, so use the most recent base commit
MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
fi
# Covers the case where a previous tag doesn't exist for the tree
# this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
echo ::set-output name=rebuild::yes
- name: Build and push docker image
if: ${{ steps.check.outputs.rebuild }}
env:
DOCKER_SKIP_S3_UPLOAD: 1
working-directory: .circleci/docker
run: |
export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
./build_docker.sh
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Build
env:
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
run: |
# detached container should get cleaned up by teardown_ec2_linux
container_name=$(docker run \
-e BUILD_ENVIRONMENT \
-e JOB_BASE_NAME \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e AWS_DEFAULT_REGION \
-e IS_GHA \
-e CIRCLE_PR_NUMBER \
-e CIRCLE_SHA1 \
-e CIRCLE_BRANCH \
-e GITHUB_RUN_ID \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e TORCH_CUDA_ARCH_LIST \
-e PR_LABELS \
-e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--tty \
--detach \
--user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
- name: Display and upload binary build size statistics (Click Me)
# temporary hack: set CIRCLE_* vars, until we update
# tools/stats/print_test_stats.py to natively support GitHub Actions
env:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
run: |
COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
export COMMIT_TIME
pip3 install requests==2.26 boto3==1.16.34
python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
- name: Chown workspace
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
- name: Hold runner for 2 hours or until ssh sessions have drained
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Clean up docker images
if: always()
run: |
# Prune all of the docker images
docker system prune -af

View File

@@ -1,21 +0,0 @@
#!/usr/bin/env bash
# DO NOT ADD 'set -x' so as not to reveal CircleCI secret context environment variables
set -eu -o pipefail
# This script builds and runs code analyzer tool to generate aten op dependency
# graph for custom mobile build.
# shellcheck disable=SC2034
COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}"
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
echo "Clang version:"
clang --version
LLVM_DIR="$(llvm-config-5.0 --prefix)"
export LLVM_DIR
echo "LLVM_DIR: ${LLVM_DIR}"
time ANALYZE_TEST=1 CHECK_RESULT=1 tools/code_analyzer/build.sh

View File

@@ -26,11 +26,6 @@ retry pip install --pre torch torchvision \
# binary, and running forward pass with a real model.
if [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-static* ]]; then
TEST_CUSTOM_BUILD_STATIC=1 test/mobile/custom_build/build.sh
elif [[ "$BUILD_ENVIRONMENT" == *-mobile-custom-build-dynamic* ]]; then
LLVM_DIR="$(llvm-config-5.0 --prefix)"
export LLVM_DIR
echo "LLVM_DIR: ${LLVM_DIR}"
TEST_CUSTOM_BUILD_DYNAMIC=1 test/mobile/custom_build/build.sh
else
TEST_DEFAULT_BUILD=1 test/mobile/custom_build/build.sh
fi

View File

@@ -20,10 +20,6 @@ if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
fi
if [[ "$BUILD_ENVIRONMENT" == *-mobile-code-analysis* ]]; then
exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile-code-analysis.sh" "$@"
fi
if [[ "$BUILD_ENVIRONMENT" == *linux-xenial-cuda11.3* ]]; then
# Enabling DEPLOY build (embedded torch python interpreter, experimental)
# only on one config for now, can expand later

View File

@@ -3,7 +3,6 @@ code = 'FLAKE8'
include_patterns = ['**/*.py']
exclude_patterns = [
'.git/**',
'build_code_analyzer',
'build_test_custom_build/**',
'build/**',
'caffe2/**',

View File

@@ -423,8 +423,6 @@ else()
endif()
set(SELECTED_OP_LIST "" CACHE STRING
"Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default.")
set(OP_DEPENDENCY "" CACHE STRING
"Path to the yaml file that contains the op dependency graph for custom build.")
set(STATIC_DISPATCH_BACKEND "" CACHE STRING
"Name of the backend for which static dispatch code is generated, e.g.: CPU.")
option(

View File

@@ -173,37 +173,16 @@ if(INTERN_BUILD_ATEN_OPS)
endif()
if(SELECTED_OP_LIST)
# With static dispatch we can omit the OP_DEPENDENCY flag. It will not calculate the transitive closure
# of used ops. It only needs to register used root ops.
if(TRACING_BASED)
message(STATUS "Running tracing-based selective build given operator list: ${SELECTED_OP_LIST}")
list(APPEND CUSTOM_BUILD_FLAGS
--op_selection_yaml_path ${SELECTED_OP_LIST})
elseif(NOT STATIC_DISPATCH_BACKEND AND NOT OP_DEPENDENCY)
elseif(NOT STATIC_DISPATCH_BACKEND)
message(WARNING
"For custom build with dynamic dispatch you have to provide the dependency graph of PyTorch operators.\n"
"Switching to STATIC_DISPATCH_BACKEND=CPU. If you run into problems with static dispatch and still want"
" to use selective build with dynamic dispatch, please try:\n"
"1. Run the static analysis tool to generate the dependency graph, e.g.:\n"
" LLVM_DIR=/usr ANALYZE_TORCH=1 tools/code_analyzer/build.sh\n"
"2. Run the custom build with the OP_DEPENDENCY option pointing to the generated dependency graph, e.g.:\n"
" scripts/build_android.sh -DSELECTED_OP_LIST=<op_list.yaml> -DOP_DEPENDENCY=<dependency_graph.yaml>\n"
"You have to run tracing-based selective build with dynamic dispatch.\n"
"Switching to STATIC_DISPATCH_BACKEND=CPU."
)
set(STATIC_DISPATCH_BACKEND CPU)
else()
execute_process(
COMMAND
"${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_LIST_DIR}/../tools/code_analyzer/gen_op_registration_allowlist.py
--op-dependency "${OP_DEPENDENCY}"
--root-ops "${SELECTED_OP_LIST}"
OUTPUT_VARIABLE OP_REGISTRATION_WHITELIST
OUTPUT_STRIP_TRAILING_WHITESPACE
)
separate_arguments(OP_REGISTRATION_WHITELIST)
message(STATUS "Custom build with op registration whitelist: ${OP_REGISTRATION_WHITELIST}")
list(APPEND CUSTOM_BUILD_FLAGS
--force_schema_registration
--op_registration_whitelist ${OP_REGISTRATION_WHITELIST})
endif()
endif()
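
In effect, the removed else() branch above was the manual dynamic-dispatch path: expand the model's root ops into the transitive closure of dependent ops using the dependency graph, then hand the expanded allowlist to the ATen codegen. A minimal sketch of what that amounted to (script name and arguments as in the removed CMake code; the yaml file names are placeholders):

    # 1) Expand root ops via the op dependency graph (removed step).
    python tools/code_analyzer/gen_op_registration_allowlist.py \
        --op-dependency <dependency_graph.yaml> \
        --root-ops <model_root_ops.yaml>
    # 2) The resulting list of op names was forwarded to codegen as
    #    --force_schema_registration --op_registration_whitelist <ops...>
    # After this change, only the TRACING_BASED branch (--op_selection_yaml_path)
    # and the static-dispatch fallback remain.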

View File

@@ -11,13 +11,9 @@
# toolchain), which doesn't contain autograd function nor backward ops thus is
# smaller than full LibTorch.
#
# 2. `TEST_CUSTOM_BUILD_DYNAMIC=1 ./build.sh` - it further optimizes libtorch
# size by only including ops used by a specific model.
# Note that LLVM_DIR environment variable should be set to the location of
# LLVM-dev toolchain.
#
# 3. `TEST_CUSTOM_BUILD_STATIC=1 ./build.sh` - similar as 2) except that it
# relies on the static dispatch + linker to prune code.
# 2. `TEST_CUSTOM_BUILD_STATIC=1 ./build.sh` - optimizes libtorch size by only
# including ops used by a specific model. It relies on the static dispatch +
# linker to prune code.
#
###############################################################################
@@ -37,18 +33,6 @@ prepare_model_and_dump_root_ops() {
python "${TEST_SRC_ROOT}/prepare_model.py"
}
generate_op_dependency_graph() {
# Regular users should get this graph from prebuilt package.
ANALYZER_BUILD_ROOT="${BUILD_ROOT}/build_analyzer"
OP_DEPENDENCY="${ANALYZER_BUILD_ROOT}/work/torch_result.yaml"
if [ ! -f "${OP_DEPENDENCY}" ]; then
BUILD_ROOT="${ANALYZER_BUILD_ROOT}" \
ANALYZE_TORCH=1 \
"${SRC_ROOT}/tools/code_analyzer/build.sh"
fi
}
run_default_build() {
LIBTORCH_BUILD_ROOT="${BUILD_ROOT}/build_default_libtorch"
LIBTORCH_INSTALL_PREFIX="${LIBTORCH_BUILD_ROOT}/install"
@@ -61,30 +45,18 @@ run_custom_build_with_static_dispatch() {
LIBTORCH_BUILD_ROOT="${BUILD_ROOT}/build_custom_libtorch_static"
LIBTORCH_INSTALL_PREFIX="${LIBTORCH_BUILD_ROOT}/install"
# Here we omitted the OP_DEPENDENCY flag so it generates registration
# code for used ROOT ops only, whose unboxing kernels are still needed
# by the JIT runtime. The intermediate ops will be automatically kept
# by the linker as they are statically referenced by the static dispatch
# code, for which we can bypass the registration.
# Here it generates registration code for used ROOT ops only, whose unboxing
# kernels are still needed by the JIT runtime. The intermediate ops will be
automatically kept by the linker as they are statically referenced by the
# static dispatch code, for which we can bypass the registration.
# We don't set '-DSTATIC_DISPATCH_BACKEND=CPU' explicitly to test automatic
# fallback to static dispatch when '-DOP_DEPENDENCY' is omitted.
# fallback to static dispatch.
BUILD_ROOT="${LIBTORCH_BUILD_ROOT}" \
"${SRC_ROOT}/scripts/build_mobile.sh" \
-DCMAKE_CXX_FLAGS="-DSTRIP_ERROR_MESSAGES" \
-DSELECTED_OP_LIST="${ROOT_OPS}"
}
run_custom_build_with_dynamic_dispatch() {
LIBTORCH_BUILD_ROOT="${BUILD_ROOT}/build_custom_libtorch_dynamic"
LIBTORCH_INSTALL_PREFIX="${LIBTORCH_BUILD_ROOT}/install"
BUILD_ROOT="${LIBTORCH_BUILD_ROOT}" \
"${SRC_ROOT}/scripts/build_mobile.sh" \
-DCMAKE_CXX_FLAGS="-DSTRIP_ERROR_MESSAGES" \
-DSELECTED_OP_LIST="${ROOT_OPS}" \
-DOP_DEPENDENCY="${OP_DEPENDENCY}"
}
build_predictor() {
PREDICTOR_BUILD_ROOT="${BUILD_ROOT}/predictor"
@@ -125,14 +97,6 @@ test_custom_build_with_static_dispatch() {
run_predictor
}
test_custom_build_with_dynamic_dispatch() {
prepare_model_and_dump_root_ops
generate_op_dependency_graph
run_custom_build_with_dynamic_dispatch
build_predictor
run_predictor
}
if [ -n "${TEST_DEFAULT_BUILD}" ]; then
test_default_build
fi
@@ -140,7 +104,3 @@ fi
if [ -n "${TEST_CUSTOM_BUILD_STATIC}" ]; then
test_custom_build_with_static_dispatch
fi
if [ -n "${TEST_CUSTOM_BUILD_DYNAMIC}" ]; then
test_custom_build_with_dynamic_dispatch
fi

View File

@@ -1,34 +0,0 @@
cmake_minimum_required(VERSION 3.1)
project(op_deps_project)
set(CMAKE_CXX_STANDARD 14)
# Find torch library
find_package(Torch REQUIRED)
# Small test op library
add_library(OpLib
quantized_ops.cpp
simple_ops.cpp
utils.cpp
)
target_include_directories(OpLib PUBLIC ${TORCH_INCLUDE_DIRS})
# Main executable
add_executable(main main.cc)
find_package(Threads REQUIRED)
target_link_libraries(main
-Wl,--gc-sections
-Wl,--whole-archive
OpLib
${TORCH_LIBRARIES}
-Wl,--no-whole-archive
Threads::Threads
)
# Only install core libraries for downstream analysis job.
install(TARGETS OpLib DESTINATION lib)
install(FILES ${C10_LIBRARY} DESTINATION lib)

View File

@@ -1,29 +0,0 @@
#!/bin/bash
##############################################################################
# A simple project that uses C10 op registration API to create a bunch of
# inter-dependent dummy ops in order to test op dependency analysis script for
# mobile custom build workflow.
##############################################################################
set -ex
SRC_ROOT="$( cd "$(dirname "$0")"/../../.. ; pwd -P)"
BUILD_ROOT="${BUILD_ROOT:-${SRC_ROOT}/build_test_op_deps}"
INSTALL_PREFIX="${BUILD_ROOT}/install"
mkdir -p "${BUILD_ROOT}"
cd "${BUILD_ROOT}"
if [ ! -d "${TORCH_INSTALL_PREFIX:=${SRC_ROOT}/build_mobile/install}" ]; then
echo "Unable to find torch library in ${TORCH_INSTALL_PREFIX}"
exit 1
fi
cmake "${SRC_ROOT}/test/mobile/op_deps" \
-DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_PREFIX_PATH="${TORCH_INSTALL_PREFIX}" \
"$@" # Use-specified CMake arguments
cmake --build . --target install -- "-j${MAX_JOBS}"
echo "Installation completed: ${INSTALL_PREFIX}"

View File

@@ -1,41 +0,0 @@
- name: __ROOT__
depends:
- name: _test::AA
- name: _test::BB
- name: _test::AA
depends:
- name: _test::BB
- name: _test::BB
depends:
- name: _test::AA
- name: _test::CC
depends:
- name: _test::AA
- name: _test::DD
depends:
- name: _test::AA
- name: _test::EE
- name: _test::EE
depends:
- name: _test::FF
- name: _test::FF
depends:
- name: _test::EE
- name: _test::GG
depends:
- name: _test::FF
- name: _test::HH
- name: quantized::t_add
depends:
- name: quantized::t_helper1
- name: quantized::t_add_relu
depends:
- name: quantized::t_helper2
- name: quantized::t_helper1
depends:
- name: quantized::t_helper3
- name: quantized::t_helper2
depends:
- name: quantized::t_helper4
- name: quantized::t_helper3
- name: quantized::t_helper4

View File

@@ -1,20 +0,0 @@
#include <torch/script.h>
#include "quantized_ops.h"
#include "simple_ops.h"
int main() {
c10::InferenceMode guard;
auto input = torch::empty({1, 3, 224, 224});
at::call_AA_op(input);
at::call_BB_op(input);
at::call_CC_op(input);
at::call_DD_op(input);
at::call_EE_op(input);
at::call_FF_op(input);
const auto t_add = c10::Dispatcher::singleton().findSchemaOrThrow("quantized::t_add", "").typed<at::Tensor(at::Tensor, at::Tensor, double, int64_t)>();
const auto t_add_relu = c10::Dispatcher::singleton().findSchemaOrThrow("quantized::t_add_relu", "").typed<at::Tensor (at::Tensor, at::Tensor, double, int64_t)>();
t_add.call(input, input, 1.0, 0);
t_add_relu.call(input, input, 1.0, 0);
return 0;
}

View File

@@ -1,76 +0,0 @@
#include "quantized_ops.h"
#include <iostream>
#include <c10/core/TensorOptions.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/op_registration/op_registration.h>
// This file simulates some irregular op registration/invocation patterns for
// quantized operators which are not covered by aten codegen.
namespace at {
namespace {
template <bool ReLUFused>
Tensor _add_out(Tensor& out, const Tensor& self, const Tensor& other);
template <>
Tensor _add_out<false>(Tensor& out, const Tensor& self, const Tensor& other) {
constexpr auto kName = "quantized::t_helper1";
static const auto op = c10::Dispatcher::singleton().findSchemaOrThrow(kName, "").typed<Tensor(Tensor)>();
op.call(self);
return out;
}
template <>
Tensor _add_out<true>(Tensor& out, const Tensor& self, const Tensor& other) {
constexpr auto kName = "quantized::t_helper2";
static const auto op = c10::Dispatcher::singleton().findSchemaOrThrow(kName, "").typed<Tensor(Tensor)>();
op.call(self);
return out;
}
template <bool ReLUFused = false>
class QAdd final : public c10::OperatorKernel {
public:
Tensor operator()(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
std::cout << "QAdd with ReLUFused = " << ReLUFused << std::endl;
return _add_out<ReLUFused>(qa, qa, qb); // hack
}
};
template <const char* opName, const char* callOpName>
Tensor QHelper(Tensor qa) {
std::cout << "Op: " << opName << std::endl;
if (callOpName != nullptr) {
std::cout << "Call op: " << callOpName << std::endl;
static const auto op = c10::Dispatcher::singleton().findSchemaOrThrow(callOpName, "").typed<Tensor(Tensor)>();
op.call(qa);
}
return qa;
}
constexpr char helper1[] = "quantized::t_helper1";
constexpr char helper2[] = "quantized::t_helper2";
constexpr char helper3[] = "quantized::t_helper3";
constexpr char helper4[] = "quantized::t_helper4";
static auto registry = c10::RegisterOperators()
.op("quantized::t_add(Tensor qa, Tensor qb, float scale, int zero_point)"
"-> Tensor qc",
c10::RegisterOperators::options()
.catchAllKernel<QAdd</*ReLUFused=*/false>>())
.op("quantized::t_add_relu(Tensor qa, Tensor qb, float scale, int zero_point)"
"-> Tensor qc",
c10::RegisterOperators::options()
.catchAllKernel<QAdd</*ReLUFused=*/true>>())
.op("quantized::t_helper1(Tensor qa) -> Tensor", &QHelper<helper1, helper3>)
.op("quantized::t_helper2(Tensor qa) -> Tensor", &QHelper<helper2, helper4>)
.op("quantized::t_helper3(Tensor qa) -> Tensor", &QHelper<helper3, nullptr>)
.op("quantized::t_helper4(Tensor qa) -> Tensor", &QHelper<helper4, nullptr>);
} // namespace
} // namespace at

View File

@@ -1,3 +0,0 @@
#pragma once
#include <ATen/Tensor.h>

View File

@@ -1,117 +0,0 @@
#include "simple_ops.h"
#include <iostream>
#include <c10/core/TensorOptions.h>
#include <torch/library.h>
#include "utils.h"
namespace at {
// AA -> BB
Tensor AA_op(const Tensor& self) {
std::cout << "AA op" << std::endl;
if (self.ndimension() >= 4) {
return call_BB_op(self);
}
return self;
}
// BB -> AA
Tensor BB_op(const Tensor& self) {
std::cout << "BB op" << std::endl;
if (self.ndimension() < 4) {
return global_helper_call_AA_op_1(self);
}
return self;
}
// CC -> (AA -> BB)
Tensor CC_op(const Tensor& self) {
std::cout << "CC op" << std::endl;
return global_helper_call_AA_op_2(self);
}
// DD -> (AA -> BB) / (EE -> FF)
Tensor DD_op(const Tensor& self) {
std::cout << "DD op" << std::endl;
if (self.ndimension() < 4) {
return global_helper_call_AA_op_3(self);
}
return call_EE_op(self);
}
// EE -> FF
Tensor EE_op(const Tensor& self) {
std::cout << "EE op" << std::endl;
if (self.ndimension() >= 4) {
return call_FF_op(self);
}
return self;
}
// FF -> EE
Tensor FF_op(const Tensor& self) {
std::cout << "FF op" << std::endl;
if (self.ndimension() < 4) {
return call_EE_op(self);
}
return self;
}
// GG -> FF
Tensor GG_op(const Tensor& self) {
return call_FF_op(self);
}
namespace {
// NB: Some of these registrations (AA, EE) are not what you
// actually expect to see in practice, but we cover them here
// as they are technically "valid" API calls and we want to
// make sure the analyzer catches them. (The analyzer is very
// generic, so actually there isn't any reason it shouldn't work,
// but it's good to test them!)
//
// Additionally, the code in this file is not really runnable; for
// example we are missing schemas for all of the impl registrations
// here. The analyzer doesn't really care, as it only really
// cares about the name
TORCH_LIBRARY(_test, m) {
m.def("AA(Tensor self) -> Tensor");
m.impl("AA", torch::CppFunction::makeFromUnboxedFunction(AA_op));
m.def("BB(Tensor self) -> Tensor");
m.impl("BB", TORCH_FN(BB_op));
m.def("CC(Tensor self) -> Tensor", TORCH_FN(CC_op));
m.def("DD", TORCH_FN(DD_op));
}
TORCH_LIBRARY_FRAGMENT(_test, m) {
m.def("EE(Tensor self) -> Tensor");
m.def("FF(Tensor self) -> Tensor");
m.def("GG(Tensor self) -> Tensor");
m.def("HH(Tensor self) -> Tensor");
}
TORCH_LIBRARY_IMPL(_test, CPU, m) {
m.impl("EE", EE_op);
m.impl("FF",
torch::dispatch(DispatchKey::CPU,
torch::CppFunction::makeFromUnboxedFunction(FF_op))
);
m.impl("GG",
torch::dispatch(DispatchKey::CPU,
TORCH_FN((GG_op)))
);
m.impl("HH",
[] (Tensor a) -> Tensor {
return a;
});
}
} // namespace
} // namespace at

View File

@@ -1,50 +0,0 @@
#pragma once
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/Tensor.h>
namespace at {
static inline Tensor call_AA_op(const Tensor& self) {
static const auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("_test::AA", "")
.typed<Tensor(const Tensor&)>();
return op.call(self);
}
static inline Tensor call_BB_op(const Tensor& self) {
static const auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("_test::BB", "")
.typed<Tensor(const Tensor&)>();
return op.call(self);
}
static inline Tensor call_CC_op(const Tensor& self) {
static const auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("_test::CC", "")
.typed<Tensor(const Tensor&)>();
return op.call(self);
}
static inline Tensor call_DD_op(const Tensor& self) {
static const auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("_test::DD", "")
.typed<Tensor(const Tensor&)>();
return op.call(self);
}
static inline Tensor call_EE_op(const Tensor& self) {
static const auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("_test::EE", "")
.typed<Tensor(const Tensor&)>();
return op.call(self);
}
static inline Tensor call_FF_op(const Tensor& self) {
static const auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("_test::FF", "")
.typed<Tensor(const Tensor&)>();
return op.call(self);
}
} // namespace at

View File

@@ -1,48 +0,0 @@
#include "utils.h"
#include <c10/core/TensorOptions.h>
#include <ATen/core/op_registration/op_registration.h>
#include "simple_ops.h"
using namespace at;
Tensor global_helper_call_AA_op_1(const Tensor& self) {
auto lambda = [&]() {
return call_AA_op(self);
};
return lambda();
}
static std::function<Tensor()> helper(const Tensor& self) {
return [&]() {
return call_AA_op(self);
};
}
Tensor global_helper_call_AA_op_2(const Tensor& self) {
return helper(self)();
}
Tensor global_helper_call_AA_op_3(const Tensor& self) {
auto lambda = [&]() {
static const auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("_test::AA", "")
.typed<Tensor (const Tensor&)>();
return op.call(self);
};
return lambda();
}
namespace torch {
namespace jit {
C10_EXPORT Tensor API_Function(const Tensor& self) {
return call_AA_op(self);
}
at::Tensor API_Class::API_Method(const at::Tensor& self) {
return call_BB_op(self);
}
} // namespace jit
} // namespace torch

View File

@@ -1,18 +0,0 @@
#pragma once
#include <ATen/Tensor.h>
at::Tensor global_helper_call_AA_op_1(const at::Tensor& self);
at::Tensor global_helper_call_AA_op_2(const at::Tensor& self);
at::Tensor global_helper_call_AA_op_3(const at::Tensor& self);
namespace torch {
namespace jit {
class C10_EXPORT API_Class {
public:
at::Tensor API_Method(const at::Tensor& self);
};
} // namespace jit
} // namespace torch

View File

@@ -1,22 +0,0 @@
cmake_minimum_required(VERSION 3.1)
project(code_analyzer)
set(CMAKE_CXX_STANDARD 14)
find_package(LLVM REQUIRED CONFIG)
add_definitions(${LLVM_DEFINITIONS})
include_directories(${LLVM_INCLUDE_DIRS})
link_directories(${LLVM_LIBRARY_DIRS})
# Main executable
add_executable(analyzer
analyzer.cpp
op_deps_pass.cpp
)
set_target_properties(analyzer PROPERTIES
COMPILE_FLAGS "-fno-rtti -O3")
llvm_map_components_to_libnames(llvm_libs core irreader support)
target_link_libraries(analyzer ${llvm_libs})

View File

@@ -1,32 +0,0 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;
namespace {
cl::opt<std::string> InputFilename(
cl::Positional,
cl::desc("<input bitcode file>"),
cl::init("-"),
cl::value_desc("filename"));
} // namespace
int main(int argc, char **argv) {
LLVMContext Context;
cl::ParseCommandLineOptions(argc, argv);
SMDiagnostic Err;
std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
auto opDependencyPass = PassRegistry::getPassRegistry()
->getPassInfo(StringRef("op_dependency"))
->createPass();
static_cast<ModulePass*>(opDependencyPass)->runOnModule(*M);
return 0;
}

View File

@@ -1,148 +0,0 @@
#!/bin/bash
##############################################################################
# Build LLVM code analyzer and analyze torch code dependency.
##############################################################################
#
# Example usage:
#
# 1. Analyze torch and generate yaml file of op dependency transitive closure:
# LLVM_DIR=${HOME}/src/llvm8/build/install \
# ANALYZE_TORCH=1 tools/code_analyzer/build.sh
#
# 2. Analyze test project and compare with expected result:
# LLVM_DIR=${HOME}/src/llvm8/build/install \
# ANALYZE_TEST=1 tools/code_analyzer/build.sh
#
# 3. Analyze torch and generate yaml file of op dependency with debug path:
# LLVM_DIR=${HOME}/src/llvm8/build/install \
# ANALYZE_TORCH=1 tools/code_analyzer/build.sh -debug_path=true
#
# If you're a Facebook employee, chances are you're running on CentOS 8.
# If that's the case, you can install all the dependencies you need with:
#
# sudo dnf install llvm-devel llvm-static clang ncurses-devel
#
# and then set LLVM_DIR=/usr
set -ex
SRC_ROOT="$( cd "$(dirname "$0")"/../.. ; pwd -P)"
ANALYZER_SRC_HOME="${SRC_ROOT}/tools/code_analyzer"
# Clang/LLVM path
export LLVM_DIR="${LLVM_DIR:-/usr/lib/llvm-8}"
export CC="${LLVM_DIR}/bin/clang"
export CXX="${LLVM_DIR}/bin/clang++"
EXTRA_ANALYZER_FLAGS=$@
BUILD_ROOT="${BUILD_ROOT:-${SRC_ROOT}/build_code_analyzer}"
WORK_DIR="${BUILD_ROOT}/work"
rm -rf "${BUILD_ROOT}"
mkdir -p "${BUILD_ROOT}"
mkdir -p "${WORK_DIR}"
cd "${BUILD_ROOT}"
build_analyzer() {
cmake "${ANALYZER_SRC_HOME}" -DCMAKE_BUILD_TYPE=Release
if [ -z "${MAX_JOBS}" ]; then
if [ "$(uname)" == 'Darwin' ]; then
MAX_JOBS=$(sysctl -n hw.ncpu)
else
MAX_JOBS=$(nproc)
fi
fi
make "-j${MAX_JOBS}"
}
build_torch_mobile() {
TORCH_BUILD_ROOT="${BUILD_ROOT}/build_mobile"
TORCH_INSTALL_PREFIX="${TORCH_BUILD_ROOT}/install"
BUILD_ROOT="${TORCH_BUILD_ROOT}" "${SRC_ROOT}/scripts/build_mobile.sh" \
-DCMAKE_CXX_FLAGS="-S -emit-llvm -DSTRIP_ERROR_MESSAGES" \
${MOBILE_BUILD_FLAGS}
}
build_test_project() {
TEST_SRC_ROOT="${SRC_ROOT}/test/mobile/op_deps"
TEST_BUILD_ROOT="${BUILD_ROOT}/build_test"
TEST_INSTALL_PREFIX="${TEST_BUILD_ROOT}/install"
BUILD_ROOT="${TEST_BUILD_ROOT}" \
TORCH_INSTALL_PREFIX="${TORCH_INSTALL_PREFIX}" \
"${TEST_SRC_ROOT}/build.sh" \
-DCMAKE_CXX_FLAGS="-S -emit-llvm -DSTRIP_ERROR_MESSAGES"
}
call_analyzer() {
ANALYZER_BIN="${BUILD_ROOT}/analyzer" \
INPUT="${INPUT}" OUTPUT="${OUTPUT}" \
EXTRA_ANALYZER_FLAGS="${EXTRA_ANALYZER_FLAGS}" \
"${ANALYZER_SRC_HOME}/run_analyzer.sh"
}
analyze_torch_mobile() {
INPUT="${WORK_DIR}/torch.ll"
OUTPUT="${WORK_DIR}/torch_result.yaml"
if [ ! -f "${INPUT}" ]; then
# Link libtorch into a single module
# TODO: invoke llvm-link from cmake directly to avoid this hack.
# TODO: include *.c.o when there is meaningful fan-out from pure-c code.
"${LLVM_DIR}/bin/llvm-link" -S \
$(find "${TORCH_BUILD_ROOT}" -name '*.cpp.o' -o -name '*.cc.o') \
-o "${INPUT}"
fi
# Analyze dependency
call_analyzer
}
print_output_file_path() {
echo "Deployed file at: ${OUTPUT}"
}
analyze_test_project() {
INPUT="${WORK_DIR}/test.ll"
OUTPUT="${WORK_DIR}/test_result.yaml"
# Link into a single module (only need c10 and OpLib srcs)
# TODO: invoke llvm-link from cmake directly to avoid this hack.
"${LLVM_DIR}/bin/llvm-link" -S \
$(find "${TORCH_BUILD_ROOT}" -path '*/c10*' \( -name '*.cpp.o' -o -name '*.cc.o' \)) \
$(find "${TEST_BUILD_ROOT}" -path '*/OpLib*' \( -name '*.cpp.o' -o -name '*.cc.o' \)) \
-o "${INPUT}"
# Analyze dependency
call_analyzer
}
check_test_result() {
if cmp -s "${OUTPUT}" "${TEST_SRC_ROOT}/expected_deps.yaml"; then
echo "Test result is the same as expected."
else
echo "Test result is DIFFERENT from expected!"
diff -u "${TEST_SRC_ROOT}/expected_deps.yaml" "${OUTPUT}"
exit 1
fi
}
build_analyzer
if [ -n "${ANALYZE_TORCH}" ]; then
build_torch_mobile
analyze_torch_mobile
if [ -n "${DEPLOY}" ]; then
print_output_file_path
fi
fi
if [ -n "${ANALYZE_TEST}" ]; then
build_torch_mobile
build_test_project
analyze_test_project
check_test_result
fi

View File

@@ -1,894 +0,0 @@
// This LLVM pass takes LLVM bitcode / assembly as input and generates
// dependency graph among aten ops. From a set of root ops used by a model, we
// can calculate transitive closure of all dependent ops, then we can produce a
// custom LibTorch library with optimal build size which only registers and
// contains ops needed by the specific model - unregistered / unused ops can be
// stripped at link time.
//
// [Approach]
// To generate the dependency graph it searches for 3 types of connections in
// LLVM bitcode / assembly:
// 1) op registration: op name (schema string literal) -> registered function;
// 2) regular function call: function -> function;
// 3) op invocation: function -> op name (schema string literal)
//
// For #2 it uses similar algorithm as llvm::LazyCallGraph - not only looks into
// call/invoke instructions but also recursively searches for function pointers
// in each instruction's operands.
//
// For #1 and #3 it searches for connections between operator name string
// literals / function pointers and c10 op registration/invocation API calls in
// LLVM IR graph via "use" edges (bi-directional):
// 1. llvm::Value has "users()" method to get other llvm::Value nodes that use
// the value;
// 2. most of types derive from llvm::User which has "operands()" method to get
// other llvm::Value nodes being used by the value;
//
// [Limitation]
// For now the search doesn't go beyond the function boundary because the
// reference to op name string literals and c10 op registration/invocation
// APIs are almost always in the same function. If we create helper functions
// around the c10 API, we can simply add them to the regular expression used to
// identify c10 API calls.
//
// [Example]
// In the following example, it finds out:
// 1) the registered function for the "quantized::add" operator;
// 2) one possible call path to at::empty() function;
// 3) the called operator name "aten::empty":
//
// - quantized::add
// - c10::detail::wrap_kernel_functor_unboxed_<at::native::(anonymous
// namespace)::QAdd<false>, at::Tensor (at::Tensor, at::Tensor, double,
// long)>::call(c10::OperatorKernel*, at::Tensor, at::Tensor, double, long)
// - at::native::(anonymous namespace)::QAdd<false>::operator()(at::Tensor,
// at::Tensor, double, long)
// - void at::native::DispatchStub<void (*)(at::Tensor&, at::Tensor const&,
// at::Tensor const&), at::native::qadd_stub>::operator()<at::Tensor&,
// at::Tensor const&, at::Tensor const&>(c10::DeviceType, at::Tensor&,
// at::Tensor const&, at::Tensor const&)
// - at::native::DispatchStub<void (*)(at::Tensor&, at::Tensor const&,
// at::Tensor const&), at::native::qadd_stub>::choose_cpu_impl()
// - void at::native::(anonymous namespace)::qadd_kernel<false>(at::Tensor&,
// at::Tensor const&, at::Tensor const&)
// - at::TensorIterator::binary_op(at::Tensor&, at::Tensor const&, at::Tensor
// const&, bool)
// - at::TensorIterator::build()
// - at::TensorIterator::fast_set_up()
// - at::empty(c10::ArrayRef<long>, c10::TensorOptions const&,
// c10::optional<c10::MemoryFormat>)
// - aten::empty
#include <deque>
#include <iostream>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include "llvm/Demangle/Demangle.h"
#include "llvm/Analysis/LazyCallGraph.h"
#if LLVM_VERSION_MAJOR < 8
#include "llvm/IR/CallSite.h"
#endif
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
struct RegexOpt {
std::shared_ptr<Regex> pattern;
void operator=(const std::string& val) {
if (val.empty()) {
return;
}
pattern = std::make_shared<Regex>(val);
std::string regexError;
if (!pattern->isValid(regexError)) {
report_fatal_error(
"Invalid regular expression param: '" + val + "' err: " + regexError,
false);
}
};
};
class RegexOptParser : public cl::basic_parser<RegexOpt> {
public:
RegexOptParser(cl::Option& O) : basic_parser(O) {}
virtual ~RegexOptParser() = default;
// parse - Return true on error.
bool parse(cl::Option&, StringRef, StringRef Arg, RegexOpt& Value) {
Value = Arg.str();
return false;
}
StringRef getValueName() const override {
return "RegexOpt";
}
};
RegexOpt FunctionSchemaPatternLoc;
cl::opt<RegexOpt, true, cl::parser<std::string>> FunctionSchemaPattern(
"op_schema_pattern",
cl::desc("Regular expression used to identify aten op schema strings. "
"Example: -op_schema_pattern '^(aten|quantized)::[^ ]+'"),
cl::location(FunctionSchemaPatternLoc),
cl::Required,
cl::ValueRequired);
RegexOpt OpRegistrationPatternLoc;
cl::opt<RegexOpt, true, cl::parser<std::string>> OpRegistrationPattern(
"op_register_pattern",
cl::desc("Regular expression used to identify c10 op registration API. "
"Example: -op_register_pattern 'c10::RegisterOperators::op'"),
cl::location(OpRegistrationPatternLoc),
cl::Required,
cl::ValueRequired);
RegexOpt OpInvocationPatternLoc;
cl::opt<RegexOpt, true, cl::parser<std::string>> OpInvocationPattern(
"op_invoke_pattern",
cl::desc("Regular expression used to identify c10 op invocation API. "
"Example: -op_invoke_pattern 'c10::Dispatcher::findSchema'"),
cl::location(OpInvocationPatternLoc),
cl::Required,
cl::ValueRequired);
// The `root_symbol_pattern` is used to specify the seeds of C++ symbols
// from which it searches for transitively reachable ops which need to be
// kept for these C++ APIs to be able to run.
//
// Why not dump ops that are reachable from any visible C++ symbols? Why
// limit it to a subset of root symbols?
// Because op registration callsites in static initializer are visible root
// symbols, too. It will dump ALL the registered ops without any filtering.
//
// Can we use some fixed entry point like `main()`?
// The target to be analyzed can be DSO that doesn't have a `main()`. And
// sometimes we want to get ops that could (but not yet) be called.
//
// This temporary flag will be deprecated by better alternatives in the future.
RegexOpt RootSymbolPatternLoc;
cl::opt<RegexOpt, true, cl::parser<std::string>> RootSymbolPattern(
"root_symbol_pattern",
cl::desc("Regular expression used to identify root symbols. It will insert "
"an entry to the output graph with key = `__ROOT__` and value = "
"set of ops reachable from root symbols, if the pattern is set. "
"Example: -root_symbol_pattern 'torch::jit'"),
cl::location(RootSymbolPatternLoc));
cl::list<RegexOpt, bool, RegexOptParser> TorchLibraryInitPattern(
"torch_library_init_pattern",
cl::desc("Regular expression used to identify TorchLibraryInit symbols "
"that are generated by `TORCH_LIBRARY` macro. The first capturing "
"group is used to extract namespace string. "
"Example: -torch_library_init_pattern "
"'^.*TORCH_LIBRARY_init_([^(]+)(\\(.*)?$'"),
cl::ZeroOrMore);
cl::opt<int> Verbose(
"v",
cl::desc("Verbose level"),
cl::Hidden,
cl::init(0));
cl::opt<bool> DebugPath(
"debug_path",
cl::desc("Output path between two nodes."),
cl::init(false));
using SET = std::set<std::string>;
using GRAPH = std::unordered_map<std::string, std::set<std::string>>;
using VALUE_MAP = std::unordered_map<Value*, Value*>;
using VALUE_SET = std::unordered_set<Value*>;
// SRC -> Inverse "tree" from all reachable destinations back to SRC, e.g.:
// (DEST-1 -> PREV_11, PREV_11 -> PREV_12, ..., PREV_1n -> SRC)
// (DEST-2 -> PREV_21, PREV_21 -> PREV_22, ..., PREV_2n -> SRC)
using PATH = std::unordered_map<std::string,
std::unordered_map<std::string, std::string>>;
inline std::string _name(const Value* V) {
return V->getName().str();
}
// Referenced the logic in llvm-cxxfilt.cpp.
// Starting from LLVM 9 it provides a `demangle()` API. Here we keep our ad-hoc
// version for backward compatibility.
std::string _demangle(const std::string& mangled) {
int status;
const char* decorated = mangled.c_str();
size_t decoratedLength = mangled.length();
char *undecorated = itaniumDemangle(decorated, nullptr, nullptr, &status);
if (!undecorated &&
(decoratedLength > 6 && strncmp(decorated, "__imp_", 6) == 0)) {
undecorated = itaniumDemangle(decorated + 6, nullptr, nullptr, &status);
}
std::string result(undecorated ? undecorated : mangled);
free(undecorated);
return result;
}
inline bool _isCallSite(Value* V) {
#if LLVM_VERSION_MAJOR >= 8
return isa<CallBase>(V);
#else
return !!CallSite(V);
#endif
}
inline Function* _getCalledFunction(Value* V) {
#if LLVM_VERSION_MAJOR >= 8
return dyn_cast<CallBase>(V)->getCalledFunction();
#else
return CallSite(V).getCalledFunction();
#endif
}
// LLVM_DEBUG needs opt to be built with debug support.
template<
typename T,
typename std::enable_if<std::is_base_of<Value, T>::value, int>::type = 0>
std::ostream& operator<<(std::ostream& out, T& I) {
std::string str;
raw_string_ostream O(str);
O << I;
return out << str;
}
class OpDependency : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
OpDependency() : ModulePass(ID) {}
~OpDependency() = default;
bool runOnModule(Module& M) override {
// Scan all functions and instructions to construct function -> function
// dependency graph and to find out:
// - visible functions matching `root_symbol_pattern` option;
// - instructions that might register or invoke operators, respectively.
GRAPH deps;
VALUE_SET visibleFuncs, opRegistrationInsts, opInvocationInsts;
scanAllFunctions(
M, &deps, &visibleFuncs, &opRegistrationInsts, &opInvocationInsts);
// "Key nodes" are nodes we want to keep in output graph. They are usually
// op-schema strings.
SET keyNodes;
// Insert a dummy root node with links to function nodes matching the
// "root symbol" regex pattern and with default visibility. The goal is to
// find aten ops that are possibly called via torch C++ APIs.
insertRoot(visibleFuncs, &deps, &keyNodes);
// Scan op registration/invocation API calls to construct the link between
// op name (a.k.a op schema string) and related functions.
// Dump the op-schema -> function and function -> op-schema mappings into
// the same `deps` graph with function -> function mappings as they will
// be processed together next.
scanOpRegistration(opRegistrationInsts, &keyNodes, &deps);
scanOpInvocation(opInvocationInsts, &keyNodes, &deps);
// Shrink the graph by removing intermediate nodes (functions) while
// maintaining transitive dependency between operators (schema strings).
GRAPH result;
std::shared_ptr<PATH> path = DebugPath ? std::make_shared<PATH>() : nullptr;
simplifyGraph(deps, keyNodes, &result, path.get());
printAsYAML(std::cout, keyNodes, result, path.get());
return false;
}
private:
static void insertRoot(
const VALUE_SET& visibleFuncs, GRAPH* deps, SET* keyNodes) {
if (!RootSymbolPatternLoc.pattern) {
return;
}
SET roots;
for (const auto& F : visibleFuncs) {
std::string name = _name(F);
auto demangled = _demangle(name);
if (RootSymbolPatternLoc.pattern->match(demangled)) {
roots.insert(name);
if (Verbose) {
std::cerr << "[DEBUG][ROOT_FUNC] " << demangled << std::endl;
}
}
}
static const std::string ROOT_NODE{"__ROOT__"};
deps->emplace(ROOT_NODE, std::move(roots));
keyNodes->insert(ROOT_NODE);
}
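// For example, with the root pattern passed in by the build script below
// (roughly "torch::jit::[^(]"), any visible function whose demangled name
// starts with "torch::jit::" is linked under the dummy "__ROOT__" node, so
// everything it transitively references is kept.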
// Scan the entire IR graph to construct function -> function dependency graph
// as well as instructions that might register or invoke operators.
static void scanAllFunctions(
Module& M, GRAPH* deps, VALUE_SET* visibleFuncs,
VALUE_SET* opRegistrationInsts, VALUE_SET* opInvocationInsts) {
for (Function& F : M) {
if (F.hasDefaultVisibility()) {
visibleFuncs->insert(&F);
}
std::string caller = _name(&F);
std::string callerDemangled = _demangle(caller);
for (BasicBlock& BB : F) {
for (Instruction& I : BB) {
scanReferredFunctions(I, [&](Function* func) -> void {
std::string callee = _name(func);
std::string calleeDemangled = _demangle(callee);
(*deps)[caller].insert(callee);
if (Verbose > 1) {
std::cerr << "[DEBUG][FUNC_CALL] " << callerDemangled << " => "
<< calleeDemangled << std::endl;
}
// One registration/invocation API might call another registration/
// invocation API, in which case we can skip processing the nested
// call. This is a simple trick to avoid the "cannot find registered/
// invoked op" warning and doesn't affect correctness, because
// later in scanOpRegistration we'll walk the transitively reachable
// IR graph again from each registration instruction.
if (!OpRegistrationPatternLoc.pattern->match(callerDemangled) &&
OpRegistrationPatternLoc.pattern->match(calleeDemangled)) {
(*opRegistrationInsts).insert(&I);
}
if (!OpInvocationPatternLoc.pattern->match(callerDemangled) &&
OpInvocationPatternLoc.pattern->match(calleeDemangled)) {
(*opInvocationInsts).insert(&I);
}
});
}
}
}
}
// llvm::CallGraph only searches for functions referenced by "CallSites" (i.e.
// by call/invoke instructions). However, functions can be referenced by
// non-call/invoke instructions as well (e.g., passed as a function pointer),
// e.g.:
// ```
// store i64 ptrtoint (void (%"class.at::Tensor"*, %"class.at::Tensor"*)*
// @at::foo_op(at::Tensor const&) to i64), i64* %14, ...
// ```
// "@at::foo_op" is a operand of "ptrtoint", which in turn is a constant
// operand of "store" instruction. The stored function pointer can be called
// indirectly later on.
//
// Sometimes directly called functions can be in ConstExpr as well, e.g.:
// ```
// invoke void bitcast (
// void (ty1*, ...)* @c10::Dispatcher::findSchema(...) to
// void (ty2*, ...)*)(...)
// ```
// In the above case, "CallSite(I).getCalledFunction()" won't return "findSchema"
// as it's nested in a "bitcast" instruction.
//
// To cover these cases this method recursively traverses all operands of the
// input instruction "I" to search for directly/indirectly referenced function
// pointers by the instruction. The referenced functions might NOT actually be
// called (which is fine for our use case). llvm::LazyCallGraph has similar
// logic and we reuse its "visitReferences" method to traverse all operands.
static void scanReferredFunctions(
Instruction& I, const std::function<void(Function*)>& CB) {
SmallVector<Constant*, 16> worklist;
SmallPtrSet<Constant*, 16> visited;
if (_isCallSite(&I)) {
Function* callee = _getCalledFunction(&I);
if (callee && !callee->isIntrinsic() && visited.insert(callee).second) {
CB(callee);
}
}
for (Value* op : I.operand_values()) {
Constant* C = dyn_cast<Constant>(op);
if (C && visited.insert(C).second) {
worklist.push_back(C);
}
}
LazyCallGraph::visitReferences(worklist, visited, [&](Function& F) {
if (!F.isIntrinsic()) {
CB(&F);
}
});
}
// Naive connectivity analysis to find all nodes that are reachable from a
// specific node in the IR graph by following each node's "use" edges (links to
// its operands and users).
// This is the core algorithm we use to find the connection between op name
// string literals and registered/invoked functions - there should be a path
// connecting them to the c10 op registration/invocation APIs.
// For now the search doesn't go beyond the function boundary, because the
// references to op name string literals and to the c10 op registration/
// invocation APIs are almost always in the same function.
static void scanConnectedNodes(
Value* src,
VALUE_SET blocked,
const std::function<void(Value*)>& CB, VALUE_MAP* debugPath) {
std::deque<Value*> worklist;
SmallPtrSet<Value*, 16> visited;
auto insert = [&](Value* cur, Value* parent) -> void {
if (!blocked.count(cur) && visited.insert(cur).second) {
worklist.push_back(cur);
if (debugPath) {
(*debugPath).emplace(cur, parent);
}
}
};
auto expandOperands = [&](Value* V) -> void {
// Stops if it doesn't have operands (!isa<User>) or it is a function.
if (!isa<User>(V) || isa<Function>(V)) {
return;
}
auto node = dyn_cast<User>(V);
for (auto& O : node->operands()) {
insert(O, node);
}
};
auto blockSiblingOperands = [&](User* U, Value* V) -> void {
// This handles a special case that only appears in LLVM 9 (not in 5 - 8
// or 10), where it can falsely associate unrelated PyTorch op
// registrations.
//
// If the value `V` is used by a PHI-node `U`, then we should stop
// crawling `U`'s operands, i.e. `V`'s siblings in `U`. E.g.:
//
// 114: ; preds = %111, %109
// %115 = phi i32 [ %110, %109 ], [ %112, %111 ]
//
// `%115` might take the value of `%110` or `%112`, depending on which
// label it comes from. Assuming `V` is `%110` and `U` is `%115`, we can
// continue to scan `%115` but should not crawl `%112`, as the PHI does
// not directly pass data from `%110` to `%112` (or vice versa).
//
// NB: we probably should do the same for other LLVM instructions with
// this kind of selective semantics, but for the purpose of analyzing
// PyTorch registrations this seems to be sufficient for now.
if (isa<PHINode>(U)) {
for (auto& S : U->operands()) {
blocked.insert(S);
}
}
};
auto expandUsers = [&](Value* V) -> void {
// If the value is not a constant, then a user of the value might pass
// another value into it, e.g.:
// store @.str.15, %10
// invoke @c10.reg_op, %10, @foo
// The store instruction, which is a user of "%10", passes "@.str.15" to
// "%10", which in turn is passed to the "@c10.reg_op" API function.
// Users of constants are not interesting as they cannot change the state
// of the constant. We skip users of functions as well, assuming that
// interesting values (op names and function pointers) are not set via
// other invocations of the function.
if (!isa<User>(V) || isa<Constant>(V) || isa<Function>(V)) {
return;
}
for (auto U : V->users()) {
insert(U, V);
blockSiblingOperands(U, V);
}
};
auto expand = [&](Value* V) -> void {
expandOperands(V);
expandUsers(V);
};
expand(src);
while (!worklist.empty()) {
auto cur = worklist.front();
worklist.pop_front();
expand(cur);
if (isa<Function>(cur) || isa<Constant>(cur)) {
CB(cur);
}
}
}
// Calculate transitive closure and remove intermediate (non-key) nodes.
// Note that there are two types of nodes in the dependency graph:
// 1) String literals in source files, e.g.:
// "aten::cos_(Tensor(a!) self) -> Tensor(a!)", which represents operator
// "schema";
// 2) Function symbols in object files, e.g.:
// "at::CPUType::(anonymous namespace)::cos_(at::Tensor&)";
// Both of them are added to the dependency graph as std::string. Ultimately
// we only care about #1, as that's what we use to prune registered ops via
// codegen; #2 will then be stripped by the linker automatically. So the goal
// is to remove #2 from the graph while maintaining the transitive dependency
// between #1. #1 nodes are called "key nodes" in this method.
static void simplifyGraph(
const GRAPH& input, SET& keyNodes, GRAPH* output, PATH* path) {
// Starting from every key node, use BFS to traverse all nodes that are
// transitively reachable from the node in the sparse graph.
for (auto& key : keyNodes) {
std::deque<std::string> queue;
SET visited; // has some runtime issue with std::unordered_set
auto expand = [&](const std::string& curNode) -> void {
auto it = input.find(curNode);
if (it == input.end()) {
return;
}
for (const auto& next : it->second) {
if (!visited.insert(next).second) {
continue;
}
queue.push_back(next);
if (path) {
(*path)[key].emplace(next, curNode);
}
}
};
expand(key);
while (!queue.empty()) {
auto curNode = queue.front();
queue.pop_front();
if (keyNodes.count(curNode)) {
// Output links between key nodes.
(*output)[key].insert(curNode);
// Stop expanding key nodes.
continue;
}
expand(curNode);
}
}
}
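// A worked toy example (hypothetical names): with keyNodes = {"aten::foo",
// "aten::bar"} and input edges
//   "aten::foo" -> "fn_A", "fn_A" -> "fn_B", "fn_B" -> "aten::bar",
// the intermediate function nodes are dropped and the output graph contains
// only the transitive edge "aten::foo" -> "aten::bar".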
// Find out operator names and function pointers that are transitively
// connected to the same 'src' instruction.
static void scanOpSchemaStrAndFunction(
Instruction* src, const VALUE_SET& blocked,
const std::string& contextualNamespace,
SET* visitedOps, SET* visitedFunctions) {
std::shared_ptr<VALUE_MAP> debugPath =
(Verbose > 2 ? std::make_shared<VALUE_MAP>() : nullptr);
auto callback = [&](Value* V) -> void {
if (auto schemaStr = extractOpSchema(contextualNamespace, V)) {
if (visitedOps) {
// NB: Some debug string constants might be connected to the
// registration instruction, e.g.: "Lambda". Since we have factored the
// namespace out of the op schema string, there is no longer a simple
// way to identify these fake ops. For now we simply take the first
// instance, as the real op name is the closest to the seed instruction
// in BFS order.
if (!visitedOps->empty()) {
if (Verbose) {
std::cerr << "[INFO] ignore extra op schema str: " << *schemaStr
<< " in: " << _demangle(_name(src->getFunction()))
<< ", because already found valid op schema str: "
<< *visitedOps->begin() << std::endl;
}
} else {
(*visitedOps).insert(*schemaStr);
}
}
if (Verbose > 1) {
std::cerr << "[DEBUG][OP_SCHEMA] " << *schemaStr << std::endl;
printDebugPath(debugPath.get(), src, V);
}
} else if (auto F = dyn_cast<Function>(V)) {
if (F->isIntrinsic()) {
return;
}
if (visitedFunctions) {
(*visitedFunctions).insert(_name(F));
}
if (Verbose > 1) {
std::cerr << "[DEBUG][FUNC] " << _demangle(_name(F)) << std::endl;
printDebugPath(debugPath.get(), src, V);
}
}
};
scanConnectedNodes(src, blocked, callback, debugPath.get());
}
// This method looks for op schema strings and function pointers that connect
// to the same c10 op registration API call via "use" edges (bi-directional)
// in the IR graph - exploring both the nodes used by the instruction
// (operands) and the nodes using it (users).
//
// It assumes that the function pointers are needed (registered) for the op.
//
// For example, from op name "aten::add" to registration API call:
// [OP_SCHEMA] aten::add
// [PATH][1][CONST] [70 x i8] c"aten::add.Scalar(Tensor self...\00"
// [PATH][2][CONST] @.str.55.20575 = private unnamed_addr constant [70 x i8]
// c"aten::add.Scalar(Tensor self, ...\00", align 1
// [PATH][3][CONST] i8* getelementptr inbounds ([70 x i8], [70 x i8]*
// @.str.55.20575, i64 0, i64 0)
// [PATH][4][INST] invoke void @std::basic_string<...>::basic_string(...)
// (%"class.std::basic_string"* ... %1477,
// i8* getelementptr ... @.str.55.20575 ...)
// [PATH][5][INST] %1477 = alloca %"class.std::basic_string" ...
// [PATH][6][INST] %4086 = invoke ...
// @c10::RegisterOperators::Options::schema(... %1477)
// [PATH][7][INST] %4088 = invoke ... @...catchAllKernel...(... %4086, ...
// @at::TypeDefault::add(at::Tensor const&...))
// [PATH][8][INST] %4090 = invoke ...
// &&(%"class.c10::RegisterOperators::Options"*... %4088 ...)
// [PATH][9][INST] invoke void
// @c10::RegisterOperators::checkSchemaAndRegisterOp_(...
// %"class.c10::RegisterOperators::Options"* ... %4090)
//
// From function pointer to registration API call:
// [FUNC] at::TypeDefault::add(at::Tensor const&, c10::Scalar, c10::Scalar)
// [PATH][1][FUNC] at::TypeDefault::add(at::Tensor const&...)
// [PATH][2][INST] %4088 = invoke ... @...catchAllKernel...(... %4086, ...
// @at::TypeDefault::add(at::Tensor const&...))
// [PATH][3][INST] %4090 = invoke ...
// &&(%"class.c10::RegisterOperators::Options"*... %4088 ...)
// [PATH][4][INST] invoke void
// @c10::RegisterOperators::checkSchemaAndRegisterOp_(...
// %"class.c10::RegisterOperators::Options"* ... %4090)
static void scanOpRegistration(
VALUE_SET& instructions, SET* opSchemaStrs, GRAPH* schemaStrToFunctions) {
for (auto V : instructions) {
auto I = dyn_cast<Instruction>(V);
// We only need to process call/invoke instructions.
if (!I || !_isCallSite(I)) {
continue;
}
auto contextualNamespace = inferContextualNamespace(I);
if (Verbose && !contextualNamespace.empty()) {
std::cerr << "[DEBUG][REG][NAMESPACE] " << contextualNamespace
<< std::endl;
}
if (Verbose > 2) {
std::cerr << "[DEBUG][REG][INST] " << *I << std::endl;
}
SET visitedOps, visitedFunctions;
// Pass in "instructions" set as "blocked" set - all operator registration
// calls are connected to global op registry object so we should avoid
// going from one op registration call to another op registration call via
// the global registry object.
scanOpSchemaStrAndFunction(
I, instructions, contextualNamespace, &visitedOps, &visitedFunctions);
if (visitedOps.size() != 1) {
std::cerr << "[WARNING] found " << visitedOps.size() << " ops ( ";
for (auto& op : visitedOps) {
std::cerr << op << " ";
}
std::cerr << ") in a registration call in function: "
<< _demangle(_name(I->getFunction()))
<< " contextualNamespace: " << contextualNamespace
<< std::endl;
}
for (const auto& op : visitedOps) {
opSchemaStrs->insert(op);
if (visitedFunctions.empty()) {
std::cerr << "[WARNING] could not find registered function for op: "
<< op << " in function: "
<< _demangle(_name(I->getFunction()))
<< " contextualNamespace: " << contextualNamespace
<< std::endl;
}
for (const auto& func : visitedFunctions) {
(*schemaStrToFunctions)[op].insert(func);
if (Verbose) {
std::cerr << "[DEBUG][OP_REG] " << op << " => "
<< _demangle(func) << std::endl;
}
}
}
}
}
static std::string inferContextualNamespace(Instruction* I) {
auto functionName = _demangle(_name(I->getFunction()));
for (auto& pattern : TorchLibraryInitPattern) {
if (!pattern.pattern->match(functionName)) {
continue;
}
if (Verbose) {
std::cerr << "[DEBUG][REG][INIT_FUNC] " << functionName << std::endl;
}
return pattern.pattern->sub("\\1", functionName) + "::";
}
return {};
}
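// For example (illustrative symbol): a registration call living in a function
// whose demangled name looks like "TORCH_LIBRARY_init_aten(torch::Library&)"
// matches the first -torch_library_init_pattern; capture group 1 is "aten",
// so the contextual namespace becomes "aten::".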
// Similar to scanOpRegistration - it searches for op schema strings that
// connect to a c10 op invocation API call and assumes that the parent function
// of the API call invokes the operator.
//
// For example, from op name "aten::empty" to invocation API call:
// [OP_SCHEMA] aten::empty
// [PATH][1][CONST] [12 x i8] c"aten::empty\00"
// [PATH][2][CONST] @.str.69.1990 = private unnamed_addr constant [12 x i8]
// c"aten::empty\00", align 1
// [PATH][3][CONST] i8* getelementptr inbounds ([12 x i8], [12 x i8]*
// @.str.69.1990, i64 0, i64 0)
// [PATH][4][INST] invoke void @std::basic_string<...>::basic_string(...
// (%"class.std::basic_string"* nonnull %19,
// i8* getelementptr inbounds ([12 x i8], [12 x i8]*
// @.str.69.1990, i64 0, i64 0) ...
// [PATH][5][INST] %19 = alloca %"class.std::basic_string", align 8
// [PATH][6][INST] %53 = bitcast %"class.std::basic_string"* %19 to i64*
// [PATH][7][INST] %54 = load i64, i64* %53, align 8, !tbaa !4
// [PATH][8][INST] store i64 %54, i64* %55, align 8, !tbaa !4
// [PATH][9][INST] %55 = bitcast %"struct.c10::OperatorName"* %18 to i64*
// [PATH][10][INST] %18 = alloca %"struct.c10::OperatorName", align 8
// [PATH][11][INST] invoke void @c10::Dispatcher::findSchema(c10::OperatorName
// const&)(%"class.c10::optional.105"* nonnull sret %17,
// %"class.c10::Dispatcher.6320"* nonnull %45,
// %"struct.c10::OperatorName"* nonnull dereferenceable(16)
// %18)
static void scanOpInvocation(
VALUE_SET& instructions, SET* opSchemaStrs, GRAPH* functionToSchemaStrs) {
for (auto V : instructions) {
auto I = dyn_cast<Instruction>(V);
// We only need to process call/invoke instructions.
if (!I || !_isCallSite(I)) {
continue;
}
if (Verbose > 2) {
std::cerr << "[DEBUG][CALL][INST] " << *I << std::endl;
}
std::string caller = _name(I->getFunction());
SET visitedOps;
scanOpSchemaStrAndFunction(I, {}, {}, &visitedOps, nullptr);
if (visitedOps.size() != 1) {
std::cerr << "[WARNING] found " << visitedOps.size() << " ops ( ";
for (auto& op : visitedOps) {
std::cerr << op << " ";
}
std::cerr << ") in a invocation call in function: "
<< _demangle(caller) << std::endl;
}
for (const auto& op : visitedOps) {
opSchemaStrs->insert(op);
(*functionToSchemaStrs)[caller].insert(op);
if (Verbose) {
std::cerr << "[DEBUG][OP_CALL] " << _demangle(caller) << " => "
<< op << std::endl;
}
}
}
}
static void extractStringValue(
Value* V, const std::function<void(const std::string&)>& CB) {
if (isa<UndefValue>(V)) {
// UndefValue inherits from Constant, but doesn't contain any data
// See: https://llvm.org/docs/LangRef.html#undefined-values
return;
}
if (auto array = dyn_cast<ConstantDataArray>(V)) {
// Normal case for C-style string literals and "std::basic_string".
if (array->isCString()) {
CB(array->getAsCString().str());
} else if (array->isString()) {
std::cerr << "[WARNING] ignore non-C string: "
<< array->getAsString().str() << std::endl;
}
} else if (auto CI = dyn_cast<ConstantInt>(V)) {
// A short string literal might be encoded as a constant integer, e.g.:
// "aten::AA" => 4702103508586165345 (0x41413A3A6E657461)
// This can be tricky as it depends on consistent endianness/size.
// We have seen this case with the "std::__1::basic_string" ABI.
uint64_t intValue = CI->getZExtValue();
auto data = reinterpret_cast<const char*>(&intValue);
CB({data, data + sizeof(uint64_t)/sizeof(char)});
} else if (auto C = dyn_cast<Constant>(V)) {
// A short string literal might also be stored in a constant vector, e.g.:
// store <2 x i64> <i64 8, i64 4702103508586165345>, <2 x i64>* %25
// Recursively extract each element to cover this case.
// We have seen this case with the "std::__cxx11::basic_string" ABI.
for (unsigned i = 0; auto elem = C->getAggregateElement(i); ++i) {
extractStringValue(elem, CB);
}
}
}
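// Worked example for the ConstantInt case above (assuming a little-endian
// target): 0x41413A3A6E657461 is laid out in memory as the byte sequence
//   0x61 0x74 0x65 0x6E 0x3A 0x3A 0x41 0x41
// which reads back as the 8-character string "aten::AA".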
static std::shared_ptr<std::string> extractOpSchema(
const std::string& contextualNamespace, Value* V) {
std::vector<std::string> schemaStrs;
extractStringValue(V, [&](const std::string& str) {
// NB: some operator names might already contain a namespace. If this occurs,
// we MUST NOT use the contextual namespace. Fortunately, it's easy to tell
// if a namespace is included: a double colon will be present.
// In particular, this will occur with TORCH_SELECTIVE_NAME.
const std::string& schemaStr =
(contextualNamespace.empty() || str.find("::") != std::string::npos)
? str : contextualNamespace + str;
if (FunctionSchemaPatternLoc.pattern->match(schemaStr)) {
schemaStrs.push_back(schemaStr);
}
});
if (schemaStrs.empty()) {
return {};
}
if (schemaStrs.size() > 1) {
std::cerr << "[WARNING] found " << schemaStrs.size()
<< " op schema strings in one value!" << std::endl;
}
const std::string schemaStr = schemaStrs[0];
auto pos = schemaStr.find_first_of(".(");
return std::make_shared<std::string>(
pos == std::string::npos ? schemaStr : schemaStr.substr(0, pos));
}
static void printDebugPath(
const VALUE_MAP* debugPath, Value* src, Value* dest) {
if (!debugPath) {
return;
}
int depth = 0;
for (auto N = dest; ; N = debugPath->at(N)) {
std::cerr << "[DEBUG][PATH][" << ++depth << "]";
printDebugValue(N);
std::cerr << std::endl;
if (N == src) {
break;
}
}
}
static void printDebugValue(Value* V) {
if (auto F = dyn_cast<Function>(V)) {
std::cerr << "[FUNC] " << _demangle(_name(F));
} else if (isa<Constant>(V)) {
std::cerr << "[CONST] " << *V;
} else if (isa<Instruction>(V)) {
std::cerr << "[INST] " << *V;
} else if (V) {
std::cerr << "[VALUE] " << *V;
} else {
std::cerr << "NULL";
}
}
static void printAsYAML(
std::ostream& out, const SET& keys, const GRAPH& graph,
const PATH* path) {
for (const auto& K : keys) {
out << "- name: " << _demangle(K) << std::endl;
auto it = graph.find(K);
if (it == graph.end() || it->second.empty()) {
continue;
}
out << " depends:" << std::endl;
for (const auto& value : it->second) {
out << " - name: " << _demangle(value) << std::endl;
if (path) {
std::vector<std::string> rpath;
for (std::string prev = value;
rpath.push_back(prev), prev != K;
prev = path->at(K).at(prev));
out << " path:" << std::endl;
for (auto pit = rpath.rbegin(); pit != rpath.rend(); ++pit) {
out << " - " << _demangle(*pit) << std::endl;
}
}
}
}
}
};
} // namespace
char OpDependency::ID = 0;
static RegisterPass<OpDependency> X("op_dependency", "Op Dependency Pass");

View File

@ -1,137 +0,0 @@
"""
This util parses op_deps_pass output (in YAML) and converts it into
other formats for downstream use cases. It is not used by the OSS CMake build.
To run this file by hand from the root of the PyTorch repository, run:
python -m tools.code_analyzer.op_deps_processor \
--op-dependency build_code_analyzer/work/torch_result.yaml \
--output pt_deps.bzl
"""
import argparse
import yaml
from typing import Any, List
from tools.codegen.code_template import CodeTemplate
BAZEL_OUTPUT = CodeTemplate("""\
TORCH_DEPS = {
${ops}
}
""")
BAZEL_OP = CodeTemplate("""\
"${op_name}": [
${op_deps}
],
""")
BAZEL_OP_DEP = CodeTemplate("""\
"${dep_name}",
""")
DOT_OUTPUT = CodeTemplate("""\
digraph {
layout="circo";
${ops}
}
""")
DOT_OP = CodeTemplate("""\
${op_deps}
""")
DOT_OP_DEP = CodeTemplate("""\
"${op_name}" -> "${dep_name}";
""")
def load_op_deps(fname: str) -> Any:
with open(fname, 'r') as stream:
return yaml.safe_load(stream)
def process_base_ops(graph: Any, base_ops: List[str]) -> None:
# remove base ops from all `depends` lists to compress the output graph
for op in graph:
op['depends'] = [
dep for dep in op.get('depends', []) if dep['name'] not in base_ops
]
# add base ops section at the beginning
graph.insert(0, {
'name': '__BASE__',
'depends': [{'name': name} for name in base_ops]})
def convert(
fname: str,
graph: Any,
output_template: CodeTemplate,
op_template: CodeTemplate,
op_dep_template: CodeTemplate,
) -> None:
ops = []
for op in graph:
op_name = op['name']
op_deps = []
for dep in op.get('depends', []):
dep_name = dep['name']
if dep_name == op_name:
# skip self-references
continue
op_deps.append(
op_dep_template.substitute(
op_name=op_name,
dep_name=dep_name))
if not op_deps:
# skip ops without any fanout
continue
ops.append(
op_template.substitute(
op_name=op_name,
op_deps=op_deps))
with open(fname, 'w') as out:
out.write(output_template.substitute(ops=ops))
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Util to parse & convert op_deps_pass output')
parser.add_argument(
'--op_dependency',
required=True,
help='input yaml file of op dependency graph produced by op_deps_pass')
parser.add_argument(
'--format',
default='bazel',
help='output file format [bazel, dot]')
parser.add_argument(
'--base_ops',
nargs='*',
help='optional list of `base` ops that should always be kept in the '
'custom build, to make the output stable against trivial changes; '
'each item is `namespace`::`operator name` without overload; '
'e.g.: aten::empty aten::size ...')
parser.add_argument(
'--output',
required=True,
help='output file')
args = parser.parse_args()
deps = load_op_deps(args.op_dependency)
if args.base_ops:
process_base_ops(deps, args.base_ops)
if args.format == 'bazel':
convert(args.output, deps, BAZEL_OUTPUT, BAZEL_OP, BAZEL_OP_DEP)
elif args.format == 'dot':
convert(args.output, deps, DOT_OUTPUT, DOT_OP, DOT_OP_DEP)
else:
raise Exception("Unknown output format: " + args.format)

View File

@ -1,28 +0,0 @@
#!/bin/bash
##############################################################################
# Invoke code analyzer binary with pre-defined parameters for LibTorch.
# This script should be called via build.sh. Do NOT use it directly.
##############################################################################
set -exu
echo "Analyze: ${INPUT}"
# NB: op_register_pattern actually contains "too" many entries. We only
# need to match symbols that survive inlining, and most of the public
# registration API disappears after inlining (e.g., only _def and _impl
# are retained). But the inliner isn't guaranteed to run, so for safety
# we match a more expansive set.
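# For reference, -op_schema_pattern below is meant to match schema strings
# such as (examples only):
#   aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
#   quantized::conv2d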
"${ANALYZER_BIN}" \
-op_schema_pattern="^(_aten|_prim|aten|quantized|_quantized|prepacked|profiler|_test)::[a-zA-Z0-9_.]+(\(.*)?$" \
-op_register_pattern="c10::RegisterOperators::(op|checkSchemaAndRegisterOp_)|c10::Module::(_?def|_?impl)|torch::Library::(_?def|_?impl)" \
-op_invoke_pattern="c10::Dispatcher::findSchema" \
-root_symbol_pattern="torch::jit::[^(]" \
-torch_library_init_pattern="^.*TORCH_LIBRARY_init_([^(]+)(\(.*)?$" \
-torch_library_init_pattern="^.*TORCH_LIBRARY_FRAGMENT_init_([_]*[^_]+)_[0-9]+(\(.*)?$" \
-torch_library_init_pattern="^.*TORCH_LIBRARY_IMPL_init_([_]*[^_]+)_([^_]+)_[0-9]+(\(.*)?$" \
${EXTRA_ANALYZER_FLAGS} \
"${INPUT}" \
> "${OUTPUT}"
echo "Result: ${OUTPUT}"