Files
pytorch/.github/workflows/_mac-test.yml
Nikita Shulga bd0907dc4c [BE][CI] Unify requirments (#163396)
Both Linux, Windows and MacOS CI workflows should use `.ci/docker/requirements-ci.txt`
TODOS:
 - Investigate why `choco install cmake` is needed to successfully detect MKL
 - Move `psutil` installation from specific scripts into requirements-ci.txt
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163396
Approved by: https://github.com/Skylion007
2025-10-01 03:28:48 +00:00

291 lines
11 KiB
YAML

name: mac-test
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
test-matrix:
required: true
type: string
description: JSON description of what test configs to run.
sync-tag:
required: false
type: string
default: ""
description: |
If this is set, our linter will use this to make sure that every other
job with the same `sync-tag` is identical.
python-version:
required: false
type: string
default: "3.12"
description: |
The python version to be used. Will be 3.9 by default
timeout-minutes:
required: false
type: number
default: 270
description: |
Set the maximum (in minutes) how long the workflow should take to finish
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
monitor-log-interval:
description: |
Set the interval for the monitor script to log utilization.
required: false
type: number
default: 5
monitor-data-collect-interval:
description: |
Set the interval for the monitor script to collect data.
required: false
type: number
default: 1
secrets:
HUGGING_FACE_HUB_TOKEN:
required: false
description: |
HF Auth token to avoid rate limits when downloading models or datasets from hub
jobs:
test:
# Don't run on forked repos or empty test matrix
if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
defaults:
run:
shell: bash -e -l {0}
strategy:
matrix: ${{ fromJSON(inputs.test-matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PR_BODY: ${{ github.event.pull_request.body }}
steps:
- name: Print runner OS/HW info
run: |
sysctl machdep.cpu.brand_string kern.osproductversion
- name: Clean up leftover processes on MacOS pet runner
continue-on-error: true
run: |
for PROCESS in "python" "conda" "ninja" "clang"; do
echo "Cleaning up all remaining ${PROCESS} process"
pkill "${PROCESS}" || true
done
- name: Clean up brew miniconda, if installed
continue-on-error: true
run: |
if brew list miniconda; then
brew uninstall miniconda
echo "REINSTALL_BREW_MINICONDA=1" >> "${GITHUB_ENV}"
fi
- name: Clean up leftover local python3 site-packages on MacOS pet runner
continue-on-error: true
run: |
for dir in ~/.local/lib/python3.*/site-packages; do
echo "Cleaning up ${dir}"
rm -rf "${dir}"
done
- name: Clean up disk space before running MacOS workflow
uses: pytorch/test-infra/.github/actions/check-disk-space@main
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup Python
uses: pytorch/test-infra/.github/actions/setup-python@main
with:
python-version: ${{ inputs.python-version }}
pip-requirements-file: .ci/docker/requirements-ci.txt
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
continue-on-error: true
env:
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
WORKFLOW_NAME: ${{ github.workflow }}
WORKFLOW_RUN_ID: ${{github.run_id}}
MONITOR_LOG_INTERVAL: ${{ inputs.monitor-log-interval }}
MONITOR_DATA_COLLECT_INTERVAL: ${{ inputs.monitor-data-collect-interval }}
run: |
"$VENV_PATH/bin/python3" -m pip install psutil==5.9.8 dataclasses_json==0.6.7
"$VENV_PATH/bin/python3" -m tools.stats.monitor --log-interval "$MONITOR_LOG_INTERVAL" --data-collect-interval "$MONITOR_DATA_COLLECT_INTERVAL" > usage_log.txt 2>&1 &
echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
- name: Download build artifacts
uses: ./.github/actions/download-build-artifacts
with:
name: ${{ inputs.build-environment }}
use-gha: true
- name: Download TD artifacts
continue-on-error: true
uses: ./.github/actions/download-td-artifacts
with:
use-gha: true
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Check for keep-going label and re-enabled test issues
# This uses the filter-test-configs action because it conveniently
# checks for labels and re-enabled test issues. It does not actually do
# any filtering. All filtering is done in the build step.
id: keep-going
uses: ./.github/actions/filter-test-configs
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Set Test step time
id: test-timeout
shell: bash
env:
JOB_TIMEOUT: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
run: |
echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
- name: Test
id: test
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
env:
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
run: |
# shellcheck disable=SC1090
set -ex
# TODO: Remove me later, and properly activate venv
PATH="$VENV_PATH/bin:$PATH"
export PATH
# Print out some information about the test environment
for tool in python3 python; do
which $tool
$tool --version
done
python3 -mpip install --no-index --no-deps dist/*.whl
set +e
pushd "${RUNNER_TEMP}"
# Install pip dependencies if they are not found. This is to mitigate a peculiar
# flaky missing dependencies on MacOS
python3 -c "import torch"
RC=$?
popd
if [ "${RC}" -ne 0 ]; then
python3 -mpip install --ignore-installed -r "${PIP_REQUIREMENTS_FILE}"
fi
set -e
.ci/pytorch/macos-test.sh
- name: Print remaining test logs
shell: bash
if: always() && steps.test.conclusion
run: |
cat test/**/*_toprint.log || true
- name: Run OP benchmark
shell: bash
if: ${{ contains(steps.get-job-id.outputs.job-name, 'mps') }}
run: |
python3 test/bench_mps_ops.py
- name: Stop monitoring script
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
continue-on-error: true
env:
MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
run: |
kill "$MONITOR_SCRIPT_PID"
- name: Upload test artifacts
uses: ./.github/actions/upload-test-artifacts
if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
with:
use-gha: true
file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
- name: Upload the benchmark results
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: test/test-reports
dry-run: false
schema-version: v3
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Upload utilization stats
if: ${{ always() && steps.test.conclusion && steps.test.conclusion != 'skipped' && !inputs.disable-monitor }}
continue-on-error: true
uses: ./.github/actions/upload-utilization-stats
with:
job_id: ${{ steps.get-job-id.outputs.job-id }}
job_name: ${{ steps.get-job-id.outputs.job-name }}
workflow_name: ${{ github.workflow }}
workflow_run_id: ${{github.run_id}}
workflow_attempt: ${{github.run_attempt}}
local_path: usage_log.txt
- name: Reinstall brew miniconda, if was installed
if: always()
continue-on-error: true
run: |
if [[ -n "$REINSTALL_BREW_MINICONDA" ]]; then
brew install --cask miniconda
fi
- name: Clean up disk space
if: always()
continue-on-error: true
uses: pytorch/test-infra/.github/actions/check-disk-space@main