[Utilization Monitor] input to disable utilization monitor (#140857)

# Overview
Currently, monitor.py produces error-only results, so this PR introduces a disable-monitor option to all *-test.yml files. We would also like to explore how the monitor code affects benchmark results.

# Next steps
- fix monitor.py
- enable the monitor for non-benchmark tests
- investigate how benchmark tests behave with the monitor running as a background job

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140857
Approved by: https://github.com/huydhn
This commit is contained in:
Yang Wang
2024-11-18 23:26:01 +00:00
committed by PyTorch MergeBot
parent 48a276c5a0
commit 175ba9fed6
6 changed files with 60 additions and 7 deletions

View File

@ -47,7 +47,14 @@ inputs:
GITHUB_TOKEN:
description: GitHub token
required: true
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
#env:
# GIT_DEFAULT_BRANCH: ${{ inputs.default_branch }}
@ -115,6 +122,7 @@ runs:
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
shell: bash
continue-on-error: true
run: |
@ -289,7 +297,7 @@ runs:
cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: always() && steps.monitor-script.outputs.monitor-script-pid
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
shell: bash
continue-on-error: true
env:

View File

@ -47,6 +47,14 @@ on:
required: false
type: string
default: ""
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
secrets:
HUGGING_FACE_HUB_TOKEN:
required: false
@ -145,6 +153,7 @@ jobs:
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
shell: bash
continue-on-error: true
run: |
@ -328,7 +337,7 @@ jobs:
cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: always() && steps.monitor-script.outputs.monitor-script-pid
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
shell: bash
continue-on-error: true
env:

View File

@ -30,6 +30,14 @@ on:
default: 270
description: |
Set the maximum (in minutes) how long the workflow should take to finish
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
jobs:
test:
@ -101,6 +109,7 @@ jobs:
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
continue-on-error: true
run: |
${CONDA_RUN} python3 -m tools.stats.monitor > usage_log.txt 2>&1 &
@ -200,7 +209,7 @@ jobs:
cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: always() && ${{ steps.monitor-script.outputs.monitor-script-pid }}
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
continue-on-error: true
env:
MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}

View File

@ -38,6 +38,14 @@ on:
default: ""
description: |
List of tests to include (empty string implies default list)
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
@ -91,6 +99,7 @@ jobs:
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
shell: bash
continue-on-error: true
run: |
@ -247,7 +256,7 @@ jobs:
cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: always() && steps.monitor-script.outputs.monitor-script-pid
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
shell: bash
continue-on-error: true
env:

View File

@ -28,6 +28,14 @@ on:
default: 240
description: |
Set the maximum (in minutes) how long the workflow should take to finish
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
@ -101,6 +109,7 @@ jobs:
- name: Start monitoring script
id: monitor-script
shell: bash
if: ${{ !inputs.disable-monitor }}
continue-on-error: true
run: |
# Windows conda doesn't have python3 binary, only python, but it's python3
@ -213,7 +222,7 @@ jobs:
cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: always() && steps.monitor-script.outputs.monitor-script-pid
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
shell: bash
continue-on-error: true
env:

View File

@ -38,6 +38,14 @@ on:
default: ""
description: |
List of tests to include (empty string implies default list)
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
@ -83,6 +91,7 @@ jobs:
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
shell: bash
continue-on-error: true
run: |
@ -242,7 +251,7 @@ jobs:
cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: always() && steps.monitor-script.outputs.monitor-script-pid
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
shell: bash
continue-on-error: true
env: