AutoHeuristic: util scripts (#133409)

This PR introduces scripts that make it easier to use autoheuristic:
- `collect_data.sh`: The user can specify things like the number of GPUs to be used and the number of training samples to collect. This script will open one tmux pane per GPU and collect num_training_samples/num_gpus samples per GPU.
- `merge_data.py`: This script can be used to merge multiple training data files into a single file.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/133409
Approved by: https://github.com/Chillee
This commit is contained in:
Alnis Murtovi
2024-08-13 22:38:56 -07:00
committed by PyTorch MergeBot
parent b0fc6aa412
commit 142353eca3
5 changed files with 155 additions and 0 deletions

View File

@ -57,7 +57,9 @@ class BenchmarkRunner:
args = self.parser.parse_args()
if args.use_heuristic:
torch._inductor.config.autoheuristic_use = self.name
torch._inductor.config.autoheuristic_collect = ""
else:
torch._inductor.config.autoheuristic_use = ""
torch._inductor.config.autoheuristic_collect = self.name
torch._inductor.config.autoheuristic_log_path = args.o
if args.device is not None:

View File

@ -0,0 +1,55 @@
#!/bin/bash
# Parallelize AutoHeuristic data collection across multiple GPUs.
#
# Positional arguments:
#   $1  python command to run (quoted as a single argument)
#   $2  comma-separated device numbers, e.g. 1,4,5,3
#   $3  total number of samples to generate (split evenly across devices)
#   $4  conda environment that is activated in every pane
#   $5  output directory; each device's -o path is <dir>/data_<device>.txt

# Check if tmux is installed
if ! command -v tmux &> /dev/null; then
    echo "tmux is not installed. Please install it and try again." >&2
    exit 1
fi

# Check if the correct number of arguments is provided
if [ "$#" -ne 5 ]; then
    echo "Usage: $0 \"<python_command>\" <comma_separated_device_numbers> <num_samples to generate> <CONDA_ENV> <OUTPUT_DIR>" >&2
    echo "Example: $0 \"python run.py --a b --b c\" 1,4,5,3 1000 pytorch-3.10 a100" >&2
    exit 1
fi

PYTHON_COMMAND=$1
DEVICE_NUMBERS=$2
NUM_SAMPLES=$3
CONDA_ENV=$4
OUTPUT_DIR=$5

# Split the device numbers
IFS=',' read -ra DEVICES <<< "$DEVICE_NUMBERS"
NUM_GPUS=${#DEVICES[@]}

# Validate the inputs *before* a tmux session is created, so a bad argument
# does not leave an orphaned detached session behind.
if [ "$NUM_GPUS" -eq 0 ]; then
    # Also prevents a division by zero below.
    echo "Error: no device numbers were given." >&2
    exit 1
fi
if ! [[ "$NUM_SAMPLES" =~ ^[0-9]+$ ]]; then
    echo "Error: <num_samples to generate> must be a non-negative integer, got '${NUM_SAMPLES}'." >&2
    exit 1
fi

# 10# forces base 10 so that a value such as 0100 is not read as octal.
# Integer division: any remainder of NUM_SAMPLES / NUM_GPUS is dropped.
NUM_SAMPLES_PER_GPU=$((10#$NUM_SAMPLES / NUM_GPUS))

# The per-device output files are placed in this directory, so make sure it exists.
mkdir -p "$OUTPUT_DIR" || exit 1

# Create a new tmux session
SESSION_NAME="parallel_run_$(date +%s)"
tmux new-session -d -s "$SESSION_NAME"

echo "AutoHeuristic will collect ${NUM_SAMPLES} samples split across ${NUM_GPUS} GPUs"
echo "Each GPU will collect ${NUM_SAMPLES_PER_GPU}"
# Open a new pane in the tmux session and start data collection for one device.
# Arguments:
#   $1  device number; forwarded as --device and used in the output file name
# Reads the globals SESSION_NAME, CONDA_ENV, PYTHON_COMMAND, OUTPUT_DIR and
# NUM_SAMPLES_PER_GPU.
create_pane() {
    local device=$1
    tmux split-window -t "$SESSION_NAME"
    # Re-tile after every split. Otherwise each split halves the most recently
    # created pane again, and once that pane is too small tmux rejects the
    # split with "no space for new pane", so later devices never get a pane.
    tmux select-layout -t "$SESSION_NAME" tiled
    # The freshly created pane is the active one, so the keys land there.
    tmux send-keys -t "$SESSION_NAME" "conda activate ${CONDA_ENV} && $PYTHON_COMMAND --device $device -o ${OUTPUT_DIR}/data_${device}.txt --num-samples ${NUM_SAMPLES_PER_GPU}" C-m
}
# Launch one worker pane per requested GPU.
# The id is left unquoted on purpose: word splitting drops whitespace around
# an id such as " 4" (from a list written as "1, 4").
for gpu_id in "${DEVICES[@]}"
do
    create_pane ${gpu_id}
done

# Pane 0 is the shell the session started with; no worker runs there, so drop it.
tmux kill-pane -t "$SESSION_NAME.0"

# Spread the remaining worker panes evenly over the window.
tmux select-layout -t "$SESSION_NAME" tiled

# Bring the session to the foreground so progress can be watched.
tmux attach-session -t "$SESSION_NAME"

View File

@ -0,0 +1,36 @@
#!/bin/bash
# Wrapper around the AutoHeuristic workflow: collects training data
# ("collect" mode) or merges the data and learns a heuristic ("generate" mode).
#
# Positional arguments:
#   $1  MODE              "collect" or "generate"
#   $2  GPU_DEVICE_IDS    comma-separated device ids, e.g. 6,7
#   $3  CONDA_ENV         conda environment used while collecting
#   $4  NUM_SAMPLES       total number of samples to collect
#   $5  OUTPUT_DIR        directory holding the collected and merged data
#   $6  HEURISTIC_NAME    name of the heuristic; also names the merged file
#   $7  BENCHMARK_SCRIPT  script run with python to collect data
#   $8  TRAIN_SCRIPT      script run with python to learn the heuristic
#   $9  EXTRA_TRAIN_ARGS  optional; extra arguments for TRAIN_SCRIPT
if [ $# -lt 8 ]; then
    echo "Error: This script requires at least 8 arguments." >&2
    echo "Usage: $0 <MODE> <GPU_DEVICE_IDS> <CONDA_ENV> <NUM_SAMPLES> <OUTPUT_DIR> <HEURISTIC_NAME> <BENCHMARK_SCRIPT> <TRAIN_SCRIPT> [EXTRA_TRAIN_ARGS]" >&2
    exit 1
fi

MODE=$1
GPU_DEVICE_IDS=$2
CONDA_ENV=$3
NUM_SAMPLES=$4
OUTPUT_DIR=$5
HEURISTIC_NAME=$6
BENCHMARK_SCRIPT=$7
TRAIN_SCRIPT=$8
# Only the ninth argument is used; pass several training flags as ONE quoted
# string (it is word-split again where it is expanded).
EXTRA_TRAIN_ARGS=$9

# Quoted so that a directory name containing spaces is created as one path.
mkdir -p "${OUTPUT_DIR}"
if [ "$MODE" = "collect" ]; then
    # this will collect data for NUM_SAMPLES samples on the number of GPUs specified in GPU_DEVICE_IDS in parallel
    # Every argument is quoted so collect_data.sh always receives exactly five
    # arguments, even when a value is empty or contains spaces.
    bash ../collect_data.sh "python ${BENCHMARK_SCRIPT}" "${GPU_DEVICE_IDS}" "${NUM_SAMPLES}" "${CONDA_ENV}" "${OUTPUT_DIR}"
elif [ "$MODE" = "generate" ]; then
    # the bash script above generates one separate txt file per GPU
    # if GPU_DEVICE_IDS=6,7, it will generate "data_6.txt", "data_7.txt" inside OUTPUT_DIR
    # these files have to be merged into a single file before we can use AutoHeuristic to learn a heuristic
    OUTPUT_FILE="${OUTPUT_DIR}/${HEURISTIC_NAME}.txt"

    # Build the list of per-GPU files as an array instead of a sed pipeline, so
    # spaces or sed metacharacters in OUTPUT_DIR cannot corrupt the paths.
    IFS=',' read -ra DEVICE_IDS <<< "${GPU_DEVICE_IDS}"
    INPUT_FILES=()
    for device_id in "${DEVICE_IDS[@]}"; do
        INPUT_FILES+=("${OUTPUT_DIR}/data_${device_id}.txt")
    done
    python ../merge_data.py "${OUTPUT_FILE}" "${INPUT_FILES[@]}"

    # This will learn a heuristic and generate the code into torch/_inductor/autoheuristic/artifacts/_${HEURISTIC_NAME}.py
    # EXTRA_TRAIN_ARGS stays unquoted on purpose: it may hold several
    # space-separated flags that must reach the script as separate arguments.
    python "${TRAIN_SCRIPT}" "${OUTPUT_FILE}" --heuristic-name "${HEURISTIC_NAME}" ${EXTRA_TRAIN_ARGS}
else
    echo "Error: Invalid mode ${MODE}. Please use 'collect' or 'generate'." >&2
    exit 1
fi

View File

@ -0,0 +1,60 @@
import sys
from typing import List
def merge_txt_files(file_list: List[str], output_file: str) -> None:
    """Merge several data files that share the same two-line header.

    The first two lines of every input file are treated as metadata and must
    be identical across all inputs. The output consists of that metadata
    (written once) followed by the remaining lines of every input, in the
    order the files are given.

    Problems (no inputs, unreadable file, fewer than two lines, metadata
    mismatch, unwritable output) are reported with ``print`` and end the
    call early; in every case except a write failure the output file is
    never opened.

    Args:
        file_list: Paths of the files to merge.
        output_file: Path of the merged file; overwritten if it exists.
    """
    if not file_list:
        print("No input files provided.")
        return

    metadata: List[str] = []
    content: List[str] = []

    # Read metadata and content from all files
    for file_path in file_list:
        try:
            with open(file_path) as file:
                lines = file.readlines()
        except OSError as e:
            print(f"Error reading file {file_path}: {e}")
            return

        if len(lines) < 2:
            print(f"Error: {file_path} does not have enough lines for metadata.")
            return

        # A file may end without a trailing newline. Left as-is, its last row
        # would be glued to the first row of the next file, and a two-line
        # file would spuriously fail the metadata comparison below.
        if not lines[-1].endswith("\n"):
            lines[-1] += "\n"

        file_metadata = lines[:2]
        file_content = lines[2:]

        if not metadata:
            metadata = file_metadata
        elif metadata != file_metadata:
            print(f"Error: Metadata mismatch in {file_path}")
            print("Expected metadata:")
            print("".join(metadata))
            print(f"Metadata in {file_path}:")
            print("".join(file_metadata))
            return

        content.extend(file_content)

    # Write merged content to output file
    try:
        with open(output_file, "w") as outfile:
            outfile.writelines(metadata)
            outfile.writelines(content)
        print(f"Successfully merged files into {output_file}")
    except OSError as e:
        print(f"Error writing to output file {output_file}: {e}")
if __name__ == "__main__":
    # argv layout: script name, output path, then one or more input paths.
    if len(sys.argv) >= 3:
        output_file, *input_files = sys.argv[1:]
        merge_txt_files(input_files, output_file)
    else:
        print(
            "Usage: python script.py output_file.txt input_file1.txt input_file2.txt ..."
        )

View File

@ -0,0 +1,2 @@
pandas
scikit-learn