AutoHeuristic: util scripts (#133409)

This PR introduces scripts that make it easier to use autoheuristic: - `collect_data.sh`: The user can specify things like the number of GPUs to be used and the number of training samples to collect. This script will open one tmux pane per GPU and collect num_training_samples/num_gpus samples per GPU. - `merge_data.py`: This script can be used to merge multiple training data files into a single file. Pull Request resolved: https://github.com/pytorch/pytorch/pull/133409 Approved by: https://github.com/Chillee
2025-10-20 21:14:14 +08:00 · 2024-08-13 22:38:56 -07:00
parent b0fc6aa412
commit 142353eca3
5 changed files with 155 additions and 0 deletions
--- a/torchgen/_autoheuristic/benchmark_runner.py
+++ b/torchgen/_autoheuristic/benchmark_runner.py
@ -57,7 +57,9 @@ class BenchmarkRunner:
        args = self.parser.parse_args()
        if args.use_heuristic:
            torch._inductor.config.autoheuristic_use = self.name
+            torch._inductor.config.autoheuristic_collect = ""
        else:
+            torch._inductor.config.autoheuristic_use = ""
            torch._inductor.config.autoheuristic_collect = self.name
        torch._inductor.config.autoheuristic_log_path = args.o
        if args.device is not None:
--- a/torchgen/_autoheuristic/collect_data.sh
+++ b/torchgen/_autoheuristic/collect_data.sh
@ -0,0 +1,55 @@
+#!/bin/bash
+
+# this script makes it easy parallize collecting data across using multiple GPUs
+
+# Check if tmux is installed
+if ! command -v tmux &> /dev/null; then
+    echo "tmux is not installed. Please install it and try again."
+    exit 1
+fi
+
+# Check if the correct number of arguments is provided
+if [ "$#" -ne 5 ]; then
+    echo "Usage: $0 \"<python_command>\" <comma_separated_device_numbers> <num_samples to generate> <CONDA_ENV> <OUTPUT_DIR>"
+    echo "Example: $0 \"python run.py --a b --b c\" 1,4,5,3 1000 pytorch-3.10 a100"
+    exit 1
+fi
+
+PYTHON_COMMAND=$1
+DEVICE_NUMBERS=$2
+NUM_SAMPLES=$3
+CONDA_ENV=$4
+OUTPUT_DIR=$5
+
+# Create a new tmux session
+SESSION_NAME="parallel_run_$(date +%s)"
+tmux new-session -d -s "$SESSION_NAME"
+
+# Split the device numbers
+IFS=',' read -ra DEVICES <<< "$DEVICE_NUMBERS"
+
+NUM_GPUS=${#DEVICES[@]}
+NUM_SAMPLES_PER_GPU=$((NUM_SAMPLES / NUM_GPUS))
+echo "AutoHeuristic will collect ${NUM_SAMPLES} samples split across ${NUM_GPUS} GPUs"
+echo "Each GPU will collect ${NUM_SAMPLES_PER_GPU}"
+
+# Function to create a new pane and run the script
+create_pane() {
+    local device=$1
+    tmux split-window -t "$SESSION_NAME"
+    tmux send-keys -t "$SESSION_NAME" "conda activate ${CONDA_ENV} && $PYTHON_COMMAND --device $device -o ${OUTPUT_DIR}/data_${device}.txt --num-samples ${NUM_SAMPLES_PER_GPU}" C-m
+}
+
+# Create panes for each device number
+for device in "${DEVICES[@]}"; do
+    create_pane ${device}
+done
+
+# Remove the first pane (empty one)
+tmux kill-pane -t "$SESSION_NAME.0"
+
+# Arrange panes in a tiled layout
+tmux select-layout -t "$SESSION_NAME" tiled
+
+# Attach to the tmux session
+tmux attach-session -t "$SESSION_NAME"
--- a/torchgen/_autoheuristic/generate_heuristic.sh
+++ b/torchgen/_autoheuristic/generate_heuristic.sh
@ -0,0 +1,36 @@
+#!/bin/bash
+
+if [ $# -lt 8 ]; then
+    echo "Error: This script requires exactly at least 8 arguments."
+    exit 1
+fi
+
+MODE=$1
+GPU_DEVICE_IDS=$2
+CONDA_ENV=$3
+NUM_SAMPLES=$4
+OUTPUT_DIR=$5
+HEURISTIC_NAME=$6
+BENCHMARK_SCRIPT=$7
+TRAIN_SCRIPT=$8
+EXTRA_TRAIN_ARGS=$9
+
+mkdir -p ${OUTPUT_DIR}
+
+if [ "$MODE" = "collect" ]; then
+    # this will collect data for NUM_SAMPLES samples on the number of GPUs specified in GPU_DEVICE_IDS in parallel
+    bash ../collect_data.sh "python ${BENCHMARK_SCRIPT}" ${GPU_DEVICE_IDS} ${NUM_SAMPLES} ${CONDA_ENV} ${OUTPUT_DIR}
+elif [ "$MODE" = "generate" ]; then
+    # the bash script above generates one separate txt file per GPU
+    # if GPU_DEVICE_IDS=6,7, it will generate "data_6.txt", "data_7.txt" inside OUTPUT_DIR
+    # these files have to be merged into a single file before we can use AutoHeuristic to learn a heuristic
+    OUTPUT_FILE="${OUTPUT_DIR}/${HEURISTIC_NAME}.txt"
+    INPUT_FILES=$(echo $GPU_DEVICE_IDS | tr ',' '\n' | sed "s|^|${OUTPUT_DIR}/data_|" | sed 's/$/.txt/')
+    python ../merge_data.py ${OUTPUT_FILE} ${INPUT_FILES}
+
+    # This will learn a heuristic and generate the code into torch/_inductor/autoheuristic/artifacts/_${HEURISTIC_NAME}.py
+    python ${TRAIN_SCRIPT} ${OUTPUT_FILE} --heuristic-name ${HEURISTIC_NAME} ${EXTRA_TRAIN_ARGS}
+else
+    echo "Error: Invalid mode ${MODE}. Please use 'collect' or 'generate'."
+    exit 1
+fi
--- a/torchgen/_autoheuristic/merge_data.py
+++ b/torchgen/_autoheuristic/merge_data.py
@ -0,0 +1,60 @@
+import sys
+from typing import List
+
+
+def merge_txt_files(file_list: List[str], output_file: str) -> None:
+    if not file_list:
+        print("No input files provided.")
+        return
+
+    metadata: List[str] = []
+    content: List[str] = []
+
+    # Read metadata and content from all files
+    for file_path in file_list:
+        try:
+            with open(file_path) as file:
+                lines = file.readlines()
+                if len(lines) < 2:
+                    print(
+                        f"Error: {file_path} does not have enough lines for metadata."
+                    )
+                    return
+
+                file_metadata = lines[:2]
+                file_content = lines[2:]
+
+                if not metadata:
+                    metadata = file_metadata
+                elif metadata != file_metadata:
+                    print(f"Error: Metadata mismatch in {file_path}")
+                    print("Expected metadata:")
+                    print("".join(metadata))
+                    print(f"Metadata in {file_path}:")
+                    print("".join(file_metadata))
+                    return
+
+                content.extend(file_content)
+        except OSError as e:
+            print(f"Error reading file {file_path}: {e}")
+            return
+
+    # Write merged content to output file
+    try:
+        with open(output_file, "w") as outfile:
+            outfile.writelines(metadata)
+            outfile.writelines(content)
+        print(f"Successfully merged files into {output_file}")
+    except OSError as e:
+        print(f"Error writing to output file {output_file}: {e}")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print(
+            "Usage: python script.py output_file.txt input_file1.txt input_file2.txt ..."
+        )
+    else:
+        output_file = sys.argv[1]
+        input_files = sys.argv[2:]
+        merge_txt_files(input_files, output_file)
--- a/torchgen/_autoheuristic/requirements.txt
+++ b/torchgen/_autoheuristic/requirements.txt
@ -0,0 +1,2 @@
+pandas
+scikit-learn