diff --git a/.github/workflows/slow-tests.yml b/.github/workflows/slow-tests.yml index 95bff1c26..54001d723 100644 --- a/.github/workflows/slow-tests.yml +++ b/.github/workflows/slow-tests.yml @@ -102,13 +102,6 @@ jobs: source .venv/bin/activate make slow_tests - - name: Run end-to-end examples tests on multi GPU - if: always() - run: | - source .venv/bin/activate - uv pip install deepspeed - make test_examples - - name: Generate Reports if: always() run: | diff --git a/Makefile b/Makefile index b152531ba..16aae3f9b 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,8 @@ -.PHONY: test precommit common_tests slow_tests test_examples tests_gpu test_experimental +.PHONY: test precommit common_tests slow_tests tests_gpu test_experimental check_dirs := examples tests trl ACCELERATE_CONFIG_PATH = `pwd`/examples/accelerate_configs -COMMAND_FILES_PATH = `pwd`/commands test: pytest -n auto -m "not slow and not low_priority" -s -v --reruns 5 --reruns-delay 1 --only-rerun '(OSError|Timeout|HTTPError.*502|HTTPError.*504||not less than or equal to 0.01)' tests/ @@ -16,18 +15,5 @@ precommit: slow_tests: pytest -m "slow" tests/ $(if $(IS_GITHUB_CI),--report-log "slow_tests.log",) -test_examples: - touch temp_results_sft_tests.txt - for file in $(ACCELERATE_CONFIG_PATH)/*.yaml; do \ - TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_sft.sh; \ - echo $$?','$${file} >> temp_results_sft_tests.txt; \ - done - - touch temp_results_dpo_tests.txt - for file in $(ACCELERATE_CONFIG_PATH)/*.yaml; do \ - TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_dpo.sh; \ - echo $$?','$${file} >> temp_results_dpo_tests.txt; \ - done - test_experimental: pytest -k "experimental" diff --git a/commands/run_dpo.sh b/commands/run_dpo.sh deleted file mode 100644 index f34b12cbb..000000000 --- a/commands/run_dpo.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# This script runs an SFT example end-to-end on a tiny model using different possible configurations -# but defaults to QLoRA + PEFT -OUTPUT_DIR="test_dpo/" -MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" -DATASET_NAME="trl-internal-testing/hh-rlhf-helpful-base-trl-style" -MAX_STEPS=5 -BATCH_SIZE=2 -SEQ_LEN=128 - -# Handle extra arguments in case one passes accelerate configs. -EXTRA_ACCELERATE_ARGS="" -EXTRA_TRAINING_ARGS="""--use_peft \ - --load_in_4bit -""" - -# This is a hack to get the number of available GPUs -NUM_GPUS=2 - -if [[ "${TRL_ACCELERATE_CONFIG}" == "" ]]; then - EXTRA_ACCELERATE_ARGS="" -else - EXTRA_ACCELERATE_ARGS="--config_file $TRL_ACCELERATE_CONFIG" - # For DeepSpeed configs we need to set the `--fp16` flag to comply with our configs exposed - # on `examples/accelerate_configs` and our runners do not support bf16 mixed precision training. - if [[ $TRL_ACCELERATE_CONFIG == *"deepspeed"* ]]; then - EXTRA_TRAINING_ARGS="--fp16" - else - echo "Keeping QLoRA + PEFT" - fi -fi - - -CMD=""" -accelerate launch $EXTRA_ACCELERATE_ARGS \ - --num_processes $NUM_GPUS \ - --mixed_precision 'fp16' \ - `pwd`/trl/scripts/dpo.py \ - --model_name_or_path $MODEL_NAME \ - --dataset_name $DATASET_NAME \ - --output_dir $OUTPUT_DIR \ - --max_steps $MAX_STEPS \ - --per_device_train_batch_size $BATCH_SIZE \ - --max_length $SEQ_LEN \ - $EXTRA_TRAINING_ARGS -""" - -echo "Starting program..." - -{ # try - echo $CMD - eval "$CMD" -} || { # catch - # save log for exception - echo "Operation Failed!" - exit 1 -} -exit 0 diff --git a/commands/run_sft.sh b/commands/run_sft.sh deleted file mode 100644 index b7beaaf7f..000000000 --- a/commands/run_sft.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash -# This script runs an SFT example end-to-end on a tiny model using different possible configurations -# but defaults to QLoRA + PEFT -OUTPUT_DIR="test_sft/" -MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5" -DATASET_NAME="stanfordnlp/imdb" -MAX_STEPS=5 -BATCH_SIZE=2 -SEQ_LEN=128 - - -# Handle extra arguments in case one passes accelerate configs. -EXTRA_ACCELERATE_ARGS="" -EXTRA_TRAINING_ARGS="""--use_peft \ - --load_in_4bit -""" - -# Set your number of GPUs here -NUM_GPUS=2 - -if [[ "${TRL_ACCELERATE_CONFIG}" == "" ]]; then - EXTRA_ACCELERATE_ARGS="" -else - EXTRA_ACCELERATE_ARGS="--config_file $TRL_ACCELERATE_CONFIG" - # For DeepSpeed configs we need to set the `--fp16` flag to comply with our configs exposed - # on `examples/accelerate_configs` and our runners do not support bf16 mixed precision training. - if [[ $TRL_ACCELERATE_CONFIG == *"deepspeed"* ]]; then - EXTRA_TRAINING_ARGS="--fp16" - else - echo "Keeping QLoRA + PEFT" - fi -fi - - -CMD=""" -accelerate launch $EXTRA_ACCELERATE_ARGS \ - --num_processes $NUM_GPUS \ - --mixed_precision 'fp16' \ - `pwd`/trl/scripts/sft.py \ - --model_name $MODEL_NAME \ - --dataset_name $DATASET_NAME \ - --output_dir $OUTPUT_DIR \ - --max_steps $MAX_STEPS \ - --per_device_train_batch_size $BATCH_SIZE \ - --max_length $SEQ_LEN \ - $EXTRA_TRAINING_ARGS -""" - -echo "Starting program..." - -{ # try - echo $CMD - eval "$CMD" -} || { # catch - # save log for exception - echo "Operation Failed!" - exit 1 -} -exit 0