mirror of https://github.com/huggingface/trl.git
Fix CI CUDA out of memory errors by improving GPU memory management (#4238)
commit f853e091ea (parent 803ec0d856), committed via GitHub
.github/workflows/tests.yml (1 addition)
@@ -16,6 +16,7 @@ on:
 env:
   TQDM_DISABLE: 1
   CI_SLACK_CHANNEL: ${{ secrets.CI_PUSH_MAIN_CHANNEL }}
+  PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:True"

 jobs:
   check_code_quality:
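The new variable only changes the behaviour of PyTorch's CUDA caching allocator; it must be set before CUDA is initialized to take effect. A minimal sketch of how the same setting could be reproduced in a local test run (not part of the commit; the use of setdefault to avoid overriding an existing value is an assumption about local workflows):

import os

# Mirror the CI allocator setting locally. This must happen before CUDA is
# initialized, so the env var is set before torch is imported/used.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

import torch  # noqa: E402  (import deliberately placed after the env var)

# On a CUDA machine, allocations made from here on use expandable segments.
print(torch.cuda.is_available())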
tests/conftest.py (new file, 35 lines)
# Copyright 2020-2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc

import pytest
import torch


@pytest.fixture(autouse=True)
def cleanup_gpu():
    """
    Automatically cleanup GPU memory after each test.

    This fixture helps prevent CUDA out of memory errors when running tests in parallel
    with pytest-xdist by ensuring models and tensors are properly garbage collected
    and GPU memory caches are cleared between tests.
    """
    yield
    # Cleanup after test
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
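Because the fixture is marked autouse, every test collected under tests/ gets this cleanup without requesting the fixture explicitly. A hypothetical illustration of how a test benefits from it (the test below is not part of the repository):

import pytest
import torch


def test_gpu_tensor_is_released_between_tests():
    if not torch.cuda.is_available():
        pytest.skip("CUDA not available")
    # Allocate something sizeable on the GPU; no manual cleanup is needed here.
    x = torch.empty(2048, 2048, device="cuda")
    assert x.is_cuda
    # When this test returns, the autouse cleanup_gpu fixture runs
    # gc.collect(), torch.cuda.empty_cache() and torch.cuda.synchronize(),
    # releasing cached memory before the next test starts.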