🐳 Docker update + Simplify Jobs doc (#3931)

Co-authored-by: sergiopaniego <sergiopaniegoblanco@gmail.com>
Co-authored-by: Kashif Rasul <kashif.rasul@gmail.com>

Quentin Gallouédec authored on 2025-09-13 18:35:55 -06:00, committed by GitHub
parent 304eaf8053
commit 9955ee7eaa
40 changed files with 244 additions and 486 deletions

@@ -1,95 +1,84 @@
name: Build Docker images (scheduled)
name: Build TRL Docker image
on:
push:
branches:
- main
workflow_dispatch:
workflow_call:
schedule:
- cron: "0 1 * * *"
concurrency:
group: docker-image-builds
cancel-in-progress: false
env:
CI_SLACK_CHANNEL: ${{ secrets.CI_DOCKER_CHANNEL }}
jobs:
trl-latest:
name: "Latest TRL GPU"
trl:
name: "Build and push TRL Docker image"
runs-on: ubuntu-latest
steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Checkout code
uses: actions/checkout@v4
- name: Get TRL version from PyPI
run: |
VERSION=$(curl -s https://pypi.org/pypi/trl/json | jq -r .info.version)
echo "VERSION=$VERSION" >> $GITHUB_ENV
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v1
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Build and Push GPU
- name: Build and Push
uses: docker/build-push-action@v4
with:
context: ./docker/trl-latest-gpu
context: docker/trl
push: true
tags: huggingface/trl-latest-gpu
tags: |
huggingface/trl:${{ env.VERSION }}
huggingface/trl
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ env.CI_SLACK_CHANNEL }}
title: 🤗 Results of the trl-latest-gpu Docker Image build
slack_channel: ${{ secrets.CI_DOCKER_CHANNEL }}
title: 🤗 Results of the TRL Docker Image build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
trl-source:
name: "Latest TRL + HF ecosystem from source"
trl-dev:
name: "Build and push TRL Dev Docker image"
runs-on: ubuntu-latest
steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v1
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Build and Push GPU
- name: Build and Push
uses: docker/build-push-action@v4
with:
context: ./docker/trl-source-gpu
context: docker/trl-dev
push: true
tags: huggingface/trl-source-gpu
tags: |
huggingface/trl:dev
- name: Post to Slack
if: always()
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: ${{ env.CI_SLACK_CHANNEL }}
title: 🤗 Results of the trl-source-gpu Docker Image build
slack_channel: ${{ secrets.CI_DOCKER_CHANNEL }}
title: 🤗 Results of the TRL Dev Docker Image build
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

@@ -19,8 +19,8 @@ jobs:
matrix:
docker-image-name:
[
"huggingface/trl-latest-gpu:latest",
"huggingface/trl-source-gpu:latest",
"huggingface/trl",
"huggingface/trl:dev",
]
runs-on:
group: aws-g4dn-2xlarge
@@ -59,8 +59,8 @@ jobs:
matrix:
docker-image-name:
[
"huggingface/trl-latest-gpu:latest",
"huggingface/trl-source-gpu:latest",
"huggingface/trl",
"huggingface/trl:dev",
]
runs-on:
group: aws-g4dn-2xlarge

@@ -0,0 +1,6 @@
# Dev image: TRL and extras installed from source (main branch)
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime
# git is required to install TRL from GitHub
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip uv
# Install TRL from source with the liger, peft, and vlm extras
RUN uv pip install --system --no-cache "git+https://github.com/huggingface/trl.git#egg=trl[liger,peft,vlm]"
RUN uv pip install --system hf_transfer liger_kernel trackio peft
# Prebuilt flash-attention wheel matching torch 2.8 / CUDA 12 / Python 3.11
RUN uv pip install --system https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp311-cp311-linux_x86_64.whl

@@ -1,66 +0,0 @@
# Builds GPU docker image of PyTorch
# Uses multi-staged approach to reduce size
# Stage 1
# Use base conda image to reduce time
FROM continuumio/miniconda3:latest AS compile-image
# Specify py version
ENV PYTHON_VERSION=3.10
# Install apt libs - copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
RUN apt-get update && \
apt-get install -y curl git wget software-properties-common git-lfs && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
# Install audio-related libraries
RUN apt-get update && \
apt install -y ffmpeg
RUN apt install -y libsndfile1-dev
RUN git lfs install
# Create our conda env - copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
RUN conda create --name trl python=${PYTHON_VERSION} ipython jupyter pip
RUN python3 -m pip install --no-cache-dir --upgrade pip
# Below is copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
# We don't install pytorch here yet since CUDA isn't available
# instead we use the direct torch wheel
ENV PATH /opt/conda/envs/trl/bin:$PATH
# Activate our bash shell
RUN chsh -s /bin/bash
SHELL ["/bin/bash", "-c"]
# Stage 2
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 AS build-image
COPY --from=compile-image /opt/conda /opt/conda
ENV PATH /opt/conda/bin:$PATH
RUN chsh -s /bin/bash
SHELL ["/bin/bash", "-c"]
RUN source activate trl && \
python3 -m pip install --no-cache-dir bitsandbytes optimum auto-gptq
# Install apt libs
RUN apt-get update && \
apt-get install -y curl git wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
# Activate the conda env and install transformers + accelerate from source
RUN source activate trl && \
python3 -m pip install -U --no-cache-dir \
librosa \
"soundfile>=0.12.1" \
scipy \
transformers \
accelerate \
peft \
trl[test,vlm]@git+https://github.com/huggingface/trl
RUN source activate trl && \
pip freeze | grep trl
RUN echo "source activate trl" >> ~/.profile
# Activate the virtualenv
CMD ["/bin/bash"]

@@ -1,66 +0,0 @@
# Builds GPU docker image of PyTorch
# Uses multi-staged approach to reduce size
# Stage 1
# Use base conda image to reduce time
FROM continuumio/miniconda3:latest AS compile-image
# Specify py version
ENV PYTHON_VERSION=3.10
# Install apt libs - copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
RUN apt-get update && \
apt-get install -y curl git wget software-properties-common git-lfs && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
# Install audio-related libraries
RUN apt-get update && \
apt install -y ffmpeg
RUN apt install -y libsndfile1-dev
RUN git lfs install
# Create our conda env - copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
RUN conda create --name trl python=${PYTHON_VERSION} ipython jupyter pip
RUN python3 -m pip install --no-cache-dir --upgrade pip
# Below is copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
# We don't install pytorch here yet since CUDA isn't available
# instead we use the direct torch wheel
ENV PATH /opt/conda/envs/trl/bin:$PATH
# Activate our bash shell
RUN chsh -s /bin/bash
SHELL ["/bin/bash", "-c"]
# Stage 2
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04 AS build-image
COPY --from=compile-image /opt/conda /opt/conda
ENV PATH /opt/conda/bin:$PATH
RUN chsh -s /bin/bash
SHELL ["/bin/bash", "-c"]
RUN source activate trl && \
python3 -m pip install --no-cache-dir bitsandbytes optimum auto-gptq
# Install apt libs
RUN apt-get update && \
apt-get install -y curl git wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
# Activate the conda env and install transformers + accelerate from source
RUN source activate trl && \
python3 -m pip install -U --no-cache-dir \
librosa \
"soundfile>=0.12.1" \
scipy \
git+https://github.com/huggingface/transformers \
git+https://github.com/huggingface/accelerate \
git+https://github.com/huggingface/peft \
trl[test,vlm]@git+https://github.com/huggingface/trl
RUN source activate trl && \
pip freeze | grep transformers
RUN echo "source activate trl" >> ~/.profile
# Activate the virtualenv
CMD ["/bin/bash"]

docker/trl/Dockerfile (new file)

@@ -0,0 +1,4 @@
# Release image: latest TRL from PyPI with the liger, peft, and vlm extras
FROM pytorch/pytorch:2.8.0-cuda12.8-cudnn9-runtime
RUN pip install --upgrade pip uv
RUN uv pip install --system trl[liger,peft,vlm] hf_transfer trackio
# Prebuilt flash-attention wheel matching torch 2.8 / CUDA 12 / Python 3.11
RUN uv pip install --system https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
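To try the image locally before pointing Jobs at it, a minimal smoke test could look like this (a sketch, assuming Docker with the NVIDIA container toolkit is installed; `trl-local` is just an illustrative tag):

```bash
# Build the image from the repository root (the Dockerfile lives in docker/trl)
docker build -t trl-local docker/trl

# Sanity-check that CUDA and TRL are importable inside the container
docker run --rm --gpus all trl-local \
    python -c "import torch, trl; print(torch.cuda.is_available(), trl.__version__)"
```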

@@ -1,46 +1,64 @@
# Training using Jobs
# Training with Jobs
[Jobs](https://huggingface.co/docs/huggingface_hub/guides/jobs) lets you run training scripts on fully managed infrastructure (no need to handle GPUs, dependencies, or environment setup locally). This makes it easy to scale and monitor your experiments directly from the Hub.
[![](https://img.shields.io/badge/All_models-HF_Jobs-blue)](https://huggingface.co/models?other=hf_jobs,trl)
In this guide, you'll learn how to:
[Hugging Face Jobs](https://huggingface.co/docs/huggingface_hub/guides/jobs) lets you run training scripts on fully managed infrastructure—no need to manage GPUs or local environment setup.
- Run TRL training scripts using Jobs.
- Configure hardware, timeouts, environment variables, and secrets.
- Monitor and manage jobs from the CLI or Python.
In this guide, you'll learn how to:
<Tip>
* Use [TRL Jobs](https://github.com/huggingface/trl-jobs) to easily run pre-optimized TRL training
* Run any TRL training script with uv scripts
When a model is trained using **TRL + Jobs**, a tag is automatically added to the model card.
You can explore models trained this way on the [Hugging Face model hub](https://huggingface.co/models?other=hf_jobs).
</Tip>
For general details about Hugging Face Jobs (hardware selection, job monitoring, etc.), see the [Jobs documentation](https://huggingface.co/docs/huggingface_hub/guides/jobs).
## Requirements
- [Pro](https://hf.co/pro), [Team](https://hf.co/enterprise), or [Enterprise](https://hf.co/enterprise) plan.
- Logged into the Hugging Face Hub (`hf auth login`).
* A [Pro](https://hf.co/pro), [Team](https://hf.co/enterprise), or [Enterprise](https://hf.co/enterprise) plan
* Logged in to the Hugging Face Hub (`huggingface-cli login`)
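Before launching anything, you can verify that you are authenticated (a quick sketch using the `hf` CLI):

```bash
hf auth login    # authenticate with your Hub token
hf auth whoami   # confirm which account is active
```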
## Preparing your Script
## Using TRL Jobs
You can launch Jobs using either the [`hf jobs` CLI](https://huggingface.co/docs/huggingface_hub/guides/cli#hf-jobs) or the Python API. A convenient option is to use [UV scripts](https://docs.astral.sh/uv/guides/scripts/), which package all dependencies directly into a single Python file. You can run them like this:
[TRL Jobs](https://github.com/huggingface/trl-jobs) is a high-level wrapper around Hugging Face Jobs and TRL that streamlines training. It provides optimized default configurations so you can start quickly without manually tuning parameters.
Example:
```bash
pip install trl-jobs
trl-jobs sft --model_name Qwen/Qwen3-0.6B --dataset_name trl-lib/Capybara
```
TRL Jobs supports everything covered in this guide, with additional optimizations to simplify workflows.
## Using uv Scripts
For more control, you can run Hugging Face Jobs directly with your own scripts, using [uv scripts](https://docs.astral.sh/uv/guides/scripts/).
Create a Python script (e.g., `train.py`) containing your training code:
```python
from datasets import load_dataset
from trl import SFTTrainer
dataset = load_dataset("trl-lib/Capybara", split="train")
trainer = SFTTrainer(
model="Qwen/Qwen2.5-0.5B",
train_dataset=dataset,
)
trainer.train()
trainer.push_to_hub("Qwen2.5-0.5B-SFT")
```
Launch the job using either the [`hf jobs` CLI](https://huggingface.co/docs/huggingface_hub/guides/cli#hf-jobs) or the Python API:
<hfoptions id="script_type">
<hfoption id="bash">
```bash
hf jobs uv run --flavor a100-large --secrets HF_TOKEN "https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py" --model_name_or_path Qwen/Qwen2-0.5B --dataset_name trl-lib/Capybara
```
The script can also be a local file:
```bash
hf jobs uv run --flavor a100-large --secrets HF_TOKEN trl/scripts/sft.py --model_name_or_path Qwen/Qwen2-0.5B --dataset_name trl-lib/Capybara
```
Since the job runs in a Docker image from Hugging Face Spaces or Docker Hub, you can also specify the image:
```bash
hf jobs uv run --flavor a100-large --secrets HF_TOKEN --image <docker-image> trl/scripts/sft.py --model_name_or_path Qwen/Qwen2-0.5B --dataset_name trl-lib/Capybara
hf jobs uv run \
--flavor a100-large \
--with trl \
--secrets HF_TOKEN \
train.py
```
</hfoption>
@@ -48,236 +66,118 @@ hf jobs uv run --flavor a100-large --secrets HF_TOKEN --image <docker-image> trl
```python
from huggingface_hub import run_uv_job
run_uv_job(
"https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py",
token="hf...",
"train.py",
dependencies=["trl"],
flavor="a100-large",
script_args=[
"--model_name_or_path", "Qwen/Qwen2-0.5B",
"--dataset_name", "trl-lib/Capybara",
]
)
```
The script can also be a local file:
```python
from huggingface_hub import run_uv_job
run_uv_job(
"trl/scripts/sft.py",
token="hf...",
flavor="a100-large",
script_args=[
"--model_name_or_path", "Qwen/Qwen2-0.5B",
"--dataset_name", "trl-lib/Capybara",
]
)
```
Since the job runs in a Docker image from Hugging Face Spaces or Docker Hub, you can also specify the image:
```python
from huggingface_hub import run_uv_job
run_uv_job(
"sft.py",
token="hf...",
flavor="a100-large",
image="<docker-image>",
script_args=[
"--model_name_or_path", "Qwen/Qwen2-0.5B",
"--dataset_name", "trl-lib/Capybara",
]
secrets={"HF_TOKEN": "hf_..."},
)
```
</hfoption>
</hfoptions>
You can also run jobs without UV:
To run successfully, the script needs:
* **TRL installed**: Use the `--with trl` flag or the `dependencies` argument. uv installs these dependencies automatically before running the script.
* **An authentication token**: Required to push the trained model (or perform other authenticated operations). Provide it with the `--secrets HF_TOKEN` flag or the `secrets` argument.
<Tip warning={true}>
When training with Jobs, be sure to:
* **Set a sufficient timeout**. Jobs time out after 30 minutes by default. If your job exceeds the timeout, it will fail and all progress will be lost. See [Setting a custom timeout](https://huggingface.co/docs/huggingface_hub/guides/jobs#setting-a-custom-timeout).
* **Push the model to the Hub**. The Jobs environment is ephemeral—files are deleted when the job ends. If you don't push the model, it will be lost.
</Tip>
You can also run a script directly from a URL:
<hfoptions id="script_type">
<hfoption id="bash">
In this case, we pass the Docker image to the CLI and run the job as:
```bash
hf jobs run --flavor a100-large pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel python -c "import torch; print(torch.cuda.get_device_name())"
hf jobs uv run \
--flavor a100-large \
--with trl \
--secrets HF_TOKEN \
"https://gist.githubusercontent.com/qgallouedec/eb6a7d20bd7d56f9c440c3c8c56d2307/raw/69fd78a179e19af115e4a54a1cdedd2a6c237f2f/train.py"
```
</hfoption>
<hfoption id="python">
```python
from huggingface_hub import run_job
run_job(
image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
command=["python", "-c", "import torch; print(torch.cuda.get_device_name())"],
from huggingface_hub import run_uv_job
run_uv_job(
"https://gist.githubusercontent.com/qgallouedec/eb6a7d20bd7d56f9c440c3c8c56d2307/raw/69fd78a179e19af115e4a54a1cdedd2a6c237f2f/train.py",
flavor="a100-large",
dependencies=["trl"],
secrets={"HF_TOKEN": "hf_..."},
)
```
</hfoption>
</hfoptions>
### Adding Dependencies with UV
All example scripts in TRL are compatible with `uv`, allowing seamless execution with Jobs. You can check the full list of examples in [Maintained examples](example_overview#maintained-examples).
Dependencies are specified at the top of the script using this structure:
To make a script self-contained, declare dependencies at the top:
```python
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# ]
# ///
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer
dataset = load_dataset("trl-lib/Capybara", split="train")
trainer = SFTTrainer(
model="Qwen/Qwen2.5-0.5B",
train_dataset=dataset,
peft_config=LoraConfig(),
)
trainer.train()
trainer.push_to_hub("Qwen2.5-0.5B-SFT")
```
When you run the UV script, these dependencies are automatically installed. In the example above, `trl` and `peft` would be installed before the script runs.
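Because the metadata travels with the file, you can also dry-run the same script locally before submitting it to Jobs (assuming `uv` is installed):

```bash
# uv reads the inline metadata, installs trl and peft, then runs the script
uv run train.py
```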
You can also provide dependencies directly in the `uv run` command:
You can then run the script without specifying dependencies:
<hfoptions id="script_type">
<hfoption id="bash">
Using the `--with` flag.
```bash
hf jobs uv run \
--flavor a100-large \
--secrets HF_TOKEN \
--with transformers \
--with torch \
"https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py" \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara
train.py
```
</hfoption>
<hfoption id="python">
Using the `dependencies` argument.
```python
from huggingface_hub import run_uv_job
run_uv_job(
"https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py",
    dependencies=["transformers", "torch"],
token="hf...",
"train.py",
flavor="a100-large",
script_args=[
"--model_name_or_path", "Qwen/Qwen2-0.5B",
"--dataset_name", "trl-lib/Capybara",
]
secrets={"HF_TOKEN": "hf_..."},
)
```
</hfoption>
</hfoptions>
### Hardware and Timeout Settings
<Tip>
Jobs allow you to select a specific hardware configuration using the `--flavor` flag. As of August 2025, the available options are:
**CPU:** `cpu-basic`, `cpu-upgrade`
**GPU:** `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`, `a100-large`
**TPU:** `v5e-1x1`, `v5e-2x2`, `v5e-2x4`
You can always check the latest list of supported hardware flavors in [Spaces config reference](https://huggingface.co/docs/hub/en/spaces-config-reference).
By default, jobs have a **30-minute timeout**, after which they will automatically stop. For long-running tasks like training, you can increase the timeout as needed. Supported time units are:
- `s`: seconds
- `m`: minutes
- `h`: hours
- `d`: days
Example with a 2-hour timeout:
<hfoptions id="script_type">
<hfoption id="bash">
Using the `--timeout` flag:
```bash
hf jobs uv run \
--timeout 2h \
--flavor a100-large \
--secrets HF_TOKEN \
--with transformers \
--with torch \
"https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py" \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara
```
</hfoption>
<hfoption id="python">
Using the `timeout` argument:
```python
from huggingface_hub import run_uv_job
run_uv_job(
"https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py",
timeout="2h",
token="hf...",
flavor="a100-large",
script_args=[
"--model_name_or_path", "Qwen/Qwen2-0.5B",
"--dataset_name", "trl-lib/Capybara",
]
)
```
</hfoption>
</hfoptions>
### Environment Variables, Secrets, and Token
You can pass environment variables, secrets, and your auth token to your jobs.
<hfoptions id="script_type">
<hfoption id="bash">
Using the `--env`, `--secrets`, and/or `--token` options.
```bash
hf jobs uv run \
trl/scripts/sft.py \
--flavor a100-large \
--env FOO=foo \
--env BAR=bar \
--secrets HF_TOKEN=HF_TOKEN \
--secrets MY_SECRET=password \
--token hf...
```
</hfoption>
<hfoption id="python">
Using the `env`, `secrets`, and/or `token` arguments.
```python
from huggingface_hub import run_uv_job
run_uv_job(
"trl/scripts/sft.py",
env={"FOO": "foo", "BAR": "bar"},
secrets={"MY_SECRET": "psswrd"},
token="hf..."
)
```
</hfoption>
</hfoptions>
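Inside the job, these values are exposed as ordinary environment variables, so the script can read them with standard tooling (a sketch; `FOO` and `MY_SECRET` are the illustrative names used above):

```bash
echo "FOO is: $FOO"
# avoid echoing secrets; just confirm the variable is present
[ -n "$MY_SECRET" ] && echo "MY_SECRET is set"
```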
## Training and Evaluating a Model with Jobs
TRL example scripts are fully UV-compatible, allowing you to run a complete training workflow directly on Jobs. You can customize the training by providing the usual script arguments, along with hardware specifications and secrets.
To evaluate your training runs, in addition to reviewing the job logs, you can use [**Trackio**](https://huggingface.co/blog/trackio), a lightweight experiment tracking library. Trackio enables end-to-end experiment management on the Hugging Face Hub. All TRL example scripts already support reporting to Trackio via the `report_to` argument. Using this feature saves your experiments in an interactive HF Space, making it easy to monitor metrics, compare runs, and track progress over time.
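For example, a job that reports to Trackio could be launched like this (a sketch reusing the SFT script and dataset from earlier in this guide):

```bash
hf jobs uv run \
    --flavor a100-large \
    --secrets HF_TOKEN \
    "https://raw.githubusercontent.com/huggingface/trl/main/trl/scripts/sft.py" \
    --model_name_or_path Qwen/Qwen2-0.5B \
    --dataset_name trl-lib/Capybara \
    --report_to trackio \
    --push_to_hub
```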
TRL example scripts are fully uv-compatible, so you can run a complete training workflow directly on Jobs. You can customize training with standard script arguments plus hardware and secrets:
<hfoptions id="script_type">
<hfoption id="bash">
@@ -286,19 +186,10 @@ To evaluate your training runs, in addition to reviewing the job logs, you can u
hf jobs uv run \
--flavor a100-large \
--secrets HF_TOKEN \
"trl/scripts/sft.py" \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/Capybara \
--learning_rate 2.0e-5 \
--num_train_epochs 1 \
--packing \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--eos_token '<|im_end|>' \
--eval_strategy steps \
--eval_steps 100 \
--output_dir Qwen2-0.5B-SFT \
--report_to trackio \
https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/examples/scripts/prm.py \
--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
--dataset_name trl-lib/prm800k \
--output_dir Qwen2-0.5B-Reward \
--push_to_hub
```
@@ -309,22 +200,13 @@ hf jobs uv run
from huggingface_hub import run_uv_job
run_uv_job(
"trl/scripts/sft.py",
"https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/examples/scripts/prm.py",
flavor="a100-large",
secrets={"HF_TOKEN": "your_hf_token"},
secrets={"HF_TOKEN": "hf_..."},
script_args=[
"--model_name_or_path", "Qwen/Qwen2-0.5B",
"--dataset_name", "trl-lib/Capybara",
"--learning_rate", "2.0e-5",
"--num_train_epochs", "1",
"--packing",
"--per_device_train_batch_size", "2",
"--gradient_accumulation_steps", "8",
"--eos_token", "<|im_end|>",
"--eval_strategy", "steps",
"--eval_steps", "100",
"--output_dir", "Qwen2-0.5B-SFT",
"--report_to", "trackio",
"--model_name_or_path", "Qwen/Qwen2-0.5B-Instruct",
"--dataset_name", "trl-lib/prm800k",
"--output_dir", "Qwen2-0.5B-Reward",
"--push_to_hub"
]
)
@@ -333,60 +215,69 @@ run_uv_job(
</hfoption>
</hfoptions>
## Monitoring and Managing Jobs
See the full list of examples in [Maintained examples](example_overview#maintained-examples).
After launching a job, you can track its progress on the [Jobs page](https://huggingface.co/settings/jobs). Additionally, Jobs provides CLI and Python commands to check status, view logs, or cancel a job.
</Tip>
### Docker Images
An up-to-date Docker image with all TRL dependencies is available at [huggingface/trl](https://hub.docker.com/r/huggingface/trl) and can be used directly with Hugging Face Jobs:
<hfoptions id="script_type">
<hfoption id="bash">
```bash
# List your jobs
hf jobs ps -a
# List your running jobs
hf jobs ps
# Inspect the status of a job
hf jobs inspect
# View logs from a job
hf jobs logs job_id
# Cancel a job
hf jobs cancel job_id
hf jobs uv run \
--flavor a100-large \
--secrets HF_TOKEN \
--image huggingface/trl \
train.py
```
</hfoption>
<hfoption id="python">
```python
from huggingface_hub import list_jobs, inspect_job, fetch_job_logs, cancel_job
from huggingface_hub import run_uv_job
# List your jobs
jobs = list_jobs()
jobs[0]
# List your running jobs
running_jobs = [job for job in list_jobs() if job.status.stage == "RUNNING"]
# Inspect the status of a job
inspect_job(job_id=job_id)
# View logs from a job
for log in fetch_job_logs(job_id=job_id):
print(log)
# Cancel a job
cancel_job(job_id=job_id)
run_uv_job(
"train.py",
flavor="a100-large",
secrets={"HF_TOKEN": "hf_..."},
image="huggingface/trl",
)
```
</hfoption>
</hfoptions>
## Best Practices and Tips
Jobs runs on a Docker image from Hugging Face Spaces or Docker Hub, so you can also specify any custom image:
- Choose hardware that fits the size of your model and dataset for optimal performance.
- Training jobs can be long-running. Consider increasing the default timeout.
- Reuse training and evaluation scripts whenever possible to streamline workflows.
<hfoptions id="script_type">
<hfoption id="bash">
```bash
hf jobs uv run \
--flavor a100-large \
--secrets HF_TOKEN \
--image <docker-image> \
--secrets HF_TOKEN \
train.py
```
</hfoption>
<hfoption id="python">
```python
from huggingface_hub import run_uv_job
run_uv_job(
"train.py",
flavor="a100-large",
secrets={"HF_TOKEN": "hf_..."},
image="<docker-image>",
)
```
</hfoption>
</hfoptions>

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "diffusers",
# "torchvision",
# "peft",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "einops",
# "scikit-learn",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "diffusers",
# "peft",
# "trackio",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "Pillow>=9.4.0",
# "torchvision",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "vllm",
# ]
# ///

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "Pillow",
# "peft",
# "math-verify",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "math-verify",
# "latex2sympy2_extended",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "Pillow",
# "peft",
# "math-verify",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "Pillow",
# "peft",
# "torchvision",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "trackio",
# "kernels",
# ]

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "math-verify",
# "latex2sympy2_extended",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "trackio",
# "kernels",
# ]

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "trackio",
# "kernels",
# ]

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "math-verify",
# "latex2sympy2_extended",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "Pillow",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "kernels",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "qwen-vl-utils",
# "torchvision",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "Pillow>=9.4.0",
# "peft",
# "trackio",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "Pillow>=9.4.0",
# "peft",
# "trackio",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "trackio",
# "kernels",
# ]

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# ]
# ///
@@ -27,15 +27,15 @@ from accelerate.commands.config import default_config_file, load_config_from_fil
from transformers import is_bitsandbytes_available
from transformers.utils import is_openai_available, is_peft_available
from .. import __version__
from ..import_utils import (
from trl import __version__
from trl.import_utils import (
is_deepspeed_available,
is_diffusers_available,
is_liger_kernel_available,
is_llm_blender_available,
is_vllm_available,
)
from .utils import get_git_commit_hash
from trl.scripts.utils import get_git_commit_hash
def print_env():

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# ]
# ///

@@ -14,7 +14,7 @@
# /// script
# dependencies = [
# "trl @ git+https://github.com/huggingface/trl.git",
# "trl",
# "peft",
# "trackio",
# "kernels",