Add num_trainable_params column to gradio app (#2819)

While memory usage correlates with the number of trainable params, reporting this number directly
makes it easier to verify that methods use a similar number of trainable params, and outliers
can be inspected more easily.
Author: githubnemo
Committed: 2025-10-13 14:36:58 +02:00 (via GitHub)
Parent: 2410f458c8
Commit: 2f9f759587
2 changed files with 4 additions and 1 deletion
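For reference, the number of trainable parameters for a PEFT model is typically the count of parameters with requires_grad enabled. A minimal sketch, assuming a PyTorch model; the helper name is illustrative and not necessarily how the benchmark populates train_info["num_trainable_params"]:

    def count_trainable_params(model) -> int:
        # Only parameters the optimizer actually updates (requires_grad=True)
        # contribute to this count.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)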

File 1 of 2

@@ -33,6 +33,7 @@ metric_preferences = {
"file_size": "lower",
"test_accuracy": "higher",
"train_loss": "lower",
"num_trainable_params": "lower",
}
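For illustration, a preference map like this can be used to pick the best row per metric when rendering the comparison table; the helper below is a hypothetical sketch, not part of the app:

    import pandas as pd

    def best_per_metric(df: pd.DataFrame, metric_preferences: dict) -> dict:
        # Hypothetical helper: map each known metric to the index of the row
        # holding its best value ("lower" -> minimum, "higher" -> maximum).
        best = {}
        for metric, preference in metric_preferences.items():
            if metric not in df.columns:
                continue
            best[metric] = df[metric].idxmin() if preference == "lower" else df[metric].idxmax()
        return best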

File 2 of 2

@@ -51,6 +51,7 @@ def preprocess(rows, task_name: str, print_fn=print):
"total_time": run_info["total_time"],
"train_time": train_info["train_time"],
"file_size": train_info["file_size"],
"num_trainable_params": train_info["num_trainable_params"],
"test_accuracy": train_metrics["test accuracy"],
"train_loss": train_metrics["train loss"],
"train_samples": train_metrics["train samples"],
@@ -103,6 +104,7 @@ def load_df(path, task_name, print_fn=print):
"train_loss": float,
"train_samples": int,
"train_total_tokens": int,
"num_trainable_params": int,
"peft_version": "string",
"peft_branch": "string",
"transformers_version": "string",
@@ -131,6 +133,7 @@ def load_df(path, task_name, print_fn=print):
"accelerator_memory_max",
"accelerator_memory_reserved_99th",
"accelerator_memory_reserved_avg",
"num_trainable_params",
"file_size",
"created_at",
"task_name",
@@ -138,7 +141,6 @@ def load_df(path, task_name, print_fn=print):
other_columns = [col for col in df if col not in important_columns]
df = df[important_columns + other_columns]
size_before_drop_dups = len(df)
columns = ["experiment_name", "model_id", "peft_type", "created_at"]
# we want to keep only the most recent run for each experiment
df = df.sort_values("created_at").drop_duplicates(columns, keep="last")
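A small, self-contained pandas example of the sort-then-dedup pattern used here, with toy data; the app keys on the columns listed above, while this sketch uses only experiment_name to keep it short:

    import pandas as pd

    # Toy data: two runs of the same experiment plus one other (values invented).
    df = pd.DataFrame({
        "experiment_name": ["lora-r8", "lora-r8", "ia3"],
        "created_at": ["2025-10-01", "2025-10-12", "2025-10-05"],
        "test_accuracy": [0.39, 0.41, 0.37],
    })
    # Sorting by created_at first makes keep="last" retain the most recent run per key.
    latest = df.sort_values("created_at").drop_duplicates(["experiment_name"], keep="last")
    print(latest)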