Add num_trainable_params column to gradio app (#2819)

While memory usage correlates with the number of trainable params, reporting this number directly
makes it easier to verify that methods use a similar number of trainable params, and outliers
can be inspected more easily.
Author: githubnemo
Committed: 2025-10-13 14:36:58 +02:00 (via GitHub)
Parent: 2410f458c8
Commit: 2f9f759587
2 changed files with 4 additions and 1 deletion
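For reference, the number of trainable parameters for a PEFT model is typically the count of parameters with requires_grad enabled. A minimal sketch, assuming a PyTorch model; the helper name is illustrative and not necessarily how the benchmark populates train_info["num_trainable_params"]:

    def count_trainable_params(model) -> int:
        # Only parameters the optimizer actually updates (requires_grad=True)
        # contribute to this count.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)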

File 1 of 2

@@ -33,6 +33,7 @@ metric_preferences = {
"file_size": "lower",
"test_accuracy": "higher",
"train_loss": "lower",
"num_trainable_params": "lower",
}
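For illustration, a preference map like this can be used to pick the best row per metric when rendering the comparison table; the helper below is a hypothetical sketch, not part of the app:

    import pandas as pd

    def best_per_metric(df: pd.DataFrame, metric_preferences: dict) -> dict:
        # Hypothetical helper: map each known metric to the index of the row
        # holding its best value ("lower" -> minimum, "higher" -> maximum).
        best = {}
        for metric, preference in metric_preferences.items():
            if metric not in df.columns:
                continue
            best[metric] = df[metric].idxmin() if preference == "lower" else df[metric].idxmax()
        return best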

File 2 of 2

@@ -51,6 +51,7 @@ def preprocess(rows, task_name: str, print_fn=print):
"total_time": run_info["total_time"],
"train_time": train_info["train_time"],
"file_size": train_info["file_size"],
"num_trainable_params": train_info["num_trainable_params"],
"test_accuracy": train_metrics["test accuracy"],
"train_loss": train_metrics["train loss"],
"train_samples": train_metrics["train samples"],
@@ -103,6 +104,7 @@ def load_df(path, task_name, print_fn=print):
"train_loss": float,
"train_samples": int,
"train_total_tokens": int,
"num_trainable_params": int,
"peft_version": "string",
"peft_branch": "string",
"transformers_version": "string",
@@ -131,6 +133,7 @@ def load_df(path, task_name, print_fn=print):
"accelerator_memory_max",
"accelerator_memory_reserved_99th",
"accelerator_memory_reserved_avg",
"num_trainable_params",
"file_size",
"created_at",
"task_name",
@@ -138,7 +141,6 @@ def load_df(path, task_name, print_fn=print):
other_columns = [col for col in df if col not in important_columns]
df = df[important_columns + other_columns]
size_before_drop_dups = len(df)
columns = ["experiment_name", "model_id", "peft_type", "created_at"]
# we want to keep only the most recent run for each experiment
df = df.sort_values("created_at").drop_duplicates(columns, keep="last")
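A small, self-contained pandas example of the sort-then-dedup pattern used here, with toy data; the app keys on the columns listed above, while this sketch uses only experiment_name to keep it short:

    import pandas as pd

    # Toy data: two runs of the same experiment plus one other (values invented).
    df = pd.DataFrame({
        "experiment_name": ["lora-r8", "lora-r8", "ia3"],
        "created_at": ["2025-10-01", "2025-10-12", "2025-10-05"],
        "test_accuracy": [0.39, 0.41, 0.37],
    })
    # Sorting by created_at first makes keep="last" retain the most recent run per key.
    latest = df.sort_values("created_at").drop_duplicates(["experiment_name"], keep="last")
    print(latest)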