report_to default changed to "none" + cleaning deprecated env var (#41375)

* reporting

* fix

* fix
Author: Marc Sun
Date: 2025-10-09 18:28:48 +02:00
Committed by: GitHub
Parent: 78f79ba5af
Commit: 3839d51013
5 changed files with 40 additions and 115 deletions


@@ -109,7 +109,6 @@ training_args = TrainingArguments(
     output_dir="my-awesome-model",
     num_train_epochs=30,
     fp16=True,
-    logging_dir=f"{repo_name}/logs",
     logging_strategy="epoch",
     eval_strategy="epoch",
     save_strategy="epoch",


@@ -110,7 +110,6 @@ training_args = TrainingArguments(
     output_dir="my-awesome-model",
     num_train_epochs=30,
     fp16=True,
-    logging_dir=f"{repo_name}/logs",
     logging_strategy="epoch",
     eval_strategy="epoch",
     save_strategy="epoch",


@@ -115,7 +115,6 @@ training_args = TrainingArguments(
     output_dir="my-awesome-model",
     num_train_epochs=30,
     fp16=True,
-    logging_dir=f"{repo_name}/logs",
     logging_strategy="epoch",
     eval_strategy="epoch",
     save_strategy="epoch",
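All three documentation examples drop the `logging_dir` argument, which no longer exists on `TrainingArguments`. A minimal sketch of the updated call, assuming the surrounding tutorial still wants TensorBoard logs (the `report_to` line is an assumption, not part of this diff):

    from transformers import TrainingArguments

    training_args = TrainingArguments(
        output_dir="my-awesome-model",
        num_train_epochs=30,
        fp16=True,
        logging_strategy="epoch",
        eval_strategy="epoch",
        save_strategy="epoch",
        report_to="tensorboard",  # assumption: opt in explicitly, since the default is now "none"
    )

TensorBoard output then lands under os.path.join(output_dir, default_logdir()) unless TENSORBOARD_LOGGING_DIR is set (see the integration changes below).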


@@ -103,13 +103,6 @@ from ..utils import ENV_VARS_TRUE_VALUES, is_torch_xla_available  # noqa: E402

 # Integration functions:
 def is_wandb_available():
-    # any value of WANDB_DISABLED disables wandb
-    if os.getenv("WANDB_DISABLED", "").upper() in ENV_VARS_TRUE_VALUES:
-        logger.warning(
-            "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the "
-            "--report_to flag to control the integrations used for logging result (for instance --report_to none)."
-        )
-        return False
     if importlib.util.find_spec("wandb") is not None:
         import wandb
@@ -129,13 +122,6 @@ def is_clearml_available():

 def is_comet_available():
-    if os.getenv("COMET_MODE", "").upper() == "DISABLED":
-        logger.warning(
-            "Using the `COMET_MODE=DISABLED` environment variable is deprecated and will be removed in v5. Use the "
-            "--report_to flag to control the integrations used for logging result (for instance --report_to none)."
-        )
-        return False
     if _is_comet_installed is False:
         return False
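Both deleted branches told users the same thing: control integrations with `report_to` instead of environment variables. A hedged migration sketch (script names illustrative):

    from transformers import TrainingArguments

    # Before (no longer honored):
    #   WANDB_DISABLED=true python train.py
    #   COMET_MODE=DISABLED python train.py

    # After: disable every reporting integration explicitly...
    args = TrainingArguments(output_dir="out", report_to="none")

    # ...or enable only the ones you want.
    args = TrainingArguments(output_dir="out", report_to=["wandb", "comet_ml"])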
@@ -557,6 +543,17 @@ def rewrite_logs(d):
     return new_d


+def default_logdir() -> str:
+    """
+    Same default as PyTorch
+    """
+    import socket
+    from datetime import datetime
+
+    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
+    return os.path.join("runs", current_time + "_" + socket.gethostname())
+
+
 class TensorBoardCallback(TrainerCallback):
     """
     A [`TrainerCallback`] that sends the logs to [TensorBoard](https://www.tensorflow.org/tensorboard).
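For reference, the relocated helper mirrors PyTorch's SummaryWriter default and produces a host- and time-stamped run directory; a quick check (output varies by machine and time):

    import os, socket
    from datetime import datetime

    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
    print(os.path.join("runs", current_time + "_" + socket.gethostname()))
    # e.g. runs/Oct09_18-28-48_my-hostname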
@@ -564,49 +561,47 @@ class TensorBoardCallback(TrainerCallback):
     Args:
         tb_writer (`SummaryWriter`, *optional*):
             The writer to use. Will instantiate one if not set.
+
+    Environment:
+    - **TENSORBOARD_LOGGING_DIR** (`str`, *optional*, defaults to `None`):
+        The logging dir to log the results. Default value is os.path.join(args.output_dir, default_logdir())
     """

     def __init__(self, tb_writer=None):
-        has_tensorboard = is_tensorboard_available()
-        if not has_tensorboard:
+        if not is_tensorboard_available():
             raise RuntimeError(
                 "TensorBoardCallback requires tensorboard to be installed. Either update your PyTorch version or"
                 " install tensorboardX."
             )
-        if has_tensorboard:
-            try:
-                from torch.utils.tensorboard import SummaryWriter
+        try:
+            from torch.utils.tensorboard import SummaryWriter
+        except ImportError:
+            from tensorboardX import SummaryWriter

-                self._SummaryWriter = SummaryWriter
-            except ImportError:
-                try:
-                    from tensorboardX import SummaryWriter
-
-                    self._SummaryWriter = SummaryWriter
-                except ImportError:
-                    self._SummaryWriter = None
-        else:
-            self._SummaryWriter = None
+        self._SummaryWriter = SummaryWriter
         self.tb_writer = tb_writer
+        self.logging_dir = os.getenv("TENSORBOARD_LOGGING_DIR", None)
+        if self.logging_dir is not None:
+            self.logging_dir = os.path.expanduser(self.logging_dir)

-    def _init_summary_writer(self, args, log_dir=None):
-        log_dir = log_dir or args.logging_dir
+    def _init_summary_writer(self, args):
         if self._SummaryWriter is not None:
-            self.tb_writer = self._SummaryWriter(log_dir=log_dir)
+            self.tb_writer = self._SummaryWriter(log_dir=self.logging_dir)

     def on_train_begin(self, args, state, control, **kwargs):
         if not state.is_world_process_zero:
             return

-        log_dir = None
-
         if state.is_hyper_param_search:
             trial_name = state.trial_name
             if trial_name is not None:
-                log_dir = os.path.join(args.logging_dir, trial_name)
+                # overwrite logging dir for trials
+                self.logging_dir = os.path.join(args.output_dir, default_logdir(), trial_name)
+        if self.logging_dir is None:
+            self.logging_dir = os.path.join(args.output_dir, default_logdir())

         if self.tb_writer is None:
-            self._init_summary_writer(args, log_dir)
+            self._init_summary_writer(args)

         if self.tb_writer is not None:
             self.tb_writer.add_text("args", args.to_json_string())
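Taken together, the callback now resolves its log directory as: `TENSORBOARD_LOGGING_DIR` if set (with `~` expanded), otherwise `os.path.join(args.output_dir, default_logdir())`, plus a per-trial subdirectory during hyperparameter search. A hedged usage sketch (paths illustrative):

    import os

    # Optional override; must be set before the callback is constructed.
    os.environ["TENSORBOARD_LOGGING_DIR"] = "~/tb-logs"  # expanduser() is applied

    from transformers import TrainingArguments

    args = TrainingArguments(
        output_dir="out",
        report_to="tensorboard",  # required now that the default is "none"
    )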
@@ -671,13 +666,6 @@ class WandbLogModel(str, Enum):
     def _missing_(cls, value: Any) -> "WandbLogModel":
         if not isinstance(value, str):
             raise TypeError(f"Expecting to have a string `WANDB_LOG_MODEL` setting, but got {type(value)}")
-        if value.upper() in ENV_VARS_TRUE_VALUES:
-            raise DeprecationWarning(
-                f"Setting `WANDB_LOG_MODEL` as {os.getenv('WANDB_LOG_MODEL')} is deprecated and will be removed in "
-                "version 5 of transformers. Use one of `'end'` or `'checkpoint'` instead."
-            )
-            logger.info(f"Setting `WANDB_LOG_MODEL` from {os.getenv('WANDB_LOG_MODEL')} to `end` instead")
-            return WandbLogModel.END
         logger.warning(
             f"Received unrecognized `WANDB_LOG_MODEL` setting value={value}; so disabling `WANDB_LOG_MODEL`"
         )
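With the coercion gone, `WANDB_LOG_MODEL` accepts only the enum values; truthy strings such as `"true"` now hit the `logger.warning` branch and disable model upload instead of being mapped to `end`. Illustrative settings:

    import os

    os.environ["WANDB_LOG_MODEL"] = "end"         # upload the final model
    os.environ["WANDB_LOG_MODEL"] = "checkpoint"  # upload every saved checkpoint
    # os.environ["WANDB_LOG_MODEL"] = "true"      # now unrecognized: logs a warning, uploads nothing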
@@ -692,24 +680,11 @@ class WandbCallback(TrainerCallback):
     def __init__(self):
         has_wandb = is_wandb_available()
         if not has_wandb:
-            # Check if wandb is actually installed but disabled via WANDB_DISABLED
-            if importlib.util.find_spec("wandb") is not None:
-                # wandb is installed but disabled
-                wandb_disabled = os.getenv("WANDB_DISABLED", "").upper() in ENV_VARS_TRUE_VALUES
-                if wandb_disabled:
-                    raise RuntimeError(
-                        "You specified `report_to='wandb'` but also set the `WANDB_DISABLED` environment variable.\n"
-                        "This disables wandb logging, even though it was explicitly requested.\n\n"
-                        "- To enable wandb logging: unset `WANDB_DISABLED`.\n"
-                        "- To disable logging: use `report_to='none'`.\n\n"
-                        "Note: WANDB_DISABLED is deprecated and will be removed in v5."
-                    )
-            # If wandb is not installed at all, use the original error message
             raise RuntimeError("WandbCallback requires wandb to be installed. Run `pip install wandb`.")
-        if has_wandb:
-            import wandb
-            self._wandb = wandb
+        import wandb
+
+        self._wandb = wandb
         self._initialized = False
         self._log_model = WandbLogModel(os.getenv("WANDB_LOG_MODEL", "false"))
@@ -727,19 +702,11 @@ class WandbCallback(TrainerCallback):
                 to `"end"`, the model will be uploaded at the end of training. If set to `"checkpoint"`, the checkpoint
                 will be uploaded every `args.save_steps` . If set to `"false"`, the model will not be uploaded. Use along
                 with [`~transformers.TrainingArguments.load_best_model_at_end`] to upload best model.
-
-                <Deprecated version="5.0">
-
-                Setting `WANDB_LOG_MODEL` as `bool` will be deprecated in version 5 of 🤗 Transformers.
-
-                </Deprecated>
             - **WANDB_WATCH** (`str`, *optional* defaults to `"false"`):
                 Can be `"gradients"`, `"all"`, `"parameters"`, or `"false"`. Set to `"all"` to log gradients and
                 parameters.
             - **WANDB_PROJECT** (`str`, *optional*, defaults to `"huggingface"`):
                 Set this to a custom string to store results in a different project.
-            - **WANDB_DISABLED** (`bool`, *optional*, defaults to `False`):
-                Whether to disable wandb entirely. Set `WANDB_DISABLED=true` to disable.
         """
         if self._wandb is None:
             return
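The remaining documented wandb knobs, collected in one hedged sketch (values illustrative):

    import os

    os.environ["WANDB_PROJECT"] = "my-project"  # defaults to "huggingface"
    os.environ["WANDB_WATCH"] = "gradients"     # "gradients", "all", "parameters", or "false"
    # WANDB_DISABLED is gone from this list: opt out via report_to instead.

    from transformers import TrainingArguments

    args = TrainingArguments(output_dir="out", report_to="wandb")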
@@ -749,9 +716,6 @@ class WandbCallback(TrainerCallback):
             from wandb.sdk.lib.config_util import ConfigError as WandbConfigError

         if state.is_world_process_zero:
-            logger.info(
-                'Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"'
-            )
             combined_dict = {**args.to_dict()}

             if hasattr(model, "config") and model.config is not None:
@@ -1108,13 +1072,8 @@ class CometCallback(TrainerCallback):
                 * `create`: Always create a new Comet Experiment.
                 * `get`: Always try to append to an Existing Comet Experiment.
                   Requires `COMET_EXPERIMENT_KEY` to be set.
-                * `ONLINE`: **deprecated**, used to create an online
-                  Experiment. Use `COMET_START_ONLINE=1` instead.
-                * `OFFLINE`: **deprecated**, used to created an offline
-                  Experiment. Use `COMET_START_ONLINE=0` instead.
-                * `DISABLED`: **deprecated**, used to disable Comet logging.
-                  Use the `--report_to` flag to control the integrations used
-                  for logging result instead.
+            - **COMET_START_ONLINE** (`bool`, *optional*):
+                Whether to create an online or offline Experiment.
             - **COMET_PROJECT_NAME** (`str`, *optional*):
                 Comet project name for experiments.
             - **COMET_LOG_ASSETS** (`str`, *optional*, defaults to `TRUE`):
@@ -1136,12 +1095,7 @@ class CometCallback(TrainerCallback):
             if comet_old_mode is not None:
                 comet_old_mode = comet_old_mode.lower()

-                if comet_old_mode == "online":
-                    online = True
-                elif comet_old_mode == "offline":
-                    online = False
-                elif comet_old_mode in ("get", "get_or_create", "create"):
+                if comet_old_mode in ("get", "get_or_create", "create"):
                     mode = comet_old_mode
                 elif comet_old_mode:
                     logger.warning("Invalid COMET_MODE env value %r, Comet logging is disabled", comet_old_mode)


@@ -107,17 +107,6 @@ if is_sagemaker_mp_enabled():
     smp.init()


-def default_logdir() -> str:
-    """
-    Same default as PyTorch
-    """
-    import socket
-    from datetime import datetime
-
-    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
-    return os.path.join("runs", current_time + "_" + socket.gethostname())
-
-
 def get_int_from_env(env_keys, default):
     """Returns the first positive env value found in the `env_keys` list or the default."""
     for e in env_keys:
@@ -312,9 +301,6 @@ class TrainingArguments:
         log_on_each_node (`bool`, *optional*, defaults to `True`):
             In multinode distributed training, whether to log using `log_level` once per node, or only on the main
             node.
-        logging_dir (`str`, *optional*):
-            [TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to
-            *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***.
         logging_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"steps"`):
             The logging strategy to adopt during training. Possible values are:
@@ -605,7 +591,7 @@ class TrainingArguments:
             Column name for precomputed lengths. If the column exists, grouping by length will use these values rather
             than computing them on train startup. Ignored unless `group_by_length` is `True` and the dataset is an
             instance of `Dataset`.
-        report_to (`str` or `list[str]`, *optional*, defaults to `"all"`):
+        report_to (`str` or `list[str]`, *optional*, defaults to `"none"`):
             The list of integrations to report the results and logs to. Supported platforms are `"azure_ml"`,
             `"clearml"`, `"codecarbon"`, `"comet_ml"`, `"dagshub"`, `"dvclive"`, `"flyte"`, `"mlflow"`, `"neptune"`,
             `"swanlab"`, `"tensorboard"`, `"trackio"` and `"wandb"`. Use `"all"` to report to all integrations
@@ -914,7 +900,6 @@ class TrainingArguments:
             )
         },
     )
-    logging_dir: Optional[str] = field(default=None, metadata={"help": "Tensorboard log dir."})
     logging_strategy: Union[IntervalStrategy, str] = field(
         default="steps",
         metadata={"help": "The logging strategy to use."},
@@ -1199,7 +1184,7 @@ class TrainingArguments:
         metadata={"help": "Column name with precomputed lengths to use when grouping by length."},
     )
     report_to: Union[None, str, list[str]] = field(
-        default=None, metadata={"help": "The list of integrations to report the results and logs to."}
+        default="none", metadata={"help": "The list of integrations to report the results and logs to."}
     )
     project: str = field(
         default="huggingface",
@@ -1466,10 +1451,6 @@ class TrainingArguments:
         # see https://github.com/huggingface/transformers/issues/10628
         if self.output_dir is not None:
             self.output_dir = os.path.expanduser(self.output_dir)
-        if self.logging_dir is None and self.output_dir is not None:
-            self.logging_dir = os.path.join(self.output_dir, default_logdir())
-        if self.logging_dir is not None:
-            self.logging_dir = os.path.expanduser(self.logging_dir)
         if self.disable_tqdm is None:
             self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
@@ -1672,13 +1653,6 @@ class TrainingArguments:
                 mixed_precision_dtype = "bf16"
             os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision_dtype

-        if self.report_to is None:
-            logger.info(
-                "The default value for the training argument `--report_to` will change in v5 (from all installed "
-                "integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as "
-                "now. You should start updating your code and make this info disappear :-)."
-            )
-            self.report_to = "all"
         if self.report_to == "all" or self.report_to == ["all"]:
             # Import at runtime to avoid a circular import.
             from .integrations import get_available_reporting_integrations
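Net behavioral change: `report_to=None` previously logged a notice and expanded to `"all"`; the default is now the string `"none"`, so nothing is reported unless requested. `"all"` still expands to every installed integration:

    from transformers import TrainingArguments

    args = TrainingArguments(output_dir="out")                   # no reporting integrations
    args = TrainingArguments(output_dir="out", report_to="all")  # every installed integration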
@@ -2548,7 +2522,7 @@ class TrainingArguments:
             Logger log level to use on the main process. Possible choices are the log levels as strings: `"debug"`,
             `"info"`, `"warning"`, `"error"` and `"critical"`, plus a `"passive"` level which doesn't set anything
             and lets the application set the level.
-        report_to (`str` or `list[str]`, *optional*, defaults to `"all"`):
+        report_to (`str` or `list[str]`, *optional*, defaults to `"none"`):
             The list of integrations to report the results and logs to. Supported platforms are `"azure_ml"`,
             `"clearml"`, `"codecarbon"`, `"comet_ml"`, `"dagshub"`, `"dvclive"`, `"flyte"`, `"mlflow"`,
             `"neptune"`, `"swanlab"`, `"tensorboard"`, `"trackio"` and `"wandb"`. Use `"all"` to report to all