Mirror of https://github.com/huggingface/transformers.git (synced 2025-10-20 17:13:56 +08:00)
report_to default changed to "none" + cleaning deprecated env var (#41375)

* reporting
* fix
* fix
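
In practice (a minimal sketch of the new default with illustrative values, not taken verbatim from this diff): training runs no longer report to every installed integration automatically, so logging backends must now be requested explicitly through `report_to`.

from transformers import TrainingArguments

# Old behavior: leaving report_to unset meant "all installed integrations".
# New behavior: the default is "none", so nothing is reported unless requested.
args = TrainingArguments(output_dir="out")                             # no reporting integrations
args_wandb = TrainingArguments(output_dir="out", report_to="wandb")    # opt in explicitly
args_all = TrainingArguments(output_dir="out", report_to="all")        # the previous default behavior
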
@@ -109,7 +109,6 @@ training_args = TrainingArguments(
     output_dir="my-awesome-model",
     num_train_epochs=30,
     fp16=True,
-    logging_dir=f"{repo_name}/logs",
     logging_strategy="epoch",
     eval_strategy="epoch",
     save_strategy="epoch",
@@ -110,7 +110,6 @@ training_args = TrainingArguments(
     output_dir="my-awesome-model",
     num_train_epochs=30,
     fp16=True,
-    logging_dir=f"{repo_name}/logs",
     logging_strategy="epoch",
     eval_strategy="epoch",
     save_strategy="epoch",
@@ -115,7 +115,6 @@ training_args = TrainingArguments(
     output_dir="my-awesome-model",
     num_train_epochs=30,
     fp16=True,
-    logging_dir=f"{repo_name}/logs",
     logging_strategy="epoch",
     eval_strategy="epoch",
     save_strategy="epoch",
@@ -103,13 +103,6 @@ from ..utils import ENV_VARS_TRUE_VALUES, is_torch_xla_available # noqa: E402
 
 
 # Integration functions:
 def is_wandb_available():
-    # any value of WANDB_DISABLED disables wandb
-    if os.getenv("WANDB_DISABLED", "").upper() in ENV_VARS_TRUE_VALUES:
-        logger.warning(
-            "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the "
-            "--report_to flag to control the integrations used for logging result (for instance --report_to none)."
-        )
-        return False
     if importlib.util.find_spec("wandb") is not None:
         import wandb
@@ -129,13 +122,6 @@ def is_clearml_available():
 
 
 def is_comet_available():
-    if os.getenv("COMET_MODE", "").upper() == "DISABLED":
-        logger.warning(
-            "Using the `COMET_MODE=DISABLED` environment variable is deprecated and will be removed in v5. Use the "
-            "--report_to flag to control the integrations used for logging result (for instance --report_to none)."
-        )
-        return False
-
     if _is_comet_installed is False:
         return False
 
@@ -557,6 +543,17 @@ def rewrite_logs(d):
     return new_d
 
 
+def default_logdir() -> str:
+    """
+    Same default as PyTorch
+    """
+    import socket
+    from datetime import datetime
+
+    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
+    return os.path.join("runs", current_time + "_" + socket.gethostname())
+
+
 class TensorBoardCallback(TrainerCallback):
     """
     A [`TrainerCallback`] that sends the logs to [TensorBoard](https://www.tensorflow.org/tensorboard).
@@ -564,49 +561,47 @@ class TensorBoardCallback(TrainerCallback):
     Args:
         tb_writer (`SummaryWriter`, *optional*):
             The writer to use. Will instantiate one if not set.
+    Environment:
+    - **TENSORBOARD_LOGGING_DIR** (`str`, *optional*, defaults to `None`):
+        The logging dir to log the results. Default value is os.path.join(args.output_dir, default_logdir())
     """
 
     def __init__(self, tb_writer=None):
-        has_tensorboard = is_tensorboard_available()
-        if not has_tensorboard:
+        if not is_tensorboard_available():
             raise RuntimeError(
                 "TensorBoardCallback requires tensorboard to be installed. Either update your PyTorch version or"
                 " install tensorboardX."
             )
-        if has_tensorboard:
-            try:
-                from torch.utils.tensorboard import SummaryWriter
-
-                self._SummaryWriter = SummaryWriter
-            except ImportError:
-                try:
-                    from tensorboardX import SummaryWriter
-
-                    self._SummaryWriter = SummaryWriter
-                except ImportError:
-                    self._SummaryWriter = None
-        else:
-            self._SummaryWriter = None
+        try:
+            from torch.utils.tensorboard import SummaryWriter
+        except ImportError:
+            from tensorboardX import SummaryWriter
+
+        self._SummaryWriter = SummaryWriter
         self.tb_writer = tb_writer
+        self.logging_dir = os.getenv("TENSORBOARD_LOGGING_DIR", None)
+        if self.logging_dir is not None:
+            self.logging_dir = os.path.expanduser(self.logging_dir)
 
-    def _init_summary_writer(self, args, log_dir=None):
-        log_dir = log_dir or args.logging_dir
+    def _init_summary_writer(self, args):
         if self._SummaryWriter is not None:
-            self.tb_writer = self._SummaryWriter(log_dir=log_dir)
+            self.tb_writer = self._SummaryWriter(log_dir=self.logging_dir)
 
     def on_train_begin(self, args, state, control, **kwargs):
         if not state.is_world_process_zero:
             return
 
-        log_dir = None
-
         if state.is_hyper_param_search:
             trial_name = state.trial_name
             if trial_name is not None:
-                log_dir = os.path.join(args.logging_dir, trial_name)
+                # overwrite logging dir for trials
+                self.logging_dir = os.path.join(args.output_dir, default_logdir(), trial_name)
+
+        if self.logging_dir is None:
+            self.logging_dir = os.path.join(args.output_dir, default_logdir())
 
         if self.tb_writer is None:
-            self._init_summary_writer(args, log_dir)
+            self._init_summary_writer(args)
 
         if self.tb_writer is not None:
             self.tb_writer.add_text("args", args.to_json_string())
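
A hedged usage sketch of the TensorBoard change above (directory names are examples): the callback now reads `TENSORBOARD_LOGGING_DIR` from the environment and otherwise falls back to `os.path.join(args.output_dir, default_logdir())`.

import os
from transformers import TrainingArguments

# Optional override; if unset, logs land in <output_dir>/runs/<timestamp>_<hostname>.
os.environ["TENSORBOARD_LOGGING_DIR"] = "~/tb-logs"  # the callback expands "~" via os.path.expanduser

args = TrainingArguments(output_dir="out", report_to="tensorboard", logging_strategy="epoch")
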
@@ -671,13 +666,6 @@ class WandbLogModel(str, Enum):
     def _missing_(cls, value: Any) -> "WandbLogModel":
         if not isinstance(value, str):
             raise TypeError(f"Expecting to have a string `WANDB_LOG_MODEL` setting, but got {type(value)}")
-        if value.upper() in ENV_VARS_TRUE_VALUES:
-            raise DeprecationWarning(
-                f"Setting `WANDB_LOG_MODEL` as {os.getenv('WANDB_LOG_MODEL')} is deprecated and will be removed in "
-                "version 5 of transformers. Use one of `'end'` or `'checkpoint'` instead."
-            )
-            logger.info(f"Setting `WANDB_LOG_MODEL` from {os.getenv('WANDB_LOG_MODEL')} to `end` instead")
-            return WandbLogModel.END
         logger.warning(
             f"Received unrecognized `WANDB_LOG_MODEL` setting value={value}; so disabling `WANDB_LOG_MODEL`"
         )
@@ -692,24 +680,11 @@ class WandbCallback(TrainerCallback):
 
     def __init__(self):
         has_wandb = is_wandb_available()
         if not has_wandb:
-            # Check if wandb is actually installed but disabled via WANDB_DISABLED
-            if importlib.util.find_spec("wandb") is not None:
-                # wandb is installed but disabled
-                wandb_disabled = os.getenv("WANDB_DISABLED", "").upper() in ENV_VARS_TRUE_VALUES
-                if wandb_disabled:
-                    raise RuntimeError(
-                        "You specified `report_to='wandb'` but also set the `WANDB_DISABLED` environment variable.\n"
-                        "This disables wandb logging, even though it was explicitly requested.\n\n"
-                        "- To enable wandb logging: unset `WANDB_DISABLED`.\n"
-                        "- To disable logging: use `report_to='none'`.\n\n"
-                        "Note: WANDB_DISABLED is deprecated and will be removed in v5."
-                    )
-            # If wandb is not installed at all, use the original error message
             raise RuntimeError("WandbCallback requires wandb to be installed. Run `pip install wandb`.")
-        if has_wandb:
-            import wandb
-
-            self._wandb = wandb
+
+        import wandb
+
+        self._wandb = wandb
         self._initialized = False
         self._log_model = WandbLogModel(os.getenv("WANDB_LOG_MODEL", "false"))
@@ -727,19 +702,11 @@ class WandbCallback(TrainerCallback):
             to `"end"`, the model will be uploaded at the end of training. If set to `"checkpoint"`, the checkpoint
             will be uploaded every `args.save_steps` . If set to `"false"`, the model will not be uploaded. Use along
             with [`~transformers.TrainingArguments.load_best_model_at_end`] to upload best model.
-
-            <Deprecated version="5.0">
-
-            Setting `WANDB_LOG_MODEL` as `bool` will be deprecated in version 5 of 🤗 Transformers.
-
-            </Deprecated>
         - **WANDB_WATCH** (`str`, *optional* defaults to `"false"`):
             Can be `"gradients"`, `"all"`, `"parameters"`, or `"false"`. Set to `"all"` to log gradients and
             parameters.
         - **WANDB_PROJECT** (`str`, *optional*, defaults to `"huggingface"`):
             Set this to a custom string to store results in a different project.
-        - **WANDB_DISABLED** (`bool`, *optional*, defaults to `False`):
-            Whether to disable wandb entirely. Set `WANDB_DISABLED=true` to disable.
         """
         if self._wandb is None:
             return
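
Putting the remaining W&B knobs together, a hedged sketch based on the docstring above (the project name and values are examples): W&B is enabled through `report_to`, while the project, model upload, and watch behavior stay environment variables.

import os
from transformers import TrainingArguments

os.environ["WANDB_PROJECT"] = "my-project"     # defaults to "huggingface"
os.environ["WANDB_LOG_MODEL"] = "end"          # "end", "checkpoint", or "false"
os.environ["WANDB_WATCH"] = "gradients"        # "gradients", "all", "parameters", or "false"

args = TrainingArguments(output_dir="out", report_to="wandb")
# To turn logging off, prefer report_to="none" over the removed WANDB_DISABLED variable.
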
@@ -749,9 +716,6 @@ class WandbCallback(TrainerCallback):
         from wandb.sdk.lib.config_util import ConfigError as WandbConfigError
 
         if state.is_world_process_zero:
-            logger.info(
-                'Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"'
-            )
             combined_dict = {**args.to_dict()}
 
             if hasattr(model, "config") and model.config is not None:
@@ -1108,13 +1072,8 @@ class CometCallback(TrainerCallback):
                 * `create`: Always create a new Comet Experiment.
                 * `get`: Always try to append to an Existing Comet Experiment.
                   Requires `COMET_EXPERIMENT_KEY` to be set.
-                * `ONLINE`: **deprecated**, used to create an online
-                  Experiment. Use `COMET_START_ONLINE=1` instead.
-                * `OFFLINE`: **deprecated**, used to created an offline
-                  Experiment. Use `COMET_START_ONLINE=0` instead.
-                * `DISABLED`: **deprecated**, used to disable Comet logging.
-                  Use the `--report_to` flag to control the integrations used
-                  for logging result instead.
+        - **COMET_START_ONLINE** (`bool`, *optional*):
+            Whether to create an online or offline Experiment.
         - **COMET_PROJECT_NAME** (`str`, *optional*):
             Comet project name for experiments.
         - **COMET_LOG_ASSETS** (`str`, *optional*, defaults to `TRUE`):
@@ -1136,12 +1095,7 @@ class CometCallback(TrainerCallback):
 
         if comet_old_mode is not None:
             comet_old_mode = comet_old_mode.lower()
-
-            if comet_old_mode == "online":
-                online = True
-            elif comet_old_mode == "offline":
-                online = False
-            elif comet_old_mode in ("get", "get_or_create", "create"):
+            if comet_old_mode in ("get", "get_or_create", "create"):
                 mode = comet_old_mode
             elif comet_old_mode:
                 logger.warning("Invalid COMET_MODE env value %r, Comet logging is disabled", comet_old_mode)
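
For Comet, the deprecated `COMET_MODE` values give way to `COMET_START_ONLINE` plus `report_to`; a hedged sketch with an example project name:

import os
from transformers import TrainingArguments

os.environ["COMET_START_ONLINE"] = "1"           # "1" for an online Experiment, "0" for offline
os.environ["COMET_PROJECT_NAME"] = "my-project"

args = TrainingArguments(output_dir="out", report_to="comet_ml")
# Disabling Comet is now report_to="none" rather than COMET_MODE=DISABLED.
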
@@ -107,17 +107,6 @@ if is_sagemaker_mp_enabled():
     smp.init()
 
 
-def default_logdir() -> str:
-    """
-    Same default as PyTorch
-    """
-    import socket
-    from datetime import datetime
-
-    current_time = datetime.now().strftime("%b%d_%H-%M-%S")
-    return os.path.join("runs", current_time + "_" + socket.gethostname())
-
-
 def get_int_from_env(env_keys, default):
     """Returns the first positive env value found in the `env_keys` list or the default."""
     for e in env_keys:
@@ -312,9 +301,6 @@ class TrainingArguments:
         log_on_each_node (`bool`, *optional*, defaults to `True`):
             In multinode distributed training, whether to log using `log_level` once per node, or only on the main
             node.
-        logging_dir (`str`, *optional*):
-            [TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to
-            *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***.
         logging_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"steps"`):
             The logging strategy to adopt during training. Possible values are:
 
@@ -605,7 +591,7 @@ class TrainingArguments:
             Column name for precomputed lengths. If the column exists, grouping by length will use these values rather
             than computing them on train startup. Ignored unless `group_by_length` is `True` and the dataset is an
             instance of `Dataset`.
-        report_to (`str` or `list[str]`, *optional*, defaults to `"all"`):
+        report_to (`str` or `list[str]`, *optional*, defaults to `"none"`):
             The list of integrations to report the results and logs to. Supported platforms are `"azure_ml"`,
             `"clearml"`, `"codecarbon"`, `"comet_ml"`, `"dagshub"`, `"dvclive"`, `"flyte"`, `"mlflow"`, `"neptune"`,
             `"swanlab"`, `"tensorboard"`, `"trackio"` and `"wandb"`. Use `"all"` to report to all integrations
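
As the updated docstring states, `report_to` accepts a single integration name, a list of names, `"all"`, or `"none"`; a short sketch:

from transformers import TrainingArguments

args = TrainingArguments(output_dir="out", report_to=["tensorboard", "wandb"])  # several backends at once
args_quiet = TrainingArguments(output_dir="out", report_to="none")              # the new default, spelled out
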
@@ -914,7 +900,6 @@ class TrainingArguments:
             )
         },
     )
-    logging_dir: Optional[str] = field(default=None, metadata={"help": "Tensorboard log dir."})
     logging_strategy: Union[IntervalStrategy, str] = field(
         default="steps",
         metadata={"help": "The logging strategy to use."},
@@ -1199,7 +1184,7 @@ class TrainingArguments:
         metadata={"help": "Column name with precomputed lengths to use when grouping by length."},
     )
     report_to: Union[None, str, list[str]] = field(
-        default=None, metadata={"help": "The list of integrations to report the results and logs to."}
+        default="none", metadata={"help": "The list of integrations to report the results and logs to."}
     )
     project: str = field(
         default="huggingface",
@@ -1466,10 +1451,6 @@ class TrainingArguments:
         # see https://github.com/huggingface/transformers/issues/10628
         if self.output_dir is not None:
             self.output_dir = os.path.expanduser(self.output_dir)
-        if self.logging_dir is None and self.output_dir is not None:
-            self.logging_dir = os.path.join(self.output_dir, default_logdir())
-        if self.logging_dir is not None:
-            self.logging_dir = os.path.expanduser(self.logging_dir)
 
         if self.disable_tqdm is None:
             self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
@@ -1672,13 +1653,6 @@ class TrainingArguments:
             mixed_precision_dtype = "bf16"
         os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision_dtype
 
-        if self.report_to is None:
-            logger.info(
-                "The default value for the training argument `--report_to` will change in v5 (from all installed "
-                "integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as "
-                "now. You should start updating your code and make this info disappear :-)."
-            )
-            self.report_to = "all"
         if self.report_to == "all" or self.report_to == ["all"]:
             # Import at runtime to avoid a circular import.
             from .integrations import get_available_reporting_integrations
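
With the default now "none", the shim that mapped an unset `report_to` to "all" (and its deprecation notice) is gone; per the remaining code above, "all" still expands to every available reporting integration. A rough sketch:

from transformers import TrainingArguments

args = TrainingArguments(output_dir="out", report_to="all")
print(args.report_to)  # after __post_init__, a list of the reporting integrations that are installed
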
@@ -2548,7 +2522,7 @@ class TrainingArguments:
                 Logger log level to use on the main process. Possible choices are the log levels as strings: `"debug"`,
                 `"info"`, `"warning"`, `"error"` and `"critical"`, plus a `"passive"` level which doesn't set anything
                 and lets the application set the level.
-            report_to (`str` or `list[str]`, *optional*, defaults to `"all"`):
+            report_to (`str` or `list[str]`, *optional*, defaults to `"none"`):
                 The list of integrations to report the results and logs to. Supported platforms are `"azure_ml"`,
                 `"clearml"`, `"codecarbon"`, `"comet_ml"`, `"dagshub"`, `"dvclive"`, `"flyte"`, `"mlflow"`,
                 `"neptune"`, `"swanlab"`, `"tensorboard"`, `"trackio"` and `"wandb"`. Use `"all"` to report to all