# This config template simply sets up the TransformerEngine config (and a config for a single GPU),
# this can interop with the other configs in this folder
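# ("NO" here means a single, non-distributed process; setting mixed_precision to "fp8"
# is what activates the fp8_config block below.)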
distributed_type: "NO"
mixed_precision: "fp8"
# Then we specify the fp8 configuration:
fp8_config:
  backend: TE # Can be TE | MS-AMP
  # The following are TE-specific arguments.
  # See https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/common.html#common-api for more details
  amax_history_len: 1024
  fp8_format: E4M3
  interval: 1
  margin: 0
  override_linear_precision: [false, false, false]
  # Generally this should always be set to `false` to have the most realistic fp8 eval performance
  use_autocast_during_eval: false
  # If using MS-AMP, we ignore all of the prior arguments and set an opt_level instead
  #opt_level: O1
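
# A minimal launch sketch (the file and script names below are placeholders, not
# part of this template): save this config as e.g. fp8.yaml and point
# `accelerate launch` at it, with train.py standing in for your own training script:
#   accelerate launch --config_file fp8.yaml train.py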