mirror of
https://github.com/huggingface/accelerate.git
synced 2025-10-20 18:13:46 +08:00
* Working version rebased from main
* kwargs
* Clean
* Fix more nits
* Fin
* Delay autocast flag
* Enable FP8 autocast during eval only if specified
* Fin
* Rm comment
* All done
* Zero3 works!
* Let the wrapper come off during unwrap_model
* Add import check
* Migrate all to benchmarks folder and make TE import check work
* Add readme
* Add README to benchmarks folder
* Update CLI to now include fp8 args
* Add test config for 0_34
* Finish adding to config yaml
* Write docs
* Expound docs w/ FP8
* Add to toctree
27 lines
543 B
YAML
27 lines
543 B
YAML
# Launch configuration: single machine, two processes, FP8 mixed precision.
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: MULTI_GPU
# Quoted so the value stays the string 'no' rather than a YAML 1.1 boolean.
downcast_bf16: 'no'
enable_cpu_affinity: false
# FP8 settings. NOTE(review): these eight keys were flattened to column 0 in
# the extracted copy; they are nested here because `fp8_config` carries no
# scalar value and the keys interrupt the otherwise alphabetical top-level
# order. Confirm against the upstream file.
fp8_config:
  amax_compute_algorithm: max
  amax_history_length: 1024
  backend: TE
  fp8_format: E4M3
  interval: 1
  margin: 0
  override_linear_precision: false
  use_autocast_during_eval: false
gpu_ids: all
machine_rank: 0
main_training_function: main
mixed_precision: fp8
num_machines: 1
num_processes: 2
rdzv_backend: static
same_network: true
# Explicit empty list (a bare `tpu_env:` would load as null instead).
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false