diff --git a/benchmarks/fp8/ms_amp/ddp.py b/benchmarks/fp8/ms_amp/ddp.py index ce80cded..942c3c6c 100644 --- a/benchmarks/fp8/ms_amp/ddp.py +++ b/benchmarks/fp8/ms_amp/ddp.py @@ -62,12 +62,12 @@ def train_baseline(opt_level="O2"): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -95,12 +95,12 @@ def train_integration(opt_level="O2"): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -109,15 +109,15 @@ if __name__ == "__main__": for opt_level in ["O1", "O2"]: baseline_not_trained, baseline_trained = train_baseline(opt_level) accelerator_not_trained, accelerator_trained = train_integration(opt_level) - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy not the same for untrained baseline and accelerator using opt_level={opt_level}: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 not the same for untrained baseline and accelerator using opt_level={opt_level}: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'Accuracy not the same for trained baseline and accelerator using opt_level={opt_level}: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 not the same for trained baseline and accelerator using opt_level={opt_level}: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy not the same for untrained baseline and accelerator using opt_level={opt_level}: {baseline_not_trained['accuracy']} == 
{accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 not the same for untrained baseline and accelerator using opt_level={opt_level}: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy not the same for trained baseline and accelerator using opt_level={opt_level}: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 not the same for trained baseline and accelerator using opt_level={opt_level}: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) diff --git a/benchmarks/fp8/ms_amp/distrib_deepspeed.py b/benchmarks/fp8/ms_amp/distrib_deepspeed.py index 57a25697..a6bb2688 100644 --- a/benchmarks/fp8/ms_amp/distrib_deepspeed.py +++ b/benchmarks/fp8/ms_amp/distrib_deepspeed.py @@ -90,12 +90,12 @@ def train_baseline(zero_stage: int = 1, opt_level: str = "O1"): model.destroy() torch.cuda.empty_cache() AcceleratorState()._reset_state(True) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -129,12 +129,12 @@ def train_integration(zero_stage: int = 1, opt_level: str = "O1"): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) model.destroy() torch.cuda.empty_cache() - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) AcceleratorState()._reset_state(True) return base_model_results, trained_model_results @@ -145,17 +145,17 @@ if __name__ == "__main__": for opt_level in ["O1", "O2", "O3"]: baseline_not_trained, baseline_trained = train_baseline(zero_stage, opt_level) accelerator_not_trained, accelerator_trained = train_integration(zero_stage, opt_level) - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'ZERO stage {zero_stage}, opt_level={opt_level}:\nAccuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == 
{accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'ZERO stage {zero_stage}, opt_level={opt_level}:\nF1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'ZERO stage {zero_stage}, opt_level={opt_level}:\nAccuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'ZERO stage {zero_stage}, opt_level={opt_level}:\nF1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"ZERO stage {zero_stage}, opt_level={opt_level}:\nAccuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"ZERO stage {zero_stage}, opt_level={opt_level}:\nF1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"ZERO stage {zero_stage}, opt_level={opt_level}:\nAccuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"ZERO stage {zero_stage}, opt_level={opt_level}:\nF1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) torch.distributed.destroy_process_group() diff --git a/benchmarks/fp8/ms_amp/non_distributed.py b/benchmarks/fp8/ms_amp/non_distributed.py index 6e4284ba..dc45aa45 100644 --- a/benchmarks/fp8/ms_amp/non_distributed.py +++ b/benchmarks/fp8/ms_amp/non_distributed.py @@ -56,12 +56,12 @@ def train_baseline(opt_level="O2"): trained_model_results = evaluate_model(model, eval_dataloader, METRIC) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -89,12 +89,12 @@ def train_integration(opt_level="O2"): trained_model_results = evaluate_model(model, eval_dataloader, METRIC) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: 
{trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -104,15 +104,15 @@ if __name__ == "__main__": baseline_not_trained, baseline_trained = train_baseline(opt_level) accelerator_not_trained, accelerator_trained = train_integration(opt_level) - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) diff --git a/benchmarks/fp8/torchao/ddp.py b/benchmarks/fp8/torchao/ddp.py index 5cb125b5..13b551ab 100644 --- a/benchmarks/fp8/torchao/ddp.py +++ b/benchmarks/fp8/torchao/ddp.py @@ -96,12 +96,12 @@ def train_baseline(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -128,12 +128,12 @@ 
def train_integration(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -142,17 +142,17 @@ if __name__ == "__main__": baseline_not_trained, baseline_trained = train_baseline() accelerator_not_trained, accelerator_trained = train_integration() - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) torch.distributed.destroy_process_group() diff --git a/benchmarks/fp8/torchao/distrib_deepspeed.py b/benchmarks/fp8/torchao/distrib_deepspeed.py index 6fc2080b..f76d1c66 100644 --- a/benchmarks/fp8/torchao/distrib_deepspeed.py +++ b/benchmarks/fp8/torchao/distrib_deepspeed.py @@ -126,12 +126,12 @@ def train_baseline(zero_stage: int = 1): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) model.destroy() - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 
score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) del config return base_model_results, trained_model_results, model_outputs, data @@ -180,12 +180,12 @@ def train_integration(zero_stage: int = 1): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) model.destroy() - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) del config return base_model_results, trained_model_results, model_outputs, data @@ -197,17 +197,17 @@ if __name__ == "__main__": accelerator_not_trained, accelerator_trained, accelerator_outputs, accelerator_data = train_integration( zero_stage ) - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'ZERO stage {zero_stage}: F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'ZERO stage {zero_stage}: F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"ZERO stage {zero_stage}: F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"ZERO stage {zero_stage}: 
F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) AcceleratorState()._reset_state(True) torch.distributed.destroy_process_group() diff --git a/benchmarks/fp8/torchao/fsdp.py b/benchmarks/fp8/torchao/fsdp.py index 42eedb48..fbac6e11 100644 --- a/benchmarks/fp8/torchao/fsdp.py +++ b/benchmarks/fp8/torchao/fsdp.py @@ -106,12 +106,12 @@ def train_baseline(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -143,12 +143,12 @@ def train_integration(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -157,17 +157,17 @@ if __name__ == "__main__": baseline_not_trained, baseline_trained = train_baseline() accelerator_not_trained, accelerator_trained = train_integration() - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == 
{accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) torch.distributed.destroy_process_group() diff --git a/benchmarks/fp8/torchao/non_distributed.py b/benchmarks/fp8/torchao/non_distributed.py index 7b8e5993..621b87f3 100644 --- a/benchmarks/fp8/torchao/non_distributed.py +++ b/benchmarks/fp8/torchao/non_distributed.py @@ -87,12 +87,12 @@ def train_baseline(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -117,12 +117,12 @@ def train_integration(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -131,15 +131,15 @@ if __name__ == "__main__": baseline_not_trained, baseline_trained = train_baseline() AcceleratorState._reset_state(True) accelerator_not_trained, accelerator_trained = train_integration() - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), 
f'Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) diff --git a/benchmarks/fp8/transformer_engine/ddp.py b/benchmarks/fp8/transformer_engine/ddp.py index ba708a27..2cab285e 100644 --- a/benchmarks/fp8/transformer_engine/ddp.py +++ b/benchmarks/fp8/transformer_engine/ddp.py @@ -79,12 +79,12 @@ def train_baseline(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -114,12 +114,12 @@ def train_integration(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -128,17 +128,17 @@ if __name__ == "__main__": baseline_not_trained, baseline_trained = train_baseline() accelerator_not_trained, accelerator_trained = train_integration() - assert ( - 
baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) torch.distributed.destroy_process_group() diff --git a/benchmarks/fp8/transformer_engine/distrib_deepspeed.py b/benchmarks/fp8/transformer_engine/distrib_deepspeed.py index 73953b67..6d64bf4f 100644 --- a/benchmarks/fp8/transformer_engine/distrib_deepspeed.py +++ b/benchmarks/fp8/transformer_engine/distrib_deepspeed.py @@ -113,12 +113,12 @@ def train_baseline(zero_stage: int = 1): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) model.destroy() - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results, model_outputs, data @@ -159,12 +159,12 @@ def train_integration(zero_stage: int = 1): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) model.destroy() - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: 
{trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results, model_outputs, data @@ -175,17 +175,17 @@ if __name__ == "__main__": accelerator_not_trained, accelerator_trained, accelerator_outputs, accelerator_data = train_integration( zero_stage ) - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'ZERO stage {zero_stage}: F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'ZERO stage {zero_stage}: F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"ZERO stage {zero_stage}: F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"ZERO stage {zero_stage}: Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"ZERO stage {zero_stage}: F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) torch.distributed.destroy_process_group() diff --git a/benchmarks/fp8/transformer_engine/fsdp.py b/benchmarks/fp8/transformer_engine/fsdp.py index 41812218..26aca70a 100644 --- a/benchmarks/fp8/transformer_engine/fsdp.py +++ b/benchmarks/fp8/transformer_engine/fsdp.py @@ -91,12 +91,12 @@ def train_baseline(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > 
{base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -131,12 +131,12 @@ def train_integration(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC, accelerator=accelerator) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -145,17 +145,17 @@ if __name__ == "__main__": baseline_not_trained, baseline_trained = train_baseline() accelerator_not_trained, accelerator_trained = train_integration() - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) torch.distributed.destroy_process_group() diff --git a/benchmarks/fp8/transformer_engine/non_distributed.py b/benchmarks/fp8/transformer_engine/non_distributed.py index 71d577c4..8d647bfd 100644 --- a/benchmarks/fp8/transformer_engine/non_distributed.py +++ b/benchmarks/fp8/transformer_engine/non_distributed.py @@ -70,12 +70,12 @@ def train_baseline(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC) - assert ( - 
trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -104,12 +104,12 @@ def train_integration(): trained_model_results = evaluate_model(model, eval_dataloader, METRIC) - assert ( - trained_model_results["accuracy"] > base_model_results["accuracy"] - ), f'Accuracy should be higher for the trained model: {trained_model_results["accuracy"]} > {base_model_results["accuracy"]}' - assert ( - trained_model_results["f1"] > base_model_results["f1"] - ), f'F1 score should be higher for the trained model: {trained_model_results["f1"]} > {base_model_results["f1"]}' + assert trained_model_results["accuracy"] > base_model_results["accuracy"], ( + f"Accuracy should be higher for the trained model: {trained_model_results['accuracy']} > {base_model_results['accuracy']}" + ) + assert trained_model_results["f1"] > base_model_results["f1"], ( + f"F1 score should be higher for the trained model: {trained_model_results['f1']} > {base_model_results['f1']}" + ) return base_model_results, trained_model_results @@ -118,15 +118,15 @@ if __name__ == "__main__": baseline_not_trained, baseline_trained = train_baseline() accelerator_not_trained, accelerator_trained = train_integration() - assert ( - baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_not_trained["accuracy"]} == {accelerator_not_trained["accuracy"]}' - assert ( - baseline_not_trained["f1"] == accelerator_not_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_not_trained["f1"]} == {accelerator_not_trained["f1"]}' - assert ( - baseline_trained["accuracy"] == accelerator_trained["accuracy"] - ), f'Accuracy should be the same for the baseline and accelerator: {baseline_trained["accuracy"]} == {accelerator_trained["accuracy"]}' - assert ( - baseline_trained["f1"] == accelerator_trained["f1"] - ), f'F1 score should be the same for the baseline and accelerator: {baseline_trained["f1"]} == {accelerator_trained["f1"]}' + assert baseline_not_trained["accuracy"] == accelerator_not_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_not_trained['accuracy']} == {accelerator_not_trained['accuracy']}" + ) + assert baseline_not_trained["f1"] == accelerator_not_trained["f1"], ( + f"F1 score should be the same for the baseline and accelerator: {baseline_not_trained['f1']} == {accelerator_not_trained['f1']}" + ) + assert baseline_trained["accuracy"] == accelerator_trained["accuracy"], ( + f"Accuracy should be the same for the baseline and accelerator: {baseline_trained['accuracy']} == {accelerator_trained['accuracy']}" + ) + assert baseline_trained["f1"] == accelerator_trained["f1"], ( + f"F1 score should be the same for the baseline 
and accelerator: {baseline_trained['f1']} == {accelerator_trained['f1']}" + ) diff --git a/benchmarks/fsdp2/main.py b/benchmarks/fsdp2/main.py index ce96c652..50c60ea3 100644 --- a/benchmarks/fsdp2/main.py +++ b/benchmarks/fsdp2/main.py @@ -59,7 +59,7 @@ def evaluate(args, config: dict, init_fn: Callable, run_name: str) -> torch.Tens Loss: {loss[-1].item()} Peak Allocated Memory: {float(memory_tracker.peak_allocated_memory):.2f} MB Peak Reserved Memory: {float(memory_tracker.peak_reserved_memory):.2f} MB -{'-' * 34}""" +{"-" * 34}""" accelerator.print(msg) return loss diff --git a/examples/by_feature/megatron_lm_gpt_pretraining.py b/examples/by_feature/megatron_lm_gpt_pretraining.py index c9d4787e..b357106e 100644 --- a/examples/by_feature/megatron_lm_gpt_pretraining.py +++ b/examples/by_feature/megatron_lm_gpt_pretraining.py @@ -611,7 +611,7 @@ def main(): if isinstance(checkpointing_steps, int): if completed_steps % checkpointing_steps == 0: - output_dir = f"step_{completed_steps }" + output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) accelerator.save_state(output_dir) diff --git a/setup.py b/setup.py index 1cbb9872..6718ba6d 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ extras = {} extras["quality"] = [ "black ~= 23.1", # hf-doc-builder has a hidden dependency on `black` "hf-doc-builder >= 0.3.0", - "ruff ~= 0.6.4", + "ruff ~= 0.11.2", ] extras["docs"] = [] extras["test_prod"] = ["pytest>=7.2.0,<=8.0.0", "pytest-xdist", "pytest-subtests", "parameterized", "pytest-order"] diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py index 9f4e73b5..74897906 100755 --- a/src/accelerate/accelerator.py +++ b/src/accelerate/accelerator.py @@ -445,9 +445,9 @@ class Accelerator: self.has_fp8_handler = False if kwargs_handlers is not None: for handler in kwargs_handlers: - assert isinstance( - handler, KwargsHandler - ), f"Unsupported kwargs handler passed: {handler}, must be one that inherits `accelerate.utils.KwargsHandler`." + assert isinstance(handler, KwargsHandler), ( + f"Unsupported kwargs handler passed: {handler}, must be one that inherits `accelerate.utils.KwargsHandler`." 
+ ) # Add the handler class to the set of found handlers if handler.__class__ in found_handlers: raise ValueError(f"You can only pass one {handler.__class__} in `kwargs_handlers`.") diff --git a/src/accelerate/commands/config/cluster.py b/src/accelerate/commands/config/cluster.py index e62796ac..8ef82cfd 100644 --- a/src/accelerate/commands/config/cluster.py +++ b/src/accelerate/commands/config/cluster.py @@ -228,9 +228,9 @@ def get_cluster_input(): ) if use_deepspeed: distributed_type = DistributedType.DEEPSPEED - assert ( - is_deepspeed_available() - ), "DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source" + assert is_deepspeed_available(), ( + "DeepSpeed is not installed => run `pip3 install deepspeed` or build it from source" + ) if distributed_type == DistributedType.DEEPSPEED: use_deepspeed_config = _ask_field( diff --git a/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py b/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py index 7b86a40b..6a155389 100644 --- a/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py +++ b/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py @@ -184,12 +184,12 @@ def training_function(config, args): with open(os.path.join(args.output_dir, f"state_{starting_epoch - 1}.json")) as f: resumed_state = json.load(f) assert resumed_state["accuracy"] == accuracy, "Accuracy mismatch, loading from checkpoint failed" - assert ( - resumed_state["lr"] == lr_scheduler.get_lr()[0] - ), "Scheduler learning rate mismatch, loading from checkpoint failed" - assert ( - resumed_state["optimizer_lr"] == optimizer.param_groups[0]["lr"] - ), "Optimizer learning rate mismatch, loading from checkpoint failed" + assert resumed_state["lr"] == lr_scheduler.get_lr()[0], ( + "Scheduler learning rate mismatch, loading from checkpoint failed" + ) + assert resumed_state["optimizer_lr"] == optimizer.param_groups[0]["lr"], ( + "Optimizer learning rate mismatch, loading from checkpoint failed" + ) assert resumed_state["epoch"] == starting_epoch - 1, "Epoch mismatch, loading from checkpoint failed" return diff --git a/src/accelerate/test_utils/scripts/external_deps/test_metrics.py b/src/accelerate/test_utils/scripts/external_deps/test_metrics.py index dfbf5c9f..d1bfe351 100755 --- a/src/accelerate/test_utils/scripts/external_deps/test_metrics.py +++ b/src/accelerate/test_utils/scripts/external_deps/test_metrics.py @@ -115,9 +115,9 @@ def test_torch_metrics( ): _, ddp_model, dataloader = get_basic_setup(accelerator, num_samples, batch_size) logits, _ = generate_predictions(ddp_model, dataloader, accelerator) - assert ( - len(logits) == num_samples - ), f"Unexpected number of inputs:\n Expected: {num_samples}\n Actual: {len(logits)}" + assert len(logits) == num_samples, ( + f"Unexpected number of inputs:\n Expected: {num_samples}\n Actual: {len(logits)}" + ) def test_mrpc(dispatch_batches: bool = False, split_batches: bool = False): @@ -148,9 +148,9 @@ def test_mrpc(dispatch_batches: bool = False, split_batches: bool = False): distributed = metric.compute() for key in "accuracy f1".split(): - assert math.isclose( - baseline[key], distributed[key] - ), f"Baseline and Distributed are not the same for key {key}:\n\tBaseline: {baseline[key]}\n\tDistributed: {distributed[key]}\n" + assert math.isclose(baseline[key], distributed[key]), ( + f"Baseline and Distributed are not the same for key {key}:\n\tBaseline: {baseline[key]}\n\tDistributed: {distributed[key]}\n" + ) def 
test_gather_for_metrics_with_non_tensor_objects_iterable_dataset(): @@ -235,9 +235,9 @@ def test_gather_for_metrics_drop_last(): # Should return a full set of complete batches from each GPU num_expected_items = per_device_batch_size * accelerator.num_processes - assert gathered_items.size(0) == ( - num_expected_items - ), f"Expected number of items: {num_expected_items}, Actual: {gathered_items.size(0)}" + assert gathered_items.size(0) == (num_expected_items), ( + f"Expected number of items: {num_expected_items}, Actual: {gathered_items.size(0)}" + ) def main(): diff --git a/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py b/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py index ef412e6d..723e5497 100644 --- a/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py +++ b/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py @@ -255,9 +255,9 @@ def training_function(config, args): ) train_total_peak_memory[f"epoch-{epoch}"] = tracemalloc.peaked + b2mb(tracemalloc.begin) if args.peak_memory_upper_bound is not None: - assert ( - train_total_peak_memory[f"epoch-{epoch}"] <= args.peak_memory_upper_bound - ), "Peak memory usage exceeded the upper bound" + assert train_total_peak_memory[f"epoch-{epoch}"] <= args.peak_memory_upper_bound, ( + "Peak memory usage exceeded the upper bound" + ) accelerator.wait_for_everyone() if accelerator.is_main_process: diff --git a/src/accelerate/test_utils/scripts/external_deps/test_performance.py b/src/accelerate/test_utils/scripts/external_deps/test_performance.py index 18b36738..60c6cdfd 100644 --- a/src/accelerate/test_utils/scripts/external_deps/test_performance.py +++ b/src/accelerate/test_utils/scripts/external_deps/test_performance.py @@ -161,9 +161,9 @@ def training_function(config, args): and linear_decay_scheduler and accelerator.state.mixed_precision == "no" ): - assert ( - lr_scheduler.get_last_lr()[0] == expected_lr_after_first_optim_step - ), f"Wrong lr found at second step, expected {expected_lr_after_first_optim_step}, got {lr_scheduler.get_last_lr()[0]}" + assert lr_scheduler.get_last_lr()[0] == expected_lr_after_first_optim_step, ( + f"Wrong lr found at second step, expected {expected_lr_after_first_optim_step}, got {lr_scheduler.get_last_lr()[0]}" + ) lr_scheduler_check_completed = True model.eval() @@ -199,14 +199,14 @@ def training_function(config, args): # check that the LR is 0 if linear_decay_scheduler and accelerator.state.mixed_precision == "no": - assert ( - lr_scheduler.get_last_lr()[0] == 0 - ), f"Wrong lr found at last step, expected 0, got {lr_scheduler.get_last_lr()[0]}" + assert lr_scheduler.get_last_lr()[0] == 0, ( + f"Wrong lr found at last step, expected 0, got {lr_scheduler.get_last_lr()[0]}" + ) if args.performance_lower_bound is not None: - assert ( - args.performance_lower_bound <= best_performance - ), f"Best performance metric {best_performance} is lower than the lower bound {args.performance_lower_bound}" + assert args.performance_lower_bound <= best_performance, ( + f"Best performance metric {best_performance} is lower than the lower bound {args.performance_lower_bound}" + ) accelerator.wait_for_everyone() if accelerator.is_main_process: @@ -216,9 +216,9 @@ def training_function(config, args): # Finally try saving the model accelerator.save_model(model, args.output_dir) accelerator.wait_for_everyone() - assert Path( - args.output_dir, SAFE_WEIGHTS_NAME - ).exists(), "Model was not saved when calling `Accelerator.save_model`" + 
assert Path(args.output_dir, SAFE_WEIGHTS_NAME).exists(), ( + "Model was not saved when calling `Accelerator.save_model`" + ) accelerator.end_training() diff --git a/src/accelerate/test_utils/scripts/test_distributed_data_loop.py b/src/accelerate/test_utils/scripts/test_distributed_data_loop.py index 641669d0..08cbbeb8 100644 --- a/src/accelerate/test_utils/scripts/test_distributed_data_loop.py +++ b/src/accelerate/test_utils/scripts/test_distributed_data_loop.py @@ -270,9 +270,9 @@ def test_data_loader(data_loader, accelerator): sorted_all_examples = sorted(all_examples) # Check if all elements are present in the sorted list of iterated samples - assert ( - len(set(sorted_all_examples)) == NUM_ELEMENTS - ), "Not all the dataset elements have been iterated in an epoch due to duplication of samples across processes." + assert len(set(sorted_all_examples)) == NUM_ELEMENTS, ( + "Not all the dataset elements have been iterated in an epoch due to duplication of samples across processes." + ) def test_stateful_dataloader(accelerator): diff --git a/src/accelerate/test_utils/scripts/test_script.py b/src/accelerate/test_utils/scripts/test_script.py index 71891965..6912ba2f 100644 --- a/src/accelerate/test_utils/scripts/test_script.py +++ b/src/accelerate/test_utils/scripts/test_script.py @@ -112,9 +112,9 @@ def process_execution_check(): assert text.startswith("Currently in the main process\n"), "Main process was not first" if num_processes > 1: assert text.endswith("Now on another process\n"), "Main process was not first" - assert ( - text.count("Now on another process\n") == accelerator.num_processes - 1 - ), f"Only wrote to file {text.count('Now on another process') + 1} times, not {accelerator.num_processes}" + assert text.count("Now on another process\n") == accelerator.num_processes - 1, ( + f"Only wrote to file {text.count('Now on another process') + 1} times, not {accelerator.num_processes}" + ) except AssertionError: path.unlink() raise @@ -351,13 +351,13 @@ def custom_sampler_check(): dl = prepare_data_loader(dl, state.device, state.num_processes, state.process_index) # We need just ensure that `dl.batch_sampler` (or `dl.batch_sampler.batch_sampler` is indeed the old batch sampler if hasattr(dl.batch_sampler, "batch_sampler"): - assert isinstance( - dl.batch_sampler.batch_sampler, CustomBatchSampler - ), "Custom sampler was changed after calling `prepare_data_loader`" + assert isinstance(dl.batch_sampler.batch_sampler, CustomBatchSampler), ( + "Custom sampler was changed after calling `prepare_data_loader`" + ) else: - assert isinstance( - dl.batch_sampler, CustomBatchSampler - ), "Custom sampler was changed after calling `prepare_data_loader`" + assert isinstance(dl.batch_sampler, CustomBatchSampler), ( + "Custom sampler was changed after calling `prepare_data_loader`" + ) def check_seedable_sampler(): @@ -400,9 +400,9 @@ def check_seedable_sampler_in_batch_sampler_shard(): ) target_sampler = prepared_data_loader.batch_sampler.batch_sampler.sampler - assert isinstance( - target_sampler, SeedableRandomSampler - ), "Sampler in BatchSamplerShard is not SeedableRandomSampler." + assert isinstance(target_sampler, SeedableRandomSampler), ( + "Sampler in BatchSamplerShard is not SeedableRandomSampler." 
+    )
 
 
 def check_seedable_sampler_with_data_seed():
@@ -666,31 +666,31 @@ def test_split_between_processes_dataset(datasets_Dataset):
     state = AcceleratorState()
     data = datasets_Dataset.from_list([dict(k=v) for v in range(2 * state.num_processes)])
     with state.split_between_processes(data, apply_padding=False) as results:
-        assert (
-            len(results) == 2
-        ), f"Each process did not have two items. Process index: {state.process_index}; Length: {len(results)}"
+        assert len(results) == 2, (
+            f"Each process did not have two items. Process index: {state.process_index}; Length: {len(results)}"
+        )
 
     data = datasets_Dataset.from_list([dict(k=v) for v in range(2 * state.num_processes - 1)])
     with state.split_between_processes(data, apply_padding=False) as results:
         if state.is_last_process:
-            assert (
-                len(results) == 1
-            ), f"Last process did not receive a single item. Process index: {state.process_index}; Length: {len(results)}"
+            assert len(results) == 1, (
+                f"Last process did not receive a single item. Process index: {state.process_index}; Length: {len(results)}"
+            )
         else:
-            assert (
-                len(results) == 2
-            ), f"One of the intermediate processes did not receive two items. Process index: {state.process_index}; Length: {len(results)}"
+            assert len(results) == 2, (
+                f"One of the intermediate processes did not receive two items. Process index: {state.process_index}; Length: {len(results)}"
+            )
 
     data = datasets_Dataset.from_list([dict(k=v) for v in range(2 * state.num_processes - 1)])
     with state.split_between_processes(data, apply_padding=True) as results:
         if state.num_processes == 1:
-            assert (
-                len(results) == 1
-            ), f"Single process did not receive a single item. Process index: {state.process_index}; Length: {len(results)}"
+            assert len(results) == 1, (
+                f"Single process did not receive a single item. Process index: {state.process_index}; Length: {len(results)}"
+            )
         else:
-            assert (
-                len(results) == 2
-            ), f"Each process did not have two items. Process index: {state.process_index}; Length: {len(results)}"
+            assert len(results) == 2, (
+                f"Each process did not have two items. Process index: {state.process_index}; Length: {len(results)}"
+            )
 
     state.wait_for_everyone()
 
@@ -699,18 +699,18 @@ def test_split_between_processes_list():
     state = AcceleratorState()
     data = list(range(0, 2 * state.num_processes))
     with state.split_between_processes(data) as results:
-        assert (
-            len(results) == 2
-        ), f"Each process did not have two items. Process index: {state.process_index}; Length: {len(results)}"
+        assert len(results) == 2, (
+            f"Each process did not have two items. Process index: {state.process_index}; Length: {len(results)}"
+        )
 
     data = list(range(0, (3 * state.num_processes) - 1))
     with state.split_between_processes(data, apply_padding=True) as results:
         if state.is_last_process:
             # Test that the last process gets the extra item(s)
             num_samples_per_device = math.ceil(len(data) / state.num_processes)
-            assert (
-                len(results) == num_samples_per_device
-            ), f"Last process did not get the extra item(s). Process index: {state.process_index}; Length: {len(results)}"
+            assert len(results) == num_samples_per_device, (
+                f"Last process did not get the extra item(s). Process index: {state.process_index}; Length: {len(results)}"
+            )
 
     state.wait_for_everyone()
 
@@ -737,17 +737,17 @@ def test_split_between_processes_nested_dict():
         elif state.process_index == 3:
             assert results["b"] == data_copy["b"][-2:]
         if state.process_index == 0:
-            assert torch.allclose(
-                results["c"], data_copy["c"][: 8 // state.num_processes]
-            ), f"Did not obtain expected values on process 0, expected `{data['c'][: 8 // state.num_processes]}`, received: {results['c']}"
+            assert torch.allclose(results["c"], data_copy["c"][: 8 // state.num_processes]), (
+                f"Did not obtain expected values on process 0, expected `{data['c'][: 8 // state.num_processes]}`, received: {results['c']}"
+            )
         elif state.num_processes == 2:
-            assert torch.allclose(
-                results["c"], data_copy["c"][4:]
-            ), f"Did not obtain expected values on process 2, expected `{data['c'][4:]}`, received: {results['c']}"
+            assert torch.allclose(results["c"], data_copy["c"][4:]), (
+                f"Did not obtain expected values on process 2, expected `{data['c'][4:]}`, received: {results['c']}"
+            )
         elif state.process_index == 3:
-            assert torch.allclose(
-                results["c"], data_copy["c"][-2:]
-            ), f"Did not obtain expected values on process 4, expected `{data['c'][-2:]}`, received: {results['c']}"
+            assert torch.allclose(results["c"], data_copy["c"][-2:]), (
+                f"Did not obtain expected values on process 4, expected `{data['c'][-2:]}`, received: {results['c']}"
+            )
 
     state.wait_for_everyone()
 
@@ -773,13 +773,13 @@ def test_split_between_processes_evenly():
     num_extras = len(data) % state.num_processes
     with state.split_between_processes(data) as results:
         if state.process_index < num_extras:
-            assert (
-                len(results) == num_samples_per_process + 1
-            ), f"Each Process should have even elements. Expected: {num_samples_per_process + 1}, Actual: {len(results)}"
+            assert len(results) == num_samples_per_process + 1, (
+                f"Each Process should have even elements. Expected: {num_samples_per_process + 1}, Actual: {len(results)}"
+            )
         else:
-            assert (
-                len(results) == num_samples_per_process
-            ), f"Each Process should have even elements. Expected: {num_samples_per_process}, Actual: {len(results)}"
+            assert len(results) == num_samples_per_process, (
+                f"Each Process should have even elements. Expected: {num_samples_per_process}, Actual: {len(results)}"
+            )
 
     state.wait_for_everyone()
 
diff --git a/src/accelerate/test_utils/scripts/test_sync.py b/src/accelerate/test_utils/scripts/test_sync.py
index 02ce0cb7..44e1ecc1 100644
--- a/src/accelerate/test_utils/scripts/test_sync.py
+++ b/src/accelerate/test_utils/scripts/test_sync.py
@@ -32,14 +32,14 @@ def check_model_parameters(model_a, model_b, did_step, iteration, **kwargs):
             continue
         if not did_step:
             # Grads should not be in sync
-            assert (
-                torch.allclose(param.grad, grad_param.grad, **kwargs) is False
-            ), f"Gradients in sync when they should not be at iteration {iteration}:\nmodel_a grad ({param.grad}) == model_b grad ({grad_param.grad})"
+            assert torch.allclose(param.grad, grad_param.grad, **kwargs) is False, (
+                f"Gradients in sync when they should not be at iteration {iteration}:\nmodel_a grad ({param.grad}) == model_b grad ({grad_param.grad})"
+            )
         else:
             # Grads should be in sync
-            assert (
-                torch.allclose(param.grad, grad_param.grad, **kwargs) is True
-            ), f"Gradients not in sync when they should be at iteration {iteration}:\nmodel_a grad ({param.grad}) != model_b grad ({grad_param.grad})"
+            assert torch.allclose(param.grad, grad_param.grad, **kwargs) is True, (
+                f"Gradients not in sync when they should be at iteration {iteration}:\nmodel_a grad ({param.grad}) != model_b grad ({grad_param.grad})"
+            )
 
 
 def step_model(model, input, target, accelerator, do_backward=True):
@@ -101,9 +101,9 @@ def test_noop_sync(accelerator):
         for param, ddp_param in zip(model.parameters(), ddp_model.parameters()):
             if not param.requires_grad:
                 continue
-            assert torch.allclose(
-                param.grad, ddp_param.grad
-            ), f"Gradients not in sync when they should be:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+            assert torch.allclose(param.grad, ddp_param.grad), (
+                f"Gradients not in sync when they should be:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+            )
 
         # Shuffle ddp_input on each iteration
         torch.manual_seed(1337 + iteration)
@@ -136,14 +136,14 @@ def test_distributed_sync(accelerator):
                 continue
             if iteration % 2 == 0:
                 # Grads should not be in sync
-                assert (
-                    torch.allclose(param.grad, ddp_param.grad) is False
-                ), f"Gradients in sync when they should not be:\nModel grad ({param.grad}) == DDP grad ({ddp_param.grad})"
+                assert torch.allclose(param.grad, ddp_param.grad) is False, (
+                    f"Gradients in sync when they should not be:\nModel grad ({param.grad}) == DDP grad ({ddp_param.grad})"
+                )
             else:
                 # Grads should be in sync
-                assert (
-                    torch.allclose(param.grad, ddp_param.grad) is True
-                ), f"Gradients not in sync when they should be:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+                assert torch.allclose(param.grad, ddp_param.grad) is True, (
+                    f"Gradients not in sync when they should be:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+                )
 
         # Shuffle ddp_input on each iteration
         torch.manual_seed(1337 + iteration)
@@ -185,9 +185,9 @@ def test_distributed_sync_multiple_fwd(accelerator):
                 if not param.requires_grad:
                     continue
                 # Grads should not be in sync
-                assert (
-                    torch.allclose(param.grad, ddp_param.grad) is False
-                ), f"Gradients in sync when they should not be:\nModel grad ({param.grad}) == DDP grad ({ddp_param.grad})"
+                assert torch.allclose(param.grad, ddp_param.grad) is False, (
+                    f"Gradients in sync when they should not be:\nModel grad ({param.grad}) == DDP grad ({ddp_param.grad})"
+                )
 
         else:
             # Sync grads if last backward
@@ -199,9 +199,9 @@ def test_distributed_sync_multiple_fwd(accelerator):
                 if not param.requires_grad:
                     continue
                 # Grads should be in sync
-                assert (
-                    torch.allclose(param.grad, ddp_param.grad) is True
-                ), f"Gradients not in sync when they should be:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+                assert torch.allclose(param.grad, ddp_param.grad) is True, (
+                    f"Gradients not in sync when they should be:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+                )
 
 
 def test_gradient_accumulation(split_batches=False, dispatch_batches=False, sync_each_batch=False):
@@ -230,14 +230,14 @@ def test_gradient_accumulation(split_batches=False, dispatch_batches=False, sync
                 continue
             if ((iteration + 1) % 2 == 0) or (iteration == len(dataloader) - 1) or sync_each_batch:
                 # Grads should be in sync
-                assert (
-                    torch.allclose(param.grad, ddp_param.grad) is True
-                ), f"Gradients not in sync when they should be at iteration {iteration}:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+                assert torch.allclose(param.grad, ddp_param.grad) is True, (
+                    f"Gradients not in sync when they should be at iteration {iteration}:\nModel grad ({param.grad}) != DDP grad ({ddp_param.grad})"
+                )
             else:
                 # Grads should not be in sync
-                assert (
-                    torch.allclose(param.grad, ddp_param.grad) is False
-                ), f"Gradients in sync when they should not be at iteration {iteration}:\nModel grad ({param.grad}) == DDP grad ({ddp_param.grad})"
+                assert torch.allclose(param.grad, ddp_param.grad) is False, (
+                    f"Gradients in sync when they should not be at iteration {iteration}:\nModel grad ({param.grad}) == DDP grad ({ddp_param.grad})"
+                )
 
         # Shuffle ddp_input on each iteration
         torch.manual_seed(1337 + iteration)
@@ -281,9 +281,9 @@ def test_gradient_accumulation_with_opt_and_scheduler(
         ddp_sched.step()
 
         # Learning rates should be the same
-        assert (
-            opt.param_groups[0]["lr"] == ddp_opt.param_groups[0]["lr"]
-        ), f"Learning rates found in each optimizer did not align\nopt: {opt.param_groups[0]['lr']}\nDDP opt: {ddp_opt.param_groups[0]['lr']}\n"
+        assert opt.param_groups[0]["lr"] == ddp_opt.param_groups[0]["lr"], (
+            f"Learning rates found in each optimizer did not align\nopt: {opt.param_groups[0]['lr']}\nDDP opt: {ddp_opt.param_groups[0]['lr']}\n"
+        )
         did_step = (((iteration + 1) % 2) == 0) or ((iteration + 1) == len(dataloader))
         if accelerator.num_processes > 1:
             check_model_parameters(
diff --git a/tests/fsdp/test_fsdp.py b/tests/fsdp/test_fsdp.py
index 9e01ae7a..1cd63e9a 100644
--- a/tests/fsdp/test_fsdp.py
+++ b/tests/fsdp/test_fsdp.py
@@ -177,9 +177,9 @@ class FSDPPluginIntegration(AccelerateTestCase):
             env["FSDP_BACKWARD_PREFETCH"] = prefetch_policy
             with patch_environment(**env), ctx as cm:
                 fsdp_plugin = FullyShardedDataParallelPlugin()
-                assert (
-                    fsdp_plugin.backward_prefetch == expected_value
-                ), f"Actual: {fsdp_plugin.backward_prefetch} != Expected: {expected_value}"
+                assert fsdp_plugin.backward_prefetch == expected_value, (
+                    f"Actual: {fsdp_plugin.backward_prefetch} != Expected: {expected_value}"
+                )
                 if cm:
                     self.assertTrue(any(_warning_message_fsdp2 in out for out in cm.output))
 
diff --git a/tests/test_accelerator.py b/tests/test_accelerator.py
index d8cc638b..98a2a765 100644
--- a/tests/test_accelerator.py
+++ b/tests/test_accelerator.py
@@ -439,24 +439,24 @@ class AcceleratorTester(AccelerateTestCase):
         model, optimizer, scheduler, train_dl, valid_dl, dummy_obj = accelerator.prepare(
             model, optimizer, scheduler, train_dl, valid_dl, dummy_obj
         )
-        assert (
-            getattr(dummy_obj, "_is_accelerate_prepared", False) is False
-        ), "Dummy object should have `_is_accelerate_prepared` set to `True`"
-        assert (
-            getattr(model, "_is_accelerate_prepared", False) is True
-        ), "Model is missing `_is_accelerator_prepared` or is set to `False`"
-        assert (
-            getattr(optimizer, "_is_accelerate_prepared", False) is True
-        ), "Optimizer is missing `_is_accelerator_prepared` or is set to `False`"
-        assert (
-            getattr(scheduler, "_is_accelerate_prepared", False) is True
-        ), "Scheduler is missing `_is_accelerator_prepared` or is set to `False`"
-        assert (
-            getattr(train_dl, "_is_accelerate_prepared", False) is True
-        ), "Train Dataloader is missing `_is_accelerator_prepared` or is set to `False`"
-        assert (
-            getattr(valid_dl, "_is_accelerate_prepared", False) is True
-        ), "Valid Dataloader is missing `_is_accelerator_prepared` or is set to `False`"
+        assert getattr(dummy_obj, "_is_accelerate_prepared", False) is False, (
+            "Dummy object should have `_is_accelerate_prepared` set to `True`"
+        )
+        assert getattr(model, "_is_accelerate_prepared", False) is True, (
+            "Model is missing `_is_accelerator_prepared` or is set to `False`"
+        )
+        assert getattr(optimizer, "_is_accelerate_prepared", False) is True, (
+            "Optimizer is missing `_is_accelerator_prepared` or is set to `False`"
+        )
+        assert getattr(scheduler, "_is_accelerate_prepared", False) is True, (
+            "Scheduler is missing `_is_accelerator_prepared` or is set to `False`"
+        )
+        assert getattr(train_dl, "_is_accelerate_prepared", False) is True, (
+            "Train Dataloader is missing `_is_accelerator_prepared` or is set to `False`"
+        )
+        assert getattr(valid_dl, "_is_accelerate_prepared", False) is True, (
+            "Valid Dataloader is missing `_is_accelerator_prepared` or is set to `False`"
+        )
 
     @require_cuda_or_xpu
     @slow
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 222e2070..87acd55a 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -498,16 +498,16 @@ class ModelEstimatorTester(unittest.TestCase):
             total_training_size_estimate = total_size_estimate * 4
 
             assert precision_str == output[i][0], f"Output is missing precision `{precision_str}`"
-            assert (
-                largest_layer_estimate == output[i][1]
-            ), f"Calculation for largest layer size in `{precision_str}` is incorrect."
+            assert largest_layer_estimate == output[i][1], (
+                f"Calculation for largest layer size in `{precision_str}` is incorrect."
+            )
 
-            assert (
-                total_size_estimate == output[i][2]
-            ), f"Calculation for total size in `{precision_str}` is incorrect."
-            assert total_training_size_estimate == max(
-                output[i][3].values()
-            ), f"Calculation for total training size in `{precision_str}` is incorrect."
+            assert total_size_estimate == output[i][2], (
+                f"Calculation for total size in `{precision_str}` is incorrect."
+            )
+            assert total_training_size_estimate == max(output[i][3].values()), (
+                f"Calculation for total training size in `{precision_str}` is incorrect."
+            )
 
     @require_transformers
     def test_transformers_model(self):
@@ -515,12 +515,12 @@ class ModelEstimatorTester(unittest.TestCase):
         output = gather_data(args)
         # The largest layer and total size of the model in bytes
         largest_layer, total_size = 90669056, 433249280
-        assert (
-            largest_layer == output[0][1]
-        ), f"Calculation for largest layer size in `fp32` is incorrect, expected {largest_layer} but received {output[0][1]}"
-        assert (
-            total_size == output[0][2]
-        ), f"Calculation for total size in `fp32` is incorrect, expected {total_size} but received {output[0][2]}"
+        assert largest_layer == output[0][1], (
+            f"Calculation for largest layer size in `fp32` is incorrect, expected {largest_layer} but received {output[0][1]}"
+        )
+        assert total_size == output[0][2], (
+            f"Calculation for total size in `fp32` is incorrect, expected {total_size} but received {output[0][2]}"
+        )
 
     @require_transformers
     def test_no_split_modules(self):
@@ -538,12 +538,12 @@ class ModelEstimatorTester(unittest.TestCase):
         output = gather_data(args)
         # The largest layer and total size of the model in bytes
         largest_layer, total_size = 9437184, 102441032
-        assert (
-            largest_layer == output[0][1]
-        ), f"Calculation for largest layer size in `fp32` is incorrect, expected {largest_layer} but received {output[0][1]}"
-        assert (
-            total_size == output[0][2]
-        ), f"Calculation for total size in `fp32` is incorrect, expected {total_size} but received {output[0][2]}"
+        assert largest_layer == output[0][1], (
+            f"Calculation for largest layer size in `fp32` is incorrect, expected {largest_layer} but received {output[0][1]}"
+        )
+        assert total_size == output[0][2], (
+            f"Calculation for total size in `fp32` is incorrect, expected {total_size} but received {output[0][2]}"
+        )
 
 
 class ToFSDP2Tester(unittest.TestCase):
diff --git a/tests/test_fp8.py b/tests/test_fp8.py
index 66645fd8..b7c69783 100644
--- a/tests/test_fp8.py
+++ b/tests/test_fp8.py
@@ -55,9 +55,9 @@ def can_convert_te_model():
 
 
 def maintain_proper_deepspeed_config(expected_version):
-    assert (
-        AcceleratorState().deepspeed_plugin.zero_stage == expected_version
-    ), f"Expected zero stage {expected_version} but got {AcceleratorState().deepspeed_plugin.zero_stage}"
+    assert AcceleratorState().deepspeed_plugin.zero_stage == expected_version, (
+        f"Expected zero stage {expected_version} but got {AcceleratorState().deepspeed_plugin.zero_stage}"
+    )
 
 
 def can_convert_ao_model():
diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py
index 334e5526..e55c6561 100644
--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -33,13 +33,13 @@ def one_cycle_test(num_processes=2, step_scheduler_with_optimizer=True, split_ba
     # Optimizer has stepped
     scheduler.step()
     if step_scheduler_with_optimizer or (num_processes == 1):
-        assert (
-            scheduler.scheduler.last_epoch == num_processes
-        ), f"Last Epoch ({scheduler.scheduler.last_epoch}) != Num Processes ({num_processes})"
+        assert scheduler.scheduler.last_epoch == num_processes, (
+            f"Last Epoch ({scheduler.scheduler.last_epoch}) != Num Processes ({num_processes})"
+        )
     else:
-        assert (
-            scheduler.scheduler.last_epoch != num_processes
-        ), f"Last Epoch ({scheduler.scheduler.last_epoch}) == Num Processes ({num_processes})"
+        assert scheduler.scheduler.last_epoch != num_processes, (
+            f"Last Epoch ({scheduler.scheduler.last_epoch}) == Num Processes ({num_processes})"
+        )
 
 
 def lambda_test(num_processes=2, step_scheduler_with_optimizer=True, split_batches=False):
@@ -53,18 +53,18 @@ def lambda_test(num_processes=2, step_scheduler_with_optimizer=True, split_batch
     optimizer._is_overflow = False
     scheduler.step()
     expected_lr = 1 - (num_processes if (step_scheduler_with_optimizer and not split_batches) else 1) / 10
-    assert (
-        scheduler.get_last_lr()[0] == expected_lr
-    ), f"Wrong lr found at first step, expected {expected_lr}, got {scheduler.get_last_lr()[0]}"
+    assert scheduler.get_last_lr()[0] == expected_lr, (
+        f"Wrong lr found at first step, expected {expected_lr}, got {scheduler.get_last_lr()[0]}"
+    )
 
     # Optimizer has not stepped
     optimizer._is_overflow = True
     scheduler.step()
     if not step_scheduler_with_optimizer:
         expected_lr = 1 - 2 / 10
-        assert (
-            scheduler.get_last_lr()[0] == expected_lr
-        ), f"Wrong lr found at second step, expected {expected_lr}, got {scheduler.get_last_lr()[0]}"
+        assert scheduler.get_last_lr()[0] == expected_lr, (
+            f"Wrong lr found at second step, expected {expected_lr}, got {scheduler.get_last_lr()[0]}"
+        )
 
 
 def accumulation_test(num_processes: int = 2):
@@ -92,12 +92,12 @@ def accumulation_test(num_processes: int = 2):
 
             scheduler.step()
             if i == (10 * num_steps - 2):
-                assert (
-                    scheduler.get_last_lr()[0] != 0
-                ), f"Wrong lr found at second-to-last step, expected non-zero, got {scheduler.get_last_lr()[0]}. num_steps: {num_steps}"
-        assert (
-            scheduler.get_last_lr()[0] == 0
-        ), f"Wrong lr found at last step, expected 0, got {scheduler.get_last_lr()[0]}"
+                assert scheduler.get_last_lr()[0] != 0, (
+                    f"Wrong lr found at second-to-last step, expected non-zero, got {scheduler.get_last_lr()[0]}. num_steps: {num_steps}"
+                )
+        assert scheduler.get_last_lr()[0] == 0, (
+            f"Wrong lr found at last step, expected 0, got {scheduler.get_last_lr()[0]}"
+        )
         GradientState._reset_state()
 
 
diff --git a/tests/test_state_checkpointing.py b/tests/test_state_checkpointing.py
index 160e0a25..bb24c672 100644
--- a/tests/test_state_checkpointing.py
+++ b/tests/test_state_checkpointing.py
@@ -421,9 +421,9 @@ if __name__ == "__main__":
     for group in optimizer.param_groups:
         param_device = group["params"][0].device
         break
-    assert (
-        param_device.type == torch.device("cpu").type
-    ), f"Loaded optimizer states did not match, expected to be loaded on the CPU but got {param_device}"
+    assert param_device.type == torch.device("cpu").type, (
+        f"Loaded optimizer states did not match, expected to be loaded on the CPU but got {param_device}"
+    )
 
     # Check device state
     model.to(accelerator.device)
@@ -431,9 +431,9 @@ if __name__ == "__main__":
     for group in optimizer.param_groups:
         param_device = group["params"][0].device
         break
-    assert (
-        param_device.type == accelerator.device.type
-    ), f"Loaded optimizer states did not match, expected to be loaded on {accelerator.device} but got {param_device}"
+    assert param_device.type == accelerator.device.type, (
+        f"Loaded optimizer states did not match, expected to be loaded on {accelerator.device} but got {param_device}"
+    )
 
     # Check error
     with pytest.raises(TypeError, match="Unsupported optimizer map location passed"):
diff --git a/tests/xla_spawn.py b/tests/xla_spawn.py
index 66ed5ee4..409f3302 100644
--- a/tests/xla_spawn.py
+++ b/tests/xla_spawn.py
@@ -40,9 +40,7 @@ def parse_args():
     """
     parser = ArgumentParser(
         description=(
-            "PyTorch TPU distributed training launch "
-            "helper utility that will spawn up "
-            "multiple distributed processes"
+            "PyTorch TPU distributed training launch helper utility that will spawn up multiple distributed processes"
         )
     )
 
diff --git a/utils/log_reports.py b/utils/log_reports.py
index 151643e2..6775cb76 100644
--- a/utils/log_reports.py
+++ b/utils/log_reports.py
@@ -56,7 +56,7 @@ for log in Path().glob("*.log"):
             if line.get("nodeid", "") != "":
                 test = line["nodeid"]
                 if line.get("duration", None) is not None:
-                    duration = f'{line["duration"]:.4f}'
+                    duration = f"{line['duration']:.4f}"
                     if line.get("outcome", "") == "failed":
                         section_num_failed += 1
                         failed.append([test, duration, log.name.split("_")[0]])
@@ -136,7 +136,7 @@ if os.environ.get("TEST_TYPE", "") != "":
                 "text": "Check Action results",
                 "emoji": True,
             },
-            "url": f'https://github.com/{os.environ["GITHUB_REPOSITORY"]}/actions/runs/{os.environ["GITHUB_RUN_ID"]}',
+            "url": f"https://github.com/{os.environ['GITHUB_REPOSITORY']}/actions/runs/{os.environ['GITHUB_RUN_ID']}",
         },
     }
     payload.append(action_button)