mirror of
https://github.com/volcengine/verl.git
synced 2025-10-20 13:43:50 +08:00
[trainer] fix: address serialization issues when using async reward function and ray ppo trainer (#3769)
This commit is contained in:
@@ -1105,7 +1105,9 @@ class RayPPOTrainer:
|
|||||||
batch = batch.union(reward_tensor)
|
batch = batch.union(reward_tensor)
|
||||||
|
|
||||||
if self.config.reward_model.launch_reward_fn_async:
|
if self.config.reward_model.launch_reward_fn_async:
|
||||||
future_reward = compute_reward_async.remote(data=batch, reward_fn=self.reward_fn)
|
future_reward = compute_reward_async.remote(
|
||||||
|
data=batch, config=self.config, tokenizer=self.tokenizer
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.reward_fn)
|
reward_tensor, reward_extra_infos_dict = compute_reward(batch, self.reward_fn)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user