Skip to content

Commit 8a9d8c9

Browse files
authored
Merge pull request #17 from Agent-One-Lab/verl
Update verl version to 0.6.x
2 parents 1b9cedd + a78939e commit 8a9d8c9

10 files changed

Lines changed: 175 additions & 152 deletions

File tree

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
<a href="https://github.com/Agent-One-Lab/AgentFly" target="_blank"><img alt="Static Badge" src="https://img.shields.io/github/stars/Agent-One-Lab/AgentFly?style=for-the-badge&logo=github&color=a2d2ff"></a>
1111
</p>
1212
<p align="center">
13-
<a href="./assets/images/wechat.jpg" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/WeChat-%23e9edc9?style=for-the-badge&logo=wechat"></a>
13+
<a href="https://agent-one-lab.github.io/assets/agentfly/wechat.jpg" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/WeChat-%23e9edc9?style=for-the-badge&logo=wechat"></a>
1414
<a href="https://discord.gg/Ze5Z9QhhJ3" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/discord-%23dedbd2?style=for-the-badge&logo=discord"></a>
1515
</p>
1616
<p align="center">
@@ -39,16 +39,18 @@ AgentFly is an extensible framework for building LLM agents with reinforcement l
3939

4040
## News
4141

42-
**08/2025 Multi-Modal (Vision) Agent Training Support** - Thanks to the powerful template system, AgentFly now supports training vision-language agents! 🎉 Train agents that can see and understand visual content, including GUI automation and image-based QA. See our [predefined training examples](docs/examples/predefined_training_examples.md) for ready-to-use scripts.
42+
**12/2025 verl update**: Updated verl to 0.6.x version.
43+
44+
**08/2025 Multi-Modal (Vision) Agent Training Support**: Thanks to the powerful template system, AgentFly now supports training vision-language agents! 🎉 Train agents that can see and understand visual content, including GUI automation and image-based QA. See our [predefined training examples](docs/examples/predefined_training_examples.md) for ready-to-use scripts.
4345

4446
---
4547

46-
**08/2025 Chat Template System** - A flexible framework for creating conversation templates with multi-model support, vision capabilities, and tool integration. [Learn more →](docs/chat_template/)
48+
**08/2025 Chat Template System**: A flexible framework for creating conversation templates with multi-model support, vision capabilities, and tool integration. [Learn more →](docs/chat_template/)
4749

4850
## Installation
4951
**Option 1**: One-line Installation:
5052
```
51-
bash install.sh # Assume conda with python3.10.x
53+
bash install.sh # Assume conda with python3.12.x
5254
```
5355
**Option 2**: Customized Installation
5456

@@ -144,7 +146,7 @@ During training, `question` will be used to format the input messages, while oth
144146
#### 2. Tools & Rewards
145147
You can use any existing tool, which is in [documentation](https://agentfly.readthedocs.io/), or define a tool by decorating it with `@tool`. The output should either be a string, or a dictionary containing `observation` as a key.
146148
```python
147-
@reward(name="customized_tool")
149+
@tool(name="customized_tool")
148150
def customized_tool(arg1, arg2):
149151
# tool logic here
150152
```

agentfly/agents/agent_base.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(
5555
log_file: str = "agent",
5656
streaming: str = "console",
5757
debug: bool = False,
58-
monitors: List[str] = [],
58+
monitors: List[str] = ["wandb"],
5959
wandb_project_name: str = None,
6060
wandb_run_name: str = None,
6161
local_cache_dir: str = None,
@@ -184,6 +184,12 @@ def _preprocess_messages(self, messages: List[Dict]):
184184

185185
return messages_list.to_list()
186186

187+
def _preprocess_backends(self):
188+
self.llm_engine.preprocess()
189+
190+
def _postprocess_backends(self):
191+
self.llm_engine.postprocess()
192+
187193
def _initialize_monitor(self, monitors: List[str]) -> None:
188194
for monitor in monitors:
189195
if monitor == "local":
@@ -212,14 +218,17 @@ async def run(self,
212218
213219
"""
214220
processed_messages = self._preprocess_messages(messages)
221+
self._preprocess_backends()
215222

216-
return await self.run_async(
223+
await self.run_async(
217224
processed_messages,
218225
max_turns=max_turns,
219226
generation_config=generation_config,
220227
**kwargs,
221228
)
222229

230+
self._postprocess_backends()
231+
223232
def set_llm_engine(self, llm_engine: Any, tokenizer: Any, processor: Any):
224233
assert self.backend == "async_verl", "Only async verl backend is supported for now"
225234

agentfly/agents/llm_backends/backend_configs.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from dataclasses import dataclass
1+
from dataclasses import dataclass, field
22
from typing import Optional, Dict, Any, List
33

44
from vllm import AsyncEngineArgs
@@ -39,7 +39,7 @@ class VLLMConfig:
3939

4040

4141

42-
@dataclass
42+
@dataclass(init=False)
4343
class AsyncVLLMConfig:
4444
"""Configuration for Async VLLM backend with engine arguments. Arguments are the same as vLLM's arguments, which can
4545
be found at https://docs.vllm.ai/en/latest/configuration/engine_args.html. Here listed some important arguments:
@@ -53,10 +53,21 @@ class AsyncVLLMConfig:
5353
data_parallel_size (int): Data parallel size.
5454
tensor_parallel_size (int): Tensor parallel size.
5555
"""
56-
engine_args: AsyncEngineArgs = AsyncEngineArgs()
57-
58-
def __init__(self, **kwargs):
59-
self.engine_args = AsyncEngineArgs(**kwargs)
56+
engine_args: AsyncEngineArgs
57+
58+
def __init__(self, engine_args: Optional[AsyncEngineArgs] = None, **kwargs):
59+
"""Initialize AsyncVLLMConfig.
60+
61+
Args:
62+
engine_args: Optional AsyncEngineArgs instance. If provided, kwargs are ignored.
63+
**kwargs: Arguments to pass to AsyncEngineArgs if engine_args is not provided.
64+
"""
65+
if engine_args is not None:
66+
self.engine_args = engine_args
67+
elif kwargs:
68+
self.engine_args = AsyncEngineArgs(**kwargs)
69+
else:
70+
self.engine_args = AsyncEngineArgs()
6071

6172

6273
@dataclass

agentfly/agents/llm_backends/llm_backends.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
This module provides a unified interface to different LLM implementations.
44
"""
55
import asyncio
6-
from asyncore import loop
7-
from collections import deque
86
import copy
97
from functools import partial
108
import time
@@ -54,6 +52,10 @@ def apply_chat_template(self, messages_list: List[List[Dict]], template: str, ad
5452
vision_inputs.append(chat.vision_inputs())
5553

5654
return prompts, vision_inputs
55+
56+
def prepare(self):
57+
"""Prepare the backend"""
58+
pass
5759

5860
def generate(self, messages_list: str, **kwargs) -> str:
5961
"""Generate text from prompt"""
@@ -404,6 +406,18 @@ def __init__(self, llm_engine, model_name_or_path: str, template: str, max_lengt
404406
trust_remote_code=True,
405407
)
406408
self.llm_engine = llm_engine
409+
410+
def preprocess(self):
411+
"""Preprocess the backend"""
412+
self.llm_engine.wake_up()
413+
if self.llm_engine.reward_model_manager:
414+
self.llm_engine.reward_model_manager.wake_up()
415+
416+
def postprocess(self):
417+
"""Postprocess the backend"""
418+
self.llm_engine.sleep()
419+
if self.llm_engine.reward_model_manager:
420+
self.llm_engine.reward_model_manager.sleep()
407421

408422
def _process_inputs(self, prompts: List[str], vision_inputs: Dict[str, List[PIL.Image.Image]]):
409423
inputs = []
@@ -433,13 +447,31 @@ def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -
433447
if "tool_choice" in message:
434448
del message["tool_choice"]
435449
return messages
450+
451+
def _process_messages(self, messages: List[Dict]):
452+
new_messages = []
453+
for message in messages:
454+
new_message = {}
455+
new_message.update(message)
456+
if isinstance(message["content"], list):
457+
if len(message["content"]) == 1:
458+
assert message["content"][0]["type"] == "text"
459+
new_message["content"] = message["content"][0]["text"]
460+
else:
461+
new_message["content"] = message["content"]
462+
463+
new_messages.append(new_message)
464+
return new_messages
465+
436466

437467
async def generate_async(self, messages_list: str, **kwargs) -> str:
438468
"""Generate text from prompt using Verl"""
439469
# We need to build a DataProto from the prompts
440470

441471
generation_config = {}
442472
tensors = torch.ones(len(messages_list), dtype=torch.int64)
473+
# messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
474+
messages_list = [self._process_messages(messages) for messages in messages_list]
443475
messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
444476
tools = kwargs.get("tools", None)
445477
tools_list = np.array([tools] * len(messages_list))
@@ -453,8 +485,11 @@ async def generate_async(self, messages_list: str, **kwargs) -> str:
453485

454486
batch = DataProto.from_single_dict(data, meta_info={"n": n, "temperature": temperature})
455487

456-
gen_batch_output = await self.llm_engine.generate_sequences_async(batch, **generation_config)
457-
response_texts = gen_batch_output.batch['responses'].tolist() # np.array of strings with length BS
488+
gen_batch_output = await self.llm_engine.generate_sequences_async(batch)
489+
response_ids = gen_batch_output.batch['responses'].tolist() # np.array of strings with length BS
490+
assert len(response_ids) == len(messages_list)
491+
response_texts = [self.tokenizer.decode(response_id, skip_special_tokens=True) for response_id in response_ids]
492+
458493
return response_texts
459494

460495

agentfly/agents/specialized/hf_agent.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55
from typing import List
66
from ..agent_base import BaseAgent
77
from ..parsers import extract_tool_calls
8+
import logging
9+
10+
logger = logging.getLogger(__file__)
811

912
class HFAgent(BaseAgent):
1013
def __init__(self, model_name_or_path: str, **kwargs):
1114
super().__init__(model_name_or_path, **kwargs)
1215

1316
def parse(self, responses: List[str], **kwargs) -> List[Dict]:
17+
logger.debug(f"[HFAgent] Responses: {responses}")
1418
new_messages_list = []
1519
for response in responses:
1620
tool_calls = extract_tool_calls(response)

agentfly/envs/manager/resource.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ def cleanup_envs():
1919
for env in tqdm(GLOBAL_ENVS):
2020
env.close()
2121

22-
import atexit, signal
22+
# import atexit, signal
2323

24-
atexit.register(cleanup_envs)
25-
for sig in [signal.SIGTERM, signal.SIGINT]:
26-
signal.signal(sig, cleanup_envs)
24+
# atexit.register(cleanup_envs)
25+
# for sig in [signal.SIGTERM, signal.SIGINT]:
26+
# signal.signal(sig, cleanup_envs)

agentfly/tests/scripts/test_cpu_runs.sh

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
# Test CPU runs
44

55

6-
pytest -x agentfly/tests/unit/tools/
7-
pytest -x agentfly/tests/unit/envs/
8-
pytest -x agentfly/tests/unit/rewards/
9-
10-
pytest -x agentfly/tests/unit/templates/
6+
pytest -x agentfly/tests/unit/tools/ || exit 1
7+
pytest -x agentfly/tests/unit/envs/ || exit 1
8+
pytest -x agentfly/tests/unit/rewards/ || exit 1
9+
pytest -x agentfly/tests/unit/templates/ || exit 1

0 commit comments

Comments
 (0)