Skip to content

Commit 8a9d8c9

Browse files
authored
Merge pull request #17 from Agent-One-Lab/verl
Update verl version to 0.6.x
2 parents 1b9cedd + a78939e commit 8a9d8c9

10 files changed

Lines changed: 175 additions & 152 deletions

File tree

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
<a href="https://github.com/Agent-One-Lab/AgentFly" target="_blank"><img alt="Static Badge" src="https://img.shields.io/github/stars/Agent-One-Lab/AgentFly?style=for-the-badge&logo=github&color=a2d2ff"></a>
1111
</p>
1212
<p align="center">
13-
<a href="./assets/images/wechat.jpg" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/WeChat-%23e9edc9?style=for-the-badge&logo=wechat"></a>
13+
<a href="https://agent-one-lab.github.io/assets/agentfly/wechat.jpg" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/WeChat-%23e9edc9?style=for-the-badge&logo=wechat"></a>
1414
<a href="https://discord.gg/Ze5Z9QhhJ3" target="_blank"><img alt="Static Badge" src="https://img.shields.io/badge/discord-%23dedbd2?style=for-the-badge&logo=discord"></a>
1515
</p>
1616
<p align="center">
@@ -39,16 +39,18 @@ AgentFly is an extensible framework for building LLM agents with reinforcement l
3939

4040
## News
4141

42-
**08/2025 Multi-Modal (Vision) Agent Training Support** - Thanks to the powerful template system, AgentFly now supports training vision-language agents! 🎉 Train agents that can see and understand visual content, including GUI automation and image-based QA. See our [predefined training examples](docs/examples/predefined_training_examples.md) for ready-to-use scripts.
42+
**12/2025 verl update**: Updated verl to 0.6.x version.
43+
44+
**08/2025 Multi-Modal (Vision) Agent Training Support**: Thanks to the powerful template system, AgentFly now supports training vision-language agents! 🎉 Train agents that can see and understand visual content, including GUI automation and image-based QA. See our [predefined training examples](docs/examples/predefined_training_examples.md) for ready-to-use scripts.
4345

4446
---
4547

46-
**08/2025 Chat Template System** - A flexible framework for creating conversation templates with multi-model support, vision capabilities, and tool integration. [Learn more →](docs/chat_template/)
48+
**08/2025 Chat Template System**: A flexible framework for creating conversation templates with multi-model support, vision capabilities, and tool integration. [Learn more →](docs/chat_template/)
4749

4850
## Installation
4951
**Option 1**: One-line Installation:
5052
```
51-
bash install.sh # Assume conda with python3.10.x
53+
bash install.sh # Assume conda with python3.12.x
5254
```
5355
**Option 2**: Customized Installation
5456

@@ -144,7 +146,7 @@ During training, `question` will be used to format the input messages, while oth
144146
#### 2. Tools & Rewards
145147
You can use any existing tool, which is in [documentation](https://agentfly.readthedocs.io/), or define a tool by decorating it with `@tool`. The output should either be a string, or a dictionary containing `observation` as a key.
146148
```python
147-
@reward(name="customized_tool")
149+
@tool(name="customized_tool")
148150
def customized_tool(arg1, arg2):
149151
# tool logic here
150152
```

agentfly/agents/agent_base.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(
5555
log_file: str = "agent",
5656
streaming: str = "console",
5757
debug: bool = False,
58-
monitors: List[str] = [],
58+
monitors: List[str] = ["wandb"],
5959
wandb_project_name: str = None,
6060
wandb_run_name: str = None,
6161
local_cache_dir: str = None,
@@ -184,6 +184,12 @@ def _preprocess_messages(self, messages: List[Dict]):
184184

185185
return messages_list.to_list()
186186

187+
def _preprocess_backends(self):
188+
self.llm_engine.preprocess()
189+
190+
def _postprocess_backends(self):
191+
self.llm_engine.postprocess()
192+
187193
def _initialize_monitor(self, monitors: List[str]) -> None:
188194
for monitor in monitors:
189195
if monitor == "local":
@@ -212,14 +218,17 @@ async def run(self,
212218
213219
"""
214220
processed_messages = self._preprocess_messages(messages)
221+
self._preprocess_backends()
215222

216-
return await self.run_async(
223+
await self.run_async(
217224
processed_messages,
218225
max_turns=max_turns,
219226
generation_config=generation_config,
220227
**kwargs,
221228
)
222229

230+
self._postprocess_backends()
231+
223232
def set_llm_engine(self, llm_engine: Any, tokenizer: Any, processor: Any):
224233
assert self.backend == "async_verl", "Only async verl backend is supported for now"
225234

agentfly/agents/llm_backends/backend_configs.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from dataclasses import dataclass
1+
from dataclasses import dataclass, field
22
from typing import Optional, Dict, Any, List
33

44
from vllm import AsyncEngineArgs
@@ -39,7 +39,7 @@ class VLLMConfig:
3939

4040

4141

42-
@dataclass
42+
@dataclass(init=False)
4343
class AsyncVLLMConfig:
4444
"""Configuration for Async VLLM backend with engine arguments. Arguments are the same as vLLM's arguments, which can
4545
be found at https://docs.vllm.ai/en/latest/configuration/engine_args.html. Here listed some important arguments:
@@ -53,10 +53,21 @@ class AsyncVLLMConfig:
5353
data_parallel_size (int): Data parallel size.
5454
tensor_parallel_size (int): Tensor parallel size.
5555
"""
56-
engine_args: AsyncEngineArgs = AsyncEngineArgs()
57-
58-
def __init__(self, **kwargs):
59-
self.engine_args = AsyncEngineArgs(**kwargs)
56+
engine_args: AsyncEngineArgs
57+
58+
def __init__(self, engine_args: Optional[AsyncEngineArgs] = None, **kwargs):
59+
"""Initialize AsyncVLLMConfig.
60+
61+
Args:
62+
engine_args: Optional AsyncEngineArgs instance. If provided, kwargs are ignored.
63+
**kwargs: Arguments to pass to AsyncEngineArgs if engine_args is not provided.
64+
"""
65+
if engine_args is not None:
66+
self.engine_args = engine_args
67+
elif kwargs:
68+
self.engine_args = AsyncEngineArgs(**kwargs)
69+
else:
70+
self.engine_args = AsyncEngineArgs()
6071

6172

6273
@dataclass

agentfly/agents/llm_backends/llm_backends.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
This module provides a unified interface to different LLM implementations.
44
"""
55
import asyncio
6-
from asyncore import loop
7-
from collections import deque
86
import copy
97
from functools import partial
108
import time
@@ -54,6 +52,10 @@ def apply_chat_template(self, messages_list: List[List[Dict]], template: str, ad
5452
vision_inputs.append(chat.vision_inputs())
5553

5654
return prompts, vision_inputs
55+
56+
def prepare(self):
57+
"""Prepare the backend"""
58+
pass
5759

5860
def generate(self, messages_list: str, **kwargs) -> str:
5961
"""Generate text from prompt"""
@@ -404,6 +406,18 @@ def __init__(self, llm_engine, model_name_or_path: str, template: str, max_lengt
404406
trust_remote_code=True,
405407
)
406408
self.llm_engine = llm_engine
409+
410+
def preprocess(self):
411+
"""Preprocess the backend"""
412+
self.llm_engine.wake_up()
413+
if self.llm_engine.reward_model_manager:
414+
self.llm_engine.reward_model_manager.wake_up()
415+
416+
def postprocess(self):
417+
"""Postprocess the backend"""
418+
self.llm_engine.sleep()
419+
if self.llm_engine.reward_model_manager:
420+
self.llm_engine.reward_model_manager.sleep()
407421

408422
def _process_inputs(self, prompts: List[str], vision_inputs: Dict[str, List[PIL.Image.Image]]):
409423
inputs = []
@@ -433,13 +447,31 @@ def _convert_to_openai_chat_without_tool_call_processing(self, messages: list) -
433447
if "tool_choice" in message:
434448
del message["tool_choice"]
435449
return messages
450+
451+
def _process_messages(self, messages: List[Dict]):
452+
new_messages = []
453+
for message in messages:
454+
new_message = {}
455+
new_message.update(message)
456+
if isinstance(message["content"], list):
457+
if len(message["content"]) == 1:
458+
assert message["content"][0]["type"] == "text"
459+
new_message["content"] = message["content"][0]["text"]
460+
else:
461+
new_message["content"] = message["content"]
462+
463+
new_messages.append(new_message)
464+
return new_messages
465+
436466

437467
async def generate_async(self, messages_list: str, **kwargs) -> str:
438468
"""Generate text from prompt using Verl"""
439469
# We need to build a DataProto from the prompts
440470

441471
generation_config = {}
442472
tensors = torch.ones(len(messages_list), dtype=torch.int64)
473+
# messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
474+
messages_list = [self._process_messages(messages) for messages in messages_list]
443475
messages_list = [self._convert_to_openai_chat_without_tool_call_processing(messages) for messages in messages_list]
444476
tools = kwargs.get("tools", None)
445477
tools_list = np.array([tools] * len(messages_list))
@@ -453,8 +485,11 @@ async def generate_async(self, messages_list: str, **kwargs) -> str:
453485

454486
batch = DataProto.from_single_dict(data, meta_info={"n": n, "temperature": temperature})
455487

456-
gen_batch_output = await self.llm_engine.generate_sequences_async(batch, **generation_config)
457-
response_texts = gen_batch_output.batch['responses'].tolist() # np.array of strings with length BS
488+
gen_batch_output = await self.llm_engine.generate_sequences_async(batch)
489+
response_ids = gen_batch_output.batch['responses'].tolist() # np.array of strings with length BS
490+
assert len(response_ids) == len(messages_list)
491+
response_texts = [self.tokenizer.decode(response_id, skip_special_tokens=True) for response_id in response_ids]
492+
458493
return response_texts
459494

460495

agentfly/agents/specialized/hf_agent.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55
from typing import List
66
from ..agent_base import BaseAgent
77
from ..parsers import extract_tool_calls
8+
import logging
9+
10+
logger = logging.getLogger(__file__)
811

912
class HFAgent(BaseAgent):
1013
def __init__(self, model_name_or_path: str, **kwargs):
1114
super().__init__(model_name_or_path, **kwargs)
1215

1316
def parse(self, responses: List[str], **kwargs) -> List[Dict]:
17+
logger.debug(f"[HFAgent] Responses: {responses}")
1418
new_messages_list = []
1519
for response in responses:
1620
tool_calls = extract_tool_calls(response)

agentfly/envs/manager/resource.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ def cleanup_envs():
1919
for env in tqdm(GLOBAL_ENVS):
2020
env.close()
2121

22-
import atexit, signal
22+
# import atexit, signal
2323

24-
atexit.register(cleanup_envs)
25-
for sig in [signal.SIGTERM, signal.SIGINT]:
26-
signal.signal(sig, cleanup_envs)
24+
# atexit.register(cleanup_envs)
25+
# for sig in [signal.SIGTERM, signal.SIGINT]:
26+
# signal.signal(sig, cleanup_envs)

agentfly/tests/scripts/test_cpu_runs.sh

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
# Test CPU runs
44

55

6-
pytest -x agentfly/tests/unit/tools/
7-
pytest -x agentfly/tests/unit/envs/
8-
pytest -x agentfly/tests/unit/rewards/
9-
10-
pytest -x agentfly/tests/unit/templates/
6+
pytest -x agentfly/tests/unit/tools/ || exit 1
7+
pytest -x agentfly/tests/unit/envs/ || exit 1
8+
pytest -x agentfly/tests/unit/rewards/ || exit 1
9+
pytest -x agentfly/tests/unit/templates/ || exit 1

0 commit comments

Comments
 (0)