Merge pull request #67 from mainframecomputer/bugfix/MFE-524-fix-conduct-tool-and-json-handling

philippe-page · web-flow · commit a1a80de7b9ca · 2025-03-17T08:57:20.000-04:00
Bugfix/MFE-524: fix conduct tool and JSON handling
diff --git a/packages/python/pyproject.toml b/packages/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "mainframe-orchestra"
-version = "0.0.28"
+version = "0.0.29"
 description = "Mainframe-Orchestra is a lightweight, open-source agentic framework for building LLM based pipelines and self-orchestrating multi-agent teams"
 authors = [
     "Mainframe Computer Inc. <hi@mainfra.me>",
diff --git a/packages/python/src/mainframe_orchestra/__init__.py b/packages/python/src/mainframe_orchestra/__init__.py
@@ -3,7 +3,7 @@
 """
 # Copyright 2024 Mainframe-Orchestra Contributors. Licensed under Apache License 2.0.
 
-__version__ = "0.0.28"
+__version__ = "0.0.29"
 
 import importlib
 
diff --git a/packages/python/src/mainframe_orchestra/llm.py b/packages/python/src/mainframe_orchestra/llm.py
@@ -56,6 +56,7 @@
 )
 
 from .utils.braintrust_utils import wrap_openai
+from .utils.parse_json_response import parse_json_response
 
 # Import the configured logger
 from .utils.logging_config import logger
@@ -125,53 +126,16 @@ def set_verbosity(value: Union[str, bool, int]):
             logger.setLevel(logging.WARNING)
 
 
-def parse_json_response(response: str) -> dict:
-    """
-    Parse a JSON response, handling potential formatting issues.
-
-    Args:
-        response (str): The JSON response string to parse.
-
-    Returns:
-        dict: The parsed JSON data.
-
-    Raises:
-        ValueError: If the JSON cannot be parsed after multiple attempts.
-    """
-    # First attempt: Try to parse the entire response
-    try:
-        return json.loads(response)
-    except json.JSONDecodeError:
-        # Second attempt: Find the first complete JSON object
-        json_pattern = r"(\{(?:[^{}]|(?:\{[^{}]*\}))*\})"
-        json_matches = re.finditer(json_pattern, response, re.DOTALL)
-
-        for match in json_matches:
-            try:
-                result = json.loads(match.group(1))
-                # Validate it's a dict and has expected structure
-                if isinstance(result, dict):
-                    return result
-            except json.JSONDecodeError:
-                continue
-
-        # Third attempt: Try to cleave strings before and after JSON
-        cleaved_json = response.strip().lstrip("`").rstrip("`")
-        try:
-            return json.loads(cleaved_json)
-        except json.JSONDecodeError as e:
-            logger.warning(f"All JSON parsing attempts failed: {e}")
-            raise ValueError(f"Invalid JSON structure: {e}")
-
-
 class OpenAICompatibleProvider:
     """
     Base class for handling OpenAI-compatible API providers.
     This handles providers that use the OpenAI API format but with different base URLs.
     """
 
     @staticmethod
-    async def _prepare_image_data(image_data: Union[str, List[str]], provider_name: str) -> Union[str, List[str]]:
+    async def _prepare_image_data(
+        image_data: Union[str, List[str]], provider_name: str
+    ) -> Union[str, List[str]]:
         """Prepare image data according to provider requirements"""
         if not image_data:
             return image_data
@@ -184,7 +148,7 @@ async def _prepare_image_data(image_data: Union[str, List[str]], provider_name:
                 # Download and convert URL to base64
                 response = requests.get(img)
                 response.raise_for_status()
-                base64_data = base64.b64encode(response.content).decode('utf-8')
+                base64_data = base64.b64encode(response.content).decode("utf-8")
 
                 if provider_name in ["OpenAI", "Gemini"]:
                     # These providers need data URL format
@@ -220,7 +184,9 @@ async def send_request(
         try:
             # Process image data if present
             if image_data:
-                image_data = await OpenAICompatibleProvider._prepare_image_data(image_data, provider_name)
+                image_data = await OpenAICompatibleProvider._prepare_image_data(
+                    image_data, provider_name
+                )
 
             spinner = Halo(text=f"Sending request to {provider_name}...", spinner="dots")
             spinner.start()
@@ -563,6 +529,17 @@ async def send_anthropic_request(
                             {"role": "user", "content": f"Function result: {content}"}
                         )
 
+            # If JSON output is required, add instruction to the system message
+            if require_json_output:
+                json_instruction = "Do not comment before or after the JSON, or provide backticks or language declarations, return only the JSON object."
+
+                # If we have a system message, append the instruction
+                if system_message is not None:
+                    system_message += f"\n\n{json_instruction}"
+                else:
+                    # If no system message exists, create one
+                    system_message = json_instruction
+
             # Handle image data if present
             if image_data:
                 if isinstance(image_data, str):
@@ -591,28 +568,32 @@ async def send_anthropic_request(
                         try:
                             response = requests.get(img)
                             response.raise_for_status()
-                            image_base64 = base64.b64encode(response.content).decode('utf-8')
-                            last_msg["content"].append({
-                                "type": "image",
-                                "source": {
-                                    "type": "base64",
-                                    "media_type": "image/jpeg",
-                                    "data": image_base64
+                            image_base64 = base64.b64encode(response.content).decode("utf-8")
+                            last_msg["content"].append(
+                                {
+                                    "type": "image",
+                                    "source": {
+                                        "type": "base64",
+                                        "media_type": "image/jpeg",
+                                        "data": image_base64,
+                                    },
                                 }
-                            })
+                            )
                         except Exception as e:
                             logger.error(f"Failed to process image URL: {str(e)}")
                             raise
                     else:
                         # For base64 data, use it directly
-                        last_msg["content"].append({
-                            "type": "image",
-                            "source": {
-                                "type": "base64",
-                                "media_type": "image/jpeg",
-                                "data": img
+                        last_msg["content"].append(
+                            {
+                                "type": "image",
+                                "source": {
+                                    "type": "base64",
+                                    "media_type": "image/jpeg",
+                                    "data": img,
+                                },
                             }
-                        })
+                        )
 
             # Log request details
             logger.debug(
@@ -1656,16 +1637,16 @@ def _clean_response_tags(text: str) -> str:
 
         # Remove common tag patterns that appear in HuggingFace model responses
         # This handles tags like <||, <|assistant|>, etc.
-        cleaned = re.sub(r'<\|[^>]*\|>', '', text)
+        cleaned = re.sub(r"<\|[^>]*\|>", "", text)
 
         # Handle incomplete tags at the beginning or end
-        cleaned = re.sub(r'^<\|.*?(?=\w)', '', cleaned)  # Beginning of text
-        cleaned = re.sub(r'(?<=\w).*?\|>$', '', cleaned)  # End of text
+        cleaned = re.sub(r"^<\|.*?(?=\w)", "", cleaned)  # Beginning of text
+        cleaned = re.sub(r"(?<=\w).*?\|>$", "", cleaned)  # End of text
 
         # Handle other special cases
-        cleaned = re.sub(r'<\|\|', '', cleaned)
-        cleaned = re.sub(r'<\|', '', cleaned)
-        cleaned = re.sub(r'\|>', '', cleaned)
+        cleaned = re.sub(r"<\|\|", "", cleaned)
+        cleaned = re.sub(r"<\|", "", cleaned)
+        cleaned = re.sub(r"\|>", "", cleaned)
 
         return cleaned.strip()
 
diff --git a/packages/python/src/mainframe_orchestra/orchestration.py b/packages/python/src/mainframe_orchestra/orchestration.py
@@ -8,7 +8,7 @@
 from pydantic import BaseModel
 from .task import Task
 from .agent import Agent
-
+from .utils.logging_config import logger
 
 class TaskInstruction(BaseModel):
     task_id: str
@@ -30,15 +30,15 @@ def create_conduct_tool(agents: List[Any], tool_summaries: bool) -> Callable:
             }
 
             # Format available agents string with their tools
-            available_agents = "\n            ".join(
+            available_agents = "No agents have been installed yet. Notify the user to install or add some agents first." if not agents else "\n            ".join(
                 f"- {agent_id}\n    ({agent_id}'s tools: {', '.join(agent_tools[agent_id] or ['No tools'])})"
                 for agent_id in sorted(agent_map.keys())
             )
 
             async def conduct_tool(
                 tasks: List, event_queue: Optional[Queue] = None, **kwargs
             ) -> Any:
-                print(f"[DELEGATION] Starting conduct delegation with {len(tasks)} tasks")
+                logger.debug(f"Starting conduct delegation with {len(tasks)} tasks")
 
                 # Add max iteration limits
                 MAX_AGENT_ITERATIONS = 3  # Maximum times an agent can attempt to complete a task
@@ -77,22 +77,26 @@ async def conduct_tool(
                     # Convert dict to TaskInstruction model
                     task = TaskInstruction.model_validate(instruction_item)
 
+                    # Add progress logging
+                    current_task_index = tasks.index(instruction_item) + 1
+                    logger.info(f"Processing task {current_task_index} of {len(tasks)}: '{task.task_id}' with agent '{task.agent_id}'")
+
                     target_agent = agent_map.get(task.agent_id)
-                    print(
-                        f"[DELEGATION] Processing task '{task.task_id}' with agent '{task.agent_id}'"
+                    logger.debug(
+                        f"Processing task '{task.task_id}' with agent '{task.agent_id}'"
                     )
 
                     if not target_agent:
-                        print(
-                            f"[DELEGATION] Warning: Agent {task.agent_id} not found. Available agents: {list(agent_map.keys())}"
+                        logger.warning(
+                            f"Warning: Agent {task.agent_id} not found. Available agents: {list(agent_map.keys())}"
                         )
                         continue
 
                     # Track agent iterations
                     agent_call_counts[task.agent_id] = agent_call_counts.get(task.agent_id, 0) + 1
                     if agent_call_counts[task.agent_id] > MAX_AGENT_ITERATIONS:
-                        print(
-                            f"[DELEGATION] Warning: Agent {task.agent_id} exceeded maximum iterations"
+                        logger.warning(
+                            f"Warning: Agent {task.agent_id} exceeded maximum iterations"
                         )
                         continue
 
@@ -108,7 +112,7 @@ async def conduct_tool(
                         }
                     ]
 
-                    print(f"\n[DELEGATION] Starting task for agent: {task.agent_id}")
+                    logger.debug(f"\nStarting task for agent: {task.agent_id}")
                     instruction_text = task.instruction + (
                         "\n\nUse the following information from previous tasks:\n\n"
                         + "\n\n".join(
@@ -121,20 +125,19 @@ async def conduct_tool(
                     )
 
                     async def nested_callback(result):
-                        if isinstance(result, dict) and result.get("tool"):
+                        if isinstance(result, dict) and (result.get("tool") or result.get("type") == "delegation_result"):
                             current_time = datetime.now().isoformat()
 
                             # Ensure any existing timestamp is serializable
                             if "timestamp" in result and isinstance(result["timestamp"], datetime):
                                 result["timestamp"] = result["timestamp"].isoformat()
 
                             # Standardize message format for all delegation-related events
-                            if result.get("type") in ["delegation_result", "final_response"]:
+                            if result.get("type") == "delegation_result":
                                 message = {
                                     "type": "delegation_result",
-                                    "role": "delegation",
-                                    "name": target_agent.agent_id,
                                     "content": result.get("content", ""),
+                                    "agent_id": target_agent.agent_id,
                                     "conducted_task_id": task.task_id,
                                     "timestamp": current_time,
                                 }
@@ -177,13 +180,12 @@ async def nested_callback(result):
                                 msg_signature += (
                                     f":{result.get('tool')}:{json.dumps(result.get('params', {}))}"
                                 )
-                                # print(f"[DELEGATION DEBUG] Tool call: {result.get('tool')}")
+
                             elif result.get("type") == "tool_result":
                                 msg_signature += f":{result.get('tool')}"
-                                # print(f"[DELEGATION DEBUG] Tool result received")
+
                             elif result.get("type") == "delegation_result":
                                 msg_signature += f":delegation:{result.get('conducted_task_id')}"
-                                # print(f"[DELEGATION DEBUG] Conductor result received for task: {result.get('conducted_task_id')}")
 
                             # Send to event queue if available
                             if event_queue:
@@ -199,6 +201,15 @@ async def nested_callback(result):
                         tool_summaries=tool_summaries,
                     )
 
+                    # Generate a delegation result event after task completion
+                    delegation_result = {
+                        "type": "delegation_result",
+                        "content": task_result,
+                        "agent_id": target_agent.agent_id,
+                        "conducted_task_id": task.task_id
+                    }
+                    await nested_callback(delegation_result)
+
                     # Include context in the result
                     context = "\n\n".join(
                         f"Results from task '{dep_id}':\n{all_results[dep_id]}"
@@ -209,13 +220,16 @@ async def nested_callback(result):
                         f"{context}\n\n{task_result}" if context else task_result
                     )
 
-                # Return the final combined results
-                return "\n\n".join(
-                    f"Task '{task_id}':\n"
-                    f"Instruction: {next((item['instruction'] for item in tasks if item['task_id'] == task_id), '')}\n"
-                    f"Result: {result}"
+                # Return results as JSON structure
+                return json.dumps([
+                    {
+                        "task_id": task_id,
+                        "agent": next((item['agent_id'] for item in tasks if item['task_id'] == task_id), ''),
+                        "instruction": next((item['instruction'] for item in tasks if item['task_id'] == task_id), ''),
+                        "result": result
+                    }
                     for task_id, result in all_results.items()
-                )
+                ])
 
             conduct_tool.__name__ = "conduct_tool"
             conduct_tool.__doc__ = f"""Tool function to orchestrate multiple agents in a single, coordinated multi-agent flow. Tasks should be submitted in a single list, and they will be executed in the order they are submitted. Do not make separate calls to the tool.
@@ -275,7 +289,6 @@ def create_composition_tool(agents: List[Agent]) -> Callable:
             async def composition_tool(
                 goal: str, event_queue: Optional[Queue] = None, **kwargs
             ) -> Any:
-                # KEEP: Create composer agent instance with all these fields
                 composer_agent = Agent(
                     agent_id="composer",
                     role="Composer",
@@ -287,7 +300,7 @@ async def composition_tool(
                     llm=next(iter(agents)).llm,
                 )
 
-                # KEEP: Initialize messages array with BOTH system and user messages
+                # Initialize messages array with both system and user messages
                 messages = [
                     {
                         "role": "system",
@@ -312,7 +325,6 @@ async def composition_tool(
                 ]
 
                 try:
-                    # KEEP: All these parameters to Task.create()
                     task_result = await Task.create(
                         agent=composer_agent,
                         instruction=f"Create a detailed plan for achieving this goal: {goal}",
@@ -322,7 +334,7 @@ async def composition_tool(
                     )
                     return task_result
                 except Exception as e:
-                    print(f"[COMPOSITION ERROR] Failed to create task: {str(e)}")
+                    logger.error(f"[COMPOSITION ERROR] Failed to create task: {str(e)}")
                     raise
 
             composition_tool.__name__ = "composition_flow"
diff --git a/packages/python/src/mainframe_orchestra/task.py b/packages/python/src/mainframe_orchestra/task.py
diff --git a/packages/python/src/mainframe_orchestra/utils/parse_json_response.py b/packages/python/src/mainframe_orchestra/utils/parse_json_response.py