TestAgentApp · abalakrishnan1 · Sep 27, 2025 · Sep 14, 2025 · Sep 19, 2025 · Sep 19, 2025
diff --git a/core/execute.py b/core/execute.py
@@ -1,12 +1,12 @@
 from collections import OrderedDict
 import logging
-import shutil
+import re
 from playwright.sync_api import sync_playwright, BrowserContext
 import os
-from urllib.parse import urljoin
+from urllib.parse import urljoin, urlparse
 from pydantic import ValidationError
-from .planner import PlaywrightCommand, PlaywrightTest, ActionType, get_authenticated_browser_context
-
+from .planner import get_authenticated_browser_context
+from .models import PlaywrightTest, PlaywrightCommand, ActionType
 import json
 
 from .planner import get_authenticated_browser_context
@@ -18,10 +18,6 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere
 
     github_mode = os.environ.get('GITHUB_ACTION_MODE') == 'true'
 
-    if os.path.exists("results"):
-        shutil.rmtree("results")  # remove old directory and contents
-    os.makedirs("results", exist_ok=True)
-
     test_results = {
         "tests": []
     }
@@ -34,6 +30,19 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere
         page_url = urljoin(base_url, page_name)
 
         for test in test_suite["tests"]:
+            test_result = {
+                "description": test['description'],
+                "status": "passed",
+                "error": None,
+                "screenshot": None
+            }
+
+            if test.get('disabled', False):
+                print(f"Skipping test, disabled due to bad selectors: {test['description']}")
+                test_result["status"] = "skipped"
+                test_results["tests"].append(test_result)
+                continue
+
             print(f"\n▶ Running: {test['description']}")
             page = sync_context.new_page()
             response = page.goto(page_url, wait_until="load")
@@ -62,14 +71,7 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere
                     })
                     continue 
 
-            test_result = {
-                "description": test['description'],
-                "status": "passed",
-                "error": None,
-                "screenshot": None
-            }
-
-            # attemptp to execute the test steps
+            # attempt to execute the test steps
             try:
                 validated_test = PlaywrightTest(**test)
                 print(f"✓ Test validation passed: {len(validated_test.steps)} steps")
@@ -86,10 +88,6 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere
                         page.click(selector, timeout=timeout)
                     elif action == ActionType.TYPE:
                         page.fill(selector, value, timeout=timeout)
-                    elif action == ActionType.ASSERT_VISIBLE:
-                        page.wait_for_selector(selector, state="visible", timeout=timeout)
-                    elif action == ActionType.ASSERT_URL:
-                        page.wait_for_url(value, timeout=timeout)
                     elif action == ActionType.NAVIGATE:
                         if value.startswith(('http://', 'https://')):
                             # Absolute URL
@@ -106,9 +104,9 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere
                     if test["expect"]["url"] not in page.url:
                         test_result["status"] = "failed"
                         test_result["error"] = f"URL mismatch: expected {test['expect']['url']}, got {page.url}"
-
+                        raise Exception(test_result["error"])
                 elif "selectorVisible" in test["expect"]:
-                    if not page.locator(test["expect"]["selectorVisible"]).is_visible():
+                    if not page.locator(test["expect"]["selectorVisible"]).is_visible(timeout=timeout):
                         test_result["status"] = "failed"
                         test_result["error"] = f"Selector not visible: {test['expect']['selectorVisible']}"
                         raise Exception(test_result["error"])

diff --git a/core/llm_constants.py b/core/llm_constants.py
@@ -70,23 +70,21 @@
 """
 
 system_prompt = f"""
-You are a UI QA engineer. Given HTML, you return JSON test cases for the page.
+You are a UI QA engineer. Given HTML, you return JSON test cases for the page. You might also receive feedback along with this original prompt with a testcase that needs to be fixed.
 
 Each test case includes:
 - name: a string describing the test
 - steps: a list of actions. Each action has:
-    - action: one of ["click", "type", "assertVisible", "assertURL", "navigate"]
+    - action: one of ["click", "type", "navigate"]
     - selector: a CSS selector
-    - value: optional (only for "type" and "navigate" actions)
-- expect: the final expected outcome (either `url` or a `selectorVisible`)
+    - value: optional (only for "type" and "navigate" actions). You MUST provide a value for "type" and "navigate" actions. 
+- expect: the final expected outcome (either `url` or a `selectorVisible`). The url can be a base path excluding parameters. 
 
 IMPORTANT: Use EXACTLY these action values:
 - "click" for clicking elements
 - "type" for typing into input fields
-- "assertVisible" for checking if elements are visible
-- "assertURL" for checking URL changes
 - "navigate" for direct navigation
 
 Here are two examples:
 {examples} \n
-"""
+"""
diff --git a/core/models.py b/core/models.py
@@ -0,0 +1,86 @@
+from enum import Enum
+from typing import Optional, List
+from pydantic import BaseModel, Field, model_validator
+
+# for testing specific actions via Pydantic
+class ActionType(str, Enum):
+    """Valid Playwright actions"""
+    CLICK = "click"
+    TYPE = "type"
+    NAVIGATE = "navigate"
+    # Add new actions above, then extend the membership tuples below if they need a selector/value.
+
+    @property
+    def requires_selector(self) -> bool:
+        """Return True for the actions that must carry a selector (click and type)."""
+        return self in (ActionType.CLICK, ActionType.TYPE)  # via the class: no 3.11 enum warning
+
+    @property
+    def requires_value(self) -> bool:
+        """Return True for the actions that must carry a value (typed text or a navigate URL)."""
+        return self in (ActionType.TYPE, ActionType.NAVIGATE)
+
+
+class Expect(BaseModel):  # NOTE(review): both fields default to None, so an empty expect validates; confirm
+    """Expected outcome of a test."""
+    url: Optional[str] = Field(  # run_ui_tests checks this as a substring of page.url
+        default=None, description="Expected URL after the test steps")
+    selectorVisible: Optional[str] = Field(  # run_ui_tests checks this via page.locator(...).is_visible(...)
+        default=None, description="CSS selector that should be visible after the test steps")
+
+
+class PlaywrightCommand(BaseModel):
+    """A single Playwright command to run in the test."""
+    action: ActionType = Field(
+        ..., description="The Playwright action, e.g. 'click', 'type', 'navigate'")
+    selector: Optional[str] = Field(
+        default=None, description="The CSS/XPath selector or test id to target (if applicable)")
+    value: Optional[str] = Field(
+        default=None, description="Value to input (if applicable), e.g. text to type into a field")
+
+    @model_validator(mode='after')  # need to use model_validator for pydantic v2
+    def validate_command_requirements(self):
+        """Validate command has required fields for its action type."""
+
+        # Runs on the already-built model and returns it unchanged when every check passes.
+        # Each failed check raises ValueError with a message naming the offending action.
+        # The isinstance test below is a defensive guard on the type of `action`.
+
+        if not isinstance(self.action, ActionType):
+            raise ValueError(f"Invalid action type: {type(self.action).__name__}")
+
+        # click/type need a selector; both None and the empty string are rejected here
+        if self.action.requires_selector and not self.selector:
+            raise ValueError(f"{self.action.value} needs a selector")
+
+        # type/navigate need a value; only None is rejected, so an empty string is accepted
+        if self.action.requires_value and self.value is None:
+            raise ValueError(f"{self.action.value} needs a value")
+
+        return self
+
+
+class PlaywrightTest(BaseModel):
+    """A structured test plan for Playwright."""
+    description: str = Field(...,  # required; NOTE(review): system_prompt lists "name" for this; confirm
+                             description="Brief description of what the test is verifying")
+    steps: List[PlaywrightCommand] = Field(  # required; every item is validated as a PlaywrightCommand
+        ..., description="Ordered list of Playwright commands to execute in sequence")
+    expect: Expect = Field(  # required, although both fields inside Expect may be left unset
+        ..., description="Expected outcome of the test, e.g. URL change, element visibility")
+
+
+class UnitTests(BaseModel):
+    """All generated tests for a page."""
+    description: str = Field(...,  # required
+                             description="Brief description of what these tests are verifying")
+    tests: List[PlaywrightTest] = Field(..., description="List of tests to run")  # required; no minimum length set
+
+
+class PageNode:
+    """A single page of the test plan: its name, its config dict and a ``tests`` list."""
+
+    def __init__(self, name: str, config: dict) -> None:
+        """Store *name* and *config*; ``tests`` starts out as a new empty list."""
+        # Targets are bound left to right, so attributes are created as name, config, tests.
+        self.name, self.config, self.tests = name, config, []