diff --git a/core/execute.py b/core/execute.py index 0bcf6d4..03a135b 100644 --- a/core/execute.py +++ b/core/execute.py @@ -1,12 +1,12 @@ from collections import OrderedDict import logging -import shutil +import re from playwright.sync_api import sync_playwright, BrowserContext import os -from urllib.parse import urljoin +from urllib.parse import urljoin, urlparse from pydantic import ValidationError -from .planner import PlaywrightCommand, PlaywrightTest, ActionType, get_authenticated_browser_context - +from .planner import get_authenticated_browser_context +from .models import PlaywrightTest, PlaywrightCommand, ActionType import json from .planner import get_authenticated_browser_context @@ -18,10 +18,6 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere github_mode = os.environ.get('GITHUB_ACTION_MODE') == 'true' - if os.path.exists("results"): - shutil.rmtree("results") # remove old directory and contents - os.makedirs("results", exist_ok=True) - test_results = { "tests": [] } @@ -34,6 +30,19 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere page_url = urljoin(base_url, page_name) for test in test_suite["tests"]: + test_result = { + "description": test['description'], + "status": "passed", + "error": None, + "screenshot": None + } + + if test.get('disabled', False): + print(f"Skipping test, disabled due to bad selectors: {test['description']}") + test_result["status"] = "skipped" + test_results["tests"].append(test_result) + continue + print(f"\n▶ Running: {test['description']}") page = sync_context.new_page() response = page.goto(page_url, wait_until="load") @@ -62,14 +71,7 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere }) continue - test_result = { - "description": test['description'], - "status": "passed", - "error": None, - "screenshot": None - } - - # attemptp to execute the test steps + # attempt to execute the test steps try: validated_test = PlaywrightTest(**test) print(f"✓ Test validation passed: {len(validated_test.steps)} steps") @@ -86,10 +88,6 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere page.click(selector, timeout=timeout) elif action == ActionType.TYPE: page.fill(selector, value, timeout=timeout) - elif action == ActionType.ASSERT_VISIBLE: - page.wait_for_selector(selector, state="visible", timeout=timeout) - elif action == ActionType.ASSERT_URL: - page.wait_for_url(value, timeout=timeout) elif action == ActionType.NAVIGATE: if value.startswith(('http://', 'https://')): # Absolute URL @@ -106,9 +104,9 @@ def run_ui_tests(sync_context: BrowserContext, base_url: str, unit_tests: Ordere if test["expect"]["url"] not in page.url: test_result["status"] = "failed" test_result["error"] = f"URL mismatch: expected {test['expect']['url']}, got {page.url}" - + raise Exception(test_result["error"]) elif "selectorVisible" in test["expect"]: - if not page.locator(test["expect"]["selectorVisible"]).is_visible(): + if not page.locator(test["expect"]["selectorVisible"]).is_visible(timeout=timeout): test_result["status"] = "failed" test_result["error"] = f"Selector not visible: {test['expect']['selectorVisible']}" raise Exception(test_result["error"]) diff --git a/core/llm_constants.py b/core/llm_constants.py index 7479ec1..be13b63 100644 --- a/core/llm_constants.py +++ b/core/llm_constants.py @@ -70,23 +70,21 @@ """ system_prompt = f""" -You are a UI QA engineer. Given HTML, you return JSON test cases for the page. +You are a UI QA engineer. Given HTML, you return JSON test cases for the page. You might also receive feedback along with this original prompt with a testcase that needs to be fixed. Each test case includes: - name: a string describing the test - steps: a list of actions. Each action has: - - action: one of ["click", "type", "assertVisible", "assertURL", "navigate"] + - action: one of ["click", "type", "navigate"] - selector: a CSS selector - - value: optional (only for "type" and "navigate" actions) -- expect: the final expected outcome (either `url` or a `selectorVisible`) + - value: optional (only for "type" and "navigate" actions). You MUST provide a value for "type" and "navigate" actions. +- expect: the final expected outcome (either `url` or a `selectorVisible`). The url can be a base path excluding parameters. IMPORTANT: Use EXACTLY these action values: - "click" for clicking elements - "type" for typing into input fields -- "assertVisible" for checking if elements are visible -- "assertURL" for checking URL changes - "navigate" for direct navigation Here are two examples: {examples} \n -""" +""" \ No newline at end of file diff --git a/core/models.py b/core/models.py new file mode 100644 index 0000000..3c90479 --- /dev/null +++ b/core/models.py @@ -0,0 +1,86 @@ +from enum import Enum +from typing import Optional, List +from pydantic import BaseModel, Field, model_validator + +# for testing specific actions via Pydantic +class ActionType(str, Enum): + """Valid Playwright actions""" + CLICK = "click" + TYPE = "type" + NAVIGATE = "navigate" + # add new actions here, update properties below if needed... + + @property + def requires_selector(self) -> bool: + """Actions that need a CSS selector""" + return self in {self.CLICK, self.TYPE} + + @property + def requires_value(self) -> bool: + """Actions that need a value (text to type, URL to navigate to, etc.)""" + return self in {self.TYPE, self.NAVIGATE} + + +class Expect(BaseModel): + """Expected outcome of a test.""" + url: Optional[str] = Field( + default=None, description="Expected URL after the test steps") + selectorVisible: Optional[str] = Field( + default=None, description="CSS selector that should be visible after the test steps") + + +class PlaywrightCommand(BaseModel): + """A single Playwright command to run in the test.""" + action: ActionType = Field( + ..., description="The Playwright action, e.g. 'click', 'type', 'navigate'") + selector: Optional[str] = Field( + default=None, description="The CSS/XPath selector or test id to target (if applicable)") + value: Optional[str] = Field( + default=None, description="Value to input (if applicable), e.g. text to type into a field") + + @model_validator(mode='after') # need to use model_validator for pydantic v2 + def validate_command_requirements(self): + """Validate command has required fields for its action type.""" + + # for debugging what the action type actually is + # print(f"self.action type: {type(self.action)}") + # print(f"self.action value: {self.action}") + + if not isinstance(self.action, ActionType): + raise ValueError(f"Invalid action type: {type(self.action).__name__}") + + # Check if action needs a selector but doesn't have one + if self.action.requires_selector and not self.selector: + raise ValueError(f"{self.action.value} needs a selector") + + # Check if action needs a value but doesn't have one + if self.action.requires_value and self.value is None: + raise ValueError(f"{self.action.value} needs a value") + + return self + + +class PlaywrightTest(BaseModel): + """A structured test plan for Playwright.""" + description: str = Field(..., + description="Brief description of what the test is verifying") + steps: List[PlaywrightCommand] = Field( + ..., description="Ordered list of Playwright commands to execute in sequence") + expect: Expect = Field( + ..., description="Expected outcome of the test, e.g. URL change, element visibility") + + +class UnitTests(BaseModel): + """All generated tests for a page.""" + description: str = Field(..., + description="Brief description of what these tests are verifying") + tests: List[PlaywrightTest] = Field(..., description="List of tests to run") + + +class PageNode(): + """Represents a single page in the test plan.""" + + def __init__(self, name: str, config: dict): + self.name = name + self.config = config + self.tests = [] \ No newline at end of file diff --git a/core/planner.py b/core/planner.py index d7aada4..b3a62f8 100644 --- a/core/planner.py +++ b/core/planner.py @@ -1,112 +1,18 @@ import json import os +from typing import List from urllib.parse import urljoin -from pydantic import BaseModel, Field, model_validator +import playwright from .llm_constants import system_prompt +from .models import (ActionType, PlaywrightTest, + UnitTests, PageNode) from langchain.chat_models import init_chat_model from langchain_core.prompts import ChatPromptTemplate +from langchain.memory import ConversationBufferMemory from playwright.sync_api import BrowserContext from collections import OrderedDict -from enum import Enum -from typing import Optional, List -import logging - - -# Configure logging -logging.basicConfig( - filename="app.log", # Log file name - level=logging.INFO, # Minimum log level - format="%(asctime)s - %(levelname)s - %(message)s", # Log format - filemode='w' -) - -# for testing specific actions via Pydantic - - -class ActionType(str, Enum): - """Valid Playwright actions""" - CLICK = "click" - TYPE = "type" - ASSERT_VISIBLE = "assertVisible" - ASSERT_URL = "assertURL" - NAVIGATE = "navigate" - # add new actions here, update properties below if needed... - - @property - def requires_selector(self) -> bool: - """Actions that need a CSS selector""" - return self in {self.CLICK, self.TYPE, self.ASSERT_VISIBLE} - - @property - def requires_value(self) -> bool: - """Actions that need a value (text to type, URL to navigate to, etc.)""" - return self in {self.TYPE, self.NAVIGATE} - - -class Expect(BaseModel): - """Expected outcome of a test.""" - url: Optional[str] = Field( - default=None, description="Expected URL after the test steps") - selectorVisible: Optional[str] = Field( - default=None, description="CSS selector that should be visible after the test steps") - - -class PlaywrightCommand(BaseModel): - """A single Playwright command to run in the test.""" - action: ActionType = Field( - ..., description="The Playwright action, e.g. 'click', 'fill', 'goto', 'assert', 'navigate'") - selector: Optional[str] = Field( - default=None, description="The CSS/XPath selector or test id to target (if applicable)") - value: Optional[str] = Field( - default=None, description="Value to input (if applicable), e.g. text to type into a field") - - @model_validator(mode='after') # need to use model_validator for pydantic v2 - def validate_command_requirements(self): - """Validate command has required fields for its action type.""" - - # for debugging what the action type actually is - # print(f"self.action type: {type(self.action)}") - # print(f"self.action value: {self.action}") - - if not isinstance(self.action, ActionType): - raise ValueError(f"Invalid action type: {type(self.action).__name__}") - - # Check if action needs a selector but doesn't have one - if self.action.requires_selector and not self.selector: - raise ValueError(f"{self.action.value} needs a selector") - - # Check if action needs a value but doesn't have one - if self.action.requires_value and not self.value: - raise ValueError(f"{self.action.value} needs a value") - - return self - - -class PlaywrightTest(BaseModel): - """A structured test plan for Playwright.""" - description: str = Field(..., - description="Brief description of what the test is verifying") - steps: List[PlaywrightCommand] = Field( - ..., description="Ordered list of Playwright commands to execute in sequence") - expect: Expect = Field( - ..., description="Expected outcome of the test, e.g. URL change, element visibility") - - -class UnitTests(BaseModel): - """All generated tests for a page.""" - description: str = Field(..., - description="Brief description of what these tests are verifying") - tests: List[PlaywrightTest] = Field(..., description="List of tests to run") - - -class PageNode(): - """Represents a single page in the test plan.""" - - def __init__(self, name: str, config: dict): - self.name = name - self.config = config - self.tests = [] +import logging def parse_config(config: dict) -> List[PageNode]: """Parse the configuration dictionary into a list of PageNode objects. We want to top sort the pages based on dependencies.""" @@ -131,7 +37,7 @@ def visit(node: PageNode): if page.name not in visited: visit(page) - logging.info(f"Sorted pages: {[p.name for p in sorted_pages]}") + logging.info(f"Page testing order: {[p.name for p in sorted_pages]}") return sorted_pages @@ -242,6 +148,68 @@ def get_authenticated_browser_context(sync_context: BrowserContext, base_url: st return login_page +def is_valid_typing_target(elem) -> bool: + tag = elem.evaluate("el => el.tagName.toLowerCase()") + return tag in ["input", "textarea"] + + +def validate_test_and_retry(auth_page, page_name: str, tests: dict, chat_model_instance, original_prompt, input, page_context, max_retries=2) -> dict: + # Check each test and its steps, retrying generation if needed + validated_test = [] + for i, test in enumerate(tests): + curr_test = test + logging.info(f"validate_test_and_retry(): Initial test for page '{page_name}': {json.dumps(curr_test, indent=2)}") + + for retry in range(max_retries+1): + require_retry, errorList = False, [] + for step in curr_test.get('steps', []): + actionType = step['action'] # convert enum back to string + selector = step.get('selector', '') + if not selector: + continue # No selector to validate for this action + logging.info(f"Validating selector '{selector}' for action '{actionType}' on page '{page_name}'") + try: + # if this times out, we assume the selector is not present + elem_handle = auth_page.wait_for_selector(selector, state='attached',timeout=2000) + + if actionType == ActionType.TYPE: + # ensure the selector is an input or textarea for typing + if not is_valid_typing_target(elem_handle): + raise ValueError(f"Selector '{selector}' is not an input or textarea for typing.") + except Exception as e: + require_retry = True + errorList.append(f"Selector '{selector}' validation error: {str(e)}") + logging.warning(f"Selector '{selector}' validation error on page '{page_name}': {e}") + if require_retry: + curr_test['disabled'] = True # disable the test if we need to retry + else: + curr_test['disabled'] = False + break # all selectors valid, no need to retry + if retry >= max_retries: + logging.error(f"Max retries reached for test on page '{page_name}'. Disabling test.") + break + logging.info(f"Retrying test generation for page '{page_name}' (attempt {retry+1}/{max_retries})") + # Regenerate the test + feedback_prompt = original_prompt + ("\nFEEDBACK: {feedback}\nHere is the original generated test that needs to be fixed: {test}" + "\nPlease fix the test to address the feedback and ensure all selectors are valid on the page.\n") + prompt = ChatPromptTemplate.from_messages( + [("system", feedback_prompt), ("human", "{input}")]) + + # this chat model instance is set up to return a PlaywrightTest structured output instead of UnitTests + few_shot_structured_llm = prompt | chat_model_instance + + response = few_shot_structured_llm.invoke( + {'input': input, 'page_context': page_context, 'feedback': errorList, 'test': json.dumps(curr_test, indent=2)}) + + result = response.model_dump(mode="python", exclude_none=True) + # new test that we will retry in the next loop iteration + curr_test = result + if curr_test.get("disabled", False): + logging.info(f"Unable to fix test for '{page_name}'") + validated_test.append(curr_test) + + return validated_test + def generate_test_plan(sync_context: BrowserContext, base_url: str, config=None): unit_tests = OrderedDict() @@ -252,7 +220,9 @@ def generate_test_plan(sync_context: BrowserContext, base_url: str, config=None) structured_llm = init_chat_model( "gemini-2.5-flash", model_provider="google_genai").with_structured_output(UnitTests) - + retry_llm = init_chat_model( + "gemini-2.5-flash", model_provider="google_genai").with_structured_output(PlaywrightTest) + config = config or {} max_tests = config.get('max_tests', 2) pages_config = config.get('pages', {}) @@ -293,9 +263,6 @@ def generate_test_plan(sync_context: BrowserContext, base_url: str, config=None) custom_prompt += f"\n\nPlease have a maximum of {max_tests} test cases" - config_prompt_text = "\n\n The page configuration is {config}" - custom_prompt += config_prompt_text - prompt = ChatPromptTemplate.from_messages( [("system", custom_prompt), ("human", "{input}")]) few_shot_structured_llm = prompt | structured_llm @@ -305,18 +272,20 @@ def generate_test_plan(sync_context: BrowserContext, base_url: str, config=None) html = auth_page.content() response = few_shot_structured_llm.invoke( - {'input': html, 'config': json.dumps(page_node.config), 'page_context': page_context}) + {'input': html, 'page_context': page_context}) result = response.model_dump(mode="python", exclude_none=True) - - # Updated: convert "name" back to "description" for execute.py compatibility - for test in result.get('tests', []): - if 'name' in test: - test['description'] = test.pop('name') - + + # We check if the selector exists on the page - if not, we retry test generation. If all fails, we disable the test. + validated_tests = validate_test_and_retry(auth_page, page_node.name, result.get("tests", []), retry_llm, custom_prompt, html, page_context) + + result['tests'] = validated_tests + logging.info(f"Generated test for page {page_node.name}: {json.dumps(validated_tests, indent=2)}") # Limit number of tests if configured if 'tests' in result and len(result['tests']) > max_tests: result['tests'] = result['tests'][:max_tests] + + unit_tests[page_node.name] = result diff --git a/main.py b/main.py index 8de5352..32fc0ce 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,8 @@ import argparse import json +import logging import os +import shutil from dotenv import load_dotenv import yaml from pathlib import Path @@ -22,7 +24,6 @@ def create_github_summary(results): "total_tests": len(results.get("tests", [])), "tests_passed": sum(1 for test in results.get("tests", []) if test.get("status") == "passed"), "tests_failed": sum(1 for test in results.get("tests", []) if test.get("status") == "failed"), - "ui_coverage": 85, # Placeholder - could be calculated based on elements tested "failed_tests": [ { "name": test.get("description", "Unknown test"), @@ -74,6 +75,18 @@ def main(): 'headless': args.headless.lower() == 'true' } + if os.path.exists("results"): + shutil.rmtree("results") # remove old directory and contents + os.makedirs("results", exist_ok=True) + + # Configure logging + logging.basicConfig( + filename="./results/app.log", # Log file name + level=logging.INFO, # Minimum log level + format="%(asctime)s - %(levelname)s - %(message)s", # Log format + filemode='w' + ) + with sync_playwright() as p: headless = test_config['headless'] if test_config['browser'].lower() == 'firefox': diff --git a/tests/test_action_type.py b/tests/test_action_type.py index 4a4873d..a5f29c3 100644 --- a/tests/test_action_type.py +++ b/tests/test_action_type.py @@ -1,13 +1,12 @@ import pytest from pydantic import ValidationError -from core.planner import PlaywrightCommand, PlaywrightTest +from core.models import PlaywrightCommand, PlaywrightTest @pytest.fixture def valid_commands(): return [ {"action": "click", "selector": "#button"}, {"action": "type", "selector": "#input", "value": "hello"}, - {"action": "assertVisible", "selector": ".msg"}, {"action": "navigate", "value": "/page"}, ] @@ -18,7 +17,6 @@ def invalid_commands(): {"action": "click"}, # missing selector {"action": "type", "selector": "#input"}, # missing value {"action": "navigate"}, # missing value - {"action": "assertVisible"}, # missing selector ] @pytest.fixture diff --git a/tests/test_config.yml b/tests/test_config.yml index fe87eb1..fd857ee 100644 --- a/tests/test_config.yml +++ b/tests/test_config.yml @@ -1,4 +1,4 @@ -/page: +/: Buttons: true Links: true Input_fields: true diff --git a/tests/test_planner.py b/tests/test_planner.py index a54e0a2..4ec625d 100644 --- a/tests/test_planner.py +++ b/tests/test_planner.py @@ -33,10 +33,7 @@ def test_generate_test_plan_structure(mock_browser_context, sample_config): page_node_mock = MagicMock() page_node_mock.name = "login.html" page_node_mock.config = sample_config['pages']['login.html'] - - mock_llm_response.model_dump = MagicMock(return_value={ - "description": "Test page analysis", - "tests": [ + tests = [ { "description": "Test login form", "steps": [ @@ -47,6 +44,9 @@ def test_generate_test_plan_structure(mock_browser_context, sample_config): "expect": {"selectorVisible": "#content"} } ] + mock_llm_response.model_dump = MagicMock(return_value={ + "description": "Test page analysis", + "tests": tests.copy() }) chat_mock = MagicMock(name="chat") @@ -57,7 +57,8 @@ def test_generate_test_plan_structure(mock_browser_context, sample_config): with patch("testagent.core.planner.get_authenticated_browser_context", return_value="login.html"), \ patch("testagent.core.planner.parse_config", return_value=[page_node_mock]), \ - patch("testagent.core.planner.ChatPromptTemplate", chat_mock): + patch("testagent.core.planner.ChatPromptTemplate", chat_mock), \ + patch("testagent.core.planner.validate_test_and_retry", return_value=tests.copy()): result = generate_test_plan(mock_browser_context, "http://example.com", config=sample_config)