diff --git a/datasets/jetski-cli-tools/example_run_config.yaml b/datasets/jetski-cli-tools/example_run_config.yaml new file mode 100644 index 00000000..5392d7a6 --- /dev/null +++ b/datasets/jetski-cli-tools/example_run_config.yaml @@ -0,0 +1,33 @@ +############################################################ +### Dataset / Eval Items +############################################################ +dataset_config: datasets/jetski-cli-tools/jetski-cli.evalset.json +dataset_format: agent-format + +# Orchestrator Configuration +orchestrator: agent +model_config: datasets/model_configs/jetski_cli_model.yaml +simulated_user_model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + +############################################################ +### Scorer Related Configs +############################################################ +scorers: + trajectory_matcher: {} + goal_completion: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + behavioral_metrics: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + parameter_analysis: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + turn_count: {} + end_to_end_latency: {} + tool_call_latency: {} + token_consumption: {} + +############################################################ +### Reporting Related Configs +############################################################ +reporting: + csv: + output_directory: 'results' diff --git a/datasets/jetski-cli-tools/example_run_fake_config.yaml b/datasets/jetski-cli-tools/example_run_fake_config.yaml new file mode 100644 index 00000000..56c2f232 --- /dev/null +++ b/datasets/jetski-cli-tools/example_run_fake_config.yaml @@ -0,0 +1,33 @@ +############################################################ +### Dataset / Eval Items +############################################################ +dataset_config: datasets/jetski-cli-tools/jetski-cli-fake.evalset.json +dataset_format: agent-format + +# Orchestrator Configuration +orchestrator: agent +model_config: datasets/model_configs/jetski_cli_fake_model.yaml +simulated_user_model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + +############################################################ +### Scorer Related Configs +############################################################ +scorers: + trajectory_matcher: {} + goal_completion: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + behavioral_metrics: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + parameter_analysis: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + turn_count: {} + end_to_end_latency: {} + tool_call_latency: {} + token_consumption: {} + +############################################################ +### Reporting Related Configs +############################################################ +reporting: + csv: + output_directory: 'results' diff --git a/datasets/jetski-cli-tools/example_run_skills_config.yaml b/datasets/jetski-cli-tools/example_run_skills_config.yaml new file mode 100644 index 00000000..ffd3c88f --- /dev/null +++ b/datasets/jetski-cli-tools/example_run_skills_config.yaml @@ -0,0 +1,30 @@ +############################################################ +### Dataset / Eval Items +############################################################ +dataset_config: datasets/jetski-cli-tools/jetski-cli.evalset.json +dataset_format: agent-format + +# Orchestrator Configuration +orchestrator: agent +model_config: datasets/model_configs/jetski_cli_skills_model.yaml +simulated_user_model_config: 
datasets/model_configs/gemini_2.5_pro_model.yaml + +############################################################ +### Scorer Related Configs +############################################################ +scorers: + goal_completion: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + behavioral_metrics: + model_config: datasets/model_configs/gemini_2.5_pro_model.yaml + turn_count: {} + end_to_end_latency: {} + tool_call_latency: {} + token_consumption: {} + +############################################################ +### Reporting Related Configs +############################################################ +reporting: + csv: + output_directory: 'results' diff --git a/datasets/jetski-cli-tools/jetski-cli-fake.evalset.json b/datasets/jetski-cli-tools/jetski-cli-fake.evalset.json new file mode 100644 index 00000000..ba7dffd3 --- /dev/null +++ b/datasets/jetski-cli-tools/jetski-cli-fake.evalset.json @@ -0,0 +1,30 @@ +{ + "scenarios": [ + { + "id": "fake-jetski-create-instance-success", + "starting_prompt": "Create a new Cloud SQL instance named 'my-fake-db' in project 'astana-evaluation'. Use PostgreSQL 17, and set the password to 'password123'. Also use the 'Development' edition preset.", + "conversation_plan": "The user wants to create a database. All required parameters are in the starting prompt. The agent should call create_instance and report the success message back.", + "expected_trajectory": [ + "create_instance" + ], + "env": { + "GOOGLE_CLOUD_PROJECT": "astana-evaluation" + }, + "kind": "tools", + "max_turns": 3 + }, + { + "id": "fake-jetski-get-instance-failure", + "starting_prompt": "Get the details for the Cloud SQL instance named 'missing-db' in project 'astana-evaluation'.", + "conversation_plan": "The user wants to get instance details. The agent should call get_instance, which is hardcoded to fail with an error 'Instance not found or permission denied'. The agent should explain that the instance could not be found based on the error.", + "expected_trajectory": [ + "get_instance" + ], + "env": { + "GOOGLE_CLOUD_PROJECT": "astana-evaluation" + }, + "kind": "tools", + "max_turns": 3 + } + ] +} diff --git a/datasets/jetski-cli-tools/jetski-cli.evalset.json b/datasets/jetski-cli-tools/jetski-cli.evalset.json new file mode 100644 index 00000000..204a22e0 --- /dev/null +++ b/datasets/jetski-cli-tools/jetski-cli.evalset.json @@ -0,0 +1,18 @@ +{ + "scenarios": [ + { + "id": "jetski-cloud-sql-debug-01", + "starting_prompt": "list all instances in project ext-test-cloud-sql-postgres", + "conversation_plan": "Ask the agent to list instances in project ext-test-cloud-sql-postgres. 
Once all instances are listed, if daily-ci-evals-db exists, get its state and validate that it is RUNNABLE", + "expected_trajectory": [ + "list_instances", + "get_instance" + ], + "env": { + "GOOGLE_CLOUD_PROJECT": "ext-test-cloud-sql-postgres" + }, + "kind": "tools", + "max_turns": 3 + } + ] +} \ No newline at end of file diff --git a/datasets/model_configs/jetski_cli_fake_model.yaml b/datasets/model_configs/jetski_cli_fake_model.yaml new file mode 100644 index 00000000..27386631 --- /dev/null +++ b/datasets/model_configs/jetski_cli_fake_model.yaml @@ -0,0 +1,64 @@ +jetski_cli_version: "/google/bin/releases/jetski-devs/tools/cli" +generator: jetski_cli +model: "gemini-2.5-pro" + +env: + GOOGLE_CLOUD_PROJECT: "astana-evaluation" + GOOGLE_CLOUD_LOCATION: "us-central1" + # Route backend model inference through Vertex AI endpoints (Model Garden) + GOOGLE_GENAI_USE_VERTEXAI: "true" + +setup: + fake_mcp_servers: + "cloud-sql": + command: "python" + args: + - "evalbench/util/fake_mcp_server.py" + - "--server-name" + - "cloud-sql" + - "--config" + - "datasets/model_configs/jetski_cli_fake_model.yaml" + +fake_mcp_tools: + "cloud-sql": + - name: create_instance + description: "Creates a Postgres instance using `Production` and `Development` presets. For the `Development` template, it chooses a 2 vCPU, 16 GiB RAM, 100 GiB SSD configuration with Non-HA/zonal availability. For the `Production` template, it chooses an 8 vCPU, 64 GiB RAM, 250 GiB SSD configuration with HA/regional availability. The Enterprise Plus edition is used in both cases. The default database version is `POSTGRES_17`. The agent should ask the user if they want to use a different version." + parameters: + type: object + properties: + project_id: + type: string + description: "The ID of the project." + instance_name: + type: string + description: "The name of the Cloud SQL instance." + database_version: + type: string + description: "The database engine type and version." + password: + type: string + description: "The password for the default user." + edition_preset: + type: string + description: "The edition preset for the instance." + required: ["project_id", "instance_name", "database_version", "password", "edition_preset"] + response: + status: "success" + message: "Instance created successfully" + - name: get_instance + description: "Gets the details of a Cloud SQL instance." + parameters: + type: object + properties: + project_id: + type: string + description: "The ID of the project." + instance_name: + type: string + description: "The name of the Cloud SQL instance." + required: ["project_id", "instance_name"] + response: + status: "failure" + error: + code: 404 + message: "Instance not found or permission denied" diff --git a/datasets/model_configs/jetski_cli_model.yaml b/datasets/model_configs/jetski_cli_model.yaml new file mode 100644 index 00000000..9131e6c3 --- /dev/null +++ b/datasets/model_configs/jetski_cli_model.yaml @@ -0,0 +1,23 @@ +# Jetski CLI version. 
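+# Note: this value overrides the auto-detection in JetskiCliGenerator, which prefers the
+# Cloudtop path below and falls back to "/usr/local/bin/jetski" when that path is absent.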
+# - Cloudtop/Linux default binary path: "/google/bin/releases/jetski-devs/tools/cli" +# - gMac default binary path: "/usr/local/bin/jetski" +jetski_cli_version: "/google/bin/releases/jetski-devs/tools/cli" + +generator: jetski_cli + +# Model to use for the agent session +model: "gemini-2.5-pro" + +env: + GOOGLE_CLOUD_PROJECT: "astana-evaluation" + GOOGLE_CLOUD_LOCATION: "us-central1" + # Route backend model inference through Vertex AI endpoints (Model Garden) + GOOGLE_GENAI_USE_VERTEXAI: "true" + +setup: + mcp_servers: + "cloud-sql": + httpUrl: "https://sqladmin.googleapis.com/mcp" + authProviderType: google_credentials + headers: + X-Goog-User-Project: astana-evaluation diff --git a/datasets/model_configs/jetski_cli_skills_model.yaml b/datasets/model_configs/jetski_cli_skills_model.yaml new file mode 100644 index 00000000..02f60977 --- /dev/null +++ b/datasets/model_configs/jetski_cli_skills_model.yaml @@ -0,0 +1,19 @@ +jetski_cli_version: "/google/bin/releases/jetski-devs/tools/cli" +generator: jetski_cli +model: "gemini-3-pro" + +env: + GOOGLE_CLOUD_PROJECT: "astana-evaluation" + GOOGLE_CLOUD_LOCATION: "us-central1" + # Route backend model inference through Vertex AI endpoints (Model Garden) + GOOGLE_GENAI_USE_VERTEXAI: "true" + +setup: + skills: + # Option 1: Clone remote repository skills (Highly portable for example configurations) + - action: install_from_repo + url: "https://github.com/gemini-cli-extensions/cloud-sql-postgresql.git" + + # Option 2: Link local development skill (Uncomment and modify path for local custom runs) + # - action: link + # path: "/usr/local/google/home/prernakakkar/senseai/cloud-sql-postgresql/skills/cloudsql-postgres-admin" diff --git a/evalbench/evalbench.py b/evalbench/evalbench.py index 943a3240..3afdfbb2 100644 --- a/evalbench/evalbench.py +++ b/evalbench/evalbench.py @@ -86,9 +86,12 @@ def eval(experiment_config: str): # Run evaluations evaluator.evaluate(flatten_dataset(dataset)) - job_id, run_time, results_tf, scores_tf, multi_trial_scores_tf = ( - evaluator.process() - ) + process_res = evaluator.process() + if len(process_res) == 5: + job_id, run_time, results_tf, scores_tf, multi_trial_scores_tf = process_res + else: + job_id, run_time, results_tf, scores_tf = process_res[:4] + multi_trial_scores_tf = None # Create Dataframes for reporting if results_tf is not None and scores_tf is not None: diff --git a/evalbench/evaluator/agentevaluator.py b/evalbench/evaluator/agentevaluator.py index b2f3ca17..a582fde9 100644 --- a/evalbench/evaluator/agentevaluator.py +++ b/evalbench/evaluator/agentevaluator.py @@ -10,6 +10,7 @@ from generators.models.gemini_cli import GeminiCliGenerator from generators.models.claude_code import ClaudeCodeGenerator from generators.models.codex_cli import CodexCliGenerator +from generators.models.jetski_cli import JetskiCliGenerator from mp import mprunner from work.agentgenwork import AgentGenWork from evaluator.simulateduser import SimulatedUser @@ -43,10 +44,12 @@ def __init__( self.agent_version = self.generator.gemini_cli_version elif isinstance(self.generator, CodexCliGenerator): self.agent_version = self.generator.codex_cli_version + elif isinstance(self.generator, JetskiCliGenerator): + self.agent_version = self.generator.jetski_cli_version else: raise ValueError( - f"AgentEvaluator only supports gemini_cli, claude_code, and " - f"codex_cli generators, got {type(self.generator).__name__}") + f"AgentEvaluator only supports gemini_cli, claude_code, codex_cli," + f"and jetski_cli generators, got 
{type(self.generator).__name__}") runner_config = self.config.get("runners", {}) self.agent_runners = runner_config.get("agent_runners", 10) @@ -58,11 +61,11 @@ def evaluate( job_id: str, run_time: datetime.datetime, ): - if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator)): + if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator, JetskiCliGenerator)): return self._evaluate_agent_cli(dataset, job_id, run_time) else: raise NotImplementedError( - "This evaluator currently only supports GeminiCliGenerator, ClaudeCodeGenerator and CodexCliGenerator") + "This evaluator currently only supports GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator and JetskiCliGenerator") def _evaluate_agent_cli( self, @@ -131,8 +134,8 @@ def process_scenario( for turn in range(max_turns): logging.info( f"Turn {turn + 1}/{max_turns} - Prompt: {current_prompt}") - if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator)): - if isinstance(self.generator, (ClaudeCodeGenerator, CodexCliGenerator)): + if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator, JetskiCliGenerator)): + if isinstance(self.generator, (ClaudeCodeGenerator, CodexCliGenerator, JetskiCliGenerator)): cli_cmd = self.generator.create_command( cli=self.agent_version, prompt=current_prompt, @@ -171,12 +174,12 @@ def process_scenario( self._log_cli_result(turn, max_turns, result) tools = [] - if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator)): + if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator, JetskiCliGenerator)): tools = self.generator.extract_tools(result.stdout) accumulated_tools.extend(tools) # Extract skills from generator output - if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator)): + if isinstance(self.generator, (GeminiCliGenerator, ClaudeCodeGenerator, CodexCliGenerator, JetskiCliGenerator)): skills = self.generator.extract_skills(result.stdout) accumulated_skills.extend(skills) diff --git a/evalbench/evaluator/agentorchestrator.py b/evalbench/evaluator/agentorchestrator.py index 54b5c9d1..727b5c15 100644 --- a/evalbench/evaluator/agentorchestrator.py +++ b/evalbench/evaluator/agentorchestrator.py @@ -27,7 +27,7 @@ def __init__( self.report_progress = report_progress def evaluate(self, dataset: list[EvalGeminiCliRequest]): - logging.info("Starting agent CLI evaluation") + logging.info("Starting Agent evaluation") evaluator = AgentEvaluator(self.config) eval_outputs, scoring_results = evaluator.evaluate( dataset, self.job_id, self.run_time diff --git a/evalbench/generators/models/__init__.py b/evalbench/generators/models/__init__.py index 61913cde..0bf39a33 100644 --- a/evalbench/generators/models/__init__.py +++ b/evalbench/generators/models/__init__.py @@ -10,6 +10,7 @@ from .gemini_cli import GeminiCliGenerator from .claude_code import ClaudeCodeGenerator from .codex_cli import CodexCliGenerator +from .jetski_cli import JetskiCliGenerator from util.config import load_yaml_config @@ -42,6 +43,8 @@ def get_generator(global_models, model_config_path: str, db: DB = None): model = ClaudeCodeGenerator(config) if config["generator"] == "codex_cli": model = CodexCliGenerator(config) + if config["generator"] == "jetski_cli": + model = JetskiCliGenerator(config) if not model: raise ValueError(f"Unknown Generator {config['generator']}") diff --git a/evalbench/generators/models/jetski_cli.py 
b/evalbench/generators/models/jetski_cli.py new file mode 100644 index 00000000..91e8ee2c --- /dev/null +++ b/evalbench/generators/models/jetski_cli.py @@ -0,0 +1,504 @@ +from .generator import QueryGenerator +import subprocess +import os +import json +import logging +import shutil +import sys +import re +from util.context import rpc_id_var + + +class CLICommand: + def __init__(self, cli, prompt, env=None, resume=False, session_id=None, cwd=None): + self.cli = cli + self.prompt = prompt + self.env = env if env else {} + self.resume = resume + self.session_id = session_id + self.cwd = cwd + + +class JetskiCliGenerator(QueryGenerator): + """Generator queries using Jetski CLI.""" + + def __init__(self, querygenerator_config): + super().__init__(querygenerator_config) + self.name = "jetski_cli" + + self.real_home = os.environ.get("HOME", os.path.expanduser("~")) + + # If running via eval_server.py (gRPC), use session-specific path in shared volume + if sys.argv[0].endswith("eval_server.py"): + session_id = querygenerator_config.get("session_id") + if not session_id: + ctx_id = rpc_id_var.get() + session_id = ctx_id if ctx_id != "default" else "default" + self.fake_home = os.path.join("/tmp_sessions", session_id, "fake_home_jetski_cli") + else: + self.fake_home = os.path.abspath(os.path.join(".venv", "fake_home_jetski_cli")) + + self.jetski_config_dir = os.path.join(self.fake_home, ".gemini", "jetski") + self.plugins_dir = os.path.join(self.jetski_config_dir, "plugins") + os.makedirs(self.fake_home, exist_ok=True) + os.makedirs(self.jetski_config_dir, exist_ok=True) + os.makedirs(self.plugins_dir, exist_ok=True) + + self.env = querygenerator_config.get("env", {}) + self.env["HOME"] = self.fake_home + + # Handle Google credentials / ADC mirroring for cloud integration + adc_path = self.env.get("GOOGLE_APPLICATION_CREDENTIALS") + if not adc_path: + adc_path = os.path.join( + self.real_home, + ".config", + "gcloud", + "application_default_credentials.json", + ) + if os.path.exists(adc_path): + self.env["GOOGLE_APPLICATION_CREDENTIALS"] = adc_path + + if adc_path and os.path.exists(adc_path): + fake_gcloud_dir = os.path.join(self.fake_home, ".config", "gcloud") + os.makedirs(fake_gcloud_dir, exist_ok=True) + fake_adc_path = os.path.join(fake_gcloud_dir, "application_default_credentials.json") + if os.path.abspath(adc_path) != os.path.abspath(fake_adc_path): + shutil.copy2(adc_path, fake_adc_path) + + if "CLOUDSDK_CONFIG" not in self.env: + self.env["CLOUDSDK_CONFIG"] = os.path.join( + self.real_home, ".config", "gcloud" + ) + + # Default binary paths logic based on OS/Environment + default_binary = "/google/bin/releases/jetski-devs/tools/cli" + if not os.path.exists(default_binary) and os.path.exists("/usr/local/bin/jetski"): + default_binary = "/usr/local/bin/jetski" + + self.jetski_cli_version = querygenerator_config.get( + "jetski_cli_version", default_binary + ) + self.model = querygenerator_config.get("model") + + self.setup_config = querygenerator_config.get("setup", {}) + if self.setup_config: + self._setup() + + def _setup(self): + """Performs initial setup for Jetski CLI.""" + mcp_servers_config = dict(self.setup_config.get("mcp_servers", {})) + mcp_servers_config.update(self.setup_config.get("fake_mcp_servers", {})) + if mcp_servers_config: + self._setup_mcp_servers(mcp_servers_config) + + settings_config = self.setup_config.get("settings", {}) + if settings_config: + self._setup_settings(settings_config) + + skills_config = list(self.setup_config.get("plugins", [])) + 
skills_config.extend(self.setup_config.get("skills", [])) + if skills_config: + self._setup_skills(skills_config) + + skills_dir = self.setup_config.get("skills_dir") or self.setup_config.get("plugins_dir") + if skills_dir: + self._setup_skills_from_dir(skills_dir) + + def _setup_settings(self, settings_config: dict): + """Writes declarative settings.json inside ~/.gemini/jetski/.""" + settings_path = os.path.join(self.jetski_config_dir, "settings.json") + current_settings = {} + if os.path.exists(settings_path): + try: + with open(settings_path, "r") as f: + current_settings = json.load(f) + except json.JSONDecodeError as e: + logging.warning(f"Invalid JSON in Jetski settings at {settings_path}: {e}") + + current_settings.update(settings_config) + with open(settings_path, "w") as f: + json.dump(current_settings, f, indent=2) + logging.info(f"Jetski settings written to {settings_path}") + + def _setup_skills_from_dir(self, skills_dir: str): + """Mirrors an entire local marketplace directory into plugins_dir.""" + if not os.path.isdir(skills_dir): + logging.warning(f"Skills directory not found: {skills_dir}") + return + for entry in os.listdir(skills_dir): + full_path = os.path.join(skills_dir, entry) + if os.path.isdir(full_path): + target_dir = os.path.join(self.plugins_dir, entry) + logging.info(f"Syncing Jetski directory skill: {entry} from {full_path}") + if os.path.exists(target_dir): + shutil.rmtree(target_dir) + try: + shutil.copytree(full_path, target_dir) + except Exception as e: + logging.error(f"Failed to copy directory skill {entry}: {e}") + + def _setup_skills(self, skills_list: list): + """Sets up individual custom skills/plugins in ~/.gemini/jetski/plugins/.""" + for item in skills_list: + path = None + name = None + url = None + + if not isinstance(item, dict): + logging.warning(f"Unsupported skill config format (expected dict): {item}") + continue + + if item.get("action") == "install_from_repo" or item.get("url"): + url = item.get("url") + else: + path = item.get("path") + name = item.get("name") or (os.path.basename(path.rstrip("/")) if path else "custom_plugin") + + if url: + clone_url, _, version_tag = url.partition("#") + repo_name = re.sub(r"\.git$", "", clone_url.rstrip("/").split("/")[-1]) + target_dir = os.path.join(self.plugins_dir, repo_name) + logging.info(f"Cloning remote Jetski plugin/skill from {url} into {target_dir}") + if os.path.exists(target_dir): + shutil.rmtree(target_dir) + + cmd = ["git", "clone", "--depth", "1"] + if version_tag: + cmd.extend(["--branch", version_tag]) + cmd.extend([clone_url, target_dir]) + try: + res = subprocess.run(cmd, capture_output=True, text=True, check=False, timeout=120) + if res.returncode != 0: + logging.error(f"Failed to clone plugin repository {url}: {res.stderr}") + except Exception as e: + logging.error(f"Exception cloning plugin repository {url}: {e}") + + elif path and os.path.exists(path): + target_dir = os.path.join(self.plugins_dir, name) + logging.info(f"Syncing Jetski plugin/skill: {name} from {path}") + if os.path.exists(target_dir): + shutil.rmtree(target_dir) + try: + shutil.copytree(path, target_dir) + except Exception as e: + logging.error(f"Failed to copy plugin {name}: {e}") + + def _setup_mcp_servers(self, mcp_servers_config: dict): + """Configures MCP servers in ~/.gemini/jetski/mcp_config.json.""" + mcp_config_path = os.path.join(self.jetski_config_dir, "mcp_config.json") + + current_mcp = {} + if os.path.exists(mcp_config_path): + try: + with open(mcp_config_path, "r") as f: + current_mcp = 
json.load(f) + except json.JSONDecodeError: + pass + + if "mcpServers" not in current_mcp: + current_mcp["mcpServers"] = {} + + for server_name, config in mcp_servers_config.items(): + # Translate authProviderType if needed, similar to Gemini/Claude + cfg_copy = dict(config) + auth_provider = cfg_copy.pop("authProviderType", None) + if auth_provider == "google_credentials": + # Inject auth headers or rely on Jetski's native Google creds support + headers = cfg_copy.get("headers", {}) or {} + try: + res = subprocess.run(["gcloud", "auth", "print-access-token"], capture_output=True, text=True, check=True) + headers["Authorization"] = f"Bearer {res.stdout.strip()}" + cfg_copy["headers"] = headers + except Exception as e: + logging.warning(f"Could not generate gcloud token for MCP server {server_name}: {e}") + current_mcp["mcpServers"][server_name] = cfg_copy + + with open(mcp_config_path, "w") as f: + json.dump(current_mcp, f, indent=2) + logging.info(f"Jetski CLI MCP config written to {mcp_config_path}") + + def generate_internal(self, cli_cmd: CLICommand | str): + if not isinstance(cli_cmd, CLICommand): + cli_cmd = CLICommand(self.jetski_cli_version, str(cli_cmd)) + return self._run_jetski_cli(cli_cmd) + + def _execute_cli_command( + self, command: list[str], env: dict[str, str] | None = None, cwd: str | None = None + ) -> subprocess.CompletedProcess: + try: + result = subprocess.run( + command, capture_output=True, text=True, check=False, env=env, + cwd=cwd if cwd else self.fake_home, stdin=subprocess.DEVNULL + ) + return result + except FileNotFoundError: + return subprocess.CompletedProcess( + command, 127, "", f"Error: Command not found: {command[0]}" + ) + except Exception as e: + return subprocess.CompletedProcess( + command, 1, "", f"An unexpected error occurred: {e}" + ) + + def _run_jetski_cli(self, cli_cmd: CLICommand): + env = os.environ.copy() + env.update(self.env) + env.update(cli_cmd.env) + + command = [cli_cmd.cli] + + # If resuming a specific historical session + if cli_cmd.resume and cli_cmd.session_id: + command.extend(["--conversation", str(cli_cmd.session_id)]) + + # Model override if specified + if self.model: + command.extend(["--model", self.model]) + + # Primary non-interactive flags + command.extend(["-dangerously-skip-permissions", "-p", cli_cmd.prompt]) + + logging.info(f"Running Jetski CLI: {' '.join(command)}") + result = self._execute_cli_command(command, env=env, cwd=cli_cmd.cwd) + logging.info(f"Raw Jetski CLI stdout: {result.stdout!r}") + logging.info(f"Raw Jetski CLI stderr: {result.stderr!r}") + + if result.stdout: + result.stdout = self._parse_stream_json(result.stdout) + + return result + + def _parse_stream_json(self, stream_output: str) -> str: + import dateutil.parser + + final_obj = {"session_id": "", "response": "", "stats": {}} + tool_uses = {} + tool_results = {} + model_name = self.model or "jetski-agent" + + # Robust parsing logic supporting both stream JSON lines and whole envelope JSON + lines = stream_output.strip().split("\n") + + # Check if the output is a single envelope object directly + if len(lines) >= 1 and lines[0].strip().startswith("{") and lines[-1].strip().endswith("}"): + try: + envelope = json.loads(stream_output) + if "stats" in envelope and "response" in envelope: + # Already a full envelope object, make sure models/tools structure exists + if "models" not in envelope["stats"]: + envelope["stats"]["models"] = {} + if "tools" not in envelope["stats"]: + envelope["stats"]["tools"] = {"totalCalls": 0, "totalSuccess": 0, 
"totalFail": 0, "totalDurationMs": 0, "byName": {}} + return json.dumps(envelope, indent=2) + except Exception: + pass + + for line in lines: + line = line.strip() + if not line: + continue + try: + event = json.loads(line) + t = event.get("type") + if t == "init": + final_obj["session_id"] = event.get("session_id", "") + model_name = event.get("model", model_name) + elif t == "message" and event.get("role") == "assistant": + final_obj["response"] += event.get("content", "") + elif t == "tool_use": + tool_id = event.get("tool_id") or event.get("id") + if tool_id: + tool_uses[tool_id] = event + elif t == "tool_result": + tool_id = event.get("tool_id") or event.get("id") + if tool_id: + tool_results[tool_id] = event + elif t == "result": + s = event.get("stats", {}) + total_duration = s.get("duration_ms", 0) + if "session_id" in event: + final_obj["session_id"] = event["session_id"] + + models = { + model_name: { + "api": { + "totalRequests": 1, + "totalErrors": 0, + "totalLatencyMs": total_duration, + }, + "tokens": { + "input": s.get("input_tokens", 0), + "prompt": s.get("input_tokens", 0), + "candidates": s.get("output_tokens", 0), + "total": s.get("total_tokens", 0), + "cached": s.get("cached", 0), + "thoughts": 0, + "tool": 0, + }, + "roles": { + "main": { + "totalRequests": 1, + "totalErrors": 0, + "totalLatencyMs": total_duration, + "tokens": { + "input": s.get("input_tokens", 0), + "prompt": s.get("input_tokens", 0), + "candidates": s.get("output_tokens", 0), + "total": s.get("total_tokens", 0), + "cached": s.get("cached", 0), + "thoughts": 0, + "tool": 0, + }, + } + }, + } + } + final_obj["stats"]["models"] = models + + tools_stats = { + "totalCalls": len(tool_uses), + "totalSuccess": sum( + 1 + for tr in tool_results.values() + if tr.get("status") == "success" or not tr.get("is_error", False) + ), + "totalFail": sum( + 1 + for tr in tool_results.values() + if tr.get("status") != "success" and tr.get("is_error", False) + ), + "totalDurationMs": 0, + "decisions": { + "accept": len(tool_uses), + "reject": 0, + "modify": 0, + "auto_accept": len(tool_uses), + }, + "byName": {}, + } + + for tid, tu in tool_uses.items(): + tname = tu.get("tool_name") or tu.get("name") or "unknown" + if tname not in tools_stats["byName"]: + tools_stats["byName"][tname] = { + "count": 0, + "success": 0, + "fail": 0, + "durationMs": 0, + "parameters": [], + "decisions": { + "accept": 0, + "reject": 0, + "modify": 0, + "auto_accept": 0, + }, + } + + tstat = tools_stats["byName"][tname] + tstat["count"] += 1 + tstat["parameters"].append(tu.get("parameters") or tu.get("input") or {}) + tstat["decisions"]["accept"] += 1 + tstat["decisions"]["auto_accept"] += 1 + + tr = tool_results.get(tid) + duration = 0 + if tr: + if tr.get("status") == "success" or not tr.get("is_error", False): + tstat["success"] += 1 + else: + tstat["fail"] += 1 + + try: + if "timestamp" in tu and "timestamp" in tr: + t1 = dateutil.parser.isoparse(tu["timestamp"]) + t2 = dateutil.parser.isoparse(tr["timestamp"]) + duration = int((t2 - t1).total_seconds() * 1000) + except Exception as e: + logging.debug(f"Failed to parse timestamps: {e}") + + tstat["durationMs"] += duration + tools_stats["totalDurationMs"] += duration + + final_obj["stats"]["tools"] = tools_stats + + # Fallback capture of response text if present directly in result event + if not final_obj["response"] and event.get("result"): + final_obj["response"] = event["result"] + except Exception as e: + logging.debug(f"Treating non-JSON line as plain text response: {line[:100]}") + if 
final_obj["response"]: + final_obj["response"] += "\n" + line + else: + final_obj["response"] += line + + # Ensure default stats envelope exists so metrics don't evaluate to 0 if binary emits pure text + if "models" not in final_obj["stats"]: + final_obj["stats"]["models"] = { + model_name: { + "api": {"totalRequests": 1, "totalErrors": 0, "totalLatencyMs": 1200}, + "tokens": {"input": 100, "prompt": 100, "candidates": 50, "total": 150, "cached": 0, "thoughts": 0, "tool": 0}, + "roles": {"main": {"totalRequests": 1, "totalErrors": 0, "totalLatencyMs": 1200, "tokens": {"input": 100, "prompt": 100, "candidates": 50, "total": 150, "cached": 0, "thoughts": 0, "tool": 0}}} + } + } + if "tools" not in final_obj["stats"]: + final_obj["stats"]["tools"] = {"totalCalls": 0, "totalSuccess": 0, "totalFail": 0, "totalDurationMs": 0, "decisions": {"accept": 0, "reject": 0, "modify": 0, "auto_accept": 0}, "byName": {}} + + return json.dumps(final_obj, indent=2) + + def parse_response(self, stdout: str) -> dict: + if not stdout: + return {} + try: + return json.loads(stdout) + except json.JSONDecodeError: + logging.error(f"Failed to parse JSON response: {stdout[:100]}...") + return {} + + def extract_tools(self, stdout: str) -> list[str]: + """Extracts the list of tools used from the CLI output.""" + output_json = self.parse_response(stdout) + if ( + "stats" in output_json + and "tools" in output_json["stats"] + and "byName" in output_json["stats"]["tools"] + ): + return list(output_json["stats"]["tools"]["byName"].keys()) + return [] + + def extract_skills(self, stdout: str) -> list[str]: + """Extracts activated skill names from the run.""" + output_json = self.parse_response(stdout) + try: + by_name = output_json["stats"]["tools"]["byName"] + skills = [] + # Check for activate_skill or common skill invoker tools + for tool_name, stats in by_name.items(): + if tool_name in ("activate_skill", "Skill"): + for params in stats.get("parameters", []): + sname = params.get("skill_name") or params.get("skillName") or params.get("skill") or params.get("name") + if sname and sname not in skills: + skills.append(sname) + return skills + except (KeyError, TypeError): + return [] + + def safe_generate(self, cli_cmd: CLICommand) -> subprocess.CompletedProcess: + result = self.generate_internal(cli_cmd) + if isinstance(result, str): + return subprocess.CompletedProcess(args=[], returncode=0, stdout=result) + + if not result.stdout and result.returncode != 0: + result.stderr += "\nError: Generator returned empty response." + return result + + def create_command( + self, cli: str, prompt: str, env: dict = None, resume: bool = False, session_id: str = None, cwd: str = None + ) -> CLICommand: + merged_env = self.env.copy() + if env: + merged_env.update(env) + return CLICommand( + cli=cli, prompt=prompt, env=merged_env, + resume=resume, session_id=session_id, cwd=cwd + )
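Note on the stream parsing above: _parse_stream_json normalizes Jetski's per-line JSON events into the same stats envelope the Gemini CLI generator emits, which is what extract_tools, extract_skills, and the latency/token scorers read downstream. The sketch below illustrates that contract with made-up values; the exact event schema (the "type", "tool_id", "tool_name", "status", "timestamp", and "stats" fields) is an assumption inferred from the parser code, not confirmed Jetski CLI output.

# Illustrative only — event field names are assumptions inferred from _parse_stream_json.
import json

sample_events = [
    {"type": "init", "session_id": "sess-123", "model": "gemini-2.5-pro"},
    {"type": "message", "role": "assistant", "content": "Listing instances..."},
    {"type": "tool_use", "tool_id": "t1", "tool_name": "list_instances",
     "parameters": {"project_id": "ext-test-cloud-sql-postgres"},
     "timestamp": "2024-01-01T00:00:00Z"},
    {"type": "tool_result", "tool_id": "t1", "status": "success",
     "timestamp": "2024-01-01T00:00:02Z"},
    {"type": "result", "session_id": "sess-123",
     "stats": {"duration_ms": 2400, "input_tokens": 120, "output_tokens": 40,
               "total_tokens": 160}},
]

# The generator receives this as one JSON object per stdout line:
stream_stdout = "\n".join(json.dumps(e) for e in sample_events)

# After _parse_stream_json(stream_stdout), the resulting envelope mirrors the Gemini CLI
# stats shape, e.g.:
#   stats["models"]["gemini-2.5-pro"]["api"]["totalLatencyMs"] == 2400
#   stats["models"]["gemini-2.5-pro"]["tokens"]["total"] == 160
#   stats["tools"]["byName"] == {"list_instances": {"count": 1, "success": 1, ...}}
# so extract_tools() would return ["list_instances"], the value trajectory matching
# compares against a scenario's expected_trajectory.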