pgEdge · mmols · Apr 16, 2025 · Apr 3, 2025 · Apr 3, 2025 · Apr 14, 2025
diff --git a/build_cli.sh b/build_cli.sh
@@ -0,0 +1,6 @@
+source ./env.sh
+rm -f $OUT/hub-$hubV*
+rm -f $OUT/$bundle-cli-$hubV*
+./build.sh -X posix -c $bundle-cli -N $hubV
+
+exit 0
diff --git a/cli/scripts/ace.py b/cli/scripts/ace.py
@@ -2019,5 +2019,7 @@ def error_listener(event):
             "schema-diff": ace_cli.schema_diff_cli,
             "spock-diff": ace_cli.spock_diff_cli,
             "spock-exception-update": ace_cli.update_spock_exception_cli,
+            "auto-repair": ace_cli.auto_repair_cli,
+            "start": ace_cli.start_cli,
         }
     )
diff --git a/cli/scripts/ace_cli.py b/cli/scripts/ace_cli.py
@@ -30,29 +30,37 @@ def table_diff_cli(
     quiet=False,
 ):
     """
-    Performs a table diff operation on a specified cluster and table.
+    Compare a table across a cluster and produce a report showing 
+    any differences.
 
     Args:
-        cluster_name (str): Name of the cluster to perform the diff on.
-        table_name (str): Name of the table to diff.
-        dbname (str, optional): Name of the database. Defaults to None.
-        block_rows (int, optional): Number of rows per block. Defaults to
-            config.BLOCK_ROWS_DEFAULT.
-        max_cpu_ratio (float, optional): Maximum CPU usage ratio. Defaults to
-            config.MAX_CPU_RATIO_DEFAULT.
-        output (str, optional): Output format. Defaults to "json".
-        nodes (str, optional): Nodes to include in the diff. Defaults to "all".
-        batch_size (int, optional): Size of each batch. Defaults to
+        cluster_name (str): Name of the cluster where the operation should be performed.
+        table_name (str): Schema-qualified name of the table that you are 
+            comparing across cluster nodes.
+        dbname (str, optional): Name of the database. Defaults to the name of 
+            the first database in the cluster configuration.
+        block_rows (int, optional): Number of rows to process per block. 
+            Defaults to config.BLOCK_ROWS_DEFAULT. 
+        max_cpu_ratio (float, optional): Maximum CPU utilisation. The accepted 
+            range is 0.0-1.0. Defaults to config.MAX_CPU_RATIO_DEFAULT.
+        output (str, optional): Output format. Acceptable values are "json" or 
+            "csv". Defaults to "json".
+        nodes (str, optional): Comma-delimited subset of nodes on which the 
+            command will be executed. Defaults to "all".
+        batch_size (int, optional): Size of each batch. Defaults to 
             config.BATCH_SIZE_DEFAULT.
-        quiet (bool, optional): Whether to suppress output. Defaults to False.
+        table_filter (str, optional): A SQL WHERE clause that allows you to 
+            filter rows for comparison.
+        quiet (bool, optional): Whether to suppress output in stdout. Defaults 
+            to False.
 
     Raises:
         AceException: If there's an error specific to the ACE operation.
         Exception: For any unexpected errors during the table diff operation.
 
     Returns:
-        None. The function performs the table diff operation and handles any
-        exceptions. All output messages are printed to stdout since it's a CLI
+        None. The function performs the table diff operation and handles any 
+        exceptions. All output messages are printed to stdout since it's a CLI 
         function.
     """
     task_id = ace_db.generate_task_id()
@@ -103,34 +111,50 @@ def table_repair_cli(
     bidirectional=False,
 ):
     """
-    Performs a table repair operation on a specified cluster and table.
+    Repair a table across a cluster by fixing data inconsistencies identified 
+    in a table-diff operation.
 
     Args:
-        cluster_name (str): Name of the cluster to perform the repair on.
+        cluster_name (str): Name of the cluster where the operation should be performed.
         diff_file (str): Path to the diff file generated by a previous table diff.
-        source_of_truth (str): Node to be used as the source of truth for the repair.
-        table_name (str): Name of the table to repair.
-        dbname (str, optional): Name of the database. Defaults to None.
-        dry_run (bool, optional): If True, simulates the repair without changes.
-            Defaults to False.
-        quiet (bool, optional): Whether to suppress output. Defaults to False.
-        generate_report (bool, optional): If True, generates a detailed report of
-            the repair. Defaults to False.
-        upsert_only (bool, optional): If True, only performs upsert operations,
+        source_of_truth (str): Node name to be used as the source of truth for 
+            the repair.
+        table_name (str): Schema-qualified name of the table that you are
+            repairing across cluster nodes.
+        dbname (str, optional): Name of the database. Defaults to the name of 
+            the first database in the cluster configuration.
+        dry_run (bool, optional): If True, simulates the repair without making 
+            changes. Defaults to False.
+        generate_report (bool, optional): If True, generates a detailed report 
+            of the repair. Defaults to False.
+        upsert_only (bool, optional): If True, only performs upsert operations, 
             skipping deletions. Defaults to False.
-        fix_nulls (bool, optional): If True, fixes null values in the table columns
-            by looking at the corresponding column in the other nodes. Does not need
-            the source of truth to be specified. Must be used only in special cases.
-            This is not a recommended option for repairing divergence. Defaults to
-            False.
+        insert_only (bool, optional): If True, only performs insert operations, 
+            skipping updates and deletions.
+        fix_nulls (bool, optional): If True, fixes null values in the table 
+            columns by looking at the corresponding column in the other nodes. 
+            Does not need the source of truth to be specified. Must be used 
+            only in special cases. This is not a recommended option for 
+            repairing divergence. Defaults to False.
+        fire_triggers (bool, optional): If True, instructs triggers to fire 
+            when a repair is performed; note that ENABLE ALWAYS triggers will 
+            fire regardless of the value.
+        bidirectional (bool, optional): If True, performs a bidirectional
+            repair that applies differences found between nodes to create a
+            distinct union of the content. In a distinct union, each row that
+            is missing is recreated on the node from which it is missing,
+            eventually leading to a data set (on all nodes) in which all rows
+            are represented exactly once. Defaults to False.
+        quiet (bool, optional): Whether to suppress output in stdout. Defaults 
+            to False.
 
     Raises:
         AceException: If there's an error specific to the ACE operation.
         Exception: For any unexpected errors during the table repair operation.
 
     Returns:
-        None. The function performs the table repair operation and handles any
-        exceptions. All output messages are printed to stdout since it's a CLI
+        None. The function performs the table repair operation and handles any 
+        exceptions. All output messages are printed to stdout since it's a CLI 
         function.
     """
     task_id = ace_db.generate_task_id()
@@ -187,21 +211,29 @@ def table_rerun_cli(
     Reruns a table diff operation based on a previous diff file.
 
     Args:
-        cluster_name (str): Name of the cluster.
-        diff_file (str): Path to the diff file from a previous table diff operation.
-        table_name (str): Name of the table to rerun the diff on.
-        dbname (str, optional): Name of the database. Defaults to None.
-        quiet (bool, optional): Whether to suppress output. Defaults to False.
-        behavior (str, optional): The rerun behavior, either "multiprocessing" or
-            "hostdb". Defaults to "multiprocessing".
+        cluster_name (str): Name of the cluster where the operation should be performed.
+        diff_file (str): Path to the diff file from a previous table diff 
+            operation.
+        table_name (str): Schema-qualified name of the table that you are 
+            comparing across cluster nodes.
+        dbname (str, optional): Name of the database. Defaults to the name of 
+            the first database in the cluster configuration.
+        behavior (str, optional): The rerun behavior, either "multiprocessing" 
+            or "hostdb". "multiprocessing" uses parallel processing for faster 
+            execution. "hostdb" uses the host database to create temporary 
+            tables for faster comparisons. Defaults to "multiprocessing".
+        table_filter (str, optional): A SQL WHERE clause that allows you to 
+            filter rows for comparison.
+        quiet (bool, optional): Whether to suppress output in stdout. Defaults 
+            to False.
 
     Raises:
         AceException: If there's an error specific to the ACE operation.
         Exception: For any unexpected errors during the table rerun operation.
 
     Returns:
-        None. The function performs the table rerun operation and handles any
-        exceptions. All output messages are printed to stdout since it's a CLI
+        None. The function performs the table rerun operation and handles any 
+        exceptions. All output messages are printed to stdout since it's a CLI 
         function.
     """
     task_id = ace_db.generate_task_id()
@@ -264,22 +296,28 @@ def repset_diff_cli(
     skip_file=None,
 ):
     """
-    Performs a repset diff operation on a specified cluster and repset.
+    Compare a repset across a cluster and produce a report showing 
+    any differences.
 
     Args:
-        cluster_name (str): Name of the cluster.
-        repset_name (str): Name of the repset to diff.
-        dbname (str, optional): Name of the database. Defaults to None.
-        block_rows (int, optional): Number of rows per block. Defaults to
-            config.BLOCK_ROWS_DEFAULT.
-        max_cpu_ratio (float, optional): Maximum CPU usage ratio. Defaults to
-            config.MAX_CPU_RATIO_DEFAULT.
-        output (str, optional): Output format. Defaults to "json".
-        nodes (str, optional): Nodes to include in the diff. Defaults to "all".
-        batch_size (int, optional): Size of each batch. Defaults to
+        cluster_name (str): Name of the cluster where the operation should be performed.
+        repset_name (str): Name of the repset to compare across cluster nodes.
+        dbname (str, optional): Name of the database. Defaults to the name of 
+            the first database in the cluster configuration.
+        block_rows (int, optional): Number of rows to process per block. 
+            Defaults to config.BLOCK_ROWS_DEFAULT. 
+        max_cpu_ratio (float, optional): Maximum CPU utilisation. The accepted 
+            range is 0.0-1.0. Defaults to config.MAX_CPU_RATIO_DEFAULT.
+        output (str, optional): Output format. Acceptable values are "json" or 
+            "csv". Defaults to "json".
+        nodes (str, optional): Comma-delimited subset of nodes on which the 
+            command will be executed. Defaults to "all".
+        batch_size (int, optional): Size of each batch. Defaults to 
             config.BATCH_SIZE_DEFAULT.
-        quiet (bool, optional): Whether to suppress output. Defaults to False.
-        skip_tables (list, optional): List of tables to skip. Defaults to None.
+        quiet (bool, optional): Whether to suppress output in stdout. Defaults 
+            to False.
+        skip_tables (list, optional): Comma-delimited list of tables to skip.
+        skip_file (str, optional): Path to a file containing a list of tables to skip.
 
     Raises:
         AceException: If there's an error specific to the ACE operation.
@@ -333,21 +371,27 @@ def spock_diff_cli(
     quiet=False,
 ):
     """
-    Performs a spock diff operation on a specified cluster.
+    Compare the spock metadata across a cluster and produce a report showing 
+    any differences.
 
     Args:
-        cluster_name (str): Name of the cluster.
-        dbname (str, optional): Name of the database. Defaults to None.
-        nodes (str, optional): Nodes to include in the diff. Defaults to "all".
-        quiet (bool, optional): Whether to suppress output. Defaults to False.
+        cluster_name (str): Name of the cluster where the operation should be 
+            performed.
+        dbname (str, optional): Name of the database. Defaults to the name of 
+            the first database in the cluster configuration.
+        nodes (str, optional): Comma-delimited subset of nodes on which the 
+            command will be executed. Defaults to "all".
+        quiet (bool, optional): Whether to suppress output in stdout. Defaults 
+            to False.
 
     Raises:
         AceException: If there's an error specific to the ACE operation.
         Exception: For any unexpected errors during the spock diff operation.
 
     Returns:
-        None. The function performs the spock diff operation and handles any exceptions.
-        All output messages are printed to stdout since it's a CLI function.
+        None. The function performs the spock diff operation and handles any 
+        exceptions. All output messages are printed to stdout since it's a CLI 
+        function.
     """
     task_id = ace_db.generate_task_id()
 
@@ -386,22 +430,34 @@ def schema_diff_cli(
     quiet=False,
 ):
     """
-    Performs a schema diff operation on a specified cluster and schema.
+    Compare a schema across a cluster and produce a report showing 
+    any differences.
 
     Args:
-        cluster_name (str): Name of the cluster.
-        schema_name (str): Name of the schema to diff.
-        nodes (str, optional): Nodes to include in the diff. Defaults to "all".
-        dbname (str, optional): Name of the database. Defaults to None.
-        quiet (bool, optional): Whether to suppress output. Defaults to False.
+        cluster_name (str): Name of the cluster where the operation should 
+            be performed.
+        schema_name (str): Name of the schema that you are comparing across 
+            cluster nodes.
+        nodes (str, optional): Comma-delimited subset of nodes on which the 
+            command will be executed. Defaults to "all".
+        dbname (str, optional): Name of the database. Defaults to the name of 
+            the first database in the cluster configuration.
+        ddl_only (bool, optional): If True, only compares DDL differences 
+            across nodes.
+        skip_tables (list, optional): Comma-delimited list of tables to skip.
+        skip_file (str, optional): Path to a file containing a list of tables 
+            to skip.
+        quiet (bool, optional): Whether to suppress output in stdout. Defaults 
+            to False.
 
     Raises:
         AceException: If there's an error specific to the ACE operation.
         Exception: For any unexpected errors during the schema diff operation.
 
     Returns:
-        None. The function performs the schema diff operation and handles any exceptions.
-        All output messages are printed to stdout since it's a CLI function.
+        None. The function performs the schema diff operation and handles any 
+        exceptions. All output messages are printed to stdout since it's a CLI 
+        function.
     """
     task_id = ace_db.generate_task_id()
 
@@ -441,14 +497,18 @@ def update_spock_exception_cli(cluster_name, node_name, entry, dbname=None) -> N
     Updates the Spock exception status for a specified cluster and node.
 
     Args:
-        cluster_name (str): The name of the cluster where the exception is being updated.
-        node_name (str): The name of the node within the cluster.
+        cluster_name (str): Name of the cluster where the operation should 
+            be performed.
+        node_name (str): The name of the node within the cluster where the 
+            update should be performed.
         entry (str): The exception entry in JSON format.
-        dbname (str, optional): The name of the database. Defaults to the first database if not set
+        dbname (str, optional): Name of the database. Defaults to the name of 
+            the first database in the cluster configuration.
 
     Raises:
         AceException: If an error specific to the ACE system occurs.
-        json.JSONDecodeError: If the provided exception entry is not valid JSON.
+        json.JSONDecodeError: If the provided exception entry is not valid 
+            JSON.
         Exception: For any other unexpected errors.
 
     Returns:
@@ -467,3 +527,15 @@ def update_spock_exception_cli(cluster_name, node_name, entry, dbname=None) -> N
         util.exit_message(f"Unexpected error while running exception status: {e}")
 
     util.message("Spock exception status updated successfully", p_state="success")
+
+def auto_repair_cli() -> None:
+    """
+    Invoke the automatic repair process.
+    """
+    ace_core.auto_repair()
+
+def start_cli() -> None:
+    """
+    Start the ACE background scheduler and API.
+    """
+    ace_daemon.start_ace()
diff --git a/cli/scripts/um.py b/cli/scripts/um.py
@@ -142,7 +142,7 @@ def remove(component):
 
 
 def upgrade(component):
-    """Perform an upgrade  to a newer version of a component"""
+    """Perform an upgrade to a newer version of a component"""
 
     run_cmd("upgrade", component)
 

diff --git a/devel/setup/compose/docker-compose.yaml b/devel/setup/compose/docker-compose.yaml
@@ -37,6 +37,7 @@ services:
       - mynetwork
     volumes:
       - ./repo:/home/build/dev/out
+      - ${GITHUB_REPO_ROOT:-../../../}:/home/build/dev/cli
     stdin_open: true
     tty: true