From 88f70a79844adf651ebfa195b0494770443ad052 Mon Sep 17 00:00:00 2001 From: PawelPlesniak Date: Tue, 10 Feb 2026 14:45:45 +0100 Subject: [PATCH 01/29] First step of integration tests --- integtest/process_manager_test.py | 243 ++++++++++++++++++++++++ scripts/drunc_integtest_bundle.sh | 299 ++++++++++++++++++++++++++++++ 2 files changed, 542 insertions(+) create mode 100644 integtest/process_manager_test.py create mode 100644 scripts/drunc_integtest_bundle.sh diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py new file mode 100644 index 000000000..ef81caa99 --- /dev/null +++ b/integtest/process_manager_test.py @@ -0,0 +1,243 @@ +import os +import re + +import integrationtest.data_classes as data_classes +import integrationtest.data_file_checks as data_file_checks +import integrationtest.log_file_checks as log_file_checks +import integrationtest.opmon_metric_checks as opmon_metric_checks + +pytest_plugins = "integrationtest.integrationtest_drunc" + +# Values that help determine the running conditions +number_of_data_producers = 2 +data_rate_slowdown_factor = 1 # 10 for ProtoWIB/DuneWIB +run_duration = 10 # seconds +readout_window_time_before = 1000 +readout_window_time_after = 1001 + +# Default values for validation parameters +expected_number_of_data_files = 1 +check_for_logfile_errors = True +expected_event_count = run_duration +expected_event_count_tolerance = 2 +wibeth_frag_params = { + "fragment_type_description": "WIBEth", + "fragment_type": "WIBEth", + "expected_fragment_count": number_of_data_producers, + "min_size_bytes": 7272, + "max_size_bytes": 14472, +} +triggercandidate_frag_params = { + "fragment_type_description": "Trigger Candidate", + "fragment_type": "Trigger_Candidate", + "expected_fragment_count": 1, + "min_size_bytes": 128, + "max_size_bytes": 216, +} +hsi_frag_params = { + "fragment_type_description": "HSI", + "fragment_type": "Hardware_Signal", + "expected_fragment_count": 0, + "min_size_bytes": 72, + 
"max_size_bytes": 100, +} +ignored_logfile_problems = { + "-controller": [ + "Worker with pid \\d+ was terminated due to signal", + "Connection '.*' not found on the application registry", + ], + "connectivity-service": [ + "errorlog: -", + ], +} + +# The next three variable declarations *must* be present as globals in the test +# file. They're read by the "fixtures" in conftest.py to determine how +# to run the config generation and nanorc + +# The arguments to pass to the config generator, excluding the json +# output directory (the test framework handles that) + +# CCM includes FSM, hosts; moduleconfs includes connections +object_databases = ["config/daqsystemtest/integrationtest-objects.data.xml"] + +conf_dict = data_classes.drunc_config() +conf_dict.dro_map_config.n_streams = number_of_data_producers +conf_dict.op_env = "integtest" +conf_dict.session = "minimal" +conf_dict.tpg_enabled = False + +# For testing, allow drunc to manage ConnectivityService (default is False, integrationtest manages Connectivity Service) +# conf_dict.drunc_connsvc = True +# For testing, specify connectivity service port (default is 0, a random port is chosen for the Connectivity Service) +# conf_dict.connsvc_port = 12345 + +substitution = data_classes.attribute_substitution( + obj_id="random-tc-generator", + obj_class="RandomTCMakerConf", + updates={"trigger_rate_hz": 1}, +) +conf_dict.config_substitutions.append( + data_classes.attribute_substitution( + obj_class="TCReadoutMap", + obj_id="def-random-readout", + updates={ + "time_before": readout_window_time_before, + "time_after": readout_window_time_after, + }, + ) +) +conf_dict.config_substitutions.append(substitution) + + +confgen_arguments = {"MinimalSystem": conf_dict} +# The commands to run in nanorc, as a list +nanorc_command_list = "boot restart -n root-controller restart -n mlt logs -n root-controller logs -n mlt ps flush terminate boot terminate boot".split() + +# The tests themselves + + +def 
test_nanorc_success(run_nanorc): + # print the name of the current test + current_test = os.environ.get("PYTEST_CURRENT_TEST") + match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) + if match_obj: + current_test = match_obj.group(1) + banner_line = re.sub(".", "=", current_test) + print(banner_line) + print(current_test) + print(banner_line) + + # Check that nanorc completed correctly + assert run_nanorc.completed_process.returncode == 0 + + +def test_log_files(run_nanorc): + # Check that at least some of the expected log files are present + assert any( + f"{run_nanorc.session}_df-01" in str(logname) + for logname in run_nanorc.log_files + ) + assert any( + f"{run_nanorc.session}_dfo" in str(logname) for logname in run_nanorc.log_files + ) + assert any( + f"{run_nanorc.session}_mlt" in str(logname) for logname in run_nanorc.log_files + ) + assert any( + f"{run_nanorc.session}_ru" in str(logname) for logname in run_nanorc.log_files + ) + + if check_for_logfile_errors: + # Check that there are no warnings or errors in the log files + assert log_file_checks.logs_are_error_free( + run_nanorc.log_files, True, True, ignored_logfile_problems + ) + + +def test_data_files(run_nanorc): + # Run some tests on the output data file + all_ok = len(run_nanorc.data_files) == expected_number_of_data_files + print("") # Clear potential dot from pytest + if all_ok: + print( + f"\N{WHITE HEAVY CHECK MARK} The correct number of raw data files was found ({expected_number_of_data_files})" + ) + else: + print( + f"\N{POLICE CARS REVOLVING LIGHT} An incorrect number of raw data files was found, expected {expected_number_of_data_files}, found {len(run_nanorc.data_files)} \N{POLICE CARS REVOLVING LIGHT}" + ) + + fragment_check_list = [triggercandidate_frag_params, hsi_frag_params] + fragment_check_list.append(wibeth_frag_params) + nontrig_fragment_check_list = [hsi_frag_params, wibeth_frag_params] + + for idx in range(len(run_nanorc.data_files)): + data_file = 
data_file_checks.DataFile(run_nanorc.data_files[idx]) + all_ok &= data_file_checks.sanity_check(data_file) + all_ok &= data_file_checks.check_file_attributes(data_file) + all_ok &= data_file_checks.check_event_count( + data_file, expected_event_count, expected_event_count_tolerance + ) + for jdx in range(len(fragment_check_list)): + all_ok &= data_file_checks.check_fragment_count( + data_file, fragment_check_list[jdx] + ) + all_ok &= data_file_checks.check_fragment_sizes( + data_file, fragment_check_list[jdx] + ) + for kdx in range(len(nontrig_fragment_check_list)): + all_ok &= data_file_checks.check_fragment_error_flags( + data_file, nontrig_fragment_check_list[kdx] + ) + + assert all_ok + + +# 26-Nov-2025, KAB: added some sample opmon metric checks, for demonstration purposes +def test_metric_files(run_nanorc): + print("") # Clear potential dot from pytest + + # 10-Dec-2025, KAB: we have noticed that sometimes drunc transitions (or other parts of + # a run control session) take a little longer than expected. This can cause extra metric + # samples to be created. This section of code takes that into account by increasing + # the max allowed sample count by the amount of extra time taken, divided by 10 + # (metric samples are produced every 10 seconds, by default). + # I've tried to make this code backward compatible by handling cases in which the + # daq_session_overall_time is not available (e.g. the try/catch). + # + # The expected DAQ session time is the sum of the time spent in the "running" state + # (specified in the run control commands above [run_duration]) plus the "wait" times in + # the RC commands plus the time spent in RC transitions. With a run duration of 20 sec, + # the session time has been measured to be ~40 seconds, so we take the extra 20 seconds + # into account. 
+ expected_daq_session_time = run_duration + 20 + # + # To calculate the expected number of metric samples, we subtract a small-ish amount of + # time that the DAQ session spends in state(s) that don't produce metrics (say 3 seconds) + # and divide by 10, where 10 seconds is the interval between each reporting of metrics. + expected_metric_sample_count = int((expected_daq_session_time - 3) / 10) + # + # We'll set the maximum allowed sample count slightly higher than the expected value. + max_metric_sample_count = expected_metric_sample_count + 2 + try: + # print(f"\nDAQ session overall time: {run_nanorc.daq_session_overall_time} seconds") + if run_nanorc.daq_session_overall_time is not None: + extra_time_taken = ( + run_nanorc.daq_session_overall_time - expected_daq_session_time + ) + if extra_time_taken > 10: + extra_sample_count_allowance = int(extra_time_taken / 10) + max_metric_sample_count += extra_sample_count_allowance + except AttributeError: + pass + + session_name = ( + run_nanorc.session_name if run_nanorc.session_name else run_nanorc.session + ) + metric_data = opmon_metric_checks.collate_opmon_data_from_files( + run_nanorc.opmon_files + ) + + metric_key_list = [ + session_name, + "df-01", + "df-01-trb", + "dfmodules.TRBInfo", + "generated_trigger_records", + ] + all_ok = True + # a 20-second run will likely result in 3 metric samples (at 10-second intervals), so a range + # of 1..5 should always succeed + all_ok &= opmon_metric_checks.check_metric_sample_count( + metric_data, metric_key_list, min_count=1, max_count=max_metric_sample_count + ) + # the number of triggers expected in this test is based on the run duration, so we check for + # a reported number of generated trigger records between slightly above/below that + all_ok &= opmon_metric_checks.check_metric_value_sum( + metric_data, + metric_key_list, + min_value_sum=run_duration - 3, + max_value_sum=run_duration + 3, + ) + assert all_ok diff --git a/scripts/drunc_integtest_bundle.sh 
b/scripts/drunc_integtest_bundle.sh new file mode 100644 index 000000000..785b07217 --- /dev/null +++ b/scripts/drunc_integtest_bundle.sh @@ -0,0 +1,299 @@ +#!/bin/bash + +# Defines a driver script for the drunc integration tests. +# The purpose of these scripts is to run a set of integration test with all of the features of drunc tested, so any introduced changes do not affect functionality of the existing infrastructure. +# Based entirely of the implementation of daqsystemtest_integtest_bundle.sh +# Original author: KAB, 10-Oct-2023 + +integtest_list=( "process_manager_test.py" ) +let last_test_index=${#integtest_list[@]}-1 + +usage() { + declare -r script_name=$(basename "$0") + echo """ +Usage: +"${script_name}" [option(s)] + +Options: + -h, --help : prints out usage information + -f + -l + -k + -n + -N + --stop-on-failure : causes the script to stop when one of the integtests reports a failure + --concise-output : suppresses run control and DAQApp messages in order to focus on test results + --tmpdir : specifies a root directory to use for test output, e.g. a directory instead of '/tmp' +""" + let counter=0 + echo "List of available tests:" + for tst in ${integtest_list[@]}; do + echo " ${counter}: $tst" + let counter=${counter}+1 + done + echo "" +} + +# 29-Dec-2025, KAB: Determine if a non-standard pytest tmpdir has been specified +# in the linux shell environment in which this script is being run. We need to know +# this value in order to direct functionality in this script to the right place. +# A user-specified command-line value for the tmpdir over-rides the value determined here. 
+tmpdir_root=`dst_get_pytest_tmpdir` + +# Removes the ANSI characters associated with formatting, including color coding and font styling +CaptureOutputNoANSI() { + tee -a >(sed -u 's/\x1b\[[0-9;]*m//g' >> "$1") +} +# Captures the output to the specified file, without changing the output +CaptureOutput() { + tee -a $1 +} + +GETOPT_TEMP=`getopt -o hs:f:l:k:n:N: --long help,stop-on-failure,concise-output,tmpdir: -- "$@"` +eval set -- "$GETOPT_TEMP" + +let first_test_index=0 +let individual_test_requested_iterations=1 +let full_set_requested_interations=1 +let stop_on_failure=0 +requested_test_names= +PYTEST_COMMAND="pytest -s --tb=short" # our core pytest command, with DAQ printout included and short pytest traceback + +while true; do + case "$1" in + -h|--help) + usage + exit 0 + ;; + -f) + let first_test_index=$2 + shift 2 + ;; + -l) + let last_test_index=$2 + shift 2 + ;; + -k) + requested_test_names=$2 + shift 2 + ;; + -n) + let individual_test_requested_iterations=$2 + shift 2 + ;; + -N) + let full_set_requested_interations=$2 + shift 2 + ;; + --stop-on-failure) + let stop_on_failure=1 + PYTEST_COMMAND="${PYTEST_COMMAND} -x" # add the -x option to our pytest command to have it exit on first error + shift + ;; + --concise-output) + PYTEST_COMMAND="`echo ${PYTEST_COMMAND} | sed 's/ -s//'`" # remove the -s option to turn off messages from DAQ processes + shift + ;; + --tmpdir) + tmpdir_root=$2 + export PYTEST_DEBUG_TEMPROOT=${tmpdir_root} + shift 2 + ;; + --) + shift + break + ;; + esac +done + +# check if the numad daemon is running +numad_grep_output=`ps -ef | grep numad | grep -v grep` +if [[ "${numad_grep_output}" != "" ]]; then + echo "*********************************************************************" + echo "*** DANGER, DANGER, 'numad' appears to be running on this computer!" + echo "*** 'ps' output: ${numad_grep_output}" + echo "*** now if you want to abort this testing." 
+ echo "*********************************************************************" + sleep 3 +fi + +# other setup +INITIAL_TIMESTAMP=`date '+%Y%m%d%H%M%S'` +# 30-Dec-2025, KAB: check that the specified tmpdir exists and is writeable +if [[ ! -d ${tmpdir_root} ]]; then + echo "*** ERROR: directory \"${tmpdir_root}\" does not exist." + exit 1 +fi +if [[ ! -w ${tmpdir_root} ]]; then + echo "*** ERROR: directory \"${tmpdir_root}\" is not writeable in the current environment." + exit 1 +fi +pytest_user_dir=${tmpdir_root}/pytest-of-${USER} +mkdir -p ${pytest_user_dir} +ITGRUNNER_LOG_FILE="${pytest_user_dir}/drunc_integtest_bundle_${INITIAL_TIMESTAMP}.log" +CURRENT_PID=$$ + +let number_of_individual_tests=0 +let test_index=0 +for TEST_NAME in "${integtest_list[@]}"; do + if [[ ${test_index} -ge ${first_test_index} && ${test_index} -le ${last_test_index} ]]; then + requested_test=`echo ${TEST_NAME} | egrep -i ${requested_test_names:-${TEST_NAME}}` + if [[ "${requested_test}" != "" ]]; then + let number_of_individual_tests=${number_of_individual_tests}+1 + fi + fi + let test_index=${test_index}+1 +done +let total_number_of_tests=${number_of_individual_tests}*${individual_test_requested_iterations}*${full_set_requested_interations} + +# run the tests +let overall_test_index=0 # this is only used for user feedback +let full_set_loop_count=0 +while [[ ${full_set_loop_count} -lt ${full_set_requested_interations} ]]; do + let test_index=0 + for TEST_NAME in "${integtest_list[@]}"; do + if [[ ${test_index} -ge ${first_test_index} && ${test_index} -le ${last_test_index} ]]; then + CURRENT_TIMESTAMP=`date '+%Y%m%d%H%M%S'` + # 15-Dec-2025, KAB: added the export of the following enviromental variable. This is used + # by the integrationtest infrastructure to put a bread-crumb file in the directory where + # the test results are located. That file, in turn, allows this script to find the directory + # for the current test, and make a copy of it if the test fails. 
+ export DUNEDAQ_INTEGTEST_BUNDLE_INFO="${INITIAL_TIMESTAMP};${CURRENT_PID};${CURRENT_TIMESTAMP}" + requested_test=`echo ${TEST_NAME} | egrep -i ${requested_test_names:-${TEST_NAME}}` + if [[ "${requested_test}" != "" ]]; then + let individual_loop_count=0 + while [[ ${individual_loop_count} -lt ${individual_test_requested_iterations} ]]; do + let overall_test_index=${overall_test_index}+1 + echo "" + echo -e "\U0001F535 \033[0;34mStarting test ${overall_test_index} of ${total_number_of_tests}...\033[0m \U0001F535" | CaptureOutput ${ITGRUNNER_LOG_FILE} + + echo -e "\u2B95 \033[0;1mRunning ${TEST_NAME}\033[0m \u2B05" | CaptureOutput ${ITGRUNNER_LOG_FILE} + if [[ -e "./${TEST_NAME}" ]]; then + ${PYTEST_COMMAND} ./${TEST_NAME} | CaptureOutputNoANSI ${ITGRUNNER_LOG_FILE} + elif [[ -e "${DBT_AREA_ROOT}/pythoncode/drunc/integtest/${TEST_NAME}" ]]; then + if [[ -w "${DBT_AREA_ROOT}" ]]; then + ${PYTEST_COMMAND} ${DBT_AREA_ROOT}/pythoncode/drunc/integtest/${TEST_NAME} | CaptureOutputNoANSI ${ITGRUNNER_LOG_FILE} + else + ${PYTEST_COMMAND} -p no:cacheprovider ${DBT_AREA_ROOT}/pythoncode/drunc/integtest/${TEST_NAME} | CaptureOutputNoANSI ${ITGRUNNER_LOG_FILE} + fi + else + ${PYTEST_COMMAND} -p no:cacheprovider ${DAQSYSTEMTEST_SHARE}/integtest/${TEST_NAME} | CaptureOutputNoANSI ${ITGRUNNER_LOG_FILE} + fi + let pytest_return_code=${PIPESTATUS[0]} + + let individual_loop_count=${individual_loop_count}+1 + + # check if the test failed + if [[ ${pytest_return_code} -ne 0 ]]; then + # 15-Dec-2025, KAB: make a copy of the pytest directory. This allows + # testers to take a look at the results within a reasonable time frame. + # (If we can't find the "jq" JSON utility, we simply note that fact + # and continue.) + # This code makes use of a bread-crumb file that is created by the + # integrationtest infrastructure. 
+ if [[ "`which jq 2>/dev/null`" != "" ]]; then + current_pytest_rundir="" + mapfile -t bundle_info_files < <(find "${pytest_user_dir}" -type f -name "bundle_script_info.json" -printf '%T@ %p\n' | grep -v 'failed-' | sort -nr | awk '{print $2}') + for info_file in "${bundle_info_files[@]}"; do + script_start_time=`jq -r .bundle_script_start_time ${info_file}` + script_pid=`jq -r .bundle_script_process_id ${info_file}` + individual_test_start_time=`jq -r .individual_test_start_time ${info_file}` + if [[ ${script_start_time} -eq ${INITIAL_TIMESTAMP} ]] && \ + [[ ${script_pid} -eq ${CURRENT_PID} ]] && \ + [[ ${individual_test_start_time} -eq ${CURRENT_TIMESTAMP} ]]; then + current_pytest_rundir=$info_file + break + fi + done + + was_successfully_copied="" + if [[ "${current_pytest_rundir}" != "" ]]; then + pytest_tmpdir=`echo ${current_pytest_rundir} | xargs -r dirname | xargs -r dirname` + if [[ "${pytest_tmpdir}" != "" ]]; then + pytest_rootdir=`echo ${pytest_tmpdir} | xargs -r dirname` + pytest_basedir=`echo ${pytest_tmpdir} | xargs -r basename` + if [[ "${pytest_rootdir}" != "" ]] && [[ "${pytest_basedir}" != "" ]]; then + new_dir="${pytest_rootdir}/failed-${pytest_basedir}" + echo "" + echo -e "\U1F535 Copying the files from failed test ${pytest_tmpdir} to ${new_dir}. \U1F535" + cp -pR "${pytest_tmpdir}" "${new_dir}" + if [[ $? == 0 ]]; then + was_successfully_copied="yes" + # 18-Dec-2025, KAB: added the removal of the "current" symbolic links + # from inside the copied directory (since they get broken in the copying) + rm -f "${new_dir}/configcurrent" + rm -f "${new_dir}/runcurrent" + fi + fi + fi + fi + if [[ "${was_successfully_copied}" == "" ]]; then + echo "" + echo -e "\U1f7e1 WARNING: Unable to copy the pytest directory for this failed test (${current_pytest_rundir}). \U1f7e1" + fi + else + echo "" + echo -e "\U1f7e1 WARNING: Unable to find the 'jq' utility which is needed to help identify which pytest directory to copy for this failed test. 
\U1f7e1" + fi + + # remove stale and surplus directories from failed tests + test_dirs_to_remove=() + mapfile -t all_failed_test_dirs < <(find ${pytest_user_dir} -maxdepth 1 -type d -printf '%T@ %p\n' | sort -nr | awk '{print $2}' | grep 'failed-') + surplus_dirs=("${all_failed_test_dirs[@]:10}") + for test_dir in "${surplus_dirs[@]}"; do + test_dirs_to_remove+=(${test_dir}) + done + stale_failed_test_dirs=(`find ${pytest_user_dir} -maxdepth 1 -type d -name 'failed-*' -cmin +1560 -print`) + for test_dir in "${stale_failed_test_dirs[@]}"; do + test_dirs_to_remove+=(${test_dir}) + done + if [[ ${#test_dirs_to_remove[@]} -gt 0 ]];then + echo -e "\U1F535 Removing ${#test_dirs_to_remove[@]} old failed test directory(ies). \U1F535" + for test_dir in "${test_dirs_to_remove[@]}"; do + if [[ -e "${test_dir}" ]]; then + rm -rf "${test_dir}" + fi + done + fi + + # exit out of this script if the user has requested that we stop on a failure + if [[ ${stop_on_failure} -gt 0 ]]; then + break 3 + fi + fi + done + fi + fi + let test_index=${test_index}+1 + done + + let full_set_loop_count=${full_set_loop_count}+1 +done + +# print out summary information +echo "" | CaptureOutput ${ITGRUNNER_LOG_FILE} +echo "" | CaptureOutput ${ITGRUNNER_LOG_FILE} +echo "+++++++++++++++++++++++++++++++++++++++++++++++++" | CaptureOutput ${ITGRUNNER_LOG_FILE} +echo "++++++++++++++++++++ SUMMARY ++++++++++++++++++++" | CaptureOutput ${ITGRUNNER_LOG_FILE} +echo "+++++++++++++++++++++++++++++++++++++++++++++++++" | CaptureOutput ${ITGRUNNER_LOG_FILE} +echo "" | CaptureOutput ${ITGRUNNER_LOG_FILE} +date | CaptureOutput ${ITGRUNNER_LOG_FILE} +echo "Log file is: ${ITGRUNNER_LOG_FILE}" | CaptureOutput ${ITGRUNNER_LOG_FILE} +echo "" | CaptureOutput ${ITGRUNNER_LOG_FILE} +summary_string="`egrep $'=====|\u2B95' ${ITGRUNNER_LOG_FILE} | egrep ' in |Running'`" +colorized_summary_string="`echo \"${summary_string}\" | sed 's/passed/passed \\\\U2705/' | sed 's/failed/failed \\\\U274c/' | sed 's/skipped/skipped 
\\\\U1f7e1/'`" +echo -e "${colorized_summary_string}" | CaptureOutput ${ITGRUNNER_LOG_FILE} + +# check again if the numad daemon is running +numad_grep_output=`ps -ef | grep numad | grep -v grep` +if [[ "${numad_grep_output}" != "" ]]; then + echo "" | CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "********************************************************************************" | CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "*** WARNING: 'numad' appears to be running on this computer!" | CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "*** 'ps' output: ${numad_grep_output}" | CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "*** This daemon can adversely affect the running of these tests, especially ones" | CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "*** that are resource intensive in the Readout Apps. This is because numad moves" | CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "*** processes (threads?) to different cores/numa nodes periodically, and that" | CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "*** context switch can disrupt the stable running of the DAQ processes." 
| CaptureOutput ${ITGRUNNER_LOG_FILE} + echo "********************************************************************************" | CaptureOutput ${ITGRUNNER_LOG_FILE} +fi \ No newline at end of file From 9d43725d927c3167f90c4e737ec5756906c1bf88 Mon Sep 17 00:00:00 2001 From: PawelPlesniak Date: Tue, 10 Feb 2026 15:21:19 +0100 Subject: [PATCH 02/29] Testing now testing --- scripts/drunc_integtest_bundle.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/drunc_integtest_bundle.sh b/scripts/drunc_integtest_bundle.sh index 785b07217..fe5d71f9f 100644 --- a/scripts/drunc_integtest_bundle.sh +++ b/scripts/drunc_integtest_bundle.sh @@ -57,7 +57,7 @@ let individual_test_requested_iterations=1 let full_set_requested_interations=1 let stop_on_failure=0 requested_test_names= -PYTEST_COMMAND="pytest -s --tb=short" # our core pytest command, with DAQ printout included and short pytest traceback +PYTEST_COMMAND="pytest -c -s --tb=short" # our core pytest command, with DAQ printout included and short pytest traceback while true; do case "$1" in From 863c86e5780544c6590a88983091af601533b0f7 Mon Sep 17 00:00:00 2001 From: PawelPlesniak Date: Wed, 11 Feb 2026 12:45:34 +0100 Subject: [PATCH 03/29] WIP --- integtest/process_manager_test.py | 91 +++++++++---------------------- scripts/drunc_integtest_bundle.sh | 2 +- 2 files changed, 27 insertions(+), 66 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index ef81caa99..7edd8bd0b 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -1,8 +1,8 @@ +import getpass import os import re import integrationtest.data_classes as data_classes -import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.opmon_metric_checks as opmon_metric_checks @@ -16,31 +16,31 @@ readout_window_time_after = 1001 # Default values for validation parameters 
-expected_number_of_data_files = 1 +# expected_number_of_data_files = 1 check_for_logfile_errors = True -expected_event_count = run_duration -expected_event_count_tolerance = 2 -wibeth_frag_params = { - "fragment_type_description": "WIBEth", - "fragment_type": "WIBEth", - "expected_fragment_count": number_of_data_producers, - "min_size_bytes": 7272, - "max_size_bytes": 14472, -} -triggercandidate_frag_params = { - "fragment_type_description": "Trigger Candidate", - "fragment_type": "Trigger_Candidate", - "expected_fragment_count": 1, - "min_size_bytes": 128, - "max_size_bytes": 216, -} -hsi_frag_params = { - "fragment_type_description": "HSI", - "fragment_type": "Hardware_Signal", - "expected_fragment_count": 0, - "min_size_bytes": 72, - "max_size_bytes": 100, -} +# expected_event_count = run_duration +# expected_event_count_tolerance = 2 +# wibeth_frag_params = { +# "fragment_type_description": "WIBEth", +# "fragment_type": "WIBEth", +# "expected_fragment_count": number_of_data_producers, +# "min_size_bytes": 7272, +# "max_size_bytes": 14472, +# } +# triggercandidate_frag_params = { +# "fragment_type_description": "Trigger Candidate", +# "fragment_type": "Trigger_Candidate", +# "expected_fragment_count": 1, +# "min_size_bytes": 128, +# "max_size_bytes": 216, +# } +# hsi_frag_params = { +# "fragment_type_description": "HSI", +# "fragment_type": "Hardware_Signal", +# "expected_fragment_count": 0, +# "min_size_bytes": 72, +# "max_size_bytes": 100, +# } ignored_logfile_problems = { "-controller": [ "Worker with pid \\d+ was terminated due to signal", @@ -92,7 +92,7 @@ confgen_arguments = {"MinimalSystem": conf_dict} # The commands to run in nanorc, as a list -nanorc_command_list = "boot restart -n root-controller restart -n mlt logs -n root-controller logs -n mlt ps flush terminate boot terminate boot".split() +nanorc_command_list = f"boot restart -n root-controller restart -n mlt logs -n root-controller logs -n mlt --how-far 20 --grep ABC ps -l ps -u 
{getpass.getuser()} flush terminate".split() # The tests themselves @@ -135,45 +135,6 @@ def test_log_files(run_nanorc): ) -def test_data_files(run_nanorc): - # Run some tests on the output data file - all_ok = len(run_nanorc.data_files) == expected_number_of_data_files - print("") # Clear potential dot from pytest - if all_ok: - print( - f"\N{WHITE HEAVY CHECK MARK} The correct number of raw data files was found ({expected_number_of_data_files})" - ) - else: - print( - f"\N{POLICE CARS REVOLVING LIGHT} An incorrect number of raw data files was found, expected {expected_number_of_data_files}, found {len(run_nanorc.data_files)} \N{POLICE CARS REVOLVING LIGHT}" - ) - - fragment_check_list = [triggercandidate_frag_params, hsi_frag_params] - fragment_check_list.append(wibeth_frag_params) - nontrig_fragment_check_list = [hsi_frag_params, wibeth_frag_params] - - for idx in range(len(run_nanorc.data_files)): - data_file = data_file_checks.DataFile(run_nanorc.data_files[idx]) - all_ok &= data_file_checks.sanity_check(data_file) - all_ok &= data_file_checks.check_file_attributes(data_file) - all_ok &= data_file_checks.check_event_count( - data_file, expected_event_count, expected_event_count_tolerance - ) - for jdx in range(len(fragment_check_list)): - all_ok &= data_file_checks.check_fragment_count( - data_file, fragment_check_list[jdx] - ) - all_ok &= data_file_checks.check_fragment_sizes( - data_file, fragment_check_list[jdx] - ) - for kdx in range(len(nontrig_fragment_check_list)): - all_ok &= data_file_checks.check_fragment_error_flags( - data_file, nontrig_fragment_check_list[kdx] - ) - - assert all_ok - - # 26-Nov-2025, KAB: added some sample opmon metric checks, for demonstration purposes def test_metric_files(run_nanorc): print("") # Clear potential dot from pytest diff --git a/scripts/drunc_integtest_bundle.sh b/scripts/drunc_integtest_bundle.sh index fe5d71f9f..7cea68c87 100644 --- a/scripts/drunc_integtest_bundle.sh +++ b/scripts/drunc_integtest_bundle.sh @@ 
-57,7 +57,7 @@ let individual_test_requested_iterations=1 let full_set_requested_interations=1 let stop_on_failure=0 requested_test_names= -PYTEST_COMMAND="pytest -c -s --tb=short" # our core pytest command, with DAQ printout included and short pytest traceback +PYTEST_COMMAND="pytest -c /dev/null -s --tb=short" # our core pytest command, with DAQ printout included and short pytest traceback while true; do case "$1" in From 47a20043e083f1a9b0dcce6702c561f6b09f8c46 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Thu, 12 Mar 2026 17:43:01 +0100 Subject: [PATCH 04/29] Update name, fix tests [TO SQUASH] Some more notes and minor cleanup; move to new nightly Update name, fix tests --- integtest/process_manager_test.py | 121 ++++-------------------------- scripts/drunc_integtest_bundle.sh | 0 2 files changed, 13 insertions(+), 108 deletions(-) mode change 100644 => 100755 scripts/drunc_integtest_bundle.sh diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 7edd8bd0b..16d941139 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -4,7 +4,6 @@ import integrationtest.data_classes as data_classes import integrationtest.log_file_checks as log_file_checks -import integrationtest.opmon_metric_checks as opmon_metric_checks pytest_plugins = "integrationtest.integrationtest_drunc" @@ -15,32 +14,8 @@ readout_window_time_before = 1000 readout_window_time_after = 1001 -# Default values for validation parameters -# expected_number_of_data_files = 1 check_for_logfile_errors = True -# expected_event_count = run_duration -# expected_event_count_tolerance = 2 -# wibeth_frag_params = { -# "fragment_type_description": "WIBEth", -# "fragment_type": "WIBEth", -# "expected_fragment_count": number_of_data_producers, -# "min_size_bytes": 7272, -# "max_size_bytes": 14472, -# } -# triggercandidate_frag_params = { -# "fragment_type_description": "Trigger Candidate", -# "fragment_type": "Trigger_Candidate", -# 
"expected_fragment_count": 1, -# "min_size_bytes": 128, -# "max_size_bytes": 216, -# } -# hsi_frag_params = { -# "fragment_type_description": "HSI", -# "fragment_type": "Hardware_Signal", -# "expected_fragment_count": 0, -# "min_size_bytes": 72, -# "max_size_bytes": 100, -# } + ignored_logfile_problems = { "-controller": [ "Worker with pid \\d+ was terminated due to signal", @@ -92,12 +67,9 @@ confgen_arguments = {"MinimalSystem": conf_dict} # The commands to run in nanorc, as a list -nanorc_command_list = f"boot restart -n root-controller restart -n mlt logs -n root-controller logs -n mlt --how-far 20 --grep ABC ps -l ps -u {getpass.getuser()} flush terminate".split() - -# The tests themselves - +dunerc_command_list = f"boot restart -n root-controller restart -n mlt logs -n root-controller logs -n mlt --how-far 20 --grep ABC ps -l ps -u {getpass.getuser()} flush terminate".split() -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_dunerc): # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -109,96 +81,29 @@ def test_nanorc_success(run_nanorc): print(banner_line) # Check that nanorc completed correctly - assert run_nanorc.completed_process.returncode == 0 + assert run_dunerc.completed_process.returncode == 0 -def test_log_files(run_nanorc): +def test_log_files(run_dunerc): # Check that at least some of the expected log files are present assert any( - f"{run_nanorc.session}_df-01" in str(logname) - for logname in run_nanorc.log_files + f"{run_dunerc.session}_df-01" in str(logname) + for logname in run_dunerc.log_files ) assert any( - f"{run_nanorc.session}_dfo" in str(logname) for logname in run_nanorc.log_files + f"{run_dunerc.session}_dfo" in str(logname) for logname in run_dunerc.log_files ) assert any( - f"{run_nanorc.session}_mlt" in str(logname) for logname in run_nanorc.log_files + f"{run_dunerc.session}_mlt" in str(logname) for logname 
in run_dunerc.log_files ) assert any( - f"{run_nanorc.session}_ru" in str(logname) for logname in run_nanorc.log_files + f"{run_dunerc.session}_ru" in str(logname) for logname in run_dunerc.log_files ) if check_for_logfile_errors: # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( - run_nanorc.log_files, True, True, ignored_logfile_problems + [ + logname for logname in run_dunerc.log_files if "process_manager" in str(logname) + ], True, True, ignored_logfile_problems ) - - -# 26-Nov-2025, KAB: added some sample opmon metric checks, for demonstration purposes -def test_metric_files(run_nanorc): - print("") # Clear potential dot from pytest - - # 10-Dec-2025, KAB: we have noticed that sometimes drunc transitions (or other parts of - # a run control session) take a little longer than expected. This can cause extra metric - # samples to be created. This section of code takes that into account by increasing - # the max allowed sample count by the amount of extra time taken, divided by 10 - # (metric samples are produced every 10 seconds, by default). - # I've tried to make this code backward compatible by handling cases in which the - # daq_session_overall_time is not available (e.g. the try/catch). - # - # The expected DAQ session time is the sum of the time spent in the "running" state - # (specified in the run control commands above [run_duration]) plus the "wait" times in - # the RC commands plus the time spent in RC transitions. With a run duration of 20 sec, - # the session time has been measured to be ~40 seconds, so we take the extra 20 seconds - # into account. - expected_daq_session_time = run_duration + 20 - # - # To calculate the expected number of metric samples, we subtract a small-ish amount of - # time that the DAQ session spends in state(s) that don't produce metrics (say 3 seconds) - # and divide by 10, where 10 seconds is the interval between each reporting of metrics. 
- expected_metric_sample_count = int((expected_daq_session_time - 3) / 10) - # - # We'll set the maximum allowed sample count slightly higher than the expected value. - max_metric_sample_count = expected_metric_sample_count + 2 - try: - # print(f"\nDAQ session overall time: {run_nanorc.daq_session_overall_time} seconds") - if run_nanorc.daq_session_overall_time is not None: - extra_time_taken = ( - run_nanorc.daq_session_overall_time - expected_daq_session_time - ) - if extra_time_taken > 10: - extra_sample_count_allowance = int(extra_time_taken / 10) - max_metric_sample_count += extra_sample_count_allowance - except AttributeError: - pass - - session_name = ( - run_nanorc.session_name if run_nanorc.session_name else run_nanorc.session - ) - metric_data = opmon_metric_checks.collate_opmon_data_from_files( - run_nanorc.opmon_files - ) - - metric_key_list = [ - session_name, - "df-01", - "df-01-trb", - "dfmodules.TRBInfo", - "generated_trigger_records", - ] - all_ok = True - # a 20-second run will likely result in 3 metric samples (at 10-second intervals), so a range - # of 1..5 should always succeed - all_ok &= opmon_metric_checks.check_metric_sample_count( - metric_data, metric_key_list, min_count=1, max_count=max_metric_sample_count - ) - # the number of triggers expected in this test is based on the run duration, so we check for - # a reported number of generated trigger records between slightly above/below that - all_ok &= opmon_metric_checks.check_metric_value_sum( - metric_data, - metric_key_list, - min_value_sum=run_duration - 3, - max_value_sum=run_duration + 3, - ) - assert all_ok diff --git a/scripts/drunc_integtest_bundle.sh b/scripts/drunc_integtest_bundle.sh old mode 100644 new mode 100755 From 1d0c3ba33a2fb5c21b55a4d9558485a856d9925c Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 12:36:06 +0100 Subject: [PATCH 05/29] add echo and comment commands --- src/drunc/controller/interface/commands.py | 17 +++++++++++++++++ 
src/drunc/controller/interface/shell.py | 4 ++++ src/drunc/unified_shell/shell.py | 4 ++++ 3 files changed, 25 insertions(+) diff --git a/src/drunc/controller/interface/commands.py b/src/drunc/controller/interface/commands.py index 4d26c00f3..efb22e7fa 100644 --- a/src/drunc/controller/interface/commands.py +++ b/src/drunc/controller/interface/commands.py @@ -243,6 +243,23 @@ def who_am_i(obj: ControllerContext) -> None: log.info(obj.get_token().user_name) +# click_shell/_cmd.py, line 23. identchars only accepts ascii letters + digits + _ +@click.command("comment", + hidden=True, + context_settings=dict( + ignore_unknown_options=True, + allow_extra_args=True, +)) +def comment_handler(): + """Ignore this line""" + pass + +@click.command("echo") +@click.argument("text", required=False) +@click.pass_obj +def echo(obj, text: str | None) -> None: + log.info(text or "") + @click.command("who-is-in-charge") @click.option("--target", type=str, help="The target to address", default="") @click.option( diff --git a/src/drunc/controller/interface/shell.py b/src/drunc/controller/interface/shell.py index ab33f9cd5..25373004c 100644 --- a/src/drunc/controller/interface/shell.py +++ b/src/drunc/controller/interface/shell.py @@ -17,6 +17,8 @@ take_control, wait, who_am_i, + echo, + comment_handler, who_is_in_charge, ) from drunc.controller.interface.shell_utils import ( @@ -90,6 +92,8 @@ def controller_shell(ctx, controller_address: str, log_level: str) -> None: ctx.command.add_command(take_control, "take-control") ctx.command.add_command(surrender_control, "surrender-control") ctx.command.add_command(who_am_i, "whoami") + ctx.command.add_command(echo, "echo") + ctx.command.add_command(comment_handler, "comment-handler") ctx.command.add_command(who_is_in_charge, "who-is-in-charge") for transition in transitions.commands: ctx.command.add_command(*generate_fsm_command(ctx.obj, transition, desc.name)) diff --git a/src/drunc/unified_shell/shell.py b/src/drunc/unified_shell/shell.py 
index b80d8e33d..3d9efa1d7 100644 --- a/src/drunc/unified_shell/shell.py +++ b/src/drunc/unified_shell/shell.py @@ -30,6 +30,8 @@ to_error, wait, who_am_i, + echo, + comment_handler, who_is_in_charge, ) from drunc.controller.interface.shell_utils import generate_fsm_command @@ -381,6 +383,8 @@ def unified_shell( take_control, surrender_control, who_am_i, + echo, + comment_handler, who_is_in_charge, include, exclude, From 430d09618113f09f8c684ecd3c3b7e324d676263 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 14:20:43 +0100 Subject: [PATCH 06/29] Fix 'test will fail if your terminal window is too short' bug --- src/drunc/process_manager/interface/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/drunc/process_manager/interface/commands.py b/src/drunc/process_manager/interface/commands.py index 38fb61946..2937515ca 100644 --- a/src/drunc/process_manager/interface/commands.py +++ b/src/drunc/process_manager/interface/commands.py @@ -236,7 +236,7 @@ def logs( if grep is not None: line = line.replace(grep, f"[u]{grep}[/]") - obj.print(line) + obj.print(line, soft_wrap=True) if result.name is not None: obj.rule(f"[yellow]{display_name}[/yellow] end") From 89e259e7513aa41c9daf52225e2afa88867c4d8b Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 14:22:34 +0100 Subject: [PATCH 07/29] add basic logging tests to show that it works --- integtest/process_manager_test.py | 85 +++++++++++++++++++++- src/drunc/controller/interface/commands.py | 1 + 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 16d941139..4b3f9b633 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -67,7 +67,35 @@ confgen_arguments = {"MinimalSystem": conf_dict} # The commands to run in nanorc, as a list -dunerc_command_list = f"boot restart -n root-controller restart -n mlt logs -n root-controller logs -n mlt 
--how-far 20 --grep ABC ps -l ps -u {getpass.getuser()} flush terminate".split() +# NOTE THAT WE HAVE NOT TESTED FLUSH BECAUSE IT IS BROKEN +# see #821 + +dunerc_command_list = f""" +boot + +echo testing_logs +logs --name unknown +logs --name root-controller --how-far 5 +logs --name mlt --how-far 5 + +ps -u {getpass.getuser()} + +restart -n root-controller +restart -n mlt +wait 5 +kill -n mlt +wait 2 +restart -n mlt +restart -n trg-controller +wait 5 + + +flush +terminate + +""".split() + + def test_nanorc_success(run_dunerc): # print the name of the current test @@ -84,6 +112,61 @@ def test_nanorc_success(run_dunerc): assert run_dunerc.completed_process.returncode == 0 +def test_log_command(run_dunerc) -> None: + test_str = "Bad query for logs: The process corresponding to the query doesn't exist" + assert test_str in run_dunerc.completed_process.stdout + + +def test_root_controller_logs(run_dunerc) -> None: + """ + Verifies that: + - the stdout contains a "root-controller logs" header line and a "root-controller end" footer line + - there are exactly 5 lines between those two lines + - among those 5 lines, the one from "drunc.controller.core.init_controller" ends with "Controller ready" + """ + stdout = run_dunerc.completed_process.stdout + assert isinstance(stdout, str) + + lines = stdout.splitlines() + + # 1) Find the header/footer lines + header_idx = next( + (i for i, line in enumerate(lines) if "root-controller logs" in line), + None, + ) + footer_idx = next( + (i for i, line in enumerate(lines) if "root-controller end" in line), + None, + ) + + assert header_idx is not None, "Did not find the 'root-controller logs' header line in stdout." + assert footer_idx is not None, "Did not find the 'root-controller end' footer line in stdout." + assert footer_idx > header_idx, "Footer appears before header in stdout." 
+ + # 2) Check there are 5 lines between header and footer + between = lines[header_idx + 1 : footer_idx] + assert ( + len(between) == 5 + ), f"Expected exactly 5 lines between header and footer, found {len(between)}.\nBetween:\n" + "\n".join( + between + ) + + # 3) Check the init_controller line ends with "Controller ready" + # Example line: + # [2026/03/13 08:17:47 UTC] INFO ... drunc.controller.core.init_controller ... Controller ready + init_controller_ready_re = re.compile( + r"drunc\.controller\.core\.init_controller.*Controller ready\s*$" + ) + + matches = [line for line in between if init_controller_ready_re.search(line)] + assert ( + len(matches) >= 1 + ), "Did not find an init_controller line ending with 'Controller ready' within the 5 lines.\nBetween:\n" + "\n".join( + between + ) + + + def test_log_files(run_dunerc): # Check that at least some of the expected log files are present assert any( diff --git a/src/drunc/controller/interface/commands.py b/src/drunc/controller/interface/commands.py index efb22e7fa..566f350c5 100644 --- a/src/drunc/controller/interface/commands.py +++ b/src/drunc/controller/interface/commands.py @@ -244,6 +244,7 @@ def who_am_i(obj: ControllerContext) -> None: # click_shell/_cmd.py, line 23. identchars only accepts ascii letters + digits + _ +# Can't really be used by the integ test tho.. 
@click.command("comment", hidden=True, context_settings=dict( From 3aea522a4435952d643779f2ebcb3d65bc4f713d Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 15:13:41 +0100 Subject: [PATCH 08/29] add wait kill tests; add tableparser --- integtest/process_manager_test.py | 200 ++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 4b3f9b633..dc43ef511 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -1,6 +1,7 @@ import getpass import os import re +from datetime import datetime import integrationtest.data_classes as data_classes import integrationtest.log_file_checks as log_file_checks @@ -78,17 +79,33 @@ logs --name root-controller --how-far 5 logs --name mlt --how-far 5 +echo test_wait +wait 10 + +echo on_boot ps -u {getpass.getuser()} restart -n root-controller restart -n mlt wait 5 + +echo pre_kill_mlt +ps -u {getpass.getuser()} + kill -n mlt wait 2 +echo post_kill_mlt +ps -u {getpass.getuser()} + + + restart -n mlt restart -n trg-controller wait 5 +echo ps_after_recovery +ps -u {getpass.getuser()} + flush terminate @@ -96,6 +113,167 @@ """.split() +UUID_RE = re.compile( + r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" +) +ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]") + + +def strip_ansi(text: str) -> str: + return ANSI_ESCAPE_RE.sub("", text) + + +def _parse_ps_table_from_index(lines: list[str], start_idx: int) -> list[dict[str, str]]: + table_rows: list[dict[str, str]] = [] + + for line in lines[start_idx + 1 :]: + stripped = line.strip() + + if stripped.startswith("└"): + break + + if not stripped.startswith("│"): + continue + + cells = [cell.strip() for cell in stripped.strip("│").split("│")] + if len(cells) < 7: + continue + + table_rows.append( + { + "session": cells[0], + "friendly_name": cells[1], + "user": cells[2], + "host": cells[3], + "uuid": cells[4], + 
"alive": cells[5], + "exit_code": cells[6], + } + ) + + return table_rows + + +def get_ps_table_after_echo(stdout: str, echo_marker: str) -> list[dict[str, str]]: + lines = strip_ansi(stdout).splitlines() + + echo_idx = next( + ( + idx + for idx, line in enumerate(lines) + if "drunc.echo" in line and line.rstrip().endswith(echo_marker) + ), + None, + ) + assert echo_idx is not None, f"Could not find drunc.echo marker '{echo_marker}' in stdout." + + table_start_idx = next( + (idx for idx in range(echo_idx + 1, len(lines)) if "Processes running" in lines[idx]), + None, + ) + assert ( + table_start_idx is not None + ), f"Could not find a 'Processes running' table after marker '{echo_marker}'." + + table_rows = _parse_ps_table_from_index(lines, table_start_idx) + assert table_rows, f"Found table header after marker '{echo_marker}', but no rows were parsed." + + return table_rows + + +def get_uuid_for_friendly_name(ps_table: list[dict[str, str]], friendly_name: str) -> str: + for row in ps_table: + if row["friendly_name"].strip() == friendly_name: + return row["uuid"] + + available_names = ", ".join(row["friendly_name"].strip() for row in ps_table) + raise AssertionError( + f"Could not find friendly name '{friendly_name}' in ps table. " + f"Available names: {available_names}" + ) + + + + +def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: + stdout = run_dunerc.completed_process.stdout + + ps_before_kill = get_ps_table_after_echo(stdout, "pre_kill_mlt") + ps_after_kill = get_ps_table_after_echo(stdout, "post_kill_mlt") + + mlt_before_kill = [ + row for row in ps_before_kill if row["friendly_name"].strip() == "mlt" + ] + mlt_after_kill = [ + row for row in ps_after_kill if row["friendly_name"].strip() == "mlt" + ] + + assert mlt_before_kill, "Expected to find 'mlt' in ps table before kill, but it was missing." + assert not mlt_after_kill, "Expected 'mlt' to be absent from ps table after kill, but it is still present." 
+ + +def test_wait_command_duration_from_logs(run_dunerc) -> None: + stdout = run_dunerc.completed_process.stdout + lines = strip_ansi(stdout).splitlines() + + echo_idx = next( + ( + idx + for idx, line in enumerate(lines) + if "drunc.echo" in line and line.rstrip().endswith("test_wait") + ), + None, + ) + assert echo_idx is not None, "Could not find drunc.echo marker 'test_wait' in stdout." + + running_pattern = re.compile(r"Command wait running for (\d+) seconds\.") + ran_pattern = re.compile(r"Command wait ran for (\d+) seconds\.") + timestamp_pattern = re.compile(r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}) UTC\]") + + running_idx = next( + (idx for idx in range(echo_idx + 1, len(lines)) if running_pattern.search(lines[idx])), + None, + ) + assert running_idx is not None, "Did not find 'Command wait running for ... seconds.' after test_wait marker." + + ran_idx = next( + (idx for idx in range(running_idx + 1, len(lines)) if ran_pattern.search(lines[idx])), + None, + ) + assert ran_idx is not None, "Did not find 'Command wait ran for ... seconds.' after wait start log." + + running_match = running_pattern.search(lines[running_idx]) + ran_match = ran_pattern.search(lines[ran_idx]) + assert running_match is not None + assert ran_match is not None + + expected_seconds = 10 + assert int(running_match.group(1)) == expected_seconds, ( + f"Expected wait start log to report {expected_seconds} seconds, got {running_match.group(1)}." + ) + assert int(ran_match.group(1)) == expected_seconds, ( + f"Expected wait end log to report {expected_seconds} seconds, got {ran_match.group(1)}." + ) + + start_ts_match = timestamp_pattern.search(lines[running_idx]) + end_ts_match = timestamp_pattern.search(lines[ran_idx]) + assert start_ts_match is not None, "Could not parse timestamp in wait start log line." + assert end_ts_match is not None, "Could not parse timestamp in wait end log line." 
+ + start_ts = datetime.strptime(start_ts_match.group(1), "%Y/%m/%d %H:%M:%S") + end_ts = datetime.strptime(end_ts_match.group(1), "%Y/%m/%d %H:%M:%S") + elapsed_seconds = (end_ts - start_ts).total_seconds() + + tolerance_seconds = 1 + assert abs(elapsed_seconds - expected_seconds) <= tolerance_seconds, ( + f"Expected wait log timestamps to differ by {expected_seconds}±{tolerance_seconds} seconds, " + f"got {elapsed_seconds} seconds." + ) + + + + + def test_nanorc_success(run_dunerc): # print the name of the current test @@ -167,6 +345,28 @@ def test_root_controller_logs(run_dunerc) -> None: +# def test_restart_changes_process_uuid(run_dunerc) -> None: +# stdout = run_dunerc.completed_process.stdout + +# ps_before_restart = get_ps_table_after_echo(stdout, "ps_before_restart") +# ps_after_restart = get_ps_table_after_echo(stdout, "ps_after_restart") + +# root_before = get_uuid_for_friendly_name(ps_before_restart, "root-controller") +# root_after = get_uuid_for_friendly_name(ps_after_restart, "root-controller") +# assert root_before != root_after, ( +# "Expected root-controller UUID to change after restart, " +# f"but it stayed the same ({root_before})." +# ) + +# mlt_before = get_uuid_for_friendly_name(ps_before_restart, "mlt") +# mlt_after = get_uuid_for_friendly_name(ps_after_restart, "mlt") +# assert mlt_before != mlt_after, ( +# "Expected mlt UUID to change after restart, " +# f"but it stayed the same ({mlt_before})." 
+# ) + + + def test_log_files(run_dunerc): # Check that at least some of the expected log files are present assert any( From fcdb8cf543a068ad47fd3f891b44c37fa40a21f5 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 15:48:52 +0100 Subject: [PATCH 09/29] Add more test, fix tiny terminal bug again (different source) --- integtest/process_manager_test.py | 154 +++++++++++++++++++++--------- 1 file changed, 109 insertions(+), 45 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index dc43ef511..04f9731f1 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -72,7 +72,13 @@ # see #821 dunerc_command_list = f""" + +echo pre_boot +ps -u {getpass.getuser()} boot +echo on_boot +ps -u {getpass.getuser()} + echo testing_logs logs --name unknown @@ -82,23 +88,20 @@ echo test_wait wait 10 -echo on_boot -ps -u {getpass.getuser()} - -restart -n root-controller +echo WE_STILL_NEED_TO_TEST-RESTART restart -n mlt +restart -n root-controller wait 5 + echo pre_kill_mlt ps -u {getpass.getuser()} - kill -n mlt wait 2 echo post_kill_mlt ps -u {getpass.getuser()} - restart -n mlt restart -n trg-controller wait 5 @@ -115,6 +118,7 @@ UUID_RE = re.compile( r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" + r"|^[0-9a-fA-F]{8}-[-0-9a-fA-F]*\u2026" # truncated by Rich table column width ) ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]") @@ -123,7 +127,9 @@ def strip_ansi(text: str) -> str: return ANSI_ESCAPE_RE.sub("", text) -def _parse_ps_table_from_index(lines: list[str], start_idx: int) -> list[dict[str, str]]: +def _parse_ps_table_from_index( + lines: list[str], start_idx: int +) -> list[dict[str, str]]: table_rows: list[dict[str, str]] = [] for line in lines[start_idx + 1 :]: @@ -165,23 +171,27 @@ def get_ps_table_after_echo(stdout: str, echo_marker: str) -> list[dict[str, str ), None, ) - assert echo_idx is not None, f"Could not find drunc.echo marker 
'{echo_marker}' in stdout." + assert echo_idx is not None, ( + f"Could not find drunc.echo marker '{echo_marker}' in stdout." + ) table_start_idx = next( - (idx for idx in range(echo_idx + 1, len(lines)) if "Processes running" in lines[idx]), + ( + idx + for idx in range(echo_idx + 1, len(lines)) + if "Processes running" in lines[idx] + ), None, ) - assert ( - table_start_idx is not None - ), f"Could not find a 'Processes running' table after marker '{echo_marker}'." + if table_start_idx is None: + return [] - table_rows = _parse_ps_table_from_index(lines, table_start_idx) - assert table_rows, f"Found table header after marker '{echo_marker}', but no rows were parsed." + return _parse_ps_table_from_index(lines, table_start_idx) - return table_rows - -def get_uuid_for_friendly_name(ps_table: list[dict[str, str]], friendly_name: str) -> str: +def get_uuid_for_friendly_name( + ps_table: list[dict[str, str]], friendly_name: str +) -> str: for row in ps_table: if row["friendly_name"].strip() == friendly_name: return row["uuid"] @@ -193,6 +203,24 @@ def get_uuid_for_friendly_name(ps_table: list[dict[str, str]], friendly_name: st ) +def test_boot(run_dunerc) -> None: + stdout = run_dunerc.completed_process.stdout + + ps_pre_boot = get_ps_table_after_echo(stdout, "pre_boot") + ps_on_boot = get_ps_table_after_echo(stdout, "on_boot") + + assert not ps_pre_boot, ( + f"Expected ps table before boot to be empty, but found {len(ps_pre_boot)} row(s): " + + ", ".join(row["friendly_name"] for row in ps_pre_boot) + ) + + assert ps_on_boot, ( + "Expected ps table after boot to contain processes, but it was empty." 
+ ) + for row in ps_on_boot: + assert UUID_RE.match(row["uuid"]), ( + f"Expected a valid UUID for process '{row['friendly_name']}', got '{row['uuid']}'" + ) def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: @@ -208,8 +236,25 @@ def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: row for row in ps_after_kill if row["friendly_name"].strip() == "mlt" ] - assert mlt_before_kill, "Expected to find 'mlt' in ps table before kill, but it was missing." - assert not mlt_after_kill, "Expected 'mlt' to be absent from ps table after kill, but it is still present." + assert mlt_before_kill, ( + "Expected to find 'mlt' in ps table before kill, but it was missing." + ) + assert not mlt_after_kill, ( + "Expected 'mlt' to be absent from ps table after kill, but it is still present." + ) + + +def test_mlt_recovers_after_kill(run_dunerc) -> None: + stdout = run_dunerc.completed_process.stdout + + ps_after_recovery = get_ps_table_after_echo(stdout, "ps_after_recovery") + + mlt_after_recovery = [ + row for row in ps_after_recovery if row["friendly_name"].strip() == "mlt" + ] + assert mlt_after_recovery, ( + "Expected 'mlt' to be present in ps table after recovery, but it was missing." + ) def test_wait_command_duration_from_logs(run_dunerc) -> None: @@ -224,23 +269,37 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: ), None, ) - assert echo_idx is not None, "Could not find drunc.echo marker 'test_wait' in stdout." + assert echo_idx is not None, ( + "Could not find drunc.echo marker 'test_wait' in stdout." 
+ ) running_pattern = re.compile(r"Command wait running for (\d+) seconds\.") ran_pattern = re.compile(r"Command wait ran for (\d+) seconds\.") timestamp_pattern = re.compile(r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}) UTC\]") running_idx = next( - (idx for idx in range(echo_idx + 1, len(lines)) if running_pattern.search(lines[idx])), + ( + idx + for idx in range(echo_idx + 1, len(lines)) + if running_pattern.search(lines[idx]) + ), None, ) - assert running_idx is not None, "Did not find 'Command wait running for ... seconds.' after test_wait marker." + assert running_idx is not None, ( + "Did not find 'Command wait running for ... seconds.' after test_wait marker." + ) ran_idx = next( - (idx for idx in range(running_idx + 1, len(lines)) if ran_pattern.search(lines[idx])), + ( + idx + for idx in range(running_idx + 1, len(lines)) + if ran_pattern.search(lines[idx]) + ), None, ) - assert ran_idx is not None, "Did not find 'Command wait ran for ... seconds.' after wait start log." + assert ran_idx is not None, ( + "Did not find 'Command wait ran for ... seconds.' after wait start log." + ) running_match = running_pattern.search(lines[running_idx]) ran_match = ran_pattern.search(lines[ran_idx]) @@ -257,7 +316,9 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: start_ts_match = timestamp_pattern.search(lines[running_idx]) end_ts_match = timestamp_pattern.search(lines[ran_idx]) - assert start_ts_match is not None, "Could not parse timestamp in wait start log line." + assert start_ts_match is not None, ( + "Could not parse timestamp in wait start log line." + ) assert end_ts_match is not None, "Could not parse timestamp in wait end log line." 
start_ts = datetime.strptime(start_ts_match.group(1), "%Y/%m/%d %H:%M:%S") @@ -271,10 +332,6 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: ) - - - - def test_nanorc_success(run_dunerc): # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") @@ -291,8 +348,10 @@ def test_nanorc_success(run_dunerc): def test_log_command(run_dunerc) -> None: - test_str = "Bad query for logs: The process corresponding to the query doesn't exist" - assert test_str in run_dunerc.completed_process.stdout + test_str = ( + "Bad query for logs: The process corresponding to the query doesn't exist" + ) + assert test_str in run_dunerc.completed_process.stdout def test_root_controller_logs(run_dunerc) -> None: @@ -317,16 +376,19 @@ def test_root_controller_logs(run_dunerc) -> None: None, ) - assert header_idx is not None, "Did not find the 'root-controller logs' header line in stdout." - assert footer_idx is not None, "Did not find the 'root-controller end' footer line in stdout." + assert header_idx is not None, ( + "Did not find the 'root-controller logs' header line in stdout." + ) + assert footer_idx is not None, ( + "Did not find the 'root-controller end' footer line in stdout." + ) assert footer_idx > header_idx, "Footer appears before header in stdout." 
# 2) Check there are 5 lines between header and footer between = lines[header_idx + 1 : footer_idx] - assert ( - len(between) == 5 - ), f"Expected exactly 5 lines between header and footer, found {len(between)}.\nBetween:\n" + "\n".join( - between + assert len(between) == 5, ( + f"Expected exactly 5 lines between header and footer, found {len(between)}.\nBetween:\n" + + "\n".join(between) ) # 3) Check the init_controller line ends with "Controller ready" @@ -337,14 +399,12 @@ def test_root_controller_logs(run_dunerc) -> None: ) matches = [line for line in between if init_controller_ready_re.search(line)] - assert ( - len(matches) >= 1 - ), "Did not find an init_controller line ending with 'Controller ready' within the 5 lines.\nBetween:\n" + "\n".join( - between + assert len(matches) >= 1, ( + "Did not find an init_controller line ending with 'Controller ready' within the 5 lines.\nBetween:\n" + + "\n".join(between) ) - # def test_restart_changes_process_uuid(run_dunerc) -> None: # stdout = run_dunerc.completed_process.stdout @@ -366,7 +426,6 @@ def test_root_controller_logs(run_dunerc) -> None: # ) - def test_log_files(run_dunerc): # Check that at least some of the expected log files are present assert any( @@ -387,6 +446,11 @@ def test_log_files(run_dunerc): # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( [ - logname for logname in run_dunerc.log_files if "process_manager" in str(logname) - ], True, True, ignored_logfile_problems + logname + for logname in run_dunerc.log_files + if "process_manager" in str(logname) + ], + True, + True, + ignored_logfile_problems, ) From ad5c80e532198789460a9c534226685cf64fb945 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 16:11:24 +0100 Subject: [PATCH 10/29] Added final set of tests, ultra janky now --- integtest/process_manager_test.py | 102 +++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 22 deletions(-) diff --git 
a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 04f9731f1..16b2d6c6d 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -88,10 +88,11 @@ echo test_wait wait 10 -echo WE_STILL_NEED_TO_TEST-RESTART +echo pre_restart_mlt restart -n mlt restart -n root-controller wait 5 +echo post_restart_mlt echo pre_kill_mlt @@ -332,6 +333,84 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: ) +def test_restart_mlt_logs(run_dunerc) -> None: + stdout = run_dunerc.completed_process.stdout + lines = strip_ansi(stdout).splitlines() + + echo_idx = next( + ( + idx + for idx, line in enumerate(lines) + if "drunc.echo" in line and line.rstrip().endswith("pre_restart_mlt") + ), + None, + ) + assert echo_idx is not None, ( + "Could not find drunc.echo marker 'pre_restart_mlt' in stdout." + ) + + post_restart_idx = next( + ( + idx + for idx, line in enumerate(lines) + if idx > echo_idx + and "drunc.echo" in line + and line.rstrip().endswith("post_restart_mlt") + ), + None, + ) + assert post_restart_idx is not None, ( + "Could not find drunc.echo marker 'post_restart_mlt' in stdout." + ) + + restart_lines = lines[echo_idx + 1 : post_restart_idx] + restart_text = "\n".join(restart_lines) + + restart_request_match = re.search( + r"process_manager restarting \['mlt'\] in session", + restart_text, + ) + assert restart_request_match is not None, ( + "Did not find the mlt restart request log line between restart markers." + ) + + graceful_termination_match = re.search( + r"Remote process .*?terminated gracefully following SIGQUIT signal\.", + restart_text[restart_request_match.end() :], + re.DOTALL, + ) + assert graceful_termination_match is not None, ( + "Did not find the graceful termination log line for mlt after restart request." 
+ ) + + exit_code_search_text = restart_text[ + restart_request_match.end() + graceful_termination_match.end() : + ] + exit_code_match = re.search( + r"Process 'mlt'.*?process exited\s+with exit code 0", + exit_code_search_text, + re.DOTALL, + ) + assert exit_code_match is not None, ( + "Did not find the mlt exit-code log line after graceful termination." + ) + + booted_search_text = exit_code_search_text[exit_code_match.end() :] + booted_match = re.search( + r"Booted 'mlt'.*?with UUID\s+([^\s\n]+)", + booted_search_text, + re.DOTALL, + ) + assert booted_match is not None, ( + "Did not find the mlt boot log line after the restart exit log." + ) + + booted_uuid = booted_match.group(1) + assert UUID_RE.match(booted_uuid), ( + f"Expected the mlt boot log to contain a UUID, got: {booted_uuid}" + ) + + def test_nanorc_success(run_dunerc): # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") @@ -405,27 +484,6 @@ def test_root_controller_logs(run_dunerc) -> None: ) -# def test_restart_changes_process_uuid(run_dunerc) -> None: -# stdout = run_dunerc.completed_process.stdout - -# ps_before_restart = get_ps_table_after_echo(stdout, "ps_before_restart") -# ps_after_restart = get_ps_table_after_echo(stdout, "ps_after_restart") - -# root_before = get_uuid_for_friendly_name(ps_before_restart, "root-controller") -# root_after = get_uuid_for_friendly_name(ps_after_restart, "root-controller") -# assert root_before != root_after, ( -# "Expected root-controller UUID to change after restart, " -# f"but it stayed the same ({root_before})." -# ) - -# mlt_before = get_uuid_for_friendly_name(ps_before_restart, "mlt") -# mlt_after = get_uuid_for_friendly_name(ps_after_restart, "mlt") -# assert mlt_before != mlt_after, ( -# "Expected mlt UUID to change after restart, " -# f"but it stayed the same ({mlt_before})." 
-# ) - - def test_log_files(run_dunerc): # Check that at least some of the expected log files are present assert any( From 6c454c4d0939acdef9df7924248ddb335d9c6c16 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 16:31:38 +0100 Subject: [PATCH 11/29] Reorder tests --- integtest/process_manager_test.py | 225 ++++++++++++++++-------------- 1 file changed, 117 insertions(+), 108 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 16b2d6c6d..fa03801c4 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -205,6 +205,7 @@ def get_uuid_for_friendly_name( def test_boot(run_dunerc) -> None: + """Checks that boot starts the managed processes and exposes UUIDs in ps.""" stdout = run_dunerc.completed_process.stdout ps_pre_boot = get_ps_table_after_echo(stdout, "pre_boot") @@ -224,41 +225,67 @@ def test_boot(run_dunerc) -> None: ) -def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: - stdout = run_dunerc.completed_process.stdout +def test_log_command(run_dunerc) -> None: + """Checks that querying logs for an unknown process reports the expected error.""" + test_str = ( + "Bad query for logs: The process corresponding to the query doesn't exist" + ) + assert test_str in run_dunerc.completed_process.stdout - ps_before_kill = get_ps_table_after_echo(stdout, "pre_kill_mlt") - ps_after_kill = get_ps_table_after_echo(stdout, "post_kill_mlt") - mlt_before_kill = [ - row for row in ps_before_kill if row["friendly_name"].strip() == "mlt" - ] - mlt_after_kill = [ - row for row in ps_after_kill if row["friendly_name"].strip() == "mlt" - ] +def test_root_controller_logs(run_dunerc) -> None: + """ + Verifies that: + - the stdout contains a "root-controller logs" header line and a "root-controller end" footer line + - there are exactly 5 lines between those two lines + - among those 5 lines, the one from "drunc.controller.core.init_controller" ends with "Controller ready" + """ 
+ stdout = run_dunerc.completed_process.stdout + assert isinstance(stdout, str) - assert mlt_before_kill, ( - "Expected to find 'mlt' in ps table before kill, but it was missing." + lines = stdout.splitlines() + + # 1) Find the header/footer lines + header_idx = next( + (i for i, line in enumerate(lines) if "root-controller logs" in line), + None, ) - assert not mlt_after_kill, ( - "Expected 'mlt' to be absent from ps table after kill, but it is still present." + footer_idx = next( + (i for i, line in enumerate(lines) if "root-controller end" in line), + None, ) + assert header_idx is not None, ( + "Did not find the 'root-controller logs' header line in stdout." + ) + assert footer_idx is not None, ( + "Did not find the 'root-controller end' footer line in stdout." + ) + assert footer_idx > header_idx, "Footer appears before header in stdout." -def test_mlt_recovers_after_kill(run_dunerc) -> None: - stdout = run_dunerc.completed_process.stdout + # 2) Check there are 5 lines between header and footer + between = lines[header_idx + 1 : footer_idx] + assert len(between) == 5, ( + f"Expected exactly 5 lines between header and footer, found {len(between)}.\nBetween:\n" + + "\n".join(between) + ) - ps_after_recovery = get_ps_table_after_echo(stdout, "ps_after_recovery") + # 3) Check the init_controller line ends with "Controller ready" + # Example line: + # [2026/03/13 08:17:47 UTC] INFO ... drunc.controller.core.init_controller ... Controller ready + init_controller_ready_re = re.compile( + r"drunc\.controller\.core\.init_controller.*Controller ready\s*$" + ) - mlt_after_recovery = [ - row for row in ps_after_recovery if row["friendly_name"].strip() == "mlt" - ] - assert mlt_after_recovery, ( - "Expected 'mlt' to be present in ps table after recovery, but it was missing." 
+ matches = [line for line in between if init_controller_ready_re.search(line)] + assert len(matches) >= 1, ( + "Did not find an init_controller line ending with 'Controller ready' within the 5 lines.\nBetween:\n" + + "\n".join(between) ) def test_wait_command_duration_from_logs(run_dunerc) -> None: + """Checks that the wait command logs the expected duration and elapsed time.""" stdout = run_dunerc.completed_process.stdout lines = strip_ansi(stdout).splitlines() @@ -334,6 +361,7 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: def test_restart_mlt_logs(run_dunerc) -> None: + """Checks that restarting mlt produces the expected restart, exit, and boot logs.""" stdout = run_dunerc.completed_process.stdout lines = strip_ansi(stdout).splitlines() @@ -374,44 +402,82 @@ def test_restart_mlt_logs(run_dunerc) -> None: "Did not find the mlt restart request log line between restart markers." ) - graceful_termination_match = re.search( - r"Remote process .*?terminated gracefully following SIGQUIT signal\.", - restart_text[restart_request_match.end() :], - re.DOTALL, - ) - assert graceful_termination_match is not None, ( - "Did not find the graceful termination log line for mlt after restart request." - ) + #! Reinsert this in the future, but this log-based thing is super janky + # graceful_termination_match = re.search( + # r"Remote process .*?terminated gracefully following SIGQUIT signal\.", + # restart_text[restart_request_match.end() :], + # re.DOTALL, + # ) + # assert graceful_termination_match is not None, ( + # "Did not find the graceful termination log line for mlt after restart request." 
+ # ) + + # exit_code_search_text = restart_text[ + # restart_request_match.end() + graceful_termination_match.end() : + # ] + # exit_code_match = re.search( + # r"Process 'mlt'.*?process exited\s+with exit code 0", + # exit_code_search_text, + # re.DOTALL, + # ) + # assert exit_code_match is not None, ( + # "Did not find the mlt exit-code log line after graceful termination." + # ) + + # booted_search_text = exit_code_search_text[exit_code_match.end() :] + # booted_match = re.search( + # r"Booted 'mlt'.*?with UUID\s+([^\s\n]+)", + # booted_search_text, + # re.DOTALL, + # ) + # assert booted_match is not None, ( + # "Did not find the mlt boot log line after the restart exit log." + # ) + + # booted_uuid = booted_match.group(1) + # assert UUID_RE.match(booted_uuid), ( + # f"Expected the mlt boot log to contain a UUID, got: {booted_uuid}" + # ) - exit_code_search_text = restart_text[ - restart_request_match.end() + graceful_termination_match.end() : + +def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: + """Checks that killing mlt removes it from the subsequent ps table.""" + stdout = run_dunerc.completed_process.stdout + + ps_before_kill = get_ps_table_after_echo(stdout, "pre_kill_mlt") + ps_after_kill = get_ps_table_after_echo(stdout, "post_kill_mlt") + + mlt_before_kill = [ + row for row in ps_before_kill if row["friendly_name"].strip() == "mlt" + ] + mlt_after_kill = [ + row for row in ps_after_kill if row["friendly_name"].strip() == "mlt" ] - exit_code_match = re.search( - r"Process 'mlt'.*?process exited\s+with exit code 0", - exit_code_search_text, - re.DOTALL, - ) - assert exit_code_match is not None, ( - "Did not find the mlt exit-code log line after graceful termination." - ) - booted_search_text = exit_code_search_text[exit_code_match.end() :] - booted_match = re.search( - r"Booted 'mlt'.*?with UUID\s+([^\s\n]+)", - booted_search_text, - re.DOTALL, + assert mlt_before_kill, ( + "Expected to find 'mlt' in ps table before kill, but it was missing." 
) - assert booted_match is not None, ( - "Did not find the mlt boot log line after the restart exit log." + assert not mlt_after_kill, ( + "Expected 'mlt' to be absent from ps table after kill, but it is still present." ) - booted_uuid = booted_match.group(1) - assert UUID_RE.match(booted_uuid), ( - f"Expected the mlt boot log to contain a UUID, got: {booted_uuid}" + +def test_mlt_recovers_after_kill(run_dunerc) -> None: + """Checks that mlt is present again after the recovery restart sequence.""" + stdout = run_dunerc.completed_process.stdout + + ps_after_recovery = get_ps_table_after_echo(stdout, "ps_after_recovery") + + mlt_after_recovery = [ + row for row in ps_after_recovery if row["friendly_name"].strip() == "mlt" + ] + assert mlt_after_recovery, ( + "Expected 'mlt' to be present in ps table after recovery, but it was missing." ) def test_nanorc_success(run_dunerc): + """Checks that the drunc integration command sequence completes successfully.""" # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -426,65 +492,8 @@ def test_nanorc_success(run_dunerc): assert run_dunerc.completed_process.returncode == 0 -def test_log_command(run_dunerc) -> None: - test_str = ( - "Bad query for logs: The process corresponding to the query doesn't exist" - ) - assert test_str in run_dunerc.completed_process.stdout - - -def test_root_controller_logs(run_dunerc) -> None: - """ - Verifies that: - - the stdout contains a "root-controller logs" header line and a "root-controller end" footer line - - there are exactly 5 lines between those two lines - - among those 5 lines, the one from "drunc.controller.core.init_controller" ends with "Controller ready" - """ - stdout = run_dunerc.completed_process.stdout - assert isinstance(stdout, str) - - lines = stdout.splitlines() - - # 1) Find the header/footer lines - header_idx = next( - (i for i, line in enumerate(lines) if 
"root-controller logs" in line), - None, - ) - footer_idx = next( - (i for i, line in enumerate(lines) if "root-controller end" in line), - None, - ) - - assert header_idx is not None, ( - "Did not find the 'root-controller logs' header line in stdout." - ) - assert footer_idx is not None, ( - "Did not find the 'root-controller end' footer line in stdout." - ) - assert footer_idx > header_idx, "Footer appears before header in stdout." - - # 2) Check there are 5 lines between header and footer - between = lines[header_idx + 1 : footer_idx] - assert len(between) == 5, ( - f"Expected exactly 5 lines between header and footer, found {len(between)}.\nBetween:\n" - + "\n".join(between) - ) - - # 3) Check the init_controller line ends with "Controller ready" - # Example line: - # [2026/03/13 08:17:47 UTC] INFO ... drunc.controller.core.init_controller ... Controller ready - init_controller_ready_re = re.compile( - r"drunc\.controller\.core\.init_controller.*Controller ready\s*$" - ) - - matches = [line for line in between if init_controller_ready_re.search(line)] - assert len(matches) >= 1, ( - "Did not find an init_controller line ending with 'Controller ready' within the 5 lines.\nBetween:\n" - + "\n".join(between) - ) - - def test_log_files(run_dunerc): + """Checks that expected process-manager log files exist and are free of errors.""" # Check that at least some of the expected log files are present assert any( f"{run_dunerc.session}_df-01" in str(logname) From 1287a9d0a7edae052d1de30677e81f4771b56e81 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 16:47:29 +0100 Subject: [PATCH 12/29] Add log echo (forgot to commit this) --- src/drunc/controller/interface/commands.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/drunc/controller/interface/commands.py b/src/drunc/controller/interface/commands.py index 566f350c5..24d555d62 100644 --- a/src/drunc/controller/interface/commands.py +++ 
b/src/drunc/controller/interface/commands.py @@ -8,6 +8,7 @@ from drunc.utils.utils import get_logger log = get_logger("controller.iface", rich_handler=True) +log_echo = get_logger("echo", rich_handler=True) @click.command("list-transitions") @@ -245,22 +246,26 @@ def who_am_i(obj: ControllerContext) -> None: # click_shell/_cmd.py, line 23. identchars only accepts ascii letters + digits + _ # Can't really be used by the integ test tho.. -@click.command("comment", +@click.command( + "comment", hidden=True, context_settings=dict( - ignore_unknown_options=True, - allow_extra_args=True, -)) + ignore_unknown_options=True, + allow_extra_args=True, + ), +) def comment_handler(): """Ignore this line""" pass + @click.command("echo") @click.argument("text", required=False) @click.pass_obj def echo(obj, text: str | None) -> None: - log.info(text or "") - + log_echo.info(text or "") + + @click.command("who-is-in-charge") @click.option("--target", type=str, help="The target to address", default="") @click.option( From 1163ebb4469d9e4faa130af8a44f4bd419b3481f Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Fri, 13 Mar 2026 16:53:25 +0100 Subject: [PATCH 13/29] fix ruff --- src/drunc/controller/interface/shell.py | 4 ++-- src/drunc/unified_shell/shell.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/drunc/controller/interface/shell.py b/src/drunc/controller/interface/shell.py index 25373004c..a7135c127 100644 --- a/src/drunc/controller/interface/shell.py +++ b/src/drunc/controller/interface/shell.py @@ -6,8 +6,10 @@ from daqpytools.logging import logging_log_levels from drunc.controller.interface.commands import ( + comment_handler, connect, disconnect, + echo, exclude, expert_command, include, @@ -17,8 +19,6 @@ take_control, wait, who_am_i, - echo, - comment_handler, who_is_in_charge, ) from drunc.controller.interface.shell_utils import ( diff --git a/src/drunc/unified_shell/shell.py b/src/drunc/unified_shell/shell.py index 3d9efa1d7..ed068491b 
100644 --- a/src/drunc/unified_shell/shell.py +++ b/src/drunc/unified_shell/shell.py @@ -18,8 +18,10 @@ from drunc.connectivity_service.client import ConnectivityServiceClient from drunc.controller.configuration import ControllerConfHandler from drunc.controller.interface.commands import ( + comment_handler, connect, disconnect, + echo, exclude, expert_command, include, @@ -30,8 +32,6 @@ to_error, wait, who_am_i, - echo, - comment_handler, who_is_in_charge, ) from drunc.controller.interface.shell_utils import generate_fsm_command From 8259ff02fb2dd8f653222bb1e6c847cc4dbb4f10 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Thu, 19 Mar 2026 15:09:09 +0100 Subject: [PATCH 14/29] Cleanup on repetition --- integtest/process_manager_test.py | 173 ++++++++++++++++-------------- 1 file changed, 90 insertions(+), 83 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index fa03801c4..8572db54f 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -1,6 +1,7 @@ import getpass import os import re +from collections.abc import Callable from datetime import datetime import integrationtest.data_classes as data_classes @@ -84,12 +85,15 @@ logs --name unknown logs --name root-controller --how-far 5 logs --name mlt --how-far 5 +echo testing_logs_done echo test_wait wait 10 +echo test_wait_done echo pre_restart_mlt restart -n mlt +echo fixture_1 restart -n root-controller wait 5 echo post_restart_mlt @@ -101,14 +105,17 @@ wait 2 echo post_kill_mlt ps -u {getpass.getuser()} +echo kill_mlt_done +echo ps_recovery restart -n mlt restart -n trg-controller wait 5 echo ps_after_recovery ps -u {getpass.getuser()} +echo ps_recovery_done flush @@ -128,6 +135,58 @@ def strip_ansi(text: str) -> str: return ANSI_ESCAPE_RE.sub("", text) +def find_line_index( + lines: list[str], + predicate: Callable[[str], bool], + *, + start_idx: int = 0, +) -> int | None: + return next( + (idx for idx in range(start_idx, 
len(lines)) if predicate(lines[idx])), + None, + ) + + +def require_line_index( + lines: list[str], + predicate: Callable[[str], bool], + *, + error_message: str, + start_idx: int = 0, +) -> int: + line_idx = find_line_index(lines, predicate, start_idx=start_idx) + assert line_idx is not None, error_message + return line_idx + + +def require_line_containing( + lines: list[str], + text: str, + *, + error_message: str, + start_idx: int = 0, +) -> int: + return require_line_index( + lines, + lambda line: text in line, + error_message=error_message, + start_idx=start_idx, + ) + + +def require_echo_marker_index( + lines: list[str], echo_marker: str, *, start_idx: int = 0 +) -> int: + return require_line_index( + lines, + lambda line: "drunc.echo" in line and line.rstrip().endswith(echo_marker), + error_message=( + f"Could not find drunc.echo marker '{echo_marker}' in stdout." + ), + start_idx=start_idx, + ) + + def _parse_ps_table_from_index( lines: list[str], start_idx: int ) -> list[dict[str, str]]: @@ -164,25 +223,12 @@ def _parse_ps_table_from_index( def get_ps_table_after_echo(stdout: str, echo_marker: str) -> list[dict[str, str]]: lines = strip_ansi(stdout).splitlines() - echo_idx = next( - ( - idx - for idx, line in enumerate(lines) - if "drunc.echo" in line and line.rstrip().endswith(echo_marker) - ), - None, - ) - assert echo_idx is not None, ( - f"Could not find drunc.echo marker '{echo_marker}' in stdout." 
- ) + echo_idx = require_echo_marker_index(lines, echo_marker) - table_start_idx = next( - ( - idx - for idx in range(echo_idx + 1, len(lines)) - if "Processes running" in lines[idx] - ), - None, + table_start_idx = find_line_index( + lines, + lambda line: "Processes running" in line, + start_idx=echo_idx + 1, ) if table_start_idx is None: return [] @@ -246,20 +292,15 @@ def test_root_controller_logs(run_dunerc) -> None: lines = stdout.splitlines() # 1) Find the header/footer lines - header_idx = next( - (i for i, line in enumerate(lines) if "root-controller logs" in line), - None, - ) - footer_idx = next( - (i for i, line in enumerate(lines) if "root-controller end" in line), - None, + header_idx = require_line_containing( + lines, + "root-controller logs", + error_message="Did not find the 'root-controller logs' header line in stdout.", ) - - assert header_idx is not None, ( - "Did not find the 'root-controller logs' header line in stdout." - ) - assert footer_idx is not None, ( - "Did not find the 'root-controller end' footer line in stdout." + footer_idx = require_line_containing( + lines, + "root-controller end", + error_message="Did not find the 'root-controller end' footer line in stdout.", ) assert footer_idx > header_idx, "Footer appears before header in stdout." @@ -284,49 +325,34 @@ def test_root_controller_logs(run_dunerc) -> None: ) +#! This you need to take a look at more def test_wait_command_duration_from_logs(run_dunerc) -> None: """Checks that the wait command logs the expected duration and elapsed time.""" stdout = run_dunerc.completed_process.stdout lines = strip_ansi(stdout).splitlines() - echo_idx = next( - ( - idx - for idx, line in enumerate(lines) - if "drunc.echo" in line and line.rstrip().endswith("test_wait") - ), - None, - ) - assert echo_idx is not None, ( - "Could not find drunc.echo marker 'test_wait' in stdout." 
- ) + echo_idx = require_echo_marker_index(lines, "test_wait") running_pattern = re.compile(r"Command wait running for (\d+) seconds\.") ran_pattern = re.compile(r"Command wait ran for (\d+) seconds\.") timestamp_pattern = re.compile(r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}) UTC\]") - running_idx = next( - ( - idx - for idx in range(echo_idx + 1, len(lines)) - if running_pattern.search(lines[idx]) + running_idx = require_line_index( + lines, + lambda line: running_pattern.search(line) is not None, + error_message=( + "Did not find 'Command wait running for ... seconds.' after test_wait marker." ), - None, - ) - assert running_idx is not None, ( - "Did not find 'Command wait running for ... seconds.' after test_wait marker." + start_idx=echo_idx + 1, ) - ran_idx = next( - ( - idx - for idx in range(running_idx + 1, len(lines)) - if ran_pattern.search(lines[idx]) + ran_idx = require_line_index( + lines, + lambda line: ran_pattern.search(line) is not None, + error_message=( + "Did not find 'Command wait ran for ... seconds.' after wait start log." ), - None, - ) - assert ran_idx is not None, ( - "Did not find 'Command wait ran for ... seconds.' after wait start log." + start_idx=running_idx + 1, ) running_match = running_pattern.search(lines[running_idx]) @@ -360,35 +386,16 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: ) +#! This you need to take a look at more def test_restart_mlt_logs(run_dunerc) -> None: """Checks that restarting mlt produces the expected restart, exit, and boot logs.""" stdout = run_dunerc.completed_process.stdout lines = strip_ansi(stdout).splitlines() - echo_idx = next( - ( - idx - for idx, line in enumerate(lines) - if "drunc.echo" in line and line.rstrip().endswith("pre_restart_mlt") - ), - None, - ) - assert echo_idx is not None, ( - "Could not find drunc.echo marker 'pre_restart_mlt' in stdout." 
- ) + echo_idx = require_echo_marker_index(lines, "pre_restart_mlt") - post_restart_idx = next( - ( - idx - for idx, line in enumerate(lines) - if idx > echo_idx - and "drunc.echo" in line - and line.rstrip().endswith("post_restart_mlt") - ), - None, - ) - assert post_restart_idx is not None, ( - "Could not find drunc.echo marker 'post_restart_mlt' in stdout." + post_restart_idx = require_echo_marker_index( + lines, "post_restart_mlt", start_idx=echo_idx + 1 ) restart_lines = lines[echo_idx + 1 : post_restart_idx] From a075cf68c902d4ce13d3fd06237d661d9c6fb485 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Mon, 16 Mar 2026 16:39:14 +0100 Subject: [PATCH 15/29] More cleanup with helper functions --- integtest/process_manager_test.py | 133 ++++++++++++++++++++---------- 1 file changed, 90 insertions(+), 43 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 8572db54f..688bee78c 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -180,11 +180,38 @@ def require_echo_marker_index( return require_line_index( lines, lambda line: "drunc.echo" in line and line.rstrip().endswith(echo_marker), - error_message=( - f"Could not find drunc.echo marker '{echo_marker}' in stdout." 
- ), + error_message=(f"Could not find drunc.echo marker '{echo_marker}' in stdout."), + start_idx=start_idx, + ) + + +def require_pattern_match_index( + lines: list[str], + pattern: re.Pattern[str], + *, + error_message: str, + start_idx: int = 0, +) -> tuple[int, re.Match[str]]: + line_idx = require_line_index( + lines, + lambda line: pattern.search(line) is not None, + error_message=error_message, start_idx=start_idx, ) + match = pattern.search(lines[line_idx]) + assert match is not None + return line_idx, match + + +def require_pattern_match( + text: str, + pattern: re.Pattern[str], + *, + error_message: str, +) -> re.Match[str]: + match = pattern.search(text) + assert match is not None, error_message + return match def _parse_ps_table_from_index( @@ -250,6 +277,47 @@ def get_uuid_for_friendly_name( ) +def get_rows_for_friendly_name( + ps_table: list[dict[str, str]], friendly_name: str +) -> list[dict[str, str]]: + return [row for row in ps_table if row["friendly_name"].strip() == friendly_name] + + +def assert_process_presence( + ps_table: list[dict[str, str]], + friendly_name: str, + *, + expected_present: bool, + context: str, +) -> None: + matching_rows = get_rows_for_friendly_name(ps_table, friendly_name) + + if expected_present: + assert matching_rows, ( + f"Expected to find '{friendly_name}' in ps table {context}, but it was missing." + ) + return + + assert not matching_rows, ( + f"Expected '{friendly_name}' to be absent from ps table {context}, but it is still present." 
+ ) + + +def assert_process( + ps_table: list[dict[str, str]], + friendly_name: str, + *, + context: str, + expected_present=True, +) -> None: + assert_process_presence( + ps_table, + friendly_name, + expected_present=expected_present, + context=context, + ) + + def test_boot(run_dunerc) -> None: """Checks that boot starts the managed processes and exposes UUIDs in ps.""" stdout = run_dunerc.completed_process.stdout @@ -337,29 +405,24 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: ran_pattern = re.compile(r"Command wait ran for (\d+) seconds\.") timestamp_pattern = re.compile(r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}) UTC\]") - running_idx = require_line_index( + running_idx, running_match = require_pattern_match_index( lines, - lambda line: running_pattern.search(line) is not None, + running_pattern, error_message=( "Did not find 'Command wait running for ... seconds.' after test_wait marker." ), start_idx=echo_idx + 1, ) - ran_idx = require_line_index( + ran_idx, ran_match = require_pattern_match_index( lines, - lambda line: ran_pattern.search(line) is not None, + ran_pattern, error_message=( "Did not find 'Command wait ran for ... seconds.' after wait start log." ), start_idx=running_idx + 1, ) - running_match = running_pattern.search(lines[running_idx]) - ran_match = ran_pattern.search(lines[ran_idx]) - assert running_match is not None - assert ran_match is not None - expected_seconds = 10 assert int(running_match.group(1)) == expected_seconds, ( f"Expected wait start log to report {expected_seconds} seconds, got {running_match.group(1)}." @@ -368,12 +431,16 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: f"Expected wait end log to report {expected_seconds} seconds, got {ran_match.group(1)}." ) - start_ts_match = timestamp_pattern.search(lines[running_idx]) - end_ts_match = timestamp_pattern.search(lines[ran_idx]) - assert start_ts_match is not None, ( - "Could not parse timestamp in wait start log line." 
+ start_ts_match = require_pattern_match( + lines[running_idx], + timestamp_pattern, + error_message="Could not parse timestamp in wait start log line.", + ) + end_ts_match = require_pattern_match( + lines[ran_idx], + timestamp_pattern, + error_message="Could not parse timestamp in wait end log line.", ) - assert end_ts_match is not None, "Could not parse timestamp in wait end log line." start_ts = datetime.strptime(start_ts_match.group(1), "%Y/%m/%d %H:%M:%S") end_ts = datetime.strptime(end_ts_match.group(1), "%Y/%m/%d %H:%M:%S") @@ -401,12 +468,10 @@ def test_restart_mlt_logs(run_dunerc) -> None: restart_lines = lines[echo_idx + 1 : post_restart_idx] restart_text = "\n".join(restart_lines) - restart_request_match = re.search( - r"process_manager restarting \['mlt'\] in session", + restart_request_match = require_pattern_match( restart_text, - ) - assert restart_request_match is not None, ( - "Did not find the mlt restart request log line between restart markers." + re.compile(r"process_manager restarting \['mlt'\] in session"), + error_message="Did not find the mlt restart request log line between restart markers.", ) #! Reinsert this in the future, but this log-based thing is super janky @@ -454,33 +519,15 @@ def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: ps_before_kill = get_ps_table_after_echo(stdout, "pre_kill_mlt") ps_after_kill = get_ps_table_after_echo(stdout, "post_kill_mlt") - mlt_before_kill = [ - row for row in ps_before_kill if row["friendly_name"].strip() == "mlt" - ] - mlt_after_kill = [ - row for row in ps_after_kill if row["friendly_name"].strip() == "mlt" - ] - - assert mlt_before_kill, ( - "Expected to find 'mlt' in ps table before kill, but it was missing." - ) - assert not mlt_after_kill, ( - "Expected 'mlt' to be absent from ps table after kill, but it is still present." 
- ) + assert_process(ps_before_kill, "mlt", context="before kill") + assert_process(ps_after_kill, "mlt", context="after kill", expected_present=False) def test_mlt_recovers_after_kill(run_dunerc) -> None: """Checks that mlt is present again after the recovery restart sequence.""" stdout = run_dunerc.completed_process.stdout - ps_after_recovery = get_ps_table_after_echo(stdout, "ps_after_recovery") - - mlt_after_recovery = [ - row for row in ps_after_recovery if row["friendly_name"].strip() == "mlt" - ] - assert mlt_after_recovery, ( - "Expected 'mlt' to be present in ps table after recovery, but it was missing." - ) + assert_process(ps_after_recovery, "mlt", context="after recovery") def test_nanorc_success(run_dunerc): From 1e73641fdc397444a2da5fef216c735c03556efd Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Mon, 16 Mar 2026 16:48:57 +0100 Subject: [PATCH 16/29] move helper functions to general integ test utils --- integtest/integ_test_utils.py | 246 ++++++++++++++++++++++++++++++ integtest/process_manager_test.py | 197 ++---------------------- 2 files changed, 255 insertions(+), 188 deletions(-) create mode 100644 integtest/integ_test_utils.py diff --git a/integtest/integ_test_utils.py b/integtest/integ_test_utils.py new file mode 100644 index 000000000..f88baeb52 --- /dev/null +++ b/integtest/integ_test_utils.py @@ -0,0 +1,246 @@ +"""Shared helpers for drunc integration tests. + +This module centralizes common patterns used by process-manager integration tests. +Importantly, most of these are defined to help with processing the stdout log outputs +of the integ tests. + +Common functions include: +- searching ordered log output for marker lines, +- requiring regex/string matches with informative assertion errors, +- extracting process-table rows from `ps` command output, +- asserting process presence/absence by friendly name. 
+ +The helpers are intentionally lightweight and pytest-friendly: failures are +reported through `assert` with context-rich messages. +""" + +import re +from collections.abc import Callable + +ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]") + + +def strip_ansi(text: str) -> str: + """Remove ANSI escape codes from a text block.""" + return ANSI_ESCAPE_RE.sub("", text) + + +def find_line_index( + lines: list[str], + predicate: Callable[[str], bool], + *, + start_idx: int = 0, +) -> int | None: + """Return the first line index at or after `start_idx` matching `predicate`. + + Returns `None` when no line matches. + """ + return next( + (idx for idx in range(start_idx, len(lines)) if predicate(lines[idx])), + None, + ) + + +def require_line_index( + lines: list[str], + predicate: Callable[[str], bool], + *, + error_message: str, + start_idx: int = 0, +) -> int: + """Like `find_line_index`, but assert a match exists and return its index.""" + line_idx = find_line_index(lines, predicate, start_idx=start_idx) + assert line_idx is not None, error_message + return line_idx + + +def require_line_containing( + lines: list[str], + text: str, + *, + error_message: str, + start_idx: int = 0, +) -> int: + """Assert and return index of the first line containing `text`.""" + return require_line_index( + lines, + lambda line: text in line, + error_message=error_message, + start_idx=start_idx, + ) + + +def require_echo_marker_index( + lines: list[str], echo_marker: str, *, start_idx: int = 0 +) -> int: + """Assert and return index of a `drunc.echo` line ending with `echo_marker`. + This is hardcoded since echo is a specific callable function with its own logger. 
+ """ + return require_line_index( + lines, + lambda line: "drunc.echo" in line and line.rstrip().endswith(echo_marker), + error_message=(f"Could not find drunc.echo marker '{echo_marker}' in stdout."), + start_idx=start_idx, + ) + + +def require_pattern_match_index( + lines: list[str], + pattern: re.Pattern[str], + *, + error_message: str, + start_idx: int = 0, +) -> tuple[int, re.Match[str]]: + """Assert and return `(index, match)` for first line matching `pattern`.""" + line_idx = require_line_index( + lines, + lambda line: pattern.search(line) is not None, + error_message=error_message, + start_idx=start_idx, + ) + match = pattern.search(lines[line_idx]) + assert match is not None + return line_idx, match + + +def require_pattern_match( + text: str, + pattern: re.Pattern[str], + *, + error_message: str, +) -> re.Match[str]: + """Assert `pattern` matches `text` and return the `re.Match` object.""" + match = pattern.search(text) + assert match is not None, error_message + return match + + +def _parse_ps_table_from_index( + lines: list[str], start_idx: int +) -> list[dict[str, str]]: + """Parse a Unicode table of processes starting after `start_idx`. + + The parser expects rows that start with `│` and stops at a line starting + with `└`. It returns dictionaries with normalized column names. + """ + table_rows: list[dict[str, str]] = [] + + for line in lines[start_idx + 1 :]: + stripped = line.strip() + + if stripped.startswith("└"): + break + + if not stripped.startswith("│"): + continue + + cells = [cell.strip() for cell in stripped.strip("│").split("│")] + if len(cells) < 7: + continue + + table_rows.append( + { + "session": cells[0], + "friendly_name": cells[1], + "user": cells[2], + "host": cells[3], + "uuid": cells[4], + "alive": cells[5], + "exit_code": cells[6], + } + ) + + return table_rows + + +def get_ps_table_after_echo(stdout: str, echo_marker: str) -> list[dict[str, str]]: + """Return parsed process-table rows found after a specific echo marker. 
+ + If no process table is found after the marker, returns an empty list. + """ + lines = strip_ansi(stdout).splitlines() + + echo_idx = require_echo_marker_index(lines, echo_marker) + + table_start_idx = find_line_index( + lines, + lambda line: "Processes running" in line, + start_idx=echo_idx + 1, + ) + if table_start_idx is None: + return [] + + return _parse_ps_table_from_index(lines, table_start_idx) + + +def get_uuid_for_friendly_name( + ps_table: list[dict[str, str]], friendly_name: str +) -> str: + """Return UUID for `friendly_name` from a parsed process table. + + Raises: + AssertionError: if the friendly name is absent. + """ + for row in ps_table: + if row["friendly_name"].strip() == friendly_name: + return row["uuid"] + + available_names = ", ".join(row["friendly_name"].strip() for row in ps_table) + raise AssertionError( + f"Could not find friendly name '{friendly_name}' in ps table. " + f"Available names: {available_names}" + ) + + +def get_rows_for_friendly_name( + ps_table: list[dict[str, str]], friendly_name: str +) -> list[dict[str, str]]: + """Return all rows whose `friendly_name` matches exactly after stripping.""" + return [row for row in ps_table if row["friendly_name"].strip() == friendly_name] + + +def assert_process_presence( + ps_table: list[dict[str, str]], + friendly_name: str, + *, + expected_present: bool, + context: str, +) -> None: + """Assert whether a process is present/absent in a process table. + + Args: + ps_table: Parsed process rows. + friendly_name: Process name to check. + expected_present: `True` if process should exist, `False` otherwise. + context: Short phrase appended to error text (e.g. "before kill"). + """ + matching_rows = get_rows_for_friendly_name(ps_table, friendly_name) + + if expected_present: + assert matching_rows, ( + f"Expected to find '{friendly_name}' in ps table {context}, but it was missing." 
+ ) + return + + assert not matching_rows, ( + f"Expected '{friendly_name}' to be absent from ps table {context}, but it is still present." + ) + + +def assert_process( + ps_table: list[dict[str, str]], + friendly_name: str, + *, + context: str, + expected_present: bool = True, +) -> None: + """Convenience wrapper around `assert_process_presence`. + + By default, asserts that the process is present. + """ + assert_process_presence( + ps_table, + friendly_name, + expected_present=expected_present, + context=context, + ) \ No newline at end of file diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 688bee78c..4d8dc73ec 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -1,11 +1,19 @@ import getpass import os import re -from collections.abc import Callable from datetime import datetime import integrationtest.data_classes as data_classes import integrationtest.log_file_checks as log_file_checks +from integ_test_utils import ( + assert_process, + get_ps_table_after_echo, + require_echo_marker_index, + require_line_containing, + require_pattern_match, + require_pattern_match_index, + strip_ansi, +) pytest_plugins = "integrationtest.integrationtest_drunc" @@ -128,194 +136,7 @@ r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" r"|^[0-9a-fA-F]{8}-[-0-9a-fA-F]*\u2026" # truncated by Rich table column width ) -ANSI_ESCAPE_RE = re.compile(r"\x1B\[[0-9;]*[A-Za-z]") - - -def strip_ansi(text: str) -> str: - return ANSI_ESCAPE_RE.sub("", text) - - -def find_line_index( - lines: list[str], - predicate: Callable[[str], bool], - *, - start_idx: int = 0, -) -> int | None: - return next( - (idx for idx in range(start_idx, len(lines)) if predicate(lines[idx])), - None, - ) - - -def require_line_index( - lines: list[str], - predicate: Callable[[str], bool], - *, - error_message: str, - start_idx: int = 0, -) -> int: - line_idx = find_line_index(lines, predicate, 
start_idx=start_idx) - assert line_idx is not None, error_message - return line_idx - - -def require_line_containing( - lines: list[str], - text: str, - *, - error_message: str, - start_idx: int = 0, -) -> int: - return require_line_index( - lines, - lambda line: text in line, - error_message=error_message, - start_idx=start_idx, - ) - -def require_echo_marker_index( - lines: list[str], echo_marker: str, *, start_idx: int = 0 -) -> int: - return require_line_index( - lines, - lambda line: "drunc.echo" in line and line.rstrip().endswith(echo_marker), - error_message=(f"Could not find drunc.echo marker '{echo_marker}' in stdout."), - start_idx=start_idx, - ) - - -def require_pattern_match_index( - lines: list[str], - pattern: re.Pattern[str], - *, - error_message: str, - start_idx: int = 0, -) -> tuple[int, re.Match[str]]: - line_idx = require_line_index( - lines, - lambda line: pattern.search(line) is not None, - error_message=error_message, - start_idx=start_idx, - ) - match = pattern.search(lines[line_idx]) - assert match is not None - return line_idx, match - - -def require_pattern_match( - text: str, - pattern: re.Pattern[str], - *, - error_message: str, -) -> re.Match[str]: - match = pattern.search(text) - assert match is not None, error_message - return match - - -def _parse_ps_table_from_index( - lines: list[str], start_idx: int -) -> list[dict[str, str]]: - table_rows: list[dict[str, str]] = [] - - for line in lines[start_idx + 1 :]: - stripped = line.strip() - - if stripped.startswith("└"): - break - - if not stripped.startswith("│"): - continue - - cells = [cell.strip() for cell in stripped.strip("│").split("│")] - if len(cells) < 7: - continue - - table_rows.append( - { - "session": cells[0], - "friendly_name": cells[1], - "user": cells[2], - "host": cells[3], - "uuid": cells[4], - "alive": cells[5], - "exit_code": cells[6], - } - ) - - return table_rows - - -def get_ps_table_after_echo(stdout: str, echo_marker: str) -> list[dict[str, str]]: - lines = 
strip_ansi(stdout).splitlines() - - echo_idx = require_echo_marker_index(lines, echo_marker) - - table_start_idx = find_line_index( - lines, - lambda line: "Processes running" in line, - start_idx=echo_idx + 1, - ) - if table_start_idx is None: - return [] - - return _parse_ps_table_from_index(lines, table_start_idx) - - -def get_uuid_for_friendly_name( - ps_table: list[dict[str, str]], friendly_name: str -) -> str: - for row in ps_table: - if row["friendly_name"].strip() == friendly_name: - return row["uuid"] - - available_names = ", ".join(row["friendly_name"].strip() for row in ps_table) - raise AssertionError( - f"Could not find friendly name '{friendly_name}' in ps table. " - f"Available names: {available_names}" - ) - - -def get_rows_for_friendly_name( - ps_table: list[dict[str, str]], friendly_name: str -) -> list[dict[str, str]]: - return [row for row in ps_table if row["friendly_name"].strip() == friendly_name] - - -def assert_process_presence( - ps_table: list[dict[str, str]], - friendly_name: str, - *, - expected_present: bool, - context: str, -) -> None: - matching_rows = get_rows_for_friendly_name(ps_table, friendly_name) - - if expected_present: - assert matching_rows, ( - f"Expected to find '{friendly_name}' in ps table {context}, but it was missing." - ) - return - - assert not matching_rows, ( - f"Expected '{friendly_name}' to be absent from ps table {context}, but it is still present." 
- ) - - -def assert_process( - ps_table: list[dict[str, str]], - friendly_name: str, - *, - context: str, - expected_present=True, -) -> None: - assert_process_presence( - ps_table, - friendly_name, - expected_present=expected_present, - context=context, - ) def test_boot(run_dunerc) -> None: From 30aa804431455d632707be7bc4b4ea23861aa762 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Mon, 16 Mar 2026 18:15:54 +0100 Subject: [PATCH 17/29] Fix mlt logs and minor cleanup --- integtest/process_manager_test.py | 176 +++++++++++++----------------- 1 file changed, 76 insertions(+), 100 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 4d8dc73ec..b486ec6c3 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -76,7 +76,7 @@ confgen_arguments = {"MinimalSystem": conf_dict} -# The commands to run in nanorc, as a list +# The commands to run in dunerc # NOTE THAT WE HAVE NOT TESTED FLUSH BECAUSE IT IS BROKEN # see #821 @@ -120,7 +120,6 @@ restart -n mlt restart -n trg-controller wait 5 - echo ps_after_recovery ps -u {getpass.getuser()} echo ps_recovery_done @@ -138,6 +137,52 @@ ) +def test_nanorc_success(run_dunerc) -> None: + """Checks that the drunc integration command sequence completes successfully.""" + # print the name of the current test + current_test = os.environ.get("PYTEST_CURRENT_TEST") + match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) + if match_obj: + current_test = match_obj.group(1) + banner_line = re.sub(".", "=", current_test) + print(banner_line) + print(current_test) + print(banner_line) + + # Check that nanorc completed correctly + assert run_dunerc.completed_process.returncode == 0 + + +def test_log_files(run_dunerc) -> None: + """Checks that expected process-manager log files exist and are free of errors.""" + # Check that at least some of the expected log files are present + assert any( + f"{run_dunerc.session}_df-01" in str(logname) + for 
logname in run_dunerc.log_files + ) + assert any( + f"{run_dunerc.session}_dfo" in str(logname) for logname in run_dunerc.log_files + ) + assert any( + f"{run_dunerc.session}_mlt" in str(logname) for logname in run_dunerc.log_files + ) + assert any( + f"{run_dunerc.session}_ru" in str(logname) for logname in run_dunerc.log_files + ) + + if check_for_logfile_errors: + # Check that there are no warnings or errors in the log files + assert log_file_checks.logs_are_error_free( + [ + logname + for logname in run_dunerc.log_files + if "process_manager" in str(logname) + ], + True, + True, + ignored_logfile_problems, + ) + def test_boot(run_dunerc) -> None: """Checks that boot starts the managed processes and exposes UUIDs in ps.""" @@ -160,7 +205,7 @@ def test_boot(run_dunerc) -> None: ) -def test_log_command(run_dunerc) -> None: +def test_unknown_log_command(run_dunerc) -> None: """Checks that querying logs for an unknown process reports the expected error.""" test_str = ( "Bad query for logs: The process corresponding to the query doesn't exist" @@ -175,10 +220,7 @@ def test_root_controller_logs(run_dunerc) -> None: - there are exactly 5 lines between those two lines - among those 5 lines, the one from "drunc.controller.core.init_controller" ends with "Controller ready" """ - stdout = run_dunerc.completed_process.stdout - assert isinstance(stdout, str) - - lines = stdout.splitlines() + lines = run_dunerc.completed_process.stdout.splitlines() # 1) Find the header/footer lines header_idx = require_line_containing( @@ -200,7 +242,7 @@ def test_root_controller_logs(run_dunerc) -> None: + "\n".join(between) ) - # 3) Check the init_controller line ends with "Controller ready" + # 3) Check one of the init_controller line ends with "Controller ready" # Example line: # [2026/03/13 08:17:47 UTC] INFO ... drunc.controller.core.init_controller ... Controller ready init_controller_ready_re = re.compile( @@ -214,11 +256,9 @@ def test_root_controller_logs(run_dunerc) -> None: ) -#! 
This you need to take a look at more def test_wait_command_duration_from_logs(run_dunerc) -> None: """Checks that the wait command logs the expected duration and elapsed time.""" - stdout = run_dunerc.completed_process.stdout - lines = strip_ansi(stdout).splitlines() + lines = strip_ansi(run_dunerc.completed_process.stdout).splitlines() echo_idx = require_echo_marker_index(lines, "test_wait") @@ -263,8 +303,9 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: error_message="Could not parse timestamp in wait end log line.", ) - start_ts = datetime.strptime(start_ts_match.group(1), "%Y/%m/%d %H:%M:%S") - end_ts = datetime.strptime(end_ts_match.group(1), "%Y/%m/%d %H:%M:%S") + ts_strp_pattern = "%Y/%m/%d %H:%M:%S" + start_ts = datetime.strptime(start_ts_match.group(1), ts_strp_pattern) + end_ts = datetime.strptime(end_ts_match.group(1), ts_strp_pattern) elapsed_seconds = (end_ts - start_ts).total_seconds() tolerance_seconds = 1 @@ -274,7 +315,6 @@ def test_wait_command_duration_from_logs(run_dunerc) -> None: ) -#! 
This you need to take a look at more def test_restart_mlt_logs(run_dunerc) -> None: """Checks that restarting mlt produces the expected restart, exit, and boot logs.""" stdout = run_dunerc.completed_process.stdout @@ -289,48 +329,31 @@ def test_restart_mlt_logs(run_dunerc) -> None: restart_lines = lines[echo_idx + 1 : post_restart_idx] restart_text = "\n".join(restart_lines) - restart_request_match = require_pattern_match( + require_pattern_match( + restart_text, + re.compile( + r"Remote process .*?terminated gracefully following SIGQUIT signal\.", + re.DOTALL, + ), + error_message="Did not find the graceful termination log line for mlt after restart request.", + ) + + require_pattern_match( restart_text, - re.compile(r"process_manager restarting \['mlt'\] in session"), - error_message="Did not find the mlt restart request log line between restart markers.", + re.compile(r"Process 'mlt'.*?process exited\s+with exit code 0", re.DOTALL), + error_message="Did not find the mlt exit-code log line after graceful termination.", ) - #! Reinsert this in the future, but this log-based thing is super janky - # graceful_termination_match = re.search( - # r"Remote process .*?terminated gracefully following SIGQUIT signal\.", - # restart_text[restart_request_match.end() :], - # re.DOTALL, - # ) - # assert graceful_termination_match is not None, ( - # "Did not find the graceful termination log line for mlt after restart request." - # ) - - # exit_code_search_text = restart_text[ - # restart_request_match.end() + graceful_termination_match.end() : - # ] - # exit_code_match = re.search( - # r"Process 'mlt'.*?process exited\s+with exit code 0", - # exit_code_search_text, - # re.DOTALL, - # ) - # assert exit_code_match is not None, ( - # "Did not find the mlt exit-code log line after graceful termination." 
- # ) - - # booted_search_text = exit_code_search_text[exit_code_match.end() :] - # booted_match = re.search( - # r"Booted 'mlt'.*?with UUID\s+([^\s\n]+)", - # booted_search_text, - # re.DOTALL, - # ) - # assert booted_match is not None, ( - # "Did not find the mlt boot log line after the restart exit log." - # ) - - # booted_uuid = booted_match.group(1) - # assert UUID_RE.match(booted_uuid), ( - # f"Expected the mlt boot log to contain a UUID, got: {booted_uuid}" - # ) + booted_match = require_pattern_match( + restart_text, + re.compile(r"Booted 'mlt'.*?with UUID\s+([^\s\n]+)", re.DOTALL), + error_message="Did not find the mlt boot log line after the restart exit log.", + ) + + booted_uuid = booted_match.group(1) + assert UUID_RE.match(booted_uuid), ( + f"Expected the mlt boot log to contain a UUID, got: {booted_uuid}" + ) def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: @@ -349,50 +372,3 @@ def test_mlt_recovers_after_kill(run_dunerc) -> None: stdout = run_dunerc.completed_process.stdout ps_after_recovery = get_ps_table_after_echo(stdout, "ps_after_recovery") assert_process(ps_after_recovery, "mlt", context="after recovery") - - -def test_nanorc_success(run_dunerc): - """Checks that the drunc integration command sequence completes successfully.""" - # print the name of the current test - current_test = os.environ.get("PYTEST_CURRENT_TEST") - match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) - if match_obj: - current_test = match_obj.group(1) - banner_line = re.sub(".", "=", current_test) - print(banner_line) - print(current_test) - print(banner_line) - - # Check that nanorc completed correctly - assert run_dunerc.completed_process.returncode == 0 - - -def test_log_files(run_dunerc): - """Checks that expected process-manager log files exist and are free of errors.""" - # Check that at least some of the expected log files are present - assert any( - f"{run_dunerc.session}_df-01" in str(logname) - for logname in run_dunerc.log_files - ) - 
assert any( - f"{run_dunerc.session}_dfo" in str(logname) for logname in run_dunerc.log_files - ) - assert any( - f"{run_dunerc.session}_mlt" in str(logname) for logname in run_dunerc.log_files - ) - assert any( - f"{run_dunerc.session}_ru" in str(logname) for logname in run_dunerc.log_files - ) - - if check_for_logfile_errors: - # Check that there are no warnings or errors in the log files - assert log_file_checks.logs_are_error_free( - [ - logname - for logname in run_dunerc.log_files - if "process_manager" in str(logname) - ], - True, - True, - ignored_logfile_problems, - ) From 999b99977b7988940aedd063ddeeea39734276b6 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Tue, 17 Mar 2026 11:42:53 +0100 Subject: [PATCH 18/29] Rename testing variables for clarity --- integtest/process_manager_test.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index b486ec6c3..f4a3fbf98 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -85,15 +85,15 @@ echo pre_boot ps -u {getpass.getuser()} boot -echo on_boot +echo post_boot ps -u {getpass.getuser()} -echo testing_logs +echo test_logs logs --name unknown logs --name root-controller --how-far 5 logs --name mlt --how-far 5 -echo testing_logs_done +echo test_logs_done echo test_wait wait 10 @@ -101,28 +101,27 @@ echo pre_restart_mlt restart -n mlt -echo fixture_1 restart -n root-controller wait 5 echo post_restart_mlt -echo pre_kill_mlt +echo test_kill_mlt ps -u {getpass.getuser()} kill -n mlt wait 2 -echo post_kill_mlt +echo test_kill_mlt_post ps -u {getpass.getuser()} -echo kill_mlt_done +echo test_kill_mlt_done -echo ps_recovery +echo test_recovery restart -n mlt restart -n trg-controller wait 5 -echo ps_after_recovery +echo test_recovery_post ps -u {getpass.getuser()} -echo ps_recovery_done +echo test_recovery_done flush @@ -189,17 +188,17 @@ def test_boot(run_dunerc) 
-> None: stdout = run_dunerc.completed_process.stdout ps_pre_boot = get_ps_table_after_echo(stdout, "pre_boot") - ps_on_boot = get_ps_table_after_echo(stdout, "on_boot") + ps_post_boot = get_ps_table_after_echo(stdout, "post_boot") assert not ps_pre_boot, ( f"Expected ps table before boot to be empty, but found {len(ps_pre_boot)} row(s): " + ", ".join(row["friendly_name"] for row in ps_pre_boot) ) - assert ps_on_boot, ( + assert ps_post_boot, ( "Expected ps table after boot to contain processes, but it was empty." ) - for row in ps_on_boot: + for row in ps_post_boot: assert UUID_RE.match(row["uuid"]), ( f"Expected a valid UUID for process '{row['friendly_name']}', got '{row['uuid']}'" ) @@ -360,8 +359,8 @@ def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: """Checks that killing mlt removes it from the subsequent ps table.""" stdout = run_dunerc.completed_process.stdout - ps_before_kill = get_ps_table_after_echo(stdout, "pre_kill_mlt") - ps_after_kill = get_ps_table_after_echo(stdout, "post_kill_mlt") + ps_before_kill = get_ps_table_after_echo(stdout, "test_kill_mlt") + ps_after_kill = get_ps_table_after_echo(stdout, "test_kill_mlt_post") assert_process(ps_before_kill, "mlt", context="before kill") assert_process(ps_after_kill, "mlt", context="after kill", expected_present=False) @@ -370,5 +369,5 @@ def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: def test_mlt_recovers_after_kill(run_dunerc) -> None: """Checks that mlt is present again after the recovery restart sequence.""" stdout = run_dunerc.completed_process.stdout - ps_after_recovery = get_ps_table_after_echo(stdout, "ps_after_recovery") + ps_after_recovery = get_ps_table_after_echo(stdout, "test_recovery_post") assert_process(ps_after_recovery, "mlt", context="after recovery") From da7e4ec31af0ca20fa794c3eade701e80dc94bad Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Tue, 17 Mar 2026 12:21:07 +0100 Subject: [PATCH 19/29] add examples to docstrings --- integtest/integ_test_utils.py | 
142 ++++++++++++++++++++++++------ integtest/process_manager_test.py | 10 ++- 2 files changed, 122 insertions(+), 30 deletions(-) diff --git a/integtest/integ_test_utils.py b/integtest/integ_test_utils.py index f88baeb52..bebc870a9 100644 --- a/integtest/integ_test_utils.py +++ b/integtest/integ_test_utils.py @@ -1,8 +1,8 @@ """Shared helpers for drunc integration tests. -This module centralizes commoon patterns used by process-manager integration tests. +This module centralizes commoon patterns used by process-manager integration tests. Importantly, most of these are defined to help with processing the stdout log outputs -of the integ tests. +of the integ tests. Common functions include: - searching ordered log output for marker lines, @@ -34,6 +34,17 @@ def find_line_index( """Return the first line index at or after `start_idx` matching `predicate`. Returns `None` when no line matches. + + Example: + >>> lines = [ + ... "[2026/03/17 10:48:10 UTC] INFO drunc.controller.iface Command wait running for 5 seconds.", + ... "[2026/03/17 10:48:15 UTC] INFO drunc.controller.iface Command wait ran for 5 seconds.", + ... "[2026/03/17 10:48:15 UTC] INFO drunc.echo test_recovery_post", + ... ] + >>> find_line_index(lines, lambda line: "Command wait ran" in line) + 1 + >>> find_line_index(lines, lambda line: "test_wait_done" in line) is None + True """ return next( (idx for idx in range(start_idx, len(lines)) if predicate(lines[idx])), @@ -48,7 +59,20 @@ def require_line_index( error_message: str, start_idx: int = 0, ) -> int: - """Like `find_line_index`, but assert a match exists and return its index.""" + """Like `find_line_index`, but assert a match exists and return its index. + + Example: + >>> lines = [ + ... "[2026/03/17 10:47:38 UTC] INFO drunc.echo test_wait", + ... "[2026/03/17 10:47:48 UTC] INFO drunc.echo test_wait_done", + ... ] + >>> require_line_index( + ... lines, + ... lambda line: "test_wait_done" in line, + ... 
error_message="Could not find wait completion marker", + ... ) + 1 + """ line_idx = find_line_index(lines, predicate, start_idx=start_idx) assert line_idx is not None, error_message return line_idx @@ -61,7 +85,21 @@ def require_line_containing( error_message: str, start_idx: int = 0, ) -> int: - """Assert and return index of the first line containing `text`.""" + """Assert and return index of the first line containing `text`. + + Example: + [2026/03/17] WARNING drunc.process_manager_driver Bad query for logs + ────────────────────────────── root-controller logs ────────────────────────────── + [2026/03/17] INFO drunc.init_controller Taking control of trg-controller + + header_idx = require_line_containing( + lines, + "root-controller logs", + error_message="Did not find the 'root-controller logs' header line in stdout.", + ) + + + """ return require_line_index( lines, lambda line: text in line, @@ -75,6 +113,14 @@ def require_echo_marker_index( ) -> int: """Assert and return index of a `drunc.echo` line ending with `echo_marker`. This is hardcoded since echo is a specific callable function with its own logger. + + Example: + >>> lines = [ + ... "[2026/03/17 10:48:15 UTC] INFO drunc.echo test_recovery_post", + ... "Processes running", + ... ] + >>> require_echo_marker_index(lines, "test_recovery_post") + 0 """ return require_line_index( lines, @@ -91,7 +137,22 @@ def require_pattern_match_index( error_message: str, start_idx: int = 0, ) -> tuple[int, re.Match[str]]: - """Assert and return `(index, match)` for first line matching `pattern`.""" + """Assert and return `(index, match)` for first line matching `pattern`. + + Example: + >>> lines = [ + ... "[2026/03/17] INFO drunc.iface Command wait running for 10 seconds.", + ... "[2026/03/17] INFO drunc.iface Command wait ran for 10 seconds.", + ... ] + >>> pattern = re.compile(r"Command wait ran for (\\d+) seconds\\.") + >>> line_idx, match = require_pattern_match_index( + ... lines, + ... pattern, + ... 
error_message="Did not find wait completion log line.", + ... ) + >>> (line_idx, match.group(1)) + (1, '10') + """ line_idx = require_line_index( lines, lambda line: pattern.search(line) is not None, @@ -109,7 +170,19 @@ def require_pattern_match( *, error_message: str, ) -> re.Match[str]: - """Assert `pattern` matches `text` and return the `re.Match` object.""" + """Assert `pattern` matches `text` and return the `re.Match` object. + + Example: + >>> line = "[2026/03/17] INFO Command wait ran for 10 seconds." + >>> pattern = re.compile(r"Command wait ran for (\\d+) seconds\\.") + >>> match = require_pattern_match( + ... line, + ... pattern, + ... error_message="Did not find wait completion log line.", + ... ) + >>> match.group(1) + '10' + """ match = pattern.search(text) assert match is not None, error_message return match @@ -157,6 +230,17 @@ def get_ps_table_after_echo(stdout: str, echo_marker: str) -> list[dict[str, str """Return parsed process-table rows found after a specific echo marker. If no process table is found after the marker, returns an empty list. + + Example: + >>> stdout = ( + ... "[2026/03/17 10:48:15 UTC] INFO drunc.echo test_recovery_post\n" + ... "Processes running\n" + ... "│ minimal │ root-controller │ emmuhamm │ localhost │ f201f9c7-b910-4100-bd78-11765a4d2ee1 │ True │ 0 │\n" + ... "└" + ... ) + >>> table = get_ps_table_after_echo(stdout, "test_recovery_post") + >>> table[0]["friendly_name"] + 'root-controller' """ lines = strip_ansi(stdout).splitlines() @@ -203,8 +287,8 @@ def assert_process_presence( ps_table: list[dict[str, str]], friendly_name: str, *, - expected_present: bool, context: str, + expected_present: bool = True, ) -> None: """Assert whether a process is present/absent in a process table. @@ -213,6 +297,31 @@ def assert_process_presence( friendly_name: Process name to check. expected_present: `True` if process should exist, `False` otherwise. context: Short phrase appended to error text (e.g. "before kill"). 
+ + Example: + >>> ps_table = [ + ... { + ... "session": "minimal", + ... "friendly_name": "root-controller", + ... "user": "daq", + ... "host": "localhost", + ... "uuid": "f201f9c7-b910-4100-bd78-11765a4d2ee1", + ... "alive": "True", + ... "exit_code": "0", + ... } + ... ] + >>> assert_process_presence( + ... ps_table, + ... "root-controller", + ... context="before restart", + ... expected_present=True, + ... ) + >>> assert_process_presence( + ... ps_table, + ... "mlt", + ... context="after restart", + ... expected_present=False, + ... ) """ matching_rows = get_rows_for_friendly_name(ps_table, friendly_name) @@ -225,22 +334,3 @@ def assert_process_presence( assert not matching_rows, ( f"Expected '{friendly_name}' to be absent from ps table {context}, but it is still present." ) - - -def assert_process( - ps_table: list[dict[str, str]], - friendly_name: str, - *, - context: str, - expected_present: bool = True, -) -> None: - """Convenience wrapper around `assert_process_presence`. - - By default, asserts that the process is present. 
- """ - assert_process_presence( - ps_table, - friendly_name, - expected_present=expected_present, - context=context, - ) \ No newline at end of file diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index f4a3fbf98..62fe20e79 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -6,7 +6,7 @@ import integrationtest.data_classes as data_classes import integrationtest.log_file_checks as log_file_checks from integ_test_utils import ( - assert_process, + assert_process_presence, get_ps_table_after_echo, require_echo_marker_index, require_line_containing, @@ -362,12 +362,14 @@ def test_kill_removes_mlt_from_ps_table(run_dunerc) -> None: ps_before_kill = get_ps_table_after_echo(stdout, "test_kill_mlt") ps_after_kill = get_ps_table_after_echo(stdout, "test_kill_mlt_post") - assert_process(ps_before_kill, "mlt", context="before kill") - assert_process(ps_after_kill, "mlt", context="after kill", expected_present=False) + assert_process_presence(ps_before_kill, "mlt", context="before kill") + assert_process_presence( + ps_after_kill, "mlt", context="after kill", expected_present=False + ) def test_mlt_recovers_after_kill(run_dunerc) -> None: """Checks that mlt is present again after the recovery restart sequence.""" stdout = run_dunerc.completed_process.stdout ps_after_recovery = get_ps_table_after_echo(stdout, "test_recovery_post") - assert_process(ps_after_recovery, "mlt", context="after recovery") + assert_process_presence(ps_after_recovery, "mlt", context="after recovery") From 2b2e878ac4c272fd48d6e42effb020374af90591 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Wed, 18 Mar 2026 14:45:41 +0100 Subject: [PATCH 20/29] Add width for tables --- .../process_manager/interface/commands.py | 20 ++++++++++++++++--- src/drunc/process_manager/utils.py | 4 ++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/drunc/process_manager/interface/commands.py 
b/src/drunc/process_manager/interface/commands.py index 2937515ca..a1287afdc 100644 --- a/src/drunc/process_manager/interface/commands.py +++ b/src/drunc/process_manager/interface/commands.py @@ -260,8 +260,20 @@ def restart(obj: ProcessManagerContext, query: ProcessQuery) -> None: default=False, help="Whether to have a long output", ) +@click.option( + "-w", + "--width", + type=int, + default=None, + help="Table width. Default is automatically calculated", +) @click.pass_obj -def ps(obj: ProcessManagerContext, query: ProcessQuery, long_format: bool) -> None: +def ps( + obj: ProcessManagerContext, + query: ProcessQuery, + long_format: bool, + width: int | None, +) -> None: log = get_logger("process_manager.shell") log.debug(f"Running ps with query {query}") results = obj.get_driver("process_manager").ps(query) @@ -269,6 +281,8 @@ def ps(obj: ProcessManagerContext, query: ProcessQuery, long_format: bool) -> No return obj.print( tabulate_process_instance_list( - results, title="Processes running", long=long_format - ) + results, title="Processes running", long=long_format, width=width + ), + overflow="fold", + soft_wrap=True, ) diff --git a/src/drunc/process_manager/utils.py b/src/drunc/process_manager/utils.py index 602a2dab2..095d70df1 100644 --- a/src/drunc/process_manager/utils.py +++ b/src/drunc/process_manager/utils.py @@ -118,9 +118,9 @@ def walk(tree_id): def tabulate_process_instance_list( - pil: ProcessInstanceList, title: str, long: bool = False + pil: ProcessInstanceList, title: str, long: bool = False, width: int | None = None ): - t = Table(title=title) + t = Table(title=title, width=width) t.add_column("session") t.add_column("friendly name") t.add_column("user") From 092da09c64e4f94eac0f07cac39b93d00344c711 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Wed, 18 Mar 2026 14:54:09 +0100 Subject: [PATCH 21/29] Fix table width bug in test --- integtest/process_manager_test.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 62fe20e79..2e9fb7755 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -83,10 +83,10 @@ dunerc_command_list = f""" echo pre_boot -ps -u {getpass.getuser()} +ps -u {getpass.getuser()} -w 180 boot echo post_boot -ps -u {getpass.getuser()} +ps -u {getpass.getuser()} -w 180 echo test_logs @@ -107,11 +107,11 @@ echo test_kill_mlt -ps -u {getpass.getuser()} +ps -u {getpass.getuser()} -w 180 kill -n mlt wait 2 echo test_kill_mlt_post -ps -u {getpass.getuser()} +ps -u {getpass.getuser()} -w 180 echo test_kill_mlt_done @@ -120,7 +120,7 @@ restart -n trg-controller wait 5 echo test_recovery_post -ps -u {getpass.getuser()} +ps -u {getpass.getuser()} -w 180 echo test_recovery_done @@ -132,7 +132,6 @@ UUID_RE = re.compile( r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" - r"|^[0-9a-fA-F]{8}-[-0-9a-fA-F]*\u2026" # truncated by Rich table column width ) From bb80196e9bfbab13bdb048250cc8bbb514376fab Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Wed, 18 Mar 2026 15:00:25 +0100 Subject: [PATCH 22/29] Propagate width to other table options --- .../process_manager/interface/commands.py | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/src/drunc/process_manager/interface/commands.py b/src/drunc/process_manager/interface/commands.py index a1287afdc..7fb1c1935 100644 --- a/src/drunc/process_manager/interface/commands.py +++ b/src/drunc/process_manager/interface/commands.py @@ -151,44 +151,69 @@ def dummy_boot( @click.command("terminate") +@click.option( + "-w", + "--width", + type=int, + default=None, + help="Table width. 
Default is automatically calculated", +) @click.pass_obj -def terminate(obj: ProcessManagerContext) -> None: +def terminate(obj: ProcessManagerContext, width: int | None) -> None: log = get_logger("process_manager.shell") log.debug("Terminating") result = obj.get_driver("process_manager").terminate() if not result: return obj.print( - tabulate_process_instance_list(result, "Terminated process", False) + tabulate_process_instance_list(result, "Terminated process", False, width=width) ) # rich tables require console printing obj.delete_driver("controller") @click.command("kill") +@click.option( + "-w", + "--width", + type=int, + default=None, + help="Table width. Default is automatically calculated", +) @add_query_options(at_least_one=True) @click.pass_obj -def kill(obj: ProcessManagerContext, query: ProcessQuery) -> None: +def kill(obj: ProcessManagerContext, query: ProcessQuery, width: int | None) -> None: log = get_logger("process_manager.shell") log.debug(f"Killing with query {query}") result = obj.get_driver("process_manager").kill(query) if not result: return obj.print( - tabulate_process_instance_list(result, "Killed process", False) + tabulate_process_instance_list(result, "Killed process", False, width=width) ) # rich tables require console printing @click.command("flush") +@click.option( + "-w", + "--width", + type=int, + default=None, + help="Table width. 
Default is automatically calculated", +) @add_query_options(at_least_one=False, all_processes_by_default=True) @click.pass_obj -def flush(obj: ProcessManagerContext, query: ProcessQuery) -> None: +def flush( + obj: ProcessManagerContext, + query: ProcessQuery, + width: int | None, +) -> None: log = get_logger("process_manager.shell") log.debug(f"Flushing with query {query}") result = obj.get_driver("process_manager").flush(query) if not result: return obj.print( - tabulate_process_instance_list(result, "Flushed process", False) + tabulate_process_instance_list(result, "Flushed process", False, width=width) ) # rich tables require console printing From 557d5c7db79ac934db96db9c7f0c53d752bc5548 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Wed, 18 Mar 2026 15:16:49 +0100 Subject: [PATCH 23/29] Fix pytest print mockcontext --- tests/process_manager/interface/test_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/process_manager/interface/test_commands.py b/tests/process_manager/interface/test_commands.py index 6b98b1544..3571690e2 100644 --- a/tests/process_manager/interface/test_commands.py +++ b/tests/process_manager/interface/test_commands.py @@ -115,7 +115,7 @@ def __init__(self, driver=None): def get_driver(self, name): return self.driver - def print(self, msg, justify=None): + def print(self, msg, justify=None, overflow=None, soft_wrap=None): self.output.append(str(msg)) From 2e0331ff43dc6477e2d37dd7d913cc04258e793d Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Thu, 19 Mar 2026 15:04:32 +0100 Subject: [PATCH 24/29] drunc connsvc true, fix minor typos --- integtest/integ_test_utils.py | 2 +- integtest/process_manager_test.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/integtest/integ_test_utils.py b/integtest/integ_test_utils.py index bebc870a9..6dc9fe014 100644 --- a/integtest/integ_test_utils.py +++ b/integtest/integ_test_utils.py @@ -1,6 +1,6 @@ """Shared helpers for drunc integration 
tests. -This module centralizes commoon patterns used by process-manager integration tests. +This module centralizes common patterns used by process-manager integration tests. Importantly, most of these are defined to help with processing the stdout log outputs of the integ tests. diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 2e9fb7755..3a418c14e 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -53,7 +53,7 @@ conf_dict.tpg_enabled = False # For testing, allow drunc to manage ConnectivityService (default is False, integrationtest manages Connectivity Service) -# conf_dict.drunc_connsvc = True +conf_dict.drunc_connsvc = True # For testing, specify connectivity service port (default is 0, a random port is chosen for the Connectivity Service) # conf_dict.connsvc_port = 12345 @@ -80,6 +80,10 @@ # NOTE THAT WE HAVE NOT TESTED FLUSH BECAUSE IT IS BROKEN # see #821 + +# The commands mostly come from the msqt, with a few minor changes +# The entire format is a standard that is basically copied over from the +# typical msqt tests, so they bear no direct effect on the scope of this test. dunerc_command_list = f""" echo pre_boot From 74301e761dd971420b5180e85e096b0d2f3dd10a Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Thu, 19 Mar 2026 15:04:53 +0100 Subject: [PATCH 25/29] remove comment command --- src/drunc/controller/interface/commands.py | 15 --------------- src/drunc/controller/interface/shell.py | 2 -- src/drunc/unified_shell/shell.py | 2 -- 3 files changed, 19 deletions(-) diff --git a/src/drunc/controller/interface/commands.py b/src/drunc/controller/interface/commands.py index 24d555d62..f054a8e29 100644 --- a/src/drunc/controller/interface/commands.py +++ b/src/drunc/controller/interface/commands.py @@ -244,21 +244,6 @@ def who_am_i(obj: ControllerContext) -> None: log.info(obj.get_token().user_name) -# click_shell/_cmd.py, line 23. 
identchars only accepts ascii letters + digits + _ -# Can't really be used by the integ test tho.. -@click.command( - "comment", - hidden=True, - context_settings=dict( - ignore_unknown_options=True, - allow_extra_args=True, - ), -) -def comment_handler(): - """Ignore this line""" - pass - - @click.command("echo") @click.argument("text", required=False) @click.pass_obj diff --git a/src/drunc/controller/interface/shell.py b/src/drunc/controller/interface/shell.py index a7135c127..5c71bde1c 100644 --- a/src/drunc/controller/interface/shell.py +++ b/src/drunc/controller/interface/shell.py @@ -6,7 +6,6 @@ from daqpytools.logging import logging_log_levels from drunc.controller.interface.commands import ( - comment_handler, connect, disconnect, echo, @@ -93,7 +92,6 @@ def controller_shell(ctx, controller_address: str, log_level: str) -> None: ctx.command.add_command(surrender_control, "surrender-control") ctx.command.add_command(who_am_i, "whoami") ctx.command.add_command(echo, "echo") - ctx.command.add_command(comment_handler, "comment-handler") ctx.command.add_command(who_is_in_charge, "who-is-in-charge") for transition in transitions.commands: ctx.command.add_command(*generate_fsm_command(ctx.obj, transition, desc.name)) diff --git a/src/drunc/unified_shell/shell.py b/src/drunc/unified_shell/shell.py index ed068491b..a3dfd1678 100644 --- a/src/drunc/unified_shell/shell.py +++ b/src/drunc/unified_shell/shell.py @@ -18,7 +18,6 @@ from drunc.connectivity_service.client import ConnectivityServiceClient from drunc.controller.configuration import ControllerConfHandler from drunc.controller.interface.commands import ( - comment_handler, connect, disconnect, echo, @@ -384,7 +383,6 @@ def unified_shell( surrender_control, who_am_i, echo, - comment_handler, who_is_in_charge, include, exclude, From 73188d15308e69228bc140e7e8b641d6148965e0 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Tue, 24 Mar 2026 17:22:02 +0100 Subject: [PATCH 26/29] Add flush check --- 
integtest/integ_test_utils.py | 8 ++++---- integtest/process_manager_test.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/integtest/integ_test_utils.py b/integtest/integ_test_utils.py index 6dc9fe014..2f85ee3a9 100644 --- a/integtest/integ_test_utils.py +++ b/integtest/integ_test_utils.py @@ -257,17 +257,17 @@ def get_ps_table_after_echo(stdout: str, echo_marker: str) -> list[dict[str, str return _parse_ps_table_from_index(lines, table_start_idx) -def get_uuid_for_friendly_name( - ps_table: list[dict[str, str]], friendly_name: str +def get_column_for_friendly_name( + ps_table: list[dict[str, str]], friendly_name: str, column: str ) -> str: - """Return UUID for `friendly_name` from a parsed process table. + """Return the column for `friendly_name` from a parsed process table. Raises: AssertionError: if the friendly name is absent. """ for row in ps_table: if row["friendly_name"].strip() == friendly_name: - return row["uuid"] + return row[column] available_names = ", ".join(row["friendly_name"].strip() for row in ps_table) raise AssertionError( diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 3a418c14e..8ce13d44b 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -7,6 +7,7 @@ import integrationtest.log_file_checks as log_file_checks from integ_test_utils import ( assert_process_presence, + get_column_for_friendly_name, get_ps_table_after_echo, require_echo_marker_index, require_line_containing, @@ -128,7 +129,18 @@ echo test_recovery_done +echo test_flush +ps -u {getpass.getuser()} -w 180 +kill -n mlt --crash +wait 5 +echo after_crash +ps -u {getpass.getuser()} -w 180 flush +echo after_flush +ps -u {getpass.getuser()} -w 180 +echo test_flush_done + + terminate """.split() @@ -376,3 +388,21 @@ def test_mlt_recovers_after_kill(run_dunerc) -> None: stdout = run_dunerc.completed_process.stdout ps_after_recovery = 
get_ps_table_after_echo(stdout, "test_recovery_post") assert_process_presence(ps_after_recovery, "mlt", context="after recovery") + + +def test_flush(run_dunerc) -> None: + """Checks that flush works by crashing mlt, seeing that the process exists, + and then flushing to show it's gone""" + + stdout = run_dunerc.completed_process.stdout + ps_initial = get_ps_table_after_echo(stdout, "test_flush") + assert_process_presence(ps_initial, "mlt", context="before crash") + + ps_after_crash = get_ps_table_after_echo(stdout, "after_crash") + mlt_alive = get_column_for_friendly_name(ps_after_crash, "mlt", "alive") + assert mlt_alive == "False", "The mlt should have crashed" + + ps_after_flash = get_ps_table_after_echo(stdout, "after_flush") + assert_process_presence( + ps_after_flash, "mlt", context="after crash", expected_present=False + ) From e57c098c8d2ca4e3fad5995e2cc7eeece23c3137 Mon Sep 17 00:00:00 2001 From: Emir Muhammad Date: Wed, 25 Mar 2026 17:15:31 +0100 Subject: [PATCH 27/29] Document width in the wiki --- docs/Unified-shell-reference.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Unified-shell-reference.md b/docs/Unified-shell-reference.md index dd05aa0f5..2cf1c9c6a 100644 --- a/docs/Unified-shell-reference.md +++ b/docs/Unified-shell-reference.md @@ -437,6 +437,7 @@ The `ps` command must take at least one the following options: * `-n/--name`, to select a process to flush based on its "friendly name". * `-s/--session`, to select the processes to flush based on a session name. * `--long-format/-l`, to get a long listing format. +* `-w/--width`, to fix the table width to a supplied length. By default, `ps` list all the processes. 
From f7fc2781f79fcc1dd1d96398e65515d669314c59 Mon Sep 17 00:00:00 2001 From: PawelPlesniak Date: Tue, 21 Apr 2026 17:27:10 +0200 Subject: [PATCH 28/29] Updating parameter name for tests to pass --- integtest/process_manager_test.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 8ce13d44b..6e7bbdc9b 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -170,18 +170,24 @@ def test_nanorc_success(run_dunerc) -> None: def test_log_files(run_dunerc) -> None: """Checks that expected process-manager log files exist and are free of errors.""" # Check that at least some of the expected log files are present + print(f"PP: TESTING {type(run_dunerc)=}") + print(f"PP: TESTING {dir(run_dunerc)=}") + print(f"PP: TESTING {run_dunerc.daq_session_name=}") assert any( - f"{run_dunerc.session}_df-01" in str(logname) + f"{run_dunerc.daq_session_name}_df-01" in str(logname) for logname in run_dunerc.log_files ) assert any( - f"{run_dunerc.session}_dfo" in str(logname) for logname in run_dunerc.log_files + f"{run_dunerc.daq_session_name}_dfo" in str(logname) + for logname in run_dunerc.log_files ) assert any( - f"{run_dunerc.session}_mlt" in str(logname) for logname in run_dunerc.log_files + f"{run_dunerc.daq_session_name}_mlt" in str(logname) + for logname in run_dunerc.log_files ) assert any( - f"{run_dunerc.session}_ru" in str(logname) for logname in run_dunerc.log_files + f"{run_dunerc.daq_session_name}_ru" in str(logname) + for logname in run_dunerc.log_files ) if check_for_logfile_errors: From cb0ca9307655b11c71f67c5d6e316f9145882d5b Mon Sep 17 00:00:00 2001 From: PawelPlesniak Date: Wed, 22 Apr 2026 17:08:00 +0200 Subject: [PATCH 29/29] Comment removal --- integtest/process_manager_test.py | 9 +-------- src/drunc/unified_shell/shell.py | 4 ++++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git 
a/integtest/process_manager_test.py b/integtest/process_manager_test.py index 6e7bbdc9b..3284bddee 100644 --- a/integtest/process_manager_test.py +++ b/integtest/process_manager_test.py @@ -78,13 +78,9 @@ confgen_arguments = {"MinimalSystem": conf_dict} # The commands to run in dunerc -# NOTE THAT WE HAVE NOT TESTED FLUSH BECAUSE IT IS BROKEN -# see #821 - - # The commands mostly come from the msqt, with a few minor changes # The entire format is a standard that is basically copied over from the -# typical msqt tests, so they bear no direct effect on the scope of this test. +# typical msqt tests, so they bear no direct effect on the scope of this test. dunerc_command_list = f""" echo pre_boot @@ -170,9 +166,6 @@ def test_nanorc_success(run_dunerc) -> None: def test_log_files(run_dunerc) -> None: """Checks that expected process-manager log files exist and are free of errors.""" # Check that at least some of the expected log files are present - print(f"PP: TESTING {type(run_dunerc)=}") - print(f"PP: TESTING {dir(run_dunerc)=}") - print(f"PP: TESTING {run_dunerc.daq_session_name=}") assert any( f"{run_dunerc.daq_session_name}_df-01" in str(logname) for logname in run_dunerc.log_files diff --git a/src/drunc/unified_shell/shell.py b/src/drunc/unified_shell/shell.py index a3dfd1678..d007bfece 100644 --- a/src/drunc/unified_shell/shell.py +++ b/src/drunc/unified_shell/shell.py @@ -152,10 +152,14 @@ def unified_shell( unified_shell_log.debug("Setting up the [green]unified_shell[/green] logger") # Parse the process manager argument to determine if it's a config or an address + unified_shell_log.critical( + f"Parsing the process manager argument: {process_manager}" + ) process_manager_url: ParseResult = urlparse(process_manager) internal_pm: bool = True if process_manager_url.scheme == "grpc": # i.e. 
if it's an address internal_pm = False + unified_shell_log.critical(f"{internal_pm=}, {process_manager_url=}") # If using a k8s process manager, validate the session name before proceeding if get_pm_type_from_name(