diff --git a/.github/workflows/dkg-runner.yml b/.github/workflows/dkg-runner.yml
index 0d0fdd23..78fe74ab 100644
--- a/.github/workflows/dkg-runner.yml
+++ b/.github/workflows/dkg-runner.yml
@@ -99,13 +99,10 @@ jobs:
           CHARON_BIN: ${{ github.workspace }}/bin/charon
           PLUTO_BIN: ${{ github.workspace }}/target/debug/pluto
           TIMEOUT: ${{ inputs.timeout || '120' }}
+          RUN_SMOKE_VERIFY: "1"
+          SMOKE_SECONDS: "3"
         run: ./scripts/dkg-runner/run.sh
 
-      - name: Verify ceremony outputs
-        env:
-          NODES: "4"
-        run: ./scripts/dkg-runner/ci/verify-output.sh
-
       - name: Upload work dir on failure
         if: failure()
         uses: actions/upload-artifact@v4
diff --git a/scripts/dkg-runner/ci/verify-output-semantic.sh b/scripts/dkg-runner/ci/verify-output-semantic.sh
new file mode 100755
index 00000000..18f8d928
--- /dev/null
+++ b/scripts/dkg-runner/ci/verify-output-semantic.sh
@@ -0,0 +1,267 @@
+#!/usr/bin/env bash
+# verify-output-semantic.sh — semantic consistency checks on DKG runner output.
+#
+# Environment:
+#   WORK_DIR   scratch directory used by run.sh (default: /tmp/dkg-run)
+#   NODES      number of nodes in the ceremony (default: 4)
+#   THRESHOLD  threshold the lock is expected to carry (default: 3)
+#
+# What is verified:
+#   - all node locks are identical once normalised as JSON
+#   - operator count, threshold and validator count in the lock are consistent
+#   - each validator carries one public share per node
+#   - deposit data and builder registration pubkeys equal the validator pubkey
+#   - each node's keystore pubkeys match that node's public shares in the lock
+#
+# Keystores are not decrypted: collect.sh does not copy password files.
+
+set -euo pipefail
+
+: "${WORK_DIR:=/tmp/dkg-run}"
+: "${NODES:=4}"
+: "${THRESHOLD:=3}"
+OUTPUT_DIR="${WORK_DIR}/output"
+TMP_DIR="${WORK_DIR}/semantic-verify"
+
+fail() {
+  printf '%s\n' "::error::$*" >&2
+  exit 1
+}
+
+warn() {
+  printf '%s\n' "::warning::$*" >&2
+}
+
+require_cmd() {
+  command -v "$1" &>/dev/null || fail "missing required command: $1"
+}
+
+node_dir() {
+  printf '%s' "${OUTPUT_DIR}/node-$1"
+}
+
+lock_file() {
+  printf '%s' "$(node_dir "$1")/cluster-lock.json"
+}
+
+norm_hex_jq='
+  def normhex:
+    if type != "string" then error("expected hex string")
+    elif startswith("0x") then .[2:] | ascii_downcase
+    else ascii_downcase
+    end;
+'
+
+require_hex_len() {
+  local what="$1"
+  local hex="$2"
+  local want_len="$3"
+  [[ "${#hex}" == "${want_len}" ]] ||
+    fail "${what}: hex length ${#hex}, want ${want_len}"
+  [[ "${hex}" =~ ^[0-9a-f]+$ ]] ||
+    fail "${what}: not lowercase hex"
+}
+
+# External tools that must be present before any check runs.
+for tool in jq sort cmp comm; do
+  require_cmd "${tool}"
+done
+
+[[ -d "${OUTPUT_DIR}" ]] || fail "output directory not found: ${OUTPUT_DIR}"
+rm -rf "${TMP_DIR}"
+mkdir -p "${TMP_DIR}"
+
+# Normalise each node's lock with jq; a missing, empty or unparsable file fails.
+for (( i = 0; i < NODES; i++ )); do
+  node_lock="$(lock_file "${i}")"
+  [[ -s "${node_lock}" ]] || fail "node-${i}: missing or empty cluster-lock.json"
+  jq -S -c . "${node_lock}" > "${TMP_DIR}/lock-${i}.json" ||
+    fail "node-${i}: invalid cluster-lock.json"
+done
+
+# Every normalised lock must be byte-identical to node-0's.
+for (( i = 1; i < NODES; i++ )); do
+  cmp -s "${TMP_DIR}/lock-0.json" "${TMP_DIR}/lock-${i}.json" ||
+    fail "node-${i}: cluster-lock.json differs from node-0"
+done
+
+LOCK="${TMP_DIR}/lock-0.json"
+
+# Lock topology has to agree with the runner configuration.
+n_operators="$(
+  jq -r '(.cluster_definition // .definition).operators | length' "${LOCK}"
+)"
+[[ "${n_operators}" == "${NODES}" ]] ||
+  fail "operator count mismatch: got ${n_operators}, want ${NODES}"
+
+# One ENR per operator, with no repeats.
+jq -r '
+  (.cluster_definition // .definition).operators[] |
+  (.enr // .ENR // empty)
+' "${LOCK}" | sort > "${TMP_DIR}/operator-enrs"
+n_enrs="$(wc -l < "${TMP_DIR}/operator-enrs" | tr -d ' ')"
+[[ "${n_enrs}" == "${NODES}" ]] ||
+  fail "operator ENR count mismatch: got ${n_enrs}, want ${NODES}"
+dup_enr="$(uniq -d "${TMP_DIR}/operator-enrs" | sed -n '1p')"
+[[ -z "${dup_enr}" ]] ||
+  fail "duplicate operator ENR: ${dup_enr}"
+
+# Threshold in the lock must equal THRESHOLD and lie within 1..NODES.
+lock_threshold="$(jq -r '(.cluster_definition // .definition).threshold | tonumber' "${LOCK}")"
+[[ "${lock_threshold}" == "${THRESHOLD}" ]] ||
+  fail "threshold mismatch: got ${lock_threshold}, want ${THRESHOLD}"
+(( 0 < lock_threshold && lock_threshold <= NODES )) ||
+  fail "threshold out of range: ${lock_threshold}"
+
+# Validator entries in the lock must match the count its definition declares.
+validator_count="$(jq -r '(.distributed_validators // .validators) | length' "${LOCK}")"
+n_declared="$(jq -r '(.cluster_definition // .definition).num_validators | tonumber' "${LOCK}")"
+[[ "${validator_count}" == "${n_declared}" ]] ||
+  fail "distributed validator count mismatch: got ${validator_count}, definition says ${n_declared}"
+(( validator_count > 0 )) || fail "validator count must be greater than zero"
+
+# Lock hash and aggregate signature must be hex strings of the expected length.
+lock_hash="$(jq -r "${norm_hex_jq}"'(.lock_hash // empty) | normhex' "${LOCK}")"
+[[ -n "${lock_hash}" ]] || fail "missing lock_hash"
+require_hex_len "lock_hash" "${lock_hash}" 64
+
+signature_aggregate="$(jq -r "${norm_hex_jq}"'(.signature_aggregate // empty) | normhex' "${LOCK}")"
+[[ -n "${signature_aggregate}" ]] || fail "missing signature_aggregate"
+require_hex_len "signature_aggregate" "${signature_aggregate}" 192
+
+# Node signatures are required by modern lock versions.
+node_sig_count="$(jq -r '(.node_signatures // []) | length' "${LOCK}")"
+if [[ "${node_sig_count}" != "0" && "${node_sig_count}" != "${NODES}" ]]; then
+  fail "node_signatures count mismatch: got ${node_sig_count}, want 0 or ${NODES}"
+fi
+lock_version="$(jq -r '(.cluster_definition // .definition).version' "${LOCK}")"
+if [[ "${lock_version}" =~ ^v1\.([7-9]|[1-9][0-9]+)\. && "${node_sig_count}" != "${NODES}" ]]; then
+  fail "node_signatures count mismatch for ${lock_version}: got ${node_sig_count}, want ${NODES}"
+fi
+if (( node_sig_count > 0 )); then
+  jq -r "${norm_hex_jq}"'(.node_signatures // [])[] | normhex' "${LOCK}" \
+    > "${TMP_DIR}/node-signatures"
+  sig_idx=0
+  while IFS= read -r node_sig; do
+    require_hex_len "node signature ${sig_idx}" "${node_sig}" 130
+    sig_idx=$((sig_idx + 1))
+  done < "${TMP_DIR}/node-signatures"
+fi
+
+# Distributed validator pubkeys must be unique.
+jq -r "${norm_hex_jq}"'
+  (.distributed_validators // .validators)[] |
+  (.distributed_public_key // .pubkey // .pub_key) | normhex
+' "${LOCK}" | sort > "${TMP_DIR}/validator-pubkeys"
+
+duplicate_validator_pubkey="$(
+  uniq -d "${TMP_DIR}/validator-pubkeys" | sed -n '1p'
+)"
+[[ -z "${duplicate_validator_pubkey}" ]] \
+  || fail "duplicate distributed validator pubkey: ${duplicate_validator_pubkey}"
+
+for (( v = 0; v < validator_count; v++ )); do
+  # Each distributed validator must have one public share per node.
+  share_count="$(
+    jq -r --argjson v "${v}" '(.distributed_validators // .validators)[$v] | (.public_shares // .pub_shares) | length' "${LOCK}"
+  )"
+  [[ "${share_count}" == "${NODES}" ]] \
+    || fail "validator-${v}: public share count ${share_count}, want ${NODES}"
+
+  validator_pubkey="$(
+    jq -r --argjson v "${v}" "${norm_hex_jq}"'
+      (.distributed_validators // .validators)[$v] |
+      (.distributed_public_key // .pubkey // .pub_key) | normhex
+    ' "${LOCK}"
+  )"
+  require_hex_len "validator-${v} distributed pubkey" "${validator_pubkey}" 96
+
+  # Deposit data must belong to the same distributed validator pubkey.
+  jq -r --argjson v "${v}" "${norm_hex_jq}"'
+    (.distributed_validators // .validators)[$v] as $validator |
+    (
+      if $validator.deposit_data? then
+        if ($validator.deposit_data | type) == "array" then $validator.deposit_data[] else $validator.deposit_data end
+      elif $validator.partial_deposit_data? then
+        $validator.partial_deposit_data[]
+      else
+        empty
+      end
+    ) |
+    (.pubkey // .pub_key) | normhex
+  ' "${LOCK}" > "${TMP_DIR}/validator-${v}-deposit-pubkeys"
+
+  if [[ ! -s "${TMP_DIR}/validator-${v}-deposit-pubkeys" ]]; then
+    fail "validator-${v}: no deposit data field"
+  fi
+
+  while IFS= read -r deposit_pubkey; do
+    require_hex_len "validator-${v} deposit pubkey" "${deposit_pubkey}" 96
+    [[ "${deposit_pubkey}" == "${validator_pubkey}" ]] \
+      || fail "validator-${v}: deposit pubkey mismatch"
+  done < "${TMP_DIR}/validator-${v}-deposit-pubkeys"
+
+  # Builder registration, when present, must also target the same validator pubkey.
+  reg_pubkey="$(
+    jq -r --argjson v "${v}" "${norm_hex_jq}"'
+      (.distributed_validators // .validators)[$v].builder_registration? as $reg |
+      if ($reg == null or $reg == {}) then empty
+      else (($reg.message // $reg.v1.message)? | (.pubkey // .pub_key) | normhex)
+      end
+    ' "${LOCK}"
+  )"
+  if [[ -n "${reg_pubkey}" ]]; then
+    require_hex_len "validator-${v} builder registration pubkey" "${reg_pubkey}" 96
+    [[ "${reg_pubkey}" == "${validator_pubkey}" ]] \
+      || fail "validator-${v}: builder registration pubkey mismatch"
+  fi
+
+  # Save expected public share for each node, indexed by node order in the lock.
+  for (( i = 0; i < NODES; i++ )); do
+    share_pubkey="$(
+      jq -r --argjson v "${v}" --argjson i "${i}" "${norm_hex_jq}"'
+        (.distributed_validators // .validators)[$v] |
+        (.public_shares // .pub_shares)[$i] | normhex
+      ' "${LOCK}"
+    )"
+    require_hex_len "validator-${v} node-${i} public share" "${share_pubkey}" 96
+    printf '%s\n' "${share_pubkey}" >> "${TMP_DIR}/node-${i}-expected-pubkeys"
+  done
+done
+
+for (( i = 0; i < NODES; i++ )); do
+  # Each node must have exactly one keystore for each distributed validator.
+  : > "${TMP_DIR}/node-${i}-actual-pubkeys"
+  shopt -s nullglob
+  keystores=("$(node_dir "${i}")"/keystore-*.json)
+  shopt -u nullglob
+  (( ${#keystores[@]} > 0 )) || fail "node-${i}: no keystore files"
+  (( ${#keystores[@]} == validator_count )) \
+    || fail "node-${i}: keystore file count ${#keystores[@]}, want ${validator_count}"
+
+  for keystore in "${keystores[@]}"; do
+    keystore_pubkey="$(
+      jq -r "${norm_hex_jq}"'.pubkey | normhex' "${keystore}" \
+        || fail "node-${i}: invalid keystore json: ${keystore}"
+    )"
+    require_hex_len "node-${i} keystore pubkey" "${keystore_pubkey}" 96
+    printf '%s\n' "${keystore_pubkey}" >> "${TMP_DIR}/node-${i}-actual-pubkeys"
+  done
+
+  # Keystore pubkeys must equal the node's lock public shares; repeats are kept, not merged.
+  sort "${TMP_DIR}/node-${i}-expected-pubkeys" > "${TMP_DIR}/node-${i}-expected.sorted"
+  sort "${TMP_DIR}/node-${i}-actual-pubkeys" > "${TMP_DIR}/node-${i}-actual.sorted"
+
+  expected_count="$(wc -l < "${TMP_DIR}/node-${i}-expected.sorted" | tr -d ' ')"
+  actual_count="$(wc -l < "${TMP_DIR}/node-${i}-actual.sorted" | tr -d ' ')"
+  [[ "${actual_count}" == "${expected_count}" ]] \
+    || fail "node-${i}: keystore pubkey count ${actual_count}, want ${expected_count}"
+
+  if ! cmp -s "${TMP_DIR}/node-${i}-expected.sorted" "${TMP_DIR}/node-${i}-actual.sorted"; then
+    missing="$(comm -23 "${TMP_DIR}/node-${i}-expected.sorted" "${TMP_DIR}/node-${i}-actual.sorted" | head -3 | tr '\n' ' ')"
+    extra="$(comm -13 "${TMP_DIR}/node-${i}-expected.sorted" "${TMP_DIR}/node-${i}-actual.sorted" | head -3 | tr '\n' ' ')"
+    fail "node-${i}: keystore pubkeys do not match lock public shares; missing=${missing} extra=${extra}"
+  fi
+done
+
+echo "Semantic DKG output check passed: ${NODES} nodes, ${validator_count} validators, threshold ${THRESHOLD}."
diff --git a/scripts/dkg-runner/ci/verify-run-smoke.sh b/scripts/dkg-runner/ci/verify-run-smoke.sh
new file mode 100755
index 00000000..57337fb7
--- /dev/null
+++ b/scripts/dkg-runner/ci/verify-run-smoke.sh
@@ -0,0 +1,170 @@
+#!/usr/bin/env bash
+# verify-run-smoke.sh — smoke-start generated DKG node dirs with `charon run`.
+#
+# Env:
+#   WORK_DIR       scratch directory used by run.sh (default: /tmp/dkg-run)
+#   NODES          total node count (default: 4)
+#   CHARON_BIN     charon binary path/name (default: charon)
+#   SMOKE_SECONDS  seconds allowed for every validator API to become ready
+#                  (default: 8)
+#   SMOKE_PORT_BASE
+#                  first local port used by this check (default: 39000)
+#
+# This verifies the generated full node data dirs are loadable by a later
+# Charon/Pluto-style runtime: cluster lock, p2p key, validator keystores, and
+# passwords are all usable enough for the process to start.
+#
+# It does not prove real beacon duties. It uses Charon simnet mocks and kills
+# the processes after the smoke window.
+
+set -euo pipefail
+
+WORK_DIR="${WORK_DIR:-/tmp/dkg-run}"
+NODES="${NODES:-4}"
+CHARON_BIN="${CHARON_BIN:-charon}"
+SMOKE_SECONDS="${SMOKE_SECONDS:-8}"
+SMOKE_PORT_BASE="${SMOKE_PORT_BASE:-39000}"
+SMOKE_DIR="${WORK_DIR}/run-smoke"
+
+fail() {
+  echo "::error::$*" >&2
+  exit 1
+}
+
+log() {
+  echo "[run-smoke] $*"
+}
+
+# Accept either a command found on PATH or a path to an executable file.
+require_cmd() {
+  command -v "$1" >/dev/null 2>&1 || [[ -x "$1" ]] || fail "missing required command: $1"
+}
+
+# Print the directory that holds a node's keystore-*.json files: the node dir
+# itself or its validator_keys/ subdirectory. Having both layouts is an error.
+detect_keys_dir() {
+  local node_dir="$1"
+  shopt -s nullglob
+  local flat_keys=("${node_dir}"/keystore-*.json)
+  local nested_keys=("${node_dir}"/validator_keys/keystore-*.json)
+  shopt -u nullglob
+
+  if (( ${#flat_keys[@]} > 0 && ${#nested_keys[@]} > 0 )); then
+    fail "mixed flat and validator_keys keystore layouts in ${node_dir}"
+  fi
+
+  if (( ${#nested_keys[@]} > 0 )); then
+    printf '%s/validator_keys' "${node_dir}"
+  else
+    printf '%s' "${node_dir}"
+  fi
+}
+
+# Print the last 80 lines of a node's smoke log to stderr.
+tail_log() {
+  local index="$1"
+  local log_file="${SMOKE_DIR}/node-${index}.log"
+  echo "::error::node-${index} smoke log tail:" >&2
+  tail -80 "${log_file}" >&2 || true
+}
+
+# Signal every started node, then reap it. Failures are ignored so this is
+# safe to run from the EXIT trap even when some nodes have already exited.
+kill_nodes() {
+  for pid in "${pids[@]:-}"; do
+    kill "${pid}" >/dev/null 2>&1 || true
+  done
+  for pid in "${pids[@]:-}"; do
+    wait "${pid}" >/dev/null 2>&1 || true
+  done
+}
+
+require_cmd "${CHARON_BIN}"
+require_cmd curl
+rm -rf "${SMOKE_DIR}"
+mkdir -p "${SMOKE_DIR}"
+
+pids=()
+validator_urls=()
+# EXIT covers every exit path. INT/TERM exit explicitly: a handler that merely
+# returned would let the script carry on starting and polling nodes.
+trap 'kill_nodes' EXIT; trap 'exit 130' INT; trap 'exit 143' TERM
+
+for (( i = 0; i < NODES; i++ )); do
+  node_dir="${WORK_DIR}/node-${i}"
+  lock_file="${node_dir}/cluster-lock.json"
+  key_file="${node_dir}/charon-enr-private-key"
+  keys_dir="$(detect_keys_dir "${node_dir}")"
+  log_file="${SMOKE_DIR}/node-${i}.log"
+
+  [[ -d "${node_dir}" ]] || fail "node-${i}: missing data dir ${node_dir}"
+  [[ -s "${lock_file}" ]] || fail "node-${i}: missing cluster-lock.json"
+  [[ -s "${key_file}" ]] || fail "node-${i}: missing charon-enr-private-key"
+  shopt -s nullglob
+  keystores=("${keys_dir}"/keystore-*.json)
+  shopt -u nullglob
+  (( ${#keystores[@]} > 0 )) || fail "node-${i}: missing keystore json files in ${keys_dir}"
+
+  for keystore in "${keystores[@]}"; do
+    password="${keystore%.json}.txt"
+    [[ -s "${password}" ]] || fail "node-${i}: missing password file for ${keystore}"
+  done
+
+  validator_port=$((SMOKE_PORT_BASE + i))
+  monitoring_port=$((SMOKE_PORT_BASE + 100 + i))
+  validator_urls+=("http://127.0.0.1:${validator_port}/eth/v1/node/version")
+
+  log "starting node-${i}"
+  "${CHARON_BIN}" run \
+    --simnet-beacon-mock \
+    --simnet-validator-mock \
+    --lock-file="${lock_file}" \
+    --private-key-file="${key_file}" \
+    --simnet-validator-keys-dir="${keys_dir}" \
+    --validator-api-address="127.0.0.1:${validator_port}" \
+    --monitoring-address="127.0.0.1:${monitoring_port}" \
+    --p2p-relays="" \
+    --log-level=info \
+    >"${log_file}" 2>&1 &
+  pids+=("$!")
+done
+
+# Poll until every validator API responds or the deadline passes. A node that
+# has exited fails the check immediately.
+deadline=$((SECONDS + SMOKE_SECONDS))
+ready=()
+for (( i = 0; i < NODES; i++ )); do
+  ready[i]=0
+done
+
+while (( SECONDS < deadline )); do
+  all_ready=1
+  for (( i = 0; i < NODES; i++ )); do
+    pid="${pids[$i]}"
+    if ! kill -0 "${pid}" >/dev/null 2>&1; then
+      tail_log "${i}"
+      fail "node-${i}: exited before validator API became ready"
+    fi
+
+    if (( ready[i] == 0 )); then
+      if curl -fsS "${validator_urls[$i]}" >/dev/null 2>&1; then
+        ready[i]=1
+        log "node-${i} validator API ready"
+      else
+        all_ready=0
+      fi
+    fi
+  done
+
+  (( all_ready == 1 )) && break
+  sleep 1
+done
+
+for (( i = 0; i < NODES; i++ )); do
+  if (( ready[i] == 0 )); then
+    tail_log "${i}"
+    fail "node-${i}: validator API not ready after ${SMOKE_SECONDS}s"
+  fi
+done
+
+log "all ${NODES} nodes reached validator API readiness"
diff --git a/scripts/dkg-runner/config.sh b/scripts/dkg-runner/config.sh
index 33eac79d..ca4fb941 100755
--- a/scripts/dkg-runner/config.sh
+++ b/scripts/dkg-runner/config.sh
@@ -17,6 +17,9 @@
 : "${CHARON_BIN:=charon}"
 : "${WORK_DIR:=/tmp/dkg-run}"
 : "${KEEP_NODES:=0}"
+: "${RUN_SMOKE_VERIFY:=1}"
+: "${SMOKE_SECONDS:=8}"
+: "${SMOKE_PORT_BASE:=39000}"
 : "${NETWORK:=holesky}"
 : "${FEE_RECIPIENT:=0xDeaDbeefdEAdbeefdEadbEEFdeadbeEFdEaDbeeF}"
 : "${WITHDRAWAL_ADDR:=0xDeaDbeefdEAdbeefdEadbEEFdeadbeEFdEaDbeeF}"
diff --git a/scripts/dkg-runner/run.sh b/scripts/dkg-runner/run.sh
index 9ba3f124..aa344eba 100755
--- a/scripts/dkg-runner/run.sh
+++ b/scripts/dkg-runner/run.sh
@@ -20,6 +20,11 @@
 #                         Scratch directory for the run (wiped on every call).
 #   KEEP_NODES=0          Leave nodes running after a successful ceremony when
 #                         set to 1/true/yes/on.
+#   RUN_SMOKE_VERIFY=1    Smoke-start generated node dirs with charon run after
+#                         successful output collection.
+#   SMOKE_SECONDS=8       Seconds allowed for smoke-started nodes to become ready.
+#   SMOKE_PORT_BASE=39000
+#                         First local port used by runtime smoke verification.
 #   NETWORK=holesky       Ethereum network for the cluster definition.
 #   FEE_RECIPIENT=0xDeaD...
 #                         Fee recipient address passed to charon create dkg.
@@ -63,6 +68,16 @@ if (( THRESHOLD > NODES )); then
   exit 1
 fi
 
+if ! command -v jq >/dev/null 2>&1; then
+  log_err "jq is required for semantic output verification"
+  exit 1
+fi
+
+if is_truthy "${RUN_SMOKE_VERIFY}" && ! command -v curl >/dev/null 2>&1; then
+  log_err "curl is required for runtime smoke verification"
+  exit 1
+fi
+
 # ── Cleanup helpers ──────────────────────────────────────────────────────────
 
 PID_FILE="${WORK_DIR}/pids"
@@ -95,6 +110,8 @@ log_info " PLUTO_BIN = ${PLUTO_BIN}"
 log_info " CHARON_BIN = ${CHARON_BIN}"
 log_info " WORK_DIR = ${WORK_DIR}"
 log_info " KEEP_NODES = ${KEEP_NODES}"
+log_info " RUN_SMOKE_VERIFY = ${RUN_SMOKE_VERIFY}"
+log_info " SMOKE_PORT_BASE = ${SMOKE_PORT_BASE}"
 log_info " CI = ${CI:-}"
 log_info "=============================================="
 
@@ -137,6 +154,14 @@ fi
 log_info "--- Phase 5: Collect outputs ---"
 "${SCRIPT_DIR}/collect.sh"
 
+log_info "--- Phase 6: Verify semantic outputs ---"
+"${SCRIPT_DIR}/ci/verify-output-semantic.sh"
+
+if is_truthy "${RUN_SMOKE_VERIFY}"; then
+  log_info "--- Phase 7: Smoke-start runtime outputs ---"
+  "${SCRIPT_DIR}/ci/verify-run-smoke.sh"
+fi
+
 log_info "=============================================="
 log_info "DKG ceremony completed successfully."
 log_info "Outputs available in: ${WORK_DIR}/output"