From 0684a13a1d342421a448b53f3986a6900409dd42 Mon Sep 17 00:00:00 2001 From: Arthur Paulino Date: Thu, 19 Mar 2026 03:38:51 -0700 Subject: [PATCH 1/2] docs, tests, benchmarks: improve trustworthiness and usability Soundness & documentation: - Add protocol documentation to prover and verifier modules - Add full soundness argument to verifier with notation, per-component bounds (FRI, constraint folding, OOD, lookup, Fiat-Shamir), and overall soundness error formula - Annotate each verification step with inline soundness comments - Add security parameter docs to FriParameters - Document unused InvalidClaim variant Testing: - Replace ignored 2^20-row benchmark test with fast 2^4-row test including serialization round-trip - Add adversarial verifier tests: wrong claim, tampered stage-1 values, tampered accumulator, truncated proof, serialization round-trip - Trim blake3 test_all_claims from 9 redundant runs to 6 minimal ones - CI: run tests with --features parallel, add --all-features to clippy Benchmarks & examples: - Add Criterion benchmarks (prove/verify) - Add examples: simple_proof (no lookups), preprocessed_proof (preprocessed trace with lookups), and lookup_proof (multi-circuit) Repo cleanup: - Rename chips/ to test_circuits/, move Blake3 helpers into blake3.rs - Drop the "chip" terminology elsewhere - Gate test_circuits module with #[cfg(test)] - Add .cargo/config.toml with target-cpu=native - Rewrite README with crypto setup, quick start, and soundness reference --- .cargo/config.toml | 2 + .github/workflows/ci.yml | 4 +- Cargo.lock | 483 +++++++++++- Cargo.toml | 7 + README.md | 63 +- benches/multi_stark.rs | 284 +++++++ examples/lookup_proof.rs | 159 ++++ examples/preprocessed_proof.rs | 137 ++++ examples/simple_proof.rs | 96 +++ src/chips/mod.rs | 385 ---------- src/lib.rs | 3 +- src/prover.rs | 39 + .../blake3.rs} | 697 ++++++++++++------ .../byte_operations.rs | 3 +- src/test_circuits/mod.rs | 8 + src/{chips => test_circuits}/u32_add.rs | 34 +- 
src/types.rs | 6 + src/verifier.rs | 276 ++++++- 18 files changed, 2022 insertions(+), 664 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 benches/multi_stark.rs create mode 100644 examples/lookup_proof.rs create mode 100644 examples/preprocessed_proof.rs create mode 100644 examples/simple_proof.rs delete mode 100644 src/chips/mod.rs rename src/{chips/blake3_circuit.rs => test_circuits/blake3.rs} (83%) rename src/{chips => test_circuits}/byte_operations.rs (99%) create mode 100644 src/test_circuits/mod.rs rename src/{chips => test_circuits}/u32_add.rs (89%) diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..03f4879 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-Ctarget-cpu=native"] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index abadc68..15cf08b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Linux Tests run: | - cargo nextest run --profile ci --cargo-profile dev-ci --workspace + cargo nextest run --profile ci --cargo-profile dev-ci --workspace --features parallel lints: runs-on: ubuntu-latest @@ -42,7 +42,7 @@ jobs: - name: Check *everything* compiles run: cargo check --all-targets --all-features --workspace - name: Check clippy lints - run: cargo clippy --workspace --all-targets -- -D warnings + run: cargo clippy --workspace --all-targets --all-features -- -D warnings - name: Doctests run: cargo test --doc --workspace - name: Get Rust version diff --git a/Cargo.lock b/Cargo.lock index ce08f77..5b8839f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,27 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + [[package]] name = "autocfg" version = "1.5.0" @@ -28,6 +49,112 @@ dependencies = [ "virtue", ] +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 
+dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -65,6 +192,42 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "half" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +dependencies = [ + "cfg-if", + "crunchy", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -74,6 +237,28 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + [[package]] name = "lock_api" version = "0.4.14" @@ -83,11 +268,18 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + [[package]] name = "multi-stark" version = "0.1.0" dependencies = [ "bincode", + "criterion", "p3-air", "p3-challenger", "p3-commit", @@ -132,6 +324,18 @@ dependencies = [ "autocfg", ] +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "p3-air" version = "0.5.0" @@ -160,7 +364,7 @@ name = "p3-commit" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + "itertools 0.14.0", "p3-challenger", "p3-dft", "p3-field", @@ -174,7 +378,7 @@ name = "p3-dft" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + "itertools 0.14.0", "p3-field", "p3-matrix", "p3-maybe-rayon", @@ -188,7 +392,7 @@ name = "p3-field" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + "itertools 0.14.0", "num-bigint", "p3-maybe-rayon", "p3-util", @@ -203,7 +407,7 @@ name = "p3-fri" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + "itertools 0.14.0", "p3-challenger", "p3-commit", "p3-dft", @@ -264,7 +468,7 @@ name = "p3-matrix" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + "itertools 0.14.0", "p3-field", "p3-maybe-rayon", "p3-util", @@ -298,7 +502,7 @@ name = "p3-merkle-tree" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + 
"itertools 0.14.0", "p3-commit", "p3-field", "p3-matrix", @@ -316,7 +520,7 @@ name = "p3-monty-31" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + "itertools 0.14.0", "num-bigint", "p3-dft", "p3-field", @@ -361,7 +565,7 @@ name = "p3-symmetric" version = "0.5.0" source = "git+https://github.com/Plonky3/Plonky3?rev=e52636ec09663fd7d3bd4eaabb21dba8698f129a#e52636ec09663fd7d3bd4eaabb21dba8698f129a" dependencies = [ - "itertools", + "itertools 0.14.0", "p3-field", "p3-util", "serde", @@ -388,6 +592,34 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -441,6 +673,50 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" 
+version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -477,6 +753,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "spin" version = "0.10.0" @@ -532,6 +821,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tracing" version = "0.1.44" @@ -587,3 +886,171 @@ name = "virtue" version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = 
"windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = 
"0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 4aec777..00b0dbd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,13 @@ p3-merkle-tree = { git = "https://github.com/Plonky3/Plonky3", rev = "e52636ec09 p3-symmetric = { git = "https://github.com/Plonky3/Plonky3", rev = "e52636ec09663fd7d3bd4eaabb21dba8698f129a" } p3-util = { git = "https://github.com/Plonky3/Plonky3", rev = "e52636ec09663fd7d3bd4eaabb21dba8698f129a" } +[dev-dependencies] +criterion = "0.5" + +[[bench]] +name = "multi_stark" +harness = false + [features] parallel = ["p3-maybe-rayon/parallel"] diff --git a/README.md b/README.md index d29716e..0021bd7 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,61 @@ -# multi-stark +# multi-STARK -A multi-STARK ZK protocol with support for -* Traces with independent heights -* Lookup arguments with arguments of arbitrary length -* Preprocessed tables +A multi-circuit STARK proving system built on [Plonky3](https://github.com/Plonky3/Plonky3). + +Prove and verify multiple AIR circuits in a single proof, with cross-circuit +lookup arguments for shared state. 
+ +## Features + +- **Multi-circuit proofs** — bundle multiple AIR circuits into one proof with + independent trace heights +- **Lookup arguments** — push/pull interactions of arbitrary length between + circuits, enforced via accumulator-based multiset checks +- **Preprocessed tables** — commit to fixed tables once, reuse across proofs +- **Serialization** — `Proof::to_bytes` / `Proof::from_bytes` via bincode +- **Parallel proving** — opt-in via the `parallel` feature flag + +## Cryptographic setup + +| Component | Choice | +|-----------|--------| +| Field | Goldilocks (p = 2^64 - 2^32 + 1) | +| Extension | Degree-2 binomial extension (~2^128 elements) | +| Hash | Keccak-256 | +| PCS | FRI over Merkle trees | + +Security level is configurable via `FriParameters`. With `log_blowup = 1` and +`num_queries = 100`, FRI provides ~2^(-100) soundness error. See the +[verifier module docs](src/verifier.rs) for the full soundness argument. + +## Examples + +**Minimal prove-and-verify** (no lookups): +```sh +cargo run --example simple_proof --release +``` + +**Preprocessed trace with lookups** (byte range-check table): +```sh +cargo run --example preprocessed_proof --release +``` + +**Multi-circuit with lookup arguments**: +```sh +cargo run --example lookup_proof --release +``` + +## Benchmarks + +```sh +cargo bench --bench multi_stark --features parallel +``` + +Benchmarks cover `prove` and `verify` at 2^12, 2^13, and 2^14 trace rows +using a U32 addition circuit with lookups and a preprocessed byte table. +Use `--features parallel` for representative numbers. Native SIMD instructions +are enabled by default via `.cargo/config.toml`. ## License -MIT or Apache 2.0 +MIT or Apache-2.0 diff --git a/benches/multi_stark.rs b/benches/multi_stark.rs new file mode 100644 index 0000000..3feff51 --- /dev/null +++ b/benches/multi_stark.rs @@ -0,0 +1,284 @@ +//! Criterion benchmarks for the multi-circuit STARK prover and verifier. +//! +//! Uses a U32 addition circuit with: +//! 
- A **ByteTable** backed by a preprocessed byte table (256 rows) and lookup pulls +//! - A **U32Add** that decomposes additions into bytes via lookup pushes +//! +//! This exercises lookups, preprocessed traces, and regular constraints – +//! giving a more representative cost profile than plain arithmetic AIRs. +//! +//! Run with: +//! ```sh +//! cargo bench --bench multi_stark --features parallel +//! ``` + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use multi_stark::builder::symbolic::{SymbolicExpression, preprocessed_var, var}; +use multi_stark::lookup::{Lookup, LookupAir}; +use multi_stark::system::{System, SystemWitness}; +use multi_stark::types::{CommitmentParameters, FriParameters, Val}; +use multi_stark::{ + p3_air::{Air, AirBuilder, BaseAir, WindowAccess}, + p3_field::{Field, PrimeCharacteristicRing}, + p3_matrix::dense::RowMajorMatrix, +}; + +type SymbExpr = SymbolicExpression; + +// --------------------------------------------------------------------------- +// Circuits +// --------------------------------------------------------------------------- + +enum U32CS { + /// Preprocessed byte table (256 rows). Main trace column: multiplicity. + ByteTable, + /// U32 addition: 4 bytes x + 4 bytes y + 4 bytes z + carry + multiplicity = 14 cols. 
+ U32Add, +} + +impl BaseAir for U32CS { + fn width(&self) -> usize { + match self { + Self::ByteTable => 1, + Self::U32Add => 14, + } + } + + fn preprocessed_trace(&self) -> Option> { + match self { + Self::ByteTable => Some(RowMajorMatrix::new((0..256).map(F::from_u32).collect(), 1)), + Self::U32Add => None, + } + } +} + +impl Air for U32CS +where + AB: AirBuilder, + AB::Var: Copy, + AB::F: Field, +{ + fn eval(&self, builder: &mut AB) { + match self { + Self::ByteTable => {} + Self::U32Add => { + let main = builder.main(); + let local = main.current_slice(); + let x = &local[0..4]; + let y = &local[4..8]; + let z = &local[8..12]; + let carry = local[12]; + builder.assert_bool(carry); + + let expr1 = x[0] + + x[1] * AB::Expr::from_u32(256) + + x[2] * AB::Expr::from_u32(256 * 256) + + x[3] * AB::Expr::from_u32(256 * 256 * 256) + + y[0] + + y[1] * AB::Expr::from_u32(256) + + y[2] * AB::Expr::from_u32(256 * 256) + + y[3] * AB::Expr::from_u32(256 * 256 * 256); + let expr2 = z[0] + + z[1] * AB::Expr::from_u32(256) + + z[2] * AB::Expr::from_u32(256 * 256) + + z[3] * AB::Expr::from_u32(256 * 256 * 256) + + carry * AB::Expr::from_u64(256 * 256 * 256 * 256); + builder.assert_eq(expr1, expr2); + } + } + } +} + +impl U32CS { + fn lookups(&self) -> Vec> { + let byte_index = SymbExpr::from_u8(0); + let u32_index = SymbExpr::from_u8(1); + match self { + Self::ByteTable => vec![Lookup::pull(var(0), vec![byte_index, preprocessed_var(0)])], + Self::U32Add => { + let mut lookups = vec![Lookup::pull( + var(13), + vec![ + u32_index, + var(0) + + var(1) * SymbExpr::from_u32(256) + + var(2) * SymbExpr::from_u32(256 * 256) + + var(3) * SymbExpr::from_u32(256 * 256 * 256), + var(4) + + var(5) * SymbExpr::from_u32(256) + + var(6) * SymbExpr::from_u32(256 * 256) + + var(7) * SymbExpr::from_u32(256 * 256 * 256), + var(8) + + var(9) * SymbExpr::from_u32(256) + + var(10) * SymbExpr::from_u32(256 * 256) + + var(11) * SymbExpr::from_u32(256 * 256 * 256), + ], + )]; + lookups.extend( + 
(0..12).map(|i| Lookup::push(SymbExpr::ONE, vec![byte_index.clone(), var(i)])), + ); + lookups + } + } + } +} + +// --------------------------------------------------------------------------- +// Witness generation +// --------------------------------------------------------------------------- + +fn build_witness(num_adds: usize, system: &System) -> SystemWitness { + let byte_width = 1; + let add_width = 14; + let add_height = num_adds.next_power_of_two(); + + let mut byte_trace = RowMajorMatrix::new(vec![Val::ZERO; byte_width * 256], byte_width); + let mut add_trace = RowMajorMatrix::new(vec![Val::ZERO; add_width * add_height], add_width); + + // Fill with pseudo-random additions (deterministic for reproducibility). + let mut a: u32 = 0xdead_beef; + let mut b: u32 = 0xcafe_babe; + for row_index in 0..num_adds { + // Simple xorshift-style PRNG + a ^= a << 13; + a ^= a >> 17; + a ^= a << 5; + b ^= b << 13; + b ^= b >> 17; + b ^= b << 5; + + let x = a; + let y = b; + let (z, carry) = x.overflowing_add(y); + let x_bytes = x.to_le_bytes(); + let y_bytes = y.to_le_bytes(); + let z_bytes = z.to_le_bytes(); + + let row = add_trace.row_mut(row_index); + for (col, &val) in row[0..4].iter_mut().zip(&x_bytes) { + *col = Val::from_u8(val); + } + for (col, &val) in row[4..8].iter_mut().zip(&y_bytes) { + *col = Val::from_u8(val); + } + for (col, &val) in row[8..12].iter_mut().zip(&z_bytes) { + *col = Val::from_u8(val); + } + row[12] = Val::from_u8(u8::from(carry)); + row[13] = Val::ONE; + + for &byte in x_bytes.iter().chain(y_bytes.iter()).chain(z_bytes.iter()) { + byte_trace.row_mut(byte as usize)[0] += Val::ONE; + } + } + + SystemWitness::from_stage_1(vec![byte_trace, add_trace], system) +} + +/// Build claims for the first `num_adds` additions (same PRNG seed as `build_witness`). 
+fn build_claims(num_adds: usize) -> Vec<[Val; 4]> { + let f = Val::from_u32; + let mut a: u32 = 0xdead_beef; + let mut b: u32 = 0xcafe_babe; + let mut claims = Vec::with_capacity(num_adds); + for _ in 0..num_adds { + a ^= a << 13; + a ^= a >> 17; + a ^= a << 5; + b ^= b << 13; + b ^= b >> 17; + b ^= b << 5; + let x = a; + let y = b; + let (z, _carry) = x.overflowing_add(y); + claims.push([f(1), f(x), f(y), f(z)]); + } + claims +} + +// --------------------------------------------------------------------------- +// Benchmarks +// --------------------------------------------------------------------------- + +fn bench_prove(c: &mut Criterion) { + let commitment_parameters = CommitmentParameters { + log_blowup: 1, + cap_height: 0, + }; + let byte_table = LookupAir::new(U32CS::ByteTable, U32CS::ByteTable.lookups()); + let u32_add = LookupAir::new(U32CS::U32Add, U32CS::U32Add.lookups()); + let (system, key) = System::new(commitment_parameters, [byte_table, u32_add]); + let fri_parameters = FriParameters { + log_final_poly_len: 0, + max_log_arity: 1, + num_queries: 100, + commit_proof_of_work_bits: 10, + query_proof_of_work_bits: 10, + }; + + let mut group = c.benchmark_group("prove"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(20)); + + for log_height in [12, 13, 14] { + let num_adds = 1 << log_height; + let claims = build_claims(num_adds); + let claim_refs: Vec<&[Val]> = claims.iter().map(|c| c.as_slice()).collect(); + group.bench_function( + BenchmarkId::new("u32_add", format!("2^{log_height}")), + |b| { + b.iter_batched( + || build_witness(num_adds, &system), + |witness| { + system.prove_multiple_claims(fri_parameters, &key, &claim_refs, witness) + }, + criterion::BatchSize::LargeInput, + ); + }, + ); + } + group.finish(); +} + +fn bench_verify(c: &mut Criterion) { + let commitment_parameters = CommitmentParameters { + log_blowup: 1, + cap_height: 0, + }; + let byte_table = LookupAir::new(U32CS::ByteTable, 
U32CS::ByteTable.lookups()); + let u32_add = LookupAir::new(U32CS::U32Add, U32CS::U32Add.lookups()); + let (system, key) = System::new(commitment_parameters, [byte_table, u32_add]); + let fri_parameters = FriParameters { + log_final_poly_len: 0, + max_log_arity: 1, + num_queries: 100, + commit_proof_of_work_bits: 10, + query_proof_of_work_bits: 10, + }; + + let mut group = c.benchmark_group("verify"); + group.sample_size(10); + group.measurement_time(std::time::Duration::from_secs(5)); + + for log_height in [12, 13, 14] { + let num_adds = 1 << log_height; + let claims = build_claims(num_adds); + let claim_refs: Vec<&[Val]> = claims.iter().map(|c| c.as_slice()).collect(); + let witness = build_witness(num_adds, &system); + let proof = system.prove_multiple_claims(fri_parameters, &key, &claim_refs, witness); + group.bench_function( + BenchmarkId::new("u32_add", format!("2^{log_height}")), + |b| { + b.iter(|| { + system + .verify_multiple_claims(fri_parameters, &claim_refs, &proof) + .unwrap() + }); + }, + ); + } + group.finish(); +} + +criterion_group!(benches, bench_prove, bench_verify); +criterion_main!(benches); diff --git a/examples/lookup_proof.rs b/examples/lookup_proof.rs new file mode 100644 index 0000000..9e87168 --- /dev/null +++ b/examples/lookup_proof.rs @@ -0,0 +1,159 @@ +//! Multi-circuit example with lookup arguments. +//! +//! Defines two AIR circuits (Even and Odd) that compute whether an input +//! number is even or odd using a recursive lookup argument: +//! - Even(n) pulls a lookup claim and, if n > 0, pushes to Odd(n-1). +//! - Odd(n) pulls a lookup claim and, if n > 0, pushes to Even(n-1). +//! +//! The claim encodes the initial query: is_even(4) == 1. +//! +//! Run with: +//! ```sh +//! cargo run --example lookup_proof --release +//! 
``` + +use multi_stark::builder::symbolic::{SymbolicExpression, var}; +use multi_stark::lookup::{Lookup, LookupAir}; +use multi_stark::system::{System, SystemWitness}; +use multi_stark::types::{CommitmentParameters, FriParameters, Val}; +use multi_stark::{ + p3_air::{Air, AirBuilder, BaseAir, WindowAccess}, + p3_field::{Field, PrimeCharacteristicRing}, + p3_matrix::dense::RowMajorMatrix, +}; + +/// Circuit for the Even/Odd parity check. +/// Width: 6 columns [multiplicity, input, input_inverse, input_is_zero, input_not_zero, recursion_output] +enum ParityAir { + Even, + Odd, +} + +impl ParityAir { + fn lookups(&self) -> Vec>> { + let multiplicity = var(0); + let input = var(1); + let input_is_zero = var(3); + let input_not_zero = var(4); + let recursion_output = var(5); + let even_index = Val::ZERO.into(); + let odd_index = Val::ONE.into(); + let one: SymbolicExpression<_> = Val::ONE.into(); + match self { + Self::Even => vec![ + Lookup::pull( + multiplicity, + vec![ + even_index, + input.clone(), + input_not_zero.clone() * recursion_output.clone() + input_is_zero, + ], + ), + Lookup::push( + input_not_zero, + vec![odd_index, input - one, recursion_output], + ), + ], + Self::Odd => vec![ + Lookup::pull( + multiplicity, + vec![ + odd_index, + input.clone(), + input_not_zero.clone() * recursion_output.clone(), + ], + ), + Lookup::push( + input_not_zero, + vec![even_index, input - one, recursion_output], + ), + ], + } + } +} + +impl BaseAir for ParityAir { + fn width(&self) -> usize { + 6 + } +} + +impl Air for ParityAir +where + AB: AirBuilder, + AB::Var: Copy, +{ + fn eval(&self, builder: &mut AB) { + let main = builder.main(); + let local = main.current_slice(); + let multiplicity = local[0]; + let input = local[1]; + let input_inverse = local[2]; + let input_is_zero = local[3]; + let input_not_zero = local[4]; + builder.assert_bools([input_is_zero, input_not_zero]); + builder + .when(multiplicity) + .assert_one(input_is_zero + input_not_zero); + 
builder.when(input_is_zero).assert_zero(input); + builder + .when(input_not_zero) + .assert_one(input * input_inverse); + } +} + +fn main() { + let commitment_parameters = CommitmentParameters { + log_blowup: 1, + cap_height: 0, + }; + + let even = LookupAir::new(ParityAir::Even, ParityAir::Even.lookups()); + let odd = LookupAir::new(ParityAir::Odd, ParityAir::Odd.lookups()); + let (system, key) = System::new(commitment_parameters, [even, odd]); + + let f = Val::from_u32; + #[rustfmt::skip] + let witness = SystemWitness::from_stage_1( + vec![ + // Even circuit trace + RowMajorMatrix::new( + vec![ + f(1), f(4), f(4).inverse(), f(0), f(1), f(1), + f(1), f(2), f(2).inverse(), f(0), f(1), f(1), + f(1), f(0), f(0), f(1), f(0), f(0), + f(0), f(0), f(0), f(0), f(0), f(0), + ], + 6, + ), + // Odd circuit trace + RowMajorMatrix::new( + vec![ + f(1), f(3), f(3).inverse(), f(0), f(1), f(1), + f(1), f(1), f(1).inverse(), f(0), f(1), f(1), + f(0), f(0), f(0), f(0), f(0), f(0), + f(0), f(0), f(0), f(0), f(0), f(0), + ], + 6, + ), + ], + &system, + ); + + // Claim: [even_index=0, input=4, expected_output=1] — is_even(4) should be 1 + let claim = &[f(0), f(4), f(1)]; + let fri_parameters = FriParameters { + log_final_poly_len: 0, + max_log_arity: 1, + num_queries: 64, + commit_proof_of_work_bits: 0, + query_proof_of_work_bits: 0, + }; + + let proof = system.prove(fri_parameters, &key, claim, witness); + system.verify(fri_parameters, claim, &proof).unwrap(); + println!("Lookup proof verified successfully!"); + + let bytes = proof.to_bytes().expect("serialization failed"); + println!("Proof size: {} bytes", bytes.len()); +} diff --git a/examples/preprocessed_proof.rs b/examples/preprocessed_proof.rs new file mode 100644 index 0000000..c3ff6f2 --- /dev/null +++ b/examples/preprocessed_proof.rs @@ -0,0 +1,137 @@ +//! Example with a preprocessed trace and lookups. +//! +//! Defines two circuits: +//! - **RangeTable**: a read-only byte table (0..256) committed as a preprocessed +//! 
trace. Each row pulls a lookup weighted by its multiplicity column. +//! - **Squares**: computes `x * x` and range-checks both bytes of the result +//! via lookup pushes into the RangeTable. +//! +//! Run with: +//! ```sh +//! cargo run --example preprocessed_proof --release +//! ``` + +use multi_stark::builder::symbolic::{SymbolicExpression, preprocessed_var, var}; +use multi_stark::lookup::{Lookup, LookupAir}; +use multi_stark::system::{System, SystemWitness}; +use multi_stark::types::{CommitmentParameters, FriParameters, Val}; +use multi_stark::{ + p3_air::{Air, AirBuilder, BaseAir, WindowAccess}, + p3_field::{Field, PrimeCharacteristicRing}, + p3_matrix::dense::RowMajorMatrix, +}; + +type SymbExpr = SymbolicExpression; + +/// Two-circuit system: a preprocessed range table and a squaring circuit. +enum SquaresCS { + /// Preprocessed column: bytes 0..256. Main column: multiplicity. + RangeTable, + /// Columns: [x, x², low_byte, high_byte, multiplicity]. + Squares, +} + +impl BaseAir for SquaresCS { + fn width(&self) -> usize { + match self { + Self::RangeTable => 1, + Self::Squares => 5, + } + } + + fn preprocessed_trace(&self) -> Option> { + match self { + Self::RangeTable => Some(RowMajorMatrix::new((0..256).map(F::from_u32).collect(), 1)), + Self::Squares => None, + } + } +} + +impl Air for SquaresCS +where + AB: AirBuilder, + AB::Var: Copy, + AB::F: Field, +{ + fn eval(&self, builder: &mut AB) { + match self { + Self::RangeTable => {} // constrained entirely via lookups + Self::Squares => { + let main = builder.main(); + let local = main.current_slice(); + let x = local[0]; + let x_squared = local[1]; + let low = local[2]; + let high = local[3]; + // x² == x * x + builder.assert_eq(x_squared, x * x); + // x² == low + 256 * high (byte decomposition) + builder.assert_eq(x_squared, low + high * AB::Expr::from_u32(256)); + } + } + } +} + +impl SquaresCS { + fn lookups(&self) -> Vec> { + match self { + // RangeTable pulls: multiplicity × (preprocessed byte value) 
+ Self::RangeTable => vec![Lookup::pull(var(0), vec![preprocessed_var(0)])], + // Squares pushes each byte into the range table for validation + Self::Squares => vec![ + Lookup::push(var(4), vec![var(2)]), // low byte + Lookup::push(var(4), vec![var(3)]), // high byte + ], + } + } +} + +fn main() { + let commitment_parameters = CommitmentParameters { + log_blowup: 1, + cap_height: 0, + }; + + let range_table = LookupAir::new(SquaresCS::RangeTable, SquaresCS::RangeTable.lookups()); + let squares = LookupAir::new(SquaresCS::Squares, SquaresCS::Squares.lookups()); + let (system, key) = System::new(commitment_parameters, [range_table, squares]); + + // Build traces: square every value 0..16 + let n = 16u32; + let f = Val::from_u32; + + // Range-table main trace: multiplicity per byte value (256 rows × 1 col) + let mut range_mults = vec![Val::ZERO; 256]; + // Squares trace: 16 rows × 5 cols + let mut sq_values = Vec::with_capacity(5 * n as usize); + for x in 0..n { + let sq = x * x; + let low = sq & 0xFF; + let high = (sq >> 8) & 0xFF; + sq_values.extend([f(x), f(sq), f(low), f(high), Val::ONE]); + range_mults[low as usize] += Val::ONE; + range_mults[high as usize] += Val::ONE; + } + + let range_trace = RowMajorMatrix::new(range_mults, 1); + let squares_trace = RowMajorMatrix::new(sq_values, 5); + let witness = SystemWitness::from_stage_1(vec![range_trace, squares_trace], &system); + + let fri_parameters = FriParameters { + log_final_poly_len: 0, + max_log_arity: 1, + num_queries: 64, + commit_proof_of_work_bits: 0, + query_proof_of_work_bits: 0, + }; + + let no_claims: &[&[Val]] = &[]; + let proof = system.prove_multiple_claims(fri_parameters, &key, no_claims, witness); + system + .verify_multiple_claims(fri_parameters, no_claims, &proof) + .unwrap(); + println!("Preprocessed proof verified successfully!"); + + let bytes = proof.to_bytes().expect("serialization failed"); + println!("Proof size: {} bytes", bytes.len()); +} diff --git a/examples/simple_proof.rs 
b/examples/simple_proof.rs new file mode 100644 index 0000000..12d4db1 --- /dev/null +++ b/examples/simple_proof.rs @@ -0,0 +1,96 @@ +//! Minimal prove-and-verify example (no lookups). +//! +//! Defines a simple AIR that checks Pythagorean triples: `a² + b² == c²`. +//! Wraps it in `LookupAir` with no lookups, creates a small trace, and +//! proves + verifies it. +//! +//! Run with: +//! ```sh +//! cargo run --example simple_proof --release +//! ``` + +use multi_stark::lookup::LookupAir; +use multi_stark::system::{System, SystemWitness}; +use multi_stark::types::{CommitmentParameters, FriParameters, Val}; +use multi_stark::{ + p3_air::{Air, AirBuilder, BaseAir, WindowAccess}, + p3_field::PrimeCharacteristicRing, + p3_matrix::dense::RowMajorMatrix, +}; + +/// A simple AIR checking Pythagorean triples: a² + b² == c². +/// Width is 3 columns: [a, b, c]. +struct PythagoreanAir; + +impl BaseAir for PythagoreanAir { + fn width(&self) -> usize { + 3 + } +} + +impl Air for PythagoreanAir +where + AB: AirBuilder, + AB::Var: Copy, +{ + fn eval(&self, builder: &mut AB) { + let main = builder.main(); + let local = main.current_slice(); + // Constraint: a² + b² == c² + let lhs = local[0] * local[0] + local[1] * local[1]; + let rhs = local[2] * local[2]; + builder.assert_eq(lhs, rhs); + } +} + +fn main() { + let commitment_parameters = CommitmentParameters { + log_blowup: 1, + cap_height: 0, + }; + + // Wrap the AIR with empty lookups + let air = LookupAir::new(PythagoreanAir, vec![]); + let (system, key) = System::new(commitment_parameters, [air]); + + // Build a trace with 4 rows of Pythagorean triples + let f = Val::from_u32; + let trace = RowMajorMatrix::new( + vec![ + f(3), + f(4), + f(5), + f(5), + f(12), + f(13), + f(8), + f(15), + f(17), + f(7), + f(24), + f(25), + ], + 3, + ); + let witness = SystemWitness::from_stage_1(vec![trace], &system); + + let fri_parameters = FriParameters { + log_final_poly_len: 0, + max_log_arity: 1, + num_queries: 64, + 
commit_proof_of_work_bits: 0, + query_proof_of_work_bits: 0, + }; + + // Prove + let no_claims: &[Val] = &[]; + let proof = system.prove(fri_parameters, &key, no_claims, witness); + + // Verify + system.verify(fri_parameters, no_claims, &proof).unwrap(); + println!("Proof verified successfully!"); + + // Show proof size + let bytes = proof.to_bytes().expect("serialization failed"); + println!("Proof size: {} bytes", bytes.len()); +} diff --git a/src/chips/mod.rs b/src/chips/mod.rs deleted file mode 100644 index 16e7cf5..0000000 --- a/src/chips/mod.rs +++ /dev/null @@ -1,385 +0,0 @@ -#![allow(dead_code)] -#![allow(clippy::manual_memcpy)] -#![allow(clippy::cast_possible_truncation)] -#![allow(clippy::explicit_counter_loop)] -#![allow(clippy::cast_lossless)] - -mod blake3_circuit; -mod byte_operations; -mod u32_add; - -use crate::builder::symbolic::SymbolicExpression; -use crate::types::Val; - -type SymbExpr = SymbolicExpression; // used in chips testing - -struct CompressionInfo { - // input - cv: [u32; 8], - block_words: [u32; 16], - counter_low: u32, - counter_high: u32, - block_len: u32, - flags: u32, - - // output - output: [u32; 16], -} - -// one-line Blake3 hasher (that additionally produces compressions IO required for claims construction) tested to be compatible -// with the reference implementation: https://github.com/BLAKE3-team/BLAKE3/blob/master/reference_impl/reference_impl.rs -fn blake3_new_update_finalize(input: &[u8]) -> (Vec, [u8; 32]) { - const IV: [u32; 8] = [ - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, - 0x5BE0CD19, - ]; - const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; - const CHUNK_LEN: usize = 1024; - const BLOCK_LEN: usize = 64; - - const CHUNK_START: u32 = 1 << 0; - const CHUNK_END: u32 = 1 << 1; - const PARENT: u32 = 1 << 2; - const OUT_LEN: usize = 32; - const ROOT: u32 = 1 << 3; - - fn compress( - chaining_value: &[u32; 8], - block_words: &[u32; 16], - 
counter: u64, - block_len: u32, - flags: u32, - ) -> [u32; 16] { - let counter_low = counter as u32; - let counter_high = (counter >> 32) as u32; - - #[rustfmt::skip] - let mut state = [ - chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3], - chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7], - IV[0], IV[1], IV[2], IV[3], - counter_low, counter_high, block_len, flags, - block_words[0], block_words[1], block_words[2], block_words[3], - block_words[4], block_words[5], block_words[6], block_words[7], - block_words[8], block_words[9], block_words[10], block_words[11], - block_words[12], block_words[13], block_words[14], block_words[15], - ]; - - let a = [0, 1, 2, 3, 0, 1, 2, 3]; - let b = [4, 5, 6, 7, 5, 6, 7, 4]; - let c = [8, 9, 10, 11, 10, 11, 8, 9]; - let d = [12, 13, 14, 15, 15, 12, 13, 14]; - let mx = [16, 18, 20, 22, 24, 26, 28, 30]; - let my = [17, 19, 21, 23, 25, 27, 29, 31]; - - // we have 7 rounds in total - for round_idx in 0..7 { - for j in 0..8 { - let a_in = state[a[j]]; - let b_in = state[b[j]]; - let c_in = state[c[j]]; - let d_in = state[d[j]]; - let mx_in = state[mx[j]]; - let my_in = state[my[j]]; - - let a_0 = a_in.wrapping_add(b_in).wrapping_add(mx_in); - let d_0 = (d_in ^ a_0).rotate_right(16); - let c_0 = c_in.wrapping_add(d_0); - let b_0 = (b_in ^ c_0).rotate_right(12); - - let a_1 = a_0.wrapping_add(b_0).wrapping_add(my_in); - let d_1 = (d_0 ^ a_1).rotate_right(8); - let c_1 = c_0.wrapping_add(d_1); - let b_1 = (b_0 ^ c_1).rotate_right(7); - - state[a[j]] = a_1; - state[b[j]] = b_1; - state[c[j]] = c_1; - state[d[j]] = d_1; - } - - // execute permutation for the 6 first rounds - if round_idx < 6 { - let mut permuted = [0; 16]; - for i in 0..16 { - permuted[i] = state[16 + MSG_PERMUTATION[i]]; - } - for i in 0..16 { - state[i + 16] = permuted[i]; - } - } - } - - for i in 0..8 { - state[i] ^= state[i + 8]; - state[i + 8] ^= chaining_value[i]; - } - - let state_out: [u32; 16] = 
std::array::from_fn(|i| state[i]); - state_out - } - - fn words_from_little_endian_bytes(bytes: &[u8], words: &mut [u32]) { - debug_assert_eq!(bytes.len(), 4 * words.len()); - for (four_bytes, word) in bytes.chunks_exact(4).zip(words) { - *word = u32::from_le_bytes(four_bytes.try_into().unwrap()); - } - } - - fn first_8_words(compression_output: [u32; 16]) -> [u32; 8] { - compression_output[0..8].try_into().unwrap() - } - - fn start_flag(blocks_compressed: u8) -> u32 { - if blocks_compressed == 0 { - CHUNK_START - } else { - 0 - } - } - - let mut c_info = vec![]; - - let mut input = input; - - let mut output = [0u8; 32]; - - /* New */ - - // Hasher - let hasher_key_words = IV; - let mut hasher_cv_stack = [[0u32; 8]; 54]; - let mut hasher_cv_stack_len = 0u32; - let hasher_flags = 0u32; - - // ChunkState - let mut chunk_state_chaining_value = hasher_key_words; - let mut chunk_state_chunk_counter = 0u64; - let mut chunk_state_block = [0u8; BLOCK_LEN]; - let mut chunk_state_block_len = 0u8; - let mut chunk_state_blocks_compressed = 0u8; - let mut chunk_state_flags = hasher_flags; - - /* Update */ - while !input.is_empty() { - let chunk_state_len = - BLOCK_LEN * chunk_state_blocks_compressed as usize + chunk_state_block_len as usize; - if CHUNK_LEN == chunk_state_len { - // output - let mut block_words = [0; 16]; - words_from_little_endian_bytes(&chunk_state_block, &mut block_words); - let chaining_value = chunk_state_chaining_value; - let counter = chunk_state_chunk_counter; - let block_len = chunk_state_block_len; - let flags = chunk_state_flags | start_flag(chunk_state_blocks_compressed) | CHUNK_END; - - // chaining_value - let cv = compress( - &chaining_value, - &block_words, - counter, - block_len as u32, - flags, - ); - c_info.push(CompressionInfo { - cv: chaining_value, - block_words, - counter_low: counter as u32, - counter_high: (counter >> 32) as u32, - block_len: block_len as u32, - flags, - output: cv, - }); - - let chaining_value = first_8_words(cv); - - 
let chunk_cv = chaining_value; - let total_chunks = chunk_state_chunk_counter + 1; - - // add_chunk_chaining_value - let mut new_cv = chunk_cv; - let mut total_chunks_inner = total_chunks; - while total_chunks_inner & 1 == 0 { - // pop_stack - hasher_cv_stack_len -= 1; - let pop_stack = hasher_cv_stack[hasher_cv_stack_len as usize]; - let key_words = hasher_key_words; - - // parent_cv - let left_child_cv = pop_stack; - let right_child_cv = new_cv; - - // parent_output - let mut block_words = [0u32; 16]; - block_words[..8].copy_from_slice(&left_child_cv); - block_words[8..].copy_from_slice(&right_child_cv); - - let input_chaining_value = key_words; - let counter = 0u64; - let block_len = BLOCK_LEN as u32; - let flags = PARENT | hasher_flags; - - // chaining_value - let cv = compress( - &input_chaining_value, - &block_words, - counter, - block_len, - flags, - ); - c_info.push(CompressionInfo { - cv: input_chaining_value, - block_words, - counter_low: counter as u32, - counter_high: (counter >> 32) as u32, - block_len, - flags, - output: cv, - }); - - new_cv = first_8_words(cv); - - total_chunks_inner >>= 1; - } - - // push_stack - let cv = new_cv; - hasher_cv_stack[hasher_cv_stack_len as usize] = cv; - hasher_cv_stack_len += 1; - - // ChunkState::new(self.key_words, total_chunks, self.flags); - chunk_state_chaining_value = hasher_key_words; - chunk_state_chunk_counter = total_chunks; - chunk_state_block = [0u8; BLOCK_LEN]; - chunk_state_block_len = 0u8; - chunk_state_blocks_compressed = 0u8; - chunk_state_flags = hasher_flags; - } - - let chunk_state_len = - BLOCK_LEN * chunk_state_blocks_compressed as usize + chunk_state_block_len as usize; - let want = CHUNK_LEN - chunk_state_len; - let take = std::cmp::min(want, input.len()); - - // chunk_state.update(&input[..take]) - let mut input_inner = &input[..take]; - - while !input_inner.is_empty() { - if chunk_state_block_len as usize == BLOCK_LEN { - let mut block_words = [0; 16]; - 
words_from_little_endian_bytes(&chunk_state_block, &mut block_words); - - let cv = compress( - &chunk_state_chaining_value, - &block_words, - chunk_state_chunk_counter, - BLOCK_LEN as u32, - chunk_state_flags | start_flag(chunk_state_blocks_compressed), - ); - c_info.push(CompressionInfo { - cv: chunk_state_chaining_value, - block_words, - counter_low: chunk_state_chunk_counter as u32, - counter_high: (chunk_state_chunk_counter >> 32) as u32, - block_len: BLOCK_LEN as u32, - flags: chunk_state_flags | start_flag(chunk_state_blocks_compressed), - output: cv, - }); - - chunk_state_chaining_value = first_8_words(cv); - - chunk_state_blocks_compressed += 1; - chunk_state_block = [0u8; BLOCK_LEN]; - chunk_state_block_len = 0; - } - - let want = BLOCK_LEN - chunk_state_block_len as usize; - let take = std::cmp::min(want, input_inner.len()); - chunk_state_block[chunk_state_block_len as usize..][..take] - .copy_from_slice(&input_inner[..take]); - chunk_state_block_len += take as u8; - input_inner = &input_inner[take..]; - } - - input = &input[take..]; - } - - /* Finalize */ - - // output - let mut block_words = [0; 16]; - words_from_little_endian_bytes(&chunk_state_block, &mut block_words); - let mut input_chaining_value = chunk_state_chaining_value; - let mut counter = chunk_state_chunk_counter; - let mut block_len = chunk_state_block_len as u32; - let mut flags = chunk_state_flags | start_flag(chunk_state_blocks_compressed) | CHUNK_END; - - let mut parent_nodes_remaining = hasher_cv_stack_len as usize; - while parent_nodes_remaining > 0 { - parent_nodes_remaining -= 1; - - // output - let left_child_cv = hasher_cv_stack[parent_nodes_remaining]; - - // chaining_value - let cv = compress( - &input_chaining_value, - &block_words, - counter, - block_len, - flags, - ); - c_info.push(CompressionInfo { - cv: input_chaining_value, - block_words, - counter_low: counter as u32, - counter_high: (counter >> 32) as u32, - block_len, - flags, - output: cv, - }); - - let right_child_cv 
= first_8_words(cv); - - let mut block_words_inner = [0; 16]; - block_words_inner[..8].copy_from_slice(&left_child_cv); - block_words_inner[8..].copy_from_slice(&right_child_cv); - - input_chaining_value = hasher_key_words; - block_words = block_words_inner; - counter = 0; - block_len = BLOCK_LEN as u32; - flags = PARENT | hasher_flags; - } - - // root_output_bytes - - let mut output_block_counter = 0u64; - for out_block in output.chunks_mut(2 * OUT_LEN) { - let cv = compress( - &input_chaining_value, - &block_words, - output_block_counter, - block_len, - flags | ROOT, - ); - c_info.push(CompressionInfo { - cv: input_chaining_value, - block_words, - counter_low: output_block_counter as u32, - counter_high: (output_block_counter >> 32) as u32, - block_len, - flags: flags | ROOT, - output: cv, - }); - - let words = cv; - - for (word, out_word) in words.iter().zip(out_block.chunks_mut(4)) { - out_word.copy_from_slice(&word.to_le_bytes()[..out_word.len()]); - } - output_block_counter += 1; - } - - (c_info, output) -} diff --git a/src/lib.rs b/src/lib.rs index 390ee41..96709a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,9 @@ pub mod builder; -mod chips; pub mod lookup; pub mod prover; pub mod system; +#[cfg(test)] +mod test_circuits; pub mod types; pub mod verifier; diff --git a/src/prover.rs b/src/prover.rs index 1626434..0d56b77 100644 --- a/src/prover.rs +++ b/src/prover.rs @@ -1,3 +1,35 @@ +//! Multi-circuit STARK prover. +//! +//! The proving protocol proceeds in several stages: +//! +//! 1. **Stage 1 — Main traces**: Each circuit's execution trace is committed via the +//! PCS (FRI-based polynomial commitment over Goldilocks with degree-2 extension and +//! Keccak-256 hashing). The preprocessed commitment (if any), stage-1 commitment, +//! trace heights, and claims are observed into the Fiat-Shamir challenger. Claims +//! must be observed before lookup challenges are sampled; otherwise the prover could +//! 
choose claims adaptively to balance the lookup accumulator. +//! +//! 2. **Lookup challenges**: The challenger samples two independent challenges: +//! `lookup_argument_challenge` (β) and `fingerprint_challenge` (γ). An initial +//! accumulator is computed from the claims: +//! `acc = Σ (β + fingerprint(γ, claim_i))⁻¹`. +//! +//! 3. **Stage 2 — Lookup traces**: For each circuit, the lookup traces are computed +//! (running accumulator and message inverses per row) and committed via PCS. Each +//! circuit produces an intermediate accumulator value recording where its running +//! sum ended up; the verifier will check that the last one is zero. +//! +//! 4. **Quotient polynomial**: A constraint challenge (α) is sampled and used to fold +//! all constraints via powers of α. The folded constraint polynomial is divided by +//! the vanishing polynomial, split into degree-bounded chunks, and committed. +//! +//! 5. **Opening**: An out-of-domain point (ζ) is sampled. All polynomials are opened +//! at ζ and ζ·g (where g is the trace domain generator) via the PCS, producing +//! the FRI opening proof. +//! +//! The resulting [`Proof`] can be serialized with [`Proof::to_bytes`] and deserialized +//! with [`Proof::from_bytes`] for transport or storage. + use std::ops::Deref; use crate::{ @@ -71,6 +103,9 @@ impl Proof { } impl + for<'a> Air>> System { + /// Generates a STARK proof for the system with a single claim. + /// + /// This is a convenience wrapper around [`Self::prove_multiple_claims`]. pub fn prove( &self, fri_parameters: FriParameters, @@ -81,6 +116,10 @@ impl + for<'a> Air>> System { self.prove_multiple_claims(fri_parameters, key, &[claim], witness) } + /// Generates a STARK proof for the system with multiple claims. + /// + /// Each claim is a slice of field elements that is observed by the challenger + /// before lookup challenges are sampled, binding the proof to the claimed values. 
pub fn prove_multiple_claims( &self, fri_parameters: FriParameters, diff --git a/src/chips/blake3_circuit.rs b/src/test_circuits/blake3.rs similarity index 83% rename from src/chips/blake3_circuit.rs rename to src/test_circuits/blake3.rs index 2e7f672..8fcbf3a 100644 --- a/src/chips/blake3_circuit.rs +++ b/src/test_circuits/blake3.rs @@ -1,9 +1,9 @@ #[cfg(test)] mod tests { use crate::builder::symbolic::{preprocessed_var, var}; - use crate::chips::{SymbExpr, blake3_new_update_finalize}; use crate::lookup::{Lookup, LookupAir}; use crate::system::{System, SystemWitness}; + use crate::test_circuits::SymbExpr; use crate::types::{CommitmentParameters, FriParameters, Val}; use p3_air::{Air, AirBuilder, BaseAir, WindowAccess}; use p3_field::{Field, PrimeCharacteristicRing, PrimeField64}; @@ -12,6 +12,344 @@ mod tests { use std::array; use std::ops::Range; + struct CompressionInfo { + cv: [u32; 8], + block_words: [u32; 16], + counter_low: u32, + counter_high: u32, + block_len: u32, + flags: u32, + output: [u32; 16], + } + + // Blake3 reference hasher that additionally produces compression IO for claims construction. 
+ // Tested to be compatible with: https://github.com/BLAKE3-team/BLAKE3/blob/master/reference_impl/reference_impl.rs + #[allow( + clippy::too_many_lines, + clippy::cast_possible_truncation, + clippy::cast_lossless + )] + fn blake3_new_update_finalize(input: &[u8]) -> (Vec, [u8; 32]) { + const IV: [u32; 8] = [ + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, + 0x5BE0CD19, + ]; + const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; + const CHUNK_LEN: usize = 1024; + const BLOCK_LEN: usize = 64; + + const CHUNK_START: u32 = 1 << 0; + const CHUNK_END: u32 = 1 << 1; + const PARENT: u32 = 1 << 2; + const OUT_LEN: usize = 32; + const ROOT: u32 = 1 << 3; + + fn compress( + chaining_value: &[u32; 8], + block_words: &[u32; 16], + counter: u64, + block_len: u32, + flags: u32, + ) -> [u32; 16] { + let counter_low = counter as u32; + let counter_high = (counter >> 32) as u32; + + #[rustfmt::skip] + let mut state = [ + chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3], + chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7], + IV[0], IV[1], IV[2], IV[3], + counter_low, counter_high, block_len, flags, + block_words[0], block_words[1], block_words[2], block_words[3], + block_words[4], block_words[5], block_words[6], block_words[7], + block_words[8], block_words[9], block_words[10], block_words[11], + block_words[12], block_words[13], block_words[14], block_words[15], + ]; + + let a = [0, 1, 2, 3, 0, 1, 2, 3]; + let b = [4, 5, 6, 7, 5, 6, 7, 4]; + let c = [8, 9, 10, 11, 10, 11, 8, 9]; + let d = [12, 13, 14, 15, 15, 12, 13, 14]; + let mx = [16, 18, 20, 22, 24, 26, 28, 30]; + let my = [17, 19, 21, 23, 25, 27, 29, 31]; + + for round_idx in 0..7 { + for j in 0..8 { + let a_in = state[a[j]]; + let b_in = state[b[j]]; + let c_in = state[c[j]]; + let d_in = state[d[j]]; + let mx_in = state[mx[j]]; + let my_in = state[my[j]]; + + let a_0 = 
a_in.wrapping_add(b_in).wrapping_add(mx_in); + let d_0 = (d_in ^ a_0).rotate_right(16); + let c_0 = c_in.wrapping_add(d_0); + let b_0 = (b_in ^ c_0).rotate_right(12); + + let a_1 = a_0.wrapping_add(b_0).wrapping_add(my_in); + let d_1 = (d_0 ^ a_1).rotate_right(8); + let c_1 = c_0.wrapping_add(d_1); + let b_1 = (b_0 ^ c_1).rotate_right(7); + + state[a[j]] = a_1; + state[b[j]] = b_1; + state[c[j]] = c_1; + state[d[j]] = d_1; + } + + if round_idx < 6 { + let mut permuted = [0; 16]; + for i in 0..16 { + permuted[i] = state[16 + MSG_PERMUTATION[i]]; + } + state[16..(16 + 16)].copy_from_slice(&permuted); + } + } + + for i in 0..8 { + state[i] ^= state[i + 8]; + state[i + 8] ^= chaining_value[i]; + } + + array::from_fn(|i| state[i]) + } + + fn words_from_little_endian_bytes(bytes: &[u8], words: &mut [u32]) { + debug_assert_eq!(bytes.len(), 4 * words.len()); + for (four_bytes, word) in bytes.chunks_exact(4).zip(words) { + *word = u32::from_le_bytes(four_bytes.try_into().unwrap()); + } + } + + fn first_8_words(compression_output: [u32; 16]) -> [u32; 8] { + compression_output[0..8].try_into().unwrap() + } + + fn start_flag(blocks_compressed: u8) -> u32 { + if blocks_compressed == 0 { + CHUNK_START + } else { + 0 + } + } + + let mut c_info = vec![]; + let mut input = input; + let mut output = [0u8; 32]; + + let hasher_key_words = IV; + let mut hasher_cv_stack = [[0u32; 8]; 54]; + let mut hasher_cv_stack_len = 0u32; + let hasher_flags = 0u32; + + let mut chunk_state_chaining_value = hasher_key_words; + let mut chunk_state_chunk_counter = 0u64; + let mut chunk_state_block = [0u8; BLOCK_LEN]; + let mut chunk_state_block_len = 0u8; + let mut chunk_state_blocks_compressed = 0u8; + let mut chunk_state_flags = hasher_flags; + + while !input.is_empty() { + let chunk_state_len = + BLOCK_LEN * chunk_state_blocks_compressed as usize + chunk_state_block_len as usize; + if CHUNK_LEN == chunk_state_len { + let mut block_words = [0; 16]; + words_from_little_endian_bytes(&chunk_state_block, 
&mut block_words); + let chaining_value = chunk_state_chaining_value; + let counter = chunk_state_chunk_counter; + let block_len = chunk_state_block_len; + let flags = + chunk_state_flags | start_flag(chunk_state_blocks_compressed) | CHUNK_END; + + let cv = compress( + &chaining_value, + &block_words, + counter, + block_len as u32, + flags, + ); + c_info.push(CompressionInfo { + cv: chaining_value, + block_words, + counter_low: counter as u32, + counter_high: (counter >> 32) as u32, + block_len: block_len as u32, + flags, + output: cv, + }); + + let chaining_value = first_8_words(cv); + let chunk_cv = chaining_value; + let total_chunks = chunk_state_chunk_counter + 1; + + let mut new_cv = chunk_cv; + let mut total_chunks_inner = total_chunks; + while total_chunks_inner & 1 == 0 { + hasher_cv_stack_len -= 1; + let pop_stack = hasher_cv_stack[hasher_cv_stack_len as usize]; + + let left_child_cv = pop_stack; + let right_child_cv = new_cv; + + let mut block_words = [0u32; 16]; + block_words[..8].copy_from_slice(&left_child_cv); + block_words[8..].copy_from_slice(&right_child_cv); + + let input_chaining_value = hasher_key_words; + let counter = 0u64; + let block_len = BLOCK_LEN as u32; + let flags = PARENT | hasher_flags; + + let cv = compress( + &input_chaining_value, + &block_words, + counter, + block_len, + flags, + ); + c_info.push(CompressionInfo { + cv: input_chaining_value, + block_words, + counter_low: counter as u32, + counter_high: (counter >> 32) as u32, + block_len, + flags, + output: cv, + }); + + new_cv = first_8_words(cv); + total_chunks_inner >>= 1; + } + + hasher_cv_stack[hasher_cv_stack_len as usize] = new_cv; + hasher_cv_stack_len += 1; + + chunk_state_chaining_value = hasher_key_words; + chunk_state_chunk_counter = total_chunks; + chunk_state_block = [0u8; BLOCK_LEN]; + chunk_state_block_len = 0u8; + chunk_state_blocks_compressed = 0u8; + chunk_state_flags = hasher_flags; + } + + let chunk_state_len = + BLOCK_LEN * chunk_state_blocks_compressed as 
usize + chunk_state_block_len as usize; + let want = CHUNK_LEN - chunk_state_len; + let take = std::cmp::min(want, input.len()); + + let mut input_inner = &input[..take]; + + while !input_inner.is_empty() { + if chunk_state_block_len as usize == BLOCK_LEN { + let mut block_words = [0; 16]; + words_from_little_endian_bytes(&chunk_state_block, &mut block_words); + + let cv = compress( + &chunk_state_chaining_value, + &block_words, + chunk_state_chunk_counter, + BLOCK_LEN as u32, + chunk_state_flags | start_flag(chunk_state_blocks_compressed), + ); + c_info.push(CompressionInfo { + cv: chunk_state_chaining_value, + block_words, + counter_low: chunk_state_chunk_counter as u32, + counter_high: (chunk_state_chunk_counter >> 32) as u32, + block_len: BLOCK_LEN as u32, + flags: chunk_state_flags | start_flag(chunk_state_blocks_compressed), + output: cv, + }); + + chunk_state_chaining_value = first_8_words(cv); + chunk_state_blocks_compressed += 1; + chunk_state_block = [0u8; BLOCK_LEN]; + chunk_state_block_len = 0; + } + + let want = BLOCK_LEN - chunk_state_block_len as usize; + let take = std::cmp::min(want, input_inner.len()); + chunk_state_block[chunk_state_block_len as usize..][..take] + .copy_from_slice(&input_inner[..take]); + chunk_state_block_len += take as u8; + input_inner = &input_inner[take..]; + } + + input = &input[take..]; + } + + let mut block_words = [0; 16]; + words_from_little_endian_bytes(&chunk_state_block, &mut block_words); + let mut input_chaining_value = chunk_state_chaining_value; + let mut counter = chunk_state_chunk_counter; + let mut block_len = chunk_state_block_len as u32; + let mut flags = chunk_state_flags | start_flag(chunk_state_blocks_compressed) | CHUNK_END; + + let mut parent_nodes_remaining = hasher_cv_stack_len as usize; + while parent_nodes_remaining > 0 { + parent_nodes_remaining -= 1; + + let left_child_cv = hasher_cv_stack[parent_nodes_remaining]; + + let cv = compress( + &input_chaining_value, + &block_words, + counter, + 
block_len, + flags, + ); + c_info.push(CompressionInfo { + cv: input_chaining_value, + block_words, + counter_low: counter as u32, + counter_high: (counter >> 32) as u32, + block_len, + flags, + output: cv, + }); + + let right_child_cv = first_8_words(cv); + + let mut block_words_inner = [0; 16]; + block_words_inner[..8].copy_from_slice(&left_child_cv); + block_words_inner[8..].copy_from_slice(&right_child_cv); + + input_chaining_value = hasher_key_words; + block_words = block_words_inner; + counter = 0; + block_len = BLOCK_LEN as u32; + flags = PARENT | hasher_flags; + } + + for (output_block_counter, out_block) in output.chunks_mut(2 * OUT_LEN).enumerate() { + let output_block_counter = output_block_counter as u64; + let cv = compress( + &input_chaining_value, + &block_words, + output_block_counter, + block_len, + flags | ROOT, + ); + c_info.push(CompressionInfo { + cv: input_chaining_value, + block_words, + counter_low: output_block_counter as u32, + counter_high: (output_block_counter >> 32) as u32, + block_len, + flags: flags | ROOT, + output: cv, + }); + + let words = cv; + for (word, out_word) in words.iter().zip(out_block.chunks_mut(4)) { + out_word.copy_from_slice(&word.to_le_bytes()[..out_word.len()]); + } + } + + (c_info, output) + } + // Blake3-specific constants const IV: [u32; 8] = [ @@ -72,7 +410,7 @@ mod tests { // 1 + 32 * 4 + 40 * 56 + 12 * 8 * 2 + 16 * 4 const COMPRESSION_TRACE_WIDTH: usize = 2625; - enum Blake3CompressionChips { + enum Blake3CompressionCircuit { U8Xor, U32Xor, U32Add, @@ -85,7 +423,7 @@ mod tests { Compression, } - impl Blake3CompressionChips { + impl Blake3CompressionCircuit { fn position(&self) -> usize { match self { Self::U8Xor => 0, @@ -102,7 +440,7 @@ mod tests { } } - impl BaseAir for Blake3CompressionChips { + impl BaseAir for Blake3CompressionCircuit { fn width(&self) -> usize { match self { Self::U8Xor | Self::U8PairRangeCheck => U8_XOR_PAIR_RANGE_CHECK_TRACE_WIDTH, @@ -146,7 +484,7 @@ mod tests { } } - impl Air for 
Blake3CompressionChips + impl Air for Blake3CompressionCircuit where AB: AirBuilder, AB::Var: Copy, @@ -411,9 +749,7 @@ mod tests { for i in 0..16 { permuted[i] = state[16 + MSG_PERMUTATION[i]].clone(); } - for i in 0..16 { - state[i + 16] = permuted[i].clone(); - } + state[16..(16 + 16)].clone_from_slice(&permuted); } } @@ -435,7 +771,7 @@ mod tests { } } - impl Blake3CompressionChips { + impl Blake3CompressionCircuit { fn lookups(&self) -> Vec> { let u8_xor_idx = Self::U8Xor.position(); let u32_xor_idx = Self::U32Xor.position(); @@ -450,7 +786,7 @@ mod tests { fn pull_state_in_state_out( multiplicity: SymbExpr, - chip_idx: usize, + circuit_idx: usize, state_in_range: Range, state_out_range: Range, var: fn(usize) -> SymbExpr, @@ -484,13 +820,13 @@ mod tests { Lookup::pull( multiplicity, - [vec![SymbExpr::from_usize(chip_idx)], state_in, state_out].concat(), + [vec![SymbExpr::from_usize(circuit_idx)], state_in, state_out].concat(), ) } fn push_round( multiplicity: SymbExpr, - chip_idx: usize, + circuit_idx: usize, v_ind: Range, var: fn(usize) -> SymbExpr, ) -> Lookup { @@ -501,7 +837,7 @@ mod tests { Lookup::push( multiplicity, vec![ - SymbExpr::from_usize(chip_idx), + SymbExpr::from_usize(circuit_idx), var(i[0]) + var(i[1]) * SymbExpr::from_u32(256) + var(i[2]) * SymbExpr::from_u32(65536) @@ -548,26 +884,26 @@ mod tests { fn push_u32( multiplicity: SymbExpr, - chip_idx: usize, + circuit_idx: usize, v_ind: Range, var: fn(usize) -> SymbExpr, ) -> Lookup { - lookup_u32_inner(Lookup::push, multiplicity, chip_idx, v_ind, var) + lookup_u32_inner(Lookup::push, multiplicity, circuit_idx, v_ind, var) } fn pull_u32( multiplicity: SymbExpr, - chip_idx: usize, + circuit_idx: usize, v_ind: Range, var: fn(usize) -> SymbExpr, ) -> Lookup { - lookup_u32_inner(Lookup::pull, multiplicity, chip_idx, v_ind, var) + lookup_u32_inner(Lookup::pull, multiplicity, circuit_idx, v_ind, var) } fn lookup_u32_inner( lookup_fn: fn(SymbExpr, Vec) -> Lookup, multiplicity: SymbExpr, - chip_idx: 
usize, + circuit_idx: usize, v_ind: Range, var: fn(usize) -> SymbExpr, ) -> Lookup { @@ -578,7 +914,7 @@ mod tests { lookup_fn( multiplicity, vec![ - SymbExpr::from_usize(chip_idx), + SymbExpr::from_usize(circuit_idx), var(i[0]) + var(i[1]) * SymbExpr::from_u32(256) + var(i[2]) * SymbExpr::from_u32(256 * 256) @@ -618,11 +954,11 @@ mod tests { ] } - // (4 push lookups to u8_xor_chip) + // (4 push lookups to u8_xor circuit) Self::U32Xor => { let mut lookups = vec![pull_u32(var(0), u32_xor_idx, 1..12 + 1, var)]; - // push (A, B, A^B) tuples to U8Xor chip for verification + // push (A, B, A^B) tuples to U8Xor circuit for verification lookups.extend((0..4).map(|i| { Lookup::push( SymbExpr::ONE, @@ -642,7 +978,7 @@ mod tests { // Pull let mut lookups = vec![pull_u32(var(13), u32_add_idx, 0..11 + 1, var)]; - // push (A, B) tuples to U8PairRangeCheck chip for verification + // push (A, B) tuples to U8PairRangeCheck circuit for verification lookups.extend((0..4).map(|i| { Lookup::push( SymbExpr::ONE, @@ -654,7 +990,7 @@ mod tests { ) })); - // push (A + B, 0) tuples to U8PairRangeCheck chip for verification. 0 is used just as a stub + // push (A + B, 0) tuples to U8PairRangeCheck circuit for verification. 
0 is used just as a stub lookups.extend((0..4).map(|i| { Lookup::push( SymbExpr::ONE, @@ -822,7 +1158,7 @@ mod tests { + var(80) * SymbExpr::from_u32(256 * 256 * 256), ], ), - // interacting with lower-level chips that constrain operations used in G function + // interacting with lower-level circuits that constrain operations used in G function // a_in + b_in = a_0_tmp Lookup::push( @@ -1179,7 +1515,7 @@ mod tests { impl Blake3CompressionClaims { fn witness( &self, - system: &System, + system: &System, ) -> (Vec>, SystemWitness) { // Grabbing values from a claims @@ -1204,16 +1540,16 @@ mod tests { let mut state_transition_values_from_claims = vec![]; for claim in self.claims.clone() { - // we should have at least chip index + // we should have at least circuit index assert!(!claim.is_empty(), "wrong claim format"); match claim[0].as_canonical_u64() { 0u64 => { - // This is our U8Xor claim. We should have chip_idx, A, B, A xor B (where A, B are bytes) + // This is our U8Xor claim. We should have circuit_idx, A, B, A xor B (where A, B are bytes) assert!(claim.len() == 4, "[U8Xor] wrong claim format"); byte_xor_values_from_claims.push((claim[1], claim[2], claim[3])); } 1u64 => { - /* This is our U32Xor claim. We should have chip_idx, A, B, A xor B (where A, B are u32) */ + /* This is our U32Xor claim. We should have circuit_idx, A, B, A xor B (where A, B are u32) */ assert!(claim.len() == 4, "[U32Xor] wrong claim format"); let a_u32 = u32::try_from(claim[1].as_canonical_u64()).unwrap(); @@ -1224,7 +1560,7 @@ mod tests { } 2u64 => { - /* This is our U32Add claim. We should have chip_idx, A, B, A + B (where A, B are u32) */ + /* This is our U32Add claim. 
We should have circuit_idx, A, B, A + B (where A, B are u32) */ assert!(claim.len() == 4, "[U32Add] wrong claim format"); let a_u32 = u32::try_from(claim[1].as_canonical_u64()).unwrap(); @@ -1234,7 +1570,7 @@ mod tests { u32_add_values_from_claims.push((a_u32, b_u32, add_u32)); } 3u64 => { - /* This is our U32RotateRight8 claim. We should have chip_idx, A, A_rot */ + /* This is our U32RotateRight8 claim. We should have circuit_idx, A, A_rot */ assert!(claim.len() == 3, "[U32RightRotate8] wrong claim format"); let a_u32 = u32::try_from(claim[1].as_canonical_u64()).unwrap(); @@ -1243,7 +1579,7 @@ mod tests { u32_rotate_right_8_values_from_claims.push((a_u32, rot_u32)); } 4u64 => { - /* This is our U32RotateRight16 claim. We should have chip_idx, A, A_rot */ + /* This is our U32RotateRight16 claim. We should have circuit_idx, A, A_rot */ assert!(claim.len() == 3, "[U32RightRotate16] wrong claim format"); let a_u32 = u32::try_from(claim[1].as_canonical_u64()).unwrap(); @@ -1252,7 +1588,7 @@ mod tests { u32_rotate_right_16_values_from_claims.push((a_u32, rot_u32)); } 5u64 => { - /* This is our U32RotateRight12 claim. We should have chip_idx, A, A_rot */ + /* This is our U32RotateRight12 claim. We should have circuit_idx, A, A_rot */ assert!(claim.len() == 3, "[U32RightRotate12] wrong claim format"); let a_u32 = u32::try_from(claim[1].as_canonical_u64()).unwrap(); @@ -1261,7 +1597,7 @@ mod tests { u32_rotate_right_12_values_from_claims.push((a_u32, rot_u32)); } 6u64 => { - /* This is our U32RotateRight7 claim. We should have chip_idx, A, A_rot */ + /* This is our U32RotateRight7 claim. We should have circuit_idx, A, A_rot */ assert!(claim.len() == 3, "[U32RightRotate7] wrong claim format"); let a_u32 = u32::try_from(claim[1].as_canonical_u64()).unwrap(); @@ -1271,14 +1607,14 @@ mod tests { } 7u64 => { - /* This is our U8PairRangeCheck claim. We should have chip_idx, A, B */ + /* This is our U8PairRangeCheck claim. 
We should have circuit_idx, A, B */ assert!(claim.len() == 3, "[U8Xor] wrong claim format"); byte_range_check_values_from_claims.push((claim[1], claim[2])); } 8u64 => { - /* This is our GFunction claim. We should have chip_idx, A, B, C, D, MX_IN, MY_IN, A1, D1, C1, B1 */ + /* This is our GFunction claim. We should have circuit_idx, A, B, C, D, MX_IN, MY_IN, A1, D1, C1, B1 */ assert!(claim.len() == 11, "[GFunction] wrong claim format"); let a_in = u32::try_from(claim[1].as_canonical_u64()).unwrap(); @@ -1297,7 +1633,7 @@ mod tests { } 9u64 => { - /* This is our StateTransition claim. We should have chip_idx, state_in[32], state_out[16] */ + /* This is our StateTransition claim. We should have circuit_idx, state_in[32], state_out[16] */ assert!(claim.len() == 49, "[StateTransition] wrong claim format"); let state_in: [u32; 32] = array::from_fn(|i| { @@ -1310,11 +1646,11 @@ mod tests { state_transition_values_from_claims.push((state_in, state_out)); } - _ => panic!("unsupported chip"), + _ => panic!("unsupported circuit"), } } - // Build traces. If claim for a given chip was not provided (and hence no data available), we just use zero trace + // Build traces. 
If claim for a given circuit was not provided (and hence no data available), we just use zero trace // and balance lookups providing zero values let mut state_transition_trace_values = @@ -1366,7 +1702,7 @@ mod tests { let b_1 = (b_0 ^ c_1).rotate_right(7); g_function_values_from_claims - .push((a_in, b_in, c_in, d_in, mx_in, my_in, a_1, b_1, c_1, d_1)); // send data to G_Function chip + .push((a_in, b_in, c_in, d_in, mx_in, my_in, a_1, b_1, c_1, d_1)); // send data to G_Function circuit state[A[j]] = a_1; state[B[j]] = b_1; @@ -1388,9 +1724,7 @@ mod tests { for i in 0..16 { permuted[i] = state[16 + MSG_PERMUTATION[i]]; } - for i in 0..16 { - state[i + 16] = permuted[i]; - } + state[16..(16 + 16)].copy_from_slice(&permuted); } } @@ -1412,7 +1746,7 @@ mod tests { state_transition_trace_values .extend_from_slice(xor_bytes.map(Val::from_u8).as_slice()); - u32_xor_values_from_claims.push((left, right, xor)); // send data to U32Xor chip + u32_xor_values_from_claims.push((left, right, xor)); // send data to U32Xor circuit let left = state[i + 8]; let right = state_in_io[i]; @@ -1431,7 +1765,7 @@ mod tests { state_transition_trace_values .extend_from_slice(xor_bytes.map(Val::from_u8).as_slice()); - u32_xor_values_from_claims.push((left, right, xor)); // send data to U32Xor chip + u32_xor_values_from_claims.push((left, right, xor)); // send data to U32Xor circuit } let mut state_out = state.to_vec(); @@ -1456,13 +1790,13 @@ mod tests { let height = state_transition_trace.height().next_power_of_two(); let zero_rows_added = height - state_transition_trace.height(); for _ in 0..zero_rows_added { - // we have 56 communications with G_Function chip + // we have 56 communications with G_Function circuit for _ in 0..56 { g_function_values_from_claims .push((0u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32, 0u32)); } - // we have 8 * 2 communications with U32_XOR chip + // we have 8 * 2 communications with U32_XOR circuit for _ in 0..8 { u32_xor_values_from_claims.push((0u32, 
0u32, 0u32)); u32_xor_values_from_claims.push((0u32, 0u32, 0u32)); @@ -1502,43 +1836,43 @@ mod tests { g_function_values_from_claims { let a_0_tmp = a_in.wrapping_add(b_in); - u32_add_values_from_claims.push((a_in, b_in, a_0_tmp)); // send data to U32Add chip + u32_add_values_from_claims.push((a_in, b_in, a_0_tmp)); // send data to U32Add circuit let a_0 = a_0_tmp.wrapping_add(mx_in); - u32_add_values_from_claims.push((a_0_tmp, mx_in, a_0)); // send data to U32Add chip + u32_add_values_from_claims.push((a_0_tmp, mx_in, a_0)); // send data to U32Add circuit let d_0_tmp = d_in ^ a_0; - u32_xor_values_from_claims.push((d_in, a_0, d_0_tmp)); // send data to U32Xor chip + u32_xor_values_from_claims.push((d_in, a_0, d_0_tmp)); // send data to U32Xor circuit let d_0 = d_0_tmp.rotate_right(16); - u32_rotate_right_16_values_from_claims.push((d_0_tmp, d_0)); // send data to U32RightRotate16 chip + u32_rotate_right_16_values_from_claims.push((d_0_tmp, d_0)); // send data to U32RightRotate16 circuit let c_0 = c_in.wrapping_add(d_0); - u32_add_values_from_claims.push((c_in, d_0, c_0)); // send data to U32Add chip + u32_add_values_from_claims.push((c_in, d_0, c_0)); // send data to U32Add circuit let b_0_tmp = b_in ^ c_0; - u32_xor_values_from_claims.push((b_in, c_0, b_0_tmp)); // send data to U32Xor chip + u32_xor_values_from_claims.push((b_in, c_0, b_0_tmp)); // send data to U32Xor circuit let b_0 = b_0_tmp.rotate_right(12); - u32_rotate_right_12_values_from_claims.push((b_0_tmp, b_0)); // send data to U32RightRotate12 chip + u32_rotate_right_12_values_from_claims.push((b_0_tmp, b_0)); // send data to U32RightRotate12 circuit let a_1_tmp = a_0.wrapping_add(b_0); - u32_add_values_from_claims.push((a_0, b_0, a_1_tmp)); // send data to U32Add chip + u32_add_values_from_claims.push((a_0, b_0, a_1_tmp)); // send data to U32Add circuit let a_1 = a_1_tmp.wrapping_add(my_in); - u32_add_values_from_claims.push((a_1_tmp, my_in, a_1)); // send data to U32Add chip + 
u32_add_values_from_claims.push((a_1_tmp, my_in, a_1)); // send data to U32Add circuit let d_1_tmp = d_0 ^ a_1; - u32_xor_values_from_claims.push((d_0, a_1, d_1_tmp)); // send data to U32Xor chip + u32_xor_values_from_claims.push((d_0, a_1, d_1_tmp)); // send data to U32Xor circuit let d_1 = d_1_tmp.rotate_right(8); u32_rotate_right_8_values_from_claims.push((d_1_tmp, d_1)); let c_1 = c_0.wrapping_add(d_1); - u32_add_values_from_claims.push((c_0, d_1, c_1)); // send data to U32Add chip + u32_add_values_from_claims.push((c_0, d_1, c_1)); // send data to U32Add circuit let b_1_tmp = b_0 ^ c_1; - u32_xor_values_from_claims.push((b_0, c_1, b_1_tmp)); // send data to U32Xor chip + u32_xor_values_from_claims.push((b_0, c_1, b_1_tmp)); // send data to U32Xor circuit let b_1 = b_1_tmp.rotate_right(7); u32_rotate_right_7_values_from_claims.push((b_1_tmp, b_1)); @@ -1587,7 +1921,7 @@ mod tests { if u32_xor_values_from_claims.is_empty() { u32_xor_trace_values = Val::zero_vec(U32_XOR_TRACE_WIDTH); - // we also need to balance the U8Xor chip lookups using zeroes + // we also need to balance the U8Xor circuit lookups using zeroes for _ in 0..4 { byte_xor_values_from_claims.push((Val::ZERO, Val::ZERO, Val::ZERO)); @@ -1607,7 +1941,7 @@ mod tests { .extend_from_slice(right_bytes.map(Val::from_u8).as_slice()); u32_xor_trace_values.extend_from_slice(xor_bytes.map(Val::from_u8).as_slice()); - /* we send bytes to U8Xor chip, relying on lookup constraining */ + /* we send bytes to U8Xor circuit, relying on lookup constraining */ for i in 0..4 { byte_xor_values_from_claims.push(( @@ -1622,7 +1956,7 @@ mod tests { let height = u32_xor_trace.height().next_power_of_two(); let zero_rows = height - u32_xor_trace.height(); for _ in 0..zero_rows { - // we also need to balance the U8Xor chip lookups using zeroes for every padded row + // we also need to balance the U8Xor circuit lookups using zeroes for every padded row for _ in 0..4 { byte_xor_values_from_claims.push((Val::ZERO, Val::ZERO, 
Val::ZERO)); } @@ -1657,7 +1991,7 @@ mod tests { u32_add_trace_values.push(Val::from_bool(carry)); u32_add_trace_values.push(Val::ONE); // multiplicity - /* we send decomposed bytes to U8Xor chip, relying on lookup constraining */ + /* we send decomposed bytes to U8Xor circuit, relying on lookup constraining */ for i in 0..4 { byte_range_check_values_from_claims @@ -1684,7 +2018,7 @@ mod tests { if u32_rotate_right_8_values_from_claims.is_empty() { u32_rotate_right_8_trace_values = Val::zero_vec(U32_RIGHT_ROTATE_8_TRACE_WIDTH); - // we also need to balance U8PairRangeCheck chip lookups using zeroes + // we also need to balance U8PairRangeCheck circuit lookups using zeroes byte_range_check_values_from_claims.push((Val::ZERO, Val::ZERO)); byte_range_check_values_from_claims.push((Val::ZERO, Val::ZERO)); @@ -1703,7 +2037,7 @@ mod tests { u32_rotate_right_8_trace_values .extend_from_slice(rot_bytes.map(Val::from_u8).as_slice()); - /* we send decomposed bytes to U8PairRangeCheck chip, relying on lookup constraining */ + /* we send decomposed bytes to U8PairRangeCheck circuit, relying on lookup constraining */ byte_range_check_values_from_claims .push((Val::from_u8(val_bytes[0]), Val::from_u8(val_bytes[2]))); @@ -1748,7 +2082,7 @@ mod tests { u32_rotate_right_16_trace_values .extend_from_slice(rot_bytes.map(Val::from_u8).as_slice()); - /* we send decomposed bytes to U8PairRangeCheck chip, relying on lookup constraining */ + /* we send decomposed bytes to U8PairRangeCheck circuit, relying on lookup constraining */ byte_range_check_values_from_claims .push((Val::from_u8(a_bytes[0]), Val::from_u8(a_bytes[2]))); @@ -1830,7 +2164,7 @@ mod tests { rot_7_12_trace_values(7, &u32_rotate_right_7_values_from_claims); // finally build U8Xor / U8PairRangeCheck trace (columns: multiplicity_u8_xor, multiplicity_pair_range_check) - // since this it "lowest-level" trace, its multiplicities could be updated by other chips previously + // since this it "lowest-level" trace, its 
multiplicities could be updated by other circuits previously let mut u8_xor_range_check_trace_values = Vec::::with_capacity( BYTE_VALUES_NUM * BYTE_VALUES_NUM * U8_XOR_PAIR_RANGE_CHECK_TRACE_WIDTH, ); @@ -1920,41 +2254,41 @@ mod tests { cap_height: 0, }; let u8_circuit = LookupAir::new( - Blake3CompressionChips::U8Xor, - Blake3CompressionChips::U8Xor.lookups(), + Blake3CompressionCircuit::U8Xor, + Blake3CompressionCircuit::U8Xor.lookups(), ); let u32_circuit = LookupAir::new( - Blake3CompressionChips::U32Xor, - Blake3CompressionChips::U32Xor.lookups(), + Blake3CompressionCircuit::U32Xor, + Blake3CompressionCircuit::U32Xor.lookups(), ); let u32_add_circuit = LookupAir::new( - Blake3CompressionChips::U32Add, - Blake3CompressionChips::U32Add.lookups(), + Blake3CompressionCircuit::U32Add, + Blake3CompressionCircuit::U32Add.lookups(), ); let u32_rotate_right_8_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate8, - Blake3CompressionChips::U32RightRotate8.lookups(), + Blake3CompressionCircuit::U32RightRotate8, + Blake3CompressionCircuit::U32RightRotate8.lookups(), ); let u32_rotate_right_16_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate16, - Blake3CompressionChips::U32RightRotate16.lookups(), + Blake3CompressionCircuit::U32RightRotate16, + Blake3CompressionCircuit::U32RightRotate16.lookups(), ); let u32_rotate_right_12_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate12, - Blake3CompressionChips::U32RightRotate12.lookups(), + Blake3CompressionCircuit::U32RightRotate12, + Blake3CompressionCircuit::U32RightRotate12.lookups(), ); let u32_rotate_right_7_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate7, - Blake3CompressionChips::U32RightRotate7.lookups(), + Blake3CompressionCircuit::U32RightRotate7, + Blake3CompressionCircuit::U32RightRotate7.lookups(), ); let g_function_circuit = LookupAir::new( - Blake3CompressionChips::GFunction, - Blake3CompressionChips::GFunction.lookups(), + 
Blake3CompressionCircuit::GFunction, + Blake3CompressionCircuit::GFunction.lookups(), ); let state_transition_circuit = LookupAir::new( - Blake3CompressionChips::Compression, - Blake3CompressionChips::Compression.lookups(), + Blake3CompressionCircuit::Compression, + Blake3CompressionCircuit::Compression.lookups(), ); let (system, prover_key) = System::new( @@ -1976,7 +2310,7 @@ mod tests { claims: vec![ [ vec![Val::from_usize( - Blake3CompressionChips::Compression.position(), + Blake3CompressionCircuit::Compression.position(), )], state_in.into_iter().map(Val::from_u32).collect(), state_out.into_iter().map(Val::from_u32).collect(), @@ -2108,41 +2442,41 @@ mod tests { cap_height: 0, }; let u8_circuit = LookupAir::new( - Blake3CompressionChips::U8Xor, - Blake3CompressionChips::U8Xor.lookups(), + Blake3CompressionCircuit::U8Xor, + Blake3CompressionCircuit::U8Xor.lookups(), ); let u32_circuit = LookupAir::new( - Blake3CompressionChips::U32Xor, - Blake3CompressionChips::U32Xor.lookups(), + Blake3CompressionCircuit::U32Xor, + Blake3CompressionCircuit::U32Xor.lookups(), ); let u32_add_circuit = LookupAir::new( - Blake3CompressionChips::U32Add, - Blake3CompressionChips::U32Add.lookups(), + Blake3CompressionCircuit::U32Add, + Blake3CompressionCircuit::U32Add.lookups(), ); let u32_rotate_right_8_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate8, - Blake3CompressionChips::U32RightRotate8.lookups(), + Blake3CompressionCircuit::U32RightRotate8, + Blake3CompressionCircuit::U32RightRotate8.lookups(), ); let u32_rotate_right_16_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate16, - Blake3CompressionChips::U32RightRotate16.lookups(), + Blake3CompressionCircuit::U32RightRotate16, + Blake3CompressionCircuit::U32RightRotate16.lookups(), ); let u32_rotate_right_12_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate12, - Blake3CompressionChips::U32RightRotate12.lookups(), + Blake3CompressionCircuit::U32RightRotate12, + 
Blake3CompressionCircuit::U32RightRotate12.lookups(), ); let u32_rotate_right_7_circuit = LookupAir::new( - Blake3CompressionChips::U32RightRotate7, - Blake3CompressionChips::U32RightRotate7.lookups(), + Blake3CompressionCircuit::U32RightRotate7, + Blake3CompressionCircuit::U32RightRotate7.lookups(), ); let g_function_circuit = LookupAir::new( - Blake3CompressionChips::GFunction, - Blake3CompressionChips::GFunction.lookups(), + Blake3CompressionCircuit::GFunction, + Blake3CompressionCircuit::GFunction.lookups(), ); let state_transition_circuit = LookupAir::new( - Blake3CompressionChips::Compression, - Blake3CompressionChips::Compression.lookups(), + Blake3CompressionCircuit::Compression, + Blake3CompressionCircuit::Compression.lookups(), ); let (system, prover_key) = System::new( @@ -2184,92 +2518,59 @@ mod tests { let f = Val::from_u8; let f32 = Val::from_u32; + // 1 u8 xor claim — leaf primitive let claims = Blake3CompressionClaims { - claims: vec![ - // 3 u8 xor claims - vec![ - Val::from_usize(Blake3CompressionChips::U8Xor.position()), - f(a_u8), - f(b_u8), - f(xor_u8), - ], - ], - }; - - run_test(&claims); - - let claims = Blake3CompressionClaims { - claims: vec![ - // 5 u8 xor claims - vec![ - Val::from_usize(Blake3CompressionChips::U8Xor.position()), - f(a_u8), - f(b_u8), - f(xor_u8), - ]; 5 - ], + claims: vec![vec![ + Val::from_usize(Blake3CompressionCircuit::U8Xor.position()), + f(a_u8), + f(b_u8), + f(xor_u8), + ]], }; run_test(&claims); + // 1 u32 xor claim — u32→u8 lookup chain let claims = Blake3CompressionClaims { - claims: vec![ - // 2 u32 xor claims - vec![ - Val::from_usize(Blake3CompressionChips::U8Xor.position()), - f(a_u8), - f(b_u8), - f(xor_u8), - ]; 2 - ], + claims: vec![vec![ + Val::from_usize(Blake3CompressionCircuit::U32Xor.position()), + f32(a_u32), + f32(b_u32), + f32(xor_u32), + ]], }; run_test(&claims); + // 1 u32 add claim — add→range check lookup chain let claims = Blake3CompressionClaims { - claims: vec![ - // 3 u32 xor claims - 
vec![ - Val::from_usize(Blake3CompressionChips::U32Xor.position()), - f32(a_u32), - f32(b_u32), - f32(xor_u32), - ]; 3 - ], + claims: vec![vec![ + Val::from_usize(Blake3CompressionCircuit::U32Add.position()), + f32(a_u32), + f32(b_u32), + f32(add_u32), + ]], }; run_test(&claims); + // 1 claim per rotation variant let claims = Blake3CompressionClaims { claims: vec![ - // 6 u32 add claims vec![ - Val::from_usize(Blake3CompressionChips::U32Add.position()), - f32(a_u32), - f32(b_u32), - f32(add_u32), - ]; 6 - ], - }; - run_test(&claims); - - let claims = Blake3CompressionClaims { - claims: vec![ - // Right rotate claims (1 per each operation) - vec![ - Val::from_usize(Blake3CompressionChips::U32RightRotate8.position()), + Val::from_usize(Blake3CompressionCircuit::U32RightRotate8.position()), f32(a_u32), f32(a_rot_8), ], vec![ - Val::from_usize(Blake3CompressionChips::U32RightRotate16.position()), + Val::from_usize(Blake3CompressionCircuit::U32RightRotate16.position()), f32(a_u32), f32(a_rot_16), ], vec![ - Val::from_usize(Blake3CompressionChips::U32RightRotate12.position()), + Val::from_usize(Blake3CompressionCircuit::U32RightRotate12.position()), f32(a_u32), f32(a_rot_12), ], vec![ - Val::from_usize(Blake3CompressionChips::U32RightRotate7.position()), + Val::from_usize(Blake3CompressionCircuit::U32RightRotate7.position()), f32(a_u32), f32(a_rot_7), ], @@ -2277,65 +2578,33 @@ mod tests { }; run_test(&claims); + // 1 G-function claim — G→{add,xor,rotate} composition let claims = Blake3CompressionClaims { - // 3 G function claims - claims: vec![ - vec![ - Val::from_usize(Blake3CompressionChips::GFunction.position()), - f32(a_in), - f32(b_in), - f32(c_in), - f32(d_in), - f32(mx_in), - f32(my_in), - f32(a_1), - f32(d_1), - f32(c_1), - f32(b_1), - ]; - 3 - ], + claims: vec![vec![ + Val::from_usize(Blake3CompressionCircuit::GFunction.position()), + f32(a_in), + f32(b_in), + f32(c_in), + f32(d_in), + f32(mx_in), + f32(my_in), + f32(a_1), + f32(d_1), + f32(c_1), + f32(b_1), + 
]], }; run_test(&claims); + // 1 compression claim — full end-to-end chain let claims = Blake3CompressionClaims { - // 11 Compression claims claims: vec![ [ vec![Val::from_usize( - Blake3CompressionChips::Compression.position(), + Blake3CompressionCircuit::Compression.position(), )], - state_in.clone().into_iter().map(Val::from_u32).collect(), - state_out.clone().into_iter().map(Val::from_u32).collect(), - ] - .concat(); - 5 - ], - }; - run_test(&claims); - - let claims = Blake3CompressionClaims { - // Compression + G_Function claims - claims: vec![ - vec![ - Val::from_usize(Blake3CompressionChips::GFunction.position()), - f32(a_in), - f32(b_in), - f32(c_in), - f32(d_in), - f32(mx_in), - f32(my_in), - f32(a_1), - f32(d_1), - f32(c_1), - f32(b_1), - ], - [ - vec![Val::from_usize( - Blake3CompressionChips::Compression.position(), - )], - state_in.clone().into_iter().map(Val::from_u32).collect(), - state_out.clone().into_iter().map(Val::from_u32).collect(), + state_in.into_iter().map(Val::from_u32).collect(), + state_out.into_iter().map(Val::from_u32).collect(), ] .concat(), ], @@ -2443,9 +2712,7 @@ mod tests { for i in 0..16 { permuted[i] = state[16 + MSG_PERMUTATION[i]]; } - for i in 0..16 { - state[i + 16] = permuted[i]; - } + state[16..(16 + 16)].copy_from_slice(&permuted); } } diff --git a/src/chips/byte_operations.rs b/src/test_circuits/byte_operations.rs similarity index 99% rename from src/chips/byte_operations.rs rename to src/test_circuits/byte_operations.rs index da68c7e..9839077 100644 --- a/src/chips/byte_operations.rs +++ b/src/test_circuits/byte_operations.rs @@ -1,9 +1,9 @@ #[cfg(test)] mod tests { use crate::builder::symbolic::{preprocessed_var, var}; - use crate::chips::SymbExpr; use crate::lookup::{Lookup, LookupAir}; use crate::system::{System, SystemWitness}; + use crate::test_circuits::SymbExpr; use crate::types::{CommitmentParameters, FriParameters, Val}; use p3_air::{Air, AirBuilder, BaseAir}; use p3_field::{Field, PrimeCharacteristicRing}; @@ 
-122,7 +122,6 @@ mod tests { } #[test] - #[ignore] fn byte_test() { let commitment_parameters = CommitmentParameters { log_blowup: 1, diff --git a/src/test_circuits/mod.rs b/src/test_circuits/mod.rs new file mode 100644 index 0000000..172193f --- /dev/null +++ b/src/test_circuits/mod.rs @@ -0,0 +1,8 @@ +mod blake3; +mod byte_operations; +mod u32_add; + +use crate::builder::symbolic::SymbolicExpression; +use crate::types::Val; + +type SymbExpr = SymbolicExpression; diff --git a/src/chips/u32_add.rs b/src/test_circuits/u32_add.rs similarity index 89% rename from src/chips/u32_add.rs rename to src/test_circuits/u32_add.rs index 0224095..0a8abb4 100644 --- a/src/chips/u32_add.rs +++ b/src/test_circuits/u32_add.rs @@ -4,7 +4,7 @@ mod tests { use p3_field::{Field, PrimeCharacteristicRing}; use p3_matrix::dense::RowMajorMatrix; - use crate::chips::SymbExpr; + use crate::test_circuits::SymbExpr; use crate::{ builder::symbolic::{preprocessed_var, var}, lookup::{Lookup, LookupAir}, @@ -13,11 +13,11 @@ mod tests { }; enum U32CS { - ByteChip, - U32AddChip, + ByteTable, + U32Add, } - // ByteChip will have a preprocessed column for the bytes and + // ByteTable has a preprocessed column for the bytes and // a column for the multiplicities // Example // | multiplicity | byte | @@ -32,16 +32,18 @@ mod tests { impl BaseAir for U32CS { fn width(&self) -> usize { match self { - Self::ByteChip => 1, + Self::ByteTable => 1, // 4 bytes for x, 4 bytes for y, 4 bytes for z, 1 byte for the carry, 1 column for the multiplicity - Self::U32AddChip => 14, + Self::U32Add => 14, } } fn preprocessed_trace(&self) -> Option> { match self { - Self::ByteChip => Some(RowMajorMatrix::new((0..256).map(F::from_u32).collect(), 1)), - Self::U32AddChip => None, + Self::ByteTable => { + Some(RowMajorMatrix::new((0..256).map(F::from_u32).collect(), 1)) + } + Self::U32Add => None, } } } @@ -54,8 +56,8 @@ mod tests { { fn eval(&self, builder: &mut AB) { match self { - Self::ByteChip => {} - Self::U32AddChip => { 
+ Self::ByteTable => {} + Self::U32Add => { let main = builder.main(); let local = main.current_slice(); let x = &local[0..4]; @@ -89,8 +91,10 @@ mod tests { let byte_index = SymbExpr::from_u8(0); let u32_index = SymbExpr::from_u8(1); match self { - Self::ByteChip => vec![Lookup::pull(var(0), vec![byte_index, preprocessed_var(0)])], - Self::U32AddChip => { + Self::ByteTable => { + vec![Lookup::pull(var(0), vec![byte_index, preprocessed_var(0)])] + } + Self::U32Add => { // Pull let mut lookups = vec![Lookup::pull( var(13), @@ -122,9 +126,9 @@ mod tests { } fn byte_system(commitment_parameters: CommitmentParameters) -> (System, ProverKey) { - let byte_chip = LookupAir::new(U32CS::ByteChip, U32CS::ByteChip.lookups()); - let u32_add_chip = LookupAir::new(U32CS::U32AddChip, U32CS::U32AddChip.lookups()); - System::new(commitment_parameters, [byte_chip, u32_add_chip]) + let byte_table = LookupAir::new(U32CS::ByteTable, U32CS::ByteTable.lookups()); + let u32_add = LookupAir::new(U32CS::U32Add, U32CS::U32Add.lookups()); + System::new(commitment_parameters, [byte_table, u32_add]) } struct AddCalls { diff --git a/src/types.rs b/src/types.rs index 893b31f..861f567 100644 --- a/src/types.rs +++ b/src/types.rs @@ -97,6 +97,12 @@ pub struct CommitmentParameters { } /// Parameters controlling the FRI protocol. +/// +/// These parameters determine the concrete security level. The FRI soundness +/// error is approximately `ρ^num_queries` where `ρ = 2^(-log_blowup)` (set in +/// [`CommitmentParameters`]). For example, `log_blowup = 1` with +/// `num_queries = 100` gives ~2^(-100) soundness error from FRI queries alone. +/// The PoW bits add grinding cost on top of this bound. #[derive(Clone, Copy)] pub struct FriParameters { /// Log2 of the degree of the final polynomial (0 means a constant). diff --git a/src/verifier.rs b/src/verifier.rs index 36162d8..224c1a2 100644 --- a/src/verifier.rs +++ b/src/verifier.rs @@ -1,3 +1,109 @@ +//! Multi-circuit STARK verifier. +//! +//! 
# Verification steps +//! +//! 1. **Shape check** ([`System::verify_shape`]): Validate that the proof's array +//! dimensions (opened values, accumulators, quotient chunks) match the system's +//! circuit count and column widths. +//! +//! 2. **Accumulator balance**: Assert that the last intermediate accumulator is zero, +//! ensuring that all lookup pushes and pulls cancel out across circuits. +//! +//! 3. **Fiat-Shamir replay**: Reconstruct the challenger state identically to the +//! prover by observing commitments, trace heights, claims, and sampling the same +//! challenges (lookup, fingerprint, constraint alpha, OOD zeta). +//! +//! 4. **PCS verification**: Verify the FRI opening proofs against the committed +//! polynomials at the sampled points. +//! +//! 5. **OOD evaluation**: For each circuit, recompute the composition polynomial at +//! zeta from the opened values and verify that +//! `composition(zeta) * inv_vanishing(zeta) == quotient(zeta)`. +//! +//! See [`VerificationError`] for the possible failure modes. +//! +//! # Soundness argument +//! +//! The protocol is sound in the random oracle model (instantiated by Keccak-256 via +//! the Fiat-Shamir challenger). Informally: if a prover produces a proof that the +//! verifier accepts, then with overwhelming probability the claimed computation is +//! correct. +//! +//! We use the following notation throughout: +//! - |F_ext| ≈ 2^128 — size of the extension field (GoldilocksBinomialExtension<2>) +//! - ρ = 2^(-log_blowup) — FRI rate parameter (inverse of the blowup factor) +//! - n — number of FRI queries (`num_queries`) +//! - k — number of AIR constraints (after lookup expansion) +//! - N — total number of lookup rows across all circuits +//! - D — maximum degree of the quotient polynomial (trace_degree × quotient_degree) +//! +//! ## FRI proximity test +//! +//! The FRI-based PCS guarantees that committed polynomials are close to polynomials +//! of the claimed degree. 
The exact soundness bound depends on the proximity +//! parameter, number of folding rounds, and folding arity; a commonly cited +//! approximation is **ρ^n** (each of the n queries independently catches a cheating +//! prover with probability ≈ 1 - ρ). With `log_blowup = 1` and `num_queries = 100`, +//! this gives ≈ 2^(-100). For the precise bound, see the FRI soundness analysis in +//! the Plonky3 documentation. +//! +//! The proof-of-work (PoW) phases add grinding cost: a cheating prover must perform +//! 2^(commit_proof_of_work_bits) work per batching challenge and +//! 2^(query_proof_of_work_bits) work before query sampling. This increases the +//! concrete cost of attack without affecting honest verification time. +//! +//! ## Constraint folding (α) +//! +//! All k AIR constraints are folded into a single composition polynomial using +//! powers of a random challenge α. The folded polynomial Σ α^i · C_i(x) has degree +//! k - 1 in α. By the Schwartz-Zippel lemma, if any individual constraint C_i is +//! nonzero, the folded sum is nonzero with probability at least +//! **1 - (k - 1) / |F_ext|**, which is negligible for practical constraint counts +//! since |F_ext| ≈ 2^128. +//! +//! ## Out-of-domain evaluation (ζ) +//! +//! The verifier checks `composition(ζ) · inv_vanishing(ζ) = quotient(ζ)` at a +//! random point ζ. If the composition polynomial is not divisible by the vanishing +//! polynomial (i.e. some constraint is violated on the trace domain), the +//! difference `composition · inv_vanishing - quotient` is a nonzero polynomial of +//! degree at most D. By Schwartz-Zippel, this check passes incorrectly with +//! probability at most **D / |F_ext|**, which is negligible. +//! +//! ## Lookup argument +//! +//! The accumulator-based lookup argument uses two random challenges (β, γ) to +//! compress lookup messages into field elements. For each lookup interaction, the +//! message `m = β + fingerprint(γ, args)` is a random low-degree polynomial in the +//! 
challenges. If the multiset of "pushed" values differs from the multiset of +//! "pulled" values, the running accumulator `Σ multiplicity_i / m_i` is a nonzero +//! rational function of the challenges. By Schwartz-Zippel (applied to the +//! numerator after clearing denominators), the accumulator evaluates to zero with +//! probability at most **N / |F_ext|**. Crucially, the challenges are sampled +//! *after* the prover has committed to the stage-1 traces and the claims have been +//! observed, so the prover cannot adapt them. +//! +//! ## Fiat-Shamir (random oracle model) +//! +//! All challenges (α, ζ, β, γ) are derived from the transcript via Keccak-256. +//! Security relies on Keccak-256 behaving as a random oracle. The ordering of +//! observations is critical: in particular, claims must be observed *before* lookup +//! challenges are sampled, otherwise the prover could choose claims adaptively to +//! make the accumulator balance. +//! +//! ## Overall soundness +//! +//! By a union bound, the total soundness error is at most: +//! +//! ```text +//! ε ≤ ε_FRI + (k - 1 + D + N) / |F_ext| +//! ``` +//! +//! where ε_FRI ≈ ρ^n is the FRI soundness error. The second term is negligible +//! for any practical parameters since |F_ext| ≈ 2^128, so **FRI dominates**. With +//! `log_blowup = 1` and `num_queries = 100`, the protocol provides approximately +//! 100 bits of security from FRI alone, plus additional grinding cost from PoW. + use crate::{ builder::folder::VerifierConstraintFolder, ensure, ensure_eq, @@ -17,6 +123,9 @@ use p3_util::log2_strict_usize; #[derive(Debug)] pub enum VerificationError { /// A provided claim is invalid. + /// + /// Note: this variant is not currently returned by any verification path. + /// It is reserved for future claim validation checks. InvalidClaim, /// The PCS opening proof failed to verify. 
InvalidOpeningArgument(PcsErr), @@ -31,6 +140,10 @@ pub enum VerificationError { } impl + for<'a> Air>> System { + /// Verifies a STARK proof against a single claim. + /// + /// Returns `Ok(())` if the proof is valid, or a [`VerificationError`] describing + /// the first check that failed. pub fn verify( &self, fri_parameters: FriParameters, @@ -40,6 +153,7 @@ impl + for<'a> Air>> System { self.verify_multiple_claims(fri_parameters, &[claim], proof) } + /// Verifies a STARK proof against multiple claims. pub fn verify_multiple_claims( &self, fri_parameters: FriParameters, @@ -59,14 +173,22 @@ impl + for<'a> Air>> System { // first, verify the proof shape let quotient_degrees = self.verify_shape(proof)?; - // the last accumulator should be 0 + // Soundness: lookup argument. The accumulator was computed by the prover + // under challenges (β, γ) that were sampled after the traces and claims were + // committed. If the pushed and pulled multisets differ, the accumulator is a + // nonzero rational function of (β, γ) and evaluates to zero with probability + // ≤ N / |F_ext| (Schwartz-Zippel on the numerator polynomial). ensure_eq!( intermediate_accumulators.last(), Some(&ExtVal::ZERO), VerificationError::UnbalancedChannel ); - // initialize pcs and challenger + // Soundness: Fiat-Shamir. All challenges below are derived deterministically + // from the transcript via Keccak-256 (random oracle model). The verifier + // replays exactly the same observations as the prover, so any divergence + // (e.g. different commitments) produces different challenges, making it + // infeasible for a cheating prover to predict them. let config = StarkConfig::new(self.commitment_parameters, fri_parameters); let pcs = config.pcs(); let mut challenger = config.initialise_challenger(); @@ -77,17 +199,22 @@ impl + for<'a> Air>> System { } challenger.observe(commitments.stage_1_trace.clone()); - // observe the traces' heights. TODO: is this necessary? 
+ // Observe trace heights to bind the proof to specific domain sizes. for log_degree in log_degrees { challenger.observe(Val::from_u8(*log_degree)); } - // observe the claims + // Soundness: claims must be observed BEFORE lookup challenges are sampled. + // Otherwise, the prover could choose claims adaptively after seeing the + // challenges, breaking the lookup argument's binding property. for claim in claims { challenger.observe_slice(claim); } - // generate lookup challenges + // Soundness: lookup argument. The challenges are random elements of F_ext. + // The message m_i = lookup_challenge + fingerprint(fingerprint_challenge, args_i) + // is a low-degree polynomial in the challenges; two distinct argument tuples + // collide with probability ≤ a/|F_ext| (Schwartz-Zippel, a = tuple arity). let lookup_argument_challenge: ExtVal = challenger.sample_algebra_element(); challenger.observe_algebra_element(lookup_argument_challenge); let fingerprint_challenge: ExtVal = challenger.sample_algebra_element(); @@ -104,13 +231,16 @@ impl + for<'a> Air>> System { acc += message.inverse(); } - // generate constraint challenge + // Soundness: constraint folding. All k constraints are combined via powers + // of α. The folded sum has degree k-1 in α, so by Schwartz-Zippel a violated + // constraint survives folding with probability ≥ 1 - (k-1)/|F_ext|. let constraint_challenge: ExtVal = challenger.sample_algebra_element(); // observe quotient commitment challenger.observe(commitments.quotient_chunks.clone()); - // generate out of domain points and verify the PCS opening + // Soundness: OOD evaluation. ζ is sampled after all commitments are fixed. + // A nonzero polynomial of degree ≤ D vanishes at ζ with probability ≤ D/|F_ext|. 
let zeta: ExtVal = challenger.sample_algebra_element(); let mut preprocessed_trace_evaluations = vec![]; let mut stage_1_trace_evaluations = vec![]; @@ -186,6 +316,10 @@ impl + for<'a> Air>> System { preprocessed_trace_evaluations, )]) } + // Soundness: FRI proximity test. Verifies that the committed polynomials + // are close to low-degree polynomials and that the claimed evaluations are + // consistent with the commitments. Soundness error ≤ ρ^num_queries, where + // ρ = 2^(-log_blowup). This is the dominant term in the overall bound. pcs.verify(coms_to_verify, opening_proof, &mut challenger) .map_err(VerificationError::InvalidOpeningArgument)?; @@ -283,8 +417,13 @@ impl + for<'a> Air>> System { .map(|(ch_i, ch)| zps[ch_i] * from_ext_basis(ch)) .sum::(); - // finally, check that the composition polynomial - // is divisible by the quotient polynomial + // Soundness: OOD check. If any constraint is violated on the trace + // domain, the composition polynomial is not divisible by the vanishing + // polynomial, so their ratio differs from the committed quotient. At + // the random point ζ, this difference is nonzero with probability + // ≥ 1 - D/|F_ext| (Schwartz-Zippel). Combined with the FRI check above, + // this ensures that the opened values are consistent with actually + // low-degree polynomials satisfying all constraints. ensure_eq!( composition_polynomial * sels.inv_vanishing, quotient, @@ -297,6 +436,8 @@ impl + for<'a> Air>> System { Ok(()) } + /// Validates the structural shape of the proof without checking any cryptographic + /// properties. Returns the quotient degrees per circuit on success. 
pub fn verify_shape(&self, proof: &Proof) -> Result, VerificationError> { let Proof { intermediate_accumulators, @@ -310,7 +451,7 @@ impl + for<'a> Air>> System { let num_circuits = self.circuits.len(); // there must be at least one circuit ensure!(num_circuits > 0, VerificationError::InvalidSystem); - // the preprocessed commitment is empty if and only if there are zero preprocessed chips + // the preprocessed commitment is empty if and only if there are zero preprocessed circuits let num_preprocessed = self .preprocessed_indices .iter() @@ -425,8 +566,8 @@ fn from_ext_basis(coeffs: &[ExtVal]) -> ExtVal { mod tests { use super::*; use crate::{ - benchmark, lookup::LookupAir, + prover::Proof, system::{ProverKey, SystemWitness}, types::{CommitmentParameters, FriParameters}, }; @@ -516,20 +657,17 @@ mod tests { } #[test] - #[ignore] - fn multi_stark_benchmark_test() { - // To run this benchmark effectively, run the following command - // RUSTFLAGS="-Ctarget-cpu=native" cargo test multi_stark_benchmark_test --release --features parallel -- --include-ignored --nocapture - const LOG_HEIGHT: usize = 20; + fn multi_stark_prove_verify_serialize() { let commitment_parameters = CommitmentParameters { log_blowup: 1, cap_height: 0, }; let (system, key) = system(commitment_parameters); let f = Val::from_u32; + // 2^4 = 16 rows — small enough for fast CI let mut pythagorean_trace = [3, 4, 5].map(f).to_vec(); let mut complex_trace = [4, 2, 3, 1, 10, 10].map(f).to_vec(); - for _ in 0..LOG_HEIGHT { + for _ in 0..4 { pythagorean_trace.extend(pythagorean_trace.clone()); complex_trace.extend(complex_trace.clone()); } @@ -543,22 +681,100 @@ mod tests { let fri_parameters = FriParameters { log_final_poly_len: 0, max_log_arity: 1, - num_queries: 100, - commit_proof_of_work_bits: 10, - query_proof_of_work_bits: 10, + num_queries: 64, + commit_proof_of_work_bits: 0, + query_proof_of_work_bits: 0, }; let no_claims = &[]; - let proof = benchmark!( - 
system.prove_multiple_claims(fri_parameters, &key, no_claims, witness), - "proof: " - ); + let proof = system.prove_multiple_claims(fri_parameters, &key, no_claims, witness); + // Serialization round-trip let proof_bytes = proof.to_bytes().expect("Failed to serialize proof"); - println!("Proof size: {} bytes", proof_bytes.len()); - benchmark!( - system - .verify_multiple_claims(fri_parameters, no_claims, &proof) - .unwrap(), - "verification: " + let proof2 = Proof::from_bytes(&proof_bytes).expect("Failed to deserialize proof"); + system + .verify_multiple_claims(fri_parameters, no_claims, &proof2) + .unwrap(); + } + + // -- Negative / adversarial tests -- + + /// Helper: creates a small system and valid proof for negative tests. + fn small_system_and_proof() -> (System, FriParameters, Proof) { + let commitment_parameters = CommitmentParameters { + log_blowup: 1, + cap_height: 0, + }; + let (system, key) = system(commitment_parameters); + let f = Val::from_u32; + let witness = SystemWitness::from_stage_1( + vec![ + RowMajorMatrix::new( + [3, 4, 5, 5, 12, 13, 8, 15, 17, 7, 24, 25].map(f).to_vec(), + 3, + ), + RowMajorMatrix::new([4, 2, 3, 1, 10, 10, 3, 2, 5, 1, 13, 13].map(f).to_vec(), 6), + ], + &system, ); + let fri_parameters = FriParameters { + log_final_poly_len: 0, + max_log_arity: 1, + num_queries: 64, + commit_proof_of_work_bits: 0, + query_proof_of_work_bits: 0, + }; + let no_claims = &[]; + let proof = system.prove_multiple_claims(fri_parameters, &key, no_claims, witness); + (system, fri_parameters, proof) + } + + #[test] + fn test_wrong_claim_rejected() { + let (system, fri_parameters, proof) = small_system_and_proof(); + let f = Val::from_u32; + // Verify with a bogus claim — the prover used no claims, so any claim should fail. 
+ let result = system.verify(fri_parameters, &[f(42)], &proof); + assert!(result.is_err()); + } + + #[test] + fn test_tampered_stage_1_values_rejected() { + let (system, fri_parameters, mut proof) = small_system_and_proof(); + // Mutate a value in the stage 1 opened values — FRI should catch this. + proof.stage_1_opened_values[0][0][0] += ExtVal::ONE; + let no_claims: &[&[Val]] = &[]; + let result = system.verify_multiple_claims(fri_parameters, no_claims, &proof); + assert!(result.is_err()); + } + + #[test] + fn test_tampered_accumulator_rejected() { + let (system, fri_parameters, mut proof) = small_system_and_proof(); + // Set the last intermediate accumulator to non-zero. + let last = proof.intermediate_accumulators.len() - 1; + proof.intermediate_accumulators[last] = ExtVal::ONE; + let no_claims: &[&[Val]] = &[]; + let result = system.verify_multiple_claims(fri_parameters, no_claims, &proof); + assert!(result.is_err()); + } + + #[test] + fn test_truncated_proof_rejected() { + let (system, fri_parameters, mut proof) = small_system_and_proof(); + // Remove a quotient opened value — shape check should fail. 
+ proof.quotient_opened_values.pop(); + let no_claims: &[&[Val]] = &[]; + let result = system.verify_multiple_claims(fri_parameters, no_claims, &proof); + assert!(result.is_err()); + } + + #[test] + fn test_serialization_round_trip() { + let (system, fri_parameters, proof) = small_system_and_proof(); + let bytes = proof.to_bytes().expect("serialize"); + let proof2 = Proof::from_bytes(&bytes).expect("deserialize"); + let no_claims: &[&[Val]] = &[]; + system + .verify_multiple_claims(fri_parameters, no_claims, &proof2) + .unwrap(); } } From 8d9b60cf41bd2d5a0adc174c23516ca3003c56f2 Mon Sep 17 00:00:00 2001 From: Gabriel Barreto Date: Thu, 19 Mar 2026 12:50:45 -0300 Subject: [PATCH 2/2] fixed table alignment on README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0021bd7..fa1d081 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,12 @@ lookup arguments for shared state. ## Cryptographic setup -| Component | Choice | -|-----------|--------| -| Field | Goldilocks (p = 2^64 - 2^32 + 1) | +| Component | Choice | +|-----------|-----------------------------------------------| +| Field | Goldilocks (p = 2^64 - 2^32 + 1) | | Extension | Degree-2 binomial extension (~2^128 elements) | -| Hash | Keccak-256 | -| PCS | FRI over Merkle trees | +| Hash | Keccak-256 | +| PCS | FRI over Merkle trees | Security level is configurable via `FriParameters`. With `log_blowup = 1` and `num_queries = 100`, FRI provides ~2^(-100) soundness error. See the