JuliaMath · s-celles · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,61 @@
+*
+
+### Allowed files and directories ###
+
+!.gitignore
+!.github/
+!.github/workflows/
+!.github/workflows/*
+!.github/dependabot.yml
+
+!LICENSE.md
+!README.md
+!Project.toml
+
+!src/
+!src/*.jl
+
+!test/
+!test/*.jl
+
+!docs/
+!docs/src/
+!docs/src/*.md
+!docs/make.jl
+!docs/Project.toml
+
+!benchmarks/
+!benchmarks/*.jl
+!benchmarks/Project.toml
+!benchmarks/README.md
+
+### Denied even if allowed above ###
+
+# Files generated by invoking Julia with --code-coverage
 *.jl.cov
 *.jl.*.cov
+
+# Files generated by invoking Julia with --track-allocation
 *.jl.mem
-Manifest.toml
-docs/build
-docs/site
-docs/Manifest.toml
+
+# System-specific files and directories generated by the BinaryProvider and BinDeps packages
+# They contain absolute paths specific to the host computer, and so should not be committed
+deps/deps.jl
+deps/build.log
+deps/downloads/
+deps/usr/
+deps/src/
+
+# Build artifacts for creating documentation generated by the Documenter package
+docs/build/
+docs/site/
+
+# File generated by Pkg, the package manager, based on a corresponding Project.toml
+# It records a fixed state of all packages used by the project. As such, it should not be
+# committed for packages, but should be committed for applications that require a static
+# environment.
+Manifest*.toml
+
+# File generated by the Preferences package to store local preferences
+LocalPreferences.toml
+JuliaLocalPreferences.toml
diff --git a/Project.toml b/Project.toml
@@ -5,14 +5,16 @@ version = "0.5.7"
 [deps]
 IntegerMathUtils = "18e54dd8-cb9d-406c-a71d-865a43cbb235"
 
+[compat]
+BitIntegers = "0.3"
+IntegerMathUtils = "0.1.1"
+julia = "1.6"
+
 [extras]
-IntegerMathUtils = "18e54dd8-cb9d-406c-a71d-865a43cbb235"
+BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+IntegerMathUtils = "18e54dd8-cb9d-406c-a71d-865a43cbb235"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DataStructures", "IntegerMathUtils", "Test"]
-
-[compat]
-IntegerMathUtils = "0.1.1"
-julia = "1.6"
+test = ["BitIntegers", "DataStructures", "IntegerMathUtils", "Test"]
diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml
@@ -0,0 +1,7 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
+Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae"
+
+[sources]
+Primes = {path = ".."}
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,61 @@
+# Benchmarks
+
+Benchmark suite for Primes.jl, comparing GMP-optimised in-place arithmetic against
+the generic (allocating) code path for BigInt operations, and measuring end-to-end
+factorisation performance across different number sizes.
+
+## Prerequisites
+
+```bash
+julia --project=benchmarks -e 'using Pkg; Pkg.instantiate()'
+```
+
+## Running
+
+Run all benchmarks:
+
+```bash
+julia --project=benchmarks benchmarks/run_all.jl
+```
+
+Or run individual suites:
+
+```bash
+# GMP in-place vs generic (allocating) ECM — the core comparison
+julia --project=benchmarks benchmarks/ecm_gmp_vs_generic.jl
+
+# Micro-benchmarks for individual ECM operations
+julia --project=benchmarks benchmarks/ecm_microbenchmarks.jl
+
+# End-to-end factor(n) at various sizes
+julia --project=benchmarks benchmarks/factorization_endtoend.jl
+```
+
+## Benchmark Suites
+
+### `ecm_gmp_vs_generic.jl`
+
+Compares the two ECM code paths for `BigInt` inputs:
+
+| Path | Style | Allocations |
+|------|-------|-------------|
+| `ecm_factor(n::BigInt, ...)` | In-place GMP (`Base.GMP.MPZ.*!`) | Near-zero in hot loop |
+| Generic via `_ecm_scalar_mul` | Functional (new BigInt per op) | O(k) per scalar multiply |
+
+Tests at two scales: a ~21-digit and a ~45-digit semiprime.
+
+### `ecm_microbenchmarks.jl`
+
+Benchmarks individual ECM building blocks across three execution modes:
+
+| Operation | GMP in-place | Generic BigInt | UInt128 (LLVM) |
+|-----------|-------------|----------------|----------------|
+| `_mulmod` | `_mulmod!` | `_mulmod` | `_mulmod` |
+| `_ecm_add` | `_ecm_add!` | `_ecm_add` | `_ecm_add` |
+| `_ecm_double` | `_ecm_double!` | `_ecm_double` | `_ecm_double` |
+| `_ecm_scalar_mul` | `_ecm_scalar_mul!` | `_ecm_scalar_mul` | `_ecm_scalar_mul` |
+
+### `factorization_endtoend.jl`
+
+Benchmarks `factor(n)` for semiprimes ranging from 12 to 45+ digits,
+showing how the polyalgorithm (trial division → Pollard rho → ECM → MPQS) scales.
diff --git a/benchmarks/ecm_gmp_vs_generic.jl b/benchmarks/ecm_gmp_vs_generic.jl
@@ -0,0 +1,116 @@
+# ECM Benchmark: GMP In-Place vs Generic (Allocating) BigInt Path
+#
+# Compares the two ECM code paths for BigInt inputs:
+#   1. BigInt-specialised: uses in-place GMP arithmetic (zero-allocation hot loop)
+#   2. Generic: functional style, allocates new BigInts per operation
+#
+# Usage:
+#   julia --project=benchmarks benchmarks/ecm_gmp_vs_generic.jl
+
+using Primes
+using BenchmarkTools
+
+# Generic (allocating) ECM path for BigInt, written out by hand so that it skips the
+# BigInt-specialised `Primes.ecm_factor` method that dispatch would otherwise select.
+function ecm_factor_generic(n::BigInt, B1::Int, num_curves::Int)
+    multipliers = Primes._ecm_prime_powers(B1)
+    for _ in 1:num_curves
+        setup = Primes._ecm_suyama(n)
+        setup === nothing && continue
+        # A non-tuple result is returned to the caller unchanged.
+        setup isa Tuple || return setup
+        px, pz, a24 = setup
+
+        product = one(BigInt)  # accumulates the `pz` values via `_mulmod`
+        pending = 0            # scalar multiplications since the last gcd
+        collapsed = false      # true once a batch gcd equals `n`
+        for k in multipliers
+            px, pz = Primes._ecm_scalar_mul(k, px, pz, n, a24)
+            product = Primes._mulmod(product, pz, n)
+            pending += 1
+            # Only take a gcd once every 100 multipliers.
+            pending < 100 && continue
+            d = gcd(product, n)
+            if d == n
+                collapsed = true
+                break
+            elseif 1 < d < n
+                return d
+            end
+            product = one(BigInt)
+            pending = 0
+        end
+        collapsed && continue
+
+        # Flush whatever is left of a partially filled batch.
+        if pending > 0
+            d = gcd(product, n)
+            1 < d < n && return d
+        end
+    end
+    return nothing
+end
+
+# ---------------------------------------------------------------------------
+# Test cases at different scales (labels give the digit count of each product)
+# ---------------------------------------------------------------------------
+const CASES = [
+    (
+        name  = "small (~21-digit semiprime)",
+        n     = big"824633720831" * big"1000000007",  # 12 × 10 digits
+        B1    = 2_000,
+        curves = 50,
+    ),
+    (
+        name  = "medium (~45-digit semiprime)",
+        n     = big"780002082420426809" * big"810735269523504809437013569",  # 18 × 27
+        B1    = 11_000,
+        curves = 200,
+    ),
+]
+
+# ---------------------------------------------------------------------------
+# Run benchmarks: time both ECM paths on every case and print a median summary
+# ---------------------------------------------------------------------------
+println("=" ^ 70)
+println("  ECM Benchmark: GMP In-Place vs Generic (Allocating) BigInt")
+println("=" ^ 70)
+
+for case in CASES
+    (; name, n, B1, curves) = case
+    println("\n--- $name ---")
+    println("  n      = $n  ($(ndigits(n)) digits)")
+    println("  B1     = $B1")
+    println("  curves = $curves")
+
+    # Warm-up both paths
+    Primes.ecm_factor(n, B1, curves)
+    ecm_factor_generic(n, B1, curves)
+
+    println("\n  [GMP in-place]")
+    b_gmp = @benchmark Primes.ecm_factor($n, $B1, $curves) samples=20 evals=1
+    display(b_gmp)
+
+    println("\n  [Generic (allocating)]")
+    b_gen = @benchmark ecm_factor_generic($n, $B1, $curves) samples=20 evals=1
+    display(b_gen)
+    # BenchmarkTools reports time in ns and memory in bytes; convert to ms and KiB.
+    med_gmp = median(b_gmp).time / 1e6
+    med_gen = median(b_gen).time / 1e6
+    alloc_gmp = median(b_gmp).memory / 1024
+    alloc_gen = median(b_gen).memory / 1024
+
+    println("\n  Summary (median):")
+    println("    GMP in-place : $(round(med_gmp; digits=2)) ms,  $(round(alloc_gmp; digits=1)) KiB")
+    println("    Generic alloc: $(round(med_gen; digits=2)) ms,  $(round(alloc_gen; digits=1)) KiB")
+    if med_gmp > 0  # divisor guard; speedup = generic time / GMP time
+        println("    Speedup      : $(round(med_gen / med_gmp; digits=2))×")
+    end
+    if alloc_gen > 0
+        println("    Memory saved : $(round((1 - alloc_gmp / alloc_gen) * 100; digits=1))%")
+    end
+end
+
+println("\n" * "=" ^ 70)
+println("  Done.")
+println("=" ^ 70)