arbor-sim · thorstenhater · Apr 7, 2026 · Jul 7, 2025 · Jul 7, 2025 · Jul 7, 2025
diff --git a/arbor/backends/multicore/partition_by_constraint.hpp b/arbor/backends/multicore/partition_by_constraint.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <vector>
+#include "backends/multicore/multicore_common.hpp"
 
 #include <arbor/simd/simd.hpp>
 #include <arbor/serdes.hpp>
@@ -13,87 +13,95 @@ using S::index_constraint;
 
 struct constraint_partition {
     using iarray = arb::multicore::iarray;
-
+    // sequence of _ranges_ of contiguous index block of width N
     iarray contiguous;
+    // sequence of constant index blocks of width N
     iarray constant;
+    // sequence of independent index blocks of width N
     iarray independent;
+    // sequence of unconstrained index blocks of width N
     iarray none;
 
     ARB_SERDES_ENABLE(constraint_partition, contiguous, constant, independent, none);
 };
 
+// contiguous over width n
+//
+// all n values are strictly monotonic
 template <typename It>
 bool is_contiguous_n(It first, unsigned width) {
     while (--width) {
         It next = first;
-        if ((*first) +1 != *++next) {
-            return false;
-        }
+        if ((*first) +1 != *++next) return false;
         first = next;
     }
     return true;
 }
 
+// constant over width n
+//
+// all n values are equal to the first
 template <typename It>
 bool is_constant_n(It first, unsigned width) {
     while (--width) {
         It next = first;
-        if (*first != *++next) {
-            return false;
-        }
+        if (*first != *++next) return false;
         first = next;
     }
     return true;
 }
 
+// independent over width n
+//
+// no repetitions?? so 0 1 0 1 qualifies, but not 0 0 1 1
 template <typename It>
 bool is_independent_n(It first, unsigned width) {
     while (--width) {
         It next = first;
-        if (*first == *++next) {
-            return false;
-        }
+        if (*first == *++next) return false;
         first = next;
     }
     return true;
 }
 
 template <typename It>
 index_constraint idx_constraint(It it, unsigned simd_width) {
-    if (is_contiguous_n(it, simd_width)) {
-        return index_constraint::contiguous;
-    }
-    else if (is_constant_n(it, simd_width)) {
-        return index_constraint::constant;
-    }
-    else if (is_independent_n(it, simd_width)) {
-        return index_constraint::independent;
-    }
-    else {
-        return index_constraint::none;
-    }
+    if (is_contiguous_n(it, simd_width))  return index_constraint::contiguous;
+    if (is_constant_n(it, simd_width))    return index_constraint::constant;
+    if (is_independent_n(it, simd_width)) return index_constraint::independent;
+    return index_constraint::none;
 }
 
 template <typename T>
 constraint_partition make_constraint_partition(const T& node_index, unsigned width, unsigned simd_width) {
+    if (!simd_width) return {};
     constraint_partition part;
-    if (simd_width) {
-        for (unsigned i = 0; i < width; i+= simd_width) {
-            auto ptr = &node_index[i];
-            if (is_contiguous_n(ptr, simd_width)) {
-                part.contiguous.push_back(i);
-            }
-            else if (is_constant_n(ptr, simd_width)) {
-                part.constant.push_back(i);
-            }
-            else if (is_independent_n(ptr, simd_width)) {
-                part.independent.push_back(i);
+    unsigned idx = 0;
+    for (; idx < width; idx += simd_width) {
+        auto len = std::min(simd_width, width - idx);
+        if (len < simd_width) break;
+        auto ptr = &node_index[idx];
+        if (is_contiguous_n(ptr, simd_width)) {
+            // extend range vs add a new one
+            if (!part.contiguous.empty() && part.contiguous.back() == (int)idx) {
+                part.contiguous.back() += simd_width;
             }
             else {
-                part.none.push_back(i);
+                part.contiguous.push_back(idx);
+                part.contiguous.push_back(idx + simd_width);
             }
         }
+        else if (is_constant_n(ptr, simd_width)) {
+            part.constant.push_back(idx);
+        }
+        else if (is_independent_n(ptr, simd_width)) {
+            part.independent.push_back(idx);
+        }
+        else {
+            part.none.push_back(idx);
+        }
     }
+    if (idx < width) part.none.push_back(idx);
     return part;
 }
 
@@ -108,10 +116,7 @@ bool compatible_index_constraints(const T& node_index, const U& ion_index, unsig
     for (unsigned i = 0; i < node_index.size(); i+= simd_width) {
         auto nc = idx_constraint(&node_index[i], simd_width);
         auto ic = idx_constraint(&ion_index[i], simd_width);
-
-        if (!is_constraint_stronger(nc, ic)) {
-            return false;
-        }
+        if (!is_constraint_stronger(nc, ic)) return false;
     }
     return true;
 }

diff --git a/arbor/include/arbor/morph/region.hpp b/arbor/include/arbor/morph/region.hpp
@@ -15,7 +15,6 @@ struct mprovider;
 struct region_tag {};
 
 struct ARB_SYMBOL_VISIBLE region {
-public:
     template <typename Impl,
               typename = std::enable_if_t<std::is_base_of<region_tag, std::decay_t<Impl>>::value>>
     explicit region(Impl&& impl):

diff --git a/arbor/include/arbor/simd/neon.hpp b/arbor/include/arbor/simd/neon.hpp
@@ -7,7 +7,6 @@
 #include <cmath>
 #include <cstdint>
 
-#include <iostream>
 #include <arbor/simd/approx.hpp>
 #include <arbor/simd/implbase.hpp>
 
@@ -242,6 +241,10 @@ struct neon_double2 : implbase<neon_double2> {
         return vdivq_f64(a, b);
     }
 
+    static float64x2_t fma(const float64x2_t& a, const float64x2_t& b, const float64x2_t& c) {
+        return vfmaq_f64(c, a, b);
+    }
+
     static float64x2_t logical_not(const float64x2_t& a) {
         return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(a)));
     }
@@ -391,43 +394,40 @@ struct neon_double2 : implbase<neon_double2> {
     // precision
     // attributable to catastrophic rounding. C1 comprises the first
     // 32-bits of mantissa, C2 the remainder.
-
     static float64x2_t exp(const float64x2_t& x) {
         // Masks for exceptional cases.
-
         auto is_large = cmp_gt(x, broadcast(exp_maxarg));
         auto is_small = cmp_lt(x, broadcast(exp_minarg));
         auto is_not_nan = cmp_eq(x, x);
 
         // Compute n and g.
 
         // floor: round toward negative infinity
-        auto n = vcvtmq_s64_f64(add(mul(broadcast(ln2inv), x), broadcast(0.5)));
+        auto n = vcvtmq_s64_f64(fma(broadcast(ln2inv), x, broadcast(0.5)));
 
+        // x - a.b
         auto g = sub(x, mul(vcvtq_f64_s64(n), broadcast(ln2C1)));
         g = sub(g, mul(vcvtq_f64_s64(n), broadcast(ln2C2)));
-
         auto gg = mul(g, g);
 
         // Compute the g*P(g^2) and Q(g^2).
-
         auto odd = mul(g, horner(gg, P0exp, P1exp, P2exp));
         auto even = horner(gg, Q0exp, Q1exp, Q2exp, Q3exp);
 
         // Compute R(g)/R(-g) = 1 + 2*g*P(g^2) / (Q(g^2)-g*P(g^2))
-
-        auto expg =
-            add(broadcast(1), mul(broadcast(2), div(odd, sub(even, odd))));
+        auto expg = fma(broadcast(2),
+                        div(odd, sub(even, odd)),
+                        broadcast(1));
 
         // Finally, compute product with 2^n.
         // Note: can only achieve full range using the ldexp implementation,
         // rather than multiplying by 2^n directly.
-
         auto result = ldexp_positive(expg, vmovn_s64(n));
 
         return ifelse(is_large, broadcast(HUGE_VAL),
                       ifelse(is_small, broadcast(0),
-                             ifelse(is_not_nan, result, broadcast(NAN))));
+                             ifelse(is_not_nan, result,
+                                    broadcast(NAN))));
     }
 
     // Use same rational polynomial expansion as for exp(x), without
@@ -468,11 +468,13 @@ struct neon_double2 : implbase<neon_double2> {
         // Otherwise, compute result 2^n * expgm1 + (2^n-1) by:
         //     result = 2 * ( 2^(n-1)*expgm1 + (2^(n-1)+0.5) )
         // to avoid overflow when n=1024.
-
         auto nm1 = vmovn_s64(vcvtmq_s64_f64(sub(vcvtq_f64_s64(n), one)));
 
-        auto scaled =
-            mul(add(sub(exp2int(nm1), half), ldexp_normal(expgm1, nm1)), two);
+        auto scaled = mul(add(sub(exp2int(nm1),
+                                  half),
+                              ldexp_normal(expgm1,
+                                           nm1)),
+                          two);
 
         return ifelse(is_large, broadcast(HUGE_VAL),
                       ifelse(is_small, broadcast(-1),
@@ -525,14 +527,12 @@ struct neon_double2 : implbase<neon_double2> {
         auto z3 = mul(z2, z);
 
         auto r = div(mul(z3, pz), qz);
-        r = add(r, mul(g, broadcast(ln2C4)));
+        r = fma(g, broadcast(ln2C4), r);
         r = sub(r, mul(z2, half));
         r = add(r, z);
-        r = add(r, mul(g, broadcast(ln2C3)));
-
+        r = fma(g, broadcast(ln2C3), r);
         // Return NaN if x is NaN or negarive, +inf if x is +inf,
         // or -inf if zero or (positive) denormal.
-
         return ifelse(is_domainerr, broadcast(NAN),
                       ifelse(is_large, broadcast(HUGE_VAL),
                              ifelse(is_small, broadcast(-HUGE_VAL), r)));
@@ -561,20 +561,19 @@ struct neon_double2 : implbase<neon_double2> {
 
     template <typename... T>
     static float64x2_t horner(float64x2_t x, double a0, T... tail) {
-        return add(mul(x, horner(x, tail...)), broadcast(a0));
+        return fma(x, horner(x, tail...), broadcast(a0));
     }
 
     // horner1(x, a0, ..., an) computes the degree n+1 monic polynomial A(x)
     // with coefficients
     // a0, ..., an, 1 by by a0+x·(a1+x·(a2+...+x·(an+x)...).
-
     static inline float64x2_t horner1(float64x2_t x, double a0) {
         return add(x, broadcast(a0));
     }
 
     template <typename... T>
     static float64x2_t horner1(float64x2_t x, double a0, T... tail) {
-        return add(mul(x, horner1(x, tail...)), broadcast(a0));
+        return fma(x, horner1(x, tail...), broadcast(a0));
     }
 
     // Compute 2.0^n.

diff --git a/example/busyring-tiled/ring.cpp b/example/busyring-tiled/ring.cpp
@@ -86,7 +86,7 @@ struct ring_recipe: public arb::recipe {
             const auto group = gid/s;
             const auto group_start = s*group;
             const auto group_end = std::min(group_start+s, num_cells_);
-            cell_gid_type src = gid==group_start? group_end-1: gid-1;
+            cell_gid_type src = gid==group_start ? group_end-1: gid-1;
             cons.push_back({{src, "d"}, {"p"}, event_weight_, min_delay_*U::ms});
         }
         return cons;

diff --git a/mechanisms/allen/Exp2Syn.mod b/mechanisms/allen/Exp2Syn.mod
@@ -58,8 +58,7 @@ INITIAL {
     A = 0
     B = 0
     tp = (tau1*tau2)/(tau2 - tau1) * log(tau2/tau1)
-    factor = -exp(-tp/tau1) + exp(-tp/tau2)
-    factor = 1/factor
+    factor = 1/(exp(-tp/tau2) - exp(-tp/tau1))
 }
 
 BREAKPOINT {

diff --git a/modcc/module.cpp b/modcc/module.cpp
@@ -176,11 +176,11 @@ bool Module::semantic() {
 
     // Add built in function that approximate exp use pade polynomials
     callables_.push_back(
-        Parser{"FUNCTION exp_pade_11(z) { exp_pade_11=(1+0.5*z)/(1-0.5*z) }"}.parse_function());
+        Parser{"FUNCTION exp_pade_11(z) { exp_pade_11 = (2.0 + z)/(2.0 - z) }"}.parse_function());
     callables_.push_back(
         Parser{
             "FUNCTION exp_pade_22(z)"
-            "{ exp_pade_22=(1+0.5*z+0.08333333333333333*z*z)/(1-0.5*z+0.08333333333333333*z*z) }"
+            "{ exp_pade_22 = (12 + 6*z + z*z)/(12 - 6*z + z*z) }"
         }.parse_function());
 
     // move functions and procedures to the symbol table

diff --git a/modcc/printer/cexpr_emit.cpp b/modcc/printer/cexpr_emit.cpp
@@ -217,7 +217,7 @@ void CExprEmitter::visit(IfExpression* e) {
 std::unordered_set<std::string> SimdExprEmitter::mask_names_;
 
 void SimdExprEmitter::visit(NumberExpression* e) {
-    out_ << " (double)" << as_c_double(e->value());
+    out_ << "(double)" << as_c_double(e->value());
 } 
 
 void SimdExprEmitter::visit(UnaryExpression* e) {
@@ -370,10 +370,9 @@ void SimdExprEmitter::visit(BinaryExpression* e) {
     } else if (scalars_.count(rhs_name) && !scalars_.count(lhs_name)) {
         out_ << func_spelling << '(';
         lhs->accept(this);
-        out_ << ", simd_cast<simd_value>(" << rhs_pfxd;
-        out_ << "))";
+        out_ << ", " << rhs_pfxd << ")";
     } else if (!scalars_.count(rhs_name) && scalars_.count(lhs_name)) {
-        out_ << func_spelling << "(simd_cast<simd_value>(" << lhs_pfxd << "), ";
+        out_ << func_spelling << "(" << lhs_pfxd << ", ";
         rhs->accept(this);
         out_ << ")";
     } else {