arbor-sim · thorstenhater · Feb 20, 2026 · Feb 23, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/arbor/include/arbor/simd/avx.hpp b/arbor/include/arbor/simd/avx.hpp
@@ -409,7 +409,7 @@ struct avx_double4: implbase<avx_double4> {
     //     e^g = 1 + 2·g·P(g^2) / (Q(g^2)-g·P(g^2)).
     //
     // Note that the coefficients for R are close to but not the same as those
-    // from the 6,6 Padé approximant to the exponential. 
+    // from the 6,6 Padé approximant to the exponential.
     //
     // The exponents n and g are calculated by:
     //
@@ -420,7 +420,7 @@ struct avx_double4: implbase<avx_double4> {
     //
     //     |g| = |x - n·ln(2)|
     //         = |x - x + α·ln(2)|
-    //  
+    //
     // for some fraction |α| ≤ 0.5, and thus |g| ≤ 0.5ln(2) ≈ 0.347.
     //
     // Tne subtraction x - n·ln(2) is performed in two parts, with

diff --git a/arbor/include/arbor/simd/implbase.hpp b/arbor/include/arbor/simd/implbase.hpp
@@ -239,15 +239,7 @@ struct implbase {
     }
 
     static vector_type fma(const vector_type& u, const vector_type& v, const vector_type& w) {
-        store a, b, c, r;
-        I::copy_to(u, a);
-        I::copy_to(v, b);
-        I::copy_to(w, c);
-
-        for (unsigned i = 0; i<width; ++i) {
-            r[i] = compat::fma(a[i], b[i], c[i]);
-        }
-        return I::copy_from(r);
+        return I::add(w, I::mul(u, v));
     }
 
     static mask_type cmp_eq(const vector_type& u, const vector_type& v) {

diff --git a/arbor/include/arbor/simd/neon.hpp b/arbor/include/arbor/simd/neon.hpp
@@ -225,33 +225,23 @@ struct neon_double2 : implbase<neon_double2> {
 
     static float64x2_t neg(const float64x2_t& a) { return vnegq_f64(a); }
 
-    static float64x2_t add(const float64x2_t& a, const float64x2_t& b) {
-        return vaddq_f64(a, b);
-    }
+    static float64x2_t add(const float64x2_t& a, const float64x2_t& b) { return vaddq_f64(a, b); }
 
-    static float64x2_t sub(const float64x2_t& a, const float64x2_t& b) {
-        return vsubq_f64(a, b);
-    }
+    static float64x2_t sub(const float64x2_t& a, const float64x2_t& b) { return vsubq_f64(a, b); }
 
-    static float64x2_t mul(const float64x2_t& a, const float64x2_t& b) {
-        return vmulq_f64(a, b);
-    }
+    static float64x2_t mul(const float64x2_t& a, const float64x2_t& b) { return vmulq_f64(a, b); }
 
-    static float64x2_t div(const float64x2_t& a, const float64x2_t& b) {
-        return vdivq_f64(a, b);
-    }
-
-    static float64x2_t fma(const float64x2_t& a, const float64x2_t& b, const float64x2_t& c) {
-        return vfmaq_f64(c, a, b);
-    }
+    static float64x2_t fma(const float64x2_t& a, const float64x2_t& b, const float64x2_t& c) { return vfmaq_f64(c, a, b); }
+
+    static float64x2_t div(const float64x2_t& a, const float64x2_t& b) { return vdivq_f64(a, b); }
 
     static float64x2_t logical_not(const float64x2_t& a) {
         return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(a)));
     }
 
     static float64x2_t logical_and(const float64x2_t& a, const float64x2_t& b) {
-        return vreinterpretq_f64_u64(
-            vandq_u64(vreinterpretq_u64_f64(a), vreinterpretq_u64_f64(b)));
+        return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),
+                                               vreinterpretq_u64_f64(b)));
     }
 
     static float64x2_t logical_or(const float64x2_t& a, const float64x2_t& b) {
@@ -283,7 +273,8 @@ struct neon_double2 : implbase<neon_double2> {
         return vreinterpretq_f64_u64(vcleq_f64(a, b));
     }
 
-    static float64x2_t ifelse(const float64x2_t& m, const float64x2_t& u,
+    static float64x2_t ifelse(const float64x2_t& m,
+                              const float64x2_t& u,
                               const float64x2_t& v) {
         return vbslq_f64(vreinterpretq_u64_f64(m), u, v);
     }
@@ -292,22 +283,18 @@ struct neon_double2 : implbase<neon_double2> {
         return vreinterpretq_f64_u64(vdupq_n_u64(-(int64)b));
     }
 
-    static bool mask_element(const float64x2_t& u, int i) {
-        return static_cast<bool>(element(u, i));
-    }
+    static bool mask_element(const float64x2_t& u, int i) { return static_cast<bool>(element(u, i)); }
 
     static float64x2_t mask_unpack(unsigned long long k) {
         // Only care about bottom two bits of k.
-        uint8x8_t b = vdup_n_u8((char)k);
-        uint8x8_t bl = vorr_u8(b, vdup_n_u8(0xfe));
-        uint8x8_t bu = vorr_u8(b, vdup_n_u8(0xfd));
-        uint8x16_t blu = vcombine_u8(bl, bu);
-
+        uint8x8_t  b    = vdup_n_u8((char)k);
+        uint8x8_t  bl   = vorr_u8(b, vdup_n_u8(0xfe));
+        uint8x8_t  bu   = vorr_u8(b, vdup_n_u8(0xfd));
+        uint8x16_t blu  = vcombine_u8(bl, bu);
         uint8x16_t ones = vdupq_n_u8(0xff);
-        uint64x2_t r =
-            vceqq_u64(vreinterpretq_u64_u8(ones), vreinterpretq_u64_u8(blu));
-
-        return vreinterpretq_f64_u64(r);
+        uint64x2_t res  = vceqq_u64(vreinterpretq_u64_u8(ones),
+                                    vreinterpretq_u64_u8(blu));
+        return vreinterpretq_f64_u64(res);
     }
 
     static void mask_set_element(float64x2_t& u, int i, bool b) {

diff --git a/arbor/include/arbor/simd/simd.hpp b/arbor/include/arbor/simd/simd.hpp
@@ -104,7 +104,7 @@ detail::simd_mask_impl<T> logical_not(const detail::simd_mask_impl<T>& a) {
 }
 
 template <typename T>
-detail::simd_impl<T> fma(const detail::simd_impl<T>& a, detail::simd_impl<T> b, detail::simd_impl<T> c) {
+detail::simd_impl<T> fma(detail::simd_impl<T> a, detail::simd_impl<T> b, detail::simd_impl<T> c) {
     return detail::simd_impl<T>::wrap(T::fma(a.value_, b.value_, c.value_));
 }
 
@@ -570,12 +570,11 @@ namespace detail {
             return simd_impl::wrap(Impl::div(a.value_, b.value_));
         }
 
-        friend simd_impl fma(const simd_impl& a, simd_impl b, simd_impl c) {
+        friend simd_impl fma(simd_impl a, simd_impl b, simd_impl c) {
             return simd_impl::wrap(Impl::fma(a.value_, b.value_, c.value_));
         }
 
         // Lane-wise relational operations.
-
         friend simd_mask operator==(const simd_impl& a, const simd_impl& b) {
             return simd_impl::mask(Impl::cmp_eq(a.value_, b.value_));
         }
@@ -692,10 +691,9 @@ namespace detail {
         #undef ARB_DECLARE_BINARY_COMPARISON_
 
         template <typename T>
-        friend simd_impl<T> arb::simd::fma(const simd_impl<T>& a, simd_impl<T> b, simd_impl<T> c);
+        friend simd_impl<T> arb::simd::fma(simd_impl<T> a, simd_impl<T> b, simd_impl<T> c);
 
         // Declare Indirect/Indirect indexed/Where Expression copy function as friends
-
         template <typename T, typename I, typename V>
         friend void compound_indexed_add(const simd_impl<I>& s, V* p, const simd_impl<T>& index, unsigned width, index_constraint constraint);
 

diff --git a/mechanisms/allen/NaV.mod b/mechanisms/allen/NaV.mod
@@ -55,7 +55,9 @@ STATE {
 
 BREAKPOINT {
   SOLVE activation METHOD sparse
-  ina = gbar*O*(v - ena)
+  LOCAL g
+  g = gbar * O         
+  ina = g*(v - ena)
 }
 
 INITIAL {
@@ -64,20 +66,14 @@ INITIAL {
 }
 
 KINETIC activation {
-  LOCAL f01, f02, f03, f04, f0O, f11, f12, f13, f14, fi1, fi2, fi3, fi4, fi5, fin, b01, b02, b03, b04, b0O, b11, b12, b13, b14, bi1, bi2, bi3, bi4, bi5, bin, ibtf
+  LOCAL f04, f0O, f14, fi1, fi2, fi3, fi4, fi5, fin, b01, b0O, b11, bi1, bi2, bi3, bi4, bi5, bin, ibtf
 
   ibtf = 1/btfac 
 
   f04 = qt*alpha*exp(v/x1)
-  f03 = 2*f04
-  f02 = 3*f04
-  f01 = 4*f04
   f0O = qt*gamma
 
   f14 = alfac*f04
-  f13 = 2*f14
-  f12 = 3*f14
-  f11 = 4*f14
 
   fi1 = qt*Con
   fi2 = fi1*alfac
@@ -87,15 +83,9 @@ KINETIC activation {
   fin = qt*Oon
 
   b01 = qt*beta*exp(v/x2)
-  b02 = 2*b01
-  b03 = 3*b01
-  b04 = 4*b01
   b0O = qt*delta
 
   b11 = b01*ibtf
-  b12 = 2*b11
-  b13 = 3*b11
-  b14 = 4*b11
 
   bi1 = qt*Coff
   bi2 = bi1*ibtf
@@ -104,16 +94,16 @@ KINETIC activation {
   bi5 = bi4*ibtf
   bin = qt*Ooff
 
-  ~ C1 <-> C2    (f01, b01)
-  ~ C2 <-> C3    (f02, b02)
-  ~ C3 <-> C4    (f03, b03)
-  ~ C4 <-> C5    (f04, b04)
+  ~ C1 <-> C2    (4*f04, 1*b01)
+  ~ C2 <-> C3    (3*f04, 2*b01)
+  ~ C3 <-> C4    (2*f04, 3*b01)
+  ~ C4 <-> C5    (1*f04, 4*b01)
   ~ C5 <-> O     (f0O, b0O)
   ~ O  <-> I6    (fin, bin)
-  ~ I1 <-> I2    (f11, b11)
-  ~ I2 <-> I3    (f12, b12)
-  ~ I3 <-> I4    (f13, b13)
-  ~ I4 <-> I5    (f14, b14)
+  ~ I1 <-> I2    (4*f14, 1*b11)
+  ~ I2 <-> I3    (3*f14, 2*b11)
+  ~ I3 <-> I4    (2*f14, 3*b11)
+  ~ I4 <-> I5    (1*f14, 4*b11)
   ~ I5 <-> I6    (f0O, b0O)
   ~ C1 <-> I1    (fi1, bi1)
   ~ C2 <-> I2    (fi2, bi2)

diff --git a/modcc/msparse.hpp b/modcc/msparse.hpp
@@ -1,9 +1,7 @@
 #pragma once
 
 // (Possibly augmented) matrix implementation, represented as a vector of sparse rows.
-
 #include <algorithm>
-#include <utility>
 #include <initializer_list>
 #include <iterator>
 #include <stdexcept>
@@ -47,14 +45,12 @@ class row {
     row(const row&) = default;
 
     row(std::initializer_list<entry> il): data_(il) {
-        if (!check_invariant())
-            throw msparse_error("improper row element list");
+        if (!check_invariant()) throw msparse_error("improper row element list");
     }
 
     template <typename InIter>
     row(InIter b, InIter e): data_(b, e) {
-        if (!check_invariant())
-            throw msparse_error("improper row element list");
+        if (!check_invariant()) throw msparse_error("improper row element list");
     }
 
     unsigned size() const { return data_.size(); }
@@ -67,22 +63,18 @@ class row {
     auto end() const -> decltype(data_.cend()) { return data_.cend(); }
 
     // Return column of first (left-most) entry.
-    unsigned mincol() const {
-        return empty()? npos: data_.front().col;
-    }
+    unsigned mincol() const { return empty()? npos: data_.front().col; }
 
     // Return column of first entry with column greater than `c`.
     unsigned mincol_after(unsigned c) const {
-        auto i = std::upper_bound(data_.begin(), data_.end(), c,
-            [](unsigned a, const entry& b) { return a<b.col; });
-
+        auto i = std::upper_bound(data_.begin(), data_.end(),
+                                  c,
+                                  [](unsigned a, const entry& b) { return a<b.col; });
         return i==data_.end()? npos: i->col;
     }
 
     // Return column of last (right-most) entry.
-    unsigned maxcol() const {
-        return empty()? npos: data_.back().col;
-    }
+    unsigned maxcol() const { return empty()? npos: data_.back().col; }
 
     // As opposed to [] indexing (see below), retrieve `i'th entry from
     // the list of entries.
@@ -91,27 +83,25 @@ class row {
     }
 
     void push_back(const entry& e) {
-        if (!empty() && e.col <= data_.back().col)
-            throw msparse_error("cannot push_back row elements out of order");
+        if (!empty() && e.col <= data_.back().col) throw msparse_error("cannot push_back row elements out of order");
         data_.push_back(e);
     }
 
-    void clear() {
-        data_.clear();
-    }
+    void clear() { data_.clear();}
 
     // Return index into entry list which has column `c`.
     unsigned index(unsigned c) const {
-        auto i = std::lower_bound(data_.begin(), data_.end(), c,
-            [](const entry& a, unsigned b) { return a.col<b; });
-
+        auto i = std::lower_bound(data_.begin(), data_.end(),
+                                  c,
+                                  [](const entry& a, unsigned b) { return a.col<b; });
         return (i==data_.end() || i->col!=c)? npos: std::distance(data_.begin(), i);
     }
 
     // Remove all entries from column `c` onwards.
     void truncate(unsigned c) {
-        auto i = std::lower_bound(data_.begin(), data_.end(), c,
-            [](const entry& a, unsigned b) { return a.col<b; });
+        auto i = std::lower_bound(data_.begin(), data_.end(),
+                                  c,
+                                  [](const entry& a, unsigned b) { return a.col<b; });
         data_.erase(i, data_.end());
     }
 
@@ -130,10 +120,11 @@ class row {
         assign_proxy(row<X>& r, unsigned c): row_(r), c(c) {}
 
         operator X() const { return const_cast<const row<X>&>(row_)[c]; }
+
         assign_proxy& operator=(const X& x) {
-            auto i = std::lower_bound(row_.data_.begin(), row_.data_.end(), c,
-                [](const entry& a, unsigned b) { return a.col<b; });
-
+            auto i = std::lower_bound(row_.data_.begin(), row_.data_.end(),
+                                      c,
+                                      [](const entry& a, unsigned b) { return a.col<b; });
             if (i==row_.data_.end() || i->col!=c) {
                 row_.data_.insert(i, {c, x});
             }
@@ -143,14 +134,11 @@ class row {
             else {
                 i->value = x;
             }
-
             return *this;
         }
     };
 
-    assign_proxy operator[](unsigned c) {
-        return assign_proxy{*this, c};
-    }
+    assign_proxy operator[](unsigned c) { return assign_proxy{*this, c}; }
 };
 
 // `msparse::matrix` represents a matrix by a size (number of rows,