Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 44 additions & 39 deletions arbor/backends/multicore/partition_by_constraint.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include <vector>
#include "backends/multicore/multicore_common.hpp"

#include <arbor/simd/simd.hpp>
#include <arbor/serdes.hpp>
Expand All @@ -13,87 +13,95 @@ using S::index_constraint;

struct constraint_partition {
using iarray = arb::multicore::iarray;

// sequence of _ranges_ of contiguous index block of width N
iarray contiguous;
// sequence of constant index blocks of width N
iarray constant;
// sequence of independent index blocks of width N
iarray independent;
// sequence of unconstrained index blocks of width N
iarray none;

ARB_SERDES_ENABLE(constraint_partition, contiguous, constant, independent, none);
};

// contiguous over width n
//
// all n values are strictly monotonic
template <typename It>
bool is_contiguous_n(It first, unsigned width) {
while (--width) {
It next = first;
if ((*first) +1 != *++next) {
return false;
}
if ((*first) +1 != *++next) return false;
first = next;
}
return true;
}

// constant over width n
//
// all n values are equal to the first
template <typename It>
bool is_constant_n(It first, unsigned width) {
while (--width) {
It next = first;
if (*first != *++next) {
return false;
}
if (*first != *++next) return false;
first = next;
}
return true;
}

// independent over width n
//
// no repetitions?? so 0 1 0 1 qualifies, but not 0 0 1 1
template <typename It>
bool is_independent_n(It first, unsigned width) {
while (--width) {
It next = first;
if (*first == *++next) {
return false;
}
if (*first == *++next) return false;
first = next;
}
return true;
}

template <typename It>
index_constraint idx_constraint(It it, unsigned simd_width) {
if (is_contiguous_n(it, simd_width)) {
return index_constraint::contiguous;
}
else if (is_constant_n(it, simd_width)) {
return index_constraint::constant;
}
else if (is_independent_n(it, simd_width)) {
return index_constraint::independent;
}
else {
return index_constraint::none;
}
if (is_contiguous_n(it, simd_width)) return index_constraint::contiguous;
if (is_constant_n(it, simd_width)) return index_constraint::constant;
if (is_independent_n(it, simd_width)) return index_constraint::independent;
return index_constraint::none;
}

template <typename T>
constraint_partition make_constraint_partition(const T& node_index, unsigned width, unsigned simd_width) {
if (!simd_width) return {};
constraint_partition part;
if (simd_width) {
for (unsigned i = 0; i < width; i+= simd_width) {
auto ptr = &node_index[i];
if (is_contiguous_n(ptr, simd_width)) {
part.contiguous.push_back(i);
}
else if (is_constant_n(ptr, simd_width)) {
part.constant.push_back(i);
}
else if (is_independent_n(ptr, simd_width)) {
part.independent.push_back(i);
unsigned idx = 0;
for (; idx < width; idx += simd_width) {
auto len = std::min(simd_width, width - idx);
if (len < simd_width) break;
auto ptr = &node_index[idx];
if (is_contiguous_n(ptr, simd_width)) {
// extend range vs add a new one
if (!part.contiguous.empty() && part.contiguous.back() == (int)idx) {
part.contiguous.back() += simd_width;
}
else {
part.none.push_back(i);
part.contiguous.push_back(idx);
part.contiguous.push_back(idx + simd_width);
}
}
else if (is_constant_n(ptr, simd_width)) {
part.constant.push_back(idx);
}
else if (is_independent_n(ptr, simd_width)) {
part.independent.push_back(idx);
}
else {
part.none.push_back(idx);
}
}
if (idx < width) part.none.push_back(idx);
return part;
}

Expand All @@ -108,10 +116,7 @@ bool compatible_index_constraints(const T& node_index, const U& ion_index, unsig
for (unsigned i = 0; i < node_index.size(); i+= simd_width) {
auto nc = idx_constraint(&node_index[i], simd_width);
auto ic = idx_constraint(&ion_index[i], simd_width);

if (!is_constraint_stronger(nc, ic)) {
return false;
}
if (!is_constraint_stronger(nc, ic)) return false;
}
return true;
}
Expand Down
1 change: 0 additions & 1 deletion arbor/include/arbor/morph/region.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ struct mprovider;
struct region_tag {};

struct ARB_SYMBOL_VISIBLE region {
public:
template <typename Impl,
typename = std::enable_if_t<std::is_base_of<region_tag, std::decay_t<Impl>>::value>>
explicit region(Impl&& impl):
Expand Down
41 changes: 20 additions & 21 deletions arbor/include/arbor/simd/neon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <cmath>
#include <cstdint>

#include <iostream>
#include <arbor/simd/approx.hpp>
#include <arbor/simd/implbase.hpp>

Expand Down Expand Up @@ -242,6 +241,10 @@ struct neon_double2 : implbase<neon_double2> {
return vdivq_f64(a, b);
}

static float64x2_t fma(const float64x2_t& a, const float64x2_t& b, const float64x2_t& c) {
return vfmaq_f64(c, a, b);
}

static float64x2_t logical_not(const float64x2_t& a) {
return vreinterpretq_f64_u32(vmvnq_u32(vreinterpretq_u32_f64(a)));
}
Expand Down Expand Up @@ -391,43 +394,40 @@ struct neon_double2 : implbase<neon_double2> {
// precision
// attributable to catastrophic rounding. C1 comprises the first
// 32-bits of mantissa, C2 the remainder.

static float64x2_t exp(const float64x2_t& x) {
// Masks for exceptional cases.

auto is_large = cmp_gt(x, broadcast(exp_maxarg));
auto is_small = cmp_lt(x, broadcast(exp_minarg));
auto is_not_nan = cmp_eq(x, x);

// Compute n and g.

// floor: round toward negative infinity
auto n = vcvtmq_s64_f64(add(mul(broadcast(ln2inv), x), broadcast(0.5)));
auto n = vcvtmq_s64_f64(fma(broadcast(ln2inv), x, broadcast(0.5)));

// x - a.b
auto g = sub(x, mul(vcvtq_f64_s64(n), broadcast(ln2C1)));
g = sub(g, mul(vcvtq_f64_s64(n), broadcast(ln2C2)));

auto gg = mul(g, g);

// Compute the g*P(g^2) and Q(g^2).

auto odd = mul(g, horner(gg, P0exp, P1exp, P2exp));
auto even = horner(gg, Q0exp, Q1exp, Q2exp, Q3exp);

// Compute R(g)/R(-g) = 1 + 2*g*P(g^2) / (Q(g^2)-g*P(g^2))

auto expg =
add(broadcast(1), mul(broadcast(2), div(odd, sub(even, odd))));
auto expg = fma(broadcast(2),
div(odd, sub(even, odd)),
broadcast(1));

// Finally, compute product with 2^n.
// Note: can only achieve full range using the ldexp implementation,
// rather than multiplying by 2^n directly.

auto result = ldexp_positive(expg, vmovn_s64(n));

return ifelse(is_large, broadcast(HUGE_VAL),
ifelse(is_small, broadcast(0),
ifelse(is_not_nan, result, broadcast(NAN))));
ifelse(is_not_nan, result,
broadcast(NAN))));
}

// Use same rational polynomial expansion as for exp(x), without
Expand Down Expand Up @@ -468,11 +468,13 @@ struct neon_double2 : implbase<neon_double2> {
// Otherwise, compute result 2^n * expgm1 + (2^n-1) by:
// result = 2 * ( 2^(n-1)*expgm1 + (2^(n-1)+0.5) )
// to avoid overflow when n=1024.

auto nm1 = vmovn_s64(vcvtmq_s64_f64(sub(vcvtq_f64_s64(n), one)));

auto scaled =
mul(add(sub(exp2int(nm1), half), ldexp_normal(expgm1, nm1)), two);
auto scaled = mul(add(sub(exp2int(nm1),
half),
ldexp_normal(expgm1,
nm1)),
two);

return ifelse(is_large, broadcast(HUGE_VAL),
ifelse(is_small, broadcast(-1),
Expand Down Expand Up @@ -525,14 +527,12 @@ struct neon_double2 : implbase<neon_double2> {
auto z3 = mul(z2, z);

auto r = div(mul(z3, pz), qz);
r = add(r, mul(g, broadcast(ln2C4)));
r = fma(g, broadcast(ln2C4), r);
r = sub(r, mul(z2, half));
r = add(r, z);
r = add(r, mul(g, broadcast(ln2C3)));

r = fma(g, broadcast(ln2C3), r);
// Return NaN if x is NaN or negarive, +inf if x is +inf,
// or -inf if zero or (positive) denormal.

return ifelse(is_domainerr, broadcast(NAN),
ifelse(is_large, broadcast(HUGE_VAL),
ifelse(is_small, broadcast(-HUGE_VAL), r)));
Expand Down Expand Up @@ -561,20 +561,19 @@ struct neon_double2 : implbase<neon_double2> {

template <typename... T>
static float64x2_t horner(float64x2_t x, double a0, T... tail) {
return add(mul(x, horner(x, tail...)), broadcast(a0));
return fma(x, horner(x, tail...), broadcast(a0));
}

// horner1(x, a0, ..., an) computes the degree n+1 monic polynomial A(x)
// with coefficients
// a0, ..., an, 1 by by a0+x·(a1+x·(a2+...+x·(an+x)...).

static inline float64x2_t horner1(float64x2_t x, double a0) {
return add(x, broadcast(a0));
}

template <typename... T>
static float64x2_t horner1(float64x2_t x, double a0, T... tail) {
return add(mul(x, horner1(x, tail...)), broadcast(a0));
return fma(x, horner1(x, tail...), broadcast(a0));
}

// Compute 2.0^n.
Expand Down
2 changes: 1 addition & 1 deletion example/busyring-tiled/ring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ struct ring_recipe: public arb::recipe {
const auto group = gid/s;
const auto group_start = s*group;
const auto group_end = std::min(group_start+s, num_cells_);
cell_gid_type src = gid==group_start? group_end-1: gid-1;
cell_gid_type src = gid==group_start ? group_end-1: gid-1;
cons.push_back({{src, "d"}, {"p"}, event_weight_, min_delay_*U::ms});
}
return cons;
Expand Down
3 changes: 1 addition & 2 deletions mechanisms/allen/Exp2Syn.mod
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ INITIAL {
A = 0
B = 0
tp = (tau1*tau2)/(tau2 - tau1) * log(tau2/tau1)
factor = -exp(-tp/tau1) + exp(-tp/tau2)
factor = 1/factor
factor = 1/(exp(-tp/tau2) - exp(-tp/tau1))
}

BREAKPOINT {
Expand Down
4 changes: 2 additions & 2 deletions modcc/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,11 @@ bool Module::semantic() {

// Add built in function that approximate exp use pade polynomials
callables_.push_back(
Parser{"FUNCTION exp_pade_11(z) { exp_pade_11=(1+0.5*z)/(1-0.5*z) }"}.parse_function());
Parser{"FUNCTION exp_pade_11(z) { exp_pade_11 = (2.0 + z)/(2.0 - z) }"}.parse_function());
callables_.push_back(
Parser{
"FUNCTION exp_pade_22(z)"
"{ exp_pade_22=(1+0.5*z+0.08333333333333333*z*z)/(1-0.5*z+0.08333333333333333*z*z) }"
"{ exp_pade_22 = (12 + 6*z + z*z)/(12 - 6*z + z*z) }"
}.parse_function());

// move functions and procedures to the symbol table
Expand Down
7 changes: 3 additions & 4 deletions modcc/printer/cexpr_emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ void CExprEmitter::visit(IfExpression* e) {
std::unordered_set<std::string> SimdExprEmitter::mask_names_;

void SimdExprEmitter::visit(NumberExpression* e) {
out_ << " (double)" << as_c_double(e->value());
out_ << "(double)" << as_c_double(e->value());
}

void SimdExprEmitter::visit(UnaryExpression* e) {
Expand Down Expand Up @@ -370,10 +370,9 @@ void SimdExprEmitter::visit(BinaryExpression* e) {
} else if (scalars_.count(rhs_name) && !scalars_.count(lhs_name)) {
out_ << func_spelling << '(';
lhs->accept(this);
out_ << ", simd_cast<simd_value>(" << rhs_pfxd;
out_ << "))";
out_ << ", " << rhs_pfxd << ")";
} else if (!scalars_.count(rhs_name) && scalars_.count(lhs_name)) {
out_ << func_spelling << "(simd_cast<simd_value>(" << lhs_pfxd << "), ";
out_ << func_spelling << "(" << lhs_pfxd << ", ";
rhs->accept(this);
out_ << ")";
} else {
Expand Down
Loading
Loading