Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 55 additions & 15 deletions src/graph_tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ field, which should be interpreted as follows:
* `NODE_CALL_UNIVARIATE_BROADCASTED`: the index into `operators.univariate_operators`
* `NODE_CALL_MULTIVARIATE_BROADCASTED`: the index into `operators.multivariate_operators`
* `NODE_CALL_REDUCE`: the index into `operators.multivariate_operators`
* `NODE_MOI_VARIABLE_BLOCK`: a contiguous block of `MOI.VariableIndex`. The
`index` field is the value of the FIRST `MOI.VariableIndex` in the block;
the shape `(m, n)` is stored in `Expression.block_shapes`. The block holds
`m * n` variables in column-major order.
* `NODE_VARIABLE_BLOCK`: same as `NODE_MOI_VARIABLE_BLOCK` but after MOI
variable indices have been remapped to consecutive 1-based internal
indices. `index` is the FIRST internal index of the block.
* `NODE_VALUE_BLOCK`: a contiguous block of constants. `index` is the start
index in the `.values` field; the next `m * n` entries (column-major) are
the block's data. Shape stored in `Expression.block_shapes`.
"""
@enum(
NodeType,
Expand Down Expand Up @@ -51,6 +61,12 @@ field, which should be interpreted as follows:
NODE_CALL_MULTIVARIATE_BROADCASTED,
# Index into the multivariate operators, with reduction
NODE_CALL_REDUCE,
# Block-of-variables node, before MOI → internal index remap.
NODE_MOI_VARIABLE_BLOCK,
# Block-of-variables node, after MOI → internal index remap.
NODE_VARIABLE_BLOCK,
# Block-of-constants node.
NODE_VALUE_BLOCK,
)

@enum(Linearity, CONSTANT, LINEAR, PIECEWISE_LINEAR, NONLINEAR)
Expand Down Expand Up @@ -96,6 +112,16 @@ function _replace_moi_variables(
moi_index_to_consecutive_index[MOI.VariableIndex(node.index)],
node.parent,
)
elseif node.type == NODE_MOI_VARIABLE_BLOCK
# `node.index` is the FIRST MOI variable index in the block. For an
# `ArrayOfContiguousVariables`, all variables in the block are
# contiguous in MOI's ordering, so we just remap the first index
# and reuse it as the consecutive offset. The block's length comes
# from `Expression.block_shapes[i]`, which the caller threads
# through separately.
first_consec =
moi_index_to_consecutive_index[MOI.VariableIndex(node.index)]
new_nodes[i] = Node(NODE_VARIABLE_BLOCK, first_consec, node.parent)
else
new_nodes[i] = node
end
Expand All @@ -122,10 +148,10 @@ function _classify_linearity(
children_arr = SparseArrays.rowvals(adj)
for k in length(nodes):-1:1
node = nodes[k]
if node.type == NODE_VARIABLE
if node.type == NODE_VARIABLE || node.type == NODE_VARIABLE_BLOCK
linearity[k] = LINEAR
continue
elseif node.type == NODE_VALUE
elseif node.type == NODE_VALUE || node.type == NODE_VALUE_BLOCK
linearity[k] = CONSTANT
continue
elseif node.type == NODE_PARAMETER
Expand Down Expand Up @@ -218,24 +244,30 @@ function _classify_linearity(
end

"""
_compute_gradient_sparsity!(
indices::Coloring.IndexedSet,
nodes::Vector{Nonlinear.Node},
)
_compute_gradient_sparsity!(indices::Coloring.IndexedSet, f)

Compute the sparsity pattern of the gradient of an expression (that is, a list
of which variable indices are present).

Compute the sparsity pattern of the gradient of an expression (that is, a list of
which variable indices are present).
`f` is duck-typed (as its type is defined later) to a
`_SubexpressionStorage`-like object exposing `f.nodes` and `f.sizes`.
For `NODE_VARIABLE_BLOCK` nodes, the block's length is read from `f.sizes`
(via `_length`), and the block contributes that many consecutive variable
indices starting at `nodes[k].index`.
"""
function _compute_gradient_sparsity!(
indices::Coloring.IndexedSet,
nodes::Vector{Node},
)
for node in nodes
function _compute_gradient_sparsity!(indices::Coloring.IndexedSet, f)
for (k, node) in enumerate(f.nodes)
if node.type == NODE_VARIABLE
push!(indices, node.index)
elseif node.type == NODE_MOI_VARIABLE
elseif node.type == NODE_VARIABLE_BLOCK
len = _length(f.sizes, k)
for i in 0:(len-1)
push!(indices, node.index + i)
end
elseif node.type == NODE_MOI_VARIABLE ||
node.type == NODE_MOI_VARIABLE_BLOCK
error(
"Internal error: Invalid to compute sparsity if NODE_MOI_VARIABLE " *
"Internal error: Invalid to compute sparsity if $(node.type) " *
"nodes are present.",
)
end
Expand Down Expand Up @@ -341,6 +373,7 @@ function _compute_hessian_sparsity(
child_group_variables = Dict{Int,Set{Int}}()
for (k, node) in enumerate(nodes)
@assert node.type != NODE_MOI_VARIABLE
@assert node.type != NODE_MOI_VARIABLE_BLOCK
if input_linearity[k] == CONSTANT
continue # No hessian contribution from constant nodes
end
Expand Down Expand Up @@ -376,6 +409,13 @@ function _compute_hessian_sparsity(
child_group_variables[child_group_idx],
nodes[r].index,
)
elseif nodes[r].type == NODE_VARIABLE_BLOCK
# TODO `NODE_VARIABLE_BLOCK` would need its block shape to
# enumerate the variable indices.
error(
"Internal error: Hessian sparsity for " *
"NODE_VARIABLE_BLOCK is not yet supported.",
)
elseif nodes[r].type == NODE_SUBEXPRESSION
sub_vars = subexpression_variables[nodes[r].index]
if !haskey(child_group_variables, child_group_idx)
Expand Down
2 changes: 1 addition & 1 deletion src/mathoptinterface_api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ function MOI.initialize(
max(max_expr_with_sub_length, length(subex.nodes))
if d.want_hess
empty!(coloring_storage)
_compute_gradient_sparsity!(coloring_storage, subex.nodes)
_compute_gradient_sparsity!(coloring_storage, subex)
# union with all dependent expressions
for idx in _list_subexpressions(subex.nodes)
union!(coloring_storage, subexpression_variables[idx])
Expand Down
104 changes: 22 additions & 82 deletions src/parse_moi.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,98 +140,38 @@ end
# ── ArrayOfContiguousVariables ───────────────────────────────────────────────────

function _parse_moi_stack!(
stack::Vector{Tuple{Int,Any}},
data::Model,
expr::Expression,
x::ArrayOfContiguousVariables{2},
parent_index::Int,
)
m, n = x.size
# Build vcat(row(v11, v12, ...), row(v21, v22, ...), ...).
#
# The outer loop is `1:m` (forward order), NOT `m:-1:1`. The `:row` nodes
# we push end up at consecutive positions in `expr.nodes`, and `:vcat`
# later reads its children in tape-index order (CSC `nzrange`) — so the
# row with the smallest tape index becomes row 1 of the output matrix.
# If the outer loop ran in reverse, `row_m` would land at the smallest
# tape index and `:vcat` would silently place it as row 1, producing a
# row-flipped matrix on the tape (a latent bug, fixed here).
#
# The inner loop stays `n:-1:1` because the items go on the stack and pop
# in LIFO order — pushing in reverse j order gives forward j-order on
# pop, which matches the column-major layout below.
vcat_id = data.operators.multivariate_operator_to_id[:vcat]
row_id = data.operators.multivariate_operator_to_id[:row]
push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vcat_id, parent_index))
vcat_idx = length(expr.nodes)
for i in 1:m
push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, row_id, vcat_idx))
row_idx = length(expr.nodes)
for j in n:-1:1
vi = MOI.VariableIndex(x.offset + (j - 1) * m + i)
push!(stack, (row_idx, vi))
end
end
return
end

function _parse_moi_stack!(
stack::Vector{Tuple{Int,Any}},
data::Model,
::Vector{Tuple{Int,Any}},
::Model,
expr::Expression,
x::ArrayOfContiguousVariables{1},
x::ArrayOfContiguousVariables,
parent_index::Int,
)
m = x.size[1]
vect_id = data.operators.multivariate_operator_to_id[:vect]
push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vect_id, parent_index))
vect_idx = length(expr.nodes)
for i in m:-1:1
vi = MOI.VariableIndex(x.offset + i)
push!(stack, (vect_idx, vi))
end
# Emit a single block node. The block represents the contiguous range of
# MOI variable indices `x.offset+1, ...`, laid out in
# column-major order (matching `Array{Float64}` and `Base.LinearIndices`),
# which is the layout `_view_array` will see at evaluation time.
push!(expr.nodes, Node(NODE_MOI_VARIABLE_BLOCK, x.offset + 1, parent_index))
expr.block_shapes[length(expr.nodes)] = collect(x.size)
return
end

# ── Constant matrices and vectors ────────────────────────────────────────────
# ── Constant arrays ────────────────────────────────────────────

function _parse_moi_stack!(
stack::Vector{Tuple{Int,Any}},
data::Model,
expr::Expression,
x::AbstractMatrix{<:Real},
parent_index::Int,
)
m, n = size(x)
# See the `ArrayOfContiguousVariables{2}` overload for the rationale on
# the `1:m` outer loop (the previous `m:-1:1` produced a row-flipped
# matrix on the tape).
vcat_id = data.operators.multivariate_operator_to_id[:vcat]
row_id = data.operators.multivariate_operator_to_id[:row]
push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vcat_id, parent_index))
vcat_idx = length(expr.nodes)
for i in 1:m
push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, row_id, vcat_idx))
row_idx = length(expr.nodes)
for j in n:-1:1
push!(stack, (row_idx, x[i, j]))
end
end
return
end

function _parse_moi_stack!(
stack::Vector{Tuple{Int,Any}},
data::Model,
::Vector{Tuple{Int,Any}},
::Model,
expr::Expression,
x::AbstractVector{<:Real},
x::AbstractArray{<:Real},
parent_index::Int,
)
vect_id = data.operators.multivariate_operator_to_id[:vect]
push!(expr.nodes, Node(NODE_CALL_MULTIVARIATE, vect_id, parent_index))
vect_idx = length(expr.nodes)
for i in length(x):-1:1
push!(stack, (vect_idx, x[i]))
end
# Emit a single value block. We push the flat values to
# `expr.values` in column-major order (matching `Array{Float64}`'s memory
# layout); `node.index` records the start of that contiguous range so
# `_SubexpressionStorage` can copy it into the tape in one block at
# construction time.
start_idx = length(expr.values) + 1
append!(expr.values, x)
push!(expr.nodes, Node(NODE_VALUE_BLOCK, start_idx, parent_index))
expr.block_shapes[length(expr.nodes)] = collect(size(x))
return
end
21 changes: 20 additions & 1 deletion src/reverse_mode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ function _forward_eval(
# f.forward_storage[k] = x[node.index]
elseif node.type == NODE_VALUE
f.forward_storage[j] = f.const_values[node.index]
elseif node.type == NODE_VARIABLE_BLOCK
# Contiguous-to-contiguous copy from `x` into the tape: on CPU a
# `copyto!`, on GPU a single `cudaMemcpy`. This is the fast path
# for matrix variables from `ArrayOfContiguousVariables{2}`.
tape_range = _storage_range(f.sizes, k)
len = length(tape_range)
copyto!(
view(f.forward_storage, tape_range),
view(x, node.index:(node.index+len-1)),
)
elseif node.type == NODE_VALUE_BLOCK
# Pre-loaded into `forward_storage` at construction.
elseif node.type == NODE_SUBEXPRESSION
f.forward_storage[j] = d.subexpression_forward_values[node.index]
elseif node.type == NODE_PARAMETER
Expand Down Expand Up @@ -955,7 +967,14 @@ function _extract_reverse_pass_inner(
) where {T}
@assert length(f.reverse_storage) >= _length(f.sizes)
for (k, node) in enumerate(f.nodes)
if node.type == NODE_VARIABLE
if node.type == NODE_VARIABLE_BLOCK
tape_range = _storage_range(f.sizes, k)
len = length(tape_range)
x_range = node.index:(node.index+len-1)
view(output, x_range) .+=
scale .* view(f.reverse_storage, tape_range)
elseif node.type == NODE_VARIABLE
# Per-leaf scalar — rare, so the per-leaf `cudaMemcpy` is fine.
output[node.index] += scale * @s f.reverse_storage[k]
elseif node.type == NODE_SUBEXPRESSION
subexpressions[node.index] += scale * @s f.reverse_storage[k]
Expand Down
32 changes: 29 additions & 3 deletions src/sizes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ macro j(expr)
end

# /!\ Can only be called in decreasing `k` order
function _add_size!(sizes::Sizes, k::Int, size::Tuple)
function _add_size!(sizes::Sizes, k::Int, size)
sizes.ndims[k] = length(size)
sizes.size_offset[k] = length(sizes.size)
append!(sizes.size, size)
Expand Down Expand Up @@ -207,6 +207,7 @@ end
function _infer_sizes(
nodes::Vector{Node},
adj::SparseArrays.SparseMatrixCSC{Bool,Int},
block_shapes::Dict{Int,Vector{Int}} = Dict{Int,Vector{Int}}(),
)
sizes = Sizes(
zeros(Int, length(nodes)),
Expand All @@ -217,6 +218,15 @@ function _infer_sizes(
children_arr = SparseArrays.rowvals(adj)
for k in length(nodes):-1:1
node = nodes[k]
# Block leaves carry their shape in `block_shapes`; they're 2D (m, n)
# by construction.
if node.type == NODE_VARIABLE_BLOCK ||
node.type == NODE_VALUE_BLOCK ||
node.type == NODE_MOI_VARIABLE_BLOCK
shape = block_shapes[k]
_add_size!(sizes, k, shape)
continue
end
children_indices = SparseArrays.nzrange(adj, k)
N = length(children_indices)
if node.type == NODE_CALL_MULTIVARIATE
Expand Down Expand Up @@ -438,18 +448,34 @@ struct _SubexpressionStorage{S<:AbstractVector{Float64}}
nodes::Vector{Node},
adj::SparseArrays.SparseMatrixCSC{Bool,Int},
const_values::Vector{Float64},
block_shapes::Dict{Int,Vector{Int}},
partials_storage_ϵ::Vector{Float64},
linearity::Linearity,
::Type{S} = Vector{Float64},
) where {S<:AbstractVector{Float64}}
sizes = _infer_sizes(nodes, adj)
sizes = _infer_sizes(nodes, adj, block_shapes)
N = _length(sizes)
# Pre-load value blocks into forward_storage once at construction;
# each block is a contiguous-to-contiguous bulk copy. Individual
# `NODE_VALUE` scalars (rare — exponents, constant divisors, etc) and
# variable nodes are loaded by `_forward_eval` in the per-node loop.
cpu_buffer = zeros(N)
for k in 1:length(nodes)
node = nodes[k]
if node.type == NODE_VALUE_BLOCK
j = sizes.storage_offset[k] + 1
len = _length(sizes, k)
cpu_buffer[j:(j+len-1)] .=
view(const_values, node.index:(node.index+len-1))
end
end
forward_storage = convert(S, cpu_buffer)
return new{S}(
nodes,
adj,
sizes,
const_values,
fill!(S(undef, N), 0.0), # forward_storage,
forward_storage,
fill!(S(undef, N), 0.0), # partials_storage,
fill!(S(undef, N), 0.0), # reverse_storage,
partials_storage_ϵ,
Expand Down
Loading
Loading