diff --git a/src/ArrayDiff.jl b/src/ArrayDiff.jl
index 9595d82..6ad4d05 100644
--- a/src/ArrayDiff.jl
+++ b/src/ArrayDiff.jl
@@ -20,10 +20,11 @@ Fork of `MOI.Nonlinear.SparseReverseMode` to add array support.
 The type parameter `S` is the storage type used for the AD tape (forward,
 partials, and reverse storage of each subexpression). It must satisfy
-`S<:AbstractVector{Float64}`. Defaults to `Vector{Float64}`. Pass a different
-`S` (for example `CuVector{Float64}`) to keep the tape on a GPU.
+`S<:AbstractVector{<:Real}`. Defaults to `Vector{Float64}`. Pass a different
+`S` (for example `Vector{Float32}` or `CuVector{Float64}`) to run AD in
+another precision or keep the tape on a GPU.
 """
-struct Mode{S<:AbstractVector{Float64}} <:
+struct Mode{S<:AbstractVector{<:Real}} <:
        MOI.Nonlinear.AbstractAutomaticDifferentiation
 end
 
 Mode() = Mode{Vector{Float64}}()
@@ -65,7 +66,7 @@ include("evaluator.jl")
 include("array_nonlinear_function.jl")
 include("parse_moi.jl")
 
-model(::Mode{S}) where {S} = Model()
+model(::Mode{S}) where {S} = Model{eltype(S)}()
 
 # Extend MOI.Nonlinear.set_objective so that solvers calling
 # MOI.Nonlinear.set_objective(arraydiff_model, snf) dispatch here.
@@ -84,8 +85,8 @@ function Evaluator(
     model::ArrayDiff.Model,
     ::Mode{S},
     ordered_variables::Vector{MOI.VariableIndex},
-) where {S<:AbstractVector{Float64}}
-    return Evaluator(model, NLPEvaluator{S}(model, ordered_variables))
+) where {S<:AbstractVector{<:Real}}
+    return Evaluator(model, NLPEvaluator{eltype(S),S}(model, ordered_variables))
 end
 
 # Called by solvers via MOI.Nonlinear.Evaluator(nlp_model, ad_backend, vars).
diff --git a/src/JuMP/moi_bridge.jl b/src/JuMP/moi_bridge.jl
index a0e818d..6ebcb70 100644
--- a/src/JuMP/moi_bridge.jl
+++ b/src/JuMP/moi_bridge.jl
@@ -12,9 +12,9 @@ function _to_moi_arg(x::GenericArrayExpr{V,N}) where {V,N}
     return ArrayNonlinearFunction{N}(x.head, args, x.size, x.broadcasted)
 end
 
-_to_moi_arg(x::Matrix{Float64}) = x
+_to_moi_arg(x::Matrix{<:Real}) = x
 
-_to_moi_arg(x::Real) = Float64(x)
+_to_moi_arg(x::Real) = x
 
 function JuMP.moi_function(x::GenericArrayExpr{V,N}) where {V,N}
     return _to_moi_arg(x)
diff --git a/src/array_nonlinear_function.jl b/src/array_nonlinear_function.jl
index c1f7491..49cfce4 100644
--- a/src/array_nonlinear_function.jl
+++ b/src/array_nonlinear_function.jl
@@ -87,7 +87,7 @@ function _map_indices_arg(index_map::F, x::ArrayOfContiguousVariables) where {F}
     return MOI.Utilities.map_indices(index_map, x)
 end
 
-function _map_indices_arg(::F, x::Matrix{Float64}) where {F}
+function _map_indices_arg(::F, x::Matrix{<:Real}) where {F}
     return x
 end
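
A minimal usage sketch (reviewer aside, not part of the patch) of the widened
`Mode{S}` parameter from `src/ArrayDiff.jl` above; the GPU line assumes
CUDA.jl is loaded:

using ArrayDiff

mode64 = ArrayDiff.Mode()                   # default, == Mode{Vector{Float64}}()
mode32 = ArrayDiff.Mode{Vector{Float32}}()  # single-precision tape
# mode_gpu = ArrayDiff.Mode{CUDA.CuVector{Float64}}()  # Float64 tape on a GPU

# `model` now forwards the storage eltype, so model constants match the tape:
ad = ArrayDiff.model(mode32)                # builds a Model{Float32}
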
diff --git a/src/mathoptinterface_api.jl b/src/mathoptinterface_api.jl
index 126630f..413f80a 100644
--- a/src/mathoptinterface_api.jl
+++ b/src/mathoptinterface_api.jl
@@ -20,9 +20,9 @@ function MOI.features_available(d::NLPEvaluator)
 end
 
 function MOI.initialize(
-    d::NLPEvaluator{S},
+    d::NLPEvaluator{T,S},
     requested_features::Vector{Symbol},
-) where {S<:AbstractVector{Float64}}
+) where {T<:Real,S<:AbstractVector{T}}
     # Check that we support the features requested by the user.
     available_features = MOI.features_available(d)
     for feature in requested_features
@@ -40,10 +40,10 @@ function MOI.initialize(
     end
     d.objective = nothing
     d.residual = nothing
-    d.user_output_buffer = zeros(largest_user_input_dimension)
-    d.jac_storage = zeros(max(N, largest_user_input_dimension))
-    d.constraints = _FunctionStorage{S}[]
-    d.last_x = fill(NaN, N)
+    d.user_output_buffer = zeros(T, largest_user_input_dimension)
+    d.jac_storage = zeros(T, max(N, largest_user_input_dimension))
+    d.constraints = _FunctionStorage{T,S}[]
+    d.last_x = fill(T(NaN), N)
     d.want_hess = :Hess in requested_features
     want_hess_storage = (:HessVec in requested_features) || d.want_hess
     coloring_storage = Coloring.IndexedSet(N)
@@ -67,9 +67,9 @@ function MOI.initialize(
     subexpression_edgelist =
         Vector{Set{Tuple{Int,Int}}}(undef, num_subexpressions)
     d.subexpressions =
-        Vector{_SubexpressionStorage{S}}(undef, num_subexpressions)
-    d.subexpression_forward_values = zeros(num_subexpressions)
-    d.subexpression_reverse_values = zeros(num_subexpressions)
+        Vector{_SubexpressionStorage{T,S}}(undef, num_subexpressions)
+    d.subexpression_forward_values = zeros(T, num_subexpressions)
+    d.subexpression_reverse_values = zeros(T, num_subexpressions)
     for k in d.subexpression_order # Only load expressions which actually are used
         d.subexpression_forward_values[k] = NaN
@@ -145,6 +145,7 @@ function MOI.initialize(
             moi_index_to_consecutive_index,
             shared_partials_storage_ϵ,
             d,
+            S,
         )
         residual = _FunctionStorage(
             subexpr,
@@ -235,7 +236,7 @@ function MOI.eval_objective_gradient(d::NLPEvaluator, g, x)
         error("No nonlinear objective.")
     end
     _reverse_mode(d, x)
-    fill!(g, 0.0)
+    fill!(g, zero(eltype(g)))
     _extract_reverse_pass(g, d, something(d.objective))
     return
 end
diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl
index 63bff8a..5a90faa 100644
--- a/src/reverse_mode.jl
+++ b/src/reverse_mode.jl
@@ -945,9 +945,9 @@ function _extract_reverse_pass(
     f::_FunctionStorage,
 ) where {T}
     for i in f.dependent_subexpressions
-        d.subexpression_reverse_values[i] = 0.0
+        d.subexpression_reverse_values[i] = zero(T)
     end
-    _extract_reverse_pass_inner(g, f, d.subexpression_reverse_values, 1.0)
+    _extract_reverse_pass_inner(g, f, d.subexpression_reverse_values, one(T))
     for i in length(f.dependent_subexpressions):-1:1
         k = f.dependent_subexpressions[i]
         _extract_reverse_pass_inner(
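
Why `zero(T)`/`one(T)` instead of `0.0`/`1.0` in the reverse pass above: a
`Float64` literal silently promotes a `Float32` tape. A standalone
illustration in plain Julia (reviewer aside, no package code):

seed_literal(x::AbstractVector) = 1.0 .* x                  # literal promotes
seed_generic(x::AbstractVector{T}) where {T} = one(T) .* x  # stays in T

x32 = Float32[1, 2, 3]
@assert eltype(seed_literal(x32)) == Float64  # accidental promotion
@assert eltype(seed_generic(x32)) == Float32  # precision preserved
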
diff --git a/src/sizes.jl b/src/sizes.jl
index dde5277..7fc3c90 100644
--- a/src/sizes.jl
+++ b/src/sizes.jl
@@ -438,11 +438,11 @@ function _infer_sizes(
     return sizes
 end
 
-struct _SubexpressionStorage{S<:AbstractVector{Float64}}
+struct _SubexpressionStorage{T<:Real,S<:AbstractVector{T}}
     nodes::Vector{Node}
     adj::SparseArrays.SparseMatrixCSC{Bool,Int}
     sizes::Sizes
-    const_values::Vector{Float64}
+    const_values::Vector{T}
     forward_storage::S
     partials_storage::S
     reverse_storage::S
@@ -452,19 +452,19 @@ struct _SubexpressionStorage{S<:AbstractVector{Float64}}
     function _SubexpressionStorage(
         nodes::Vector{Node},
         adj::SparseArrays.SparseMatrixCSC{Bool,Int},
-        const_values::Vector{Float64},
+        const_values::Vector{T},
         block_shapes::Dict{Int,Vector{Int}},
         partials_storage_ϵ::Vector{Float64},
         linearity::Linearity,
-        ::Type{S} = Vector{Float64},
-    ) where {S<:AbstractVector{Float64}}
+        ::Type{S} = Vector{T},
+    ) where {T<:Real,S<:AbstractVector{T}}
         sizes = _infer_sizes(nodes, adj, block_shapes)
         N = _length(sizes)
         # Pre-load value blocks into forward_storage once at construction;
         # each block is a contiguous-to-contiguous bulk copy. Individual
         # `NODE_VALUE` scalars (rare — exponents, constant divisors, etc) and
         # variable nodes are loaded by `_forward_eval` in the per-node loop.
-        cpu_buffer = zeros(N)
+        cpu_buffer = zeros(T, N)
        for k in 1:length(nodes)
             node = nodes[k]
             if node.type == NODE_VALUE_BLOCK
@@ -475,14 +475,14 @@ struct _SubexpressionStorage{S<:AbstractVector{Float64}}
             end
         end
         forward_storage = convert(S, cpu_buffer)
-        return new{S}(
+        return new{T,S}(
             nodes,
             adj,
             sizes,
             const_values,
             forward_storage,
-            fill!(S(undef, N), 0.0), # partials_storage,
-            fill!(S(undef, N), 0.0), # reverse_storage,
+            fill!(S(undef, N), zero(T)), # partials_storage,
+            fill!(S(undef, N), zero(T)), # reverse_storage,
             partials_storage_ϵ,
             linearity,
         )
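
The constructor above allocates tape buffers with `fill!(S(undef, N), zero(T))`
rather than `zeros(N)`, so allocation works for any storage type, not just
`Vector{Float64}`. A self-contained sketch of the same pattern (reviewer
aside; names are illustrative, not from the package):

function make_buffers(::Type{S}, N::Integer) where {T<:Real,S<:AbstractVector{T}}
    cpu_buffer = zeros(T, N)            # staged on the CPU with eltype T
    forward = convert(S, cpu_buffer)    # one bulk copy into the target storage
    partials = fill!(S(undef, N), zero(T))
    reverse = fill!(S(undef, N), zero(T))
    return forward, partials, reverse
end

f, p, r = make_buffers(Vector{Float32}, 4)
@assert eltype(f) == Float32 && all(iszero, p) && all(iszero, r)
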
""" -mutable struct NLPEvaluator{S<:AbstractVector{Float64}} <: +mutable struct NLPEvaluator{T<:Real,S<:AbstractVector{T}} <: MOI.AbstractNLPEvaluator data::Model ordered_variables::Vector{MOI.VariableIndex} - objective::Union{Nothing,_FunctionStorage{S}} - residual::Union{Nothing,_FunctionStorage{S}} - constraints::Vector{_FunctionStorage{S}} - subexpressions::Vector{_SubexpressionStorage{S}} + objective::Union{Nothing,_FunctionStorage{T,S}} + residual::Union{Nothing,_FunctionStorage{T,S}} + constraints::Vector{_FunctionStorage{T,S}} + subexpressions::Vector{_SubexpressionStorage{T,S}} subexpression_order::Vector{Int} # Storage for the subexpressions in reverse-mode automatic differentiation. - subexpression_forward_values::Vector{Float64} - subexpression_reverse_values::Vector{Float64} + subexpression_forward_values::Vector{T} + subexpression_reverse_values::Vector{T} subexpression_linearity::Vector{Linearity} # A cache of the last x. This is used to guide whether we need to re-run # reverse-mode automatic differentiation. - last_x::Vector{Float64} + last_x::Vector{T} # Temporary storage for computing Jacobians. This is also used as temporary # storage for the input of multivariate functions. - jac_storage::Vector{Float64} + jac_storage::Vector{T} # Temporary storage for the gradient of multivariate functions - user_output_buffer::Vector{Float64} + user_output_buffer::Vector{T} # storage for computing hessians # these Float64 vectors are reinterpreted to hold multiple epsilon components @@ -343,10 +343,10 @@ mutable struct NLPEvaluator{S<:AbstractVector{Float64}} <: hessian_sparsity::Vector{Tuple{Int64,Int64}} max_chunk::Int # chunk size for which we've allocated storage - function NLPEvaluator{S}( + function NLPEvaluator{T,S}( data::Model, ordered_variables::Vector{MOI.VariableIndex}, - ) where {S<:AbstractVector{Float64}} - return new{S}(data, ordered_variables) + ) where {T<:Real,S<:AbstractVector{T}} + return new{T,S}(data, ordered_variables) end end diff --git a/test/JuMP.jl b/test/JuMP.jl index 3043900..9c44143 100644 --- a/test/JuMP.jl +++ b/test/JuMP.jl @@ -164,8 +164,8 @@ function test_parse_moi() return end -function _eval(model, func, x) - mode = ArrayDiff.Mode() +function _eval(model::JuMP.GenericModel{T}, func, x) where {T} + mode = ArrayDiff.Mode{Vector{T}}() ad = ArrayDiff.model(mode) MOI.Nonlinear.set_objective(ad, JuMP.moi_function(func)) evaluator = MOI.Nonlinear.Evaluator( @@ -176,7 +176,7 @@ function _eval(model, func, x) MOI.initialize(evaluator, [:Grad]) val = MOI.eval_objective(evaluator, x) g = zero(x) - MOI.eval_objective_gradient(evaluator, g, Float64.(collect(1:8))) + MOI.eval_objective_gradient(evaluator, g, T.(collect(1:8))) MOI.Nonlinear.set_objective(ad, nothing) @test isnothing(ad.objective) return val, g @@ -188,18 +188,20 @@ function _test_neural( plus::Bool, wrap::Bool, swap::Bool, + T::Type, ) n = 2 - X = [1.0 0.5; 0.3 0.8] - target = [0.5 0.2; 0.1 0.7] + X = T[1.0 0.5; 0.3 0.8] + target = T[0.5 0.2; 0.1 0.7] if wrap - X = ArrayDiff.MatrixExpr(:+, Any[X], size(X), false) - target = ArrayDiff.MatrixExpr(:+, Any[target], size(target), false) + ME = ArrayDiff.GenericMatrixExpr{JuMP.GenericVariableRef{T}} + X = ME(:+, Any[X], size(X), false) + target = ME(:+, Any[target], size(target), false) end if plus target = -target end - model = Model() + model = GenericModel{T}() @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables) # Use distinct starting values to break 
diff --git a/test/JuMP.jl b/test/JuMP.jl
index 3043900..9c44143 100644
--- a/test/JuMP.jl
+++ b/test/JuMP.jl
@@ -164,8 +164,8 @@ function test_parse_moi()
     return
 end
 
-function _eval(model, func, x)
-    mode = ArrayDiff.Mode()
+function _eval(model::JuMP.GenericModel{T}, func, x) where {T}
+    mode = ArrayDiff.Mode{Vector{T}}()
     ad = ArrayDiff.model(mode)
     MOI.Nonlinear.set_objective(ad, JuMP.moi_function(func))
     evaluator = MOI.Nonlinear.Evaluator(
@@ -176,7 +176,7 @@ function _eval(model, func, x)
     MOI.initialize(evaluator, [:Grad])
     val = MOI.eval_objective(evaluator, x)
     g = zero(x)
-    MOI.eval_objective_gradient(evaluator, g, Float64.(collect(1:8)))
+    MOI.eval_objective_gradient(evaluator, g, T.(collect(1:8)))
     MOI.Nonlinear.set_objective(ad, nothing)
     @test isnothing(ad.objective)
     return val, g
@@ -188,18 +188,20 @@ function _test_neural(
     plus::Bool,
     wrap::Bool,
     swap::Bool,
+    T::Type,
 )
     n = 2
-    X = [1.0 0.5; 0.3 0.8]
-    target = [0.5 0.2; 0.1 0.7]
+    X = T[1.0 0.5; 0.3 0.8]
+    target = T[0.5 0.2; 0.1 0.7]
     if wrap
-        X = ArrayDiff.MatrixExpr(:+, Any[X], size(X), false)
-        target = ArrayDiff.MatrixExpr(:+, Any[target], size(target), false)
+        ME = ArrayDiff.GenericMatrixExpr{JuMP.GenericVariableRef{T}}
+        X = ME(:+, Any[X], size(X), false)
+        target = ME(:+, Any[target], size(target), false)
     end
     if plus
         target = -target
     end
-    model = Model()
+    model = GenericModel{T}()
     @variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
     @variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
     # Use distinct starting values to break symmetry
@@ -238,8 +240,8 @@ function _test_neural(
     else
         loss = sum(E .^ 2)
     end
-    W1_val = [0.3 -0.2; 0.1 0.4]
-    W2_val = [-0.1 0.5; 0.2 -0.3]
+    W1_val = T[0.3 -0.2; 0.1 0.4]
+    W2_val = T[-0.1 0.5; 0.2 -0.3]
     obj, g = _eval(model, loss, [vec(W1_val); vec(W2_val)])
     # Reference computed from the same hand-written forward/reverse formulas
     # as `perf/cuda_vs_pytorch.jl::forward_pass`/`reverse_diff`, adapted to
@@ -247,15 +249,15 @@ function _test_neural(
     # over both `W1` and `W2`). `_eval` evaluates the objective at `xstart`
     # and the gradient at `x = [1, ..., 8]`, so we need the references at the
     # corresponding inputs.
-    X_const = [1.0 0.5; 0.3 0.8]
-    target_const = [0.5 0.2; 0.1 0.7]
+    X_const = T[1.0 0.5; 0.3 0.8]
+    target_const = T[0.5 0.2; 0.1 0.7]
     obj_val = _ref_objective(W1_val, W2_val, X_const, target_const)
     if with_norm
         obj_val = sqrt(obj_val)
     end
     @test obj ≈ obj_val
-    W1_at_grad = reshape([1.0, 2.0, 3.0, 4.0], 2, 2)
-    W2_at_grad = reshape([5.0, 6.0, 7.0, 8.0], 2, 2)
+    W1_at_grad = reshape(T[1.0, 2.0, 3.0, 4.0], 2, 2)
+    W2_at_grad = reshape(T[5.0, 6.0, 7.0, 8.0], 2, 2)
     grad_sumsq = _ref_gradient(W1_at_grad, W2_at_grad, X_const, target_const)
     if with_norm
         # `d/dx ‖E‖₂ = (1/(2‖E‖₂)) · d/dx ‖E‖₂² = grad_sumsq / (2 sqrt(sumsq))`,
@@ -299,7 +301,16 @@ function test_neural()
         @testset "$(plus ? "+" : "-")" for plus in bin
             @testset "$(wrap ? "wrap" : "nowrap")" for wrap in bin
                 @testset "$(swap ? "swap" : "noswap")" for swap in bin
-                    _test_neural(with_norm, broadcast, plus, wrap, swap)
+                    @testset "$T" for T in [Float64, Float32]
+                        _test_neural(
+                            with_norm,
+                            broadcast,
+                            plus,
+                            wrap,
+                            swap,
+                            T,
+                        )
+                    end
                 end
             end
         end
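
A closing note on tolerances (reviewer aside): the unchanged `@test obj ≈
obj_val` assertions can pass at both precisions because `isapprox` defaults
its relative tolerance to `sqrt(eps(T))` of the promoted argument type, so
`≈` automatically loosens from roughly `1.5e-8` at `Float64` to roughly
`3.5e-4` at `Float32`:

@assert isapprox(sqrt(eps(Float64)), 1.5e-8; rtol = 0.01)
@assert isapprox(sqrt(eps(Float32)), 3.45e-4; rtol = 0.01)

# A 1e-5 discrepancy passes `≈` at Float32 but fails it at Float64:
@assert 1.0f0 ≈ 1.0f0 + 1.0f-5
@assert !(1.0 ≈ 1.0 + 1.0e-5)
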