Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ jobs:
steps:
- uses: actions/checkout@v3

# CMakeLists.txt calls find_package(BLAS REQUIRED) on non-Apple, non-MSVC
# platforms, so Linux runners need a BLAS development package installed before
# the configure step below. The `if` guard skips this step on other runners.
- name: Install BLAS (Ubuntu)
if: runner.os == 'Linux'
run: sudo apt-get update && sudo apt-get install -y libopenblas-dev

- name: Configure CMake
run: cmake -B build -S . -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_VERBOSE_MAKEFILE=ON

Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/sanitizer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ jobs:
steps:
- uses: actions/checkout@v5

# CMakeLists.txt calls find_package(BLAS REQUIRED) on non-Apple, non-MSVC
# platforms, so Linux runners need a BLAS development package installed before
# the sanitizer configure step below. The `if` guard skips this step elsewhere.
- name: Install BLAS (Ubuntu)
if: runner.os == 'Linux'
run: sudo apt-get update && sudo apt-get install -y libopenblas-dev

# Configure CMake with ASan + UBSan
- name: Configure with sanitizers
run: cmake -B build -S . \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/valgrind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y valgrind
sudo apt-get install -y valgrind libopenblas-dev

- name: Configure Debug build
run: cmake -B build -S . -DCMAKE_BUILD_TYPE=Debug
Expand Down
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ include_directories(${PROJECT_SOURCE_DIR}/include)
# Source files - automatically gather all .c files from src/
file(GLOB_RECURSE SOURCES "src/*.c")

# Exclude the BLAS-dependent sources when building with MSVC: no BLAS library
# is linked into dnlp_diff for MSVC builds (see the BLAS link step further down).
# NOTE(review): the condition tests MSVC (the compiler), not WIN32 (the
# platform). Other Windows toolchains such as MinGW still compile these files
# and go through find_package(BLAS REQUIRED) — confirm that is intended.
# NOTE(review): include/bivariate.h declares the dense matmul constructors
# unconditionally; presumably they are left undefined in MSVC builds — confirm
# that no MSVC-built code references them.
if(MSVC)
list(FILTER SOURCES EXCLUDE REGEX "dense_left_matmul\\.c$|dense_right_matmul\\.c$")
endif()

# Create core library
add_library(dnlp_diff ${SOURCES})
Expand All @@ -53,6 +57,14 @@ if(NOT MSVC)
target_link_libraries(dnlp_diff m)
endif()

# Link a BLAS implementation (needed for dense_left_matmul / dense_right_matmul).
#   - Apple platforms: the Accelerate framework.
#   - Any other non-MSVC toolchain: the libraries located by FindBLAS; the
#     configure step fails if none is found (REQUIRED).
#   - MSVC: nothing is linked; the BLAS-dependent sources are filtered out of
#     SOURCES before the library is created.
# NOTE(review): these calls use the keyword-less target_link_libraries signature,
# matching the existing `target_link_libraries(dnlp_diff m)`. CMake rejects mixing
# plain and keyword signatures on one target, so moving to PRIVATE/PUBLIC has to
# be done for every call on dnlp_diff at once.
# NOTE(review): FindBLAS also defines the BLAS::BLAS imported target (CMake 3.18+),
# which additionally carries BLAS_LINKER_FLAGS; consider it if the project's
# minimum CMake version allows.
if(APPLE)
  target_link_libraries(dnlp_diff "-framework Accelerate")
else()
  if(NOT MSVC)
    find_package(BLAS REQUIRED)
    target_link_libraries(dnlp_diff ${BLAS_LIBRARIES})
  endif()
endif()

# Config-specific compile options (compiler-specific)
if(MSVC)
target_compile_options(dnlp_diff PRIVATE
Expand Down
8 changes: 8 additions & 0 deletions include/bivariate.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ expr *new_left_matmul(expr *u, const CSR_Matrix *A);
/* Right matrix multiplication: f(x) @ A where A is a constant matrix */
expr *new_right_matmul(expr *u, const CSR_Matrix *A);

/* Dense left matrix multiplication: A @ f(x) where A is a dense row-major matrix.
   Uses BLAS for all operations — much faster than sparse path for dense A.
   child   - expression node for f(x).
   A_dense - entries of A in row-major order.
   m, n    - presumably the row and column counts of A (A is m x n) — TODO confirm.
   NOTE(review): whether A_dense is copied or borrowed is not visible from this
   header — confirm against the implementation.
   NOTE(review): CMakeLists.txt drops dense_left_matmul.c from MSVC builds; if that
   file holds this definition, the symbol is unavailable there — confirm. */
expr *new_dense_left_matmul(expr *child, const double *A_dense, int m, int n);

/* Dense right matrix multiplication: f(x) @ A where A is a dense row-major matrix.
   Implemented as (A^T @ f(x)^T)^T using dense_left_matmul.
   u       - expression node for f(x).
   A_dense - entries of A in row-major order.
   m, n    - presumably the row and column counts of A (A is m x n) — TODO confirm.
   NOTE(review): whether A_dense is copied or borrowed is not visible from this
   header — confirm against the implementation.
   NOTE(review): CMakeLists.txt drops dense_right_matmul.c from MSVC builds; if that
   file holds this definition, the symbol is unavailable there — confirm. */
expr *new_dense_right_matmul(expr *u, const double *A_dense, int m, int n);

/* Constant scalar multiplication: a * f(x) where a is a constant double */
expr *new_const_scalar_mult(double a, expr *child);

Expand Down
19 changes: 19 additions & 0 deletions include/subexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,25 @@ typedef struct left_matmul_expr
int *csc_to_csr_workspace;
} left_matmul_expr;

/* Dense left matrix multiplication: y = A * f(x) where A is a dense matrix.
 * Performance-optimized variant that uses BLAS (cblas_dgemv/dgemm) instead of
 * sparse operations. Avoids the per-row overlap checks in sparsity detection and
 * sparse dot products in value computation.
 *
 * NOTE(review): BATCH, used in the workspace sizes below, is not defined in this
 * header — presumably a constant in the implementation file; confirm. */
typedef struct dense_left_matmul_expr
{
expr base; /* embedded expr, kept as the first member */
double *A_dense; /* row-major m×n */
double *AT_dense; /* row-major n×m (for wsum_hess); presumably the transpose of
                     A_dense — TODO confirm */
int m, n, n_blocks; /* m, n: dimensions of A_dense (m×n); the meaning of n_blocks
                       is not evident from this header — TODO confirm */
CSC_Matrix *Jchild_CSC; /* presumably the child's Jacobian in CSC form — confirm */
CSC_Matrix *J_CSC; /* presumably this node's Jacobian in CSC form — confirm */
int *csc_to_csr_workspace; /* integer scratch buffer; same field name as in
                              left_matmul_expr, presumably for CSC-to-CSR
                              conversion — confirm */
double *J_block_work; /* n × BATCH col-major workspace for dgemm */
double *C_block_work; /* m × BATCH col-major workspace for dgemm */
int *col_work; /* 2 × n_vars ints for column indices/offsets */
bool affine_cached; /* true if Jacobian was pre-computed in init */
} dense_left_matmul_expr;

/* Right matrix multiplication: y = f(x) * A where f(x) is an expression.
* f(x) has shape p x n, A has shape n x q, output y has shape p x q.
* Uses vec(y) = B * vec(f(x)) where B = A^T kron I_p. */
Expand Down
28 changes: 28 additions & 0 deletions include/utils/blas_compat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright 2026 Daniel Cederberg and William Zhang
*
* This file is part of the DNLP-differentiation-engine project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BLAS_COMPAT_H
#define BLAS_COMPAT_H

/* Single include point for the CBLAS interface.
 *
 * On Apple platforms CBLAS comes from the Accelerate framework; everywhere else
 * a system <cblas.h> is expected (e.g. from an OpenBLAS development package).
 * Include this header instead of including either of those directly. */

#ifdef __APPLE__
/* ACCELERATE_NEW_LAPACK must be defined before Accelerate.h is included to
 * select Accelerate's updated BLAS/LAPACK interfaces. It is guarded so that a
 * build that already supplies it (e.g. -DACCELERATE_NEW_LAPACK, which defines
 * it as 1) does not trigger a macro-redefinition diagnostic. */
#ifndef ACCELERATE_NEW_LAPACK
#define ACCELERATE_NEW_LAPACK
#endif
#include <Accelerate/Accelerate.h>
#else
#include <cblas.h>
#endif

#endif /* BLAS_COMPAT_H */
Loading