Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
198 commits
Select commit Hold shift + click to select a range
2119555
feat(optimizer): add data model — schema, entities, DTOs, converters
mkuchenbecker Apr 3, 2026
3c93d52
fix: address PR review feedback on optimizer data model
mkuchenbecker Apr 3, 2026
d419eb3
feat(optimizer): add repositories and repository tests
mkuchenbecker Apr 6, 2026
7ff3b43
fix: consolidate repo methods — single find with optional filters
mkuchenbecker Apr 6, 2026
f7f6812
feat(optimizer): add REST service layer, controllers, and shared module
mkuchenbecker Apr 6, 2026
ef3260f
fix: update service impl to use consolidated find methods
mkuchenbecker Apr 6, 2026
ac1da01
feat(optimizer): add apps/optimizer shared module with find-only repos
mkuchenbecker Apr 6, 2026
be353ca
Merge mkuchenb/optimizer-1 into optimizer-2
mkuchenbecker Apr 6, 2026
02a5ab3
fix: remove orphan fields from CompleteOperationRequest
mkuchenbecker Apr 6, 2026
5c78c8f
Merge mkuchenb/optimizer-0 into optimizer-1
mkuchenbecker Apr 6, 2026
2ddc445
Merge mkuchenb/optimizer-1 into optimizer-2
mkuchenbecker Apr 6, 2026
01466c7
feat(optimizer): add service-layer integration tests
mkuchenbecker Apr 6, 2026
ff07fde
fix: assert stats history delta values in upsert test
mkuchenbecker Apr 6, 2026
c0802cb
feat(optimizer): add analyzer app — continuous table operation schedu…
mkuchenbecker Apr 7, 2026
63b0768
fix: address PR review feedback on optimizer-3 analyzer
mkuchenbecker Apr 7, 2026
1cbe556
Merge branch 'main' into mkuchenb/optimizer-0
mkuchenbecker Apr 30, 2026
231e1a1
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker Apr 30, 2026
ff448a0
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker Apr 30, 2026
936f3f3
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker Apr 30, 2026
f82d1b3
fix(optimizer): address PR #527 review feedback
mkuchenbecker May 1, 2026
e907a31
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 1, 2026
a109f02
fix(optimizer): propagate optimizer-0 renames into repos and tests
mkuchenbecker May 1, 2026
a1ef430
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 1, 2026
df01c26
fix(optimizer): propagate optimizer-0 renames into service + controller
mkuchenbecker May 1, 2026
67538f8
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 1, 2026
11dd115
fix(optimizer): propagate optimizer-0 renames into apps/optimizer + a…
mkuchenbecker May 1, 2026
027fccd
fix(optimizer): add databaseName + tableName to apps/optimizer histor…
mkuchenbecker May 1, 2026
e0a49da
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 1, 2026
4f4158d
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 1, 2026
79753f1
fix(optimizer): index table_operations_history on (database_name, tab…
mkuchenbecker May 1, 2026
ae610ae
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 1, 2026
85a432f
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 1, 2026
dceef97
feat(optimizer): unify REST prefix to /v1/optimizer; add name-based h…
mkuchenbecker May 1, 2026
7170599
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 1, 2026
bf04488
fix(optimizer): align apps/optimizer entities with services schema
mkuchenbecker May 12, 2026
f2ac002
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 12, 2026
5f6fa3b
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 12, 2026
8054586
refactor(optimizer-analyzer): delete unused AnalyzerConfig
mkuchenbecker May 12, 2026
5af5f14
refactor(optimizer-analyzer): remove circuit breaker, defer with TODO
mkuchenbecker May 12, 2026
62f426a
feat(optimizer): add findLatestPerTable to history repo
mkuchenbecker May 12, 2026
e13a31b
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 12, 2026
442f1e4
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 12, 2026
c4f194a
perf(optimizer-analyzer): use findLatestPerTable for history lookup
mkuchenbecker May 12, 2026
6da624a
refactor(optimizer-analyzer): typed OperationType/Status, polish cade…
mkuchenbecker May 12, 2026
52ba858
refactor(optimizer-analyzer): rename OrphanFilesDeletionAnalyzer → Ca…
mkuchenbecker May 12, 2026
beedad8
fix(optimizer-analyzer): update class name inside renamed files
mkuchenbecker May 12, 2026
3483b25
perf(optimizer): index table_operations_history for findLatestPerTable
mkuchenbecker May 13, 2026
9748548
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 13, 2026
e4a1ad1
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 13, 2026
f663537
docs(optimizer-analyzer): add scale roadmap as block comment
mkuchenbecker May 13, 2026
d7e3a65
docs(optimizer-analyzer): move scale roadmap to BDP-102182
mkuchenbecker May 13, 2026
0293009
feat(optimizer): add findDistinctDatabaseNames to TableStatsRepository
mkuchenbecker May 13, 2026
0efab45
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 13, 2026
6fa885d
refactor(optimizer): Optional<T> for optional filter params in servic…
mkuchenbecker May 13, 2026
a5585f4
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 13, 2026
dd4faf2
refactor(optimizer-analyzer): address PR review — required op, per-db…
mkuchenbecker May 13, 2026
91ba362
style(optimizer-analyzer): tighten AnalyzerRunner.analyze body
mkuchenbecker May 13, 2026
eba1392
feat(optimizer): promote internal model types to shared apps/optimizer
mkuchenbecker May 14, 2026
10ed1bb
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
35bcd38
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
0dbe3d9
refactor(optimizer-analyzer): import shared model + explanatory comment
mkuchenbecker May 14, 2026
e576593
refactor(optimizer): rename apps/optimizer entities + repos to plural…
mkuchenbecker May 14, 2026
9f88a4a
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
6f98e1a
refactor(optimizer): consolidate entities/repos into apps/optimizer; …
mkuchenbecker May 14, 2026
d44783f
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
d90c26f
refactor(optimizer): move apps/optimizer module into services/optimizer
mkuchenbecker May 14, 2026
62f33b7
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
17e280f
refactor(optimizer): drop apps/optimizer-data dep; simplify history API
mkuchenbecker May 14, 2026
a5df7e4
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
b0898e3
refactor(optimizer-analyzer): depend on :services:optimizer
mkuchenbecker May 14, 2026
9a129a8
refactor(optimizer): align data model — rename HistoryStatus; String …
mkuchenbecker May 14, 2026
a8978a0
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
dfb9102
refactor(optimizer): realign entity shapes with optimizer-0
mkuchenbecker May 14, 2026
e3bf9e1
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
fb71bd9
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
681407e
feat(optimizer): add internal model layer
mkuchenbecker May 14, 2026
2005bca
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
b689969
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
7ea8868
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
d7767e8
fix(optimizer-analyzer): rewrite AnalyzerRunnerTest to use entity bui…
mkuchenbecker May 14, 2026
e3fb777
perf(optimizer): index table_operations_history for findLatestPerTable
mkuchenbecker May 14, 2026
f89889d
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
beaaf88
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
4a8796c
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
d3e1726
refactor(optimizer): enforce layer boundaries in api/ + model/
mkuchenbecker May 14, 2026
db9513a
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
1d469a7
refactor(optimizer): remove db-layer types from optimizer-0
mkuchenbecker May 14, 2026
eee8eca
refactor(optimizer): remove DB schema + schema-init properties
mkuchenbecker May 14, 2026
0567753
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
328e5b9
refactor(optimizer): scrub MySQL / JPA / datasource references
mkuchenbecker May 14, 2026
f7a5d20
refactor(optimizer): drop UpsertTableOperationsRequest
mkuchenbecker May 14, 2026
2a532b5
refactor(optimizer): drop JobResult from the wire and internal model
mkuchenbecker May 14, 2026
2e3a231
feat(optimizer): add debug echo fields to CompleteOperationRequest
mkuchenbecker May 14, 2026
db5eb29
refactor(optimizer): move application.properties out of optimizer-0
mkuchenbecker May 14, 2026
bbcf84a
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
ac3abc0
feat(optimizer): introduce db/ layer with per-layer types
mkuchenbecker May 14, 2026
e79eec7
refactor(optimizer): split TableStats envelope into snapshot + delta …
mkuchenbecker May 14, 2026
f955ded
fix(optimizer): drop CommitDeltaMetrics from TableStatsRow
mkuchenbecker May 14, 2026
13987c1
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
969949d
refactor(optimizer): rewire service layer onto api/model/db mappers
mkuchenbecker May 14, 2026
861b584
feat(optimizer): extend model layer for service-only types
mkuchenbecker May 14, 2026
41d4c6d
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
69d9e8f
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
b60a3bf
feat(optimizer): extend ModelDbMapper for service-only types
mkuchenbecker May 14, 2026
eb6e3be
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
b80b2e5
refactor(optimizer): service layer returns only model/ types
mkuchenbecker May 14, 2026
ef453ca
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
ad11533
refactor(optimizer-analyzer): consume model/ types and ModelDbMapper
mkuchenbecker May 14, 2026
25d98aa
feat(optimizer): restore batch CAS methods on TableOperationsRepository
mkuchenbecker May 14, 2026
31fac5b
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
51dab67
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
188713d
docs(optimizer): comment every field on opt-0 api/ and model/ types
mkuchenbecker May 14, 2026
f060b5e
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
1119699
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
619df83
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
8d64273
refactor(optimizer): remove clusterId from SnapshotMetrics
mkuchenbecker May 14, 2026
ee7bcab
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
c1ad246
refactor(optimizer): comment every db/ field; drop clusterId and version
mkuchenbecker May 14, 2026
72b431c
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
0b30130
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
c72aae8
refactor(optimizer): move api↔model conversion onto api types; delete…
mkuchenbecker May 14, 2026
1fca287
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 14, 2026
8ae8777
refactor(optimizer): move model↔db conversion onto model types; delet…
mkuchenbecker May 14, 2026
b3aacff
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 14, 2026
bb8aa4d
refactor(optimizer): service + controllers use type to/from methods
mkuchenbecker May 14, 2026
6a23755
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 14, 2026
95456be
refactor(optimizer-analyzer): use type to/from methods; drop ModelDbM…
mkuchenbecker May 14, 2026
af23d5e
fix(optimizer): make TableStats self-describing; route DTO conversion…
mkuchenbecker May 15, 2026
3864e42
chore(optimizer): cascade self-describing TableStats from opt-0 to opt-1
mkuchenbecker May 15, 2026
0a1125b
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 15, 2026
a6045b5
feat(optimizer): add TableStats↔TableStatsRow conversion on model
mkuchenbecker May 15, 2026
4427de0
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 15, 2026
db5921e
refactor(optimizer): service stats methods take/return TableStats, no…
mkuchenbecker May 15, 2026
d82c17f
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 15, 2026
3aebf64
chore(optimizer): enable toBuilder on model.Table and model.TableOper…
mkuchenbecker May 15, 2026
bf30f86
chore(optimizer): cascade toBuilder annotations from opt-0 to opt-1
mkuchenbecker May 15, 2026
faba6d7
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 15, 2026
1d56fa6
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 15, 2026
b6c7f42
refactor(optimizer): drop fileCount enrichment from model.TableOperation
mkuchenbecker May 18, 2026
177af95
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 18, 2026
487ac56
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 18, 2026
6ffc703
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 18, 2026
7f51360
chore(analyzer): add TODOs for scale tests and query-builder migration
mkuchenbecker May 18, 2026
2b06c92
feat(repo): add findClaimedIds for transactional batch-claim verifica…
mkuchenbecker May 18, 2026
5b5aae2
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 18, 2026
c862777
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 18, 2026
437a0ed
refactor(optimizer): add Dto suffix to all api/model classes (PR #527…
mkuchenbecker May 19, 2026
aabb51c
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 19, 2026
928d537
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 19, 2026
eedf6d0
refactor(optimizer): update controllers for renamed api/model Dto types
mkuchenbecker May 19, 2026
1166efb
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 19, 2026
4f98c22
refactor(optimizer): rename api.model package to api.spec (PR #527 re…
mkuchenbecker May 19, 2026
2c26872
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 19, 2026
b849b7d
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 19, 2026
231efde
refactor(optimizer): update controller imports for api.model -> api.s…
mkuchenbecker May 19, 2026
a2580b1
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 19, 2026
f1c500b
refactor(optimizer): hard-fail in AnalyzerRunner if no analyzer is re…
mkuchenbecker May 19, 2026
f788ab6
fix(analyzer): point @EntityScan at the actual db package
mkuchenbecker May 20, 2026
b31decf
refactor(optimizer): move Dto suffix from api/spec to model
mkuchenbecker May 20, 2026
caf3294
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
c6a64bf
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 20, 2026
91e89ef
refactor(optimizer): update controller + service refs after Dto suffi…
mkuchenbecker May 20, 2026
8a4251d
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 20, 2026
c305aa9
refactor(analyzer): update model refs after Dto suffix swap
mkuchenbecker May 20, 2026
4e86569
feat(optimizer): propagate jobId through model + api conversions
mkuchenbecker May 20, 2026
cc8aa80
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
efcceea
feat(optimizer): propagate jobId through model ↔ db conversions
mkuchenbecker May 20, 2026
f85edd5
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 20, 2026
c00f201
chore(optimizer): rename OPTIMIZER_DB_USERNAME → OPTIMIZER_DB_USER
mkuchenbecker May 20, 2026
6998a0f
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 20, 2026
1fe71f0
refactor(optimizer): rename CompleteOperationRequest → UpdateOperatio…
mkuchenbecker May 20, 2026
fb5e726
Merge branch 'mkuchenb/optimizer-0' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
ad0c0f1
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 20, 2026
947bedf
refactor(optimizer): rename completeOperation → updateOperation
mkuchenbecker May 20, 2026
ce5745b
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 20, 2026
b96c388
Merge remote-tracking branch 'linkedin/main' into mkuchenb/optimizer-1
mkuchenbecker May 20, 2026
d65b511
refactor(optimizer-repo): unify find/updateBatch with Optional params
mkuchenbecker May 21, 2026
78de390
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 21, 2026
49e43bc
refactor(optimizer-service): use Optional repo API + configurable limit
mkuchenbecker May 21, 2026
210d5f0
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 21, 2026
040046e
refactor(analyzer): switch to Optional repo API + configurable limit
mkuchenbecker May 21, 2026
b69e09a
test(optimizer-repo): truncate Instant to micros for CI precision
mkuchenbecker May 21, 2026
a028a98
Merge branch 'mkuchenb/optimizer-1' into mkuchenb/optimizer-2
mkuchenbecker May 21, 2026
2b6f67c
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 21, 2026
6eb6a1e
Merge remote-tracking branch 'linkedin/main' into mkuchenb/optimizer-2
mkuchenbecker May 21, 2026
de39a78
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 21, 2026
a89e037
feat(optimizer): require limit on list-API endpoints
mkuchenbecker May 21, 2026
1e361af
feat(optimizer): basic error-code handling across controllers
mkuchenbecker May 22, 2026
a37169d
refactor(optimizer): simplify error handling per PR review
mkuchenbecker May 22, 2026
6416c9d
refactor(optimizer): drop GlobalExceptionHandler + ApiError; use Spri…
mkuchenbecker May 22, 2026
bbef386
refactor(optimizer): revert UpdateOperationRequest doc edits
mkuchenbecker May 22, 2026
02bbc5c
(wip) feat(optimizer): basic error-code handling across controllers (…
mkuchenbecker May 22, 2026
266e2a7
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 22, 2026
144da72
Merge remote-tracking branch 'linkedin/mkuchenb/optimizer-2' into mku…
mkuchenbecker May 22, 2026
c74ad5f
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 22, 2026
6ef7964
docs(optimizer): add @ApiResponses to controllers for OpenAPI spec
mkuchenbecker May 22, 2026
357d5c2
Merge branch 'mkuchenb/optimizer-2' into mkuchenb/optimizer-3
mkuchenbecker May 22, 2026
08bebcb
Merge commit '78ba0abd' into mkuchenb/optimizer-3
mkuchenbecker May 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions apps/optimizer-analyzer/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
plugins {
id 'openhouse.springboot-ext-conventions'
id 'org.springframework.boot' version '2.7.8'
}

dependencies {
implementation project(':services:optimizer')
implementation 'org.springframework.boot:spring-boot-starter:2.7.8'
implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8'
implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8'
implementation 'org.springframework.boot:spring-boot-starter-aop:2.7.8'
runtimeOnly 'mysql:mysql-connector-java:8.0.33'
testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8'
testImplementation 'com.squareup.okhttp3:mockwebserver:4.10.0'
testRuntimeOnly 'com.h2database:h2'
}

test {
useJUnitPlatform()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.linkedin.openhouse.analyzer;

import java.util.List;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.domain.EntityScan;
import org.springframework.context.annotation.Bean;
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;

/** Entry point for the Optimizer Analyzer application. */
@SpringBootApplication
@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.db")
@EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository")
public class AnalyzerApplication {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the idea to run Analyzer as as service to expose an API to analyze a table, but we have optimizer as a service to expose the required APIs? If needed optimizer can internally trigger Analyzer by exposing /optimizer/analyze API. That way we can expose only one service i.e. optimizer and it APIs. If we are aligned on this, can we make Analyzer as Java application (Similar to JobsScheduler) instead running as Spring based application? We should be able to deploy using k8s cron job and can run at regular cadence (say 30 min or so). If we run as scheduled job we can collect stats on every analyzer execution.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I see it as an endpoint long term, and even by the time we release V0.

I see it as Interface > Analyzer module > database. Interface today is a CLI kubectl cron, we can add a persistant API in the future though. So today its invoked directly as code via kubectl. An API can host that same functionality (or a CLI for that matter)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This same comment is true of the scheduler.


public static void main(String[] args) {
SpringApplication.run(AnalyzerApplication.class, args);
}

/**
* Runs the analyzer once per registered {@link OperationAnalyzer} per process invocation. Each
* call is scoped to one operation type; the runner iterates databases internally.
*/
@Bean
public CommandLineRunner run(AnalyzerRunner runner, List<OperationAnalyzer> analyzers) {
return args -> analyzers.forEach(a -> runner.analyze(a.getOperationType()));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package com.linkedin.openhouse.analyzer;

import com.linkedin.openhouse.optimizer.model.OperationTypeDto;
import com.linkedin.openhouse.optimizer.model.TableDto;
import com.linkedin.openhouse.optimizer.model.TableOperationDto;
import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto;
import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository;
import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository;
import com.linkedin.openhouse.optimizer.repository.TableStatsRepository;
import java.time.Instant;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Component;

/**
* Core analysis loop. For one operation type per call, iterates databases and evaluates each table
* in a database against the matching {@link OperationAnalyzer}.
*
* <p>Both sides of the join — current operations and latest history per (table, type) — are loaded
* into maps once per database before the table loop. This is correct at small scale (≤~100k
* tables); past that the per-db query shape and projection need further tuning. Scale-up work is
* tracked in <a href="https://linkedin.atlassian.net/browse/BDP-102182">BDP-102182</a>.
*
* <p>// TODO(scale-test): benchmark the per-db working set at up to 10k tables and measure JVM heap
* residency for the three intermediate maps; per-db iteration bounds memory by tables-per-db rather
* than tables-total, but the upper bound still needs empirical validation.
*/
@Slf4j
@Component
@RequiredArgsConstructor
public class AnalyzerRunner {

private final List<OperationAnalyzer> analyzers;
private final TableStatsRepository statsRepo;
private final TableOperationsRepository operationsRepo;
private final TableOperationsHistoryRepository historyRepo;

@Value("${optimizer.repo.default-limit:10000}")
private int defaultLimit = 10_000;

/**
* Run the analysis loop for {@code operationType} across all databases, with no filters.
* Equivalent to {@link #analyze(OperationTypeDto, Optional, Optional, Optional)} with all-empty
* filters.
*/
public void analyze(OperationTypeDto operationType) {
analyze(operationType, Optional.empty(), Optional.empty(), Optional.empty());
}

/**
* Run the analysis loop for the given operation type, optionally scoped to a single database,
* table name, or table UUID. Iterates databases one at a time so the working set is bounded by
* tables-per-db, not tables-total.
*/
public void analyze(
OperationTypeDto operationType,
Optional<String> databaseName,
Optional<String> tableName,
Optional<String> tableUuid) {
OperationAnalyzer analyzer =
analyzers.stream()
.filter(a -> a.getOperationType() == operationType)
.findFirst()
.orElseThrow(
() ->
new IllegalStateException(
"No analyzer registered for operation type " + operationType));
List<String> dbs = databaseName.map(List::of).orElseGet(statsRepo::findDistinctDatabaseNames);
log.info("Analyzing {} across {} database(s)", operationType, dbs.size());
dbs.forEach(db -> analyzeDatabase(analyzer, db, tableName, tableUuid));
log.info("Analysis complete for {}", operationType);
}

private void analyzeDatabase(
OperationAnalyzer analyzer,
String databaseName,
Optional<String> tableName,
Optional<String> tableUuid) {

com.linkedin.openhouse.optimizer.db.OperationType dbOperationType =
Comment thread
mkuchenbecker marked this conversation as resolved.
analyzer.getOperationType().toDb();

// Pre-load the small sides of the joins — bounded by tables in this database.
PageRequest page = PageRequest.of(0, defaultLimit);
Map<String, TableOperationDto> currentOps =
operationsRepo
.find(
Optional.of(dbOperationType),
Optional.empty(),
tableUuid,
Optional.of(databaseName),
tableName,
Optional.empty(),
Optional.empty(),
page)
.stream()
.filter(e -> e.getTableUuid() != null)
.map(TableOperationDto::fromRow)
.collect(
Collectors.toMap(
TableOperationDto::getTableUuid, op -> op, TableOperationDto::mostRecent));

Map<String, TableOperationsHistoryDto> latestHistory =
historyRepo.findLatest(dbOperationType, page).stream()
.filter(r -> r.getTableUuid() != null)
.map(TableOperationsHistoryDto::fromRow)
.collect(
Collectors.toMap(
TableOperationsHistoryDto::getTableUuid,
h -> h,
AnalyzerRunner::moreRecentHistory));

List<TableDto> tables =
statsRepo.find(Optional.of(databaseName), tableName, tableUuid, page).stream()
.filter(row -> row.getTableUuid() != null)
.map(TableDto::fromRow)
.collect(Collectors.toList());

/*
* For each table in this database, decide whether to create a new PENDING operation.
*
* 1. Skip tables not opted in to this operation type. The opt-in check today reads a
* table-property flag; in the future it will read a denormalized column.
* 2. Look up the table's current active operation (if any) and its most recent completed
* history entry from the maps loaded above.
* 3. Delegate the schedule-or-not decision to the analyzer's shouldSchedule — strategy
* encapsulates cadence, retry policy, and any future per-operation signals.
* 4. On true, persist a new PENDING operation. The scheduler picks it up on its next pass.
*/
tables.forEach(
Comment thread
mkuchenbecker marked this conversation as resolved.
table -> {
if (!analyzer.isEnabled(table)) {
return;
}
Optional<TableOperationDto> currentOp =
Optional.ofNullable(currentOps.get(table.getTableUuid()));
Optional<TableOperationsHistoryDto> entry =
Optional.ofNullable(latestHistory.get(table.getTableUuid()));
if (analyzer.shouldSchedule(table, currentOp, entry)) {
TableOperationDto op = TableOperationDto.pending(table, analyzer.getOperationType());
operationsRepo.save(op.toRow());
log.info(
"Created PENDING {} operation for table {}.{}",
analyzer.getOperationType(),
table.getDatabaseName(),
table.getTableId());
}
});
}

private static TableOperationsHistoryDto moreRecentHistory(
TableOperationsHistoryDto a, TableOperationsHistoryDto b) {
Comparator<TableOperationsHistoryDto> byCompletedAt =
Comparator.comparing(r -> r.getCompletedAt() != null ? r.getCompletedAt() : Instant.EPOCH);
return byCompletedAt.compare(a, b) >= 0 ? a : b;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package com.linkedin.openhouse.analyzer;

import com.linkedin.openhouse.optimizer.model.OperationTypeDto;
import com.linkedin.openhouse.optimizer.model.TableDto;
import com.linkedin.openhouse.optimizer.model.TableOperationDto;
import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto;
import java.time.Duration;
import java.util.Optional;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

/** Analyzer for the {@link OperationTypeDto#ORPHAN_FILES_DELETION} operation type. */
@Component
public class CadenceBasedOrphanFilesDeletionAnalyzer implements OperationAnalyzer {

static final String OFD_ENABLED_PROPERTY = "maintenance.optimizer.ofd.enabled";

private final CadencePolicy cadencePolicy;

@Autowired
public CadenceBasedOrphanFilesDeletionAnalyzer(
@Value("${ofd.success-retry-hours:24}") long successRetryHours,
@Value("${ofd.failure-retry-hours:1}") long failureRetryHours) {
this.cadencePolicy =
new CadencePolicy(Duration.ofHours(successRetryHours), Duration.ofHours(failureRetryHours));
}

/** Package-private for tests that supply a pre-built {@link CadencePolicy}. */
CadenceBasedOrphanFilesDeletionAnalyzer(CadencePolicy cadencePolicy) {
this.cadencePolicy = cadencePolicy;
}

@Override
public OperationTypeDto getOperationType() {
return OperationTypeDto.ORPHAN_FILES_DELETION;
}

@Override
public boolean isEnabled(TableDto table) {
return "true".equals(table.getTableProperties().get(OFD_ENABLED_PROPERTY));
}

@Override
public boolean shouldSchedule(
TableDto table,
Optional<TableOperationDto> currentOp,
Optional<TableOperationsHistoryDto> latestHistory) {
return cadencePolicy.shouldSchedule(currentOp, latestHistory);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package com.linkedin.openhouse.analyzer;

import com.linkedin.openhouse.optimizer.model.HistoryStatusDto;
import com.linkedin.openhouse.optimizer.model.OperationStatusDto;
import com.linkedin.openhouse.optimizer.model.TableOperationDto;
import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto;
import java.time.Duration;
import java.time.Instant;
import java.util.Optional;
import lombok.RequiredArgsConstructor;

/**
* Time-based scheduling policy. An analyzer delegates to {@link CadencePolicy} to decide whether to
* re-issue a recommendation for a table.
*
* <p>The analyzer stays out of any table that already has a non-CANCELED active operation — those
* belong to the scheduler. For tables with no active operation (or only a CANCELED one), the
* decision is based on the most recent completed-history entry: re-evaluate after {@code
* successRetryInterval} on success, or after {@code failureRetryInterval} on failure.
*/
@RequiredArgsConstructor
public class CadencePolicy {

/**
* How long to wait after a successful operation before re-evaluating the table. For example, if
* set to 24 hours and OFD succeeded at 10:00 AM Monday, the table won't be scheduled again until
* after 10:00 AM Tuesday.
*/
private final Duration successRetryInterval;

/**
* How long to wait after a failed operation before retrying. Shorter than the success interval to
* allow quick recovery. For example, if set to 1 hour and OFD failed at 2:00 PM, the table
* becomes eligible for retry at 3:00 PM.
*/
private final Duration failureRetryInterval;

/**
* Returns {@code true} if a new or refreshed operation record should be upserted.
*
* @param currentOp the existing active operation record, or empty if none exists
* @param latestHistory the most recent history entry for this (table, type), or empty
*/
public boolean shouldSchedule(
Optional<TableOperationDto> currentOp, Optional<TableOperationsHistoryDto> latestHistory) {
if (currentOp.isPresent() && currentOp.get().getStatus() != OperationStatusDto.CANCELED) {
return false;
}
return latestHistory.map(this::readyAfterHistoryEntry).orElse(true);
}

private boolean readyAfterHistoryEntry(TableOperationsHistoryDto entry) {
Duration interval =
entry.getStatus() == HistoryStatusDto.FAILED ? failureRetryInterval : successRetryInterval;
return Duration.between(entry.getCompletedAt(), Instant.now()).compareTo(interval) > 0;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.linkedin.openhouse.analyzer;

import com.linkedin.openhouse.optimizer.model.OperationTypeDto;
import com.linkedin.openhouse.optimizer.model.TableDto;
import com.linkedin.openhouse.optimizer.model.TableOperationDto;
import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto;
import java.util.Optional;

/**
* Strategy interface for a single operation type. Each implementation decides whether a given table
* needs an operation recommendation upserted in the Optimizer Service.
*
* <p>// TODO(circuit-breaker): a chronically-failing table currently produces a new PENDING row on
* every Analyzer pass. Add a circuit breaker that suppresses scheduling for a (table, type) after N
* consecutive FAILED history entries. Requirements: configurable threshold per operation type,
* automatic reset via exponential backoff so tables can recover, and an operator-visible signal
* (metric or query) so tripped breakers are diagnosable.
*/
public interface OperationAnalyzer {

/** The operation type this analyzer handles. */
OperationTypeDto getOperationType();

/**
* Returns {@code true} if this operation is opted-in for the given table. Tables that return
* {@code false} are skipped entirely — no upsert is issued.
*/
boolean isEnabled(TableDto table);

/**
* Returns {@code true} if a new or refreshed operation record should be upserted.
*
* @param table the table entry
* @param currentOp the existing active operation record, or empty if none exists
* @param latestHistory the most recent history entry for this (table, type), or empty
*/
boolean shouldSchedule(
TableDto table,
Optional<TableOperationDto> currentOp,
Optional<TableOperationsHistoryDto> latestHistory);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
spring.application.name=openhouse-optimizer-analyzer
spring.main.web-application-type=none
spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:h2:mem:analyzerdb;DB_CLOSE_DELAY=-1;MODE=MySQL}
spring.datasource.username=${OPTIMIZER_DB_USER:sa}
spring.datasource.password=${OPTIMIZER_DB_PASSWORD:}
spring.jpa.hibernate.ddl-auto=none
ofd.success-retry-hours=24
ofd.failure-retry-hours=1
optimizer.repo.default-limit=10000
Loading