Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2119555
feat(optimizer): add data model — schema, entities, DTOs, converters
mkuchenbecker Apr 3, 2026
3c93d52
fix: address PR review feedback on optimizer data model
mkuchenbecker Apr 3, 2026
02a5ab3
fix: remove orphan fields from CompleteOperationRequest
mkuchenbecker Apr 6, 2026
1cbe556
Merge branch 'main' into mkuchenb/optimizer-0
mkuchenbecker Apr 30, 2026
f82d1b3
fix(optimizer): address PR #527 review feedback
mkuchenbecker May 1, 2026
79753f1
fix(optimizer): index table_operations_history on (database_name, tab…
mkuchenbecker May 1, 2026
9a129a8
refactor(optimizer): align data model — rename HistoryStatus; String …
mkuchenbecker May 14, 2026
681407e
feat(optimizer): add internal model layer
mkuchenbecker May 14, 2026
e3fb777
perf(optimizer): index table_operations_history for findLatestPerTable
mkuchenbecker May 14, 2026
d3e1726
refactor(optimizer): enforce layer boundaries in api/ + model/
mkuchenbecker May 14, 2026
1d469a7
refactor(optimizer): remove db-layer types from optimizer-0
mkuchenbecker May 14, 2026
eee8eca
refactor(optimizer): remove DB schema + schema-init properties
mkuchenbecker May 14, 2026
328e5b9
refactor(optimizer): scrub MySQL / JPA / datasource references
mkuchenbecker May 14, 2026
f7a5d20
refactor(optimizer): drop UpsertTableOperationsRequest
mkuchenbecker May 14, 2026
2a532b5
refactor(optimizer): drop JobResult from the wire and internal model
mkuchenbecker May 14, 2026
2e3a231
feat(optimizer): add debug echo fields to CompleteOperationRequest
mkuchenbecker May 14, 2026
db5eb29
refactor(optimizer): move application.properties out of optimizer-0
mkuchenbecker May 14, 2026
861b584
feat(optimizer): extend model layer for service-only types
mkuchenbecker May 14, 2026
188713d
docs(optimizer): comment every field on opt-0 api/ and model/ types
mkuchenbecker May 14, 2026
8d64273
refactor(optimizer): remove clusterId from SnapshotMetrics
mkuchenbecker May 14, 2026
c72aae8
refactor(optimizer): move api↔model conversion onto api types; delete…
mkuchenbecker May 14, 2026
af23d5e
fix(optimizer): make TableStats self-describing; route DTO conversion…
mkuchenbecker May 15, 2026
3aebf64
chore(optimizer): enable toBuilder on model.Table and model.TableOper…
mkuchenbecker May 15, 2026
b6c7f42
refactor(optimizer): drop fileCount enrichment from model.TableOperation
mkuchenbecker May 18, 2026
437a0ed
refactor(optimizer): add Dto suffix to all api/model classes (PR #527…
mkuchenbecker May 19, 2026
4f98c22
refactor(optimizer): rename api.model package to api.spec (PR #527 re…
mkuchenbecker May 19, 2026
b31decf
refactor(optimizer): move Dto suffix from api/spec to model
mkuchenbecker May 20, 2026
4e86569
feat(optimizer): propagate jobId through model + api conversions
mkuchenbecker May 20, 2026
1fe71f0
refactor(optimizer): rename CompleteOperationRequest → UpdateOperatio…
mkuchenbecker May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ tasks.register('CopyGitHooksTask', Copy) {
// tables-service.Dockerfile -> :services:tables:bootJar
// housetables-service.Dockerfile -> :services:housetables:bootJar
// jobs-service.Dockerfile -> :services:jobs:bootJar
// optimizer-service.Dockerfile -> :services:optimizer:bootJar
// jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR)
// spark-base-hadoop2.8.dockerfile ->
// :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR)
Expand All @@ -196,6 +197,7 @@ tasks.register('dockerPrereqs') {
dependsOn ':services:tables:bootJar'
dependsOn ':services:housetables:bootJar'
dependsOn ':services:jobs:bootJar'
dependsOn ':services:optimizer:bootJar'

// Spark runtime uber JARs (shadowJar)
dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar'
Expand All @@ -219,6 +221,7 @@ tasks.register('dockerPrereqs') {
println ' build/tables/libs/tables.jar'
println ' build/housetables/libs/housetables.jar'
println ' build/jobs/libs/jobs.jar'
println ' build/optimizer/libs/optimizer.jar'
println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar'
println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar'
println ' build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar'
Expand Down
13 changes: 13 additions & 0 deletions services/optimizer/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Build script for the optimizer service module (services/optimizer).
plugins {
// Presumably a project-local convention plugin defined elsewhere in the build — confirm what it configures.
id 'openhouse.springboot-ext-conventions'
// Spring Boot Gradle plugin; the root build depends on this module's bootJar task.
id 'org.springframework.boot' version '2.7.8'
}

// Dependency versions are pinned explicitly to the same 2.7.8 as the Spring Boot plugin above;
// keep all three in sync when upgrading.
dependencies {
implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8'
testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8'
}

// Run this module's tests on the JUnit Platform.
test {
useJUnitPlatform()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.linkedin.openhouse.optimizer;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Spring Boot entry point for the Optimizer Service.
 *
 * <p>The class itself is the primary source handed to {@link SpringApplication}; it carries no
 * state and no beans of its own.
 */
@SpringBootApplication
public class OptimizerServiceApplication {

  /**
   * Starts the Spring application with this class as its primary source.
   *
   * @param args command-line arguments, forwarded unchanged to Spring Boot
   */
  public static void main(String[] args) {
    // Instance form of the static SpringApplication.run(Class, String...) convenience method.
    new SpringApplication(OptimizerServiceApplication.class).run(args);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package com.linkedin.openhouse.optimizer.api.spec;

/** Terminal states for a completed Spark maintenance job. */
public enum HistoryStatus {

/** The Spark job for this operation completed successfully. */
SUCCESS,
Comment thread
mkuchenbecker marked this conversation as resolved.

/** The Spark job for this operation failed. */
FAILED;

/** Convert to the internal-model counterpart. */
public com.linkedin.openhouse.optimizer.model.HistoryStatusDto toModel() {
return com.linkedin.openhouse.optimizer.model.HistoryStatusDto.valueOf(name());
}

/** Build the api-layer enum from the internal-model counterpart. */
public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatusDto v) {
return v == null ? null : HistoryStatus.valueOf(v.name());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.linkedin.openhouse.optimizer.api.spec;

/** Lifecycle states for a table operation recommendation. */
public enum OperationStatus {

  /** Recommended by the Analyzer; the Scheduler has not claimed it yet. */
  PENDING,

  /** Claimed by the Scheduler, which is still waiting for the Jobs Service to return a job ID. */
  SCHEDULING,

  /** Submitted to the Jobs Service; from here on the row carries a {@code jobId}. */
  SCHEDULED,

  /**
   * Set by the Scheduler on duplicate PENDING rows for the same {@code (table_uuid,
   * operation_type)}. The most-recent PENDING row is the only one claimed; the older duplicates
   * are CANCELED before the claim step runs.
   */
  CANCELED;

  /**
   * Maps this api-layer value to the internal-model constant of the same name.
   *
   * @return the internal-model value whose name equals {@link #name()}
   */
  public com.linkedin.openhouse.optimizer.model.OperationStatusDto toModel() {
    final String constantName = name();
    return com.linkedin.openhouse.optimizer.model.OperationStatusDto.valueOf(constantName);
  }

  /**
   * Maps an internal-model value back to the api-layer constant of the same name.
   *
   * @param v the internal-model value; may be {@code null}
   * @return the matching constant, or {@code null} when {@code v} is {@code null}
   */
  public static OperationStatus fromModel(
      com.linkedin.openhouse.optimizer.model.OperationStatusDto v) {
    if (v == null) {
      return null;
    }
    return OperationStatus.valueOf(v.name());
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.linkedin.openhouse.optimizer.api.spec;

/** Maintenance operation types supported by the continuous optimizer. */
public enum OperationType {
/** Removes orphaned data files no longer referenced by table metadata. */
ORPHAN_FILES_DELETION;
Comment thread
mkuchenbecker marked this conversation as resolved.

/** Convert to the internal-model counterpart. */
public com.linkedin.openhouse.optimizer.model.OperationTypeDto toModel() {
return com.linkedin.openhouse.optimizer.model.OperationTypeDto.valueOf(name());
}

/** Build the api-layer enum from the internal-model counterpart. */
public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationTypeDto v) {
return v == null ? null : OperationType.valueOf(v.name());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package com.linkedin.openhouse.optimizer.api.spec;

import com.linkedin.openhouse.optimizer.model.TableOperationDto;
import java.time.Instant;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/** DTO for {@code table_operations} — Analyzer recommendations read by the Scheduler. */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableOperations {

  /** Client-generated UUID identifying this specific operation recommendation. */
  private String id;

  /** Stable table identity from the Tables Service. */
  private String tableUuid;

  /** Denormalized database name for display; not part of the primary key. */
  private String databaseName;

  /** Denormalized table name for display; not part of the primary key. */
  private String tableName;

  /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */
  private OperationType operationType;

  /**
   * Current lifecycle state; any {@link OperationStatus} value ({@code PENDING}, {@code
   * SCHEDULING}, {@code SCHEDULED} or {@code CANCELED}). This class applies no default, so the
   * field is {@code null} unless set; new rows are presumably created as {@code PENDING} by the
   * server — confirm.
   */
  private OperationStatus status;

  /** Server-set when the row is first created by the Analyzer. */
  private Instant createdAt;

  /** Set by the Scheduler when claiming; {@code null} while PENDING. */
  private Instant scheduledAt;

  /** Job ID returned by the Jobs Service after successful submission. */
  private String jobId;

  /**
   * Converts this wire DTO to the internal-model counterpart.
   *
   * <p>Every field is copied as-is; the two enum fields are translated through their own {@code
   * toModel()} methods, and a {@code null} enum stays {@code null}.
   *
   * @return a new {@link TableOperationDto} carrying the same field values
   */
  public TableOperationDto toModel() {
    return TableOperationDto.builder()
        .id(id)
        .tableUuid(tableUuid)
        .databaseName(databaseName)
        .tableName(tableName)
        .operationType(operationType == null ? null : operationType.toModel())
        .status(status == null ? null : status.toModel())
        .createdAt(createdAt)
        .scheduledAt(scheduledAt)
        .jobId(jobId)
        .build();
  }

  /**
   * Builds a wire DTO from the internal-model counterpart.
   *
   * @param op the internal-model operation; may be {@code null}
   * @return a new {@code TableOperations} with the same field values, or {@code null} when {@code
   *     op} is {@code null}
   */
  public static TableOperations fromModel(TableOperationDto op) {
    if (op == null) {
      return null;
    }
    return TableOperations.builder()
        .id(op.getId())
        .tableUuid(op.getTableUuid())
        .databaseName(op.getDatabaseName())
        .tableName(op.getTableName())
        // The enum converters are null-safe, so no guard is needed here.
        .operationType(OperationType.fromModel(op.getOperationType()))
        .status(OperationStatus.fromModel(op.getStatus()))
        .createdAt(op.getCreatedAt())
        .scheduledAt(op.getScheduledAt())
        .jobId(op.getJobId())
        .build();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package com.linkedin.openhouse.optimizer.api.spec;

import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto;
import java.time.Instant;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/** DTO for {@code table_operations_history} — append-only operation results. */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableOperationsHistory {

/** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */
private String id;

/** Stable table identity from the Tables Service. */
private String tableUuid;

/** Denormalized database name for display. */
private String databaseName;

/** Denormalized table name for display. */
private String tableName;

/** The type of maintenance operation this history row records. */
private OperationType operationType;

/** When the operation completed, as recorded by the complete endpoint. */
private Instant completedAt;

/** {@code SUCCESS} or {@code FAILED}. */
private HistoryStatus status;

/** Convert to the internal-model counterpart. */
public TableOperationsHistoryDto toModel() {
return TableOperationsHistoryDto.builder()
.id(id)
.tableUuid(tableUuid)
.databaseName(databaseName)
.tableName(tableName)
.operationType(operationType == null ? null : operationType.toModel())
.completedAt(completedAt)
.status(status == null ? null : status.toModel())
.build();
}

/** Build a wire DTO from the internal-model counterpart. */
public static TableOperationsHistory fromModel(TableOperationsHistoryDto h) {
if (h == null) {
return null;
}
return TableOperationsHistory.builder()
.id(h.getId())
.tableUuid(h.getTableUuid())
.databaseName(h.getDatabaseName())
.tableName(h.getTableName())
.operationType(OperationType.fromModel(h.getOperationType()))
.completedAt(h.getCompletedAt())
.status(HistoryStatus.fromModel(h.getStatus()))
.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package com.linkedin.openhouse.optimizer.api.spec;

import java.time.Instant;
import java.util.Collections;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/** DTO for {@code table_stats} — used for response payloads. */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TableStats {

/** Stable Iceberg table UUID. Primary key of the stats row. */
private String tableUuid;

/** Denormalized database name for display. */
private String databaseName;

/** Denormalized table name for display. */
private String tableName;

/** Combined snapshot + delta stats payload, stored as JSON. */
private TableStatsPayload stats;

/** Current table properties snapshot (e.g. maintenance opt-in flags). */
private Map<String, String> tableProperties;

/** When this row was last written. Used for staleness monitoring. */
private Instant updatedAt;
Comment thread
mkuchenbecker marked this conversation as resolved.

/** Convert to the internal-model counterpart. */
public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() {
com.linkedin.openhouse.optimizer.model.TableStatsDto payload =
stats == null
? new com.linkedin.openhouse.optimizer.model.TableStatsDto()
: stats.toModel();
return payload
.toBuilder()
.tableUuid(tableUuid)
.databaseName(databaseName)
.tableName(tableName)
.tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap())
.updatedAt(updatedAt)
.build();
}

/** Build a wire DTO from the internal-model counterpart. */
public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStatsDto m) {
if (m == null) {
return null;
}
return TableStats.builder()
.tableUuid(m.getTableUuid())
.databaseName(m.getDatabaseName())
.tableName(m.getTableName())
.stats(
TableStatsPayload.builder()
.snapshot(TableStatsPayload.SnapshotMetricsDto.fromModel(m.getSnapshot()))
.delta(TableStatsPayload.CommitDeltaDto.fromModel(m.getDelta()))
.build())
.tableProperties(m.getTableProperties())
.updatedAt(m.getUpdatedAt())
.build();
}
}
Loading