From 2119555c2fe7748f704dfdf245fb32921349ba52 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:00:58 -0700 Subject: [PATCH 01/28] =?UTF-8?q?feat(optimizer):=20add=20data=20model=20?= =?UTF-8?q?=E2=80=94=20schema,=20entities,=20DTOs,=20converters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the optimizer service module with: - MySQL/H2 schema for table_operations, table_stats, table_stats_history, and table_operations_history - JPA entities with JSON column support (vladmihalcea hibernate-types) - All model/DTO/enum types: OperationType, OperationStatus, TableStats, CompleteOperationRequest, JobResult, OperationMetrics, etc. - JPA AttributeConverters for JobResult and OperationMetrics JSON columns - MapStruct mapper (OptimizerMapper) for entity→DTO conversion - Spring Boot application shell and build wiring (settings.gradle, build.gradle dockerPrereqs) No repositories, controllers, or service layer yet — those follow in subsequent PRs. 
Co-Authored-By: Claude Opus 4.6 --- build.gradle | 3 + services/optimizer/build.gradle | 17 ++++ .../OptimizerServiceApplication.java | 13 +++ .../optimizer/api/mapper/OptimizerMapper.java | 32 ++++++ .../api/model/CompleteOperationRequest.java | 31 ++++++ .../optimizer/api/model/JobResult.java | 25 +++++ .../api/model/OperationHistoryStatus.java | 7 ++ .../optimizer/api/model/OperationMetrics.java | 24 +++++ .../optimizer/api/model/OperationStatus.java | 21 ++++ .../optimizer/api/model/OperationType.java | 12 +++ .../api/model/TableOperationsDto.java | 40 ++++++++ .../api/model/TableOperationsHistoryDto.java | 43 ++++++++ .../optimizer/api/model/TableStats.java | 48 +++++++++ .../optimizer/api/model/TableStatsDto.java | 23 +++++ .../api/model/TableStatsHistoryDto.java | 22 +++++ .../model/UpsertTableOperationsRequest.java | 26 +++++ .../api/model/UpsertTableStatsRequest.java | 25 +++++ .../optimizer/config/JobResultConverter.java | 39 ++++++++ .../config/OperationMetricsConverter.java | 44 +++++++++ .../entity/TableOperationsHistoryRow.java | 91 +++++++++++++++++ .../optimizer/entity/TableOperationsRow.java | 99 +++++++++++++++++++ .../entity/TableStatsHistoryRow.java | 64 ++++++++++++ .../optimizer/entity/TableStatsRow.java | 57 +++++++++++ .../optimizer/entity/package-info.java | 2 + .../src/main/resources/application.properties | 20 ++++ .../main/resources/db/optimizer-schema.sql | 53 ++++++++++ .../resources/application-test.properties | 12 +++ settings.gradle | 1 + 28 files changed, 894 insertions(+) create mode 100644 services/optimizer/build.gradle create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java create mode 100644 services/optimizer/src/main/resources/application.properties create mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql create mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/build.gradle b/build.gradle index 4699ca592..4cfac4a5d 100644 --- a/build.gradle +++ b/build.gradle @@ -157,6 +157,7 @@ tasks.register('CopyGitHooksTask', Copy) { // tables-service.Dockerfile -> :services:tables:bootJar // housetables-service.Dockerfile -> :services:housetables:bootJar // jobs-service.Dockerfile -> :services:jobs:bootJar +// optimizer-service.Dockerfile -> :services:optimizer:bootJar // jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) // spark-base-hadoop2.8.dockerfile -> // :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR) @@ -176,6 +177,7 @@ tasks.register('dockerPrereqs') { dependsOn ':services:tables:bootJar' dependsOn ':services:housetables:bootJar' dependsOn ':services:jobs:bootJar' + dependsOn ':services:optimizer:bootJar' // Spark runtime uber JARs (shadowJar) dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar' @@ -196,6 +198,7 @@ tasks.register('dockerPrereqs') { println ' build/tables/libs/tables.jar' println ' build/housetables/libs/housetables.jar' println ' build/jobs/libs/jobs.jar' + println ' build/optimizer/libs/optimizer.jar' println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar' println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar' println ' 
build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar' diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle new file mode 100644 index 000000000..c05c7f9c3 --- /dev/null +++ b/services/optimizer/build.gradle @@ -0,0 +1,17 @@ +plugins { + id 'openhouse.springboot-ext-conventions' + id 'org.springframework.boot' version '2.7.8' +} + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'mysql:mysql-connector-java:8.+' + testImplementation 'com.h2database:h2:2.2.224' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' +} + +test { + useJUnitPlatform() +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java new file mode 100644 index 000000000..38eb363a8 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +/** Spring Boot entry point for the Optimizer Service. 
*/ +@SpringBootApplication +public class OptimizerServiceApplication { + + public static void main(String[] args) { + SpringApplication.run(OptimizerServiceApplication.class, args); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java new file mode 100644 index 000000000..8c0b17462 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.api.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import org.mapstruct.Mapper; + +/** + * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. + * + *

The implementation is generated at compile time and registered as a Spring bean. Inject via {@code @Autowired} or constructor injection. + */ +@Mapper(componentModel = "spring") +public interface OptimizerMapper { + + /** Map a {@link TableOperationsRow} to its DTO. */ + TableOperationsDto toDto(TableOperationsRow row); + + /** Map a {@link TableOperationsHistoryRow} to its DTO. */ + TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); + + /** Map a {@link TableStatsRow} to its DTO. */ + TableStatsDto toDto(TableStatsRow row); + + /** Map a {@link TableStatsHistoryRow} to its DTO. */ + TableStatsHistoryDto toDto(TableStatsHistoryRow row); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java new file mode 100644 index 000000000..c26893197 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -0,0 +1,31 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code POST /v1/table-operations/{id}/complete}. + * + *

Reports the outcome of a completed operation. The backend looks up the operation row by {@code + * id} and writes a history entry with the operation's table metadata and the supplied result. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class CompleteOperationRequest { + + /** Outcome of the operation. */ + private OperationHistoryStatus status; + + /** Error details on failure; {@code null} on success. */ + private JobResult result; + + /** Number of orphan files deleted; set by OFD Spark app on success. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java new file mode 100644 index 000000000..74942243c --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Result payload for a completed Spark maintenance job. + * + *

Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields + * are {@code null} on success; populated on failure. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java new file mode 100644 index 000000000..791d910a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java @@ -0,0 +1,7 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Terminal states for a completed Spark maintenance job. */ +public enum OperationHistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java new file mode 100644 index 000000000..d6f788fcc --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Denormalized stats snapshot captured by the Analyzer at analysis time. + * + *

Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are + * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, + * not cumulative totals. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class OperationMetrics { + + private Long tableSizeBytes; + private Integer numFilesAdded; + private Integer numFilesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java new file mode 100644 index 000000000..c97be441b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -0,0 +1,21 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Lifecycle states for a table operation recommendation. */ +public enum OperationStatus { + + /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ + PENDING, + + /** Claimed by the Scheduler; waiting for the Jobs Service to return a job ID. */ + SCHEDULING, + + /** Job submitted to the Jobs Service; the row now carries a {@code jobId}. */ + SCHEDULED, + + /** + * Marked by the Scheduler when it detects duplicate PENDING rows for the same {@code (table_uuid, + * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED + * before the claim step. 
+ */ + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java new file mode 100644 index 000000000..05e4a1e7b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -0,0 +1,12 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** + * Maintenance operation types supported by the continuous optimizer. + * + *

Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as + * they are built out. + */ +public enum OperationType { + /** Removes orphaned data files no longer referenced by table metadata. */ + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java new file mode 100644 index 000000000..5eb5eaaa6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -0,0 +1,40 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations} — Analyzer recommendations read by the Scheduler. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsDto { + + /** Client-generated UUID identifying this specific operation recommendation. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** Current {@link OperationStatus}. Defaults to {@code PENDING} on creation. */ + private OperationStatus status; + + /** Server-set when the row is first created by the Analyzer. */ + private Instant createdAt; + + /** Set by the Scheduler when claiming; {@code null} while PENDING. */ + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + private String jobId; + + /** Denormalized stats snapshot captured at analysis time. 
*/ + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java new file mode 100644 index 000000000..7dca34271 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -0,0 +1,43 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations_history} — append-only Spark job results. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistoryDto { + + /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** When the Spark job was submitted / ran. */ + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + private OperationHistoryStatus status; + + /** Spark job ID. */ + private String jobId; + + /** Job result payload; both fields null on success. */ + private JobResult result; + + /** Number of orphan files deleted; null for non-OFD operations or before completion. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..cb77d994f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,48 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java new file mode 100644 index 000000000..1663d5ab0 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats} — used for response payloads. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsDto { + + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java new file mode 100644 index 000000000..142f00245 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -0,0 +1,22 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats_history} — used for response payloads. 
*/ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistoryDto { + + private Long id; + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java new file mode 100644 index 000000000..19dd1baac --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * PUT request body for {@code /v1/table-operations/{id}}. + * + *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all + * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, + * updates {@code metrics} on subsequent calls with the same {@code id}. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableOperationsRequest { + + private String tableUuid; + private String databaseName; + private String tableName; + private OperationType operationType; + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java new file mode 100644 index 000000000..3214a85a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code PUT /v1/table-stats/{tableUuid}}. + * + *

{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + * denormalized display columns carried in the body. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableStatsRequest { + + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java new file mode 100644 index 000000000..4c9bfbe76 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java @@ -0,0 +1,39 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ +@Converter +public class JobResultConverter implements AttributeConverter<JobResult, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(JobResult attribute) { + if (attribute == null) { + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } + + @Override + public JobResult convertToEntityAttribute(String dbData) { + if (dbData == null) { + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, JobResult.class); + } catch (IOException e) { + throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java new file mode 100644 index 000000000..27f0882f5 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java @@ -0,0 +1,44 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** + * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. + */ +@Converter +public class OperationMetricsConverter implements AttributeConverter<OperationMetrics, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(OperationMetrics attribute) { + // Null metrics are valid for PENDING operations that have not yet produced output. 
+ if (attribute == null) { + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); + } + } + + @Override + public OperationMetrics convertToEntityAttribute(String dbData) { + // Null is stored for PENDING rows; return null so the entity reflects that state. + if (dbData == null) { + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); + } catch (IOException e) { + throw new IllegalStateException( + "Failed to deserialize OperationMetrics from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java new file mode 100644 index 000000000..6a47b5022 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -0,0 +1,91 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.JobResultConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Append-only record of a completed Spark maintenance job. + * + *

Written by the Spark app after each table's operation finishes. The {@code id} is the same + * UUID as the originating {@code table_operations.id}, tying each history entry directly back to + * the specific operation cycle that produced it. Multiple runs of the same operation on the same + * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + */ +@Entity +@Table( + name = "table_operations_history", + indexes = { + @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), + @Index(name = "idx_op_type_hist", columnList = "operation_type"), + @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_status_hist", columnList = "status"), + @Index(name = "idx_job_id", columnList = "job_id") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsHistoryRow { + + /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + /** When the Spark job was submitted / ran, as reported by the job itself. */ + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationHistoryStatus status; + + /** Spark job ID; indexed for job → result lookups. 
*/ + @Column(name = "job_id", length = 255) + private String jobId; + + /** Job result: error details on failure, both fields null on success. */ + @Convert(converter = JobResultConverter.class) + @Column(name = "result") + private JobResult result; + + /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ + @Column(name = "orphan_files_deleted") + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. */ + @Column(name = "orphan_bytes_deleted") + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java new file mode 100644 index 000000000..9d835aa20 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -0,0 +1,99 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JPA entity representing an Analyzer recommendation for a table maintenance operation. + * + *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row + * when it first recommends an operation for a table, or when re-recommending after a prior terminal + * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code + * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). + * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, + * operation_type)} at a time. + */ +@Entity +@Table( + name = "table_operations", + indexes = { + @Index(name = "idx_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_op_type", columnList = "operation_type"), + @Index(name = "idx_status", columnList = "status"), + @Index(name = "idx_created_at", columnList = "created_at"), + @Index(name = "idx_scheduled_at", columnList = "scheduled_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsRow { + + /** Client-generated UUID identifying this specific operation recommendation. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationStatus status; + + /** When the Analyzer first created this row. Set by the service on insert; never updated. 
*/ + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ + @Column(name = "scheduled_at") + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + @Column(name = "job_id", length = 255) + private String jobId; + + /** + * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} + * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. + */ + @Column(name = "version") + private Long version; + + /** + * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file + * counts as of the moment the Analyzer ran. + */ + @Convert(converter = OperationMetricsConverter.class) + @Column(name = "metrics") + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java new file mode 100644 index 000000000..85d97a5eb --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -0,0 +1,64 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import 
org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * query this table to reconstruct change rates over arbitrary time windows. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id", nullable = false) + private Long id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..71d6a9421 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,57 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; 
+import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java new file mode 100644 index 000000000..7c0ca1f67 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java @@ -0,0 +1,2 @@ +/** JPA entities for the optimizer service. 
*/ +package com.linkedin.openhouse.optimizer.entity; diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties new file mode 100644 index 000000000..c6c3f8437 --- /dev/null +++ b/services/optimizer/src/main/resources/application.properties @@ -0,0 +1,20 @@ +spring.application.name=openhouse-optimizer-service +server.port=8080 + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect +spring.jpa.properties.hibernate.show_sql=false +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} +spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} +spring.datasource.hikari.maximum-pool-size=20 + +management.endpoints.web.exposure.include=health,prometheus +management.endpoint.health.enabled=true diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql new file mode 100644 index 000000000..53062c5ad --- /dev/null +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -0,0 +1,53 @@ +-- Optimizer Service Schema +-- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
+CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + metrics TEXT, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); + +CREATE TABLE IF NOT EXISTS table_stats_history ( + id BIGINT NOT NULL AUTO_INCREMENT, + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + recorded_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (id), + INDEX idx_tsh_table_uuid (table_uuid), + INDEX idx_tsh_recorded_at (recorded_at) +); + +CREATE TABLE IF NOT EXISTS table_operations_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + submitted_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + job_id VARCHAR(255), + result TEXT, + orphan_files_deleted INT, + orphan_bytes_deleted BIGINT, + PRIMARY KEY (id) +); diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties new file mode 100644 index 000000000..97b7841dc --- /dev/null +++ b/services/optimizer/src/test/resources/application-test.properties @@ -0,0 +1,12 @@ +spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 +spring.datasource.driver-class-name=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.hibernate.ddl-auto=none 
+spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql diff --git a/settings.gradle b/settings.gradle index 035e54349..cad06785e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -49,6 +49,7 @@ include ':libs:datalayout' include ':services:common' include ':services:housetables' include ':services:jobs' +include ':services:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 3c93d52f21ce82cc01ae37fef8ca5c1dba2522e1 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:35:45 -0700 Subject: [PATCH 02/28] fix: address PR review feedback on optimizer data model - Remove OperationMetrics class and converter; stats are read directly from table_stats instead of duplicating into operations - Remove orphanFilesDeleted/orphanBytesDeleted from history entity, DTO, and schema; operation-specific data belongs in the result JSON - Add addedSizeBytes to CommitDelta for tracking write volume - Fix OperationType javadoc to describe current state, not roadmap - Fix TableOperationsHistoryRow javadoc: written on operation complete, not by Spark app directly - Add field comments to all DTOs and request objects Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/OperationMetrics.java | 24 ---------- .../optimizer/api/model/OperationType.java | 7 +-- .../api/model/TableOperationsDto.java | 9 +++- .../api/model/TableOperationsHistoryDto.java | 12 ++--- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 11 +++++ .../api/model/TableStatsHistoryDto.java | 11 +++++ .../model/UpsertTableOperationsRequest.java | 11 +++-- 
.../api/model/UpsertTableStatsRequest.java | 7 +++ .../config/OperationMetricsConverter.java | 44 ------------------- .../entity/TableOperationsHistoryRow.java | 20 +++------ .../optimizer/entity/TableOperationsRow.java | 10 ++--- .../main/resources/db/optimizer-schema.sql | 2 - 13 files changed, 58 insertions(+), 111 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java deleted file mode 100644 index d6f788fcc..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Denormalized stats snapshot captured by the Analyzer at analysis time. - * - *

Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are - * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, - * not cumulative totals. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class OperationMetrics { - - private Long tableSizeBytes; - private Integer numFilesAdded; - private Integer numFilesDeleted; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 05e4a1e7b..8507bae12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -1,11 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; -/** - * Maintenance operation types supported by the continuous optimizer. - * - *

Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as - * they are built out. - */ +/** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 5eb5eaaa6..9c33d8907 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -19,8 +19,13 @@ public class TableOperationsDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display; not part of the primary key. */ private String databaseName; + + /** Denormalized table name for display; not part of the primary key. */ private String tableName; + + /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ @@ -35,6 +40,6 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - /** Denormalized stats snapshot captured at analysis time. */ - private OperationMetrics metrics; + /** Reserved for future per-operation metadata; currently unused. 
*/ + private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 7dca34271..efc9bebbb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -6,7 +6,7 @@ import lombok.Data; import lombok.NoArgsConstructor; -/** DTO for {@code table_operations_history} — append-only Spark job results. */ +/** DTO for {@code table_operations_history} — append-only operation results. */ @Data @Builder @NoArgsConstructor @@ -23,21 +23,15 @@ public class TableOperationsHistoryDto { private String tableName; private OperationType operationType; - /** When the Spark job was submitted / ran. */ + /** When the operation completed, as recorded by the complete endpoint. */ private Instant submittedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; - /** Spark job ID. */ + /** Job ID from the Jobs Service. */ private String jobId; /** Job result payload; both fields null on success. */ private JobResult result; - - /** Number of orphan files deleted; null for non-OFD operations or before completion. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index cb77d994f..51aa8a712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -43,6 +43,7 @@ public static class SnapshotMetrics { public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; + private Long addedSizeBytes; private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 1663d5ab0..a668af434 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -14,10 +14,21 @@ @AllArgsConstructor public class TableStatsDto { + /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload, stored as JSON. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** When this row was last written. Used for staleness monitoring. 
*/ private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 142f00245..0604e07de 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,10 +13,21 @@ @AllArgsConstructor public class TableStatsHistoryDto { + /** Auto-increment primary key. */ private Long id; + + /** Stable Iceberg table UUID. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Snapshot + delta stats from this commit event. */ private TableStats stats; + + /** When this history row was recorded. */ private Instant recordedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java index 19dd1baac..21174c337 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -9,8 +9,7 @@ * PUT request body for {@code /v1/table-operations/{id}}. * *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, - * updates {@code metrics} on subsequent calls with the same {@code id}. + * table-identifying fields in this body. The service creates the row on first call. */ @Data @Builder @@ -18,9 +17,15 @@ @AllArgsConstructor public class UpsertTableOperationsRequest { + /** Stable Iceberg table UUID identifying the target table. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; + + /** The type of maintenance operation to create. */ private OperationType operationType; - private OperationMetrics metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 3214a85a6..721c3deaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -18,8 +18,15 @@ @AllArgsConstructor public class UpsertTableStatsRequest { + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload from this commit. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java deleted file mode 100644 index 27f0882f5..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** - * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. - */ -@Converter -public class OperationMetricsConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(OperationMetrics attribute) { - // Null metrics are valid for PENDING operations that have not yet produced output. - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); - } - } - - @Override - public OperationMetrics convertToEntityAttribute(String dbData) { - // Null is stored for PENDING rows; return null so the entity reflects that state. 
- if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); - } catch (IOException e) { - throw new IllegalStateException( - "Failed to deserialize OperationMetrics from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6a47b5022..e7493024c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -21,12 +21,12 @@ import lombok.NoArgsConstructor; /** - * Append-only record of a completed Spark maintenance job. + * Append-only record of a completed maintenance operation. * - *

<p>Written by the Spark app after each table's operation finishes. The {@code id} is the same - * UUID as the originating {@code table_operations.id}, tying each history entry directly back to - * the specific operation cycle that produced it. Multiple runs of the same operation on the same - * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the + * originating {@code table_operations.id}, tying each history entry back to the operation cycle + * that produced it. Multiple runs of the same operation on the same table produce multiple rows + * (each cycle gets a new UUID from the Analyzer). */ @Entity @Table( @@ -63,7 +63,7 @@ public class TableOperationsHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; - /** When the Spark job was submitted / ran, as reported by the job itself. */ + /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "submitted_at", nullable = false) private Instant submittedAt; @@ -80,12 +80,4 @@ public class TableOperationsHistoryRow { @Convert(converter = JobResultConverter.class) @Column(name = "result") private JobResult result; - - /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ - @Column(name = "orphan_files_deleted") - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - @Column(name = "orphan_bytes_deleted") - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 9d835aa20..e5493b510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,9 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; @@ -90,10 +87,9 @@ public class TableOperationsRow { private Long version; /** - * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file - * counts as of the moment the Analyzer ran. + * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer + * reads stats directly from {@code table_stats} instead of duplicating them here. 
*/ - @Convert(converter = OperationMetricsConverter.class) @Column(name = "metrics") - private OperationMetrics metrics; + private String metrics; } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 53062c5ad..098380e7f 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,7 +47,5 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - orphan_files_deleted INT, - orphan_bytes_deleted BIGINT, PRIMARY KEY (id) ); From 02a5ab31c62a0847e665f674b1fb3e8684bb3433 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:19:37 -0700 Subject: [PATCH 03/28] fix: remove orphan fields from CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fields never belonged in the data model — remove them at the source rather than adding then deleting in a later PR. Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/CompleteOperationRequest.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index c26893197..35f7ba782 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -22,10 +22,4 @@ public class CompleteOperationRequest { /** Error details on failure; {@code null} on success. */ private JobResult result; - - /** Number of orphan files deleted; set by OFD Spark app on success. 
*/ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ - private Long orphanBytesDeleted; } From f82d1b3ef3e0b1197487a68b851fa394ef9b9c7a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:14:18 -0700 Subject: [PATCH 04/28] fix(optimizer): address PR #527 review feedback - Widen-to-tighten: VARCHAR(255) -> VARCHAR(128) for database_name and table_name across all entities and the schema, aligning with prod conventions (can always be widened later, not tightened). - Rename databaseId -> databaseName in TableStatsRow, TableStatsHistoryRow, TableStatsDto, TableStatsHistoryDto, and UpsertTableStatsRequest for consistency with the operations entities and DTOs. - Drop the unused metrics field from TableOperationsRow, TableOperationsDto, and the schema. Add a TODO note in the schema that per-operation metric columns will be added as operations are onboarded. - Rename submittedAt -> completedAt in TableOperationsHistoryRow, TableOperationsHistoryDto, and the schema (column submitted_at -> completed_at, index idx_submitted_at -> idx_completed_at). The history row is written when the complete endpoint is called, so the timestamp captures completion; submission time is already on table_operations.scheduled_at. - Change TableStatsHistoryRow.id from BIGINT auto-increment to VARCHAR(36) UUID, set by the caller, matching the other id-bearing entities. - Add @JsonIgnoreProperties(ignoreUnknown = true) to CommitDelta for consistency with TableStats and SnapshotMetrics. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/model/TableOperationsDto.java | 3 --- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 2 +- .../api/model/TableStatsHistoryDto.java | 6 ++--- .../api/model/UpsertTableStatsRequest.java | 4 ++-- .../entity/TableOperationsHistoryRow.java | 10 ++++----- .../optimizer/entity/TableOperationsRow.java | 11 ++-------- .../entity/TableStatsHistoryRow.java | 13 +++++------ .../optimizer/entity/TableStatsRow.java | 6 ++--- .../main/resources/db/optimizer-schema.sql | 22 +++++++++---------- 11 files changed, 34 insertions(+), 46 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 9c33d8907..d41bd6906 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -39,7 +39,4 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - - /** Reserved for future per-operation metadata; currently unused. 
*/ - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index efc9bebbb..2a901ad2b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -24,7 +24,7 @@ public class TableOperationsHistoryDto { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ - private Instant submittedAt; + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 51aa8a712..64c99061a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -40,6 +40,7 @@ public static class SnapshotMetrics { @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index a668af434..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -18,7 +18,7 @@ public class 
TableStatsDto { private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 0604e07de..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,14 +13,14 @@ @AllArgsConstructor public class TableStatsHistoryDto { - /** Auto-increment primary key. */ - private Long id; + /** UUID primary key set by the caller. */ + private String id; /** Stable Iceberg table UUID. */ private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 721c3deaf..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -9,7 +9,7 @@ /** * Request body for {@code PUT /v1/table-stats/{tableUuid}}. * - *

<p>{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + *

<p>{@code tableUuid} comes from the path variable. {@code databaseName} and {@code tableName} are * denormalized display columns carried in the body. */ @Data @@ -19,7 +19,7 @@ public class UpsertTableStatsRequest { /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index e7493024c..6ac5db173 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -34,7 +34,7 @@ indexes = { @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), @Index(name = "idx_job_id", columnList = "job_id") }) @@ -53,10 +53,10 @@ public class TableOperationsHistoryRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -64,8 +64,8 @@ public class TableOperationsHistoryRow { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint.
*/ - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ @Enumerated(EnumType.STRING) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index e5493b510..43778495a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -53,10 +53,10 @@ public class TableOperationsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -85,11 +85,4 @@ public class TableOperationsRow { */ @Column(name = "version") private Long version; - - /** - * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer - * reads stats directly from {@code table_stats} instead of duplicating them here. 
- */ - @Column(name = "metrics") - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index 85d97a5eb..b0d92fc81 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -5,8 +5,6 @@ import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -42,17 +40,16 @@ public class TableStatsHistoryRow { @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "id", nullable = false) - private Long id; + @Column(name = "id", nullable = false, length = 36) + private String id; @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 71d6a9421..f682a3485 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -37,10 +37,10 @@ public class TableStatsRow { @Column(name = 
"table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 098380e7f..49641efe2 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -3,22 +3,22 @@ CREATE TABLE IF NOT EXISTS table_operations ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, status VARCHAR(20) NOT NULL, created_at TIMESTAMP(6) NOT NULL, scheduled_at TIMESTAMP(6), job_id VARCHAR(255), version BIGINT, - metrics TEXT, + -- TODO: per-operation metric columns will be added as operations are onboarded. 
PRIMARY KEY (id) ); CREATE TABLE IF NOT EXISTS table_stats ( table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, @@ -26,10 +26,10 @@ CREATE TABLE IF NOT EXISTS table_stats ( ); CREATE TABLE IF NOT EXISTS table_stats_history ( - id BIGINT NOT NULL AUTO_INCREMENT, + id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, recorded_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (id), @@ -40,10 +40,10 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( CREATE TABLE IF NOT EXISTS table_operations_history ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, - submitted_at TIMESTAMP(6) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, From 79753f1da1ae63f84de9b127d1f7cac301a6666b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 14:03:10 -0700 Subject: [PATCH 05/28] fix(optimizer): index table_operations_history on (database_name, table_name) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a composite secondary index on (database_name, table_name) to table_operations_history at the schema and entity layers. This backs a new name-based history-lookup endpoint added on optimizer-2; without the index, the query degrades to a full scan on a table that grows with every operation completion. The other three optimizer tables get no new indexes — no new query patterns on them this round. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/entity/TableOperationsHistoryRow.java | 3 ++- services/optimizer/src/main/resources/db/optimizer-schema.sql | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6ac5db173..3b6ced892 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -36,7 +36,8 @@ @Index(name = "idx_op_type_hist", columnList = "operation_type"), @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id") + @Index(name = "idx_job_id", columnList = "job_id"), + @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") }) @Getter @EqualsAndHashCode diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 49641efe2..4c2d9604b 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,5 +47,6 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - PRIMARY KEY (id) + PRIMARY KEY (id), + INDEX idx_toph_db_table (database_name, table_name) ); From 9a129a8ca7848bf1db15dbf05f847bc7c593e8ef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:00:04 -0700 Subject: [PATCH 06/28] =?UTF-8?q?refactor(optimizer):=20align=20data=20mod?= =?UTF-8?q?el=20=E2=80=94=20rename=20HistoryStatus;=20String=20at=20JPA=20?= =?UTF-8?q?boundary?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward fix in response to review feedback that data-model decisions belong in this PR (optimizer-0), not in downstream stack layers. Brings the data-model end-state to where optimizer-1+ already are, so the optimizer-0..optimizer-1 diff is just repositories + wiring. - Rename api/model/OperationHistoryStatus → HistoryStatus. - Move api/model/TableStats → model/TableStats (the in-memory stats domain type is used by both entities and DTOs, so it lives in a neutral package rather than under api/model/). - Delete config/JobResultConverter. Entities now store the JobResult as a JSON String column directly; serialization happens at the wire-API boundary via OptimizerMapper helpers. - Switch the operation/status columns on TableOperationsRow and TableOperationsHistoryRow from JPA-bound enums to String. Keeps the entity layer decoupled from wire-API enum identity. - Add String↔OperationType, String↔OperationStatus, String↔HistoryStatus, and String↔JobResult default helpers to OptimizerMapper so MapStruct can bridge entity (String) and DTO (typed) columns. - Update DTO/entity imports to follow the renamed/moved types. 
--- .../optimizer/api/mapper/OptimizerMapper.java | 60 +++++++++++++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 ------------ .../entity/TableOperationsHistoryRow.java | 25 ++++---- .../optimizer/entity/TableOperationsRow.java | 15 +++-- .../entity/TableStatsHistoryRow.java | 2 +- .../optimizer/entity/TableStatsRow.java | 2 +- .../optimizer/{api => }/model/TableStats.java | 2 +- 13 files changed, 86 insertions(+), 68 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/{api => }/model/TableStats.java (96%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..36d4b5f4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import 
com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,16 @@ * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. * *

<p>Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + *

<p>Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and + * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's + * typed enums and nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +41,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ?
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import 
lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. */ -@Converter -public class JobResultConverter implements AttributeConverter<JobResult, String> { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -1,15 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -27,6 +20,11 @@ * originating {@code table_operations.id}, tying each history entry back to the operation cycle * that produced it. Multiple runs of the same operation on the same table produce multiple rows * (each cycle gets a new UUID from the Analyzer). + * + *

<p>{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} + * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and + * structured-result types. The wire layer is responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -60,25 +58,22 @@ public class TableOperationsHistoryRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; + private String status; /** Spark job ID; indexed for job → result lookups. */ @Column(name = "job_id", length = 255) private String jobId; - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; + /** Job result JSON blob: error details on failure, both fields null on success.
*/ + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 43778495a..5d90f3d12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -26,6 +22,11 @@ * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, * operation_type)} at a time. + * + *

{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound + * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is + * responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -59,13 +60,11 @@ public class TableOperationsRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; + private String status; /** When the Analyzer first created this row. Set by the service on insert; never updated. */ @Column(name = "created_at", nullable = false) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index b0d92fc81..6ead5e42c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import javax.persistence.Column; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index f682a3485..2a1414567 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import java.util.Map; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 64c99061a..3b56196ea 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; From 681407ef6a1a1d2dc34dee2a4ca308c5d008ca3f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:18:39 -0700 Subject: [PATCH 07/28] feat(optimizer): add internal model layer Per-layer types: wire-API enums (api/model/), DB-side String at JPA boundary, and an internal in-memory model layer that is what the analyzer and scheduler operate on. The wire and DB sides convert at their boundary; consumers of the optimizer library work in the internal types. - model/HistoryStatus, model/OperationStatus, model/OperationType: internal enums mirroring the wire-API counterparts. 
Decoupled so the analyzer/scheduler can evolve their state machines without churning the wire or DB shapes. - model/Table: an OpenHouse table enriched with stats + properties. Built from a TableStatsRow. - model/TableOperation: analyzer's decision-to-schedule + scheduler's unit of work. Constructed from TableOperationsRow or from a Table; converts back via toRow(). --- .../optimizer/model/HistoryStatus.java | 13 +++ .../optimizer/model/OperationStatus.java | 15 +++ .../optimizer/model/OperationType.java | 10 ++ .../openhouse/optimizer/model/Table.java | 41 +++++++ .../optimizer/model/TableOperation.java | 106 ++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java new file mode 100644 index 000000000..d29c88719 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle outcomes for a completed operation. Mirrors the values written to {@code + * table_operations_history.status}; parsed at the boundary so callers switch on a typed value + * instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java new file mode 100644 index 000000000..66f213c73 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -0,0 +1,15 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle states for an operation. The analyzer writes {@link #PENDING}; the scheduler + * transitions through {@link #SCHEDULING} and {@link #SCHEDULED}. {@link #CANCELED} marks + * deduplicated PENDING rows. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java new file mode 100644 index 000000000..bea44018b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal enum for the operation types the analyzer and scheduler know about. Intentionally + * separate from the wire-API and DB representations so the internal model can evolve its set of + * supported operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java new file mode 100644 index 000000000..e232803dd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed + * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * stats for bin-packing). 
+ */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseName; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java new file mode 100644 index 000000000..d49625a57 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -0,0 +1,106 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks + * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current + * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA + * row via {@link #toRow()}. + * + *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., + * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not + * carry it. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name. */ + private String databaseName; + + /** Table name. */ + private String tableName; + + /** Operation type. */ + private OperationType operationType; + + /** Current lifecycle status. */ + private OperationStatus status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** + * Number of current data files on the table at evaluation time. Non-persisted enrichment; + * populated by consumers that need it. Null when not enriched. + */ + private Long fileCount; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationsRow row) { + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + /** Create a new PENDING operation for the given table and operation type. 
*/ + public static TableOperation pending(Table table, OperationType operationType) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationsRow toRow() { + return TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType.name()) + .status(status.name()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} From e3fb7770613e8635bb4f68bded2945e1845d7510 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:26:51 -0700 Subject: [PATCH 08/28] perf(optimizer): index table_operations_history for findLatestPerTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add idx_toph_optype_uuid_completed on table_operations_history. Drives the correlated MAX(completed_at) subquery in TableOperationsHistoryRepository.findLatestPerTable (introduced in optimizer-1), turning it into an index-only lookup per (operation_type, table_uuid) instead of an O(N²) scan. Lands with the schema in optimizer-0 since the index is part of the data model definition; the query that depends on it lands with the repository in optimizer-1. 
--- .../optimizer/src/main/resources/db/optimizer-schema.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 4c2d9604b..322f3bf92 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -48,5 +48,9 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( job_id VARCHAR(255), result TEXT, PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name) + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationsHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. + INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) ); From d3e17262f5ec8b0e97b54d8312da746278680a6f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:11:11 -0700 Subject: [PATCH 09/28] refactor(optimizer): enforce layer boundaries in api/ + model/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make data types in api/ and model/ self-contained — no cross-layer imports between them and no references into the DB layer. The internal model layer owns conversion to the api edge via a new mapper sub-package. api/ changes: - Add api/model/TableStats (api-layer copy of the JSON payload). - Drop cross-layer imports from TableStatsDto, TableStatsHistoryDto, UpsertTableStatsRequest; they pick up TableStats from the same package. model/ changes: - Add model/JobResult (internal copy of the result payload). - Add model/TableOperationsHistory (internal container mirroring the history-row field set in typed form).
- Remove cross-layer factory methods Table.from(TableStatsRow), TableOperation.from(TableOperationsRow), and TableOperation.toRow(). Construction at the DB boundary moves to a future model/mapper/ ModelDbMapper that ships with the db/ rename on optimizer-1. - Add model/mapper/ApiModelMapper — converts api/ DTOs ↔ model/ types. Only place inside model/ where api/ types appear. Per-PR ownership: - api/ and model/ live on this PR. - db/ (currently entity/) and its boundary-side mapper (model/mapper/ModelDbMapper) land on optimizer-1. - The existing api/mapper/OptimizerMapper still references entity/ on this branch; it gets retired on optimizer-2 once the service routes through the new mappers. --- .../optimizer/api/model/TableStats.java | 55 ++++ .../optimizer/api/model/TableStatsDto.java | 1 - .../api/model/TableStatsHistoryDto.java | 1 - .../api/model/UpsertTableStatsRequest.java | 1 - .../openhouse/optimizer/model/JobResult.java | 25 ++ .../openhouse/optimizer/model/Table.java | 16 +- .../optimizer/model/TableOperation.java | 38 +-- .../model/TableOperationsHistory.java | 47 ++++ .../model/mapper/ApiModelMapper.java | 234 ++++++++++++++++++ 9 files changed, 369 insertions(+), 49 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..de268ffe7 --- /dev/null +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Combined stats payload exposed on the optimizer wire API. + * + *

API-layer copy of the stats payload — self-contained, evolved only when the wire contract + * changes. + */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 4aad1e18f..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 6d515a543..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 8bb317676..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java new file mode 100644 index 000000000..7e48dd0ef --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model result payload for a completed Spark maintenance job. + * + *

Internal-layer copy of the structured result. Both fields are {@code null} on success; + * populated on failure. Intentionally separate from the wire-API and DB representations. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index e232803dd..c8bede225 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -12,6 +11,9 @@ * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads * stats for bin-packing). + * + *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link + * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. */ @Data @Builder @@ -26,16 +28,4 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; - - /** Build a {@code Table} from a {@code table_stats} row. */ - public static Table from(TableStatsRow row) { - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(row.getStats()) - .build(); - } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index d49625a57..1f14dddff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -11,9 +10,11 @@ /** * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks - * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current - * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA - * row via {@link #toRow()}. + * up and submits. + * + *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction + * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or + * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). * *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not @@ -55,20 +56,6 @@ public class TableOperation { */ private Long fileCount; - /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationsRow row) { - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(OperationType.valueOf(row.getOperationType())) - .status(OperationStatus.valueOf(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() @@ -82,21 +69,6 @@ public static TableOperation pending(Table table, OperationType operationType) { .build(); } - /** Convert to a JPA entity for persistence. */ - public TableOperationsRow toRow() { - return TableOperationsRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .operationType(operationType.name()) - .status(status.name()) - .createdAt(createdAt) - .scheduledAt(scheduledAt) - .version(0L) - .build(); - } - /** Return the more recently created of two operations. 
*/ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator byCreatedAt = diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java new file mode 100644 index 000000000..64e0d57b3 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -0,0 +1,47 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of a completed operation history record. + * + *

Mirrors the field set of the underlying history row but in internal types only. Used by + * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistory { + + /** Same UUID as the originating live-operations row. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + /** Denormalized database name. */ + private String databaseName; + + /** Denormalized table name. */ + private String tableName; + + /** Operation type for this completed run. */ + private OperationType operationType; + + /** When the operation completed, as recorded by the complete endpoint. */ + private Instant completedAt; + + /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ + private HistoryStatus status; + + /** Spark job ID for the run that produced this record. */ + private String jobId; + + /** Job result payload; both inner fields {@code null} on success. 
*/ + private JobResult result; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java new file mode 100644 index 000000000..2ae477e0d --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -0,0 +1,234 @@ +package com.linkedin.openhouse.optimizer.model.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.JobResult; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; +import org.springframework.stereotype.Component; + +/** + * Converts between wire-API DTOs and internal {@code model/} domain objects. + * + *

The only place inside {@code model/} where {@code api/} types are referenced — this is the + * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} + * stay free of any api-side imports. + * + *

API-layer enums + payloads are intentionally separate Java types from the internal-model + * counterparts; the two sides evolve independently. This mapper translates by name. + */ +@Component +public class ApiModelMapper { + + // --- TableOperationsDto <-> TableOperation --- + + public TableOperation toOperation(TableOperationsDto dto) { + if (dto == null) { + return null; + } + return TableOperation.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .status(toModelOperationStatus(dto.getStatus())) + .createdAt(dto.getCreatedAt()) + .scheduledAt(dto.getScheduledAt()) + .build(); + } + + public TableOperationsDto toDto(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(toApiOperationType(op.getOperationType())) + .status(toApiOperationStatus(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } + + // --- TableOperationsHistoryDto <-> TableOperationsHistory --- + + public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { + if (dto == null) { + return null; + } + return TableOperationsHistory.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .completedAt(dto.getCompletedAt()) + .status(toModelHistoryStatus(dto.getStatus())) + .jobId(dto.getJobId()) + .result(toModelJobResult(dto.getResult())) + .build(); + } + + public TableOperationsHistoryDto toDto(TableOperationsHistory history) { + if (history == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + 
.databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .operationType(toApiOperationType(history.getOperationType())) + .completedAt(history.getCompletedAt()) + .status(toApiHistoryStatus(history.getStatus())) + .jobId(history.getJobId()) + .result(toApiJobResult(history.getResult())) + .build(); + } + + // --- TableStats payload --- + + public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { + if (apiStats == null) { + return null; + } + return TableStats.builder() + .snapshot(toModelSnapshot(apiStats.getSnapshot())) + .delta(toModelDelta(apiStats.getDelta())) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { + if (modelStats == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() + .snapshot(toApiSnapshot(modelStats.getSnapshot())) + .delta(toApiDelta(modelStats.getDelta())) + .build(); + } + + // --- enum helpers --- + + public OperationType toModelOperationType( + com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { + return apiValue == null ? null : OperationType.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( + OperationType modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); + } + + public OperationStatus toModelOperationStatus( + com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { + return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( + OperationStatus modelValue) { + return modelValue == null + ? 
null + : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); + } + + public HistoryStatus toModelHistoryStatus( + com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { + return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( + HistoryStatus modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); + } + + // --- JobResult --- + + public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { + if (apiValue == null) { + return null; + } + return JobResult.builder() + .errorMessage(apiValue.getErrorMessage()) + .errorType(apiValue.getErrorType()) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() + .errorMessage(modelValue.getErrorMessage()) + .errorType(modelValue.getErrorType()) + .build(); + } + + // --- TableStats inner classes --- + + private TableStats.SnapshotMetrics toModelSnapshot( + com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.SnapshotMetrics.builder() + .clusterId(apiValue.getClusterId()) + .tableVersion(apiValue.getTableVersion()) + .tableLocation(apiValue.getTableLocation()) + .tableSizeBytes(apiValue.getTableSizeBytes()) + .numCurrentFiles(apiValue.getNumCurrentFiles()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( + TableStats.SnapshotMetrics modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() + .clusterId(modelValue.getClusterId()) + 
.tableVersion(modelValue.getTableVersion()) + .tableLocation(modelValue.getTableLocation()) + .tableSizeBytes(modelValue.getTableSizeBytes()) + .numCurrentFiles(modelValue.getNumCurrentFiles()) + .build(); + } + + private TableStats.CommitDelta toModelDelta( + com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.CommitDelta.builder() + .numFilesAdded(apiValue.getNumFilesAdded()) + .numFilesDeleted(apiValue.getNumFilesDeleted()) + .addedSizeBytes(apiValue.getAddedSizeBytes()) + .deletedSizeBytes(apiValue.getDeletedSizeBytes()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( + TableStats.CommitDelta modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() + .numFilesAdded(modelValue.getNumFilesAdded()) + .numFilesDeleted(modelValue.getNumFilesDeleted()) + .addedSizeBytes(modelValue.getAddedSizeBytes()) + .deletedSizeBytes(modelValue.getDeletedSizeBytes()) + .build(); + } +} From 1d469a72fdb68133c95cd8def12027f428ab2acd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:17:49 -0700 Subject: [PATCH 10/28] refactor(optimizer): remove db-layer types from optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DB layer (entities + api↔db mapper) belongs to optimizer-1, not optimizer-0. optimizer-0 owns only the wire-API surface and the internal model. Delete from this PR: - entity/ package (TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow, package-info). - api/mapper/OptimizerMapper — was the api↔entity bridge. With the entity files moving out of this PR and the new model/mapper/ taking over conversion duties, this mapper is no longer needed here. 
optimizer-1 will re-introduce these as db/ (renamed) with db-side per-layer types and a model/mapper/ModelDbMapper. --- .../optimizer/api/mapper/OptimizerMapper.java | 92 ------------------- .../entity/TableOperationsHistoryRow.java | 79 ---------------- .../optimizer/entity/TableOperationsRow.java | 87 ------------------ .../entity/TableStatsHistoryRow.java | 61 ------------ .../optimizer/entity/TableStatsRow.java | 57 ------------ .../optimizer/entity/package-info.java | 2 - 6 files changed, 378 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java deleted file mode 100644 index 36d4b5f4b..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ /dev/null @@ -1,92 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.mapper; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import 
com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import org.mapstruct.Mapper; - -/** - * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. - * - *

Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. - * - *

Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and - * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's - * typed enums and nested objects. - */ -@Mapper(componentModel = "spring") -public interface OptimizerMapper { - - ObjectMapper JSON = new ObjectMapper(); - - /** Map a {@link TableOperationsRow} to its DTO. */ - TableOperationsDto toDto(TableOperationsRow row); - - /** Map a {@link TableOperationsHistoryRow} to its DTO. */ - TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); - - /** Map a {@link TableStatsRow} to its DTO. */ - TableStatsDto toDto(TableStatsRow row); - - /** Map a {@link TableStatsHistoryRow} to its DTO. */ - TableStatsHistoryDto toDto(TableStatsHistoryRow row); - - // --- entity String ↔ wire enum/object helpers --- - - default OperationType toOperationType(String value) { - return value == null ? null : OperationType.valueOf(value); - } - - default String fromOperationType(OperationType value) { - return value == null ? null : value.name(); - } - - default OperationStatus toOperationStatus(String value) { - return value == null ? null : OperationStatus.valueOf(value); - } - - default String fromOperationStatus(OperationStatus value) { - return value == null ? null : value.name(); - } - - default HistoryStatus toHistoryStatus(String value) { - return value == null ? null : HistoryStatus.valueOf(value); - } - - default String fromHistoryStatus(HistoryStatus value) { - return value == null ? 
null : value.name(); - } - - default JobResult toJobResult(String json) { - if (json == null) { - return null; - } - try { - return JSON.readValue(json, JobResult.class); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); - } - } - - default String fromJobResult(JobResult value) { - if (value == null) { - return null; - } - try { - return JSON.writeValueAsString(value); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index 8303a4579..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - * - *

{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} - * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and - * structured-result types. The wire layer is responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsHistoryRow { - - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - /** When the operation completed, as recorded by the complete endpoint. */ - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - /** {@code SUCCESS} or {@code FAILED}. */ - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** Spark job ID; indexed for job → result lookups. 
*/ - @Column(name = "job_id", length = 255) - private String jobId; - - /** Job result JSON blob: error details on failure, both fields null on success. */ - @Column(name = "result", columnDefinition = "TEXT") - private String result; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 5d90f3d12..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - * - *

{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound - * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is - * responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsRow { - - /** Client-generated UUID identifying this specific operation recommendation. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** When the Analyzer first created this row. Set by the service on insert; never updated. */ - @Column(name = "created_at", nullable = false) - private Instant createdAt; - - /** Set when the operation is claimed; {@code null} while {@code PENDING}. 
*/ - @Column(name = "scheduled_at") - private Instant scheduledAt; - - /** Job ID returned by the Jobs Service after successful submission. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. - */ - @Column(name = "version") - private Long version; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index 6ead5e42c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index 2a1414567..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import 
javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - /** Set on every upsert. Used for stats pipeline staleness monitoring. */ - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java deleted file mode 100644 index 7c0ca1f67..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java +++ /dev/null @@ -1,2 +0,0 @@ -/** JPA entities for the optimizer service. */ -package com.linkedin.openhouse.optimizer.entity; From eee8ecae794fecdc7676e02c0fb286cd3c98e9fa Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:20:28 -0700 Subject: [PATCH 11/28] refactor(optimizer): remove DB schema + schema-init properties The DDL is part of the db/ layer's ownership (optimizer-1). Move the schema file and its schema-init properties out of optimizer-0 so this PR is purely api/ + model/. 
Delete: - src/main/resources/db/optimizer-schema.sql. - spring.sql.init.mode, spring.sql.init.schema-locations, and spring.jpa.defer-datasource-initialization from application.properties (they reference the deleted schema file). optimizer-1 re-introduces these alongside the db/ entities and repositories. --- .../src/main/resources/application.properties | 4 -- .../main/resources/db/optimizer-schema.sql | 56 ------------------- 2 files changed, 60 deletions(-) delete mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index c6c3f8437..00982d80e 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -2,10 +2,6 @@ spring.application.name=openhouse-optimizer-service server.port=8080 spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql - spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect spring.jpa.properties.hibernate.show_sql=false spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql deleted file mode 100644 index 322f3bf92..000000000 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ /dev/null @@ -1,56 +0,0 @@ --- Optimizer Service Schema --- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
-CREATE TABLE IF NOT EXISTS table_operations ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - status VARCHAR(20) NOT NULL, - created_at TIMESTAMP(6) NOT NULL, - scheduled_at TIMESTAMP(6), - job_id VARCHAR(255), - version BIGINT, - -- TODO: per-operation metric columns will be added as operations are onboarded. - PRIMARY KEY (id) -); - -CREATE TABLE IF NOT EXISTS table_stats ( - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - table_properties TEXT, - updated_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (table_uuid) -); - -CREATE TABLE IF NOT EXISTS table_stats_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - recorded_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (id), - INDEX idx_tsh_table_uuid (table_uuid), - INDEX idx_tsh_recorded_at (recorded_at) -); - -CREATE TABLE IF NOT EXISTS table_operations_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - completed_at TIMESTAMP(6) NOT NULL, - status VARCHAR(20) NOT NULL, - job_id VARCHAR(255), - result TEXT, - PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name), - -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated - -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, - -- table_uuid) instead of an O(N²) scan. 
- INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) -); From 328e5b91b4c1db0f5abf22e37a4dea787d351bef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:23:10 -0700 Subject: [PATCH 12/28] refactor(optimizer): scrub MySQL / JPA / datasource references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DB-layer dependencies belong to optimizer-1. With entities, schema, and the api/mapper deleted from this PR, the JPA + MySQL stack is unused — remove the dependency declarations and configuration that referenced them. build.gradle: - Drop spring-boot-starter-data-jpa, mysql-connector-java, the vladmihalcea hibernate-types JSON serializer, and the h2 test runtime. application.properties: - Drop spring.jpa.* and spring.datasource.* lines. Delete services/optimizer/src/test/resources/application-test.properties (H2 test datasource config — re-introduced on optimizer-1 alongside the repositories and repo tests). 
--- services/optimizer/build.gradle | 4 ---- .../src/main/resources/application.properties | 11 ----------- .../src/test/resources/application-test.properties | 12 ------------ 3 files changed, 27 deletions(-) delete mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..2de8fd5c7 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,11 +4,7 @@ plugins { } dependencies { - implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' - implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' - implementation 'mysql:mysql-connector-java:8.+' - testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 00982d80e..64c40d1f2 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -1,16 +1,5 @@ spring.application.name=openhouse-optimizer-service server.port=8080 -spring.jpa.hibernate.ddl-auto=none -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect -spring.jpa.properties.hibernate.show_sql=false -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver -spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} -spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} -spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} -spring.datasource.hikari.maximum-pool-size=20 - management.endpoints.web.exposure.include=health,prometheus 
management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties deleted file mode 100644 index 97b7841dc..000000000 --- a/services/optimizer/src/test/resources/application-test.properties +++ /dev/null @@ -1,12 +0,0 @@ -spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 -spring.datasource.driver-class-name=org.h2.Driver -spring.datasource.username=sa -spring.datasource.password= - -spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql From f7a5d208e106cb5c1c051bc450f14833be1bb093 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:25:41 -0700 Subject: [PATCH 13/28] refactor(optimizer): drop UpsertTableOperationsRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No external system creates table operations — operations are written by the in-process analyzer directly through the model layer. The request type has no wire consumer and no internal consumer, so it's dead code. Delete services/optimizer/.../api/model/UpsertTableOperationsRequest.java. 
--- .../model/UpsertTableOperationsRequest.java | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java deleted file mode 100644 index 21174c337..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * PUT request body for {@code /v1/table-operations/{id}}. - * - *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service creates the row on first call. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class UpsertTableOperationsRequest { - - /** Stable Iceberg table UUID identifying the target table. */ - private String tableUuid; - - /** Denormalized database name for display. */ - private String databaseName; - - /** Denormalized table name for display. */ - private String tableName; - - /** The type of maintenance operation to create. */ - private OperationType operationType; -} From 2a532b577ed51507c72e836ea4d8778967f43062 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:47:42 -0700 Subject: [PATCH 14/28] refactor(optimizer): drop JobResult from the wire and internal model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JobResult is removed from the optimizer API. CompleteOperationRequest (user-edited) now carries only operationId + status — the failure detail abstraction has been retired. The internal model and DTOs no longer carry it either, and the type itself is deleted from both api/ and model/. CompleteOperationRequest: - operationId moved from path to body (user manual edit). - jobId field removed. - result field removed. api/model/TableOperationsHistoryDto: - Drop jobId and result fields. model/TableOperationsHistory: - Drop jobId and result fields. model/mapper/ApiModelMapper: - Remove toModelJobResult / toApiJobResult helpers + JobResult import. - toHistory()/toDto() no longer touch jobId or result. Delete: - services/optimizer/.../api/model/JobResult.java - services/optimizer/.../model/JobResult.java Downstream propagation: opt-2's service signature changes (completeOperation now takes only the request body); db/HistoryStatus remains needed on opt-1 but db/JobResult no longer is. 
See memory/tasks/mkuchenb-optimizer-3-fixes.md for the full propagation list. --- .../api/model/CompleteOperationRequest.java | 15 ++++++----- .../optimizer/api/model/JobResult.java | 25 ----------------- .../api/model/TableOperationsHistoryDto.java | 6 ----- .../openhouse/optimizer/model/JobResult.java | 25 ----------------- .../model/TableOperationsHistory.java | 6 ----- .../model/mapper/ApiModelMapper.java | 27 ------------------- 6 files changed, 9 insertions(+), 95 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 4f3f6535a..30648d497 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -8,8 +8,12 @@ /** * Request body for {@code POST /v1/table-operations/{id}/complete}. * - *

Reports the outcome of a completed operation. The backend looks up the operation row by {@code - * id} and writes a history entry with the operation's table metadata and the supplied result. + *

Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle + * operation UUID — the service looks up that one row and writes a history entry for it. + * + *

A single Spark job typically processes N tables and yields N independent (status, result) + * pairs — one per operation. Callers issue one complete request per operation; the service does not + * bulk-complete by job. */ @Data @Builder @@ -17,9 +21,8 @@ @AllArgsConstructor public class CompleteOperationRequest { - /** Outcome of the operation. */ - private HistoryStatus status; + private String operationId; - /** Error details on failure; {@code null} on success. */ - private JobResult result; + /** Terminal outcome for this single operation. */ + private HistoryStatus status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java deleted file mode 100644 index 74942243c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Result payload for a completed Spark maintenance job. - * - *

Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields - * are {@code null} on success; populated on failure. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index a7a9d9dc6..d9fa1f387 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -28,10 +28,4 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; - - /** Job ID from the Jobs Service. */ - private String jobId; - - /** Job result payload; both fields null on success. */ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java deleted file mode 100644 index 7e48dd0ef..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Internal-model result payload for a completed Spark maintenance job. - * - *

Internal-layer copy of the structured result. Both fields are {@code null} on success; - * populated on failure. Intentionally separate from the wire-API and DB representations. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 64e0d57b3..fe5bee5f7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -38,10 +38,4 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; - - /** Spark job ID for the run that produced this record. */ - private String jobId; - - /** Job result payload; both inner fields {@code null} on success. 
*/ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 2ae477e0d..35af7fb25 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -3,7 +3,6 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.JobResult; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; @@ -72,8 +71,6 @@ public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { .operationType(toModelOperationType(dto.getOperationType())) .completedAt(dto.getCompletedAt()) .status(toModelHistoryStatus(dto.getStatus())) - .jobId(dto.getJobId()) - .result(toModelJobResult(dto.getResult())) .build(); } @@ -89,8 +86,6 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .operationType(toApiOperationType(history.getOperationType())) .completedAt(history.getCompletedAt()) .status(toApiHistoryStatus(history.getStatus())) - .jobId(history.getJobId()) - .result(toApiJobResult(history.getResult())) .build(); } @@ -154,28 +149,6 @@ public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStat : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); } - // --- JobResult --- - - public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { - if (apiValue == null) { - return null; - } - return JobResult.builder() - 
.errorMessage(apiValue.getErrorMessage()) - .errorType(apiValue.getErrorType()) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() - .errorMessage(modelValue.getErrorMessage()) - .errorType(modelValue.getErrorType()) - .build(); - } - // --- TableStats inner classes --- private TableStats.SnapshotMetrics toModelSnapshot( From 2e3a2316295d67105802f4a4c73032396048be9d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:52:15 -0700 Subject: [PATCH 15/28] feat(optimizer): add debug echo fields to CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tableUuid, databaseName, tableName, and operationType to the complete request body. They're debug-only — the server keys lookup off operationId — but preserving them on logs and traces helps an operator diagnose a failing complete call without joining back to the operation row. --- .../api/model/CompleteOperationRequest.java | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 30648d497..0add634b5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -6,14 +6,20 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/{id}/complete}. + * Request body for {@code POST /v1/table-operations/complete}. * - *

Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle - * operation UUID — the service looks up that one row and writes a history entry for it. + *

Reports the outcome of a single completed operation. The service looks up the operation row by + * {@link #operationId} and writes a history entry for it. * - *

A single Spark job typically processes N tables and yields N independent (status, result) - * pairs — one per operation. Callers issue one complete request per operation; the service does not + *

A single Spark job typically processes N tables and yields N independent (status) outcomes — + * one per operation. Callers issue one complete request per operation; the service does not * bulk-complete by job. + * + *

The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link + * #operationType}) are debug-only echo information. The server does not key off them; they are + * preserved on log lines and traces so an operator looking at a failing complete call can see which + * (db, table, operation) the caller believed it was completing without joining back to the + * operation row. */ @Data @Builder @@ -21,8 +27,21 @@ @AllArgsConstructor public class CompleteOperationRequest { + /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ private HistoryStatus status; + + /** Debug echo: stable table identity the caller believed it was completing. */ + private String tableUuid; + + /** Debug echo: database name. */ + private String databaseName; + + /** Debug echo: table name. */ + private String tableName; + + /** Debug echo: operation type. */ + private OperationType operationType; } From db5eb2959a0fbbfba5d821ee36f00435248f9f5c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:53:16 -0700 Subject: [PATCH 16/28] refactor(optimizer): move application.properties out of optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every line in application.properties is run-time config (server.port, spring.application.name, actuator endpoints). optimizer-0 has no controllers and no endpoint to serve — the file is doing nothing here. The first PR that actually runs a web service is optimizer-2. Delete the file from this PR. optimizer-2 will re-introduce it alongside the REST controllers. The OptimizerServiceApplication @SpringBootApplication shell stays on this branch — optimizer-1's repository tests use @SpringBootTest and need an application class to discover. 
--- services/optimizer/src/main/resources/application.properties | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 services/optimizer/src/main/resources/application.properties diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties deleted file mode 100644 index 64c40d1f2..000000000 --- a/services/optimizer/src/main/resources/application.properties +++ /dev/null @@ -1,5 +0,0 @@ -spring.application.name=openhouse-optimizer-service -server.port=8080 - -management.endpoints.web.exposure.include=health,prometheus -management.endpoint.health.enabled=true From 861b584c3cd41ff03db336c85cb0cde4bc063fe4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:55:57 -0700 Subject: [PATCH 17/28] feat(optimizer): extend model layer for service-only types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare model/ for a service-layer rewrite that returns only model/ types (no api/ DTO leakage into the service interface). - model/Table: add `Instant updatedAt`. The service stamps it on every upsert; controllers read it when assembling the wire DTO. - model/TableStatsHistory: new internal-model counterpart to db.TableStatsHistoryRow. Fields mirror the row in internal types (id, tableUuid, databaseName, tableName, stats, recordedAt). - ApiModelMapper: add the missing api↔model conversions that controllers will own once the service drops api/ knowledge — Table ↔ TableStatsDto, TableStatsHistory ↔ TableStatsHistoryDto, and toTable(tableUuid, UpsertTableStatsRequest). 
--- .../openhouse/optimizer/model/Table.java | 4 ++ .../optimizer/model/TableStatsHistory.java | 33 +++++++++++ .../model/mapper/ApiModelMapper.java | 58 +++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index c8bede225..dc0a16a0c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import java.time.Instant; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -28,4 +29,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; + + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java new file mode 100644 index 000000000..5cdad1918 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of an append-only per-commit stats history record. + * + *

One per Iceberg commit. {@link #stats} carries both the snapshot at commit time and the commit + * delta — consumers can reconstruct change rates over arbitrary time windows. + * + *

Pure internal-model type — no references to wire-API or DB types. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistory { + + private String id; + private String tableUuid; + private String databaseName; + private String tableName; + + /** Snapshot + delta for this commit event. */ + private TableStats stats; + + /** When this history row was recorded. */ + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 35af7fb25..d77b3a253 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -2,12 +2,18 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import java.util.Collections; import org.springframework.stereotype.Component; /** @@ -89,6 +95,58 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .build(); } + // --- Table <-> TableStatsDto / 
UpsertTableStatsRequest --- + + /** + * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} + * is intentionally left null — the service stamps it server-side at write time. + */ + public Table toTable(String tableUuid, UpsertTableStatsRequest request) { + if (request == null) { + return null; + } + return Table.builder() + .tableUuid(tableUuid) + .databaseName(request.getDatabaseName()) + .tableId(request.getTableName()) + .tableProperties( + request.getTableProperties() != null + ? request.getTableProperties() + : Collections.emptyMap()) + .stats(toModelStats(request.getStats())) + .build(); + } + + public TableStatsDto toDto(Table table) { + if (table == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .stats(toApiStats(table.getStats())) + .tableProperties(table.getTableProperties()) + .updatedAt(table.getUpdatedAt()) + .build(); + } + + // --- TableStatsHistory <-> TableStatsHistoryDto --- + + public TableStatsHistoryDto toDto(TableStatsHistory history) { + if (history == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .stats(toApiStats(history.getStats())) + .recordedAt(history.getRecordedAt()) + .build(); + } + // --- TableStats payload --- public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { From 188713d7479b0d1c0425b9e753e0da2df25915b4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:20:50 -0700 Subject: [PATCH 18/28] docs(optimizer): comment every field on opt-0 api/ and model/ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several fields under api/model/ and model/ were left undocumented in the earlier per-layer-types 
passes. Audit + fill them in: api/model/TableOperationsHistoryDto: databaseName, tableName, operationType — add display/role docs. api/model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. api/model/TableStats inner classes: - SnapshotMetrics: clusterId, tableVersion, tableLocation, tableSizeBytes — add field docs. - CommitDelta: numFilesAdded, numFilesDeleted, addedSizeBytes, deletedSizeBytes — add field docs. model/Table: tableUuid, databaseName, tableId, tableProperties, stats — add field docs. model/TableStats: same field-doc additions on SnapshotMetrics and CommitDelta as the api/ counterpart. model/OperationStatus: PENDING, SCHEDULING, SCHEDULED, CANCELED — add enum-value docs. model/OperationType: ORPHAN_FILES_DELETION — add enum-value doc. model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. model/TableStatsHistory: id, tableUuid, databaseName, tableName — add field docs. --- .../optimizer/api/model/HistoryStatus.java | 4 ++++ .../api/model/TableOperationsHistoryDto.java | 5 +++++ .../optimizer/api/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/HistoryStatus.java | 4 ++++ .../optimizer/model/OperationStatus.java | 8 ++++++++ .../optimizer/model/OperationType.java | 2 ++ .../openhouse/optimizer/model/Table.java | 7 +++++++ .../openhouse/optimizer/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/TableStatsHistory.java | 7 +++++++ 9 files changed, 71 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 2fbcf6235..dc52f863e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -2,6 +2,10 @@ /** Terminal states for a completed Spark maintenance job. 
*/ public enum HistoryStatus { + + /** The Spark job for this operation completed successfully. */ SUCCESS, + + /** The Spark job for this operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index d9fa1f387..4e247c7ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -19,8 +19,13 @@ public class TableOperationsHistoryDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; + + /** The type of maintenance operation this history row records. */ private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index de268ffe7..dcb360330 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -32,10 +32,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. 
*/ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -47,9 +56,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. */ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java index d29c88719..97b8e2992 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -8,6 +8,10 @@ *

Intentionally separate from the wire-API and DB representations. */ public enum HistoryStatus { + + /** The operation completed successfully. */ SUCCESS, + + /** The operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java index 66f213c73..f284fedaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -8,8 +8,16 @@ *

Intentionally separate from the wire-API and DB representations. */ public enum OperationStatus { + + /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, + + /** Scheduler has claimed the row and is launching a job; jobId not yet recorded. */ SCHEDULING, + + /** Job has been submitted to the Jobs Service; the row carries a {@code jobId}. */ SCHEDULED, + + /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ CANCELED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index bea44018b..8f4fe35a8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -6,5 +6,7 @@ * supported operations without churning either boundary. */ public enum OperationType { + + /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index dc0a16a0c..bca7e2420 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -22,12 +22,19 @@ @AllArgsConstructor public class Table { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; + + /** Database the table lives in. */ private String databaseName; + + /** Iceberg table identifier (table name, not UUID). */ private String tableId; + /** Current table-property map (e.g. maintenance opt-in flags). Never null. 
*/ @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ private TableStats stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 3b56196ea..94d0a1655 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -27,10 +27,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -42,9 +51,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. 
*/ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java index 5cdad1918..53bb54d1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -20,9 +20,16 @@ @AllArgsConstructor public class TableStatsHistory { + /** UUID primary key — set by the caller, not generated server-side. */ private String id; + + /** Stable table identity from the Tables Service. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; /** Snapshot + delta for this commit event. */ From 8d642732244b002f1f7926ae81e98b27f95b1881 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:28:57 -0700 Subject: [PATCH 19/28] refactor(optimizer): remove clusterId from SnapshotMetrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clusterId is per-table-immutable in OpenHouse — it never changes after the table is created — so persisting and transmitting it on every snapshot is dead weight. Remove from the wire and internal representations. - api/model/TableStats.SnapshotMetrics: drop clusterId. - model/TableStats.SnapshotMetrics: drop clusterId. - model/mapper/ApiModelMapper: drop the clusterId hop in toModelSnapshot and toApiSnapshot. 
--- .../com/linkedin/openhouse/optimizer/api/model/TableStats.java | 3 --- .../com/linkedin/openhouse/optimizer/model/TableStats.java | 3 --- .../openhouse/optimizer/model/mapper/ApiModelMapper.java | 2 -- 3 files changed, 8 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index dcb360330..096eecd1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -33,9 +33,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 94d0a1655..56291e510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -28,9 +28,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. 
*/ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index d77b3a253..31141ff44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -215,7 +215,6 @@ private TableStats.SnapshotMetrics toModelSnapshot( return null; } return TableStats.SnapshotMetrics.builder() - .clusterId(apiValue.getClusterId()) .tableVersion(apiValue.getTableVersion()) .tableLocation(apiValue.getTableLocation()) .tableSizeBytes(apiValue.getTableSizeBytes()) @@ -229,7 +228,6 @@ private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics to return null; } return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .clusterId(modelValue.getClusterId()) .tableVersion(modelValue.getTableVersion()) .tableLocation(modelValue.getTableLocation()) .tableSizeBytes(modelValue.getTableSizeBytes()) From c72aae8ed9e324591b88cf54f993400370f087b3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:58:31 -0700 Subject: [PATCH 20/28] =?UTF-8?q?refactor(optimizer):=20move=20api?= =?UTF-8?q?=E2=86=94model=20conversion=20onto=20api=20types;=20delete=20Ap?= =?UTF-8?q?iModelMapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the api/model boundary mapper with conversion methods on the types themselves. The api layer now imports model/ directly via to/from methods — controllers and other api-edge callers no longer inject a mapper bean. The dependency direction is a strict downward chain: api → model → db api types know about model types (and call model methods); model types know about db types (next round). db remains import-free. 
No central mapper, no risk of a cycle through a hub class. api/model/* changes (each gets a `toModel()` instance method + a static `fromModel(...)` factory): - TableOperationsDto ↔ model.TableOperation. - TableOperationsHistoryDto ↔ model.TableOperationsHistory. - TableStatsDto ↔ model.Table. - TableStatsHistoryDto ↔ model.TableStatsHistory. - UpsertTableStatsRequest → model.Table (one-way; takes the path-var tableUuid; updatedAt is server-stamped). - TableStats (+ SnapshotMetrics + CommitDelta inner) ↔ model.TableStats. - OperationType / OperationStatus / HistoryStatus (api enums) ↔ model enums. CompleteOperationRequest keeps its fields plain — callers extract `operationId` and `status.toModel()` directly; no wrapper needed. Delete services/optimizer/.../model/mapper/ApiModelMapper.java. --- .../optimizer/api/model/HistoryStatus.java | 12 +- .../optimizer/api/model/OperationStatus.java | 13 +- .../optimizer/api/model/OperationType.java | 12 +- .../api/model/TableOperationsDto.java | 32 +++ .../api/model/TableOperationsHistoryDto.java | 30 ++ .../optimizer/api/model/TableStats.java | 67 +++++ .../optimizer/api/model/TableStatsDto.java | 29 ++ .../api/model/TableStatsHistoryDto.java | 28 ++ .../api/model/UpsertTableStatsRequest.java | 17 ++ .../model/mapper/ApiModelMapper.java | 263 ------------------ 10 files changed, 237 insertions(+), 266 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index dc52f863e..0c9ff95da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -7,5 +7,15 @@ public enum HistoryStatus { SUCCESS, /** The 
Spark job for this operation failed. */ - FAILED + FAILED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java index c97be441b..300c28263 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -17,5 +17,16 @@ public enum OperationStatus { * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED * before the claim step. */ - CANCELED + CANCELED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatus v) { + return v == null ? 
null : OperationStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 8507bae12..5f325e712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -3,5 +3,15 @@ /** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.OperationType toModel() { + return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index d41bd6906..db8ef1039 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -39,4 +40,35 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. 
*/ private String jobId; + + /** Convert to the internal-model counterpart. */ + public TableOperation toModel() { + return TableOperation.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .status(status == null ? null : status.toModel()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. */ + public static TableOperationsDto fromModel(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 4e247c7ce..935435040 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -33,4 +34,33 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; + + /** Convert to the internal-model counterpart. 
*/ + public TableOperationsHistory toModel() { + return TableOperationsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .completedAt(completedAt) + .status(status == null ? null : status.toModel()) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. */ + public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + if (h == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .operationType(OperationType.fromModel(h.getOperationType())) + .completedAt(h.getCompletedAt()) + .status(HistoryStatus.fromModel(h.getStatus())) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 096eecd1e..c75d21d75 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -25,6 +25,25 @@ public class TableStats { /** Delta fields — accumulated across commit events. */ private CommitDelta delta; + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.builder() + .snapshot(snapshot == null ? null : snapshot.toModel()) + .delta(delta == null ? null : delta.toModel()) + .build(); + } + + /** Build the api-layer payload from the internal-model counterpart. 
*/ + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { + return null; + } + return TableStats.builder() + .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(CommitDelta.fromModel(m.getDelta())) + .build(); + } + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) @@ -44,6 +63,30 @@ public static class SnapshotMetrics { /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + .tableVersion(tableVersion) + .tableLocation(tableLocation) + .tableSizeBytes(tableSizeBytes) + .numCurrentFiles(numCurrentFiles) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. */ + public static SnapshotMetrics fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + if (m == null) { + return null; + } + return SnapshotMetrics.builder() + .tableVersion(m.getTableVersion()) + .tableLocation(m.getTableLocation()) + .tableSizeBytes(m.getTableSizeBytes()) + .numCurrentFiles(m.getNumCurrentFiles()) + .build(); + } } /** Per-commit incremental counters; accumulated across all recorded commit events. */ @@ -65,5 +108,29 @@ public static class CommitDelta { /** Total bytes removed by this commit. */ private Long deletedSizeBytes; + + /** Convert to the internal-model counterpart. 
*/ + public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .addedSizeBytes(addedSizeBytes) + .deletedSizeBytes(deletedSizeBytes) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. */ + public static CommitDelta fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + if (m == null) { + return null; + } + return CommitDelta.builder() + .numFilesAdded(m.getNumFilesAdded()) + .numFilesDeleted(m.getNumFilesDeleted()) + .addedSizeBytes(m.getAddedSizeBytes()) + .deletedSizeBytes(m.getDeletedSizeBytes()) + .build(); + } } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..82dc552c2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -31,4 +33,31 @@ public class TableStatsDto { /** When this row was last written. Used for staleness monitoring. */ private Instant updatedAt; + + /** Convert to the internal-model counterpart. */ + public Table toModel() { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? 
null : stats.toModel()) + .updatedAt(updatedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. */ + public static TableStatsDto fromModel(Table t) { + if (t == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(t.getTableUuid()) + .databaseName(t.getDatabaseName()) + .tableName(t.getTableId()) + .stats(TableStats.fromModel(t.getStats())) + .tableProperties(t.getTableProperties()) + .updatedAt(t.getUpdatedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..b5f971bbf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -30,4 +31,31 @@ public class TableStatsHistoryDto { /** When this history row was recorded. */ private Instant recordedAt; + + /** Convert to the internal-model counterpart. */ + public TableStatsHistory toModel() { + return TableStatsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .stats(stats == null ? null : stats.toModel()) + .recordedAt(recordedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + if (h == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .stats(TableStats.fromModel(h.getStats())) + .recordedAt(h.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..13476543f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -29,4 +31,19 @@ public class UpsertTableStatsRequest { /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** + * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from + * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service + * stamps it server-side at write time. + */ + public Table toModel(String tableUuid) { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? 
null : stats.toModel()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java deleted file mode 100644 index 31141ff44..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ /dev/null @@ -1,263 +0,0 @@ -package com.linkedin.openhouse.optimizer.model.mapper; - -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import java.util.Collections; -import org.springframework.stereotype.Component; - -/** - * Converts between wire-API DTOs and internal {@code model/} domain objects. - * - *

<p>The only place inside {@code model/} where {@code api/} types are referenced — this is the - * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} - * stay free of any api-side imports. - * - *

<p>API-layer enums + payloads are intentionally separate Java types from the internal-model - * counterparts; the two sides evolve independently. This mapper translates by name. - */ -@Component -public class ApiModelMapper { - - // --- TableOperationsDto <-> TableOperation --- - - public TableOperation toOperation(TableOperationsDto dto) { - if (dto == null) { - return null; - } - return TableOperation.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .status(toModelOperationStatus(dto.getStatus())) - .createdAt(dto.getCreatedAt()) - .scheduledAt(dto.getScheduledAt()) - .build(); - } - - public TableOperationsDto toDto(TableOperation op) { - if (op == null) { - return null; - } - return TableOperationsDto.builder() - .id(op.getId()) - .tableUuid(op.getTableUuid()) - .databaseName(op.getDatabaseName()) - .tableName(op.getTableName()) - .operationType(toApiOperationType(op.getOperationType())) - .status(toApiOperationStatus(op.getStatus())) - .createdAt(op.getCreatedAt()) - .scheduledAt(op.getScheduledAt()) - .build(); - } - - // --- TableOperationsHistoryDto <-> TableOperationsHistory --- - - public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { - if (dto == null) { - return null; - } - return TableOperationsHistory.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .completedAt(dto.getCompletedAt()) - .status(toModelHistoryStatus(dto.getStatus())) - .build(); - } - - public TableOperationsHistoryDto toDto(TableOperationsHistory history) { - if (history == null) { - return null; - } - return TableOperationsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) -
.tableName(history.getTableName()) - .operationType(toApiOperationType(history.getOperationType())) - .completedAt(history.getCompletedAt()) - .status(toApiHistoryStatus(history.getStatus())) - .build(); - } - - // --- Table <-> TableStatsDto / UpsertTableStatsRequest --- - - /** - * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} - * is intentionally left null — the service stamps it server-side at write time. - */ - public Table toTable(String tableUuid, UpsertTableStatsRequest request) { - if (request == null) { - return null; - } - return Table.builder() - .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableId(request.getTableName()) - .tableProperties( - request.getTableProperties() != null - ? request.getTableProperties() - : Collections.emptyMap()) - .stats(toModelStats(request.getStats())) - .build(); - } - - public TableStatsDto toDto(Table table) { - if (table == null) { - return null; - } - return TableStatsDto.builder() - .tableUuid(table.getTableUuid()) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .stats(toApiStats(table.getStats())) - .tableProperties(table.getTableProperties()) - .updatedAt(table.getUpdatedAt()) - .build(); - } - - // --- TableStatsHistory <-> TableStatsHistoryDto --- - - public TableStatsHistoryDto toDto(TableStatsHistory history) { - if (history == null) { - return null; - } - return TableStatsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .stats(toApiStats(history.getStats())) - .recordedAt(history.getRecordedAt()) - .build(); - } - - // --- TableStats payload --- - - public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { - if (apiStats == null) { - return null; - } - return TableStats.builder() - .snapshot(toModelSnapshot(apiStats.getSnapshot())) - 
.delta(toModelDelta(apiStats.getDelta())) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { - if (modelStats == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() - .snapshot(toApiSnapshot(modelStats.getSnapshot())) - .delta(toApiDelta(modelStats.getDelta())) - .build(); - } - - // --- enum helpers --- - - public OperationType toModelOperationType( - com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { - return apiValue == null ? null : OperationType.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( - OperationType modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); - } - - public OperationStatus toModelOperationStatus( - com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { - return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( - OperationStatus modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); - } - - public HistoryStatus toModelHistoryStatus( - com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { - return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( - HistoryStatus modelValue) { - return modelValue == null - ? 
null - : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); - } - - // --- TableStats inner classes --- - - private TableStats.SnapshotMetrics toModelSnapshot( - com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.SnapshotMetrics.builder() - .tableVersion(apiValue.getTableVersion()) - .tableLocation(apiValue.getTableLocation()) - .tableSizeBytes(apiValue.getTableSizeBytes()) - .numCurrentFiles(apiValue.getNumCurrentFiles()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( - TableStats.SnapshotMetrics modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .tableVersion(modelValue.getTableVersion()) - .tableLocation(modelValue.getTableLocation()) - .tableSizeBytes(modelValue.getTableSizeBytes()) - .numCurrentFiles(modelValue.getNumCurrentFiles()) - .build(); - } - - private TableStats.CommitDelta toModelDelta( - com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.CommitDelta.builder() - .numFilesAdded(apiValue.getNumFilesAdded()) - .numFilesDeleted(apiValue.getNumFilesDeleted()) - .addedSizeBytes(apiValue.getAddedSizeBytes()) - .deletedSizeBytes(apiValue.getDeletedSizeBytes()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( - TableStats.CommitDelta modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() - .numFilesAdded(modelValue.getNumFilesAdded()) - .numFilesDeleted(modelValue.getNumFilesDeleted()) - .addedSizeBytes(modelValue.getAddedSizeBytes()) - .deletedSizeBytes(modelValue.getDeletedSizeBytes()) - .build(); - } -} From 
af23d5ef63ff1e44a483392e6a364c507d4cae34 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:02:51 -0700 Subject: [PATCH 21/28] fix(optimizer): make TableStats self-describing; route DTO conversion to TableStats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableStats now carries its own identity (tableUuid, databaseName, tableName) and metadata (tableProperties, updatedAt) alongside the snapshot + delta payload. Consumers no longer need an outer wrapper to know which table the stats belong to. api.TableStatsDto.toModel() and api.UpsertTableStatsRequest.toModel() now return model.TableStats (was model.Table). The two types only happened to have the same shape — semantically a DTO for stats is stats, not a table. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/api/model/TableStatsDto.java | 31 +++++++++++-------- .../api/model/UpsertTableStatsRequest.java | 17 +++++----- .../openhouse/optimizer/model/TableStats.java | 31 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 82dc552c2..244050b04 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -35,29 +34,35 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. 
*/ - public Table toModel() { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .updatedAt(updatedAt) .build(); } /** Build a wire DTO from the internal-model counterpart. */ - public static TableStatsDto fromModel(Table t) { - if (t == null) { + public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { return null; } return TableStatsDto.builder() - .tableUuid(t.getTableUuid()) - .databaseName(t.getDatabaseName()) - .tableName(t.getTableId()) - .stats(TableStats.fromModel(t.getStats())) - .tableProperties(t.getTableProperties()) - .updatedAt(t.getUpdatedAt()) + .tableUuid(m.getTableUuid()) + .databaseName(m.getDatabaseName()) + .tableName(m.getTableName()) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + .build()) + .tableProperties(m.getTableProperties()) + .updatedAt(m.getUpdatedAt()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 13476543f..08b42050f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import 
com.linkedin.openhouse.optimizer.model.Table; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -33,17 +32,19 @@ public class UpsertTableStatsRequest { private Map tableProperties; /** - * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from - * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service - * stamps it server-side at write time. + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by + * this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is + * left {@code null}; the service stamps it server-side at write time. */ - public Table toModel(String tableUuid) { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? 
null : stats.toModel()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 56291e510..906d01669 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,12 +1,24 @@ package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table. */ +/** + * Self-describing per-table stats record. Carries the table's identity and metadata alongside the + * snapshot + delta payload so consumers don't need an outer wrapper to know which table the stats + * belong to. + * + *

Identity ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}) and metadata ({@link + * #tableProperties}, {@link #updatedAt}) are populated when read from a current-state row. When + * this record is built from a per-commit history row, {@link #delta} is populated, {@link + * #tableProperties} is typically empty, and {@link #updatedAt} is typically {@code null}. + */ @Data @Builder(toBuilder = true) @NoArgsConstructor @@ -14,12 +26,27 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + private String tableUuid; + + /** Database the table lives in. */ + private String databaseName; + + /** Iceberg table name (the human-readable identifier, not the UUID). */ + private String tableName; + + /** Current table-property map (e.g. maintenance opt-in flags). Never null. */ + @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetrics snapshot; - /** Delta fields — accumulated across commit events. */ + /** Delta fields — accumulated across commit events. Null when read from a current-state row. */ private CommitDelta delta; + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) From 3aebf64b743fb88b2d92a7d623ed70b5dbdee981 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:32:00 -0700 Subject: [PATCH 22/28] chore(optimizer): enable toBuilder on model.Table and model.TableOperationsHistory Moved down from opt-2. The service-layer code (opt-2) uses .toBuilder() on both types; the lombok annotation that enables it belongs on the PR that owns model/. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/com/linkedin/openhouse/optimizer/model/Table.java | 2 +- .../openhouse/optimizer/model/TableOperationsHistory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index bca7e2420..089a52982 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -17,7 +17,7 @@ * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index fe5bee5f7..c8950ee26 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -13,7 +13,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { From b6c7f42774a61214cdabe6d01384b89c685cda35 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 10:32:28 -0700 Subject: [PATCH 23/28] refactor(optimizer): drop fileCount enrichment from model.TableOperation TableOperation becomes a pure operation record. Consumers (scheduler) look up TableStats at the point they need it, rather than carrying enrichment data on the model type. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/model/TableOperation.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index 1f14dddff..fe91c38d0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -15,10 +15,6 @@ *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). - * - *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., - * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not - * carry it. */ @Data @Builder @@ -50,12 +46,6 @@ public class TableOperation { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; - /** - * Number of current data files on the table at evaluation time. Non-persisted enrichment; - * populated by consumers that need it. Null when not enriched. - */ - private Long fileCount; - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() From 437a0ed84a2fa7a53ea827b241404f60d20ac230 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:35:27 -0700 Subject: [PATCH 24/28] refactor(optimizer): add Dto suffix to all api/model classes (PR #527 review) Per @abhisheknath2011 review comment 3262776356: > "We could change all the internal model add Dto suffix something like > TableOperationsDto. This aligns with the existing services codebase." Renames (suffix added): - CompleteOperationRequest -> CompleteOperationRequestDto - UpsertTableStatsRequest -> UpsertTableStatsRequestDto - OperationType (enum) -> OperationTypeDto - OperationStatus (enum) -> OperationStatusDto - HistoryStatus (enum) -> HistoryStatusDto - TableStats (inner payload) -> TableStatsPayloadDto - TableStats.SnapshotMetrics -> TableStatsPayloadDto.SnapshotMetricsDto - TableStats.CommitDelta -> TableStatsPayloadDto.CommitDeltaDto Cross-reference updates inside api/model. Internal model layer (services/optimizer/.../model/) is intentionally unchanged. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ....java => CompleteOperationRequestDto.java} | 6 ++--- ...storyStatus.java => HistoryStatusDto.java} | 6 ++--- ...ionStatus.java => OperationStatusDto.java} | 6 ++--- ...erationType.java => OperationTypeDto.java} | 6 ++--- .../api/model/TableOperationsDto.java | 8 +++--- .../api/model/TableOperationsHistoryDto.java | 8 +++--- .../optimizer/api/model/TableStatsDto.java | 8 +++--- .../api/model/TableStatsHistoryDto.java | 4 +-- ...leStats.java => TableStatsPayloadDto.java} | 27 ++++++++++--------- ...t.java => UpsertTableStatsRequestDto.java} | 4 +-- 10 files changed, 42 insertions(+), 41 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{CompleteOperationRequest.java => CompleteOperationRequestDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{HistoryStatus.java => HistoryStatusDto.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationStatus.java => OperationStatusDto.java} (87%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationType.java => OperationTypeDto.java} (72%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{TableStats.java => TableStatsPayloadDto.java} (86%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{UpsertTableStatsRequest.java => UpsertTableStatsRequestDto.java} (95%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java rename to 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java index 0add634b5..0db7a8a37 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class CompleteOperationRequestDto { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequest { private String tableName; /** Debug echo: operation type. */ - private OperationType operationType; + private OperationTypeDto operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java index 0c9ff95da..5a4421332 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The Spark job for this operation completed successfully. 
*/ SUCCESS, @@ -15,7 +15,7 @@ public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatus.valueOf(v.name()); + public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? null : HistoryStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java similarity index 87% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java index 300c28263..89fa9f1b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ PENDING, @@ -25,8 +25,8 @@ public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatus fromModel( + public static OperationStatusDto fromModel( com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatus.valueOf(v.name()); + return v == null ? 
null : OperationStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java similarity index 72% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java index 5f325e712..210010eb0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; @@ -11,7 +11,7 @@ public com.linkedin.openhouse.optimizer.model.OperationType toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationType.valueOf(v.name()); + public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? 
null : OperationTypeDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index db8ef1039..880fe7926 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationType operationType; + private OperationTypeDto operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatus status; + private OperationStatusDto status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -65,8 +65,8 @@ public static TableOperationsDto fromModel(TableOperation op) { .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationType.fromModel(op.getOperationType())) - .status(OperationStatus.fromModel(op.getStatus())) + .operationType(OperationTypeDto.fromModel(op.getOperationType())) + .status(OperationStatusDto.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 935435040..652a58b3f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,13 +27,13 @@ public 
class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Convert to the internal-model counterpart. */ public TableOperationsHistory toModel() { @@ -58,9 +58,9 @@ public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationType.fromModel(h.getOperationType())) + .operationType(OperationTypeDto.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatus.fromModel(h.getStatus())) + .status(HistoryStatusDto.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 244050b04..6852081ab 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; @@ -57,9 +57,9 @@ public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.Tab .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + TableStatsPayloadDto.builder() + .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index b5f971bbf..bac3782ff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -27,7 +27,7 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** When this history row was recorded. 
*/ private Instant recordedAt; @@ -54,7 +54,7 @@ public static TableStatsHistoryDto fromModel(TableStatsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStats.fromModel(h.getStats())) + .stats(TableStatsPayloadDto.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java similarity index 86% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java index c75d21d75..692cb7247 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java @@ -17,13 +17,13 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsPayloadDto { /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; + private SnapshotMetricsDto snapshot; /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; + private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { @@ -34,13 +34,14 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build the api-layer payload from the internal-model counterpart. 
*/ - public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayloadDto fromModel( + com.linkedin.openhouse.optimizer.model.TableStats m) { if (m == null) { return null; } - return TableStats.builder() - .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(CommitDelta.fromModel(m.getDelta())) + return TableStatsPayloadDto.builder() + .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); } @@ -50,7 +51,7 @@ public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableS @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { + public static class SnapshotMetricsDto { /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; @@ -75,12 +76,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel } /** Build the api-layer inner object from the internal-model counterpart. */ - public static SnapshotMetrics fromModel( + public static SnapshotMetricsDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { if (m == null) { return null; } - return SnapshotMetrics.builder() + return SnapshotMetricsDto.builder() .tableVersion(m.getTableVersion()) .tableLocation(m.getTableLocation()) .tableSizeBytes(m.getTableSizeBytes()) @@ -95,7 +96,7 @@ public static SnapshotMetrics fromModel( @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { + public static class CommitDeltaDto { /** Number of data files this commit added to the table. */ private Long numFilesAdded; @@ -120,12 +121,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { } /** Build the api-layer inner object from the internal-model counterpart. 
*/ - public static CommitDelta fromModel( + public static CommitDeltaDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { if (m == null) { return null; } - return CommitDelta.builder() + return CommitDeltaDto.builder() .numFilesAdded(m.getNumFilesAdded()) .numFilesDeleted(m.getNumFilesDeleted()) .addedSizeBytes(m.getAddedSizeBytes()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java index 08b42050f..75753fa69 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequest { +public class UpsertTableStatsRequestDto { /** Denormalized database name for display. */ private String databaseName; @@ -26,7 +26,7 @@ public class UpsertTableStatsRequest { private String tableName; /** Combined snapshot + delta stats payload from this commit. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; From 4f98c228b6ea661291fb924ed870d41e82757159 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:56:57 -0700 Subject: [PATCH 25/28] refactor(optimizer): rename api.model package to api.spec (PR #527 review) Per @abhisheknath2011 review comment 3262769497: > "Can we change the client side API to api.spec instead of api.model? 
> This also aligns with existing services." Mechanical package rename. The 10 api wire types move from services/optimizer/.../api/model/ to services/optimizer/.../api/spec/. No type or signature changes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/{model => spec}/CompleteOperationRequestDto.java | 2 +- .../optimizer/api/{model => spec}/HistoryStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationTypeDto.java | 2 +- .../optimizer/api/{model => spec}/TableOperationsDto.java | 2 +- .../api/{model => spec}/TableOperationsHistoryDto.java | 2 +- .../openhouse/optimizer/api/{model => spec}/TableStatsDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsHistoryDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsPayloadDto.java | 2 +- .../api/{model => spec}/UpsertTableStatsRequestDto.java | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/CompleteOperationRequestDto.java (96%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/HistoryStatusDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationStatusDto.java (95%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationTypeDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsHistoryDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsHistoryDto.java (96%) rename 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsPayloadDto.java (98%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/UpsertTableStatsRequestDto.java (96%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java index 0db7a8a37..9dca54a8e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java index 5a4421332..034be4cf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** 
Terminal states for a completed Spark maintenance job. */ public enum HistoryStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java index 89fa9f1b0..f02ee2815 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. */ public enum OperationStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java index 210010eb0..4e057b232 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. 
*/ public enum OperationTypeDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java index 880fe7926..496f59f42 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java index 652a58b3f..8b508bf36 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; diff --git 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java index 6852081ab..165ae47dc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.time.Instant; import java.util.Collections; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java index bac3782ff..9e7c44c56 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java similarity index 
98% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java index 692cb7247..761471f91 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java index 75753fa69..3e1fe4764 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.util.Collections; import java.util.Map; From b31decf8a6cb93351ce5fd153b2740f1ea0329e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:51:19 -0700 Subject: [PATCH 26/28] refactor(optimizer): move Dto suffix from api/spec to model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reversal of an earlier inconsistency surfaced by abhisheknath2011 in the PR #527 
review thread on api/spec/HistoryStatusDto.java. The api wire types are the canonical contract; they should carry the canonical name. The internal-model types are transfer objects between layers and now carry the Dto suffix. api/spec/ — Dto stripped from class + filename (10 files): CompleteOperationRequestDto -> CompleteOperationRequest HistoryStatusDto -> HistoryStatus OperationStatusDto -> OperationStatus OperationTypeDto -> OperationType TableOperationsDto -> TableOperations TableOperationsHistoryDto -> TableOperationsHistory TableStatsDto -> TableStats TableStatsHistoryDto -> TableStatsHistory TableStatsPayloadDto -> TableStatsPayload UpsertTableStatsRequestDto -> UpsertTableStatsRequest model/ — Dto added to class + filename (8 files): HistoryStatus -> HistoryStatusDto OperationStatus -> OperationStatusDto OperationType -> OperationTypeDto Table -> TableDto TableOperation -> TableOperationDto TableOperationsHistory -> TableOperationsHistoryDto TableStats -> TableStatsDto TableStatsHistory -> TableStatsHistoryDto Both renames land on opt-0 because opt-0 owns api/spec/ and model/. Cascade up the stack in follow-up commits. Out of scope here: HistoryStatus enum value additions (CANCELED, QUEUED) also raised in the same review thread; separate semantic change. 
Co-Authored-By: Claude Opus 4.7 --- ...Dto.java => CompleteOperationRequest.java} | 6 ++--- ...storyStatusDto.java => HistoryStatus.java} | 10 ++++---- ...ionStatusDto.java => OperationStatus.java} | 12 +++++----- ...erationTypeDto.java => OperationType.java} | 10 ++++---- ...perationsDto.java => TableOperations.java} | 20 ++++++++-------- ...ryDto.java => TableOperationsHistory.java} | 20 ++++++++-------- .../{TableStatsDto.java => TableStats.java} | 22 +++++++++-------- ...HistoryDto.java => TableStatsHistory.java} | 16 ++++++------- ...PayloadDto.java => TableStatsPayload.java} | 24 +++++++++---------- ...tDto.java => UpsertTableStatsRequest.java} | 16 +++++++------ ...storyStatus.java => HistoryStatusDto.java} | 2 +- ...ionStatus.java => OperationStatusDto.java} | 2 +- ...erationType.java => OperationTypeDto.java} | 2 +- .../model/{Table.java => TableDto.java} | 6 ++--- ...eOperation.java => TableOperationDto.java} | 16 ++++++------- ...ry.java => TableOperationsHistoryDto.java} | 8 +++---- .../{TableStats.java => TableStatsDto.java} | 2 +- ...History.java => TableStatsHistoryDto.java} | 4 ++-- 18 files changed, 101 insertions(+), 97 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequestDto.java => CompleteOperationRequest.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{HistoryStatusDto.java => HistoryStatus.java} (52%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationStatusDto.java => OperationStatus.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationTypeDto.java => OperationType.java} (50%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsDto.java => TableOperations.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsHistoryDto.java => 
TableOperationsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsDto.java => TableStats.java} (70%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsHistoryDto.java => TableStatsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsPayloadDto.java => TableStatsPayload.java} (81%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{UpsertTableStatsRequestDto.java => UpsertTableStatsRequest.java} (71%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{HistoryStatus.java => HistoryStatusDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationStatus.java => OperationStatusDto.java} (95%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationType.java => OperationTypeDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{Table.java => TableDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperation.java => TableOperationDto.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperationsHistory.java => TableOperationsHistoryDto.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStats.java => TableStatsDto.java} (99%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStatsHistory.java => TableStatsHistoryDto.java} (94%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java similarity index 92% rename from 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java index 9dca54a8e..15112882d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequestDto { +public class CompleteOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequestDto { private String tableName; /** Debug echo: operation type. */ - private OperationTypeDto operationType; + private OperationType operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java similarity index 52% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java index 034be4cf2..1d799818f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Terminal states for a completed Spark maintenance job. 
*/ -public enum HistoryStatusDto { +public enum HistoryStatus { /** The Spark job for this operation completed successfully. */ SUCCESS, @@ -10,12 +10,12 @@ public enum HistoryStatusDto { FAILED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { - return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.HistoryStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatusDto.valueOf(v.name()); + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatusDto v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java index f02ee2815..b1cbe42b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatusDto { +public enum OperationStatus { /** Recommended by the Analyzer but not yet claimed by the Scheduler. 
*/ PENDING, @@ -20,13 +20,13 @@ public enum OperationStatusDto { CANCELED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { - return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatusDto fromModel( - com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatusDto.valueOf(v.name()); + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatusDto v) { + return v == null ? null : OperationStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java similarity index 50% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java index 4e057b232..ea6d2797c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java @@ -1,17 +1,17 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationTypeDto { +public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.OperationType toModel() { - return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationTypeDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationTypeDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationTypeDto.valueOf(v.name()); + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationTypeDto v) { + return v == null ? null : OperationType.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 496f59f42..60f2c3dd8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsDto { +public class TableOperations { /** Client-generated UUID identifying this specific operation recommendation. 
*/ private String id; @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationTypeDto operationType; + private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatusDto status; + private OperationStatus status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -42,8 +42,8 @@ public class TableOperationsDto { private String jobId; /** Convert to the internal-model counterpart. */ - public TableOperation toModel() { - return TableOperation.builder() + public TableOperationDto toModel() { + return TableOperationDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -56,17 +56,17 @@ public TableOperation toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableOperationsDto fromModel(TableOperation op) { + public static TableOperations fromModel(TableOperationDto op) { if (op == null) { return null; } - return TableOperationsDto.builder() + return TableOperations.builder() .id(op.getId()) .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationTypeDto.fromModel(op.getOperationType())) - .status(OperationStatusDto.fromModel(op.getStatus())) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java rename to 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java index 8b508bf36..7a000f840 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistoryDto { +public class TableOperationsHistory { /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ private String id; @@ -27,17 +27,17 @@ public class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationTypeDto operationType; + private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Convert to the internal-model counterpart. */ - public TableOperationsHistory toModel() { - return TableOperationsHistory.builder() + public TableOperationsHistoryDto toModel() { + return TableOperationsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -49,18 +49,18 @@ public TableOperationsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + public static TableOperationsHistory fromModel(TableOperationsHistoryDto h) { if (h == null) { return null; } - return TableOperationsHistoryDto.builder() + return TableOperationsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationTypeDto.fromModel(h.getOperationType())) + .operationType(OperationType.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatusDto.fromModel(h.getStatus())) + .status(HistoryStatus.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java index 165ae47dc..41f44f763 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java @@ -13,7 +13,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsDto { +public class TableStats { /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; @@ -34,9 +34,11 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) @@ -48,18 +50,18 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsDto.builder() + return TableStats.builder() .tableUuid(m.getTableUuid()) .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStatsPayloadDto.builder() - .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) - .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) + TableStatsPayload.builder() + .snapshot(TableStatsPayload.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayload.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java index 9e7c44c56..5508aca27 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistoryDto { +public class TableStatsHistory { /** UUID primary key set by the caller. */ private String id; @@ -27,14 +27,14 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** When this history row was recorded. */ private Instant recordedAt; /** Convert to the internal-model counterpart. */ - public TableStatsHistory toModel() { - return TableStatsHistory.builder() + public TableStatsHistoryDto toModel() { + return TableStatsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -45,16 +45,16 @@ public TableStatsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + public static TableStatsHistory fromModel(TableStatsHistoryDto h) { if (h == null) { return null; } - return TableStatsHistoryDto.builder() + return TableStatsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStatsPayloadDto.fromModel(h.getStats())) + .stats(TableStatsPayload.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java similarity index 81% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java index 761471f91..c347bf385 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java @@ -17,7 +17,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStatsPayloadDto { +public class TableStatsPayload { /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetricsDto snapshot; @@ -26,20 +26,20 @@ public class TableStatsPayloadDto { private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.builder() .snapshot(snapshot == null ? null : snapshot.toModel()) .delta(delta == null ? 
null : delta.toModel()) .build(); } /** Build the api-layer payload from the internal-model counterpart. */ - public static TableStatsPayloadDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayload fromModel( + com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsPayloadDto.builder() + return TableStatsPayload.builder() .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); @@ -66,8 +66,8 @@ public static class SnapshotMetricsDto { private Long numCurrentFiles; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics.builder() .tableVersion(tableVersion) .tableLocation(tableLocation) .tableSizeBytes(tableSizeBytes) @@ -77,7 +77,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel /** Build the api-layer inner object from the internal-model counterpart. */ public static SnapshotMetricsDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics m) { if (m == null) { return null; } @@ -111,8 +111,8 @@ public static class CommitDeltaDto { private Long deletedSizeBytes; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta.builder() .numFilesAdded(numFilesAdded) .numFilesDeleted(numFilesDeleted) .addedSizeBytes(addedSizeBytes) @@ -122,7 +122,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { /** Build the api-layer inner object from the internal-model counterpart. */ public static CommitDeltaDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta m) { if (m == null) { return null; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java similarity index 71% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index 3e1fe4764..d1b4a5fe2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequestDto { +public class UpsertTableStatsRequest { /** Denormalized database name for display. */ private String databaseName; @@ -26,19 +26,21 @@ public class UpsertTableStatsRequestDto { private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; /** - * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by - * this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStatsDto} described + * by this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is * left {@code null}; the service stamps it server-side at write time. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java index 97b8e2992..463c62605 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java @@ -7,7 +7,7 @@ * *

<p>Intentionally separate from the wire-API and DB representations. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The operation completed successfully. */ SUCCESS, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java index f284fedaf..b766f7dbe 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java @@ -7,7 +7,7 @@ * *

<p>Intentionally separate from the wire-API and DB representations. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java index 8f4fe35a8..39b299806 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java @@ -5,7 +5,7 @@ * separate from the wire-API and DB representations so the internal model can evolve its set of * supported operations without churning either boundary. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java index 089a52982..408bc4fc7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java @@ -10,7 +10,7 @@ /** * An OpenHouse table enriched with stats and properties, built by combining data sources. 
Consumed - * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * by the analyzer (decides whether to produce a {@link TableOperationDto}) and the scheduler (reads * stats for bin-packing). * *

<p>Pure internal-model type — no references to wire-API or DB types. Construct via {@link @@ -20,7 +20,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class Table { +public class TableDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; @@ -35,7 +35,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ - private TableStats stats; + private TableStatsDto stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index fe91c38d0..8809a1b62 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -20,7 +20,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperation { +public class TableOperationDto { /** Unique operation ID (UUID). */ private String id; @@ -35,10 +35,10 @@ public class TableOperation { private String tableName; /** Operation type. */ - private OperationType operationType; + private OperationTypeDto operationType; /** Current lifecycle status. */ - private OperationStatus status; + private OperationStatusDto status; /** When this operation record was created. 
*/ private Instant createdAt; @@ -47,21 +47,21 @@ public class TableOperation { private Instant scheduledAt; /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, OperationType operationType) { - return TableOperation.builder() + public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { + return TableOperationDto.builder() .id(UUID.randomUUID().toString()) .tableUuid(table.getTableUuid()) .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) .operationType(operationType) - .status(OperationStatus.PENDING) + .status(OperationStatusDto.PENDING) .createdAt(Instant.now()) .build(); } /** Return the more recently created of two operations. */ - public static TableOperation mostRecent(TableOperation a, TableOperation b) { - Comparator<TableOperation> byCreatedAt = + public static TableOperationDto mostRecent(TableOperationDto a, TableOperationDto b) { + Comparator<TableOperationDto> byCreatedAt = Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? 
a : b; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java index c8950ee26..e05bb641e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java @@ -16,7 +16,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistory { +public class TableOperationsHistoryDto { /** Same UUID as the originating live-operations row. */ private String id; @@ -31,11 +31,11 @@ public class TableOperationsHistory { private String tableName; /** Operation type for this completed run. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; - /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ - private HistoryStatus status; + /** Terminal outcome: {@link HistoryStatusDto#SUCCESS} or {@link HistoryStatusDto#FAILED}. 
*/ + private HistoryStatusDto status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java similarity index 99% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java index 906d01669..d142dcc8b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java @@ -24,7 +24,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java similarity index 94% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java index 53bb54d1e..5579c95ed 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java @@ -18,7 +18,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistory { +public class TableStatsHistoryDto { /** UUID primary key — set by the caller, not generated server-side. */ private String id; @@ -33,7 +33,7 @@ public class TableStatsHistory { private String tableName; /** Snapshot + delta for this commit event. 
*/ - private TableStats stats; + private TableStatsDto stats; /** When this history row was recorded. */ private Instant recordedAt; From 4e86569ce2e4327665b0d8885276c6b2e048612a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:16:10 -0700 Subject: [PATCH 27/28] feat(optimizer): propagate jobId through model + api conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableOperationDto grows a jobId field; api.TableOperations conversions copy it across the api ↔ model boundary. The api DTO already had the field; the model side was missing it. Relocated from opt-5 to its proper owner per the model-layer rule. Model ↔ db plumbing for the same field lands on opt-1 in a follow-up. Co-Authored-By: Claude Opus 4.7 --- .../linkedin/openhouse/optimizer/api/spec/TableOperations.java | 2 ++ .../linkedin/openhouse/optimizer/model/TableOperationDto.java | 3 +++ 2 files changed, 5 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 60f2c3dd8..0bca95734 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -52,6 +52,7 @@ public TableOperationDto toModel() { .status(status == null ? 
null : status.toModel()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -69,6 +70,7 @@ public static TableOperations fromModel(TableOperationDto op) { .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) + .jobId(op.getJobId()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index 8809a1b62..4cac14187 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -46,6 +46,9 @@ public class TableOperationDto { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; + /** Job ID returned by the Jobs Service after the scheduler submitted; null until SCHEDULED. */ + private String jobId; + /** Create a new PENDING operation for the given table and operation type. */ public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { return TableOperationDto.builder() From 1fe71f043260d2c5b57c6556cb69ea051f5fafbe Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:25:48 -0700 Subject: [PATCH 28/28] =?UTF-8?q?refactor(optimizer):=20rename=20CompleteO?= =?UTF-8?q?perationRequest=20=E2=86=92=20UpdateOperationRequest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbol rename only. The HistoryStatus enum (SUCCESS/FAILED) and the once-terminal semantics are unchanged; the endpoint's behavior is the same. Future broadening (CANCELED/QUEUED, idempotency, mid-lifecycle status changes) is a separate concern. Method names + URL path will follow on opt-2; Spark-app caller + docs follow on opt-5. 
Co-Authored-By: Claude Opus 4.7 --- ...nRequest.java => UpdateOperationRequest.java} | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequest.java => UpdateOperationRequest.java} (70%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index 15112882d..a216e9db3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -6,26 +6,26 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/complete}. + * Request body for {@code POST /v1/table-operations/update}. * - *

<p>Reports the outcome of a single completed operation. The service looks up the operation row by + *

<p>Reports the outcome of a single operation update. The service looks up the operation row by * {@link #operationId} and writes a history entry for it. * *

<p>A single Spark job typically processes N tables and yields N independent (status) outcomes — - * one per operation. Callers issue one complete request per operation; the service does not - * bulk-complete by job. + * one per operation. Callers issue one update request per operation; the service does not + * bulk-update by job. * *

<p>The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link * #operationType}) are debug-only echo information. The server does not key off them; they are - * preserved on log lines and traces so an operator looking at a failing complete call can see which - * (db, table, operation) the caller believed it was completing without joining back to the - * operation row. + * preserved on log lines and traces so an operator looking at a failing update call can see which + * (db, table, operation) the caller believed it was updating without joining back to the operation + * row. */ @Data @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class UpdateOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId;