From 2119555c2fe7748f704dfdf245fb32921349ba52 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:00:58 -0700 Subject: [PATCH 001/104] =?UTF-8?q?feat(optimizer):=20add=20data=20model?= =?UTF-8?q?=20=E2=80=94=20schema,=20entities,=20DTOs,=20converters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the optimizer service module with: - MySQL/H2 schema for table_operations, table_stats, table_stats_history, and table_operations_history - JPA entities with JSON column support (vladmihalcea hibernate-types) - All model/DTO/enum types: OperationType, OperationStatus, TableStats, CompleteOperationRequest, JobResult, OperationMetrics, etc. - JPA AttributeConverters for JobResult and OperationMetrics JSON columns - MapStruct mapper (OptimizerMapper) for entity→DTO conversion - Spring Boot application shell and build wiring (settings.gradle, build.gradle dockerPrereqs) No repositories, controllers, or service layer yet — those follow in subsequent PRs. 
Co-Authored-By: Claude Opus 4.6 --- build.gradle | 3 + services/optimizer/build.gradle | 17 ++++ .../OptimizerServiceApplication.java | 13 +++ .../optimizer/api/mapper/OptimizerMapper.java | 32 ++++++ .../api/model/CompleteOperationRequest.java | 31 ++++++ .../optimizer/api/model/JobResult.java | 25 +++++ .../api/model/OperationHistoryStatus.java | 7 ++ .../optimizer/api/model/OperationMetrics.java | 24 +++++ .../optimizer/api/model/OperationStatus.java | 21 ++++ .../optimizer/api/model/OperationType.java | 12 +++ .../api/model/TableOperationsDto.java | 40 ++++++++ .../api/model/TableOperationsHistoryDto.java | 43 ++++++++ .../optimizer/api/model/TableStats.java | 48 +++++++++ .../optimizer/api/model/TableStatsDto.java | 23 +++++ .../api/model/TableStatsHistoryDto.java | 22 +++++ .../model/UpsertTableOperationsRequest.java | 26 +++++ .../api/model/UpsertTableStatsRequest.java | 25 +++++ .../optimizer/config/JobResultConverter.java | 39 ++++++++ .../config/OperationMetricsConverter.java | 44 +++++++++ .../entity/TableOperationsHistoryRow.java | 91 +++++++++++++++++ .../optimizer/entity/TableOperationsRow.java | 99 +++++++++++++++++++ .../entity/TableStatsHistoryRow.java | 64 ++++++++++++ .../optimizer/entity/TableStatsRow.java | 57 +++++++++++ .../optimizer/entity/package-info.java | 2 + .../src/main/resources/application.properties | 20 ++++ .../main/resources/db/optimizer-schema.sql | 53 ++++++++++ .../resources/application-test.properties | 12 +++ settings.gradle | 1 + 28 files changed, 894 insertions(+) create mode 100644 services/optimizer/build.gradle create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java create mode 100644 services/optimizer/src/main/resources/application.properties create mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql create mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/build.gradle b/build.gradle index 4699ca592..4cfac4a5d 100644 --- a/build.gradle +++ b/build.gradle @@ -157,6 +157,7 @@ tasks.register('CopyGitHooksTask', Copy) { // tables-service.Dockerfile -> :services:tables:bootJar // housetables-service.Dockerfile -> :services:housetables:bootJar // jobs-service.Dockerfile -> :services:jobs:bootJar +// optimizer-service.Dockerfile -> :services:optimizer:bootJar // jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) // spark-base-hadoop2.8.dockerfile -> // :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR) @@ -176,6 +177,7 @@ tasks.register('dockerPrereqs') { dependsOn ':services:tables:bootJar' dependsOn ':services:housetables:bootJar' dependsOn ':services:jobs:bootJar' + dependsOn ':services:optimizer:bootJar' // Spark runtime uber JARs (shadowJar) dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar' @@ -196,6 +198,7 @@ tasks.register('dockerPrereqs') { println ' build/tables/libs/tables.jar' println ' build/housetables/libs/housetables.jar' println ' build/jobs/libs/jobs.jar' + println ' build/optimizer/libs/optimizer.jar' println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar' println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar' println ' 
build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar' diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle new file mode 100644 index 000000000..c05c7f9c3 --- /dev/null +++ b/services/optimizer/build.gradle @@ -0,0 +1,17 @@ +plugins { + id 'openhouse.springboot-ext-conventions' + id 'org.springframework.boot' version '2.7.8' +} + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'mysql:mysql-connector-java:8.+' + testImplementation 'com.h2database:h2:2.2.224' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' +} + +test { + useJUnitPlatform() +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java new file mode 100644 index 000000000..38eb363a8 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/OptimizerServiceApplication.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +/** Spring Boot entry point for the Optimizer Service. 
*/ +@SpringBootApplication +public class OptimizerServiceApplication { + + public static void main(String[] args) { + SpringApplication.run(OptimizerServiceApplication.class, args); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java new file mode 100644 index 000000000..8c0b17462 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -0,0 +1,32 @@ +package com.linkedin.openhouse.optimizer.api.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import org.mapstruct.Mapper; + +/** + * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. + * + *

<p>Implemented by MapStruct at compile time; inject via {@code @Autowired} or a constructor. + */ +@Mapper(componentModel = "spring") +public interface OptimizerMapper { + + /** Map a {@link TableOperationsRow} to its DTO. */ + TableOperationsDto toDto(TableOperationsRow row); + + /** Map a {@link TableOperationsHistoryRow} to its DTO. */ + TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); + + /** Map a {@link TableStatsRow} to its DTO. */ + TableStatsDto toDto(TableStatsRow row); + + /** Map a {@link TableStatsHistoryRow} to its DTO. */ + TableStatsHistoryDto toDto(TableStatsHistoryRow row); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java new file mode 100644 index 000000000..c26893197 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -0,0 +1,31 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code POST /v1/table-operations/{id}/complete}. + * + *

Reports the outcome of a completed operation. The backend looks up the operation row by {@code + * id} and writes a history entry with the operation's table metadata and the supplied result. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class CompleteOperationRequest { + + /** Outcome of the operation. */ + private OperationHistoryStatus status; + + /** Error details on failure; {@code null} on success. */ + private JobResult result; + + /** Number of orphan files deleted; set by OFD Spark app on success. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java new file mode 100644 index 000000000..74942243c --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Result payload for a completed Spark maintenance job. + * + *

Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields + * are {@code null} on success; populated on failure. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java new file mode 100644 index 000000000..791d910a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java @@ -0,0 +1,7 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Terminal states for a completed Spark maintenance job. */ +public enum OperationHistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java new file mode 100644 index 000000000..d6f788fcc --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Denormalized stats snapshot captured by the Analyzer at analysis time. + * + *

Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are + * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, + * not cumulative totals. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class OperationMetrics { + + private Long tableSizeBytes; + private Integer numFilesAdded; + private Integer numFilesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java new file mode 100644 index 000000000..c97be441b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -0,0 +1,21 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** Lifecycle states for a table operation recommendation. */ +public enum OperationStatus { + + /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ + PENDING, + + /** Claimed by the Scheduler; waiting for the Jobs Service to return a job ID. */ + SCHEDULING, + + /** Job submitted to the Jobs Service; the row now carries a {@code jobId}. */ + SCHEDULED, + + /** + * Marked by the Scheduler when it detects duplicate PENDING rows for the same {@code (table_uuid, + * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED + * before the claim step. 
+ */ + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java new file mode 100644 index 000000000..05e4a1e7b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -0,0 +1,12 @@ +package com.linkedin.openhouse.optimizer.api.model; + +/** + * Maintenance operation types supported by the continuous optimizer. + * + *

Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as + * they are built out. + */ +public enum OperationType { + /** Removes orphaned data files no longer referenced by table metadata. */ + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java new file mode 100644 index 000000000..5eb5eaaa6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -0,0 +1,40 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations} — Analyzer recommendations read by the Scheduler. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsDto { + + /** Client-generated UUID identifying this specific operation recommendation. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** Current {@link OperationStatus} of the row. Defaults to {@code PENDING} on creation. */ + private OperationStatus status; + + /** Server-set when the row is first created by the Analyzer. */ + private Instant createdAt; + + /** Set by the Scheduler when claiming; {@code null} while PENDING. */ + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + private String jobId; + + /** Denormalized stats snapshot captured at analysis time. 
*/ + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java new file mode 100644 index 000000000..7dca34271 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -0,0 +1,43 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_operations_history} — append-only Spark job results. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistoryDto { + + /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + private String databaseName; + private String tableName; + private OperationType operationType; + + /** When the Spark job was submitted / ran. */ + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + private OperationHistoryStatus status; + + /** Spark job ID. */ + private String jobId; + + /** Job result payload; both fields null on success. */ + private JobResult result; + + /** Number of orphan files deleted; null for non-OFD operations or before completion. */ + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..cb77d994f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,48 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java new file mode 100644 index 000000000..1663d5ab0 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats} — used for response payloads. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsDto { + + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java new file mode 100644 index 000000000..142f00245 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -0,0 +1,22 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** DTO for {@code table_stats_history} — used for response payloads. 
*/ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistoryDto { + + private Long id; + private String tableUuid; + private String databaseId; + private String tableName; + private TableStats stats; + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java new file mode 100644 index 000000000..19dd1baac --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * PUT request body for {@code /v1/table-operations/{id}}. + * + *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all + * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, + * updates {@code metrics} on subsequent calls with the same {@code id}. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableOperationsRequest { + + private String tableUuid; + private String databaseName; + private String tableName; + private OperationType operationType; + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java new file mode 100644 index 000000000..3214a85a6 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Request body for {@code PUT /v1/table-stats/{tableUuid}}. + * + *

{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + * denormalized display columns carried in the body. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class UpsertTableStatsRequest { + + private String databaseId; + private String tableName; + private TableStats stats; + private Map tableProperties; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java new file mode 100644 index 000000000..4c9bfbe76 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java @@ -0,0 +1,39 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ +@Converter +public class JobResultConverter implements AttributeConverter<JobResult, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(JobResult attribute) { + if (attribute == null) { // An absent result is persisted as SQL NULL. + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } + + @Override + public JobResult convertToEntityAttribute(String dbData) { + if (dbData == null) { // SQL NULL maps back to an absent result. + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, JobResult.class); + } catch (IOException e) { + throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java new file mode 100644 index 000000000..27f0882f5 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java @@ -0,0 +1,44 @@ +package com.linkedin.openhouse.optimizer.config; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import java.io.IOException; +import javax.persistence.AttributeConverter; +import javax.persistence.Converter; + +/** + * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. + */ +@Converter +public class OperationMetricsConverter implements AttributeConverter<OperationMetrics, String> { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Override + public String convertToDatabaseColumn(OperationMetrics attribute) { + // Null metrics are valid for PENDING operations that have not yet produced output. 
+ if (attribute == null) { + return null; + } + try { + return OBJECT_MAPPER.writeValueAsString(attribute); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); + } + } + + @Override + public OperationMetrics convertToEntityAttribute(String dbData) { + // Null is stored for PENDING rows; return null so the entity reflects that state. + if (dbData == null) { + return null; + } + try { + return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); + } catch (IOException e) { + throw new IllegalStateException( + "Failed to deserialize OperationMetrics from JSON: " + dbData, e); + } + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java new file mode 100644 index 000000000..6a47b5022 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -0,0 +1,91 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.JobResultConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Append-only record of a completed Spark maintenance job. + * + *

Written by the Spark app after each table's operation finishes. The {@code id} is the same + * UUID as the originating {@code table_operations.id}, tying each history entry directly back to + * the specific operation cycle that produced it. Multiple runs of the same operation on the same + * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + */ +@Entity +@Table( + name = "table_operations_history", + indexes = { + @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), + @Index(name = "idx_op_type_hist", columnList = "operation_type"), + @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_status_hist", columnList = "status"), + @Index(name = "idx_job_id", columnList = "job_id") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsHistoryRow { + + /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + /** When the Spark job was submitted / ran, as reported by the job itself. */ + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + /** {@code SUCCESS} or {@code FAILED}. */ + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationHistoryStatus status; + + /** Spark job ID; indexed for job → result lookups. 
*/ + @Column(name = "job_id", length = 255) + private String jobId; + + /** Job result: error details on failure, both fields null on success. */ + @Convert(converter = JobResultConverter.class) + @Column(name = "result") + private JobResult result; + + /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ + @Column(name = "orphan_files_deleted") + private Integer orphanFilesDeleted; + + /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. */ + @Column(name = "orphan_bytes_deleted") + private Long orphanBytesDeleted; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java new file mode 100644 index 000000000..9d835aa20 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -0,0 +1,99 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Convert; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JPA entity representing an Analyzer recommendation for a table maintenance operation. + * + *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row + * when it first recommends an operation for a table, or when re-recommending after a prior terminal + * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code + * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). + * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, + * operation_type)} at a time. + */ +@Entity +@Table( + name = "table_operations", + indexes = { + @Index(name = "idx_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_op_type", columnList = "operation_type"), + @Index(name = "idx_status", columnList = "status"), + @Index(name = "idx_created_at", columnList = "created_at"), + @Index(name = "idx_scheduled_at", columnList = "scheduled_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsRow { + + /** Client-generated UUID identifying this specific operation recommendation. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationStatus status; + + /** When the Analyzer first created this row. Set by the service on insert; never updated. 
*/ + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ + @Column(name = "scheduled_at") + private Instant scheduledAt; + + /** Job ID returned by the Jobs Service after successful submission. */ + @Column(name = "job_id", length = 255) + private String jobId; + + /** + * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} + * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. + */ + @Column(name = "version") + private Long version; + + /** + * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file + * counts as of the moment the Analyzer ran. + */ + @Convert(converter = OperationMetricsConverter.class) + @Column(name = "metrics") + private OperationMetrics metrics; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java new file mode 100644 index 000000000..85d97a5eb --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -0,0 +1,64 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import 
org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * query this table to reconstruct change rates over arbitrary time windows. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id", nullable = false) + private Long id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..71d6a9421 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,57 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; 
+import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java new file mode 100644 index 000000000..7c0ca1f67 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java @@ -0,0 +1,2 @@ +/** JPA entities for the optimizer service. 
*/ +package com.linkedin.openhouse.optimizer.entity; diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties new file mode 100644 index 000000000..c6c3f8437 --- /dev/null +++ b/services/optimizer/src/main/resources/application.properties @@ -0,0 +1,20 @@ +spring.application.name=openhouse-optimizer-service +server.port=8080 + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect +spring.jpa.properties.hibernate.show_sql=false +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} +spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} +spring.datasource.hikari.maximum-pool-size=20 + +management.endpoints.web.exposure.include=health,prometheus +management.endpoint.health.enabled=true diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql new file mode 100644 index 000000000..53062c5ad --- /dev/null +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -0,0 +1,53 @@ +-- Optimizer Service Schema +-- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
+CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + metrics TEXT, + PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); + +CREATE TABLE IF NOT EXISTS table_stats_history ( + id BIGINT NOT NULL AUTO_INCREMENT, + table_uuid VARCHAR(36) NOT NULL, + database_id VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + stats TEXT, + recorded_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (id), + INDEX idx_tsh_table_uuid (table_uuid), + INDEX idx_tsh_recorded_at (recorded_at) +); + +CREATE TABLE IF NOT EXISTS table_operations_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + submitted_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + job_id VARCHAR(255), + result TEXT, + orphan_files_deleted INT, + orphan_bytes_deleted BIGINT, + PRIMARY KEY (id) +); diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties new file mode 100644 index 000000000..97b7841dc --- /dev/null +++ b/services/optimizer/src/test/resources/application-test.properties @@ -0,0 +1,12 @@ +spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 +spring.datasource.driver-class-name=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.hibernate.ddl-auto=none 
+spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql diff --git a/settings.gradle b/settings.gradle index 035e54349..cad06785e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -49,6 +49,7 @@ include ':libs:datalayout' include ':services:common' include ':services:housetables' include ':services:jobs' +include ':services:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 3c93d52f21ce82cc01ae37fef8ca5c1dba2522e1 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 3 Apr 2026 11:35:45 -0700 Subject: [PATCH 002/104] fix: address PR review feedback on optimizer data model - Remove OperationMetrics class and converter; stats are read directly from table_stats instead of duplicating into operations - Remove orphanFilesDeleted/orphanBytesDeleted from history entity, DTO, and schema; operation-specific data belongs in the result JSON - Add addedSizeBytes to CommitDelta for tracking write volume - Fix OperationType javadoc to describe current state, not roadmap - Fix TableOperationsHistoryRow javadoc: written on operation complete, not by Spark app directly - Add field comments to all DTOs and request objects Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/OperationMetrics.java | 24 ---------- .../optimizer/api/model/OperationType.java | 7 +-- .../api/model/TableOperationsDto.java | 9 +++- .../api/model/TableOperationsHistoryDto.java | 12 ++--- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 11 +++++ .../api/model/TableStatsHistoryDto.java | 11 +++++ .../model/UpsertTableOperationsRequest.java | 11 +++-- 
.../api/model/UpsertTableStatsRequest.java | 7 +++ .../config/OperationMetricsConverter.java | 44 ------------------- .../entity/TableOperationsHistoryRow.java | 20 +++------ .../optimizer/entity/TableOperationsRow.java | 10 ++--- .../main/resources/db/optimizer-schema.sql | 2 - 13 files changed, 58 insertions(+), 111 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java deleted file mode 100644 index d6f788fcc..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationMetrics.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Denormalized stats snapshot captured by the Analyzer at analysis time. - * - *

Stored as JSON in the {@code metrics} column of {@code table_operations}. These values are - * point-in-time snapshots — they record what the Analyzer saw when it recommended the operation, - * not cumulative totals. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class OperationMetrics { - - private Long tableSizeBytes; - private Integer numFilesAdded; - private Integer numFilesDeleted; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 05e4a1e7b..8507bae12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -1,11 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; -/** - * Maintenance operation types supported by the continuous optimizer. - * - *

Only {@code ORPHAN_FILES_DELETION} is currently implemented. Additional types will be added as - * they are built out. - */ +/** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 5eb5eaaa6..9c33d8907 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -19,8 +19,13 @@ public class TableOperationsDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display; not part of the primary key. */ private String databaseName; + + /** Denormalized table name for display; not part of the primary key. */ private String tableName; + + /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ @@ -35,6 +40,6 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - /** Denormalized stats snapshot captured at analysis time. */ - private OperationMetrics metrics; + /** Reserved for future per-operation metadata; currently unused. 
*/ + private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 7dca34271..efc9bebbb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -6,7 +6,7 @@ import lombok.Data; import lombok.NoArgsConstructor; -/** DTO for {@code table_operations_history} — append-only Spark job results. */ +/** DTO for {@code table_operations_history} — append-only operation results. */ @Data @Builder @NoArgsConstructor @@ -23,21 +23,15 @@ public class TableOperationsHistoryDto { private String tableName; private OperationType operationType; - /** When the Spark job was submitted / ran. */ + /** When the operation completed, as recorded by the complete endpoint. */ private Instant submittedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; - /** Spark job ID. */ + /** Job ID from the Jobs Service. */ private String jobId; /** Job result payload; both fields null on success. */ private JobResult result; - - /** Number of orphan files deleted; null for non-OFD operations or before completion. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index cb77d994f..51aa8a712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -43,6 +43,7 @@ public static class SnapshotMetrics { public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; + private Long addedSizeBytes; private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 1663d5ab0..a668af434 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -14,10 +14,21 @@ @AllArgsConstructor public class TableStatsDto { + /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload, stored as JSON. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** When this row was last written. Used for staleness monitoring. 
*/ private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 142f00245..0604e07de 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,10 +13,21 @@ @AllArgsConstructor public class TableStatsHistoryDto { + /** Auto-increment primary key. */ private Long id; + + /** Stable Iceberg table UUID. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Snapshot + delta stats from this commit event. */ private TableStats stats; + + /** When this history row was recorded. */ private Instant recordedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java index 19dd1baac..21174c337 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java @@ -9,8 +9,7 @@ * PUT request body for {@code /v1/table-operations/{id}}. * *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service upserts by {@code id}: creates on first call, - * updates {@code metrics} on subsequent calls with the same {@code id}. + * table-identifying fields in this body. The service creates the row on first call. */ @Data @Builder @@ -18,9 +17,15 @@ @AllArgsConstructor public class UpsertTableOperationsRequest { + /** Stable Iceberg table UUID identifying the target table. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; + + /** The type of maintenance operation to create. */ private OperationType operationType; - private OperationMetrics metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 3214a85a6..721c3deaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -18,8 +18,15 @@ @AllArgsConstructor public class UpsertTableStatsRequest { + /** Denormalized database name for display. */ private String databaseId; + + /** Denormalized table name for display. */ private String tableName; + + /** Combined snapshot + delta stats payload from this commit. */ private TableStats stats; + + /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java deleted file mode 100644 index 27f0882f5..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/OperationMetricsConverter.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** - * JPA {@link AttributeConverter} that serializes {@link OperationMetrics} to/from a JSON string. - */ -@Converter -public class OperationMetricsConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(OperationMetrics attribute) { - // Null metrics are valid for PENDING operations that have not yet produced output. - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize OperationMetrics to JSON", e); - } - } - - @Override - public OperationMetrics convertToEntityAttribute(String dbData) { - // Null is stored for PENDING rows; return null so the entity reflects that state. 
- if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, OperationMetrics.class); - } catch (IOException e) { - throw new IllegalStateException( - "Failed to deserialize OperationMetrics from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6a47b5022..e7493024c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -21,12 +21,12 @@ import lombok.NoArgsConstructor; /** - * Append-only record of a completed Spark maintenance job. + * Append-only record of a completed maintenance operation. * - *

Written by the Spark app after each table's operation finishes. The {@code id} is the same - * UUID as the originating {@code table_operations.id}, tying each history entry directly back to - * the specific operation cycle that produced it. Multiple runs of the same operation on the same - * table produce multiple rows (each cycle gets a new UUID from the Analyzer). + *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the + * originating {@code table_operations.id}, tying each history entry back to the operation cycle + * that produced it. Multiple runs of the same operation on the same table produce multiple rows + * (each cycle gets a new UUID from the Analyzer). */ @Entity @Table( @@ -63,7 +63,7 @@ public class TableOperationsHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; - /** When the Spark job was submitted / ran, as reported by the job itself. */ + /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "submitted_at", nullable = false) private Instant submittedAt; @@ -80,12 +80,4 @@ public class TableOperationsHistoryRow { @Convert(converter = JobResultConverter.class) @Column(name = "result") private JobResult result; - - /** Number of orphan files deleted by the Spark job; null for non-OFD operations. */ - @Column(name = "orphan_files_deleted") - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; null for non-OFD operations. 
*/ - @Column(name = "orphan_bytes_deleted") - private Long orphanBytesDeleted; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 9d835aa20..e5493b510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,9 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationMetrics; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.OperationMetricsConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; @@ -90,10 +87,9 @@ public class TableOperationsRow { private Long version; /** - * Denormalized stats snapshot captured at analysis time: table size, snapshot count, and file - * counts as of the moment the Analyzer ran. + * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer + * reads stats directly from {@code table_stats} instead of duplicating them here. 
*/ - @Convert(converter = OperationMetricsConverter.class) @Column(name = "metrics") - private OperationMetrics metrics; + private String metrics; } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 53062c5ad..098380e7f 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,7 +47,5 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - orphan_files_deleted INT, - orphan_bytes_deleted BIGINT, PRIMARY KEY (id) ); From d419eb31f0449b5893739391047cf1af013cc6e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 10:57:51 -0700 Subject: [PATCH 003/104] feat(optimizer): add repositories and repository tests Spring Data JPA repositories for all four optimizer tables with filtered query support. Includes tests exercising save/find, filtered queries, upsert semantics, and append-only history. 
Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 60 ++++++ .../repository/TableOperationsRepository.java | 33 +++ .../TableStatsHistoryRepository.java | 41 ++++ .../repository/TableStatsRepository.java | 25 +++ .../OptimizerServiceContextTest.java | 19 ++ .../TableOperationsHistoryRepositoryTest.java | 189 ++++++++++++++++++ .../TableOperationsRepositoryTest.java | 135 +++++++++++++ .../TableStatsHistoryRepositoryTest.java | 127 ++++++++++++ .../repository/TableStatsRepositoryTest.java | 141 +++++++++++++ 9 files changed, 770 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java new file mode 
100644 index 000000000..2ba5bdf7a --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -0,0 +1,60 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** + * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is client-supplied {@code id}. + */ +@Repository +public interface TableOperationsHistoryRepository + extends JpaRepository { + + /** + * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. + * + * @param tableUuid the stable table UUID + * @param limit maximum number of rows to return + * @return history rows ordered by {@code submitted_at} descending + */ + @Query( + value = + "SELECT * FROM table_operations_history " + + "WHERE table_uuid = :tableUuid " + + "ORDER BY submitted_at DESC LIMIT :limit", + nativeQuery = true) + List find( + @Param("tableUuid") String tableUuid, @Param("limit") int limit); + + /** + * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Every parameter is optional — pass {@code null} to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "AND (:until IS NULL OR r.submittedAt <= :until) " + + "ORDER BY r.submittedAt DESC") + List findFiltered( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + @Param("operationType") OperationType operationType, + @Param("status") OperationHistoryStatus status, + @Param("since") Instant since, + @Param("until") Instant until, + Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..69476991f --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; +import org.springframework.stereotype.Repository; + +/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. 
*/ +@Repository +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. + */ + @Query( + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..c6ec3befd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. 
+ * + * @param tableUuid the stable table UUID + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "ORDER BY r.recordedAt DESC") + List findByTableUuid( + @Param("tableUuid") String tableUuid, Pageable pageable); + + /** + * Return history rows for a table recorded at or after {@code since}, newest first. + * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND r.recordedAt >= :since " + + "ORDER BY r.recordedAt DESC") + List findByTableUuidSince( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6c071cf5b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. No filters returns all rows. 
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findFiltered( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java new file mode 100644 index 000000000..abb89ec42 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -0,0 +1,19 @@ +package com.linkedin.openhouse.optimizer; + +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; + +/** + * Validates that the Spring application context loads successfully against the H2 schema. This test + * exercises schema-SQL-init, JPA entity scanning, and repository wiring. + */ +@SpringBootTest +@ActiveProfiles("test") +class OptimizerServiceContextTest { + + @Test + void contextLoads() { + // Context load is the assertion — no additional assertions needed. 
+ } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java new file mode 100644 index 000000000..9bde34334 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -0,0 +1,189 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsHistoryRepositoryTest { + + @Autowired TableOperationsHistoryRepository repository; + + @Test + void appendAndFindByTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t1) + .status(OperationHistoryStatus.SUCCESS) + .jobId("job-001") + .build()); + + repository.save( + 
TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(t2) + .status(OperationHistoryStatus.FAILED) + .jobId("job-002") + .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .build()); + + List rows = repository.find(tableUuid, 10); + + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); + assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + } + + @Test + void appendIsNonDestructive_multipleRunsRetained() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 3; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 10); + assertThat(rows).hasSize(3); + } + + @Test + void find_respectsLimit() { + Instant now = Instant.now(); + String tableUuid = UUID.randomUUID().toString(); + for (int i = 0; i < 5; i++) { + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl3") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(i)) + .status(OperationHistoryStatus.SUCCESS) + .build()); + } + + List rows = repository.find(tableUuid, 3); + assertThat(rows).hasSize(3); + } + + @Test + void findFiltered_noParams_returnsAll() { + Instant now = Instant.now(); + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + 
.tableUuid(uuid1) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(uuid2) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(now.plusSeconds(1)) + .status(OperationHistoryStatus.FAILED) + .build()); + + List rows = + repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + assertThat(rows).hasSize(2); + // Newest first + assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); + } + + @Test + void findFiltered_byStatusAndTimeWindow() { + Instant old = Instant.parse("2024-01-01T00:00:00Z"); + Instant recent = Instant.parse("2024-06-01T00:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(old) + .status(OperationHistoryStatus.SUCCESS) + .build()); + repository.save( + TableOperationsHistoryRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .submittedAt(recent) + .status(OperationHistoryStatus.FAILED) + .build()); + + // Filter by status + List failed = + repository.findFiltered( + null, + null, + null, + null, + OperationHistoryStatus.FAILED, + null, + null, + PageRequest.of(0, 100)); + assertThat(failed).hasSize(1); + assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + + // Filter by time window + Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); + List afterCutoff = + repository.findFiltered(null, null, null, null, null, cutoff, null, 
PageRequest.of(0, 100)); + assertThat(afterCutoff).hasSize(1); + assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java new file mode 100644 index 000000000..d7b8ee0b8 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -0,0 +1,135 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableOperationsRepositoryTest { + + @Autowired TableOperationsRepository repository; + + @Test + void saveAndFindById() { + String id = UUID.randomUUID().toString(); + + TableOperationsRow row = + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + + repository.save(row); + + Optional found = repository.findById(id); + assertThat(found).isPresent(); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + } + + @Test + 
void findFiltered_noParams_returnsAll() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List rows = repository.findFiltered(null, null, null, null, null); + assertThat(rows).hasSize(2); + } + + @Test + void findFiltered_byStatus() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + List pending = + repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + assertThat(pending).hasSize(1); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + + List scheduled = + repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + assertThat(scheduled).hasSize(1); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + } + + @Test + void findFiltered_byDatabaseAndTable() { + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + 
.tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + repository.save( + TableOperationsRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db2") + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java new file mode 100644 index 000000000..fb86762dc --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -0,0 +1,127 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class 
TableStatsHistoryRepositoryTest { + + @Autowired TableStatsHistoryRepository repository; + + @Test + void saveAndFindByTableUuid() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + + assertThat(rows).hasSize(3); + // newest first + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L); + } + + @Test + void findByTableUuid_respectsLimit() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + for (int i = 0; i < 5; i++) { + repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); + } + + List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + + assertThat(rows).hasSize(3); + } + + @Test + void findByTableUuidSince_filtersOlderRows() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + Instant cutoff = now.minus(90, ChronoUnit.MINUTES); + + repository.save(buildRow(tableUuid, "db1", "tbl1", 10L, 2L, now.minus(2, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); + repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); + + List rows = + repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + + // only the 2 rows within the last 90 minutes + assertThat(rows).hasSize(2); + assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + } + + @Test + void findByTableUuid_isolatesByTableUuid() { + String uuid1 = UUID.randomUUID().toString(); + String uuid2 = 
UUID.randomUUID().toString(); + Instant now = Instant.now(); + + repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); + repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); + + assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + } + + @Test + void autoIncrementId() { + String tableUuid = UUID.randomUUID().toString(); + Instant now = Instant.now(); + + TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now)); + TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now)); + + assertThat(row1.getId()).isNotNull(); + assertThat(row2.getId()).isNotNull(); + assertThat(row2.getId()).isGreaterThan(row1.getId()); + } + + private static TableStatsHistoryRow buildRow( + String tableUuid, + String databaseId, + String tableName, + long numFilesAdded, + long numFilesDeleted, + Instant recordedAt) { + return TableStatsHistoryRow.builder() + .tableUuid(tableUuid) + .databaseId(databaseId) + .tableName(tableName) + .stats( + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder() + .clusterId("cl1") + .tableSizeBytes(1024L) + .build()) + .delta( + TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .build()) + .build()) + .recordedAt(recordedAt) + .build(); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java new file mode 100644 index 000000000..5efb49148 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -0,0 +1,141 @@ +package com.linkedin.openhouse.optimizer.repository; + +import static org.assertj.core.api.Assertions.assertThat; + +import 
com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class TableStatsRepositoryTest { + + @Autowired TableStatsRepository repository; + + @Test + void saveAndFindById() { + String tableUuid = UUID.randomUUID().toString(); + TableStats stats = + TableStats.builder() + .snapshot( + TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build()) + .build(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats(stats) + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .updatedAt(Instant.now()) + .build()); + + Optional found = repository.findById(tableUuid); + assertThat(found).isPresent(); + assertThat(found.get().getDatabaseId()).isEqualTo("db1"); + assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getTableProperties()) + .containsEntry("maintenance.optimizer.ofd.enabled", "true"); + } + + @Test + void upsert_overwritesPreviousStats() { + String tableUuid = UUID.randomUUID().toString(); + + repository.save( + TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + repository.save( + TableStatsRow.builder() 
+ .tableUuid(tableUuid) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findAll()).hasSize(1); + assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes()) + .isEqualTo(200L); + } + + @Test + void findFiltered_noParams_returnsAll() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered(null, null, null)).hasSize(2); + } + + @Test + void findFiltered_byDatabase() { + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + repository.save( + TableStatsRow.builder() + .tableUuid(UUID.randomUUID().toString()) + .databaseId("db2") + .tableName("tbl2") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .updatedAt(Instant.now()) + .build()); + + assertThat(repository.findFiltered("db1", null, null)).hasSize(1); + assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + } +} From 7ff3b4360877580f395650223c19542849a5e1f7 Mon Sep 17 00:00:00 2001 
From: mkuchenbecker Date: Mon, 6 Apr 2026 11:35:45 -0700 Subject: [PATCH 004/104] =?UTF-8?q?fix:=20consolidate=20repo=20methods=20?= =?UTF-8?q?=E2=80=94=20single=20find=20with=20optional=20filters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review comments: rename findFiltered → find across all repos, remove redundant findByTableUuid/findByTableUuidSince from history repos, add explicit assertion to context test. Co-Authored-By: Claude Opus 4.6 --- .../TableOperationsHistoryRepository.java | 18 +-------------- .../repository/TableOperationsRepository.java | 2 +- .../TableStatsHistoryRepository.java | 22 +++++-------------- .../repository/TableStatsRepository.java | 2 +- .../OptimizerServiceContextTest.java | 8 ++++++- .../TableOperationsHistoryRepositoryTest.java | 19 +++++++++------- .../TableOperationsRepositoryTest.java | 18 +++++++-------- .../TableStatsHistoryRepositoryTest.java | 19 ++++++++-------- .../repository/TableStatsRepositoryTest.java | 10 ++++----- 9 files changed, 49 insertions(+), 69 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 2ba5bdf7a..71ab1cde4 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -18,22 +18,6 @@ public interface TableOperationsHistoryRepository extends JpaRepository { - /** - * Return the most recent history rows for a table UUID, newest first, up to {@code limit} rows. 
- * - * @param tableUuid the stable table UUID - * @param limit maximum number of rows to return - * @return history rows ordered by {@code submitted_at} descending - */ - @Query( - value = - "SELECT * FROM table_operations_history " - + "WHERE table_uuid = :tableUuid " - + "ORDER BY submitted_at DESC LIMIT :limit", - nativeQuery = true) - List find( - @Param("tableUuid") String tableUuid, @Param("limit") int limit); - /** * Return history rows matching the given filters, ordered by {@code submittedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. @@ -48,7 +32,7 @@ List find( + "AND (:since IS NULL OR r.submittedAt >= :since) " + "AND (:until IS NULL OR r.submittedAt <= :until) " + "ORDER BY r.submittedAt DESC") - List findFiltered( + List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 69476991f..891322134 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -24,7 +24,7 @@ public interface TableOperationsRepository extends JpaRepository findFiltered( + List find( @Param("operationType") OperationType operationType, @Param("status") OperationStatus status, @Param("databaseName") String databaseName, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index c6ec3befd..767d60c22 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -12,30 +12,18 @@ public interface TableStatsHistoryRepository extends JpaRepository { /** - * Return history rows for a table, newest first. + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. * * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip * @param pageable use {@code PageRequest.of(0, limit)} to cap results */ @Query( "SELECT r FROM TableStatsHistoryRow r " + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + "ORDER BY r.recordedAt DESC") - List findByTableUuid( - @Param("tableUuid") String tableUuid, Pageable pageable); - - /** - * Return history rows for a table recorded at or after {@code since}, newest first. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND r.recordedAt >= :since " - + "ORDER BY r.recordedAt DESC") - List findByTableUuidSince( + List find( @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6c071cf5b..ecae70feb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -18,7 +18,7 @@ public interface TableStatsRepository extends JpaRepository findFiltered( + List find( @Param("databaseId") String databaseId, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java index abb89ec42..fa373c57d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/OptimizerServiceContextTest.java @@ -1,7 +1,11 @@ package com.linkedin.openhouse.optimizer; +import static org.assertj.core.api.Assertions.assertThat; + import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.ApplicationContext; import 
org.springframework.test.context.ActiveProfiles; /** @@ -12,8 +16,10 @@ @ActiveProfiles("test") class OptimizerServiceContextTest { + @Autowired ApplicationContext context; + @Test void contextLoads() { - // Context load is the assertion — no additional assertions needed. + assertThat(context).isNotNull(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 9bde34334..1a35a8fda 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -54,7 +54,8 @@ void appendAndFindByTableUuid() { .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) .build()); - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); // Newest first @@ -79,7 +80,8 @@ void appendIsNonDestructive_multipleRunsRetained() { .build()); } - List rows = repository.find(tableUuid, 10); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); assertThat(rows).hasSize(3); } @@ -100,12 +102,13 @@ void find_respectsLimit() { .build()); } - List rows = repository.find(tableUuid, 3); + List rows = + repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { Instant now = Instant.now(); String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); @@ -132,14 +135,14 @@ void findFiltered_noParams_returnsAll() { .build()); List rows = - 
repository.findFiltered(null, null, null, null, null, null, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(2); // Newest first assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); } @Test - void findFiltered_byStatusAndTimeWindow() { + void find_byStatusAndTimeWindow() { Instant old = Instant.parse("2024-01-01T00:00:00Z"); Instant recent = Instant.parse("2024-06-01T00:00:00Z"); String tableUuid = UUID.randomUUID().toString(); @@ -167,7 +170,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by status List failed = - repository.findFiltered( + repository.find( null, null, null, @@ -182,7 +185,7 @@ void findFiltered_byStatusAndTimeWindow() { // Filter by time window Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); List afterCutoff = - repository.findFiltered(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); + repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); assertThat(afterCutoff).hasSize(1); assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index d7b8ee0b8..b1342b12d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -45,7 +45,7 @@ void saveAndFindById() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -67,12 +67,12 @@ void findFiltered_noParams_returnsAll() { .createdAt(Instant.now()) .build()); - List rows = 
repository.findFiltered(null, null, null, null, null); + List rows = repository.find(null, null, null, null, null); assertThat(rows).hasSize(2); } @Test - void findFiltered_byStatus() { + void find_byStatus() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -95,18 +95,18 @@ void findFiltered_byStatus() { .build()); List pending = - repository.findFiltered(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING, null, null, null); assertThat(pending).hasSize(1); assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.findFiltered(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED, null, null, null); assertThat(scheduled).hasSize(1); assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test - void findFiltered_byDatabaseAndTable() { + void find_byDatabaseAndTable() { repository.save( TableOperationsRow.builder() .id(UUID.randomUUID().toString()) @@ -128,8 +128,8 @@ void findFiltered_byDatabaseAndTable() { .createdAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.findFiltered(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); + assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); + assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index fb86762dc..a76c7155d 100644 --- 
a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -23,7 +23,7 @@ class TableStatsHistoryRepositoryTest { @Autowired TableStatsHistoryRepository repository; @Test - void saveAndFindByTableUuid() { + void saveAndFind() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -31,7 +31,7 @@ void saveAndFindByTableUuid() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 100)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(3); // newest first @@ -40,7 +40,7 @@ void saveAndFindByTableUuid() { } @Test - void findByTableUuid_respectsLimit() { + void find_respectsLimit() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -48,13 +48,13 @@ void findByTableUuid_respectsLimit() { repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); } - List rows = repository.findByTableUuid(tableUuid, PageRequest.of(0, 3)); + List rows = repository.find(tableUuid, null, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void findByTableUuidSince_filtersOlderRows() { + void find_withSince_filtersOlderRows() { String tableUuid = UUID.randomUUID().toString(); Instant now = Instant.now(); Instant cutoff = now.minus(90, ChronoUnit.MINUTES); @@ -63,8 +63,7 @@ void findByTableUuidSince_filtersOlderRows() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = - repository.findByTableUuidSince(tableUuid, cutoff, PageRequest.of(0, 100)); + List rows = 
repository.find(tableUuid, cutoff, PageRequest.of(0, 100)); // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); @@ -72,7 +71,7 @@ void findByTableUuidSince_filtersOlderRows() { } @Test - void findByTableUuid_isolatesByTableUuid() { + void find_isolatesByTableUuid() { String uuid1 = UUID.randomUUID().toString(); String uuid2 = UUID.randomUUID().toString(); Instant now = Instant.now(); @@ -80,8 +79,8 @@ void findByTableUuid_isolatesByTableUuid() { repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); - assertThat(repository.findByTableUuid(uuid1, PageRequest.of(0, 100))).hasSize(1); - assertThat(repository.findByTableUuid(uuid2, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1); } @Test diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 5efb49148..a8ac1cbbb 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -83,7 +83,7 @@ void upsert_overwritesPreviousStats() { } @Test - void findFiltered_noParams_returnsAll() { + void find_noParams_returnsAll() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) @@ -107,11 +107,11 @@ void findFiltered_noParams_returnsAll() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered(null, null, null)).hasSize(2); + assertThat(repository.find(null, null, null)).hasSize(2); } @Test - void findFiltered_byDatabase() { + void find_byDatabase() { repository.save( TableStatsRow.builder() 
.tableUuid(UUID.randomUUID().toString()) @@ -135,7 +135,7 @@ void findFiltered_byDatabase() { .updatedAt(Instant.now()) .build()); - assertThat(repository.findFiltered("db1", null, null)).hasSize(1); - assertThat(repository.findFiltered("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + assertThat(repository.find("db1", null, null)).hasSize(1); + assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); } } From f7f6812639a9a478d6abe9f003f17464af1f80d0 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 10:59:15 -0700 Subject: [PATCH 005/104] feat(optimizer): add REST service layer, controllers, and shared module Service interface and implementation for all optimizer CRUD operations including complete-operation lifecycle, stats upsert with history double-write, and filtered queries. Three REST controllers expose the endpoints. The apps/optimizer shared module provides lightweight entity/repo copies for the analyzer and scheduler apps. 
Co-Authored-By: Claude Opus 4.6 --- apps/optimizer/build.gradle | 13 ++ .../entity/TableOperationHistoryRow.java | 37 ++++ .../optimizer/entity/TableOperationRow.java | 55 +++++ .../optimizer/entity/TableStatsRow.java | 53 +++++ .../openhouse/optimizer/model/TableStats.java | 45 ++++ .../TableOperationHistoryRepository.java | 23 ++ .../repository/TableOperationsRepository.java | 75 +++++++ .../repository/TableStatsRepository.java | 26 +++ .../controller/TableOperationsController.java | 66 ++++++ .../TableOperationsHistoryController.java | 60 ++++++ .../api/controller/TableStatsController.java | 69 ++++++ .../api/model/CompleteOperationRequest.java | 6 - .../service/OptimizerDataService.java | 98 +++++++++ .../service/OptimizerDataServiceImpl.java | 202 ++++++++++++++++++ settings.gradle | 1 + 15 files changed, 823 insertions(+), 6 deletions(-) create mode 100644 apps/optimizer/build.gradle create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java create mode 100644 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java diff --git a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle new file mode 100644 index 000000000..f14969274 --- /dev/null +++ b/apps/optimizer/build.gradle @@ -0,0 +1,13 @@ +plugins { + id 'openhouse.java-minimal-conventions' +} + +// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). +buildDir = "${rootProject.buildDir}/apps-optimizer" + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testRuntimeOnly 'com.h2database:h2' +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java new file mode 100644 index 000000000..4e638e2e1 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -0,0 +1,37 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ +@Entity +@Table(name = "table_operations_history") +@Getter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "submitted_at", nullable = false) + private Instant submittedAt; + + @Column(name = "status", nullable = false, length = 20) + private String status; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java new file mode 100644 index 000000000..fc0104604 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.entity; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ +@Entity +@Table(name = "table_operations") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 255) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Column(name = "operation_type", nullable = false, length = 50) + private String operationType; + + @Column(name = "status", nullable = false, length = 20) + private String status; + + @Column(name = "created_at") + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + @Column(name = "job_id", length = 255) + private String jobId; + + /** Plain version column — not managed by JPA optimistic locking. */ + @Column(name = "version") + private Long version; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java new file mode 100644 index 000000000..5cdf16a97 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -0,0 +1,53 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * JPA entity for the 
optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_id", nullable = false, length = 255) + private String databaseId; + + @Column(name = "table_name", nullable = false, length = 255) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java new file mode 100644 index 000000000..5e0f51468 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -0,0 +1,45 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long deletedSizeBytes; + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java new file mode 100644 index 000000000..a9434b4b7 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Repository for reading {@code table_operations_history} in the Analyzer. */ +public interface TableOperationHistoryRepository + extends JpaRepository { + + /** + * Returns all history rows for an operation type, newest first. Loaded once per analysis run and + * grouped in memory by {@code tableUuid} to eliminate per-table N+1 queries in the circuit + * breaker check. 
+ */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE r.operationType = :opType " + + "ORDER BY r.submittedAt DESC") + List findAllByOperationType(@Param("opType") String operationType); +}
diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java
new file mode 100644
index 000000000..404aaf873
--- /dev/null
+++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java
@@ -0,0 +1,75 @@
+package com.linkedin.openhouse.optimizer.repository;
+
+import com.linkedin.openhouse.optimizer.entity.TableOperationRow;
+import java.time.Instant;
+import java.util.Collection;
+import java.util.List;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.Modifying;
+import org.springframework.data.jpa.repository.Query;
+import org.springframework.data.repository.query.Param;
+
+/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */
+public interface TableOperationsRepository extends JpaRepository<TableOperationRow, String> {
+
+  /**
+   * Returns rows for the given operation type whose status is in {@code statuses}. Used by the
+   * Scheduler to load all PENDING rows in one query.
+   */
+  @Query(
+      "SELECT r FROM TableOperationRow r WHERE r.operationType = :type"
+          + " AND r.status IN :statuses")
+  List<TableOperationRow> findByTypeAndStatuses(
+      @Param("type") String operationType, @Param("statuses") Collection<String> statuses);
+
+  /**
+   * Returns all rows for the given operation type regardless of status. Used by the Analyzer to
+   * find the most recent row per table_uuid for scheduling decisions.
+   */
+  @Query("SELECT r FROM TableOperationRow r WHERE r.operationType = :type")
+  List<TableOperationRow> findByType(@Param("type") String operationType);
+
+  /**
+   * Cancel older duplicate PENDING rows for the same (table_uuid, operation_type), keeping only the
+   * row identified by {@code keepId}. Called by the Scheduler before claiming to prevent duplicate
+   * job submissions from concurrent Analyzer runs.
+   *
+   * @return the number of rows marked CANCELED
+   */
+  @Modifying
+  @Query(
+      "UPDATE TableOperationRow r SET r.status = 'CANCELED' "
+          + "WHERE r.tableUuid = :tableUuid AND r.operationType = :opType "
+          + "AND r.status = 'PENDING' AND r.id != :keepId")
+  int cancelDuplicatePending(
+      @Param("tableUuid") String tableUuid,
+      @Param("opType") String operationType,
+      @Param("keepId") String keepId);
+
+  /**
+   * Atomically claim a PENDING row by flipping its status to SCHEDULING. Returns 1 on success.
+   *
+   * <p>Returns 0 if another instance claimed the row first ({@code version} guard) or the row is
+   * no longer PENDING, e.g. canceled by {@link #cancelDuplicatePending} ({@code status} guard).
+   */
+  @Modifying(flushAutomatically = true, clearAutomatically = true)
+  @Query(
+      "UPDATE TableOperationRow r SET r.status = 'SCHEDULING', r.scheduledAt = :now,"
+          + " r.version = r.version + 1"
+          + " WHERE r.id = :id AND r.version = :version AND r.status = 'PENDING'")
+  int markScheduling(
+      @Param("id") String id, @Param("version") Long version, @Param("now") Instant now);
+
+  /**
+   * Transition a SCHEDULING row to SCHEDULED after the Jobs Service returns a job ID.
+   *
+   * @return 1 if updated, 0 if not found or wrong version/status
+   */
+  @Modifying(flushAutomatically = true, clearAutomatically = true)
+  @Query(
+      "UPDATE TableOperationRow r SET r.status = 'SCHEDULED', r.jobId = :jobId,"
+          + " r.version = r.version + 1"
+          + " WHERE r.id = :id AND r.version = :version AND r.status = 'SCHEDULING'")
+  int markScheduled(
+      @Param("id") String id, @Param("version") Long version, @Param("jobId") String jobId);
+}
diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..3c0ef40b8 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.stream.Stream; +import javax.persistence.QueryHint; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.jpa.repository.QueryHints; + +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB.
*/
+public interface TableStatsRepository extends JpaRepository<TableStatsRow, String> {
+
+  /**
+   * Streams all rows as a JDBC cursor rather than buffering them in memory. The caller must consume
+   * the stream inside an active {@code @Transactional} method and close it when done.
+   *
+   * <p>{@code Integer.MIN_VALUE} is MySQL Connector/J's signal to enable row-by-row streaming
+   * instead of loading the full result set into the driver buffer.
+   */
+  @Query("SELECT r FROM TableStatsRow r")
+  @QueryHints(
+      @QueryHint(
+          name = org.hibernate.jpa.QueryHints.HINT_FETCH_SIZE,
+          value = "" + Integer.MIN_VALUE))
+  Stream<TableStatsRow> streamAll();
+}
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java new file mode 100644 index 000000000..d8ba13b11 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -0,0 +1,66 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** REST controller for {@code table_operations}.
*/
+@RestController
+@RequestMapping("/v1/table-operations")
+@RequiredArgsConstructor
+public class TableOperationsController {
+
+  private final OptimizerDataService service;
+
+  /**
+   * Report that an operation has completed. The backend looks up the operation row, writes a
+   * history entry with the operation's table metadata and the supplied result. Returns 201 Created
+   * with the history row, or 404 if the operation does not exist.
+   */
+  @PostMapping("/{id}/complete")
+  public ResponseEntity<TableOperationsHistoryDto> completeOperation(
+      @PathVariable String id, @RequestBody CompleteOperationRequest request) {
+    return service
+        .completeOperation(id, request)
+        .map(dto -> ResponseEntity.status(HttpStatus.CREATED).body(dto))
+        .orElse(ResponseEntity.notFound().build());
+  }
+
+  /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. */
+  @GetMapping("/{id}")
+  public ResponseEntity<TableOperationsDto> getTableOperation(@PathVariable String id) {
+    return service
+        .getTableOperation(id)
+        .map(ResponseEntity::ok)
+        .orElse(ResponseEntity.notFound().build());
+  }
+
+  /**
+   * List operations matching the given filters. All parameters are optional — omit all to return
+   * every row.
+   */
+  @GetMapping
+  public ResponseEntity<List<TableOperationsDto>> listTableOperations(
+      @RequestParam(required = false) OperationType operationType,
+      @RequestParam(required = false) OperationStatus status,
+      @RequestParam(required = false) String databaseName,
+      @RequestParam(required = false) String tableName,
+      @RequestParam(required = false) String tableUuid) {
+    return ResponseEntity.ok(
+        service.listTableOperations(operationType, status, databaseName, tableName, tableUuid));
+  }
+}
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java
new file mode 100644
index 000000000..11c77a15d
--- /dev/null
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java
@@ -0,0 +1,60 @@
+package com.linkedin.openhouse.optimizer.api.controller;
+
+import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus;
+import com.linkedin.openhouse.optimizer.api.model.OperationType;
+import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto;
+import com.linkedin.openhouse.optimizer.service.OptimizerDataService;
+import java.time.Instant;
+import java.util.List;
+import lombok.RequiredArgsConstructor;
+import org.springframework.http.HttpStatus;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+/** REST controller for {@code table_operations_history}.
*/
+@RestController
+@RequestMapping("/v1/table-operations-history")
+@RequiredArgsConstructor
+public class TableOperationsHistoryController {
+
+  private final OptimizerDataService service;
+
+  /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */
+  @PostMapping
+  public ResponseEntity<TableOperationsHistoryDto> appendHistory(
+      @RequestBody TableOperationsHistoryDto dto) {
+    return ResponseEntity.status(HttpStatus.CREATED).body(service.appendHistory(dto));
+  }
+
+  /** Return the most recent history for a table, newest first, up to {@code limit} rows. */
+  @GetMapping("/{tableUuid}")
+  public ResponseEntity<List<TableOperationsHistoryDto>> getHistory(
+      @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) {
+    return ResponseEntity.ok(service.getHistory(tableUuid, limit));
+  }
+
+  /**
+   * List history rows matching the given filters, ordered newest first. All parameters are optional
+   * — omit all to return every row up to {@code limit}.
+   */
+  @GetMapping
+  public ResponseEntity<List<TableOperationsHistoryDto>> listHistory(
+      @RequestParam(required = false) String databaseName,
+      @RequestParam(required = false) String tableName,
+      @RequestParam(required = false) String tableUuid,
+      @RequestParam(required = false) OperationType operationType,
+      @RequestParam(required = false) OperationHistoryStatus status,
+      @RequestParam(required = false) Instant since,
+      @RequestParam(required = false) Instant until,
+      @RequestParam(defaultValue = "100") int limit) {
+    return ResponseEntity.ok(
+        service.listHistory(
+            databaseName, tableName, tableUuid, operationType, status, since, until, limit));
+  }
+}
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java
new file mode 100644
index 000000000..d469586a2
--- /dev/null
+++
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java
@@ -0,0 +1,69 @@
+package com.linkedin.openhouse.optimizer.api.controller;
+
+import com.linkedin.openhouse.optimizer.api.model.TableStatsDto;
+import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto;
+import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest;
+import com.linkedin.openhouse.optimizer.service.OptimizerDataService;
+import java.time.Instant;
+import java.util.List;
+import lombok.RequiredArgsConstructor;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.PutMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+
+/** REST controller for managing per-table stats in the optimizer DB. */
+@RestController
+@RequestMapping("/v1/table-stats")
+@RequiredArgsConstructor
+public class TableStatsController {
+
+  private final OptimizerDataService service;
+
+  /**
+   * Create or overwrite the stats row for {@code tableUuid}. Called by the Tables Service on every
+   * Iceberg commit. Overwrites the current row; each call also appends a stats-history row.
+   */
+  @PutMapping("/{tableUuid}")
+  public ResponseEntity<TableStatsDto> upsertTableStats(
+      @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) {
+    return ResponseEntity.ok(service.upsertTableStats(tableUuid, request));
+  }
+
+  /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. */
+  @GetMapping("/{tableUuid}")
+  public ResponseEntity<TableStatsDto> getTableStats(@PathVariable String tableUuid) {
+    return service
+        .getTableStats(tableUuid)
+        .map(ResponseEntity::ok)
+        .orElse(ResponseEntity.notFound().build());
+  }
+
+  /**
+   * List stats rows matching the given filters. All parameters are optional — omit all to return
+   * every row.
+   */
+  @GetMapping
+  public ResponseEntity<List<TableStatsDto>> listTableStats(
+      @RequestParam(required = false) String databaseId,
+      @RequestParam(required = false) String tableName,
+      @RequestParam(required = false) String tableUuid) {
+    return ResponseEntity.ok(service.listTableStats(databaseId, tableName, tableUuid));
+  }
+
+  /**
+   * Return per-commit stats history for {@code tableUuid}, newest first. Optionally filter by
+   * {@code since} (inclusive) and cap at {@code limit} rows.
+   */
+  @GetMapping("/{tableUuid}/history")
+  public ResponseEntity<List<TableStatsHistoryDto>> getStatsHistory(
+      @PathVariable String tableUuid,
+      @RequestParam(required = false) Instant since,
+      @RequestParam(defaultValue = "100") int limit) {
+    return ResponseEntity.ok(service.getStatsHistory(tableUuid, since, limit));
+  }
+}
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java
index c26893197..35f7ba782 100644
--- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java
@@ -22,10 +22,4 @@ public class CompleteOperationRequest {
 
   /** Error details on failure; {@code null} on success. */
   private JobResult result;
-
-  /** Number of orphan files deleted; set by OFD Spark app on success. */
-  private Integer orphanFilesDeleted;
-
-  /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success.
*/
-  private Long orphanBytesDeleted;
 }
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java
new file mode 100644
index 000000000..ce3120400
--- /dev/null
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java
@@ -0,0 +1,98 @@
+package com.linkedin.openhouse.optimizer.service;
+
+import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest;
+import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus;
+import com.linkedin.openhouse.optimizer.api.model.OperationStatus;
+import com.linkedin.openhouse.optimizer.api.model.OperationType;
+import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto;
+import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto;
+import com.linkedin.openhouse.optimizer.api.model.TableStatsDto;
+import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto;
+import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest;
+import java.time.Instant;
+import java.util.List;
+import java.util.Optional;
+
+/** Service interface for optimizer data operations. */
+public interface OptimizerDataService {
+
+  // --- TableOperations ---
+
+  /**
+   * List operations matching the given filters. Every parameter is optional — pass {@code null} to
+   * skip that filter. No filters returns all rows.
+   */
+  List<TableOperationsDto> listTableOperations(
+      OperationType operationType,
+      OperationStatus status,
+      String databaseName,
+      String tableName,
+      String tableUuid);
+
+  /**
+   * Complete an operation by writing a history entry. Looks up the operation row by {@code id},
+   * copies its table metadata into a new history row, and saves it. Returns the history DTO, or
+   * empty if the operation does not exist.
+   */
+  Optional<TableOperationsHistoryDto> completeOperation(
+      String id, CompleteOperationRequest request);
+
+  /**
+   * Return the operation row for {@code id} regardless of status, or empty if it does not exist.
+   * Used to poll a specific operation (e.g. waiting for SUCCESS after a Spark job completes).
+   */
+  Optional<TableOperationsDto> getTableOperation(String id);
+
+  // --- TableStats ---
+
+  /**
+   * Create or update the stats row for {@code tableUuid}. The previous snapshot is overwritten
+   * with the latest commit values. {@code OptimizerDataServiceImpl} also appends a history row.
+   */
+  TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request);
+
+  /** Return the stats row for {@code tableUuid}, or empty if none exists. */
+  Optional<TableStatsDto> getTableStats(String tableUuid);
+
+  /**
+   * List stats rows matching the given filters. Every parameter is optional — pass {@code null} to
+   * skip that filter. No filters returns all rows.
+   */
+  List<TableStatsDto> listTableStats(String databaseId, String tableName, String tableUuid);
+
+  /**
+   * Return per-commit stats history for {@code tableUuid}, newest first.
+   *
+   * @param tableUuid the stable table UUID
+   * @param since if non-null, only return rows recorded at or after this instant
+   * @param limit maximum number of rows to return
+   */
+  List<TableStatsHistoryDto> getStatsHistory(String tableUuid, Instant since, int limit);
+
+  // --- TableOperationsHistory ---
+
+  /** Append a completed-job result record. */
+  TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto);
+
+  /**
+   * Return the most recent history rows for a table UUID, newest first.
+   *
+   * @param tableUuid the stable table UUID
+   * @param limit maximum number of rows to return
+   */
+  List<TableOperationsHistoryDto> getHistory(String tableUuid, int limit);
+
+  /**
+   * List history rows matching the given filters, ordered newest first. Every parameter is optional
+   * — pass {@code null} to skip that filter. No filters returns all rows up to {@code limit}.
+   */
+  List<TableOperationsHistoryDto> listHistory(
+      String databaseName,
+      String tableName,
+      String tableUuid,
+      OperationType operationType,
+      OperationHistoryStatus status,
+      Instant since,
+      Instant until,
+      int limit);
+}
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java
new file mode 100644
index 000000000..dbc5f466b
--- /dev/null
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java
@@ -0,0 +1,202 @@
+package com.linkedin.openhouse.optimizer.service;
+
+import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper;
+import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest;
+import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus;
+import com.linkedin.openhouse.optimizer.api.model.OperationStatus;
+import com.linkedin.openhouse.optimizer.api.model.OperationType;
+import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto;
+import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto;
+import com.linkedin.openhouse.optimizer.api.model.TableStatsDto;
+import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto;
+import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest;
+import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow;
+import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow;
+import com.linkedin.openhouse.optimizer.entity.TableStatsRow;
+import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository;
+import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository;
+import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository;
+import com.linkedin.openhouse.optimizer.repository.TableStatsRepository;
+import java.time.Instant;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import lombok.RequiredArgsConstructor;
+import org.springframework.data.domain.PageRequest;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+
+/** Implementation of {@link OptimizerDataService}. */
+@Service
+@RequiredArgsConstructor
+public class OptimizerDataServiceImpl implements OptimizerDataService {
+
+  private final TableOperationsRepository operationsRepository;
+  private final TableOperationsHistoryRepository historyRepository;
+  private final TableStatsRepository statsRepository;
+  private final TableStatsHistoryRepository statsHistoryRepository;
+  private final OptimizerMapper mapper;
+
+  // --- TableOperations ---
+
+  @Override
+  public List<TableOperationsDto> listTableOperations(
+      OperationType operationType,
+      OperationStatus status,
+      String databaseName,
+      String tableName,
+      String tableUuid) {
+    return operationsRepository
+        .findFiltered(operationType, status, databaseName, tableName, tableUuid).stream()
+        .map(mapper::toDto)
+        .collect(Collectors.toList());
+  }
+
+  @Override
+  @Transactional
+  public Optional<TableOperationsHistoryDto> completeOperation(
+      String id, CompleteOperationRequest request) {
+    return operationsRepository
+        .findById(id)
+        .map(
+            row -> {
+              TableOperationsHistoryRow historyRow =
+                  TableOperationsHistoryRow.builder()
+                      .id(row.getId()) // history row reuses the operation's id
+                      .tableUuid(row.getTableUuid())
+                      .databaseName(row.getDatabaseName())
+                      .tableName(row.getTableName())
+                      .operationType(row.getOperationType())
+                      .submittedAt(Instant.now()) // stamped at completion time
+                      .status(request.getStatus())
+                      .jobId(row.getJobId())
+                      .result(request.getResult())
+                      .build();
+              return mapper.toDto(historyRepository.save(historyRow));
+            });
+  }
+
+  @Override
+  public Optional<TableOperationsDto> getTableOperation(String id) {
+    return operationsRepository.findById(id).map(mapper::toDto);
+  }
+
+  // --- TableStats ---
+
+  @Override
+  @Transactional
+  public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request) {
+    Instant now = Instant.now();
+    TableStatsRow row =
+        statsRepository
+            .findById(tableUuid)
+            .map(
+                existing ->
+                    existing
+                        .toBuilder()
+                        .databaseId(request.getDatabaseId())
+                        .tableName(request.getTableName())
+                        .stats(request.getStats())
+                        .tableProperties(request.getTableProperties())
+                        .updatedAt(now)
+                        .build())
+            .orElse(
+                TableStatsRow.builder()
+                    .tableUuid(tableUuid)
+                    .databaseId(request.getDatabaseId())
+                    .tableName(request.getTableName())
+                    .stats(request.getStats())
+                    .tableProperties(request.getTableProperties())
+                    .updatedAt(now)
+                    .build());
+    TableStatsDto saved = mapper.toDto(statsRepository.save(row));
+
+    statsHistoryRepository.save(
+        TableStatsHistoryRow.builder()
+            .tableUuid(tableUuid)
+            .databaseId(request.getDatabaseId())
+            .tableName(request.getTableName())
+            .stats(request.getStats())
+            .recordedAt(now) // same instant as the stats row's updatedAt
+            .build());
+
+    return saved;
+  }
+
+  @Override
+  public Optional<TableStatsDto> getTableStats(String tableUuid) {
+    return statsRepository.findById(tableUuid).map(mapper::toDto);
+  }
+
+  @Override
+  public List<TableStatsDto> listTableStats(String databaseId, String tableName, String tableUuid) {
+    return statsRepository.findFiltered(databaseId, tableName, tableUuid).stream()
+        .map(mapper::toDto)
+        .collect(Collectors.toList());
+  }
+
+  @Override
+  public List<TableStatsHistoryDto> getStatsHistory(String tableUuid, Instant since, int limit) {
+    PageRequest page = PageRequest.of(0, limit);
+    if (since != null) {
+      return statsHistoryRepository.findByTableUuidSince(tableUuid, since, page).stream()
+          .map(mapper::toDto)
+          .collect(Collectors.toList());
+    }
+    return statsHistoryRepository.findByTableUuid(tableUuid, page).stream()
+        .map(mapper::toDto)
+        .collect(Collectors.toList());
+  }
+
+  // --- TableOperationsHistory ---
+
+  @Override
+  @Transactional
+  public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) {
+    TableOperationsHistoryRow row =
+        TableOperationsHistoryRow.builder()
+            .id(dto.getId())
+            .tableUuid(dto.getTableUuid())
+            .databaseName(dto.getDatabaseName())
+            .tableName(dto.getTableName())
+            .operationType(dto.getOperationType())
+            .submittedAt(dto.getSubmittedAt() != null ? dto.getSubmittedAt() : Instant.now())
+            .status(dto.getStatus())
+            .jobId(dto.getJobId())
+            .result(dto.getResult())
+            .build();
+    return mapper.toDto(historyRepository.save(row));
+  }
+
+  @Override
+  public List<TableOperationsHistoryDto> getHistory(String tableUuid, int limit) {
+    return historyRepository.find(tableUuid, limit).stream()
+        .map(mapper::toDto)
+        .collect(Collectors.toList());
+  }
+
+  @Override
+  public List<TableOperationsHistoryDto> listHistory(
+      String databaseName,
+      String tableName,
+      String tableUuid,
+      OperationType operationType,
+      OperationHistoryStatus status,
+      Instant since,
+      Instant until,
+      int limit) {
+    return historyRepository
+        .findFiltered(
+            databaseName,
+            tableName,
+            tableUuid,
+            operationType,
+            status,
+            since,
+            until,
+            PageRequest.of(0, limit))
+        .stream()
+        .map(mapper::toDto)
+        .collect(Collectors.toList());
+  }
+}
diff --git a/settings.gradle b/settings.gradle
index cad06785e..0d64dad53 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -50,6 +50,7 @@ include ':services:common'
 include ':services:housetables'
 include ':services:jobs'
 include ':services:optimizer'
+include ':apps:optimizer'
 include ':services:tables'
 include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2'
 include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5'
From ef3260f9303a692f218f9d72985c42da421da5d3 Mon Sep 17 00:00:00 2001
From: mkuchenbecker
Date: Mon, 6 Apr 2026 11:37:07 -0700
Subject: [PATCH 006/104] fix: update service impl to use consolidated find methods

Align OptimizerDataServiceImpl with renamed repository methods from optimizer-1 review feedback.
Co-Authored-By: Claude Opus 4.6
---
 .../service/OptimizerDataServiceImpl.java | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java
index dbc5f466b..629853156 100644
--- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java
@@ -46,8 +46,8 @@ public List<TableOperationsDto> listTableOperations(
       String databaseName,
       String tableName,
       String tableUuid) {
-    return operationsRepository
-        .findFiltered(operationType, status, databaseName, tableName, tableUuid).stream()
+    return operationsRepository.find(operationType, status, databaseName, tableName, tableUuid)
+        .stream()
         .map(mapper::toDto)
         .collect(Collectors.toList());
   }
@@ -130,20 +130,14 @@ public Optional<TableStatsDto> getTableStats(String tableUuid) {
 
   @Override
   public List<TableStatsDto> listTableStats(String databaseId, String tableName, String tableUuid) {
-    return statsRepository.findFiltered(databaseId, tableName, tableUuid).stream()
+    return statsRepository.find(databaseId, tableName, tableUuid).stream()
         .map(mapper::toDto)
         .collect(Collectors.toList());
   }
 
   @Override
   public List<TableStatsHistoryDto> getStatsHistory(String tableUuid, Instant since, int limit) {
-    PageRequest page = PageRequest.of(0, limit);
-    if (since != null) {
-      return statsHistoryRepository.findByTableUuidSince(tableUuid, since, page).stream()
-          .map(mapper::toDto)
-          .collect(Collectors.toList());
-    }
-    return statsHistoryRepository.findByTableUuid(tableUuid, page).stream()
+    return statsHistoryRepository.find(tableUuid, since, PageRequest.of(0, limit)).stream()
         .map(mapper::toDto)
         .collect(Collectors.toList());
   }
@@ -170,7 +164,8 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) {
 
   @Override
   public List<TableOperationsHistoryDto> getHistory(String tableUuid, int limit) {
-    return historyRepository.find(tableUuid, limit).stream()
+    return historyRepository
+        .find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, limit)).stream()
         .map(mapper::toDto)
         .collect(Collectors.toList());
   }
@@ -186,7 +181,7 @@ public List<TableOperationsHistoryDto> listHistory(
       Instant until,
       int limit) {
     return historyRepository
-        .findFiltered(
+        .find(
             databaseName,
             tableName,
             tableUuid,
From ac1da013711ca3ac680bb24e48f3859813f099a2 Mon Sep 17 00:00:00 2001
From: mkuchenbecker
Date: Mon, 6 Apr 2026 12:09:53 -0700
Subject: [PATCH 007/104] feat(optimizer): add apps/optimizer shared module with find-only repos

Shared JPA entities and repositories for optimizer apps (analyzer, scheduler).
All repos expose a single find method with optional filters.

Co-Authored-By: Claude Opus 4.6
---
 apps/optimizer/build.gradle | 13 +++++
 .../entity/TableOperationHistoryRow.java | 37 +++++++++++++
 .../optimizer/entity/TableOperationRow.java | 55 +++++++++++++++++++
 .../optimizer/entity/TableStatsRow.java | 53 ++++++++++++++++++
 .../openhouse/optimizer/model/TableStats.java | 45 +++++++++++++++
 .../TableOperationHistoryRepository.java | 32 +++++++++++
 .../repository/TableOperationsRepository.java | 29 ++++++++++
 .../repository/TableStatsRepository.java | 25 +++++++++
 settings.gradle | 1 +
 9 files changed, 290 insertions(+)
 create mode 100644 apps/optimizer/build.gradle
 create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java
 create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java
 create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java
 create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java
 create mode 100644
apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java
 create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java
 create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java

diff --git a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle
new file mode 100644
index 000000000..f14969274
--- /dev/null
+++ b/apps/optimizer/build.gradle
@@ -0,0 +1,13 @@
+plugins {
+  id 'openhouse.java-minimal-conventions'
+}
+
+// Avoid build-directory collision with services:optimizer (same project.name 'optimizer').
+buildDir = "${rootProject.buildDir}/apps-optimizer"
+
+dependencies {
+  implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8'
+  implementation 'com.vladmihalcea:hibernate-types-55:2.21.1'
+  testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8'
+  testRuntimeOnly 'com.h2database:h2'
+}
diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java
new file mode 100644
index 000000000..4e638e2e1
--- /dev/null
+++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java
@@ -0,0 +1,37 @@
+package com.linkedin.openhouse.optimizer.entity;
+
+import java.time.Instant;
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+
+/** Lightweight, getter-only JPA entity for reading {@code table_operations_history} rows. */
+@Entity
+@Table(name = "table_operations_history")
+@Getter
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class TableOperationHistoryRow {
+
+  @Id
+  @Column(name = "id", nullable = false, length = 36)
+  private String id;
+
+  @Column(name = "table_uuid", nullable = false, length = 36)
+  private String tableUuid;
+
+  @Column(name = "operation_type", nullable = false, length = 50)
+  private String operationType; // mapped as a plain String in this module
+
+  @Column(name = "submitted_at", nullable = false)
+  private Instant submittedAt;
+
+  @Column(name = "status", nullable = false, length = 20)
+  private String status; // mapped as a plain String in this module
+}
diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java
new file mode 100644
index 000000000..fc0104604
--- /dev/null
+++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java
@@ -0,0 +1,55 @@
+package com.linkedin.openhouse.optimizer.entity;
+
+import java.time.Instant;
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+
+/** JPA entity mapping to the {@code table_operations} table in the optimizer DB.
*/
+@Entity
+@Table(name = "table_operations")
+@Getter
+@Setter
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class TableOperationRow {
+
+  @Id
+  @Column(name = "id", nullable = false, length = 36)
+  private String id;
+
+  @Column(name = "table_uuid", nullable = false, length = 36)
+  private String tableUuid;
+
+  @Column(name = "database_name", nullable = false, length = 255)
+  private String databaseName;
+
+  @Column(name = "table_name", nullable = false, length = 255)
+  private String tableName;
+
+  @Column(name = "operation_type", nullable = false, length = 50)
+  private String operationType;
+
+  @Column(name = "status", nullable = false, length = 20)
+  private String status;
+
+  @Column(name = "created_at")
+  private Instant createdAt; // mapping declares no NOT NULL constraint, so this may be null
+
+  @Column(name = "scheduled_at")
+  private Instant scheduledAt; // mapping declares no NOT NULL constraint, so this may be null
+
+  @Column(name = "job_id", length = 255)
+  private String jobId; // mapping declares no NOT NULL constraint, so this may be null
+
+  /** Plain version column — no {@code @Version}, so JPA optimistic locking is not applied. */
+  @Column(name = "version")
+  private Long version;
+}
diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java
new file mode 100644
index 000000000..5cdf16a97
--- /dev/null
+++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java
@@ -0,0 +1,53 @@
+package com.linkedin.openhouse.optimizer.entity;
+
+import com.linkedin.openhouse.optimizer.model.TableStats;
+import com.vladmihalcea.hibernate.type.json.JsonStringType;
+import java.time.Instant;
+import java.util.Map;
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+import org.hibernate.annotations.Type;
+import org.hibernate.annotations.TypeDef;
+
+/**
+ * JPA entity for the
optimizer {@code table_stats} table. Written by the Tables Service on every
+ * Iceberg commit; read by the Analyzer and Scheduler directly via JPA.
+ */
+@TypeDef(name = "json", typeClass = JsonStringType.class)
+@Entity
+@Table(name = "table_stats")
+@Getter
+@Setter
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class TableStatsRow {
+
+  @Id
+  @Column(name = "table_uuid", nullable = false, length = 36)
+  private String tableUuid;
+
+  @Column(name = "database_id", nullable = false, length = 255)
+  private String databaseId;
+
+  @Column(name = "table_name", nullable = false, length = 255)
+  private String tableName;
+
+  @Type(type = "json") // serialized through the "json" TypeDef declared on this class
+  @Column(name = "stats", columnDefinition = "TEXT")
+  private TableStats stats;
+
+  @Type(type = "json") // serialized through the "json" TypeDef declared on this class
+  @Column(name = "table_properties", columnDefinition = "TEXT")
+  private Map tableProperties; // NOTE(review): raw Map; type arguments look lost — confirm
+
+  @Column(name = "updated_at", nullable = false)
+  private Instant updatedAt;
+}
diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java
new file mode 100644
index 000000000..5e0f51468
--- /dev/null
+++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java
@@ -0,0 +1,45 @@
+package com.linkedin.openhouse.optimizer.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */
+@Data
+@Builder(toBuilder = true)
+@NoArgsConstructor
+@AllArgsConstructor
+public class TableStats {
+
+  /** Snapshot fields — overwritten on every upsert. */
+  private SnapshotMetrics snapshot;
+
+  /** Delta fields — accumulated across commit events. */
+  private CommitDelta delta;
+
+  /** Point-in-time metadata read from Iceberg at scan time.
*/
+  @Data
+  @Builder(toBuilder = true)
+  @NoArgsConstructor
+  @AllArgsConstructor
+  public static class SnapshotMetrics {
+    private String clusterId;
+    private String tableVersion;
+    private String tableLocation;
+    private Long tableSizeBytes;
+    /** Total number of data files as of the latest snapshot — used for bin-packing. */
+    private Long numCurrentFiles;
+  }
+
+  /** Per-commit incremental counters accumulated across all recorded commit events. */
+  @Data
+  @Builder(toBuilder = true)
+  @NoArgsConstructor
+  @AllArgsConstructor
+  public static class CommitDelta {
+    private Long numFilesAdded;
+    private Long numFilesDeleted;
+    private Long deletedSizeBytes;
+  }
+}
diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java
new file mode 100644
index 000000000..f2ea9e3c8
--- /dev/null
+++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java
@@ -0,0 +1,32 @@
+package com.linkedin.openhouse.optimizer.repository;
+
+import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow;
+import java.time.Instant;
+import java.util.List;
+import org.springframework.data.domain.Pageable;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.Query;
+import org.springframework.data.repository.query.Param;
+
+/** Repository for reading {@code table_operations_history} in the Analyzer. */
+public interface TableOperationHistoryRepository
+    extends JpaRepository<TableOperationHistoryRow, String> {
+
+  /**
+   * Return history rows matching the given filters, ordered by {@code submittedAt} descending.
+   * Every parameter is optional — pass {@code null} to skip that filter.
+ */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:since IS NULL OR r.submittedAt >= :since) " + + "ORDER BY r.submittedAt DESC") + List find( + @Param("operationType") String operationType, + @Param("tableUuid") String tableUuid, + @Param("status") String status, + @Param("since") Instant since, + Pageable pageable); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java new file mode 100644 index 000000000..27424dfdc --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ +public interface TableOperationsRepository extends JpaRepository { + + /** + * Return operations matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableOperationRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName)") + List find( + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName); +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java new file mode 100644 index 000000000..6effe19c2 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ +public interface TableStatsRepository extends JpaRepository { + + /** + * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} + * to skip that filter. 
+ */ + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseId IS NULL OR r.databaseId = :databaseId) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List find( + @Param("databaseId") String databaseId, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid); +} diff --git a/settings.gradle b/settings.gradle index cad06785e..0d64dad53 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,6 +50,7 @@ include ':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' +include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 02a5ab31c62a0847e665f674b1fb3e8684bb3433 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:19:37 -0700 Subject: [PATCH 008/104] fix: remove orphan fields from CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fields never belonged in the data model — remove them at the source rather than adding then deleting in a later PR. Co-Authored-By: Claude Opus 4.6 --- .../optimizer/api/model/CompleteOperationRequest.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index c26893197..35f7ba782 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -22,10 +22,4 @@ public class CompleteOperationRequest { /** Error details on failure; {@code null} on success. 
*/ private JobResult result; - - /** Number of orphan files deleted; set by OFD Spark app on success. */ - private Integer orphanFilesDeleted; - - /** Bytes reclaimed by orphan file deletion; set by OFD Spark app on success. */ - private Long orphanBytesDeleted; } From 01466c70cd4f7ad4f56db31897e23f681512a31a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:34:29 -0700 Subject: [PATCH 009/104] feat(optimizer): add service-layer integration tests H2 integration tests for OptimizerDataServiceImpl covering completeOperation (write history, not-found) and upsertTableStats (create, update, history append). Co-Authored-By: Claude Opus 4.6 --- .../service/OptimizerDataServiceImplTest.java | 159 ++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java new file mode 100644 index 000000000..6e3194018 --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -0,0 +1,159 @@ +package com.linkedin.openhouse.optimizer.service; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import 
com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import java.time.Instant; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; + +@SpringBootTest +@ActiveProfiles("test") +@Transactional +class OptimizerDataServiceImplTest { + + @Autowired OptimizerDataService service; + @Autowired TableOperationsRepository operationsRepository; + @Autowired TableStatsRepository statsRepository; + @Autowired TableStatsHistoryRepository statsHistoryRepository; + + // --- completeOperation --- + + @Test + void completeOperation_writesHistoryFromOperationRow() { + String id = UUID.randomUUID().toString(); + String tableUuid = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("spark-job-123") + .build()); + + Optional result = + service.completeOperation( + id, CompleteOperationRequest.builder().status(OperationHistoryStatus.SUCCESS).build()); + + assertThat(result).isPresent(); + assertThat(result.get().getStatus()).isEqualTo(OperationHistoryStatus.SUCCESS); + assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); + 
assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); + assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); + assertThat(result.get().getDatabaseName()).isEqualTo("db1"); + assertThat(result.get().getSubmittedAt()).isNotNull(); + } + + @Test + void completeOperation_notFound_returnsEmpty() { + Optional result = + service.completeOperation( + UUID.randomUUID().toString(), + CompleteOperationRequest.builder() + .status(OperationHistoryStatus.FAILED) + .result( + JobResult.builder().errorMessage("boom").errorType("RuntimeException").build()) + .build()); + + assertThat(result).isEmpty(); + } + + // --- upsertTableStats --- + + @Test + void upsertTableStats_createsNewRow() { + String tableUuid = UUID.randomUUID().toString(); + TableStats stats = + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + .build(); + + TableStatsDto dto = + service.upsertTableStats( + tableUuid, + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats(stats) + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .build()); + + assertThat(dto.getTableUuid()).isEqualTo(tableUuid); + assertThat(dto.getDatabaseId()).isEqualTo("db1"); + assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(dto.getTableProperties()).containsEntry("maintenance.optimizer.ofd.enabled", "true"); + assertThat(statsRepository.findById(tableUuid)).isPresent(); + } + + @Test + void upsertTableStats_updatesExistingRow() { + String tableUuid = UUID.randomUUID().toString(); + UpsertTableStatsRequest first = + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .build(); + UpsertTableStatsRequest second = + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats( + 
TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .build()) + .build(); + + service.upsertTableStats(tableUuid, first); + TableStatsDto dto = service.upsertTableStats(tableUuid, second); + + assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + assertThat(statsRepository.findAll()).hasSize(1); + } + + @Test + void upsertTableStats_appendsHistoryOnEveryCall() { + String tableUuid = UUID.randomUUID().toString(); + UpsertTableStatsRequest request = + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .build()) + .build(); + + service.upsertTableStats(tableUuid, request); + service.upsertTableStats(tableUuid, request); + + assertThat(statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100))).hasSize(2); + } +} From ff07fde3cbfc8dd0cb2c2fde49748dc84ee6734c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 6 Apr 2026 12:43:44 -0700 Subject: [PATCH 010/104] fix: assert stats history delta values in upsert test Strengthen upsertTableStats test to verify history rows contain the raw delta stats from each call, not just the row count. 
Co-Authored-By: Claude Opus 4.6 --- .../service/OptimizerDataServiceImplTest.java | 69 +++++++++---------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 6e3194018..244acb204 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -12,10 +12,12 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; import java.time.Instant; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -110,50 +112,45 @@ void upsertTableStats_createsNewRow() { } @Test - void upsertTableStats_updatesExistingRow() { + void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - UpsertTableStatsRequest first = - UpsertTableStatsRequest.builder() - .databaseId("db1") - .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + TableStats firstStats = + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) .build(); - 
UpsertTableStatsRequest second = - UpsertTableStatsRequest.builder() - .databaseId("db1") - .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + TableStats secondStats = + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) .build(); - service.upsertTableStats(tableUuid, first); - TableStatsDto dto = service.upsertTableStats(tableUuid, second); - - assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); - assertThat(statsRepository.findAll()).hasSize(1); - } - - @Test - void upsertTableStats_appendsHistoryOnEveryCall() { - String tableUuid = UUID.randomUUID().toString(); - UpsertTableStatsRequest request = + service.upsertTableStats( + tableUuid, UpsertTableStatsRequest.builder() .databaseId("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) - .build(); + .stats(firstStats) + .build()); + TableStatsDto dto = + service.upsertTableStats( + tableUuid, + UpsertTableStatsRequest.builder() + .databaseId("db1") + .tableName("tbl1") + .stats(secondStats) + .build()); - service.upsertTableStats(tableUuid, request); - service.upsertTableStats(tableUuid, request); + // Current row reflects the latest upsert + assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + assertThat(statsRepository.findAll()).hasSize(1); - assertThat(statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100))).hasSize(2); + // History has one row per upsert with the raw delta from each call + List history = + statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); + assertThat(history).hasSize(2); + // Newest first + assertThat(history.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + 
assertThat(history.get(1).getStats().getDelta().getNumFilesAdded()).isEqualTo(5L); } } From c0802cb67b8bd245d872ad31683d092a8f1a3f95 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 7 Apr 2026 09:29:32 -0700 Subject: [PATCH 011/104] =?UTF-8?q?feat(optimizer):=20add=20analyzer=20app?= =?UTF-8?q?=20=E2=80=94=20continuous=20table=20operation=20scheduling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces apps/optimizer-analyzer, a Spring Boot CommandLineRunner that evaluates every table in table_stats against pluggable OperationAnalyzer strategies. The first strategy, OrphanFilesDeletionAnalyzer, schedules OFD operations with 24h success / 1h failure retry cadence, a 6h SCHEDULED timeout, and a 5-strike circuit breaker. Key design choices: - Bulk-loads operations and history into maps (one query per type), then iterates the stats list — O(types) queries, not O(tables). - Uses the existing generic find() repository methods with null params. - Pure unit tests with Mockito — no Spring context needed. 
Co-Authored-By: Claude Opus 4.6 --- apps/optimizer-analyzer/build.gradle | 20 ++ .../analyzer/AnalyzerApplication.java | 25 ++ .../openhouse/analyzer/AnalyzerRunner.java | 185 ++++++++++++ .../openhouse/analyzer/CadencePolicy.java | 74 +++++ .../openhouse/analyzer/OperationAnalyzer.java | 43 +++ .../analyzer/OrphanFilesDeletionAnalyzer.java | 55 ++++ .../analyzer/config/AnalyzerConfig.java | 7 + .../analyzer/model/TableOperationRecord.java | 23 ++ .../analyzer/model/TableSummary.java | 26 ++ .../src/main/resources/application.properties | 9 + .../analyzer/AnalyzerRunnerTest.java | 270 ++++++++++++++++++ .../OrphanFilesDeletionAnalyzerTest.java | 242 ++++++++++++++++ settings.gradle | 1 + 13 files changed, 980 insertions(+) create mode 100644 apps/optimizer-analyzer/build.gradle create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java create mode 100644 apps/optimizer-analyzer/src/main/resources/application.properties create mode 100644 apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java create mode 100644 
apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java diff --git a/apps/optimizer-analyzer/build.gradle b/apps/optimizer-analyzer/build.gradle new file mode 100644 index 000000000..af4def95a --- /dev/null +++ b/apps/optimizer-analyzer/build.gradle @@ -0,0 +1,20 @@ +plugins { + id 'openhouse.springboot-ext-conventions' + id 'org.springframework.boot' version '2.7.8' +} + +dependencies { + implementation project(':apps:optimizer') + implementation 'org.springframework.boot:spring-boot-starter:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-aop:2.7.8' + runtimeOnly 'mysql:mysql-connector-java:8.0.33' + testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' + testImplementation 'com.squareup.okhttp3:mockwebserver:4.10.0' + testRuntimeOnly 'com.h2database:h2' +} + +test { + useJUnitPlatform() +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java new file mode 100644 index 000000000..99ba56047 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.analyzer; + +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.autoconfigure.domain.EntityScan; +import org.springframework.context.annotation.Bean; +import org.springframework.data.jpa.repository.config.EnableJpaRepositories; + +/** Entry point for the Optimizer Analyzer application. 
*/ +@SpringBootApplication +@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.entity") +@EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository") +public class AnalyzerApplication { + + public static void main(String[] args) { + SpringApplication.run(AnalyzerApplication.class, args); + } + + /** Delegates to {@link AnalyzerRunner#analyze()} once per process invocation. */ + @Bean + public CommandLineRunner run(AnalyzerRunner runner) { + return args -> runner.analyze(); + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java new file mode 100644 index 000000000..5ad568d49 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -0,0 +1,185 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import java.time.Instant; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.data.domain.Pageable; +import org.springframework.stereotype.Component; + +/** + * Core analysis loop. 
Loads all {@code table_stats} rows and evaluates each table against every + * registered {@link OperationAnalyzer} in a single pass. + * + *

The two sides of the join — current operations and circuit-breaker history — are loaded into + * memory once per run before the table loop. Both are naturally bounded (only tables with active or + * recently failed operations have rows), so holding them in maps is safe at any table scale. + */ +@Slf4j +@Component +@RequiredArgsConstructor +public class AnalyzerRunner { + + private final List analyzers; + private final TableStatsRepository statsRepo; + private final TableOperationsRepository operationsRepo; + private final TableOperationHistoryRepository historyRepo; + + /** Run the full analysis loop once. */ + public void analyze() { + // Pre-load the small sides of the joins — one query per analyzer type. + Map> opsByType = + analyzers.stream() + .collect( + Collectors.toMap( + OperationAnalyzer::getOperationType, a -> loadOpsMap(a.getOperationType()))); + + Map>> historyByType = + analyzers.stream() + .collect( + Collectors.toMap( + OperationAnalyzer::getOperationType, + a -> loadHistoryMap(a.getOperationType()))); + + List tableList = + statsRepo.find(null, null, null).stream() + .filter(row -> row.getTableUuid() != null) + .collect(Collectors.toList()); + log.info("Found {} tables in optimizer table_stats", tableList.size()); + + tableList.forEach( + row -> { + TableSummary table = toSummary(row); + analyzers.forEach( + analyzer -> { + String type = analyzer.getOperationType(); + Optional currentOp = + Optional.ofNullable(opsByType.get(type).get(row.getTableUuid())); + List history = + historyByType + .get(type) + .getOrDefault(row.getTableUuid(), Collections.emptyList()); + + Optional latestHistory = history.stream().findFirst(); + + if (analyzer.isEnabled(table) + && analyzer.shouldSchedule(table, currentOp, latestHistory) + && !isCircuitBroken(analyzer, row.getTableUuid(), history)) { + operationsRepo.save(buildOperation(row, type)); + log.info( + "Created PENDING {} operation for table {}.{}", + type, + row.getDatabaseId(), + row.getTableName()); + } + }); 
+ }); + + log.info("Analysis complete"); + } + + /** + * Loads the most recent operation record per table for the given type. Deduplicates by keeping + * the newer row when a table has more than one active record. + */ + private Map loadOpsMap(String operationType) { + Map map = + operationsRepo.find(operationType, null, null, null, null).stream() + .filter(e -> e.getTableUuid() != null) + .collect( + Collectors.toMap( + TableOperationRow::getTableUuid, + AnalyzerRunner::toRecord, + (a, b) -> mostRecent(a, b))); + log.info("Analyzer {} found {} tables with operation history", operationType, map.size()); + return map; + } + + /** + * Loads all history rows for the given type and groups them by {@code tableUuid}, newest first. + * Called once per analyzer type to eliminate per-table N+1 queries in the circuit breaker check. + */ + private Map> loadHistoryMap(String operationType) { + return historyRepo.find(operationType, null, null, null, Pageable.unpaged()).stream() + .collect(Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)); + } + + private TableOperationRow buildOperation(TableStatsRow row, String operationType) { + return TableOperationRow.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseId()) + .tableName(row.getTableName()) + .operationType(operationType) + .status("PENDING") + .createdAt(Instant.now()) + .version(0L) + .build(); + } + + private TableSummary toSummary(TableStatsRow e) { + return TableSummary.builder() + .tableUuid(e.getTableUuid()) + .databaseId(e.getDatabaseId()) + .tableId(e.getTableName()) + .tableProperties( + e.getTableProperties() != null ? e.getTableProperties() : Collections.emptyMap()) + .stats(e.getStats()) + .build(); + } + + /** + * Returns {@code true} if the circuit breaker has tripped. Uses the pre-loaded history list + * instead of querying the DB per table. 
+ */ + private boolean isCircuitBroken( + OperationAnalyzer analyzer, String tableUuid, List history) { + int threshold = analyzer.getCircuitBreakerThreshold(); + if (threshold <= 0 || history.size() < threshold) { + return false; + } + boolean allFailed = + history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); + if (allFailed) { + log.warn( + "Circuit breaker tripped for table {} operation {}: last {} attempts all FAILED", + tableUuid, + analyzer.getOperationType(), + threshold); + } + return allFailed; + } + + private static TableOperationRecord mostRecent(TableOperationRecord a, TableOperationRecord b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } + + private static TableOperationRecord toRecord(TableOperationRow e) { + TableOperationRecord r = new TableOperationRecord(); + r.setId(e.getId()); + r.setTableUuid(e.getTableUuid()); + r.setDatabaseName(e.getDatabaseName()); + r.setTableName(e.getTableName()); + r.setOperationType(e.getOperationType()); + r.setStatus(e.getStatus()); + r.setCreatedAt(e.getCreatedAt()); + r.setScheduledAt(e.getScheduledAt()); + return r; + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java new file mode 100644 index 000000000..36d9ff841 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -0,0 +1,74 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; +import lombok.RequiredArgsConstructor; + +/** + * Encapsulates the time-based scheduling logic shared across operation 
types. An analyzer delegates + * to {@link CadencePolicy} to decide whether to re-issue a recommendation for a table that already + * has an active operation record and/or history. + * + *

The SCHEDULED timeout is a key safety mechanism: if a Spark job crashes without reporting + * back, the SCHEDULED row would otherwise block the table forever. When a SCHEDULED row with no + * history entry is older than {@code scheduledTimeout}, the Analyzer treats it as stale and returns + * {@code true}, causing a new PENDING row to be inserted. SCHEDULING rows are never timed out. + */ +@RequiredArgsConstructor +public class CadencePolicy { + + private final Duration successRetryInterval; + private final Duration failureRetryInterval; + private final Duration scheduledTimeout; + + /** + * Returns {@code true} if a new or refreshed operation record should be upserted. + * + * @param currentOp the existing active operation record, or empty if none exists + * @param latestHistory the most recent history entry for this (table, type), or empty + */ + public boolean shouldSchedule( + Optional currentOp, Optional latestHistory) { + if (currentOp.isEmpty()) { + return decideFromHistory(latestHistory); + } + TableOperationRecord op = currentOp.get(); + switch (op.getStatus()) { + case "PENDING": + case "SCHEDULING": + return false; + case "SCHEDULED": + if (latestHistory.isEmpty()) { + return pastInterval(op.getScheduledAt(), scheduledTimeout); + } + return decideFromHistoryEntry(latestHistory.get()); + default: + return true; + } + } + + private boolean decideFromHistory(Optional latestHistory) { + if (latestHistory.isEmpty()) { + return true; + } + return decideFromHistoryEntry(latestHistory.get()); + } + + private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { + switch (entry.getStatus()) { + case "SUCCESS": + return pastInterval(entry.getSubmittedAt(), successRetryInterval); + case "FAILED": + return pastInterval(entry.getSubmittedAt(), failureRetryInterval); + default: + return true; + } + } + + private boolean pastInterval(Instant timestamp, Duration interval) { + return timestamp == null || Duration.between(timestamp, Instant.now()).compareTo(interval) > 0; + } +} diff --git
a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java new file mode 100644 index 000000000..425fbdbfb --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -0,0 +1,43 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.util.Optional; + +/** + * Strategy interface for a single operation type. Each implementation decides whether a given table + * needs an operation recommendation upserted in the Optimizer Service. + */ +public interface OperationAnalyzer { + + /** The operation type this analyzer handles (e.g., {@code "ORPHAN_FILES_DELETION"}). */ + String getOperationType(); + + /** + * Returns {@code true} if this operation is opted-in for the given table. Tables that return + * {@code false} are skipped entirely — no upsert is issued. + */ + boolean isEnabled(TableSummary table); + + /** + * Returns {@code true} if a new or refreshed operation record should be upserted. + * + * @param table the table entry + * @param currentOp the existing active operation record, or empty if none exists + * @param latestHistory the most recent history entry for this (table, type), or empty + */ + boolean shouldSchedule( + TableSummary table, + Optional currentOp, + Optional latestHistory); + + /** + * Maximum number of consecutive FAILED history entries before the circuit breaker trips and + * scheduling is suppressed for this (table, operation_type). Override per operation type. Returns + * 0 to disable the circuit breaker. 
+ */ + default int getCircuitBreakerThreshold() { + return 5; + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java new file mode 100644 index 000000000..016057aa4 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.analyzer; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Duration; +import java.util.Optional; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +/** Analyzer for the {@code ORPHAN_FILES_DELETION} operation type. */ +@Component +public class OrphanFilesDeletionAnalyzer implements OperationAnalyzer { + + static final String OPERATION_TYPE = "ORPHAN_FILES_DELETION"; + static final String OFD_ENABLED_PROPERTY = "maintenance.optimizer.ofd.enabled"; + + private final CadencePolicy cadencePolicy; + + @Autowired + public OrphanFilesDeletionAnalyzer( + @Value("${ofd.success-retry-hours:24}") long successRetryHours, + @Value("${ofd.failure-retry-hours:1}") long failureRetryHours, + @Value("${ofd.scheduled-timeout-hours:6}") long scheduledTimeoutHours) { + this.cadencePolicy = + new CadencePolicy( + Duration.ofHours(successRetryHours), + Duration.ofHours(failureRetryHours), + Duration.ofHours(scheduledTimeoutHours)); + } + + /** Package-private for tests that supply a pre-built {@link CadencePolicy}. 
*/ + OrphanFilesDeletionAnalyzer(CadencePolicy cadencePolicy) { + this.cadencePolicy = cadencePolicy; + } + + @Override + public String getOperationType() { + return OPERATION_TYPE; + } + + @Override + public boolean isEnabled(TableSummary table) { + return "true".equals(table.getTableProperties().get(OFD_ENABLED_PROPERTY)); + } + + @Override + public boolean shouldSchedule( + TableSummary table, + Optional currentOp, + Optional latestHistory) { + return cadencePolicy.shouldSchedule(currentOp, latestHistory); + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java new file mode 100644 index 000000000..30ad9f55b --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java @@ -0,0 +1,7 @@ +package com.linkedin.openhouse.analyzer.config; + +import org.springframework.context.annotation.Configuration; + +/** Spring configuration for the Analyzer. */ +@Configuration +public class AnalyzerConfig {} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java new file mode 100644 index 000000000..51bc4d803 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java @@ -0,0 +1,23 @@ +package com.linkedin.openhouse.analyzer.model; + +import java.time.Instant; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Lightweight representation of an active table operation record. Mirrors the fields in {@link + * com.linkedin.openhouse.optimizer.entity.TableOperationRow} that the Analyzer needs. 
+ */ +@Data +@NoArgsConstructor +public class TableOperationRecord { + + private String id; + private String tableUuid; + private String databaseName; + private String tableName; + private String operationType; + private String status; + private Instant createdAt; + private Instant scheduledAt; +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java new file mode 100644 index 000000000..fbe166fff --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java @@ -0,0 +1,26 @@ +package com.linkedin.openhouse.analyzer.model; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Internal representation of a table, decoupled from any external API response model. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableSummary { + + private String tableUuid; + private String databaseId; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + /** Commit stats from the optimizer {@code table_stats} table. Null if no stats recorded yet. 
*/ + private TableStats stats; +} diff --git a/apps/optimizer-analyzer/src/main/resources/application.properties b/apps/optimizer-analyzer/src/main/resources/application.properties new file mode 100644 index 000000000..990740f1d --- /dev/null +++ b/apps/optimizer-analyzer/src/main/resources/application.properties @@ -0,0 +1,9 @@ +spring.application.name=openhouse-optimizer-analyzer +spring.main.web-application-type=none +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:h2:mem:analyzerdb;DB_CLOSE_DELAY=-1;MODE=MySQL} +spring.datasource.username=${OPTIMIZER_DB_USER:sa} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} +spring.jpa.hibernate.ddl-auto=none +ofd.success-retry-hours=24 +ofd.failure-retry-hours=1 +ofd.scheduled-timeout-hours=6 diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java new file mode 100644 index 000000000..69de877a2 --- /dev/null +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -0,0 +1,270 @@ +package com.linkedin.openhouse.analyzer; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import 
com.linkedin.openhouse.optimizer.repository.TableStatsRepository; +import java.time.Instant; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.data.domain.Pageable; + +@ExtendWith(MockitoExtension.class) +class AnalyzerRunnerTest { + + @Mock private TableStatsRepository statsRepo; + @Mock private TableOperationsRepository operationsRepo; + @Mock private TableOperationHistoryRepository historyRepo; + @Mock private OperationAnalyzer analyzer; + + private AnalyzerRunner runner; + + @BeforeEach + void setUp() { + runner = new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo); + } + + @Test + void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) + .thenReturn(true); + + runner.analyze(); + + 
ArgumentCaptor captor = ArgumentCaptor.forClass(TableOperationRow.class); + verify(operationsRepo).save(captor.capture()); + TableOperationRow saved = captor.getValue(); + assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); + assertThat(saved.getDatabaseName()).isEqualTo("db1"); + assertThat(saved.getTableName()).isEqualTo("tbl1"); + assertThat(saved.getOperationType()).isEqualTo("ORPHAN_FILES_DELETION"); + assertThat(saved.getStatus()).isEqualTo("PENDING"); + assertThat(saved.getId()).isNotNull(); + } + + @Test + void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + TableOperationRow existingEntity = new TableOperationRow(); + existingEntity.setId("existing-op-id"); + existingEntity.setStatus("PENDING"); + existingEntity.setTableUuid("uuid-1"); + existingEntity.setOperationType("ORPHAN_FILES_DELETION"); + existingEntity.setCreatedAt(Instant.now()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(List.of(existingEntity)); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + + TableOperationRecord existingRecord = new TableOperationRecord(); + existingRecord.setId("existing-op-id"); + existingRecord.setStatus("PENDING"); + existingRecord.setTableUuid("uuid-1"); + existingRecord.setOperationType("ORPHAN_FILES_DELETION"); + existingRecord.setCreatedAt(existingEntity.getCreatedAt()); + when(analyzer.shouldSchedule(expectedTable, 
Optional.of(existingRecord), Optional.empty())) + .thenReturn(false); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenNotEnabled() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + + TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(false); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenShouldScheduleReturnsFalse() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + + TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + + TableOperationRow scheduled = new TableOperationRow(); + scheduled.setId("op-id"); + scheduled.setStatus("SCHEDULED"); + scheduled.setTableUuid("uuid-1"); + scheduled.setOperationType("ORPHAN_FILES_DELETION"); + scheduled.setCreatedAt(Instant.now()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(List.of(scheduled)); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(Collections.emptyList()); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + + TableOperationRecord scheduledRecord = new TableOperationRecord(); + scheduledRecord.setId("op-id"); + 
scheduledRecord.setStatus("SCHEDULED"); + scheduledRecord.setTableUuid("uuid-1"); + scheduledRecord.setOperationType("ORPHAN_FILES_DELETION"); + scheduledRecord.setCreatedAt(scheduled.getCreatedAt()); + when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledRecord), Optional.empty())) + .thenReturn(false); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenTableUuidIsNull() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid(null); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find(anyString(), any(), any(), any(), any())) + .thenReturn(Collections.emptyList()); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_skipsTable_whenCircuitBreakerTrips() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + List failures = + IntStream.range(0, 3) + .mapToObj( + i -> + TableOperationHistoryRow.builder() + .id("fail-" + i) + .tableUuid("uuid-1") + .operationType("ORPHAN_FILES_DELETION") + .submittedAt(Instant.now().minusSeconds(i * 60)) + .status("FAILED") + .build()) + .collect(Collectors.toList()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.getCircuitBreakerThreshold()).thenReturn(3); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + 
when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(failures); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) + .thenReturn(true); + + runner.analyze(); + + verify(operationsRepo, never()).save(any()); + } + + @Test + void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { + TableStatsRow statsEntity = new TableStatsRow(); + statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseId("db1"); + statsEntity.setTableName("tbl1"); + + TableSummary expectedTable = + TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + + List failures = + IntStream.range(0, 3) + .mapToObj( + i -> + TableOperationHistoryRow.builder() + .id("fail-" + i) + .tableUuid("uuid-1") + .operationType("ORPHAN_FILES_DELETION") + .submittedAt(Instant.now().minusSeconds(i * 60)) + .status("FAILED") + .build()) + .collect(Collectors.toList()); + + when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); + when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); + when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) + .thenReturn(Collections.emptyList()); + when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + .thenReturn(failures); + when(analyzer.isEnabled(expectedTable)).thenReturn(true); + when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) + .thenReturn(true); + + runner.analyze(); + + verify(operationsRepo).save(any()); + } +} diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java new file mode 100644 index 000000000..e2ea5ccdd --- /dev/null +++ 
b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -0,0 +1,242 @@ +package com.linkedin.openhouse.analyzer; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class OrphanFilesDeletionAnalyzerTest { + + private static final Duration SUCCESS_INTERVAL = Duration.ofHours(24); + private static final Duration FAILURE_INTERVAL = Duration.ofHours(1); + private static final Duration SCHEDULED_TIMEOUT = Duration.ofHours(6); + + private OrphanFilesDeletionAnalyzer analyzer; + + @BeforeEach + void setUp() { + analyzer = + new OrphanFilesDeletionAnalyzer( + new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL, SCHEDULED_TIMEOUT)); + } + + // --- isEnabled --- + + @Test + void isEnabled_returnsTrue_whenPropertySet() { + assertThat(analyzer.isEnabled(tableWithProperty("true"))).isTrue(); + } + + @Test + void isEnabled_returnsFalse_whenPropertyAbsent() { + assertThat(analyzer.isEnabled(tableWithProperty(null))).isFalse(); + } + + @Test + void isEnabled_returnsFalse_whenPropertyFalse() { + assertThat(analyzer.isEnabled(tableWithProperty("false"))).isFalse(); + } + + @Test + void isEnabled_returnsFalse_whenTablePropertiesEmpty() { + TableSummary table = TableSummary.builder().tableUuid("uuid").build(); + assertThat(analyzer.isEnabled(table)).isFalse(); + } + + // --- shouldSchedule: no existing op --- + + @Test + void shouldSchedule_noOp_noHistory_returnsTrue() { + assertThat( + analyzer.shouldSchedule(tableWithProperty("true"), Optional.empty(), Optional.empty())) + .isTrue(); + } + + @Test + void 
shouldSchedule_noOp_successHistoryAfterCooldown_returnsTrue() { + Instant longAgo = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("SUCCESS", longAgo)))) + .isTrue(); + } + + @Test + void shouldSchedule_noOp_successHistoryBeforeCooldown_returnsFalse() { + Instant recent = Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("SUCCESS", recent)))) + .isFalse(); + } + + @Test + void shouldSchedule_noOp_failedHistoryAfterRetry_returnsTrue() { + Instant longAgo = Instant.now().minus(FAILURE_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("FAILED", longAgo)))) + .isTrue(); + } + + @Test + void shouldSchedule_noOp_failedHistoryBeforeRetry_returnsFalse() { + Instant recent = Instant.now().minus(FAILURE_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.empty(), + Optional.of(historyWithStatus("FAILED", recent)))) + .isFalse(); + } + + // --- shouldSchedule: PENDING / SCHEDULING --- + + @Test + void shouldSchedule_pending_returnsFalse() { + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("PENDING", null)), + Optional.empty())) + .isFalse(); + } + + @Test + void shouldSchedule_scheduling_returnsFalse() { + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULING", null)), + Optional.empty())) + .isFalse(); + } + + // --- shouldSchedule: SCHEDULED + history --- + + @Test + void shouldSchedule_scheduledNoHistory_withinTimeout_returnsFalse() { + Instant recent = Instant.now().minus(SCHEDULED_TIMEOUT).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + 
tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", recent)), + Optional.empty())) + .isFalse(); + } + + @Test + void shouldSchedule_scheduledNoHistory_pastTimeout_returnsTrue() { + Instant longAgo = Instant.now().minus(SCHEDULED_TIMEOUT).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", longAgo)), + Optional.empty())) + .isTrue(); + } + + @Test + void shouldSchedule_scheduledWithNullScheduledAt_noHistory_returnsTrue() { + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", null)), + Optional.empty())) + .isTrue(); + } + + @Test + void shouldSchedule_scheduledWithSuccessHistory_afterCooldown_returnsTrue() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("SUCCESS", historyAt)))) + .isTrue(); + } + + @Test + void shouldSchedule_scheduledWithSuccessHistory_beforeCooldown_returnsFalse() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("SUCCESS", historyAt)))) + .isFalse(); + } + + @Test + void shouldSchedule_scheduledWithFailedHistory_afterRetry_returnsTrue() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).minusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("FAILED", historyAt)))) + .isTrue(); + } + + @Test + void 
shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { + Instant scheduledAt = Instant.now().minusSeconds(3600); + Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).plusSeconds(60); + assertThat( + analyzer.shouldSchedule( + tableWithProperty("true"), + Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(historyWithStatus("FAILED", historyAt)))) + .isFalse(); + } + + // --- helpers --- + + private TableSummary tableWithProperty(String value) { + Map props = + value == null + ? Collections.emptyMap() + : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); + return TableSummary.builder() + .tableUuid("test-uuid") + .databaseId("db1") + .tableId("tbl1") + .tableProperties(props) + .build(); + } + + private TableOperationRecord opWithStatus(String status, Instant scheduledAt) { + TableOperationRecord op = new TableOperationRecord(); + op.setStatus(status); + op.setScheduledAt(scheduledAt); + return op; + } + + private TableOperationHistoryRow historyWithStatus(String status, Instant submittedAt) { + return TableOperationHistoryRow.builder() + .id("hist-id") + .tableUuid("test-uuid") + .operationType("ORPHAN_FILES_DELETION") + .submittedAt(submittedAt) + .status(status) + .build(); + } +} diff --git a/settings.gradle b/settings.gradle index 0d64dad53..52873b677 100644 --- a/settings.gradle +++ b/settings.gradle @@ -51,6 +51,7 @@ include ':services:housetables' include ':services:jobs' include ':services:optimizer' include ':apps:optimizer' +include ':apps:optimizer-analyzer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 63b0768b249b29a90470687fc1296d0ef833821b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 7 Apr 2026 11:25:40 -0700 Subject: [PATCH 012/104] fix: address PR review feedback on optimizer-3 analyzer - Rename TableSummary to Table, TableOperationRecord to TableOperation - Add 
Table.from(TableStatsRow) and TableOperation.from(TableOperationRow) - Add TableOperation.pending(Table, type) factory and toRow() for JPA - Move circuit breaker check into OperationAnalyzer as overridable default - Parameterize analyze() with optional filters (optype, db, table, uuid) - Inline loadOpsMap, loadHistoryMap, remove standalone converter methods - Expand CadencePolicy field javadoc with plain-english examples - Add TODOs: per-db iteration, benchmarking, querybuilder, CB reset Co-Authored-By: Claude Opus 4.6 --- .../openhouse/analyzer/AnalyzerRunner.java | 218 +++++++----------- .../openhouse/analyzer/CadencePolicy.java | 24 +- .../openhouse/analyzer/OperationAnalyzer.java | 33 ++- .../analyzer/OrphanFilesDeletionAnalyzer.java | 10 +- .../openhouse/analyzer/model/Table.java | 42 ++++ .../analyzer/model/TableOperation.java | 91 ++++++++ .../analyzer/model/TableOperationRecord.java | 23 -- .../analyzer/model/TableSummary.java | 26 --- .../analyzer/AnalyzerRunnerTest.java | 45 ++-- .../OrphanFilesDeletionAnalyzerTest.java | 14 +- 10 files changed, 295 insertions(+), 231 deletions(-) create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 5ad568d49..5ad653e97 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,20 +1,16 
@@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; -import java.time.Instant; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -22,12 +18,19 @@ import org.springframework.stereotype.Component; /** - * Core analysis loop. Loads all {@code table_stats} rows and evaluates each table against every + * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * *

The two sides of the join — current operations and circuit-breaker history — are loaded into * memory once per run before the table loop. Both are naturally bounded (only tables with active or * recently failed operations have rows), so holding them in maps is safe at any table scale. + * + *

// TODO: Iterate per-database instead of loading all tables at once. This scopes memory usage + * and allows incremental progress. When we go per-db we may still see 10k tables per iteration, but + * that should be fine. + * + *

// TODO: Add benchmarking and scale tests. Measure memory footprint at 10k tables per + * iteration to validate the in-memory join approach. */ @Slf4j @Component @@ -39,147 +42,90 @@ public class AnalyzerRunner { private final TableOperationsRepository operationsRepo; private final TableOperationHistoryRepository historyRepo; - /** Run the full analysis loop once. */ + /** Run the full analysis loop once with no filters. */ public void analyze() { + analyze(null, null, null, null); + } + + /** + * Run the analysis loop, optionally scoped to a specific operation type, database, table name, or + * table UUID. Pass {@code null} for any parameter to skip that filter. + */ + public void analyze( + String operationType, String databaseName, String tableName, String tableUuid) { + + List activeAnalyzers = + operationType == null + ? analyzers + : analyzers.stream() + .filter(a -> a.getOperationType().equals(operationType)) + .collect(Collectors.toList()); + // Pre-load the small sides of the joins — one query per analyzer type. - Map> opsByType = - analyzers.stream() + // TODO: Move to a query builder (Criteria API or jOOQ) as filter count grows. 
+ Map> opsByType = + activeAnalyzers.stream() .collect( Collectors.toMap( - OperationAnalyzer::getOperationType, a -> loadOpsMap(a.getOperationType()))); + OperationAnalyzer::getOperationType, + a -> + operationsRepo + .find(a.getOperationType(), null, tableUuid, databaseName, tableName) + .stream() + .filter(e -> e.getTableUuid() != null) + .collect( + Collectors.toMap( + TableOperationRow::getTableUuid, + TableOperation::from, + TableOperation::mostRecent)))); Map>> historyByType = - analyzers.stream() + activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, - a -> loadHistoryMap(a.getOperationType()))); - - List tableList = - statsRepo.find(null, null, null).stream() + a -> + historyRepo.find(a.getOperationType(), null, null, null, Pageable.unpaged()) + .stream() + .collect( + Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)))); + + List tables = + statsRepo.find(databaseName, tableName, tableUuid).stream() .filter(row -> row.getTableUuid() != null) + .map(Table::from) .collect(Collectors.toList()); - log.info("Found {} tables in optimizer table_stats", tableList.size()); - - tableList.forEach( - row -> { - TableSummary table = toSummary(row); - analyzers.forEach( - analyzer -> { - String type = analyzer.getOperationType(); - Optional currentOp = - Optional.ofNullable(opsByType.get(type).get(row.getTableUuid())); - List history = - historyByType - .get(type) - .getOrDefault(row.getTableUuid(), Collections.emptyList()); - - Optional latestHistory = history.stream().findFirst(); - - if (analyzer.isEnabled(table) - && analyzer.shouldSchedule(table, currentOp, latestHistory) - && !isCircuitBroken(analyzer, row.getTableUuid(), history)) { - operationsRepo.save(buildOperation(row, type)); - log.info( - "Created PENDING {} operation for table {}.{}", - type, - row.getDatabaseId(), - row.getTableName()); - } - }); - }); + log.info("Found {} tables in optimizer table_stats", tables.size()); + + tables.forEach( + table -> + 
activeAnalyzers.forEach( + analyzer -> { + if (!analyzer.isEnabled(table)) { + return; + } + + Optional currentOp = + Optional.ofNullable( + opsByType.get(analyzer.getOperationType()).get(table.getTableUuid())); + List history = + historyByType + .get(analyzer.getOperationType()) + .getOrDefault(table.getTableUuid(), Collections.emptyList()); + Optional latestHistory = history.stream().findFirst(); + + if (analyzer.shouldSchedule(table, currentOp, latestHistory) + && !analyzer.isCircuitBroken(table.getTableUuid(), history)) { + TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); + operationsRepo.save(op.toRow()); + log.info( + "Created PENDING {} operation for table {}.{}", + analyzer.getOperationType(), + table.getDatabaseId(), + table.getTableId()); + } + })); log.info("Analysis complete"); } - - /** - * Loads the most recent operation record per table for the given type. Deduplicates by keeping - * the newer row when a table has more than one active record. - */ - private Map loadOpsMap(String operationType) { - Map map = - operationsRepo.find(operationType, null, null, null, null).stream() - .filter(e -> e.getTableUuid() != null) - .collect( - Collectors.toMap( - TableOperationRow::getTableUuid, - AnalyzerRunner::toRecord, - (a, b) -> mostRecent(a, b))); - log.info("Analyzer {} found {} tables with operation history", operationType, map.size()); - return map; - } - - /** - * Loads all history rows for the given type and groups them by {@code tableUuid}, newest first. - * Called once per analyzer type to eliminate per-table N+1 queries in the circuit breaker check. 
- */ - private Map> loadHistoryMap(String operationType) { - return historyRepo.find(operationType, null, null, null, Pageable.unpaged()).stream() - .collect(Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)); - } - - private TableOperationRow buildOperation(TableStatsRow row, String operationType) { - return TableOperationRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseId()) - .tableName(row.getTableName()) - .operationType(operationType) - .status("PENDING") - .createdAt(Instant.now()) - .version(0L) - .build(); - } - - private TableSummary toSummary(TableStatsRow e) { - return TableSummary.builder() - .tableUuid(e.getTableUuid()) - .databaseId(e.getDatabaseId()) - .tableId(e.getTableName()) - .tableProperties( - e.getTableProperties() != null ? e.getTableProperties() : Collections.emptyMap()) - .stats(e.getStats()) - .build(); - } - - /** - * Returns {@code true} if the circuit breaker has tripped. Uses the pre-loaded history list - * instead of querying the DB per table. - */ - private boolean isCircuitBroken( - OperationAnalyzer analyzer, String tableUuid, List history) { - int threshold = analyzer.getCircuitBreakerThreshold(); - if (threshold <= 0 || history.size() < threshold) { - return false; - } - boolean allFailed = - history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); - if (allFailed) { - log.warn( - "Circuit breaker tripped for table {} operation {}: last {} attempts all FAILED", - tableUuid, - analyzer.getOperationType(), - threshold); - } - return allFailed; - } - - private static TableOperationRecord mostRecent(TableOperationRecord a, TableOperationRecord b) { - Comparator byCreatedAt = - Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); - return byCreatedAt.compare(a, b) >= 0 ? 
a : b; - } - - private static TableOperationRecord toRecord(TableOperationRow e) { - TableOperationRecord r = new TableOperationRecord(); - r.setId(e.getId()); - r.setTableUuid(e.getTableUuid()); - r.setDatabaseName(e.getDatabaseName()); - r.setTableName(e.getTableName()); - r.setOperationType(e.getOperationType()); - r.setStatus(e.getStatus()); - r.setCreatedAt(e.getCreatedAt()); - r.setScheduledAt(e.getScheduledAt()); - return r; - } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 36d9ff841..a66a7b072 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; import java.time.Instant; @@ -20,8 +20,26 @@ @RequiredArgsConstructor public class CadencePolicy { + /** + * How long to wait after a successful operation before re-evaluating the table. For example, if + * set to 24 hours and OFD succeeded at 10:00 AM Monday, the table won't be scheduled again until + * after 10:00 AM Tuesday. + */ private final Duration successRetryInterval; + + /** + * How long to wait after a failed operation before retrying. Shorter than the success interval to + * allow quick recovery. For example, if set to 1 hour and OFD failed at 2:00 PM, the table + * becomes eligible for retry at 3:00 PM. + */ private final Duration failureRetryInterval; + + /** + * Maximum time a row can stay in SCHEDULED status before the analyzer treats it as stale and + * overwrites it with a new PENDING row. 
Handles the case where a Spark job crashes without + * reporting back. For example, if set to 6 hours and a job was submitted at noon but never + * completed, the analyzer will re-schedule the table after 6:00 PM. + */ private final Duration scheduledTimeout; /** @@ -31,11 +49,11 @@ public class CadencePolicy { * @param latestHistory the most recent history entry for this (table, type), or empty */ public boolean shouldSchedule( - Optional currentOp, Optional latestHistory) { + Optional currentOp, Optional latestHistory) { if (currentOp.isEmpty()) { return decideFromHistory(latestHistory); } - TableOperationRecord op = currentOp.get(); + TableOperation op = currentOp.get(); switch (op.getStatus()) { case "PENDING": case "SCHEDULING": diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 425fbdbfb..731c8127f 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,8 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import java.util.List; import java.util.Optional; /** @@ -18,7 +19,7 @@ public interface OperationAnalyzer { * Returns {@code true} if this operation is opted-in for the given table. Tables that return * {@code false} are skipped entirely — no upsert is issued. */ - boolean isEnabled(TableSummary table); + boolean isEnabled(Table table); /** * Returns {@code true} if a new or refreshed operation record should be upserted. 
@@ -28,8 +29,8 @@ public interface OperationAnalyzer { * @param latestHistory the most recent history entry for this (table, type), or empty */ boolean shouldSchedule( - TableSummary table, - Optional currentOp, + Table table, + Optional currentOp, Optional latestHistory); /** @@ -40,4 +41,26 @@ boolean shouldSchedule( default int getCircuitBreakerThreshold() { return 5; } + + /** + * Returns {@code true} if the circuit breaker has tripped for this table. The default + * implementation checks whether the last N history entries are all FAILED. Individual analyzers + * can override this to implement different strategies (e.g., time-based backoff). + * + *

// TODO: Add circuit breaker reset with exponential backoff so tables can recover + * automatically after a cooldown period instead of staying tripped permanently. + * + *

// TODO: Add a communication path to surface tripped circuit breakers to users (e.g., + * metrics, alerts, or a dashboard query). + * + * @param tableUuid the table whose history to check + * @param history recent history entries for this (table, type), newest first + */ + default boolean isCircuitBroken(String tableUuid, List history) { + int threshold = getCircuitBreakerThreshold(); + if (threshold <= 0 || history.size() < threshold) { + return false; + } + return history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); + } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java index 016057aa4..c348b0265 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; import java.util.Optional; @@ -41,14 +41,14 @@ public String getOperationType() { } @Override - public boolean isEnabled(TableSummary table) { + public boolean isEnabled(Table table) { return "true".equals(table.getTableProperties().get(OFD_ENABLED_PROPERTY)); } @Override public boolean shouldSchedule( - TableSummary table, - Optional currentOp, + Table table, + Optional currentOp, Optional latestHistory) { return cadencePolicy.shouldSchedule(currentOp, latestHistory); } diff --git 
a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java new file mode 100644 index 000000000..d170f29dd --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java @@ -0,0 +1,42 @@ +package com.linkedin.openhouse.analyzer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. This is + * the input to the analysis pipeline: analyzers evaluate a {@code Table} and decide whether to + * produce a {@link TableOperation}. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseId; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseId(row.getDatabaseId()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? 
row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java new file mode 100644 index 000000000..5a81a3848 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -0,0 +1,91 @@ +package com.linkedin.openhouse.analyzer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table. Built either from an existing + * {@link TableOperationRow} (when loading current state) or from a {@link Table} (when creating a + * new PENDING operation). Converts back to a JPA row via {@link #toRow()}. + */ +@Data +@NoArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name, denormalized for display. */ + private String databaseName; + + /** Table name, denormalized for display. */ + private String tableName; + + /** Operation type (e.g., {@code "ORPHAN_FILES_DELETION"}). */ + private String operationType; + + /** Current lifecycle status: PENDING, SCHEDULING, SCHEDULED. */ + private String status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** Build a {@code TableOperation} from an existing JPA row. 
*/ + public static TableOperation from(TableOperationRow row) { + TableOperation op = new TableOperation(); + op.id = row.getId(); + op.tableUuid = row.getTableUuid(); + op.databaseName = row.getDatabaseName(); + op.tableName = row.getTableName(); + op.operationType = row.getOperationType(); + op.status = row.getStatus(); + op.createdAt = row.getCreatedAt(); + op.scheduledAt = row.getScheduledAt(); + return op; + } + + /** Create a new PENDING operation for the given table and operation type. */ + public static TableOperation pending(Table table, String operationType) { + TableOperation op = new TableOperation(); + op.id = UUID.randomUUID().toString(); + op.tableUuid = table.getTableUuid(); + op.databaseName = table.getDatabaseId(); + op.tableName = table.getTableId(); + op.operationType = operationType; + op.status = "PENDING"; + op.createdAt = Instant.now(); + return op; + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationRow toRow() { + return TableOperationRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType) + .status(status) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? 
a : b; + } +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java deleted file mode 100644 index 51bc4d803..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperationRecord.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import java.time.Instant; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Lightweight representation of an active table operation record. Mirrors the fields in {@link - * com.linkedin.openhouse.optimizer.entity.TableOperationRow} that the Analyzer needs. - */ -@Data -@NoArgsConstructor -public class TableOperationRecord { - - private String id; - private String tableUuid; - private String databaseName; - private String tableName; - private String operationType; - private String status; - private Instant createdAt; - private Instant scheduledAt; -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java deleted file mode 100644 index fbe166fff..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableSummary.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import java.util.Collections; -import java.util.Map; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** Internal representation of a table, decoupled from any external API response model. 
*/ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableSummary { - - private String tableUuid; - private String databaseId; - private String tableId; - - @Builder.Default private Map tableProperties = Collections.emptyMap(); - - /** Commit stats from the optimizer {@code table_stats} table. Null if no stats recorded yet. */ - private TableStats stats; -} diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 69de877a2..29fd20e50 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -7,8 +7,8 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; @@ -51,11 +51,12 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); 
when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); @@ -85,8 +86,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); TableOperationRow existingEntity = new TableOperationRow(); existingEntity.setId("existing-op-id"); @@ -103,13 +104,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperationRecord existingRecord = new TableOperationRecord(); - existingRecord.setId("existing-op-id"); - existingRecord.setStatus("PENDING"); - existingRecord.setTableUuid("uuid-1"); - existingRecord.setOperationType("ORPHAN_FILES_DELETION"); - existingRecord.setCreatedAt(existingEntity.getCreatedAt()); - when(analyzer.shouldSchedule(expectedTable, Optional.of(existingRecord), Optional.empty())) + TableOperation existingOp = TableOperation.from(existingEntity); + when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); runner.analyze(); @@ -122,7 +118,7 @@ void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); @@ -142,7 +138,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = new TableStatsRow(); 
statsEntity.setTableUuid("uuid-1"); - TableSummary expectedTable = TableSummary.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").build(); TableOperationRow scheduled = new TableOperationRow(); scheduled.setId("op-id"); @@ -159,13 +155,8 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperationRecord scheduledRecord = new TableOperationRecord(); - scheduledRecord.setId("op-id"); - scheduledRecord.setStatus("SCHEDULED"); - scheduledRecord.setTableUuid("uuid-1"); - scheduledRecord.setOperationType("ORPHAN_FILES_DELETION"); - scheduledRecord.setCreatedAt(scheduled.getCreatedAt()); - when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledRecord), Optional.empty())) + TableOperation scheduledOp = TableOperation.from(scheduled); + when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); runner.analyze(); @@ -197,8 +188,8 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 3) @@ -215,6 +206,7 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); when(analyzer.getCircuitBreakerThreshold()).thenReturn(3); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); @@ -236,8 +228,8 @@ void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { 
statsEntity.setDatabaseId("db1"); statsEntity.setTableName("tbl1"); - TableSummary expectedTable = - TableSummary.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table expectedTable = + Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 3) @@ -254,6 +246,7 @@ void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); + when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java index e2ea5ccdd..171846ff8 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.analyzer.model.TableOperationRecord; -import com.linkedin.openhouse.analyzer.model.TableSummary; +import com.linkedin.openhouse.analyzer.model.Table; +import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; import java.time.Instant; @@ -47,7 +47,7 @@ void isEnabled_returnsFalse_whenPropertyFalse() { @Test void isEnabled_returnsFalse_whenTablePropertiesEmpty() { - TableSummary table = TableSummary.builder().tableUuid("uuid").build(); + Table table = Table.builder().tableUuid("uuid").build(); 
assertThat(analyzer.isEnabled(table)).isFalse(); } @@ -210,12 +210,12 @@ void shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { // --- helpers --- - private TableSummary tableWithProperty(String value) { + private Table tableWithProperty(String value) { Map props = value == null ? Collections.emptyMap() : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); - return TableSummary.builder() + return Table.builder() .tableUuid("test-uuid") .databaseId("db1") .tableId("tbl1") @@ -223,8 +223,8 @@ private TableSummary tableWithProperty(String value) { .build(); } - private TableOperationRecord opWithStatus(String status, Instant scheduledAt) { - TableOperationRecord op = new TableOperationRecord(); + private TableOperation opWithStatus(String status, Instant scheduledAt) { + TableOperation op = new TableOperation(); op.setStatus(status); op.setScheduledAt(scheduledAt); return op; From f82d1b3ef3e0b1197487a68b851fa394ef9b9c7a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:14:18 -0700 Subject: [PATCH 013/104] fix(optimizer): address PR #527 review feedback - Widen-to-tighten: VARCHAR(255) -> VARCHAR(128) for database_name and table_name across all entities and the schema, aligning with prod conventions (can always be widened later, not tightened). - Rename databaseId -> databaseName in TableStatsRow, TableStatsHistoryRow, TableStatsDto, TableStatsHistoryDto, and UpsertTableStatsRequest for consistency with the operations entities and DTOs. - Drop the unused metrics field from TableOperationsRow, TableOperationsDto, and the schema. Add a TODO note in the schema that per-operation metric columns will be added as operations are onboarded. - Rename submittedAt -> completedAt in TableOperationsHistoryRow, TableOperationsHistoryDto, and the schema (column submitted_at -> completed_at, index idx_submitted_at -> idx_completed_at). 
The history row is written when the complete endpoint is called, so the timestamp captures completion; submission time is already on table_operations.scheduled_at. - Change TableStatsHistoryRow.id from BIGINT auto-increment to VARCHAR(36) UUID, set by the caller, matching the other id-bearing entities. - Add @JsonIgnoreProperties(ignoreUnknown = true) to CommitDelta for consistency with TableStats and SnapshotMetrics. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/model/TableOperationsDto.java | 3 --- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 1 + .../optimizer/api/model/TableStatsDto.java | 2 +- .../api/model/TableStatsHistoryDto.java | 6 ++--- .../api/model/UpsertTableStatsRequest.java | 4 ++-- .../entity/TableOperationsHistoryRow.java | 10 ++++----- .../optimizer/entity/TableOperationsRow.java | 11 ++-------- .../entity/TableStatsHistoryRow.java | 13 +++++------ .../optimizer/entity/TableStatsRow.java | 6 ++--- .../main/resources/db/optimizer-schema.sql | 22 +++++++++---------- 11 files changed, 34 insertions(+), 46 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index 9c33d8907..d41bd6906 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -39,7 +39,4 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; - - /** Reserved for future per-operation metadata; currently unused. 
*/ - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index efc9bebbb..2a901ad2b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -24,7 +24,7 @@ public class TableOperationsHistoryDto { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ - private Instant submittedAt; + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ private OperationHistoryStatus status; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 51aa8a712..64c99061a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -40,6 +40,7 @@ public static class SnapshotMetrics { @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index a668af434..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -18,7 +18,7 @@ public class 
TableStatsDto { private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 0604e07de..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -13,14 +13,14 @@ @AllArgsConstructor public class TableStatsHistoryDto { - /** Auto-increment primary key. */ - private Long id; + /** UUID primary key set by the caller. */ + private String id; /** Stable Iceberg table UUID. */ private String tableUuid; /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 721c3deaf..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -9,7 +9,7 @@ /** * Request body for {@code PUT /v1/table-stats/{tableUuid}}. * - *

<p>{@code tableUuid} comes from the path variable. {@code databaseId} and {@code tableName} are + *

<p>{@code tableUuid} comes from the path variable. {@code databaseName} and {@code tableName} are * denormalized display columns carried in the body. */ @Data @@ -19,7 +19,7 @@ public class UpsertTableStatsRequest { /** Denormalized database name for display. */ - private String databaseId; + private String databaseName; /** Denormalized table name for display. */ private String tableName; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index e7493024c..6ac5db173 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -34,7 +34,7 @@ indexes = { @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_submitted_at", columnList = "submitted_at"), + @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), @Index(name = "idx_job_id", columnList = "job_id") }) @@ -53,10 +53,10 @@ public class TableOperationsHistoryRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -64,8 +64,8 @@ public class TableOperationsHistoryRow { private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint.
*/ - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ @Enumerated(EnumType.STRING) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index e5493b510..43778495a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -53,10 +53,10 @@ public class TableOperationsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Enumerated(EnumType.STRING) @@ -85,11 +85,4 @@ public class TableOperationsRow { */ @Column(name = "version") private Long version; - - /** - * Reserved for future per-operation metadata. Stored as JSON text; currently unused. The Analyzer - * reads stats directly from {@code table_stats} instead of duplicating them here. 
- */ - @Column(name = "metrics") - private String metrics; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index 85d97a5eb..b0d92fc81 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -5,8 +5,6 @@ import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -42,17 +40,16 @@ public class TableStatsHistoryRow { @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - @Column(name = "id", nullable = false) - private Long id; + @Column(name = "id", nullable = false, length = 36) + private String id; @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 71d6a9421..f682a3485 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -37,10 +37,10 @@ public class TableStatsRow { @Column(name = 
"table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 098380e7f..49641efe2 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -3,22 +3,22 @@ CREATE TABLE IF NOT EXISTS table_operations ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, status VARCHAR(20) NOT NULL, created_at TIMESTAMP(6) NOT NULL, scheduled_at TIMESTAMP(6), job_id VARCHAR(255), version BIGINT, - metrics TEXT, + -- TODO: per-operation metric columns will be added as operations are onboarded. 
PRIMARY KEY (id) ); CREATE TABLE IF NOT EXISTS table_stats ( table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, @@ -26,10 +26,10 @@ CREATE TABLE IF NOT EXISTS table_stats ( ); CREATE TABLE IF NOT EXISTS table_stats_history ( - id BIGINT NOT NULL AUTO_INCREMENT, + id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_id VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, stats TEXT, recorded_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (id), @@ -40,10 +40,10 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( CREATE TABLE IF NOT EXISTS table_operations_history ( id VARCHAR(36) NOT NULL, table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(255) NOT NULL, - table_name VARCHAR(255) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, operation_type VARCHAR(50) NOT NULL, - submitted_at TIMESTAMP(6) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, From a109f0231d2edc546b4a1f630ad4e986c14ade02 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:16:45 -0700 Subject: [PATCH 014/104] fix(optimizer): propagate optimizer-0 renames into repos and tests - Repositories: update JPQL and parameter names to match the renamed entity fields (databaseName, completedAt). Change TableOperationsHistoryRepository and TableStatsHistoryRepository ID type parameter from Long to String to match the entity PK (UUID set by the caller, not auto-generated). - Tests: update builders and getters to use the renamed fields (databaseName, completedAt). Replace the autoIncrementId test with callerSetIdIsPreserved which verifies the caller-set UUID round-trips through save/findById. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../TableOperationsHistoryRepository.java | 13 +++--- .../TableStatsHistoryRepository.java | 2 +- .../repository/TableStatsRepository.java | 4 +- .../TableOperationsHistoryRepositoryTest.java | 20 +++++----- .../TableStatsHistoryRepositoryTest.java | 40 +++++++++++++++---- .../repository/TableStatsRepositoryTest.java | 18 ++++----- 6 files changed, 61 insertions(+), 36 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 71ab1cde4..65d62818c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -12,14 +12,15 @@ import org.springframework.stereotype.Repository; /** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is auto-increment {@code id}. + * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller + * (same UUID as the originating {@code table_operations.id}). */ @Repository public interface TableOperationsHistoryRepository - extends JpaRepository<TableOperationsHistoryRow, Long> { + extends JpaRepository<TableOperationsHistoryRow, String> { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter.
*/ @Query( @@ -29,9 +30,9 @@ public interface TableOperationsHistoryRepository + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "AND (:until IS NULL OR r.submittedAt <= :until) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "AND (:until IS NULL OR r.completedAt <= :until) " + + "ORDER BY r.completedAt DESC") List<TableOperationsHistoryRow> find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index 767d60c22..aaa1b0050 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -9,7 +9,7 @@ import org.springframework.data.repository.query.Param; /** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository<TableStatsHistoryRow, Long> { +public interface TableStatsHistoryRepository extends JpaRepository<TableStatsHistoryRow, String> { /** * Return history rows for a table, newest first.
Pass {@code null} for {@code since} to skip the diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index ecae70feb..9bcaab41b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 1a35a8fda..b9735a617 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -36,7 +36,7 @@ void appendAndFindByTableUuid() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(t1) + .completedAt(t1) .status(OperationHistoryStatus.SUCCESS) .jobId("job-001") .build()); @@ -48,7 +48,7 @@ void appendAndFindByTableUuid() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(t2) + .completedAt(t2) .status(OperationHistoryStatus.FAILED) .jobId("job-002") .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) @@ -75,7 +75,7 @@ void appendIsNonDestructive_multipleRunsRetained() { .databaseName("db1") .tableName("tbl2") 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(i)) + .completedAt(now.plusSeconds(i)) .status(OperationHistoryStatus.SUCCESS) .build()); } @@ -97,7 +97,7 @@ void find_respectsLimit() { .databaseName("db1") .tableName("tbl3") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(i)) + .completedAt(now.plusSeconds(i)) .status(OperationHistoryStatus.SUCCESS) .build()); } @@ -120,7 +120,7 @@ void find_noParams_returnsAll() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now) + .completedAt(now) .status(OperationHistoryStatus.SUCCESS) .build()); repository.save( @@ -130,7 +130,7 @@ void find_noParams_returnsAll() { .databaseName("db2") .tableName("tbl2") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(now.plusSeconds(1)) + .completedAt(now.plusSeconds(1)) .status(OperationHistoryStatus.FAILED) .build()); @@ -154,7 +154,7 @@ void find_byStatusAndTimeWindow() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(old) + .completedAt(old) .status(OperationHistoryStatus.SUCCESS) .build()); repository.save( @@ -164,7 +164,7 @@ void find_byStatusAndTimeWindow() { .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .submittedAt(recent) + .completedAt(recent) .status(OperationHistoryStatus.FAILED) .build()); @@ -180,13 +180,13 @@ void find_byStatusAndTimeWindow() { null, PageRequest.of(0, 100)); assertThat(failed).hasSize(1); - assertThat(failed.get(0).getSubmittedAt()).isEqualTo(recent); + assertThat(failed.get(0).getCompletedAt()).isEqualTo(recent); // Filter by time window Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); List afterCutoff = repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); assertThat(afterCutoff).hasSize(1); - assertThat(afterCutoff.get(0).getSubmittedAt()).isEqualTo(recent); + 
assertThat(afterCutoff.get(0).getCompletedAt()).isEqualTo(recent); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index a76c7155d..f3e72b52e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -84,28 +84,52 @@ void find_isolatesByTableUuid() { } @Test - void autoIncrementId() { + void callerSetIdIsPreserved() { String tableUuid = UUID.randomUUID().toString(); + String id1 = UUID.randomUUID().toString(); + String id2 = UUID.randomUUID().toString(); Instant now = Instant.now(); - TableStatsHistoryRow row1 = repository.save(buildRow(tableUuid, "db1", "tbl1", 1L, 0L, now)); - TableStatsHistoryRow row2 = repository.save(buildRow(tableUuid, "db1", "tbl1", 2L, 0L, now)); + TableStatsHistoryRow row1 = + repository.save(buildRow(id1, tableUuid, "db1", "tbl1", 1L, 0L, now)); + TableStatsHistoryRow row2 = + repository.save(buildRow(id2, tableUuid, "db1", "tbl1", 2L, 0L, now)); - assertThat(row1.getId()).isNotNull(); - assertThat(row2.getId()).isNotNull(); - assertThat(row2.getId()).isGreaterThan(row1.getId()); + assertThat(row1.getId()).isEqualTo(id1); + assertThat(row2.getId()).isEqualTo(id2); + assertThat(repository.findById(id1)).isPresent(); + assertThat(repository.findById(id2)).isPresent(); } private static TableStatsHistoryRow buildRow( String tableUuid, - String databaseId, + String databaseName, + String tableName, + long numFilesAdded, + long numFilesDeleted, + Instant recordedAt) { + return buildRow( + UUID.randomUUID().toString(), + tableUuid, + databaseName, + tableName, + numFilesAdded, + numFilesDeleted, + recordedAt); + } + + private static TableStatsHistoryRow 
buildRow( + String id, + String tableUuid, + String databaseName, String tableName, long numFilesAdded, long numFilesDeleted, Instant recordedAt) { return TableStatsHistoryRow.builder() + .id(id) .tableUuid(tableUuid) - .databaseId(databaseId) + .databaseName(databaseName) .tableName(tableName) .stats( TableStats.builder() diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index a8ac1cbbb..b62371f53 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -34,7 +34,7 @@ void saveAndFindById() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(stats) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) @@ -43,7 +43,7 @@ void saveAndFindById() { Optional found = repository.findById(tableUuid); assertThat(found).isPresent(); - assertThat(found.get().getDatabaseId()).isEqualTo("db1"); + assertThat(found.get().getDatabaseName()).isEqualTo("db1"); assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); @@ -56,7 +56,7 @@ void upsert_overwritesPreviousStats() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -68,7 +68,7 @@ void upsert_overwritesPreviousStats() { repository.save( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -87,7 +87,7 @@ void find_noParams_returnsAll() { repository.save( 
TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -98,7 +98,7 @@ void find_noParams_returnsAll() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db2") + .databaseName("db2") .tableName("tbl2") .stats( TableStats.builder() @@ -115,7 +115,7 @@ void find_byDatabase() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats( TableStats.builder() @@ -126,7 +126,7 @@ void find_byDatabase() { repository.save( TableStatsRow.builder() .tableUuid(UUID.randomUUID().toString()) - .databaseId("db2") + .databaseName("db2") .tableName("tbl2") .stats( TableStats.builder() @@ -136,6 +136,6 @@ void find_byDatabase() { .build()); assertThat(repository.find("db1", null, null)).hasSize(1); - assertThat(repository.find("db1", null, null).get(0).getDatabaseId()).isEqualTo("db1"); + assertThat(repository.find("db1", null, null).get(0).getDatabaseName()).isEqualTo("db1"); } } From df01c262d3ebfd4e0fcdc3f003e1ebab3ba90220 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:22:50 -0700 Subject: [PATCH 015/104] fix(optimizer): propagate optimizer-0 renames into service + controller - Service impl: rename databaseId -> databaseName in builder calls and method signatures (listTableStats); rename submittedAt -> completedAt for the history-row build path. Generate a UUID for the TableStatsHistoryRow on insert now that id is no longer DB-allocated. - Service interface: rename listTableStats parameter databaseId -> databaseName. - TableStatsController: rename the databaseId query parameter to databaseName to match the service signature. - Service test: rename builder/getter usages and the timestamp assertion (getSubmittedAt -> getCompletedAt). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableStatsController.java | 4 ++-- .../optimizer/service/OptimizerDataService.java | 2 +- .../service/OptimizerDataServiceImpl.java | 17 ++++++++++------- .../service/OptimizerDataServiceImplTest.java | 10 +++++----- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index d469586a2..36e49055b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -49,10 +49,10 @@ public ResponseEntity getTableStats(@PathVariable String tableUui */ @GetMapping public ResponseEntity> listTableStats( - @RequestParam(required = false) String databaseId, + @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok(service.listTableStats(databaseId, tableName, tableUuid)); + return ResponseEntity.ok(service.listTableStats(databaseName, tableName, tableUuid)); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index ce3120400..dd2b2fd58 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -58,7 +58,7 @@ Optional completeOperation( * List stats rows matching the given filters. Every parameter is optional — pass {@code null} to * skip that filter. 
No filters returns all rows. */ - List listTableStats(String databaseId, String tableName, String tableUuid); + List listTableStats(String databaseName, String tableName, String tableUuid); /** * Return per-commit stats history for {@code tableUuid}, newest first. diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 629853156..285cea914 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -20,6 +20,7 @@ import java.time.Instant; import java.util.List; import java.util.Optional; +import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.data.domain.PageRequest; @@ -67,7 +68,7 @@ public Optional completeOperation( .databaseName(row.getDatabaseName()) .tableName(row.getTableName()) .operationType(row.getOperationType()) - .submittedAt(Instant.now()) + .completedAt(Instant.now()) .status(request.getStatus()) .jobId(row.getJobId()) .result(request.getResult()) @@ -94,7 +95,7 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest existing -> existing .toBuilder() - .databaseId(request.getDatabaseId()) + .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) .stats(request.getStats()) .tableProperties(request.getTableProperties()) @@ -103,7 +104,7 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .orElse( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseId(request.getDatabaseId()) + .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) .stats(request.getStats()) .tableProperties(request.getTableProperties()) @@ -113,8 +114,9 @@ public TableStatsDto 
upsertTableStats(String tableUuid, UpsertTableStatsRequest statsHistoryRepository.save( TableStatsHistoryRow.builder() + .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) - .databaseId(request.getDatabaseId()) + .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) .stats(request.getStats()) .recordedAt(now) @@ -129,8 +131,9 @@ public Optional getTableStats(String tableUuid) { } @Override - public List listTableStats(String databaseId, String tableName, String tableUuid) { - return statsRepository.find(databaseId, tableName, tableUuid).stream() + public List listTableStats( + String databaseName, String tableName, String tableUuid) { + return statsRepository.find(databaseName, tableName, tableUuid).stream() .map(mapper::toDto) .collect(Collectors.toList()); } @@ -154,7 +157,7 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { .databaseName(dto.getDatabaseName()) .tableName(dto.getTableName()) .operationType(dto.getOperationType()) - .submittedAt(dto.getSubmittedAt() != null ? dto.getSubmittedAt() : Instant.now()) + .completedAt(dto.getCompletedAt() != null ? 
dto.getCompletedAt() : Instant.now()) .status(dto.getStatus()) .jobId(dto.getJobId()) .result(dto.getResult()) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 244acb204..10605c002 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -67,7 +67,7 @@ void completeOperation_writesHistoryFromOperationRow() { assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); assertThat(result.get().getDatabaseName()).isEqualTo("db1"); - assertThat(result.get().getSubmittedAt()).isNotNull(); + assertThat(result.get().getCompletedAt()).isNotNull(); } @Test @@ -98,14 +98,14 @@ void upsertTableStats_createsNewRow() { service.upsertTableStats( tableUuid, UpsertTableStatsRequest.builder() - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(stats) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .build()); assertThat(dto.getTableUuid()).isEqualTo(tableUuid); - assertThat(dto.getDatabaseId()).isEqualTo("db1"); + assertThat(dto.getDatabaseName()).isEqualTo("db1"); assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); assertThat(dto.getTableProperties()).containsEntry("maintenance.optimizer.ofd.enabled", "true"); assertThat(statsRepository.findById(tableUuid)).isPresent(); @@ -128,7 +128,7 @@ void upsertTableStats_updatesExistingRow_andAppendsHistory() { service.upsertTableStats( tableUuid, UpsertTableStatsRequest.builder() - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(firstStats) .build()); @@ -136,7 +136,7 @@ void 
upsertTableStats_updatesExistingRow_andAppendsHistory() { service.upsertTableStats( tableUuid, UpsertTableStatsRequest.builder() - .databaseId("db1") + .databaseName("db1") .tableName("tbl1") .stats(secondStats) .build()); From 11dd115b3c72b637d5ff7e7232be7b20dcb8704c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 10:25:55 -0700 Subject: [PATCH 016/104] fix(optimizer): propagate optimizer-0 renames into apps/optimizer + analyzer - apps/optimizer shared module: rename databaseId -> databaseName in TableStatsRow + TableStatsRepository; submittedAt -> completedAt in TableOperationHistoryRow + TableOperationHistoryRepository; tighten database_name and table_name from VARCHAR(255) to VARCHAR(128). - Analyzer Table model: rename databaseId -> databaseName so the domain object matches the underlying entity. Update Table.from factory and downstream usages in AnalyzerRunner, TableOperation, and CadencePolicy (which now reads completedAt off history rows). - Analyzer tests: update Table builder and TableOperationHistoryRow builder usages to the renamed fields. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 2 +- .../openhouse/analyzer/CadencePolicy.java | 4 ++-- .../openhouse/analyzer/model/Table.java | 4 ++-- .../analyzer/model/TableOperation.java | 2 +- .../analyzer/AnalyzerRunnerTest.java | 20 +++++++++---------- .../OrphanFilesDeletionAnalyzerTest.java | 6 +++--- .../entity/TableOperationHistoryRow.java | 4 ++-- .../optimizer/entity/TableOperationRow.java | 4 ++-- .../optimizer/entity/TableStatsRow.java | 6 +++--- .../TableOperationHistoryRepository.java | 6 +++--- .../repository/TableStatsRepository.java | 4 ++-- 11 files changed, 31 insertions(+), 31 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 5ad653e97..343a0712e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -121,7 +121,7 @@ public void analyze( log.info( "Created PENDING {} operation for table {}.{}", analyzer.getOperationType(), - table.getDatabaseId(), + table.getDatabaseName(), table.getTableId()); } })); diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index a66a7b072..0590c2045 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -78,9 +78,9 @@ private boolean decideFromHistory(Optional latestHisto private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { switch (entry.getStatus()) { case "SUCCESS": - return pastInterval(entry.getSubmittedAt(), successRetryInterval); + return 
pastInterval(entry.getCompletedAt(), successRetryInterval); case "FAILED": - return pastInterval(entry.getSubmittedAt(), failureRetryInterval); + return pastInterval(entry.getCompletedAt(), failureRetryInterval); default: return true; } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java index d170f29dd..45e02fd60 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java @@ -21,7 +21,7 @@ public class Table { private String tableUuid; - private String databaseId; + private String databaseName; private String tableId; @Builder.Default private Map tableProperties = Collections.emptyMap(); @@ -32,7 +32,7 @@ public class Table { public static Table from(TableStatsRow row) { return Table.builder() .tableUuid(row.getTableUuid()) - .databaseId(row.getDatabaseId()) + .databaseName(row.getDatabaseName()) .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? 
row.getTableProperties() : Collections.emptyMap()) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java index 5a81a3848..97f4b9f96 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -59,7 +59,7 @@ public static TableOperation pending(Table table, String operationType) { TableOperation op = new TableOperation(); op.id = UUID.randomUUID().toString(); op.tableUuid = table.getTableUuid(); - op.databaseName = table.getDatabaseId(); + op.databaseName = table.getDatabaseName(); op.tableName = table.getTableId(); op.operationType = operationType; op.status = "PENDING"; diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 29fd20e50..2f3eee3d7 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -48,11 +48,11 @@ void setUp() { void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); @@ -83,11 +83,11 @@ void 
analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); TableOperationRow existingEntity = new TableOperationRow(); existingEntity.setId("existing-op-id"); @@ -185,11 +185,11 @@ void analyze_skipsTable_whenTableUuidIsNull() { void analyze_skipsTable_whenCircuitBreakerTrips() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 3) @@ -199,7 +199,7 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { .id("fail-" + i) .tableUuid("uuid-1") .operationType("ORPHAN_FILES_DELETION") - .submittedAt(Instant.now().minusSeconds(i * 60)) + .completedAt(Instant.now().minusSeconds(i * 60)) .status("FAILED") .build()) .collect(Collectors.toList()); @@ -225,11 +225,11 @@ void analyze_skipsTable_whenCircuitBreakerTrips() { void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseId("db1"); + statsEntity.setDatabaseName("db1"); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseId("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); List failures = IntStream.range(0, 
3) @@ -239,7 +239,7 @@ void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { .id("fail-" + i) .tableUuid("uuid-1") .operationType("ORPHAN_FILES_DELETION") - .submittedAt(Instant.now().minusSeconds(i * 60)) + .completedAt(Instant.now().minusSeconds(i * 60)) .status("FAILED") .build()) .collect(Collectors.toList()); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java index 171846ff8..f0e915059 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -217,7 +217,7 @@ private Table tableWithProperty(String value) { : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); return Table.builder() .tableUuid("test-uuid") - .databaseId("db1") + .databaseName("db1") .tableId("tbl1") .tableProperties(props) .build(); @@ -230,12 +230,12 @@ private TableOperation opWithStatus(String status, Instant scheduledAt) { return op; } - private TableOperationHistoryRow historyWithStatus(String status, Instant submittedAt) { + private TableOperationHistoryRow historyWithStatus(String status, Instant completedAt) { return TableOperationHistoryRow.builder() .id("hist-id") .tableUuid("test-uuid") .operationType("ORPHAN_FILES_DELETION") - .submittedAt(submittedAt) + .completedAt(completedAt) .status(status) .build(); } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e638e2e1..b05df0f1c 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ 
b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -29,8 +29,8 @@ public class TableOperationHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private String operationType; - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; @Column(name = "status", nullable = false, length = 20) private String status; diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java index fc0104604..33a83bd3f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -28,10 +28,10 @@ public class TableOperationRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Column(name = "operation_type", nullable = false, length = 50) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 5cdf16a97..bc647d86e 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -34,10 +34,10 @@ public class TableStatsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - 
@Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index f2ea9e3c8..fd9edd1f4 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -13,7 +13,7 @@ public interface TableOperationHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. 
*/ @Query( @@ -21,8 +21,8 @@ public interface TableOperationHistoryRepository + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "ORDER BY r.completedAt DESC") List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6effe19c2..50f515d07 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } From 027fccd61c362c1d9b3e2902583579b34d1907f7 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 11:01:56 -0700 Subject: [PATCH 017/104] fix(optimizer): add databaseName + tableName to apps/optimizer history row Address PR #530 review feedback: the lightweight read-side TableOperationHistoryRow in the apps/optimizer shared module did not surface the denormalized database_name and table_name columns, even though the underlying schema carries them. Add them so analyst-style queries from the analyzer/scheduler side can read operation history without joining back to table_operations. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/entity/TableOperationHistoryRow.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e638e2e1..4e3ace953 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -26,6 +26,12 @@ public class TableOperationHistoryRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + @Column(name = "operation_type", nullable = false, length = 50) private String operationType; From 79753f1da1ae63f84de9b127d1f7cac301a6666b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 14:03:10 -0700 Subject: [PATCH 018/104] fix(optimizer): index table_operations_history on (database_name, table_name) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a composite secondary index on (database_name, table_name) to table_operations_history at the schema and entity layers. This backs a new name-based history-lookup endpoint added on optimizer-2; without the index, the query degrades to a full scan on a table that grows with every operation completion. The other three optimizer tables get no new indexes — no new query patterns on them this round. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/entity/TableOperationsHistoryRow.java | 3 ++- services/optimizer/src/main/resources/db/optimizer-schema.sql | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 6ac5db173..3b6ced892 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -36,7 +36,8 @@ @Index(name = "idx_op_type_hist", columnList = "operation_type"), @Index(name = "idx_completed_at", columnList = "completed_at"), @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id") + @Index(name = "idx_job_id", columnList = "job_id"), + @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") }) @Getter @EqualsAndHashCode diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 49641efe2..4c2d9604b 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -47,5 +47,6 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( status VARCHAR(20) NOT NULL, job_id VARCHAR(255), result TEXT, - PRIMARY KEY (id) + PRIMARY KEY (id), + INDEX idx_toph_db_table (database_name, table_name) ); From dceef974009ccc0c48cc5df274de4ca85bf74934 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 1 May 2026 14:04:37 -0700 Subject: [PATCH 019/104] feat(optimizer): unify REST prefix to /v1/optimizer; add name-based history GET - Rename @RequestMapping prefix on the three optimizer controllers to share a 
/v1/optimizer/... namespace: /v1/table-operations -> /v1/optimizer/operations /v1/table-operations-history -> /v1/optimizer/operations-history /v1/table-stats -> /v1/optimizer/stats - Add TableByNameController hosting human/analyst-oriented name-keyed reads under /v1/optimizer/databases/{databaseName}/ tables/{tableName}. Today it carries one endpoint: GET .../operations-history (lists operation history by name). Other optimizer endpoints stay UUID-keyed because drop-and-recreate of a table produces a new optimizer identity (new stats, new storage, new operation history) and a name-only key would conflate two distinct identities. The new controller is structured for future expansion when more name-based use cases land. Backed by the composite index on table_operations_history (database_name, table_name) added on optimizer-0. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableByNameController.java | 35 +++++++++++++++++++ .../controller/TableOperationsController.java | 2 +- .../TableOperationsHistoryController.java | 2 +- .../api/controller/TableStatsController.java | 2 +- 4 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java new file mode 100644 index 000000000..f1989ef3e --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java @@ -0,0 +1,35 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import java.util.List; +import lombok.RequiredArgsConstructor; +import 
org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** + * Name-keyed read endpoints for human/analyst convenience. UUID-keyed endpoints elsewhere remain + * the canonical path for machine callers, since drop-and-recreate of a table produces a new + * optimizer identity that a name-only lookup would conflate with the dropped table. + */ +@RestController +@RequestMapping("/v1/optimizer/databases/{databaseName}/tables/{tableName}") +@RequiredArgsConstructor +public class TableByNameController { + + private final OptimizerDataService service; + + /** Operation history for a table by (database, table) name, newest first. */ + @GetMapping("/operations-history") + public ResponseEntity> getOperationsHistoryByName( + @PathVariable String databaseName, + @PathVariable String tableName, + @RequestParam(defaultValue = "100") int limit) { + return ResponseEntity.ok( + service.listHistory(databaseName, tableName, null, null, null, null, null, limit)); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index d8ba13b11..a0cab4b7c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -20,7 +20,7 @@ /** REST controller for {@code table_operations}. 
*/ @RestController -@RequestMapping("/v1/table-operations") +@RequestMapping("/v1/optimizer/operations") @RequiredArgsConstructor public class TableOperationsController { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 11c77a15d..ff4c4a77d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -19,7 +19,7 @@ /** REST controller for {@code table_operations_history}. */ @RestController -@RequestMapping("/v1/table-operations-history") +@RequestMapping("/v1/optimizer/operations-history") @RequiredArgsConstructor public class TableOperationsHistoryController { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 36e49055b..4e8624481 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -18,7 +18,7 @@ /** REST controller for managing per-table stats in the optimizer DB. 
*/ @RestController -@RequestMapping("/v1/table-stats") +@RequestMapping("/v1/optimizer/stats") @RequiredArgsConstructor public class TableStatsController { From bf04488d2ee0f14c0c41095b513c8551333c151d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:10:50 -0700 Subject: [PATCH 020/104] fix(optimizer): align apps/optimizer entities with services schema The apps/optimizer shared module was created in this PR with field names and column lengths that did not match the schema established in optimizer-0: - TableStatsRow.databaseId -> databaseName - TableOperationHistoryRow.submittedAt -> completedAt - database_name / table_name VARCHAR(255) -> VARCHAR(128) Repos updated to match (TableStatsRepository param, TableOperationHistoryRepository ORDER BY column). No services/optimizer or schema SQL change needed - those already used the correct names. This change was previously folded into a later commit on optimizer-3; moving it down to the PR that owns these files. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/entity/TableOperationHistoryRow.java | 4 ++-- .../openhouse/optimizer/entity/TableOperationRow.java | 4 ++-- .../linkedin/openhouse/optimizer/entity/TableStatsRow.java | 6 +++--- .../repository/TableOperationHistoryRepository.java | 6 +++--- .../optimizer/repository/TableStatsRepository.java | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java index 4e3ace953..d15eb6785 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java @@ -35,8 +35,8 @@ public class TableOperationHistoryRow { @Column(name = "operation_type", nullable = false, length = 50) private 
String operationType; - @Column(name = "submitted_at", nullable = false) - private Instant submittedAt; + @Column(name = "completed_at", nullable = false) + private Instant completedAt; @Column(name = "status", nullable = false, length = 20) private String status; diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java index fc0104604..33a83bd3f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java @@ -28,10 +28,10 @@ public class TableOperationRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_name", nullable = false, length = 255) + @Column(name = "database_name", nullable = false, length = 128) private String databaseName; - @Column(name = "table_name", nullable = false, length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Column(name = "operation_type", nullable = false, length = 50) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index 5cdf16a97..bc647d86e 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -34,10 +34,10 @@ public class TableStatsRow { @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; - @Column(name = "database_id", nullable = false, length = 255) - private String databaseId; + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; - @Column(name = "table_name", nullable = false, 
length = 255) + @Column(name = "table_name", nullable = false, length = 128) private String tableName; @Type(type = "json") diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index f2ea9e3c8..fd9edd1f4 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -13,7 +13,7 @@ public interface TableOperationHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code submittedAt} descending. + * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. */ @Query( @@ -21,8 +21,8 @@ public interface TableOperationHistoryRepository + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.submittedAt >= :since) " - + "ORDER BY r.submittedAt DESC") + + "AND (:since IS NULL OR r.completedAt >= :since) " + + "ORDER BY r.completedAt DESC") List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 6effe19c2..50f515d07 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -15,11 +15,11 @@ public 
interface TableStatsRepository extends JpaRepository find( - @Param("databaseId") String databaseId, + @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); } From 8054586519bab86fbabc0591e02ab141dff3cbfb Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:15:53 -0700 Subject: [PATCH 021/104] refactor(optimizer-analyzer): delete unused AnalyzerConfig The empty @Configuration class did nothing. @SpringBootApplication on AnalyzerApplication already triggers @ComponentScan, which discovers all @Component-annotated beans without help. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../linkedin/openhouse/analyzer/config/AnalyzerConfig.java | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java deleted file mode 100644 index 30ad9f55b..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/config/AnalyzerConfig.java +++ /dev/null @@ -1,7 +0,0 @@ -package com.linkedin.openhouse.analyzer.config; - -import org.springframework.context.annotation.Configuration; - -/** Spring configuration for the Analyzer. */ -@Configuration -public class AnalyzerConfig {} From 5af5f14ad32815d073ce3dc258abfbf77ce11620 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:18:11 -0700 Subject: [PATCH 022/104] refactor(optimizer-analyzer): remove circuit breaker, defer with TODO The circuit breaker was hardcoded (threshold=5, no reset, no operator visibility) and forced the AnalyzerRunner to materialize the full history of every (table, operation_type) just to check the last N rows. 
Cadence policy only needs the single latest history entry; pulling everything was wasted I/O. Changes: - Remove getCircuitBreakerThreshold and isCircuitBroken from OperationAnalyzer. - Add a TODO documenting requirements for the eventual replacement (configurable threshold, exponential-backoff reset, operator-visible signal). - In AnalyzerRunner, fold history loading into a per-(uuid, type) map holding only the most-recent entry; drop the per-table history list and the isCircuitBroken call. - Add a TODO to switch the history scan to a windowed query that returns at most one row per (uuid, type). - Drop the two circuit-breaker tests from AnalyzerRunnerTest. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 47 ++++++---- .../openhouse/analyzer/OperationAnalyzer.java | 38 ++------- .../analyzer/AnalyzerRunnerTest.java | 85 ------------------- 3 files changed, 36 insertions(+), 134 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 343a0712e..e48bb241e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -7,7 +7,8 @@ import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; -import java.util.Collections; +import java.time.Instant; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -21,16 +22,15 @@ * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * - *

<p>The two sides of the join — current operations and circuit-breaker history — are loaded into - * memory once per run before the table loop. Both are naturally bounded (only tables with active or - * recently failed operations have rows), so holding them in maps is safe at any table scale. + *

<p>Both sides of the join — current operations and latest history per (table, type) — are loaded + * into maps once per run before the table loop. Both are bounded by the number of tables, so + * holding them in memory is safe at any realistic scale. * *

<p>// TODO: Iterate per-database instead of loading all tables at once. This scopes memory usage - * and allows incremental progress. When we go per-db we may still see 10k tables per iteration, but - * that should be fine. + * and allows incremental progress. * - *

// TODO: Add benchmarking and scale tests. Measure memory footprint at 10k tables per - * iteration to validate the in-memory join approach. + *

// TODO: Benchmark memory footprint at 10k tables per iteration to validate the in-memory join + * approach. */ @Slf4j @Component @@ -79,7 +79,10 @@ public void analyze( TableOperation::from, TableOperation::mostRecent)))); - Map>> historyByType = + // TODO(perf): replace this full-history scan with a windowed query that returns at most one + // row per (table_uuid, operation_type) — the analyzer only consumes the latest entry. Today + // this is O(H) per analyzer where H is total history rows; bounded but unnecessary. + Map> latestHistoryByType = activeAnalyzers.stream() .collect( Collectors.toMap( @@ -87,8 +90,12 @@ public void analyze( a -> historyRepo.find(a.getOperationType(), null, null, null, Pageable.unpaged()) .stream() + .filter(r -> r.getTableUuid() != null) .collect( - Collectors.groupingBy(TableOperationHistoryRow::getTableUuid)))); + Collectors.toMap( + TableOperationHistoryRow::getTableUuid, + r -> r, + AnalyzerRunner::moreRecentHistory)))); List

tables = statsRepo.find(databaseName, tableName, tableUuid).stream() @@ -108,14 +115,13 @@ public void analyze( Optional currentOp = Optional.ofNullable( opsByType.get(analyzer.getOperationType()).get(table.getTableUuid())); - List history = - historyByType - .get(analyzer.getOperationType()) - .getOrDefault(table.getTableUuid(), Collections.emptyList()); - Optional latestHistory = history.stream().findFirst(); + Optional latestHistory = + Optional.ofNullable( + latestHistoryByType + .get(analyzer.getOperationType()) + .get(table.getTableUuid())); - if (analyzer.shouldSchedule(table, currentOp, latestHistory) - && !analyzer.isCircuitBroken(table.getTableUuid(), history)) { + if (analyzer.shouldSchedule(table, currentOp, latestHistory)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); operationsRepo.save(op.toRow()); log.info( @@ -128,4 +134,11 @@ public void analyze( log.info("Analysis complete"); } + + private static TableOperationHistoryRow moreRecentHistory( + TableOperationHistoryRow a, TableOperationHistoryRow b) { + Comparator byCompletedAt = + Comparator.comparing(r -> r.getCompletedAt() != null ? r.getCompletedAt() : Instant.EPOCH); + return byCompletedAt.compare(a, b) >= 0 ? a : b; + } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 731c8127f..0d5fb6770 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -3,12 +3,17 @@ import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; -import java.util.List; import java.util.Optional; /** * Strategy interface for a single operation type. 
Each implementation decides whether a given table * needs an operation recommendation upserted in the Optimizer Service. + * + *

// TODO(circuit-breaker): a chronically-failing table currently produces a new PENDING row on + * every Analyzer pass. Add a circuit breaker that suppresses scheduling for a (table, type) after N + * consecutive FAILED history entries. Requirements: configurable threshold per operation type, + * automatic reset via exponential backoff so tables can recover, and an operator-visible signal + * (metric or query) so tripped breakers are diagnosable. */ public interface OperationAnalyzer { @@ -32,35 +37,4 @@ boolean shouldSchedule( Table table, Optional currentOp, Optional latestHistory); - - /** - * Maximum number of consecutive FAILED history entries before the circuit breaker trips and - * scheduling is suppressed for this (table, operation_type). Override per operation type. Returns - * 0 to disable the circuit breaker. - */ - default int getCircuitBreakerThreshold() { - return 5; - } - - /** - * Returns {@code true} if the circuit breaker has tripped for this table. The default - * implementation checks whether the last N history entries are all FAILED. Individual analyzers - * can override this to implement different strategies (e.g., time-based backoff). - * - *

// TODO: Add circuit breaker reset with exponential backoff so tables can recover - * automatically after a cooldown period instead of staying tripped permanently. - * - *

// TODO: Add a communication path to surface tripped circuit breakers to users (e.g., - * metrics, alerts, or a dashboard query). - * - * @param tableUuid the table whose history to check - * @param history recent history entries for this (table, type), newest first - */ - default boolean isCircuitBroken(String tableUuid, List history) { - int threshold = getCircuitBreakerThreshold(); - if (threshold <= 0 || history.size() < threshold) { - return false; - } - return history.stream().limit(threshold).allMatch(r -> "FAILED".equals(r.getStatus())); - } } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 2f3eee3d7..ad6938633 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -9,7 +9,6 @@ import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; -import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; @@ -19,8 +18,6 @@ import java.util.Collections; import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -56,8 +53,6 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(analyzer.isCircuitBroken(anyString(), 
any())).thenCallRealMethod(); - when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) @@ -180,84 +175,4 @@ void analyze_skipsTable_whenTableUuidIsNull() { verify(operationsRepo, never()).save(any()); } - - @Test - void analyze_skipsTable_whenCircuitBreakerTrips() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName("db1"); - statsEntity.setTableName("tbl1"); - - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); - - List failures = - IntStream.range(0, 3) - .mapToObj( - i -> - TableOperationHistoryRow.builder() - .id("fail-" + i) - .tableUuid("uuid-1") - .operationType("ORPHAN_FILES_DELETION") - .completedAt(Instant.now().minusSeconds(i * 60)) - .status("FAILED") - .build()) - .collect(Collectors.toList()); - - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); - when(analyzer.getCircuitBreakerThreshold()).thenReturn(3); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) - .thenReturn(failures); - when(analyzer.isEnabled(expectedTable)).thenReturn(true); - when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) - .thenReturn(true); - - runner.analyze(); - - verify(operationsRepo, never()).save(any()); - } - - @Test - void analyze_doesNotTrip_whenFewerFailuresThanThreshold() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - 
statsEntity.setDatabaseName("db1"); - statsEntity.setTableName("tbl1"); - - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); - - List failures = - IntStream.range(0, 3) - .mapToObj( - i -> - TableOperationHistoryRow.builder() - .id("fail-" + i) - .tableUuid("uuid-1") - .operationType("ORPHAN_FILES_DELETION") - .completedAt(Instant.now().minusSeconds(i * 60)) - .status("FAILED") - .build()) - .collect(Collectors.toList()); - - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(analyzer.isCircuitBroken(anyString(), any())).thenCallRealMethod(); - when(analyzer.getCircuitBreakerThreshold()).thenReturn(5); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) - .thenReturn(failures); - when(analyzer.isEnabled(expectedTable)).thenReturn(true); - when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.of(failures.get(0)))) - .thenReturn(true); - - runner.analyze(); - - verify(operationsRepo).save(any()); - } } From 62f426a0a236f074c0db4c478b10e6e7b7949318 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:20:11 -0700 Subject: [PATCH 023/104] feat(optimizer): add findLatestPerTable to history repo The Analyzer evaluates cadence using only the most-recent history row per (table_uuid, operation_type); pulling the full history scan per analyzer pass is wasted I/O. Add a dedicated query that returns at most one row per (table_uuid, operation_type), restricted to a single operation type. The query uses a correlated MAX subquery for portability across MySQL and H2. For large history volume, a (operation_type, table_uuid, completed_at) index on the schema would make the subquery index-only; TODO noted in javadoc. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../TableOperationHistoryRepository.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index fd9edd1f4..09930ab08 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -29,4 +29,24 @@ List find( @Param("status") String status, @Param("since") Instant since, Pageable pageable); + + /** + * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a + * single operation type. Used by the Analyzer to evaluate cadence without materializing every + * historical row. + * + *

The correlated subquery is portable across MySQL and H2 (MySQL mode). On a large {@code + * table_operations_history} table this benefits from an index on {@code (operation_type, + * table_uuid, completed_at)} — TODO add it to the schema. + * + *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all + * tied rows; callers should dedupe in memory. + */ + @Query( + "SELECT r FROM TableOperationHistoryRow r " + + "WHERE r.operationType = :operationType " + + "AND r.completedAt = (" + + " SELECT MAX(r2.completedAt) FROM TableOperationHistoryRow r2 " + + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") + List findLatestPerTable(@Param("operationType") String operationType); } From c4f194ac19bc57a88c7337b942ad7282ebde4f80 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:21:40 -0700 Subject: [PATCH 024/104] perf(optimizer-analyzer): use findLatestPerTable for history lookup Switch the AnalyzerRunner from scanning every history row per analyzer pass to the dedicated findLatestPerTable query (added in apps/optimizer). The analyzer only consumes the latest entry per (table_uuid, operation_type); the previous full-history scan was bounded but unnecessary I/O. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../linkedin/openhouse/analyzer/AnalyzerRunner.java | 9 +++------ .../openhouse/analyzer/AnalyzerRunnerTest.java | 12 +++++------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index e48bb241e..b4ca06966 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -15,7 +15,6 @@ import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.data.domain.Pageable; import org.springframework.stereotype.Component; /** @@ -79,17 +78,15 @@ public void analyze( TableOperation::from, TableOperation::mostRecent)))); - // TODO(perf): replace this full-history scan with a windowed query that returns at most one - // row per (table_uuid, operation_type) — the analyzer only consumes the latest entry. Today - // this is O(H) per analyzer where H is total history rows; bounded but unnecessary. + // Latest history row per (table_uuid, operation_type), one query per analyzer. The repo query + // may return tied rows for the same key on identical completed_at; dedupe in memory. 
Map> latestHistoryByType = activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, a -> - historyRepo.find(a.getOperationType(), null, null, null, Pageable.unpaged()) - .stream() + historyRepo.findLatestPerTable(a.getOperationType()).stream() .filter(r -> r.getTableUuid() != null) .collect( Collectors.toMap( diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index ad6938633..8c8bb8145 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -24,7 +24,6 @@ import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import org.springframework.data.domain.Pageable; @ExtendWith(MockitoExtension.class) class AnalyzerRunnerTest { @@ -55,7 +54,7 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) @@ -95,7 +94,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(List.of(existingEntity)); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + 
when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -119,7 +118,7 @@ void analyze_skipsTable_whenNotEnabled() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); @@ -146,7 +145,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(List.of(scheduled)); - when(historyRepo.find("ORPHAN_FILES_DELETION", null, null, null, Pageable.unpaged())) + when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) .thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -168,8 +167,7 @@ void analyze_skipsTable_whenTableUuidIsNull() { when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) .thenReturn(Collections.emptyList()); - when(historyRepo.find(anyString(), any(), any(), any(), any())) - .thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); runner.analyze(); From 6da624ab4836f7d249fa712a1676924939d26137 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:31:24 -0700 Subject: [PATCH 025/104] refactor(optimizer-analyzer): typed OperationType/Status, polish cadence + TableOperation The analyzer was using raw Strings everywhere for operation type and status. 
Per-layer types: introduce analyzer-internal OperationType and OperationStatus enums in apps/optimizer-analyzer/model and convert at the entity boundary. The wire API (services/optimizer/api/model) and DB columns (apps/optimizer entity rows) keep their own representations and are unaffected. Changes: - New enums OperationType and OperationStatus in the analyzer model package. - TableOperation: operationType and status become enums. from(row) parses the String columns; toRow() emits .name() back. from() and pending() share a private build() factory. - TableOperation javadocs: drop "denormalized for display" wording. - OperationAnalyzer.getOperationType returns OperationType. - AnalyzerRunner: filter parameter and per-type maps are keyed on OperationType; calls to repos still pass the String .name(). - CadencePolicy.shouldSchedule: switch on OperationStatus is exhaustive (now including CANCELED), unknown values throw IllegalStateException, and the SCHEDULED branch has an inline comment explaining the two cases. - OrphanFilesDeletionAnalyzer: returns the enum. - Tests updated to construct enum values; OFD test helper takes the enum. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 18 +++-- .../openhouse/analyzer/CadencePolicy.java | 28 ++++--- .../openhouse/analyzer/OperationAnalyzer.java | 5 +- .../analyzer/OrphanFilesDeletionAnalyzer.java | 8 +- .../analyzer/model/OperationStatus.java | 14 ++++ .../analyzer/model/OperationType.java | 10 +++ .../analyzer/model/TableOperation.java | 76 ++++++++++++------- .../analyzer/AnalyzerRunnerTest.java | 46 +++++------ .../OrphanFilesDeletionAnalyzerTest.java | 21 ++--- 9 files changed, 138 insertions(+), 88 deletions(-) create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index b4ca06966..22da50d01 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -51,25 +52,30 @@ public void analyze() { * table UUID. Pass {@code null} for any parameter to skip that filter. */ public void analyze( - String operationType, String databaseName, String tableName, String tableUuid) { + OperationType operationType, String databaseName, String tableName, String tableUuid) { List activeAnalyzers = operationType == null ? 
analyzers : analyzers.stream() - .filter(a -> a.getOperationType().equals(operationType)) + .filter(a -> a.getOperationType() == operationType) .collect(Collectors.toList()); // Pre-load the small sides of the joins — one query per analyzer type. // TODO: Move to a query builder (Criteria API or jOOQ) as filter count grows. - Map> opsByType = + Map> opsByType = activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, a -> operationsRepo - .find(a.getOperationType(), null, tableUuid, databaseName, tableName) + .find( + a.getOperationType().name(), + null, + tableUuid, + databaseName, + tableName) .stream() .filter(e -> e.getTableUuid() != null) .collect( @@ -80,13 +86,13 @@ public void analyze( // Latest history row per (table_uuid, operation_type), one query per analyzer. The repo query // may return tied rows for the same key on identical completed_at; dedupe in memory. - Map> latestHistoryByType = + Map> latestHistoryByType = activeAnalyzers.stream() .collect( Collectors.toMap( OperationAnalyzer::getOperationType, a -> - historyRepo.findLatestPerTable(a.getOperationType()).stream() + historyRepo.findLatestPerTable(a.getOperationType().name()).stream() .filter(r -> r.getTableUuid() != null) .collect( Collectors.toMap( diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 0590c2045..4cf892021 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -55,16 +55,22 @@ public boolean shouldSchedule( } TableOperation op = currentOp.get(); switch (op.getStatus()) { - case "PENDING": - case "SCHEDULING": + case PENDING: + case SCHEDULING: return false; - case "SCHEDULED": - if (latestHistory.isEmpty()) { - return pastInterval(op.getScheduledAt(), 
scheduledTimeout); - } - return decideFromHistoryEntry(latestHistory.get()); + case SCHEDULED: + // Two scenarios for a SCHEDULED row: + // - no history yet: the job is still running (or crashed); fall through to the + // scheduledTimeout safety net. + // - history present: the job completed and history was written; defer to cadence policy + // on the history entry. + return latestHistory.isPresent() + ? decideFromHistory(latestHistory) + : pastInterval(op.getScheduledAt(), scheduledTimeout); + case CANCELED: + return decideFromHistory(latestHistory); default: - return true; + throw new IllegalStateException("Unhandled operation status: " + op.getStatus()); } } @@ -72,10 +78,7 @@ private boolean decideFromHistory(Optional latestHisto if (latestHistory.isEmpty()) { return true; } - return decideFromHistoryEntry(latestHistory.get()); - } - - private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { + TableOperationHistoryRow entry = latestHistory.get(); switch (entry.getStatus()) { case "SUCCESS": return pastInterval(entry.getCompletedAt(), successRetryInterval); @@ -86,6 +89,7 @@ private boolean decideFromHistoryEntry(TableOperationHistoryRow entry) { } } + /** {@code true} if {@code timestamp} is null or {@code interval} has elapsed since then. 
*/ private boolean pastInterval(Instant timestamp, Duration interval) { return timestamp == null || Duration.between(timestamp, Instant.now()).compareTo(interval) > 0; } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 0d5fb6770..33f2b8e5d 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -17,8 +18,8 @@ */ public interface OperationAnalyzer { - /** The operation type this analyzer handles (e.g., {@code "ORPHAN_FILES_DELETION"}). */ - String getOperationType(); + /** The operation type this analyzer handles. */ + OperationType getOperationType(); /** * Returns {@code true} if this operation is opted-in for the given table. 
Tables that return diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java index c348b0265..450fda293 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -9,11 +10,10 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -/** Analyzer for the {@code ORPHAN_FILES_DELETION} operation type. */ +/** Analyzer for the {@link OperationType#ORPHAN_FILES_DELETION} operation type. 
*/ @Component public class OrphanFilesDeletionAnalyzer implements OperationAnalyzer { - static final String OPERATION_TYPE = "ORPHAN_FILES_DELETION"; static final String OFD_ENABLED_PROPERTY = "maintenance.optimizer.ofd.enabled"; private final CadencePolicy cadencePolicy; @@ -36,8 +36,8 @@ public OrphanFilesDeletionAnalyzer( } @Override - public String getOperationType() { - return OPERATION_TYPE; + public OperationType getOperationType() { + return OperationType.ORPHAN_FILES_DELETION; } @Override diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java new file mode 100644 index 000000000..8a2d1d541 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java @@ -0,0 +1,14 @@ +package com.linkedin.openhouse.analyzer.model; + +/** + * Analyzer-internal lifecycle states. The analyzer only writes {@link #PENDING}; the other values + * are read off existing rows when deciding whether to re-issue a recommendation. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java new file mode 100644 index 000000000..da48bb459 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.analyzer.model; + +/** + * Analyzer-internal enum for the operation types this app knows how to schedule. Intentionally + * separate from the wire-API and DB representations so the analyzer can evolve its set of supported + * operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java index 97f4b9f96..54e569b6a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -22,17 +22,17 @@ public class TableOperation { /** The table this operation targets. */ private String tableUuid; - /** Database name, denormalized for display. */ + /** Database name. */ private String databaseName; - /** Table name, denormalized for display. */ + /** Table name. */ private String tableName; - /** Operation type (e.g., {@code "ORPHAN_FILES_DELETION"}). */ - private String operationType; + /** Operation type. */ + private OperationType operationType; - /** Current lifecycle status: PENDING, SCHEDULING, SCHEDULED. */ - private String status; + /** Current lifecycle status. 
*/ + private OperationStatus status; /** When this operation record was created. */ private Instant createdAt; @@ -42,29 +42,28 @@ public class TableOperation { /** Build a {@code TableOperation} from an existing JPA row. */ public static TableOperation from(TableOperationRow row) { - TableOperation op = new TableOperation(); - op.id = row.getId(); - op.tableUuid = row.getTableUuid(); - op.databaseName = row.getDatabaseName(); - op.tableName = row.getTableName(); - op.operationType = row.getOperationType(); - op.status = row.getStatus(); - op.createdAt = row.getCreatedAt(); - op.scheduledAt = row.getScheduledAt(); - return op; + return build( + row.getId(), + row.getTableUuid(), + row.getDatabaseName(), + row.getTableName(), + OperationType.valueOf(row.getOperationType()), + OperationStatus.valueOf(row.getStatus()), + row.getCreatedAt(), + row.getScheduledAt()); } /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, String operationType) { - TableOperation op = new TableOperation(); - op.id = UUID.randomUUID().toString(); - op.tableUuid = table.getTableUuid(); - op.databaseName = table.getDatabaseName(); - op.tableName = table.getTableId(); - op.operationType = operationType; - op.status = "PENDING"; - op.createdAt = Instant.now(); - return op; + public static TableOperation pending(Table table, OperationType operationType) { + return build( + UUID.randomUUID().toString(), + table.getTableUuid(), + table.getDatabaseName(), + table.getTableId(), + operationType, + OperationStatus.PENDING, + Instant.now(), + null); } /** Convert to a JPA entity for persistence. 
*/ @@ -74,8 +73,8 @@ public TableOperationRow toRow() { .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .operationType(operationType) - .status(status) + .operationType(operationType.name()) + .status(status.name()) .createdAt(createdAt) .scheduledAt(scheduledAt) .version(0L) @@ -88,4 +87,25 @@ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? a : b; } + + private static TableOperation build( + String id, + String tableUuid, + String databaseName, + String tableName, + OperationType operationType, + OperationStatus status, + Instant createdAt, + Instant scheduledAt) { + TableOperation op = new TableOperation(); + op.id = id; + op.tableUuid = tableUuid; + op.databaseName = databaseName; + op.tableName = tableName; + op.operationType = operationType; + op.status = status; + op.createdAt = createdAt; + op.scheduledAt = scheduledAt; + return op; + } } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 8c8bb8145..1feafba29 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -7,6 +7,7 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.linkedin.openhouse.analyzer.model.OperationType; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; @@ -28,6 +29,8 @@ @ExtendWith(MockitoExtension.class) class AnalyzerRunnerTest { + private static final String OFD = OperationType.ORPHAN_FILES_DELETION.name(); + @Mock private 
TableStatsRepository statsRepo; @Mock private TableOperationsRepository operationsRepo; @Mock private TableOperationHistoryRepository historyRepo; @@ -51,11 +54,9 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) .thenReturn(true); @@ -68,7 +69,7 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); assertThat(saved.getDatabaseName()).isEqualTo("db1"); assertThat(saved.getTableName()).isEqualTo("tbl1"); - assertThat(saved.getOperationType()).isEqualTo("ORPHAN_FILES_DELETION"); + assertThat(saved.getOperationType()).isEqualTo(OFD); assertThat(saved.getStatus()).isEqualTo("PENDING"); assertThat(saved.getId()).isNotNull(); } @@ -87,15 +88,13 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { existingEntity.setId("existing-op-id"); existingEntity.setStatus("PENDING"); existingEntity.setTableUuid("uuid-1"); - existingEntity.setOperationType("ORPHAN_FILES_DELETION"); + existingEntity.setOperationType(OFD); existingEntity.setCreatedAt(Instant.now()); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - 
when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(List.of(existingEntity)); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(existingEntity)); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); TableOperation existingOp = TableOperation.from(existingEntity); @@ -115,11 +114,9 @@ void analyze_skipsTable_whenNotEnabled() { Table expectedTable = Table.builder().tableUuid("uuid-1").build(); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); runner.analyze(); @@ -138,15 +135,13 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { scheduled.setId("op-id"); scheduled.setStatus("SCHEDULED"); scheduled.setTableUuid("uuid-1"); - scheduled.setOperationType("ORPHAN_FILES_DELETION"); + scheduled.setOperationType(OFD); scheduled.setCreatedAt(Instant.now()); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - 
when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(List.of(scheduled)); - when(historyRepo.findLatestPerTable("ORPHAN_FILES_DELETION")) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(scheduled)); + when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); TableOperation scheduledOp = TableOperation.from(scheduled); @@ -164,9 +159,8 @@ void analyze_skipsTable_whenTableUuidIsNull() { statsEntity.setTableUuid(null); when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn("ORPHAN_FILES_DELETION"); - when(operationsRepo.find("ORPHAN_FILES_DELETION", null, null, null, null)) - .thenReturn(Collections.emptyList()); + when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); + when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); runner.analyze(); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java index f0e915059..50d426eef 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java @@ -2,6 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; +import com.linkedin.openhouse.analyzer.model.OperationStatus; import com.linkedin.openhouse.analyzer.model.Table; import com.linkedin.openhouse.analyzer.model.TableOperation; import 
com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; @@ -111,7 +112,7 @@ void shouldSchedule_pending_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("PENDING", null)), + Optional.of(opWithStatus(OperationStatus.PENDING, null)), Optional.empty())) .isFalse(); } @@ -121,7 +122,7 @@ void shouldSchedule_scheduling_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULING", null)), + Optional.of(opWithStatus(OperationStatus.SCHEDULING, null)), Optional.empty())) .isFalse(); } @@ -134,7 +135,7 @@ void shouldSchedule_scheduledNoHistory_withinTimeout_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", recent)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, recent)), Optional.empty())) .isFalse(); } @@ -145,7 +146,7 @@ void shouldSchedule_scheduledNoHistory_pastTimeout_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", longAgo)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, longAgo)), Optional.empty())) .isTrue(); } @@ -155,7 +156,7 @@ void shouldSchedule_scheduledWithNullScheduledAt_noHistory_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", null)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, null)), Optional.empty())) .isTrue(); } @@ -167,7 +168,7 @@ void shouldSchedule_scheduledWithSuccessHistory_afterCooldown_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("SUCCESS", historyAt)))) .isTrue(); } @@ -179,7 +180,7 @@ void shouldSchedule_scheduledWithSuccessHistory_beforeCooldown_returnsFalse() { assertThat( 
analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("SUCCESS", historyAt)))) .isFalse(); } @@ -191,7 +192,7 @@ void shouldSchedule_scheduledWithFailedHistory_afterRetry_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("FAILED", historyAt)))) .isTrue(); } @@ -203,7 +204,7 @@ void shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus("SCHEDULED", scheduledAt)), + Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), Optional.of(historyWithStatus("FAILED", historyAt)))) .isFalse(); } @@ -223,7 +224,7 @@ private Table tableWithProperty(String value) { .build(); } - private TableOperation opWithStatus(String status, Instant scheduledAt) { + private TableOperation opWithStatus(OperationStatus status, Instant scheduledAt) { TableOperation op = new TableOperation(); op.setStatus(status); op.setScheduledAt(scheduledAt); From 52ba8583781971a57c080a56affa881a88f0d7f4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:32:43 -0700 Subject: [PATCH 026/104] =?UTF-8?q?refactor(optimizer-analyzer):=20rename?= =?UTF-8?q?=20OrphanFilesDeletionAnalyzer=20=E2=86=92=20CadenceBasedOrphan?= =?UTF-8?q?FilesDeletionAnalyzer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The class composes CadencePolicy and is one of potentially many strategies (volume-based, schema-aware, etc.) we could write later for the same operation type. Encode the scheduling driver in the class name so the distinction is visible at registration. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ...Analyzer.java => CadenceBasedOrphanFilesDeletionAnalyzer.java} | 0 ...Test.java => CadenceBasedOrphanFilesDeletionAnalyzerTest.java} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/{OrphanFilesDeletionAnalyzer.java => CadenceBasedOrphanFilesDeletionAnalyzer.java} (100%) rename apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/{OrphanFilesDeletionAnalyzerTest.java => CadenceBasedOrphanFilesDeletionAnalyzerTest.java} (100%) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java similarity index 100% rename from apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzer.java rename to apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java similarity index 100% rename from apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/OrphanFilesDeletionAnalyzerTest.java rename to apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java From beedad8ccc57c425b2e3e3ba6ec59d232b5f538d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 12 May 2026 12:33:10 -0700 Subject: [PATCH 027/104] fix(optimizer-analyzer): update class name inside renamed files The rename in the previous commit moved the files but did not change the class identifiers inside. Update both class declarations and the constructor calls in the test to match the new file name. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java | 6 +++--- .../CadenceBasedOrphanFilesDeletionAnalyzerTest.java | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index 450fda293..c50025b6a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -12,14 +12,14 @@ /** Analyzer for the {@link OperationType#ORPHAN_FILES_DELETION} operation type. */ @Component -public class OrphanFilesDeletionAnalyzer implements OperationAnalyzer { +public class CadenceBasedOrphanFilesDeletionAnalyzer implements OperationAnalyzer { static final String OFD_ENABLED_PROPERTY = "maintenance.optimizer.ofd.enabled"; private final CadencePolicy cadencePolicy; @Autowired - public OrphanFilesDeletionAnalyzer( + public CadenceBasedOrphanFilesDeletionAnalyzer( @Value("${ofd.success-retry-hours:24}") long successRetryHours, @Value("${ofd.failure-retry-hours:1}") long failureRetryHours, @Value("${ofd.scheduled-timeout-hours:6}") long scheduledTimeoutHours) { @@ -31,7 +31,7 @@ public OrphanFilesDeletionAnalyzer( } /** Package-private for tests that supply a pre-built {@link CadencePolicy}. 
*/ - OrphanFilesDeletionAnalyzer(CadencePolicy cadencePolicy) { + CadenceBasedOrphanFilesDeletionAnalyzer(CadencePolicy cadencePolicy) { this.cadencePolicy = cadencePolicy; } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 50d426eef..9a847f34c 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -14,18 +14,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -class OrphanFilesDeletionAnalyzerTest { +class CadenceBasedOrphanFilesDeletionAnalyzerTest { private static final Duration SUCCESS_INTERVAL = Duration.ofHours(24); private static final Duration FAILURE_INTERVAL = Duration.ofHours(1); private static final Duration SCHEDULED_TIMEOUT = Duration.ofHours(6); - private OrphanFilesDeletionAnalyzer analyzer; + private CadenceBasedOrphanFilesDeletionAnalyzer analyzer; @BeforeEach void setUp() { analyzer = - new OrphanFilesDeletionAnalyzer( + new CadenceBasedOrphanFilesDeletionAnalyzer( new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL, SCHEDULED_TIMEOUT)); } @@ -215,7 +215,7 @@ private Table tableWithProperty(String value) { Map props = value == null ? 
Collections.emptyMap() - : Map.of(OrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); + : Map.of(CadenceBasedOrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); return Table.builder() .tableUuid("test-uuid") .databaseName("db1") From 3483b25f394e44b03c5bf94a22c1d644193466ba Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 08:55:28 -0700 Subject: [PATCH 028/104] perf(optimizer): index table_operations_history for findLatestPerTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) on table_operations_history. TableOperationHistoryRepository.findLatestPerTable uses a correlated MAX(completed_at) subquery; without this index it degenerates to O(N²) and does not complete at 1M-row history scale. With it the inner subquery becomes an index-only lookup per outer row. Update the repo method's javadoc to point at the new index by name and drop the resolved TODO. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../repository/TableOperationHistoryRepository.java | 6 +++--- .../optimizer/src/main/resources/db/optimizer-schema.sql | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java index 09930ab08..26166271f 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java @@ -35,9 +35,9 @@ List find( * single operation type. Used by the Analyzer to evaluate cadence without materializing every * historical row. * - *

<p>The correlated subquery is portable across MySQL and H2 (MySQL mode). On a large {@code - * table_operations_history} table this benefits from an index on {@code (operation_type, - * table_uuid, completed_at)} — TODO add it to the schema. + *

<p>The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code + * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code + * table_operations_history}, the subquery becomes an index-only lookup per outer row. * *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all * tied rows; callers should dedupe in memory. diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 4c2d9604b..322f3bf92 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -48,5 +48,9 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( job_id VARCHAR(255), result TEXT, PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name) + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. + INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) ); From f663537f5da02faac4aa3ef0f68ede6324a5971f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 08:57:27 -0700 Subject: [PATCH 029/104] docs(optimizer-analyzer): add scale roadmap as block comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture optimizations (a)–(m) discussed for scaling AnalyzerRunner past ~100k tables, the failure modes that trigger each, and which items have already landed. (d) — the table_operations_history index on (operation_type, table_uuid, completed_at) — landed on optimizer-1 and is noted inline. The remaining items stay queued. Also tighten the class javadoc: drop the misleading "safe at any realistic scale" wording and the two prior inline TODOs (now subsumed by the block comment), point readers at the roadmap. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 86 +++++++++++++++++-- 1 file changed, 78 insertions(+), 8 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 22da50d01..e4084884f 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -18,19 +18,89 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; +/* + * Performance roadmap — read before scaling this runner past ~100k tables. + * + * Current shape: each pass loads all table_stats, all active table_operations + * per op type, and the latest history row per (table_uuid, op_type) per op + * type, then iterates (table × analyzer) in memory. Heap is O(tables) and + * CPU is O(tables × op_types) per pass. This works at ~100k tables / daily + * cadence with ~8 GB heap. It does NOT work at 1M tables / 15-min cadence: + * heap peaks past 10 GB during ops loading, the history correlated subquery + * does not complete without index (d) below, and per-pass wall time exceeds + * the cadence window. + * + * The optimizations below are roughly ordered by when they matter. Items + * (a)–(d) are hard prerequisites for scaling past ~100k tables; the rest + * unlock further headroom. Items already landed are noted inline. + * + * Schema / data-model prerequisites: + * (a) Denormalized per-operation enablement on table_stats (or a sibling + * table), kept current by commit hooks on table-property changes. Each + * op type has its own enable flag — this mirrors the existing + * enable/disable pattern for non-optimizer maintenance operations on + * table properties (e.g. maintenance.optimizer.ofd.enabled). 
Lets the + * analyzer filter opted-in (table, op_type) pairs at index level + * instead of parsing the tableProperties JSON for every row. 10–100× + * data reduction in early rollout where opt-in is selective per op. + * (b) Index on table_operations(table_uuid, operation_type, created_at) — + * drives the cooldown anti-join in (e)/(f). Lands with that query. + * (c) Index supporting per-op opt-in lookup on the denormalized structure + * from (a). Shape depends on (a)'s representation. Lands with (e)/(f). + * (d) Index on table_operations_history(operation_type, table_uuid, + * completed_at) — makes findLatestPerTable O(N log N) instead of + * O(N²). Without this the query does not complete at 1M-row scale. + * LANDED: idx_toph_optype_uuid_completed. + * + * Query shape: + * (e) findCandidatesByDatabase(db, opType, cooldown) repo method that + * pushes both the per-op opt-in filter (from (a)) and the cooldown + * predicate to SQL via NOT EXISTS. In-cooldown and opted-out tables + * produce zero rows; their op rows are never materialized in the + * application. Single biggest win on data transfer and heap. + * (f) Combined-op variant of (e): one query per db returns candidates + * across all op types in one shot. Cuts read QPS ~10× at the same + * data volume (20k queries/pass vs 200k). + * (g) Per-database iteration replacing the current global scan. Bounds + * working set to one db (1–10k tables) per pass instead of the full + * table count. + * (h) Replace the findLatestPerTable consumer with the (e)/(f) anti-join, + * encoding cadence policy (success-retry-interval, failure-retry- + * interval, scheduled-timeout) in SQL WHERE clauses. If this subsumes + * all cadence reads, historyRepo on this class becomes dead and can + * be removed; if some strategies still need Java-side cadence on + * history rows, keep it. + * + * Projection / IO: + * (i) Drop tableProperties from the stats query projection once (a) lands. 
+ * The TEXT column carries multi-KB JSON per row; at 1M rows that's + * gigabytes of wire transfer per pass for data nobody reads. + * (j) High-volume repo reads must page or stream — Stream or + * paginated cursor, not List. JPA materializes the full list + * before stream-collect, which is the dominant heap-spike source. + * Treat this as a baseline requirement, not an optional optimization. + * (k) Batch PENDING INSERTs via saveAll(500–1000) and set + * hibernate.jdbc.batch_size. Per-row save() dominates the write phase + * at any meaningful candidate volume. + * + * Runtime / deployment: + * (l) Rate-limit queries per analyzer instance to bound MySQL load. + * The runner should pace its per-db iteration across the cadence + * window rather than racing — converts a 2k-QPS spike at the start + * of each pass into a flat ~20 QPS sustained. + * (m) Conditional, only if single-instance runtime exceeds the cadence + * window after (a)–(l): shard databases by hash key and run N + * analyzer instances in parallel. Concurrency is then controlled by + * deployment count, not in-process worker pools. + */ /** * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * *

<p>Both sides of the join — current operations and latest history per (table, type) — are loaded - * into maps once per run before the table loop. Both are bounded by the number of tables, so - * holding them in memory is safe at any realistic scale. - * - *

<p>// TODO: Iterate per-database instead of loading all tables at once. This scopes memory usage - * and allows incremental progress. - * - *

// TODO: Benchmark memory footprint at 10k tables per iteration to validate the in-memory join - * approach. + * into maps once per run before the table loop. This is correct at small scale (≤~100k tables) but + * breaks past that; see the performance roadmap block comment above for the queued optimizations + * and their triggering thresholds. */ @Slf4j @Component From d7e3a6559cbb91162707b15c4b4404f4c609e3c3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 10:25:56 -0700 Subject: [PATCH 030/104] docs(optimizer-analyzer): move scale roadmap to BDP-102182 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The (a)–(m) optimization roadmap was a ~75-line block comment on top of AnalyzerRunner. The content now lives in Jira BDP-102182 ("Optimizer analyzer: scale past 100k tables"), where it can be tracked, assigned, and broken into work without churning the source. Replace the block comment with a short class-javadoc pointer. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 83 ++----------------- 1 file changed, 5 insertions(+), 78 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index e4084884f..6f7c68d6a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -18,89 +18,16 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; -/* - * Performance roadmap — read before scaling this runner past ~100k tables. - * - * Current shape: each pass loads all table_stats, all active table_operations - * per op type, and the latest history row per (table_uuid, op_type) per op - * type, then iterates (table × analyzer) in memory. 
Heap is O(tables) and - * CPU is O(tables × op_types) per pass. This works at ~100k tables / daily - * cadence with ~8 GB heap. It does NOT work at 1M tables / 15-min cadence: - * heap peaks past 10 GB during ops loading, the history correlated subquery - * does not complete without index (d) below, and per-pass wall time exceeds - * the cadence window. - * - * The optimizations below are roughly ordered by when they matter. Items - * (a)–(d) are hard prerequisites for scaling past ~100k tables; the rest - * unlock further headroom. Items already landed are noted inline. - * - * Schema / data-model prerequisites: - * (a) Denormalized per-operation enablement on table_stats (or a sibling - * table), kept current by commit hooks on table-property changes. Each - * op type has its own enable flag — this mirrors the existing - * enable/disable pattern for non-optimizer maintenance operations on - * table properties (e.g. maintenance.optimizer.ofd.enabled). Lets the - * analyzer filter opted-in (table, op_type) pairs at index level - * instead of parsing the tableProperties JSON for every row. 10–100× - * data reduction in early rollout where opt-in is selective per op. - * (b) Index on table_operations(table_uuid, operation_type, created_at) — - * drives the cooldown anti-join in (e)/(f). Lands with that query. - * (c) Index supporting per-op opt-in lookup on the denormalized structure - * from (a). Shape depends on (a)'s representation. Lands with (e)/(f). - * (d) Index on table_operations_history(operation_type, table_uuid, - * completed_at) — makes findLatestPerTable O(N log N) instead of - * O(N²). Without this the query does not complete at 1M-row scale. - * LANDED: idx_toph_optype_uuid_completed. - * - * Query shape: - * (e) findCandidatesByDatabase(db, opType, cooldown) repo method that - * pushes both the per-op opt-in filter (from (a)) and the cooldown - * predicate to SQL via NOT EXISTS. 
In-cooldown and opted-out tables - * produce zero rows; their op rows are never materialized in the - * application. Single biggest win on data transfer and heap. - * (f) Combined-op variant of (e): one query per db returns candidates - * across all op types in one shot. Cuts read QPS ~10× at the same - * data volume (20k queries/pass vs 200k). - * (g) Per-database iteration replacing the current global scan. Bounds - * working set to one db (1–10k tables) per pass instead of the full - * table count. - * (h) Replace the findLatestPerTable consumer with the (e)/(f) anti-join, - * encoding cadence policy (success-retry-interval, failure-retry- - * interval, scheduled-timeout) in SQL WHERE clauses. If this subsumes - * all cadence reads, historyRepo on this class becomes dead and can - * be removed; if some strategies still need Java-side cadence on - * history rows, keep it. - * - * Projection / IO: - * (i) Drop tableProperties from the stats query projection once (a) lands. - * The TEXT column carries multi-KB JSON per row; at 1M rows that's - * gigabytes of wire transfer per pass for data nobody reads. - * (j) High-volume repo reads must page or stream — Stream or - * paginated cursor, not List. JPA materializes the full list - * before stream-collect, which is the dominant heap-spike source. - * Treat this as a baseline requirement, not an optional optimization. - * (k) Batch PENDING INSERTs via saveAll(500–1000) and set - * hibernate.jdbc.batch_size. Per-row save() dominates the write phase - * at any meaningful candidate volume. - * - * Runtime / deployment: - * (l) Rate-limit queries per analyzer instance to bound MySQL load. - * The runner should pace its per-db iteration across the cadence - * window rather than racing — converts a 2k-QPS spike at the start - * of each pass into a flat ~20 QPS sustained. 
- * (m) Conditional, only if single-instance runtime exceeds the cadence - * window after (a)–(l): shard databases by hash key and run N - * analyzer instances in parallel. Concurrency is then controlled by - * deployment count, not in-process worker pools. - */ /** * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every * registered {@link OperationAnalyzer} in a single pass. * *

Both sides of the join — current operations and latest history per (table, type) — are loaded - * into maps once per run before the table loop. This is correct at small scale (≤~100k tables) but - * breaks past that; see the performance roadmap block comment above for the queued optimizations - * and their triggering thresholds. + * into maps once per run before the table loop. This is correct at small scale (≤~100k tables); + * past that the runner OOMs and exceeds the cadence window. Scale-up work (per-op enablement + * column, cooldown anti-join push-down, per-db iteration, streaming reads, batched writes, rate + * limiting) is tracked in BDP-102182. */ @Slf4j @Component From 02930094479750d10f25745849e9d511f5aa0aea Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 11:46:45 -0700 Subject: [PATCH 031/104] feat(optimizer): add findDistinctDatabaseNames to TableStatsRepository Enables per-database iteration in the analyzer. Returns the bounded set of database_name values present in table_stats; the analyzer uses it to drive the outer loop when no specific databaseName filter is supplied. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/repository/TableStatsRepository.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 50f515d07..4215237bc 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -22,4 +22,12 @@ List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); + + /** + * Return the distinct {@code database_name} values present in {@code table_stats}. 
Used by the + * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the + * number of databases (small even at million-table scale). + */ + @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") + List findDistinctDatabaseNames(); } From 6fa885db6bad708d54ace4ab61faa290ea208220 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 11:50:45 -0700 Subject: [PATCH 032/104] refactor(optimizer): Optional<T> for optional filter params in service layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OptimizerDataService's filter-style methods (listTableOperations, listTableStats, getStatsHistory, listHistory) accepted nullable strings/enums to mean "no filter". Switch to Optional<T> at the service boundary; controllers wrap their nullable @RequestParam values via Optional.ofNullable. The implementation unwraps via .orElse(null) at the JPA repo call site — the @Query "IS NULL OR ..." pattern is idiomatic with nullable parameters and stays unchanged. No behavior change. No tests required updating.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableByNameController.java | 11 +++- .../controller/TableOperationsController.java | 8 ++- .../TableOperationsHistoryController.java | 10 +++- .../api/controller/TableStatsController.java | 9 ++- .../service/OptimizerDataService.java | 42 +++++++------- .../service/OptimizerDataServiceImpl.java | 57 +++++++++++-------- 6 files changed, 88 insertions(+), 49 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java index f1989ef3e..e3582ff7e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java @@ -3,6 +3,7 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -30,6 +31,14 @@ public ResponseEntity> getOperationsHistoryByNam @PathVariable String tableName, @RequestParam(defaultValue = "100") int limit) { return ResponseEntity.ok( - service.listHistory(databaseName, tableName, null, null, null, null, null, limit)); + service.listHistory( + Optional.of(databaseName), + Optional.of(tableName), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + limit)); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 
a0cab4b7c..adc4d7a85 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -7,6 +7,7 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -61,6 +62,11 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { return ResponseEntity.ok( - service.listTableOperations(operationType, status, databaseName, tableName, tableUuid)); + service.listTableOperations( + Optional.ofNullable(operationType), + Optional.ofNullable(status), + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid))); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index ff4c4a77d..79fce5b8f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -55,6 +56,13 @@ public ResponseEntity> listHistory( 
@RequestParam(defaultValue = "100") int limit) { return ResponseEntity.ok( service.listHistory( - databaseName, tableName, tableUuid, operationType, status, since, until, limit)); + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid), + Optional.ofNullable(operationType), + Optional.ofNullable(status), + Optional.ofNullable(since), + Optional.ofNullable(until), + limit)); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 4e8624481..ef57598e8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; +import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -52,7 +53,11 @@ public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok(service.listTableStats(databaseName, tableName, tableUuid)); + return ResponseEntity.ok( + service.listTableStats( + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid))); } /** @@ -64,6 +69,6 @@ public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok(service.getStatsHistory(tableUuid, since, limit)); + return 
ResponseEntity.ok(service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit)); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index dd2b2fd58..1c17d7a38 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -19,15 +19,15 @@ public interface OptimizerDataService { // --- TableOperations --- /** - * List operations matching the given filters. Every parameter is optional — pass {@code null} to - * skip that filter. No filters returns all rows. + * List operations matching the given filters. Every parameter is optional — pass {@link + * Optional#empty()} to skip that filter. No filters returns all rows. */ List listTableOperations( - OperationType operationType, - OperationStatus status, - String databaseName, - String tableName, - String tableUuid); + Optional operationType, + Optional status, + Optional databaseName, + Optional tableName, + Optional tableUuid); /** * Complete an operation by writing a history entry. Looks up the operation row by {@code id}, @@ -55,19 +55,20 @@ Optional completeOperation( Optional getTableStats(String tableUuid); /** - * List stats rows matching the given filters. Every parameter is optional — pass {@code null} to - * skip that filter. No filters returns all rows. + * List stats rows matching the given filters. Every parameter is optional — pass {@link + * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableStats(String databaseName, String tableName, String tableUuid); + List listTableStats( + Optional databaseName, Optional tableName, Optional tableUuid); /** * Return per-commit stats history for {@code tableUuid}, newest first. 
* * @param tableUuid the stable table UUID - * @param since if non-null, only return rows recorded at or after this instant + * @param since if present, only return rows recorded at or after this instant * @param limit maximum number of rows to return */ - List getStatsHistory(String tableUuid, Instant since, int limit); + List getStatsHistory(String tableUuid, Optional since, int limit); // --- TableOperationsHistory --- @@ -84,15 +85,16 @@ Optional completeOperation( /** * List history rows matching the given filters, ordered newest first. Every parameter is optional - * — pass {@code null} to skip that filter. No filters returns all rows up to {@code limit}. + * — pass {@link Optional#empty()} to skip that filter. No filters returns all rows up to {@code + * limit}. */ List listHistory( - String databaseName, - String tableName, - String tableUuid, - OperationType operationType, - OperationHistoryStatus status, - Instant since, - Instant until, + Optional databaseName, + Optional tableName, + Optional tableUuid, + Optional operationType, + Optional status, + Optional since, + Optional until, int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 285cea914..de4faa465 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -42,12 +42,18 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { @Override public List listTableOperations( - OperationType operationType, - OperationStatus status, - String databaseName, - String tableName, - String tableUuid) { - return operationsRepository.find(operationType, status, databaseName, tableName, tableUuid) + Optional operationType, + Optional status, 
+ Optional databaseName, + Optional tableName, + Optional tableUuid) { + return operationsRepository + .find( + operationType.orElse(null), + status.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + tableUuid.orElse(null)) .stream() .map(mapper::toDto) .collect(Collectors.toList()); @@ -132,15 +138,18 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( - String databaseName, String tableName, String tableUuid) { - return statsRepository.find(databaseName, tableName, tableUuid).stream() + Optional databaseName, Optional tableName, Optional tableUuid) { + return statsRepository + .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() .map(mapper::toDto) .collect(Collectors.toList()); } @Override - public List getStatsHistory(String tableUuid, Instant since, int limit) { - return statsHistoryRepository.find(tableUuid, since, PageRequest.of(0, limit)).stream() + public List getStatsHistory( + String tableUuid, Optional since, int limit) { + return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) + .stream() .map(mapper::toDto) .collect(Collectors.toList()); } @@ -175,23 +184,23 @@ public List getHistory(String tableUuid, int limit) { @Override public List listHistory( - String databaseName, - String tableName, - String tableUuid, - OperationType operationType, - OperationHistoryStatus status, - Instant since, - Instant until, + Optional databaseName, + Optional tableName, + Optional tableUuid, + Optional operationType, + Optional status, + Optional since, + Optional until, int limit) { return historyRepository .find( - databaseName, - tableName, - tableUuid, - operationType, - status, - since, - until, + databaseName.orElse(null), + tableName.orElse(null), + tableUuid.orElse(null), + operationType.orElse(null), + status.orElse(null), + since.orElse(null), + until.orElse(null), PageRequest.of(0, limit)) .stream() .map(mapper::toDto) From 
dd4faf2f769548e56541dbbc9abeff84e9a21af9 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 11:54:17 -0700 Subject: [PATCH 033/104] =?UTF-8?q?refactor(optimizer-analyzer):=20address?= =?UTF-8?q?=20PR=20review=20=E2=80=94=20required=20op,=20per-db,=20Optiona?= =?UTF-8?q?l,=20no=20switch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses unresolved review threads on #533: - HistoryStatus enum in analyzer-internal model. CadencePolicy parses it at the boundary; switch on String values is gone. - shouldSchedule restructured: an active non-CANCELED op short-circuits to false (the scheduler owns it). The CANCELED case and the no-active-operation case defer to cadence on the latest history entry via a ternary; no switch on history status. - scheduledTimeout removed from CadencePolicy and the OFD analyzer config. Stuck-SCHEDULED recovery is a scheduler-side concern; the analyzer no longer inspects scheduledAt. - AnalyzerRunner.analyze now requires an OperationType per call. The per-analyzer inner loop is gone; the function processes one operation type at a time. AnalyzerApplication's CommandLineRunner iterates registered analyzers and calls analyze(op) per type. - AnalyzerRunner iterates databases — uses statsRepo.findDistinctDatabaseNames when no databaseName filter is supplied. Per-db query block factored into analyzeDatabase; working set is bounded by tables-per-db. - Optional<String> filter params on analyze (databaseName/tableName/tableUuid) and Optional unwrapping at the JPA call boundary. - TableOperation switched to Lombok @Builder; manual private build() factory removed; from() and pending() use the generated builder.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../analyzer/AnalyzerApplication.java | 10 +- .../openhouse/analyzer/AnalyzerRunner.java | 167 +++++++++--------- ...denceBasedOrphanFilesDeletionAnalyzer.java | 8 +- .../openhouse/analyzer/CadencePolicy.java | 70 ++------ .../analyzer/model/HistoryStatus.java | 13 ++ .../analyzer/model/TableOperation.java | 62 +++---- .../src/main/resources/application.properties | 1 - .../analyzer/AnalyzerRunnerTest.java | 58 +++--- ...eBasedOrphanFilesDeletionAnalyzerTest.java | 85 +++------ 9 files changed, 195 insertions(+), 279 deletions(-) create mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java index 99ba56047..edee9c02e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.analyzer; +import java.util.List; import org.springframework.boot.CommandLineRunner; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; @@ -17,9 +18,12 @@ public static void main(String[] args) { SpringApplication.run(AnalyzerApplication.class, args); } - /** Delegates to {@link AnalyzerRunner#analyze()} once per process invocation. */ + /** + * Runs the analyzer once per registered {@link OperationAnalyzer} per process invocation. Each + * call is scoped to one operation type; the runner iterates databases internally. 
+ */ @Bean - public CommandLineRunner run(AnalyzerRunner runner) { - return args -> runner.analyze(); + public CommandLineRunner run(AnalyzerRunner runner, List analyzers) { + return args -> analyzers.forEach(a -> runner.analyze(a.getOperationType())); } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 6f7c68d6a..dfc605f50 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -19,15 +19,13 @@ import org.springframework.stereotype.Component; /** - * Core analysis loop. Loads {@code table_stats} rows and evaluates each table against every - * registered {@link OperationAnalyzer} in a single pass. + * Core analysis loop. For one operation type per call, iterates databases and evaluates each table + * in a database against the matching {@link OperationAnalyzer}. * *

Both sides of the join — current operations and latest history per (table, type) — are loaded - * into maps once per run before the table loop. This is correct at small scale (≤~100k tables); - * past that the runner OOMs and exceeds the cadence window. Scale-up work (per-op enablement - * column, cooldown anti-join push-down, per-db iteration, streaming reads, batched writes, rate - * limiting) is tracked in BDP-102182. + * into maps once per database before the table loop. This is correct at small scale (≤~100k + * tables); past that the per-db query shape and projection need further tuning. Scale-up work is + * tracked in BDP-102182. */ @Slf4j @Component @@ -39,100 +37,99 @@ public class AnalyzerRunner { private final TableOperationsRepository operationsRepo; private final TableOperationHistoryRepository historyRepo; - /** Run the full analysis loop once with no filters. */ - public void analyze() { - analyze(null, null, null, null); + /** + * Run the analysis loop for {@code operationType} across all databases, with no filters. + * Equivalent to {@link #analyze(OperationType, Optional, Optional, Optional)} with all-empty + * filters. + */ + public void analyze(OperationType operationType) { + analyze(operationType, Optional.empty(), Optional.empty(), Optional.empty()); } /** - * Run the analysis loop, optionally scoped to a specific operation type, database, table name, or - * table UUID. Pass {@code null} for any parameter to skip that filter. + * Run the analysis loop for the given operation type, optionally scoped to a single database, + * table name, or table UUID. Iterates databases one at a time so the working set is bounded by + * tables-per-db, not tables-total. */ public void analyze( - OperationType operationType, String databaseName, String tableName, String tableUuid) { - - List activeAnalyzers = - operationType == null - ? 
analyzers - : analyzers.stream() - .filter(a -> a.getOperationType() == operationType) - .collect(Collectors.toList()); - - // Pre-load the small sides of the joins — one query per analyzer type. - // TODO: Move to a query builder (Criteria API or jOOQ) as filter count grows. - Map> opsByType = - activeAnalyzers.stream() + OperationType operationType, + Optional databaseName, + Optional tableName, + Optional tableUuid) { + + Optional analyzerOpt = + analyzers.stream().filter(a -> a.getOperationType() == operationType).findFirst(); + if (analyzerOpt.isEmpty()) { + log.warn("No analyzer registered for operation type {}; skipping", operationType); + return; + } + OperationAnalyzer analyzer = analyzerOpt.get(); + + List dbs = databaseName.map(List::of).orElseGet(statsRepo::findDistinctDatabaseNames); + log.info("Analyzing {} across {} database(s)", operationType, dbs.size()); + + dbs.forEach(db -> analyzeDatabase(analyzer, db, tableName, tableUuid)); + + log.info("Analysis complete for {}", operationType); + } + + private void analyzeDatabase( + OperationAnalyzer analyzer, + String databaseName, + Optional tableName, + Optional tableUuid) { + + String operationType = analyzer.getOperationType().name(); + + // Pre-load the small sides of the joins — bounded by tables in this database. + Map currentOps = + operationsRepo + .find(operationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) + .stream() + .filter(e -> e.getTableUuid() != null) .collect( Collectors.toMap( - OperationAnalyzer::getOperationType, - a -> - operationsRepo - .find( - a.getOperationType().name(), - null, - tableUuid, - databaseName, - tableName) - .stream() - .filter(e -> e.getTableUuid() != null) - .collect( - Collectors.toMap( - TableOperationRow::getTableUuid, - TableOperation::from, - TableOperation::mostRecent)))); - - // Latest history row per (table_uuid, operation_type), one query per analyzer. 
The repo query - // may return tied rows for the same key on identical completed_at; dedupe in memory. - Map> latestHistoryByType = - activeAnalyzers.stream() + TableOperationRow::getTableUuid, + TableOperation::from, + TableOperation::mostRecent)); + + // Latest history row per (table_uuid, op_type) for this analyzer. The repo query may return + // tied rows on identical completed_at; dedupe in memory. + Map latestHistory = + historyRepo.findLatestPerTable(operationType).stream() + .filter(r -> r.getTableUuid() != null) .collect( Collectors.toMap( - OperationAnalyzer::getOperationType, - a -> - historyRepo.findLatestPerTable(a.getOperationType().name()).stream() - .filter(r -> r.getTableUuid() != null) - .collect( - Collectors.toMap( - TableOperationHistoryRow::getTableUuid, - r -> r, - AnalyzerRunner::moreRecentHistory)))); + TableOperationHistoryRow::getTableUuid, + r -> r, + AnalyzerRunner::moreRecentHistory)); List

tables = - statsRepo.find(databaseName, tableName, tableUuid).stream() + statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() .filter(row -> row.getTableUuid() != null) .map(Table::from) .collect(Collectors.toList()); - log.info("Found {} tables in optimizer table_stats", tables.size()); tables.forEach( - table -> - activeAnalyzers.forEach( - analyzer -> { - if (!analyzer.isEnabled(table)) { - return; - } - - Optional currentOp = - Optional.ofNullable( - opsByType.get(analyzer.getOperationType()).get(table.getTableUuid())); - Optional latestHistory = - Optional.ofNullable( - latestHistoryByType - .get(analyzer.getOperationType()) - .get(table.getTableUuid())); - - if (analyzer.shouldSchedule(table, currentOp, latestHistory)) { - TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); - operationsRepo.save(op.toRow()); - log.info( - "Created PENDING {} operation for table {}.{}", - analyzer.getOperationType(), - table.getDatabaseName(), - table.getTableId()); - } - })); - - log.info("Analysis complete"); + table -> { + if (!analyzer.isEnabled(table)) { + return; + } + Optional currentOp = + Optional.ofNullable(currentOps.get(table.getTableUuid())); + Optional entry = + Optional.ofNullable(latestHistory.get(table.getTableUuid())); + + if (analyzer.shouldSchedule(table, currentOp, entry)) { + TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); + operationsRepo.save(op.toRow()); + log.info( + "Created PENDING {} operation for table {}.{}", + analyzer.getOperationType(), + table.getDatabaseName(), + table.getTableId()); + } + }); } private static TableOperationHistoryRow moreRecentHistory( diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index c50025b6a..e66bc070d 100644 --- 
a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -21,13 +21,9 @@ public class CadenceBasedOrphanFilesDeletionAnalyzer implements OperationAnalyze @Autowired public CadenceBasedOrphanFilesDeletionAnalyzer( @Value("${ofd.success-retry-hours:24}") long successRetryHours, - @Value("${ofd.failure-retry-hours:1}") long failureRetryHours, - @Value("${ofd.scheduled-timeout-hours:6}") long scheduledTimeoutHours) { + @Value("${ofd.failure-retry-hours:1}") long failureRetryHours) { this.cadencePolicy = - new CadencePolicy( - Duration.ofHours(successRetryHours), - Duration.ofHours(failureRetryHours), - Duration.ofHours(scheduledTimeoutHours)); + new CadencePolicy(Duration.ofHours(successRetryHours), Duration.ofHours(failureRetryHours)); } /** Package-private for tests that supply a pre-built {@link CadencePolicy}. */ diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 4cf892021..7aa646cf6 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.analyzer; +import com.linkedin.openhouse.analyzer.model.HistoryStatus; +import com.linkedin.openhouse.analyzer.model.OperationStatus; import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import java.time.Duration; @@ -8,14 +10,13 @@ import lombok.RequiredArgsConstructor; /** - * Encapsulates the time-based scheduling logic shared across operation types. 
An analyzer delegates - * to {@link CadencePolicy} to decide whether to re-issue a recommendation for a table that already - * has an active operation record and/or history. + * Time-based scheduling policy. An analyzer delegates to {@link CadencePolicy} to decide whether to + * re-issue a recommendation for a table. * - *

The SCHEDULED timeout is a key safety mechanism: if a Spark job crashes without reporting - * back, the SCHEDULED row would otherwise block the table forever. When the row has been SCHEDULED - * (or SCHEDULING) longer than {@code scheduledTimeout}, the Analyzer treats it as stale and returns - * {@code true}, causing a new PENDING row to be inserted. + *

The analyzer stays out of any table that already has a non-CANCELED active operation — those + * belong to the scheduler. For tables with no active operation (or only a CANCELED one), the + * decision is based on the most recent completed-history entry: re-evaluate after {@code + * successRetryInterval} on success, or after {@code failureRetryInterval} on failure. */ @RequiredArgsConstructor public class CadencePolicy { @@ -34,14 +35,6 @@ public class CadencePolicy { */ private final Duration failureRetryInterval; - /** - * Maximum time a row can stay in SCHEDULED status before the analyzer treats it as stale and - * overwrites it with a new PENDING row. Handles the case where a Spark job crashes without - * reporting back. For example, if set to 6 hours and a job was submitted at noon but never - * completed, the analyzer will re-schedule the table after 6:00 PM. - */ - private final Duration scheduledTimeout; - /** * Returns {@code true} if a new or refreshed operation record should be upserted. * @@ -50,47 +43,16 @@ public class CadencePolicy { */ public boolean shouldSchedule( Optional currentOp, Optional latestHistory) { - if (currentOp.isEmpty()) { - return decideFromHistory(latestHistory); - } - TableOperation op = currentOp.get(); - switch (op.getStatus()) { - case PENDING: - case SCHEDULING: - return false; - case SCHEDULED: - // Two scenarios for a SCHEDULED row: - // - no history yet: the job is still running (or crashed); fall through to the - // scheduledTimeout safety net. - // - history present: the job completed and history was written; defer to cadence policy - // on the history entry. - return latestHistory.isPresent() - ? 
decideFromHistory(latestHistory) - : pastInterval(op.getScheduledAt(), scheduledTimeout); - case CANCELED: - return decideFromHistory(latestHistory); - default: - throw new IllegalStateException("Unhandled operation status: " + op.getStatus()); - } - } - - private boolean decideFromHistory(Optional latestHistory) { - if (latestHistory.isEmpty()) { - return true; - } - TableOperationHistoryRow entry = latestHistory.get(); - switch (entry.getStatus()) { - case "SUCCESS": - return pastInterval(entry.getCompletedAt(), successRetryInterval); - case "FAILED": - return pastInterval(entry.getCompletedAt(), failureRetryInterval); - default: - return true; + if (currentOp.isPresent() && currentOp.get().getStatus() != OperationStatus.CANCELED) { + return false; } + return latestHistory.map(this::readyAfterHistoryEntry).orElse(true); } - /** {@code true} if {@code timestamp} is null or {@code interval} has elapsed since then. */ - private boolean pastInterval(Instant timestamp, Duration interval) { - return timestamp == null || Duration.between(timestamp, Instant.now()).compareTo(interval) > 0; + private boolean readyAfterHistoryEntry(TableOperationHistoryRow entry) { + HistoryStatus status = HistoryStatus.valueOf(entry.getStatus()); + Duration interval = + status == HistoryStatus.FAILED ? failureRetryInterval : successRetryInterval; + return Duration.between(entry.getCompletedAt(), Instant.now()).compareTo(interval) > 0; } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java new file mode 100644 index 000000000..eb0e46762 --- /dev/null +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.analyzer.model; + +/** + * Analyzer-internal lifecycle outcomes for a completed operation. 
Mirrors the values written to + * {@code table_operations_history.status}; parsed at the boundary so the analyzer can switch on a + * typed value instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java index 54e569b6a..3de08b9c5 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java @@ -4,6 +4,8 @@ import java.time.Instant; import java.util.Comparator; import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; @@ -13,7 +15,9 @@ * new PENDING operation). Converts back to a JPA row via {@link #toRow()}. */ @Data +@Builder @NoArgsConstructor +@AllArgsConstructor public class TableOperation { /** Unique operation ID (UUID). */ @@ -42,28 +46,29 @@ public class TableOperation { /** Build a {@code TableOperation} from an existing JPA row. */ public static TableOperation from(TableOperationRow row) { - return build( - row.getId(), - row.getTableUuid(), - row.getDatabaseName(), - row.getTableName(), - OperationType.valueOf(row.getOperationType()), - OperationStatus.valueOf(row.getStatus()), - row.getCreatedAt(), - row.getScheduledAt()); + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); } /** Create a new PENDING operation for the given table and operation type. 
*/ public static TableOperation pending(Table table, OperationType operationType) { - return build( - UUID.randomUUID().toString(), - table.getTableUuid(), - table.getDatabaseName(), - table.getTableId(), - operationType, - OperationStatus.PENDING, - Instant.now(), - null); + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); } /** Convert to a JPA entity for persistence. */ @@ -87,25 +92,4 @@ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? a : b; } - - private static TableOperation build( - String id, - String tableUuid, - String databaseName, - String tableName, - OperationType operationType, - OperationStatus status, - Instant createdAt, - Instant scheduledAt) { - TableOperation op = new TableOperation(); - op.id = id; - op.tableUuid = tableUuid; - op.databaseName = databaseName; - op.tableName = tableName; - op.operationType = operationType; - op.status = status; - op.createdAt = createdAt; - op.scheduledAt = scheduledAt; - return op; - } } diff --git a/apps/optimizer-analyzer/src/main/resources/application.properties b/apps/optimizer-analyzer/src/main/resources/application.properties index 990740f1d..1df0bea15 100644 --- a/apps/optimizer-analyzer/src/main/resources/application.properties +++ b/apps/optimizer-analyzer/src/main/resources/application.properties @@ -6,4 +6,3 @@ spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} spring.jpa.hibernate.ddl-auto=none ofd.success-retry-hours=24 ofd.failure-retry-hours=1 -ofd.scheduled-timeout-hours=6 diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java 
b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 1feafba29..9734a329a 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -29,7 +29,9 @@ @ExtendWith(MockitoExtension.class) class AnalyzerRunnerTest { - private static final String OFD = OperationType.ORPHAN_FILES_DELETION.name(); + private static final OperationType OFD_TYPE = OperationType.ORPHAN_FILES_DELETION; + private static final String OFD = OFD_TYPE.name(); + private static final String DB = "db1"; @Mock private TableStatsRepository statsRepo; @Mock private TableOperationsRepository operationsRepo; @@ -41,33 +43,34 @@ class AnalyzerRunnerTest { @BeforeEach void setUp() { runner = new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo); + when(analyzer.getOperationType()).thenReturn(OFD_TYPE); + when(statsRepo.findDistinctDatabaseNames()).thenReturn(List.of(DB)); } @Test void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName("db1"); + statsEntity.setDatabaseName(DB); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); 
when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) .thenReturn(true); - runner.analyze(); + runner.analyze(OFD_TYPE); ArgumentCaptor captor = ArgumentCaptor.forClass(TableOperationRow.class); verify(operationsRepo).save(captor.capture()); TableOperationRow saved = captor.getValue(); assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); - assertThat(saved.getDatabaseName()).isEqualTo("db1"); + assertThat(saved.getDatabaseName()).isEqualTo(DB); assertThat(saved.getTableName()).isEqualTo("tbl1"); assertThat(saved.getOperationType()).isEqualTo(OFD); assertThat(saved.getStatus()).isEqualTo("PENDING"); @@ -78,11 +81,11 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName("db1"); + statsEntity.setDatabaseName(DB); statsEntity.setTableName("tbl1"); Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName("db1").tableId("tbl1").build(); + Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); TableOperationRow existingEntity = new TableOperationRow(); existingEntity.setId("existing-op-id"); @@ -91,9 +94,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { existingEntity.setOperationType(OFD); existingEntity.setCreatedAt(Instant.now()); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(existingEntity)); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(existingEntity)); 
when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -101,7 +103,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } @@ -110,16 +112,16 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseName(DB); - Table expectedTable = Table.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } @@ -128,8 +130,9 @@ void analyze_skipsTable_whenNotEnabled() { void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid("uuid-1"); + statsEntity.setDatabaseName(DB); - Table expectedTable = Table.builder().tableUuid("uuid-1").build(); + Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); TableOperationRow scheduled = new TableOperationRow(); scheduled.setId("op-id"); @@ -138,9 +141,8 @@ void 
analyze_skipsTable_whenShouldScheduleReturnsFalse() { scheduled.setOperationType(OFD); scheduled.setCreatedAt(Instant.now()); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(List.of(scheduled)); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(scheduled)); when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); @@ -148,7 +150,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } @@ -157,13 +159,13 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { void analyze_skipsTable_whenTableUuidIsNull() { TableStatsRow statsEntity = new TableStatsRow(); statsEntity.setTableUuid(null); + statsEntity.setDatabaseName(DB); - when(statsRepo.find(null, null, null)).thenReturn(List.of(statsEntity)); - when(analyzer.getOperationType()).thenReturn(OperationType.ORPHAN_FILES_DELETION); - when(operationsRepo.find(OFD, null, null, null, null)).thenReturn(Collections.emptyList()); + when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); - runner.analyze(); + runner.analyze(OFD_TYPE); verify(operationsRepo, never()).save(any()); } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java 
b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 9a847f34c..771707258 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -18,7 +18,6 @@ class CadenceBasedOrphanFilesDeletionAnalyzerTest { private static final Duration SUCCESS_INTERVAL = Duration.ofHours(24); private static final Duration FAILURE_INTERVAL = Duration.ofHours(1); - private static final Duration SCHEDULED_TIMEOUT = Duration.ofHours(6); private CadenceBasedOrphanFilesDeletionAnalyzer analyzer; @@ -26,7 +25,7 @@ class CadenceBasedOrphanFilesDeletionAnalyzerTest { void setUp() { analyzer = new CadenceBasedOrphanFilesDeletionAnalyzer( - new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL, SCHEDULED_TIMEOUT)); + new CadencePolicy(SUCCESS_INTERVAL, FAILURE_INTERVAL)); } // --- isEnabled --- @@ -105,14 +104,14 @@ void shouldSchedule_noOp_failedHistoryBeforeRetry_returnsFalse() { .isFalse(); } - // --- shouldSchedule: PENDING / SCHEDULING --- + // --- shouldSchedule: active op (non-CANCELED) → analyzer stays out --- @Test void shouldSchedule_pending_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.PENDING, null)), + Optional.of(opWithStatus(OperationStatus.PENDING)), Optional.empty())) .isFalse(); } @@ -122,93 +121,56 @@ void shouldSchedule_scheduling_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULING, null)), + Optional.of(opWithStatus(OperationStatus.SCHEDULING)), Optional.empty())) .isFalse(); } - // --- shouldSchedule: SCHEDULED + history --- - @Test - void shouldSchedule_scheduledNoHistory_withinTimeout_returnsFalse() { - Instant recent = 
Instant.now().minus(SCHEDULED_TIMEOUT).plusSeconds(60); + void shouldSchedule_scheduled_returnsFalse_regardlessOfHistory() { + Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, recent)), - Optional.empty())) + Optional.of(opWithStatus(OperationStatus.SCHEDULED)), + Optional.of(historyWithStatus("SUCCESS", historyAt)))) .isFalse(); } - @Test - void shouldSchedule_scheduledNoHistory_pastTimeout_returnsTrue() { - Instant longAgo = Instant.now().minus(SCHEDULED_TIMEOUT).minusSeconds(60); - assertThat( - analyzer.shouldSchedule( - tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, longAgo)), - Optional.empty())) - .isTrue(); - } - - @Test - void shouldSchedule_scheduledWithNullScheduledAt_noHistory_returnsTrue() { - assertThat( - analyzer.shouldSchedule( - tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, null)), - Optional.empty())) - .isTrue(); - } + // --- shouldSchedule: CANCELED → cadence on history --- @Test - void shouldSchedule_scheduledWithSuccessHistory_afterCooldown_returnsTrue() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); + void shouldSchedule_canceled_successHistoryAfterCooldown_returnsTrue() { + Instant longAgo = Instant.now().minus(SUCCESS_INTERVAL).minusSeconds(60); assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("SUCCESS", historyAt)))) + Optional.of(opWithStatus(OperationStatus.CANCELED)), + Optional.of(historyWithStatus("SUCCESS", longAgo)))) .isTrue(); } @Test - void shouldSchedule_scheduledWithSuccessHistory_beforeCooldown_returnsFalse() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = 
Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); + void shouldSchedule_canceled_successHistoryBeforeCooldown_returnsFalse() { + Instant recent = Instant.now().minus(SUCCESS_INTERVAL).plusSeconds(60); assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("SUCCESS", historyAt)))) + Optional.of(opWithStatus(OperationStatus.CANCELED)), + Optional.of(historyWithStatus("SUCCESS", recent)))) .isFalse(); } @Test - void shouldSchedule_scheduledWithFailedHistory_afterRetry_returnsTrue() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).minusSeconds(60); + void shouldSchedule_canceled_noHistory_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("FAILED", historyAt)))) + Optional.of(opWithStatus(OperationStatus.CANCELED)), + Optional.empty())) .isTrue(); } - @Test - void shouldSchedule_scheduledWithFailedHistory_beforeRetry_returnsFalse() { - Instant scheduledAt = Instant.now().minusSeconds(3600); - Instant historyAt = Instant.now().minus(FAILURE_INTERVAL).plusSeconds(60); - assertThat( - analyzer.shouldSchedule( - tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED, scheduledAt)), - Optional.of(historyWithStatus("FAILED", historyAt)))) - .isFalse(); - } - // --- helpers --- private Table tableWithProperty(String value) { @@ -224,11 +186,8 @@ private Table tableWithProperty(String value) { .build(); } - private TableOperation opWithStatus(OperationStatus status, Instant scheduledAt) { - TableOperation op = new TableOperation(); - op.setStatus(status); - op.setScheduledAt(scheduledAt); - return op; + private TableOperation opWithStatus(OperationStatus status) { + return TableOperation.builder().status(status).build(); } private 
TableOperationHistoryRow historyWithStatus(String status, Instant completedAt) { From 91ba36241499e5c0791d3e8da2670cf1b2d29e41 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 13:21:39 -0700 Subject: [PATCH 034/104] style(optimizer-analyzer): tighten AnalyzerRunner.analyze body MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the blank lines inside analyze() between statements (post-signature, post-dbs, post-forEach). Also remove the explanatory comment above latestHistory — the variable name is self-describing and the tied-rows edge case it warned about doesn't affect correctness here. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/linkedin/openhouse/analyzer/AnalyzerRunner.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index dfc605f50..1ab40b757 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -56,7 +56,6 @@ public void analyze( Optional databaseName, Optional tableName, Optional tableUuid) { - Optional analyzerOpt = analyzers.stream().filter(a -> a.getOperationType() == operationType).findFirst(); if (analyzerOpt.isEmpty()) { @@ -64,12 +63,9 @@ public void analyze( return; } OperationAnalyzer analyzer = analyzerOpt.get(); - List dbs = databaseName.map(List::of).orElseGet(statsRepo::findDistinctDatabaseNames); log.info("Analyzing {} across {} database(s)", operationType, dbs.size()); - dbs.forEach(db -> analyzeDatabase(analyzer, db, tableName, tableUuid)); - log.info("Analysis complete for {}", operationType); } @@ -93,8 +89,6 @@ private void analyzeDatabase( TableOperation::from, TableOperation::mostRecent)); - // Latest history row per 
(table_uuid, op_type) for this analyzer. The repo query may return - // tied rows on identical completed_at; dedupe in memory. Map latestHistory = historyRepo.findLatestPerTable(operationType).stream() .filter(r -> r.getTableUuid() != null) From eba1392e44d9170a3f0a484a7d631c1683cccf91 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:01:41 -0700 Subject: [PATCH 035/104] feat(optimizer): promote internal model types to shared apps/optimizer Move Table, TableOperation, OperationType, OperationStatus, HistoryStatus from the analyzer-internal package into the shared apps/optimizer module. The scheduler will consume the same domain types as the analyzer. Per-layer types still hold (wire-API, internal model, DB each define their own representation); this just consolidates the internal layer so multiple internal consumers (analyzer, scheduler) share one set of classes. TableOperation gains a nullable, non-persisted fileCount field. Consumers that need it (OFD bin-packing) populate it at read time from table_stats; the DB row does not carry it. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/model/HistoryStatus.java | 13 +++ .../optimizer/model/OperationStatus.java | 15 +++ .../optimizer/model/OperationType.java | 10 ++ .../openhouse/optimizer/model/Table.java | 41 +++++++ .../optimizer/model/TableOperation.java | 106 ++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java new file mode 100644 index 000000000..d29c88719 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle outcomes for a completed operation. Mirrors the values written to {@code + * table_operations_history.status}; parsed at the boundary so callers switch on a typed value + * instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java new file mode 100644 index 000000000..66f213c73 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -0,0 +1,15 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle states for an operation. The analyzer writes {@link #PENDING}; the scheduler + * transitions through {@link #SCHEDULING} and {@link #SCHEDULED}. {@link #CANCELED} marks + * deduplicated PENDING rows. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java new file mode 100644 index 000000000..bea44018b --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal enum for the operation types the analyzer and scheduler know about. Intentionally + * separate from the wire-API and DB representations so the internal model can evolve its set of + * supported operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java new file mode 100644 index 000000000..e232803dd --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed + * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * stats for bin-packing). 
+ */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseName; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java new file mode 100644 index 000000000..d1390ee79 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -0,0 +1,106 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks + * up and submits. Built either from an existing {@link TableOperationRow} (when loading current + * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA + * row via {@link #toRow()}. + * + *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., + * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not + * carry it. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name. */ + private String databaseName; + + /** Table name. */ + private String tableName; + + /** Operation type. */ + private OperationType operationType; + + /** Current lifecycle status. */ + private OperationStatus status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** + * Number of current data files on the table at evaluation time. Non-persisted enrichment; + * populated by consumers that need it. Null when not enriched. + */ + private Long fileCount; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationRow row) { + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + /** Create a new PENDING operation for the given table and operation type. 
*/ + public static TableOperation pending(Table table, OperationType operationType) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationRow toRow() { + return TableOperationRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType.name()) + .status(status.name()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} From 0dbe3d9a5c82b5be36158e2da7cd10b0ed22122f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:04:07 -0700 Subject: [PATCH 036/104] refactor(optimizer-analyzer): import shared model + explanatory comment Move imports from com.linkedin.openhouse.analyzer.model to the shared com.linkedin.openhouse.optimizer.model package. The five local copies (Table, TableOperation, OperationType, OperationStatus, HistoryStatus) are removed; tests and runtime classes import from the shared location. Adds a /* ... */ block comment above the per-table loop in AnalyzerRunner.analyzeDatabase walking through what each step does: opt-in check, current-op and latest-history lookup, delegation to shouldSchedule, and PENDING persistence on true. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 18 +++- ...denceBasedOrphanFilesDeletionAnalyzer.java | 6 +- .../openhouse/analyzer/CadencePolicy.java | 6 +- .../openhouse/analyzer/OperationAnalyzer.java | 6 +- .../analyzer/model/HistoryStatus.java | 13 --- .../analyzer/model/OperationStatus.java | 14 --- .../analyzer/model/OperationType.java | 10 -- .../openhouse/analyzer/model/Table.java | 42 -------- .../analyzer/model/TableOperation.java | 95 ------------------- .../analyzer/AnalyzerRunnerTest.java | 6 +- ...eBasedOrphanFilesDeletionAnalyzerTest.java | 6 +- 11 files changed, 29 insertions(+), 193 deletions(-) delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java delete mode 100644 apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 1ab40b757..ae865b11e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; import 
com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -104,6 +104,17 @@ private void analyzeDatabase( .map(Table::from) .collect(Collectors.toList()); + /* + * For each table in this database, decide whether to create a new PENDING operation. + * + * 1. Skip tables not opted in to this operation type. The opt-in check today reads a + * table-property flag; in the future it will read a denormalized column. + * 2. Look up the table's current active operation (if any) and its most recent completed + * history entry from the maps loaded above. + * 3. Delegate the schedule-or-not decision to the analyzer's shouldSchedule — strategy + * encapsulates cadence, retry policy, and any future per-operation signals. + * 4. On true, persist a new PENDING operation. The scheduler picks it up on its next pass. 
+ */ tables.forEach( table -> { if (!analyzer.isEnabled(table)) { @@ -113,7 +124,6 @@ private void analyzeDatabase( Optional.ofNullable(currentOps.get(table.getTableUuid())); Optional entry = Optional.ofNullable(latestHistory.get(table.getTableUuid())); - if (analyzer.shouldSchedule(table, currentOp, entry)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); operationsRepo.save(op.toRow()); diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index e66bc070d..7f6a0b68b 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Duration; import java.util.Optional; import org.springframework.beans.factory.annotation.Autowired; diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 7aa646cf6..b95dadc5b 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,9 +1,9 @@ package 
com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.HistoryStatus; -import com.linkedin.openhouse.analyzer.model.OperationStatus; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Duration; import java.time.Instant; import java.util.Optional; diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index 33f2b8e5d..b301f9d09 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.util.Optional; /** diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java deleted file mode 100644 index eb0e46762..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/HistoryStatus.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -/** - * Analyzer-internal lifecycle outcomes for 
a completed operation. Mirrors the values written to - * {@code table_operations_history.status}; parsed at the boundary so the analyzer can switch on a - * typed value instead of comparing strings. - * - *

<p>Intentionally separate from the wire-API and DB representations. - */ -public enum HistoryStatus { - SUCCESS, - FAILED -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java deleted file mode 100644 index 8a2d1d541..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationStatus.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -/** - * Analyzer-internal lifecycle states. The analyzer only writes {@link #PENDING}; the other values - * are read off existing rows when deciding whether to re-issue a recommendation. - * - *

<p>Intentionally separate from the wire-API and DB representations. - */ -public enum OperationStatus { - PENDING, - SCHEDULING, - SCHEDULED, - CANCELED -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java deleted file mode 100644 index da48bb459..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/OperationType.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -/** - * Analyzer-internal enum for the operation types this app knows how to schedule. Intentionally - * separate from the wire-API and DB representations so the analyzer can evolve its set of supported - * operations without churning either boundary. - */ -public enum OperationType { - ORPHAN_FILES_DELETION -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java deleted file mode 100644 index 45e02fd60..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/Table.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.TableStats; -import java.util.Collections; -import java.util.Map; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * An OpenHouse table enriched with stats and properties, built by combining data sources. This is - * the input to the analysis pipeline: analyzers evaluate a {@code Table} and decide whether to - * produce a {@link TableOperation}.
- */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class Table { - - private String tableUuid; - private String databaseName; - private String tableId; - - @Builder.Default private Map tableProperties = Collections.emptyMap(); - - private TableStats stats; - - /** Build a {@code Table} from a {@code table_stats} row. */ - public static Table from(TableStatsRow row) { - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(row.getStats()) - .build(); - } -} diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java deleted file mode 100644 index 3de08b9c5..000000000 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/model/TableOperation.java +++ /dev/null @@ -1,95 +0,0 @@ -package com.linkedin.openhouse.analyzer.model; - -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; -import java.time.Instant; -import java.util.Comparator; -import java.util.UUID; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * An operation the analyzer has decided to schedule for a table. Built either from an existing - * {@link TableOperationRow} (when loading current state) or from a {@link Table} (when creating a - * new PENDING operation). Converts back to a JPA row via {@link #toRow()}. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperation { - - /** Unique operation ID (UUID). */ - private String id; - - /** The table this operation targets. */ - private String tableUuid; - - /** Database name. */ - private String databaseName; - - /** Table name. 
*/ - private String tableName; - - /** Operation type. */ - private OperationType operationType; - - /** Current lifecycle status. */ - private OperationStatus status; - - /** When this operation record was created. */ - private Instant createdAt; - - /** When the scheduler last submitted a job for this operation. */ - private Instant scheduledAt; - - /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationRow row) { - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(OperationType.valueOf(row.getOperationType())) - .status(OperationStatus.valueOf(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - - /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, OperationType operationType) { - return TableOperation.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(table.getTableUuid()) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .operationType(operationType) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); - } - - /** Convert to a JPA entity for persistence. */ - public TableOperationRow toRow() { - return TableOperationRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .operationType(operationType.name()) - .status(status.name()) - .createdAt(createdAt) - .scheduledAt(scheduledAt) - .version(0L) - .build(); - } - - /** Return the more recently created of two operations. */ - public static TableOperation mostRecent(TableOperation a, TableOperation b) { - Comparator byCreatedAt = - Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); - return byCreatedAt.compare(a, b) >= 0 ? 
a : b; - } -} diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 9734a329a..0d287fccf 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -7,11 +7,11 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.linkedin.openhouse.analyzer.model.OperationType; -import com.linkedin.openhouse.analyzer.model.Table; -import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationRow; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.repository.TableOperationHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 771707258..af7100357 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -2,10 +2,10 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.analyzer.model.OperationStatus; -import com.linkedin.openhouse.analyzer.model.Table; 
-import com.linkedin.openhouse.analyzer.model.TableOperation; import com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Duration; import java.time.Instant; import java.util.Collections; From e57659391cc238cc4609682af943843502d8b9b8 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 17:51:44 -0700 Subject: [PATCH 037/104] refactor(optimizer): rename apps/optimizer entities + repos to plural; add TableStatsHistory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns apps/optimizer with the SQL table names (table_operations, table_operations_history) and the existing services/optimizer convention: - TableOperationRow → TableOperationsRow - TableOperationHistoryRow → TableOperationsHistoryRow - TableOperationHistoryRepository → TableOperationsHistoryRepository Adds the missing TableStatsHistoryRow + TableStatsHistoryRepository so apps/optimizer is a complete entity set covering all four optimizer DB tables. services/optimizer will consume these in a follow-up commit on optimizer-2 (the services-side duplicates will be deleted). Adds an explanatory javadoc on TableOperationsRow.version documenting the application-level optimistic-concurrency-control role used by the scheduler's CAS transitions (resolves PR #530 thread 3231557313). Co-Authored-By: Claude Opus 4.7 (1M context) --- ...ow.java => TableOperationsHistoryRow.java} | 2 +- ...rationRow.java => TableOperationsRow.java} | 10 ++- .../entity/TableStatsHistoryRow.java | 61 +++++++++++++++++++ .../optimizer/model/TableOperation.java | 10 +-- ... 
=> TableOperationsHistoryRepository.java} | 16 ++--- .../repository/TableOperationsRepository.java | 8 +-- .../TableStatsHistoryRepository.java | 29 +++++++++ 7 files changed, 116 insertions(+), 20 deletions(-) rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/{TableOperationHistoryRow.java => TableOperationsHistoryRow.java} (96%) rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/{TableOperationRow.java => TableOperationsRow.java} (71%) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java rename apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/{TableOperationHistoryRepository.java => TableOperationsHistoryRepository.java} (79%) create mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java similarity index 96% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index d15eb6785..e5ff2bd01 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationHistoryRow { +public class TableOperationsHistoryRow { @Id @Column(name = "id", nullable = false, length = 36) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java 
b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java similarity index 71% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 33a83bd3f..0e23761ae 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -19,7 +19,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationRow { +public class TableOperationsRow { @Id @Column(name = "id", nullable = false, length = 36) @@ -49,7 +49,13 @@ public class TableOperationRow { @Column(name = "job_id", length = 255) private String jobId; - /** Plain version column — not managed by JPA optimistic locking. */ + /** + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this + * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't + * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain + * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
+ */ @Column(name = "version") private Long version; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java new file mode 100644 index 000000000..6f41881d6 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -0,0 +1,61 @@ +package com.linkedin.openhouse.optimizer.entity; + +import com.linkedin.openhouse.optimizer.model.TableStats; +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *

<p>Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers + * query this table to reconstruct change rates over arbitrary time windows. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index d1390ee79..d49625a57 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -11,7 +11,7 @@ /** *
An operation the analyzer has decided to schedule for a table, and that the scheduler later picks - * up and submits. Built either from an existing {@link TableOperationRow} (when loading current + * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA * row via {@link #toRow()}. * @@ -56,7 +56,7 @@ public class TableOperation { private Long fileCount; /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationRow row) { + public static TableOperation from(TableOperationsRow row) { return TableOperation.builder() .id(row.getId()) .tableUuid(row.getTableUuid()) @@ -83,8 +83,8 @@ public static TableOperation pending(Table table, OperationType operationType) { } /** Convert to a JPA entity for persistence. */ - public TableOperationRow toRow() { - return TableOperationRow.builder() + public TableOperationsRow toRow() { + return TableOperationsRow.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java similarity index 79% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java rename to apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 26166271f..f8fe90b0c 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import 
com.linkedin.openhouse.optimizer.entity.TableOperationHistoryRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; import java.util.List; import org.springframework.data.domain.Pageable; @@ -9,21 +9,21 @@ import org.springframework.data.repository.query.Param; /** Repository for reading {@code table_operations_history} in the Analyzer. */ -public interface TableOperationHistoryRepository - extends JpaRepository { +public interface TableOperationsHistoryRepository + extends JpaRepository { /** * Return history rows matching the given filters, ordered by {@code completedAt} descending. * Every parameter is optional — pass {@code null} to skip that filter. */ @Query( - "SELECT r FROM TableOperationHistoryRow r " + "SELECT r FROM TableOperationsHistoryRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:since IS NULL OR r.completedAt >= :since) " + "ORDER BY r.completedAt DESC") - List find( + List find( @Param("operationType") String operationType, @Param("tableUuid") String tableUuid, @Param("status") String status, @@ -43,10 +43,10 @@ List find( * tied rows; callers should dedupe in memory. 
*/ @Query( - "SELECT r FROM TableOperationHistoryRow r " + "SELECT r FROM TableOperationsHistoryRow r " + "WHERE r.operationType = :operationType " + "AND r.completedAt = (" - + " SELECT MAX(r2.completedAt) FROM TableOperationHistoryRow r2 " + + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); + List findLatestPerTable(@Param("operationType") String operationType); } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 27424dfdc..c7a08cabc 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,26 +1,26 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationRow; +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; /** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ -public interface TableOperationsRepository extends JpaRepository { +public interface TableOperationsRepository extends JpaRepository { /** * Return operations matching the given filters. Every parameter is optional — pass {@code null} * to skip that filter. 
*/ @Query( - "SELECT r FROM TableOperationRow r " + "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( + List find( @Param("operationType") String operationType, @Param("status") String status, @Param("tableUuid") String tableUuid, diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..aaa1b0050 --- /dev/null +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. 
+ * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + + "ORDER BY r.recordedAt DESC") + List find( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} From 6f98e1ad65aba584d4d87e9df9ee0a911b451261 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 13 May 2026 18:07:09 -0700 Subject: [PATCH 038/104] refactor(optimizer): consolidate entities/repos into apps/optimizer; rename wire HistoryStatus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit services/optimizer no longer maintains its own JPA entities or Spring Data repositories for the four optimizer DB tables. Apps/optimizer is the single source of truth; services/optimizer depends on apps/optimizer (renamed via project(':apps:optimizer').name = 'optimizer-data' to dodge the project.name collision that previously caused a self-referential dependency error). Removed from services/optimizer: - entity/{TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow}.java - repository/{TableOperationsRepository, TableOperationsHistoryRepository, TableStatsRepository, TableStatsHistoryRepository}.java - api/model/TableStats.java (duplicate) - api/model/OperationHistoryStatus.java (renamed → HistoryStatus to match the internal enum naming) - config/JobResultConverter.java (no longer needed — entity stores result as raw String JSON and the mapper converts at the wire boundary) Added on apps-side: - TableStatsHistoryRow + TableStatsHistoryRepository (previously only on services-side) - jobId + result fields on TableOperationsHistoryRow so it covers all services-side use cases - find(...) 
on TableOperationsHistoryRepository extended to the 8-filter service-layer shape (databaseName, tableName, tableUuid, operationType, status, since, until, pageable) - toBuilder = true on TableStatsRow so OptimizerDataServiceImpl.upsertTableStats can use the existing.toBuilder() pattern Mapper updates: - OptimizerMapper gains String ↔ wire-enum helpers and a JSON ↔ JobResult pair (replaces the old JPA AttributeConverter approach). - OptimizerDataServiceImpl unwraps Optional filters via .name() before calling the now-shared apps-side repos. Tests updated to match: entity-builder calls pass enum.name() Strings; repo.find(...) args reordered to apps-side (operationType, status, tableUuid, databaseName, tableName); JobResult.builder() in test fixtures replaced with literal JSON strings to match the String-typed result column. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../entity/TableOperationsHistoryRow.java | 6 ++ .../optimizer/entity/TableStatsRow.java | 2 +- .../TableOperationsHistoryRepository.java | 10 ++- services/optimizer/build.gradle | 1 + .../TableOperationsHistoryController.java | 4 +- .../optimizer/api/mapper/OptimizerMapper.java | 60 +++++++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 50 ----------- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 -------- .../entity/TableOperationsHistoryRow.java | 84 ------------------ .../optimizer/entity/TableOperationsRow.java | 88 ------------------- .../entity/TableStatsHistoryRow.java | 61 ------------- .../optimizer/entity/TableStatsRow.java | 57 ------------ .../TableOperationsHistoryRepository.java | 45 ---------- .../repository/TableOperationsRepository.java | 33 ------- 
.../TableStatsHistoryRepository.java | 29 ------ .../repository/TableStatsRepository.java | 25 ------ .../service/OptimizerDataService.java | 4 +- .../service/OptimizerDataServiceImpl.java | 26 +++--- .../TableOperationsHistoryRepositoryTest.java | 41 +++++---- .../TableOperationsRepositoryTest.java | 44 +++++----- .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 2 +- .../service/OptimizerDataServiceImplTest.java | 14 +-- settings.gradle | 1 + 30 files changed, 151 insertions(+), 586 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java diff --git 
a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index e5ff2bd01..09eb7fc21 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -40,4 +40,10 @@ public class TableOperationsHistoryRow { @Column(name = "status", nullable = false, length = 20) private String status; + + @Column(name = "job_id", length = 255) + private String jobId; + + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index bc647d86e..b1fad275c 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -25,7 +25,7 @@ @Table(name = "table_stats") @Getter @Setter -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableStatsRow { diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index f8fe90b0c..61e0316e5 100644 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -18,16 +18,22 @@ public interface TableOperationsHistoryRepository */ @Query( "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:operationType IS NULL OR 
r.operationType = :operationType) " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:since IS NULL OR r.completedAt >= :since) " + + "AND (:until IS NULL OR r.completedAt < :until) " + "ORDER BY r.completedAt DESC") List find( - @Param("operationType") String operationType, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, @Param("tableUuid") String tableUuid, + @Param("operationType") String operationType, @Param("status") String status, @Param("since") Instant since, + @Param("until") Instant until, Pageable pageable); /** diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..31051b65c 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,6 +4,7 @@ plugins { } dependencies { + implementation project(':apps:optimizer-data') implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 79fce5b8f..b14156d5b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; -import 
com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; @@ -50,7 +50,7 @@ public ResponseEntity> listHistory( @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid, @RequestParam(required = false) OperationType operationType, - @RequestParam(required = false) OperationHistoryStatus status, + @RequestParam(required = false) HistoryStatus status, @RequestParam(required = false) Instant since, @RequestParam(required = false) Instant until, @RequestParam(defaultValue = "100") int limit) { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..db9acc27e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,16 @@ * MapStruct mapper for converting between 
optimizer JPA entities and their corresponding DTOs. * *

Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + *

Type-conversion helpers below bridge the entity's raw String/JSON shape (apps/optimizer + * entities use Strings at the JPA boundary for portability) and the wire DTO's typed enums and + * nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +41,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java deleted file mode 100644 index 64c99061a..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** Combined stats payload stored as a single JSON blob per table. */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot — used for bin-packing. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index 3b6ced892..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Convert; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import 
lombok.NoArgsConstructor; - -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - */ -@Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsHistoryRow { - - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Enumerated(EnumType.STRING) - @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; - - /** When the operation completed, as recorded by the complete endpoint. */ - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - /** {@code SUCCESS} or {@code FAILED}. 
*/ - @Enumerated(EnumType.STRING) - @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; - - /** Spark job ID; indexed for job → result lookups. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 43778495a..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,88 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - */ -@Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsRow { - - /** Client-generated UUID identifying this specific operation recommendation. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Enumerated(EnumType.STRING) - @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; - - @Enumerated(EnumType.STRING) - @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; - - /** When the Analyzer first created this row. Set by the service on insert; never updated. 
*/ - @Column(name = "created_at", nullable = false) - private Instant createdAt; - - /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ - @Column(name = "scheduled_at") - private Instant scheduledAt; - - /** Job ID returned by the Jobs Service after successful submission. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. - */ - @Column(name = "version") - private Long version; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index b0d92fc81..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index f682a3485..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; 
-import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - /** Set on every upsert. Used for stats pipeline staleness monitoring. 
*/ - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java deleted file mode 100644 index 65d62818c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; - -/** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller - * (same UUID as the originating {@code table_operations.id}). - */ -@Repository -public interface TableOperationsHistoryRepository - extends JpaRepository { - - /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. 
- */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "AND (:until IS NULL OR r.completedAt <= :until) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid, - @Param("operationType") OperationType operationType, - @Param("status") OperationHistoryStatus status, - @Param("since") Instant since, - @Param("until") Instant until, - Pageable pageable); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java deleted file mode 100644 index 891322134..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; - -/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. 
*/ -@Repository -public interface TableOperationsRepository extends JpaRepository { - - /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. - */ - @Query( - "SELECT r FROM TableOperationsRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("operationType") OperationType operationType, - @Param("status") OperationStatus status, - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java deleted file mode 100644 index aaa1b0050..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository { - - /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND (:since IS NULL OR r.recordedAt >= :since) " - + "ORDER BY r.recordedAt DESC") - List find( - @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java deleted file mode 100644 index 9bcaab41b..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ -public interface TableStatsRepository extends JpaRepository { - - /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. 
- */ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 1c17d7a38..2909b8b5a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.service; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -93,7 +93,7 @@ List listHistory( Optional tableName, Optional tableUuid, Optional operationType, - Optional status, + Optional status, Optional since, Optional until, int limit); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index de4faa465..0c9af5107 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -2,7 
+2,7 @@ import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -49,11 +49,11 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType.orElse(null), - status.orElse(null), + operationType.map(OperationType::name).orElse(null), + status.map(OperationStatus::name).orElse(null), + tableUuid.orElse(null), databaseName.orElse(null), - tableName.orElse(null), - tableUuid.orElse(null)) + tableName.orElse(null)) .stream() .map(mapper::toDto) .collect(Collectors.toList()); @@ -75,9 +75,9 @@ public Optional completeOperation( .tableName(row.getTableName()) .operationType(row.getOperationType()) .completedAt(Instant.now()) - .status(request.getStatus()) + .status(request.getStatus().name()) .jobId(row.getJobId()) - .result(request.getResult()) + .result(mapper.fromJobResult(request.getResult())) .build(); return mapper.toDto(historyRepository.save(historyRow)); }); @@ -165,11 +165,11 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { .tableUuid(dto.getTableUuid()) .databaseName(dto.getDatabaseName()) .tableName(dto.getTableName()) - .operationType(dto.getOperationType()) + .operationType(dto.getOperationType() != null ? dto.getOperationType().name() : null) .completedAt(dto.getCompletedAt() != null ? dto.getCompletedAt() : Instant.now()) - .status(dto.getStatus()) + .status(dto.getStatus() != null ? 
dto.getStatus().name() : null) .jobId(dto.getJobId()) - .result(dto.getResult()) + .result(mapper.fromJobResult(dto.getResult())) .build(); return mapper.toDto(historyRepository.save(row)); } @@ -188,7 +188,7 @@ public List listHistory( Optional tableName, Optional tableUuid, Optional operationType, - Optional status, + Optional status, Optional since, Optional until, int limit) { @@ -197,8 +197,8 @@ public List listHistory( databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null), - operationType.orElse(null), - status.orElse(null), + operationType.map(OperationType::name).orElse(null), + status.map(HistoryStatus::name).orElse(null), since.orElse(null), until.orElse(null), PageRequest.of(0, limit)) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index b9735a617..7f0879d83 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,8 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; @@ -35,9 +34,9 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t1) - .status(OperationHistoryStatus.SUCCESS) + 
.status(HistoryStatus.SUCCESS.name()) .jobId("job-001") .build()); @@ -47,11 +46,11 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t2) - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .jobId("job-002") - .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") .build()); List rows = @@ -74,9 +73,9 @@ void appendIsNonDestructive_multipleRunsRetained() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); } @@ -96,9 +95,9 @@ void find_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); } @@ -119,9 +118,9 @@ void find_noParams_returnsAll() { .tableUuid(uuid1) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -129,16 +128,16 @@ void find_noParams_returnsAll() { .tableUuid(uuid2) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(1)) - 
.status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .build()); List rows = repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); assertThat(rows).hasSize(2); // Newest first - assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); + assertThat(rows.get(0).getStatus()).isEqualTo(HistoryStatus.FAILED.name()); } @Test @@ -153,9 +152,9 @@ void find_byStatusAndTimeWindow() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(old) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -163,9 +162,9 @@ void find_byStatusAndTimeWindow() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(recent) - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .build()); // Filter by status @@ -175,7 +174,7 @@ void find_byStatusAndTimeWindow() { null, null, null, - OperationHistoryStatus.FAILED, + HistoryStatus.FAILED.name(), null, null, PageRequest.of(0, 100)); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index b1342b12d..2ca8dc61e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); List pending = - 
repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING.name(), null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -123,13 +123,13 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); - assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); + assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); + assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); } } diff --git 
a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index f3e72b52e..475196630 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index b62371f53..240d512ef 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 10605c002..17ab55278 100644 --- 
a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -3,16 +3,16 @@ import static org.assertj.core.api.Assertions.assertThat; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -50,8 +50,8 @@ void completeOperation_writesHistoryFromOperationRow() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .scheduledAt(Instant.now()) .jobId("spark-job-123") @@ -59,10 +59,10 @@ void completeOperation_writesHistoryFromOperationRow() { Optional result = service.completeOperation( - id, 
CompleteOperationRequest.builder().status(OperationHistoryStatus.SUCCESS).build()); + id, CompleteOperationRequest.builder().status(HistoryStatus.SUCCESS).build()); assertThat(result).isPresent(); - assertThat(result.get().getStatus()).isEqualTo(OperationHistoryStatus.SUCCESS); + assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); @@ -76,7 +76,7 @@ void completeOperation_notFound_returnsEmpty() { service.completeOperation( UUID.randomUUID().toString(), CompleteOperationRequest.builder() - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED) .result( JobResult.builder().errorMessage("boom").errorType("RuntimeException").build()) .build()); diff --git a/settings.gradle b/settings.gradle index 0d64dad53..c5544a193 100644 --- a/settings.gradle +++ b/settings.gradle @@ -51,6 +51,7 @@ include ':services:housetables' include ':services:jobs' include ':services:optimizer' include ':apps:optimizer' +project(':apps:optimizer').name = 'optimizer-data' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From d90c26fcb36952e94d29e53a12049afc37be395b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:05:07 -0700 Subject: [PATCH 039/104] refactor(optimizer): move apps/optimizer module into services/optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apps/optimizer was a misplaced shared library duplicating the JPA layer. services/optimizer is the canonical optimizer module — schedulers and analyzers embed it directly as a library. 
This commit consolidates by moving the JPA entities, repositories, and in-memory domain model from apps/optimizer into services/optimizer, deleting the apps module, and updating the service-side wiring + tests accordingly. - git mv 13 files (entities/repos/model) from apps/optimizer to services/optimizer; preserves history. - Delete services-side pre-R7 duplicates: 4 entities, 4 repos, the duplicate api/model/TableStats DTO, the now-unneeded JobResultConverter. - Rename services-side wire-API enum OperationHistoryStatus → HistoryStatus. - Drop the apps/optimizer module entry from settings.gradle. - OptimizerMapper: add String↔OperationType, String↔OperationStatus, String↔HistoryStatus, String↔JobResult default helpers so MapStruct can bridge the entity (String at JPA boundary) and the wire DTOs. - Update DTOs that import TableStats/HistoryStatus to the new package locations. - Rewrite repo tests against the simplified history repo shape and fix a stale find(...) positional-arg signature in the operations repo test. 
--- apps/optimizer/build.gradle | 13 -- .../entity/TableOperationsHistoryRow.java | 43 ------ .../optimizer/entity/TableOperationsRow.java | 61 -------- .../entity/TableStatsHistoryRow.java | 61 -------- .../optimizer/entity/TableStatsRow.java | 53 ------- .../TableOperationsHistoryRepository.java | 52 ------- .../repository/TableOperationsRepository.java | 29 ---- .../TableStatsHistoryRepository.java | 29 ---- .../repository/TableStatsRepository.java | 33 ----- .../optimizer/api/mapper/OptimizerMapper.java | 59 ++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStats.java | 50 ------- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 ------ .../entity/TableOperationsHistoryRow.java | 53 ++----- .../optimizer/entity/TableOperationsRow.java | 57 ++------ .../entity/TableStatsHistoryRow.java | 4 +- .../optimizer/entity/TableStatsRow.java | 20 ++- .../optimizer/model/HistoryStatus.java | 0 .../optimizer/model/OperationStatus.java | 0 .../optimizer/model/OperationType.java | 0 .../openhouse/optimizer/model/Table.java | 0 .../optimizer/model/TableOperation.java | 0 .../openhouse/optimizer/model/TableStats.java | 0 .../TableOperationsHistoryRepository.java | 51 +++---- .../repository/TableOperationsRepository.java | 20 ++- .../repository/TableStatsRepository.java | 12 +- .../TableOperationsHistoryRepositoryTest.java | 131 +++++------------- .../TableOperationsRepositoryTest.java | 44 +++--- .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 2 +- settings.gradle | 1 - 36 files changed, 200 insertions(+), 728 deletions(-) delete mode 100644 apps/optimizer/build.gradle delete mode 100644 
apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java delete mode 100644 apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java (100%) rename {apps => services}/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java (100%) diff --git 
a/apps/optimizer/build.gradle b/apps/optimizer/build.gradle deleted file mode 100644 index f14969274..000000000 --- a/apps/optimizer/build.gradle +++ /dev/null @@ -1,13 +0,0 @@ -plugins { - id 'openhouse.java-minimal-conventions' -} - -// Avoid build-directory collision with services:optimizer (same project.name 'optimizer'). -buildDir = "${rootProject.buildDir}/apps-optimizer" - -dependencies { - implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' - implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' - testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' - testRuntimeOnly 'com.h2database:h2' -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index e5ff2bd01..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,43 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** Lightweight JPA entity for reading {@code table_operations_history} rows. 
*/ -@Entity -@Table(name = "table_operations_history") -@Getter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperationsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - @Column(name = "status", nullable = false, length = 20) - private String status; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 0e23761ae..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; - -/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. 
*/ -@Entity -@Table(name = "table_operations") -@Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableOperationsRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "status", nullable = false, length = 20) - private String status; - - @Column(name = "created_at") - private Instant createdAt; - - @Column(name = "scheduled_at") - private Instant scheduledAt; - - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Monotonically-increasing version for application-level optimistic concurrency control. The - * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this - * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't - * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain - * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
- */ - @Column(name = "version") - private Long version; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index 6f41881d6..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index bc647d86e..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import 
javax.persistence.Table; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every - * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java deleted file mode 100644 index f8fe90b0c..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import 
org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Repository for reading {@code table_operations_history} in the Analyzer. */ -public interface TableOperationsHistoryRepository - extends JpaRepository { - - /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. - */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("operationType") String operationType, - @Param("tableUuid") String tableUuid, - @Param("status") String status, - @Param("since") Instant since, - Pageable pageable); - - /** - * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a - * single operation type. Used by the Analyzer to evaluate cadence without materializing every - * historical row. - * - *

The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code - * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code - * table_operations_history}, the subquery becomes an index-only lookup per outer row. - * - *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all - * tied rows; callers should dedupe in memory. - */ - @Query( - "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE r.operationType = :operationType " - + "AND r.completedAt = (" - + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " - + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java deleted file mode 100644 index c7a08cabc..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ -public interface TableOperationsRepository extends JpaRepository { - - /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. 
- */ - @Query( - "SELECT r FROM TableOperationsRow r " - + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( - @Param("operationType") String operationType, - @Param("status") String status, - @Param("tableUuid") String tableUuid, - @Param("databaseName") String databaseName, - @Param("tableName") String tableName); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java deleted file mode 100644 index aaa1b0050..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import java.time.Instant; -import java.util.List; -import org.springframework.data.domain.Pageable; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Append-only repository for per-commit stats history rows. */ -public interface TableStatsHistoryRepository extends JpaRepository { - - /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. 
- * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results - */ - @Query( - "SELECT r FROM TableStatsHistoryRow r " - + "WHERE r.tableUuid = :tableUuid " - + "AND (:since IS NULL OR r.recordedAt >= :since) " - + "ORDER BY r.recordedAt DESC") - List find( - @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); -} diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java deleted file mode 100644 index 4215237bc..000000000 --- a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.linkedin.openhouse.optimizer.repository; - -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import java.util.List; -import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; -import org.springframework.data.repository.query.Param; - -/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ -public interface TableStatsRepository extends JpaRepository { - - /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. - */ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); - - /** - * Return the distinct {@code database_name} values present in {@code table_stats}. 
Used by the - * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the - * number of databases (small even at million-table scale). - */ - @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") - List findDistinctDatabaseNames(); -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..ddf33a30f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,15 @@ * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. * *

Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + *

Type-conversion helpers bridge the entity's raw String/JSON shape (the entities use Strings at + * the JPA boundary for portability) and the wire DTO's typed enums and nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +40,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java deleted file mode 100644 index 64c99061a..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** Combined stats payload stored as a single JSON blob per table. */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot — used for bin-packing. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. 
*/ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 3b6ced892..09eb7fc21 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -1,52 +1,24 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; -import javax.persistence.Index; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import 
lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - */ +/** Lightweight JPA entity for reading {@code table_operations_history} rows. */ @Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) +@Table(name = "table_operations_history") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableOperationsHistoryRow { - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; @@ -60,25 +32,18 @@ public class TableOperationsHistoryRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; - /** {@code SUCCESS} or {@code FAILED}. 
*/ - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; + private String status; - /** Spark job ID; indexed for job → result lookups. */ @Column(name = "job_id", length = 255) private String jobId; - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 43778495a..0e23761ae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,55 +1,30 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; -import javax.persistence.Index; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.Setter; -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - */ +/** JPA entity mapping to the {@code table_operations} table in the optimizer DB. */ @Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) +@Table(name = "table_operations") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableOperationsRow { - /** Client-generated UUID identifying this specific operation recommendation. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. 
*/ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; @@ -59,29 +34,27 @@ public class TableOperationsRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; + private String status; - /** When the Analyzer first created this row. Set by the service on insert; never updated. */ - @Column(name = "created_at", nullable = false) + @Column(name = "created_at") private Instant createdAt; - /** Set when the operation is claimed; {@code null} while {@code PENDING}. */ @Column(name = "scheduled_at") private Instant scheduledAt; - /** Job ID returned by the Jobs Service after successful submission. */ @Column(name = "job_id", length = 255) private String jobId; /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's CAS transitions (e.g. {@code markScheduling}, {@code markScheduled}) match this + * value in the WHERE clause and bump it by one on UPDATE, ensuring two scheduler instances can't + * both move the same row out of PENDING. Not managed by JPA optimistic locking — kept as a plain + * column so the WHERE-clause-based CAS pattern works portably across MySQL and H2. 
*/ @Column(name = "version") private Long version; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index b0d92fc81..6f41881d6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import javax.persistence.Column; @@ -21,7 +21,7 @@ * Append-only record of per-commit stats reported by the Tables Service. * *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers * query this table to reconstruct change rates over arbitrary time windows. */ @TypeDef(name = "json", typeClass = JsonStringType.class) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index f682a3485..bc647d86e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import java.util.Map; @@ -8,29 +8,26 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.Table; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; +import lombok.Setter; import org.hibernate.annotations.Type; import org.hibernate.annotations.TypeDef; /** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. + * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every + * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @Table(name = "table_stats") @Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) +@Setter +@Builder +@NoArgsConstructor +@AllArgsConstructor public class TableStatsRow { @Id @@ -51,7 +48,6 @@ public class TableStatsRow { @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; - /** Set on every upsert. Used for stats pipeline staleness monitoring. */ @Column(name = "updated_at", nullable = false) private Instant updatedAt; } diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java 
similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java similarity index 100% rename from apps/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 65d62818c..ba2ce35a8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,45 +1,40 @@ package 
com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import java.time.Instant; import java.util.List; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; -/** - * Repository for {@link TableOperationsHistoryRow}. Append-only; PK is the UUID set by the caller - * (same UUID as the originating {@code table_operations.id}). - */ -@Repository +/** Repository for reading {@code table_operations_history}. */ public interface TableOperationsHistoryRepository extends JpaRepository { /** - * Return history rows matching the given filters, ordered by {@code completedAt} descending. - * Every parameter is optional — pass {@code null} to skip that filter. + * Return history rows for a single {@code tableUuid}, newest first. Used by the service-layer + * {@code getHistory} endpoint. + */ + List findByTableUuidOrderByCompletedAtDesc( + String tableUuid, Pageable pageable); + + /** + * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a + * single operation type. Used by the analyzer to evaluate cadence without materializing every + * historical row. + * + *

The correlated subquery is portable across MySQL and H2 (MySQL mode). Backed by index {@code + * idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at)} on {@code + * table_operations_history}, the subquery becomes an index-only lookup per outer row. + * + *

Ties on {@code completed_at} for the same {@code (table_uuid, operation_type)} return all + * tied rows; callers should dedupe in memory. */ @Query( "SELECT r FROM TableOperationsHistoryRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " - + "AND (:operationType IS NULL OR r.operationType = :operationType) " - + "AND (:status IS NULL OR r.status = :status) " - + "AND (:since IS NULL OR r.completedAt >= :since) " - + "AND (:until IS NULL OR r.completedAt <= :until) " - + "ORDER BY r.completedAt DESC") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid, - @Param("operationType") OperationType operationType, - @Param("status") OperationHistoryStatus status, - @Param("since") Instant since, - @Param("until") Instant until, - Pageable pageable); + + "WHERE r.operationType = :operationType " + + "AND r.completedAt = (" + + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") + List findLatestPerTable(@Param("operationType") String operationType); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 891322134..c7a08cabc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,33 +1,29 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import 
com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -import org.springframework.stereotype.Repository; -/** Repository for {@link TableOperationsRow}. PK is the client-generated UUID {@code id}. */ -@Repository +/** Spring Data JPA repository for {@code table_operations} rows in the optimizer DB. */ public interface TableOperationsRepository extends JpaRepository { /** * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. + * to skip that filter. */ @Query( "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + + "AND (:tableName IS NULL OR r.tableName = :tableName)") List find( - @Param("operationType") OperationType operationType, - @Param("status") OperationStatus status, + @Param("operationType") String operationType, + @Param("status") String status, + @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); + @Param("tableName") String tableName); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 9bcaab41b..4215237bc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -6,12 +6,12 @@ import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; -/** Spring Data JPA repository for reading and writing {@code table_stats} rows. */ +/** Spring Data JPA repository for {@code table_stats} rows in the optimizer DB. */ public interface TableStatsRepository extends JpaRepository { /** * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. No filters returns all rows. + * to skip that filter. */ @Query( "SELECT r FROM TableStatsRow r " @@ -22,4 +22,12 @@ List find( @Param("databaseName") String databaseName, @Param("tableName") String tableName, @Param("tableUuid") String tableUuid); + + /** + * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the + * Analyzer to enumerate databases when iterating per-db; the result set size is bounded by the + * number of databases (small even at million-table scale). 
+ */ + @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") + List findDistinctDatabaseNames(); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index b9735a617..436d08066 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,8 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import java.time.Instant; @@ -24,7 +23,7 @@ class TableOperationsHistoryRepositoryTest { @Autowired TableOperationsHistoryRepository repository; @Test - void appendAndFindByTableUuid() { + void findByTableUuid_returnsRowsNewestFirst() { Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); String tableUuid = UUID.randomUUID().toString(); @@ -35,9 +34,9 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t1) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .jobId("job-001") .build()); @@ -47,46 +46,23 @@ void appendAndFindByTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) + 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(t2) - .status(OperationHistoryStatus.FAILED) + .status(HistoryStatus.FAILED.name()) .jobId("job-002") - .result(JobResult.builder().errorMessage("out of memory").errorType("OOM").build()) + .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") .build()); List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); + repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); - // Newest first assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); } @Test - void appendIsNonDestructive_multipleRunsRetained() { - Instant now = Instant.now(); - String tableUuid = UUID.randomUUID().toString(); - for (int i = 0; i < 3; i++) { - repository.save( - TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(tableUuid) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) - .build()); - } - - List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 10)); - assertThat(rows).hasSize(3); - } - - @Test - void find_respectsLimit() { + void findByTableUuid_respectsLimit() { Instant now = Instant.now(); String tableUuid = UUID.randomUUID().toString(); for (int i = 0; i < 5; i++) { @@ -96,97 +72,62 @@ void find_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) .completedAt(now.plusSeconds(i)) - .status(OperationHistoryStatus.SUCCESS) + .status(HistoryStatus.SUCCESS.name()) .build()); } List rows = - repository.find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, 3)); + 
repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @Test - void find_noParams_returnsAll() { - Instant now = Instant.now(); - String uuid1 = UUID.randomUUID().toString(); - String uuid2 = UUID.randomUUID().toString(); + void findLatestPerTable_returnsOneRowPerTableUuid() { + Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); + Instant t2 = Instant.parse("2024-02-01T10:00:00Z"); + String tableUuid = UUID.randomUUID().toString(); + String otherUuid = UUID.randomUUID().toString(); repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) - .tableUuid(uuid1) + .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now) - .status(OperationHistoryStatus.SUCCESS) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t1) + .status(HistoryStatus.SUCCESS.name()) .build()); - repository.save( - TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(uuid2) - .databaseName("db2") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(now.plusSeconds(1)) - .status(OperationHistoryStatus.FAILED) - .build()); - - List rows = - repository.find(null, null, null, null, null, null, null, PageRequest.of(0, 100)); - assertThat(rows).hasSize(2); - // Newest first - assertThat(rows.get(0).getStatus()).isEqualTo(OperationHistoryStatus.FAILED); - } - - @Test - void find_byStatusAndTimeWindow() { - Instant old = Instant.parse("2024-01-01T00:00:00Z"); - Instant recent = Instant.parse("2024-06-01T00:00:00Z"); - String tableUuid = UUID.randomUUID().toString(); - repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(old) - .status(OperationHistoryStatus.SUCCESS) + 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t2) + .status(HistoryStatus.FAILED.name()) .build()); repository.save( TableOperationsHistoryRow.builder() .id(UUID.randomUUID().toString()) - .tableUuid(tableUuid) + .tableUuid(otherUuid) .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .completedAt(recent) - .status(OperationHistoryStatus.FAILED) + .tableName("tbl2") + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .completedAt(t1) + .status(HistoryStatus.SUCCESS.name()) .build()); - // Filter by status - List failed = - repository.find( - null, - null, - null, - null, - OperationHistoryStatus.FAILED, - null, - null, - PageRequest.of(0, 100)); - assertThat(failed).hasSize(1); - assertThat(failed.get(0).getCompletedAt()).isEqualTo(recent); + List latest = + repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION.name()); - // Filter by time window - Instant cutoff = Instant.parse("2024-03-01T00:00:00Z"); - List afterCutoff = - repository.find(null, null, null, null, null, cutoff, null, PageRequest.of(0, 100)); - assertThat(afterCutoff).hasSize(1); - assertThat(afterCutoff.get(0).getCompletedAt()).isEqualTo(recent); + assertThat(latest).hasSize(2); + TableOperationsHistoryRow forTarget = + latest.stream().filter(r -> r.getTableUuid().equals(tableUuid)).findFirst().orElseThrow(); + assertThat(forTarget.getCompletedAt()).isEqualTo(t2); + assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED.name()); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index b1342b12d..2ca8dc61e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ 
b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.SCHEDULED.name()) .createdAt(Instant.now()) .build()); List pending = - repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find(null, OperationStatus.PENDING.name(), null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); repository.save( @@ -123,13 +123,13 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .status(OperationStatus.PENDING.name()) .createdAt(Instant.now()) .build()); - assertThat(repository.find(null, null, "db1", null, null)).hasSize(1); - assertThat(repository.find(null, null, "db2", "tbl2", null)).hasSize(1); - assertThat(repository.find(null, null, "db1", "tbl2", null)).isEmpty(); + assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); + 
assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); + assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index f3e72b52e..475196630 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index b62371f53..240d512ef 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/settings.gradle b/settings.gradle index 0d64dad53..cad06785e 100644 --- a/settings.gradle +++ b/settings.gradle @@ -50,7 +50,6 @@ include 
':services:common' include ':services:housetables' include ':services:jobs' include ':services:optimizer' -include ':apps:optimizer' include ':services:tables' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.2' include ':tables-test-fixtures:tables-test-fixtures-iceberg-1.5' From 17e280ffc661380017170646fb572f24a639cb79 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:09:48 -0700 Subject: [PATCH 040/104] refactor(optimizer): drop apps/optimizer-data dep; simplify history API Follow-up to the optimizer-1 module move: - services/optimizer/build.gradle: drop the now-dead `implementation project(':apps:optimizer-data')` (target module removed in the prior merge). - Restore services-side TableStatsHistoryRepository (lost in the merge because optimizer-1 did not touch its services-side copy, but optimizer-2's HEAD had removed it during the R7-5 consolidation). - Drop the multi-filter `listHistory` service method, its controller endpoint, and the standalone TableByNameController. Callers use `getHistory(tableUuid, limit)` which now uses the simplified `findByTableUuidOrderByCompletedAtDesc` derived query. - TableStatsRow: enable `@Builder(toBuilder = true)` so `upsertTableStats` can build from the existing row. 
--- services/optimizer/build.gradle | 1 - .../api/controller/TableByNameController.java | 44 ------------------- .../TableOperationsHistoryController.java | 30 ------------- .../optimizer/entity/TableStatsRow.java | 2 +- .../TableStatsHistoryRepository.java | 29 ++++++++++++ .../service/OptimizerDataService.java | 16 ------- .../service/OptimizerDataServiceImpl.java | 28 +----------- 7 files changed, 31 insertions(+), 119 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index 31051b65c..c05c7f9c3 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,7 +4,6 @@ plugins { } dependencies { - implementation project(':apps:optimizer-data') implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java deleted file mode 100644 index e3582ff7e..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableByNameController.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.controller; - -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.service.OptimizerDataService; -import java.util.List; -import java.util.Optional; -import lombok.RequiredArgsConstructor; -import org.springframework.http.ResponseEntity; -import 
org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PathVariable; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; - -/** - * Name-keyed read endpoints for human/analyst convenience. UUID-keyed endpoints elsewhere remain - * the canonical path for machine callers, since drop-and-recreate of a table produces a new - * optimizer identity that a name-only lookup would conflate with the dropped table. - */ -@RestController -@RequestMapping("/v1/optimizer/databases/{databaseName}/tables/{tableName}") -@RequiredArgsConstructor -public class TableByNameController { - - private final OptimizerDataService service; - - /** Operation history for a table by (database, table) name, newest first. */ - @GetMapping("/operations-history") - public ResponseEntity> getOperationsHistoryByName( - @PathVariable String databaseName, - @PathVariable String tableName, - @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok( - service.listHistory( - Optional.of(databaseName), - Optional.of(tableName), - Optional.empty(), - Optional.empty(), - Optional.empty(), - Optional.empty(), - Optional.empty(), - limit)); - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index b14156d5b..17dc0670a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,12 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; 
-import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; -import java.time.Instant; import java.util.List; -import java.util.Optional; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -39,30 +35,4 @@ public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { return ResponseEntity.ok(service.getHistory(tableUuid, limit)); } - - /** - * List history rows matching the given filters, ordered newest first. All parameters are optional - * — omit all to return every row up to {@code limit}. - */ - @GetMapping - public ResponseEntity> listHistory( - @RequestParam(required = false) String databaseName, - @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid, - @RequestParam(required = false) OperationType operationType, - @RequestParam(required = false) HistoryStatus status, - @RequestParam(required = false) Instant since, - @RequestParam(required = false) Instant until, - @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok( - service.listHistory( - Optional.ofNullable(databaseName), - Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid), - Optional.ofNullable(operationType), - Optional.ofNullable(status), - Optional.ofNullable(since), - Optional.ofNullable(until), - limit)); - } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index bc647d86e..b1fad275c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -25,7 +25,7 @@ @Table(name = "table_stats") @Getter @Setter -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableStatsRow { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java new file mode 100644 index 000000000..aaa1b0050 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -0,0 +1,29 @@ +package com.linkedin.openhouse.optimizer.repository; + +import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import java.time.Instant; +import java.util.List; +import org.springframework.data.domain.Pageable; +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; + +/** Append-only repository for per-commit stats history rows. */ +public interface TableStatsHistoryRepository extends JpaRepository { + + /** + * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the + * time filter. 
+ * + * @param tableUuid the stable table UUID + * @param since inclusive lower bound on recorded_at; {@code null} to skip + * @param pageable use {@code PageRequest.of(0, limit)} to cap results + */ + @Query( + "SELECT r FROM TableStatsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "AND (:since IS NULL OR r.recordedAt >= :since) " + + "ORDER BY r.recordedAt DESC") + List find( + @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 2909b8b5a..6f71c708e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,7 +1,6 @@ package com.linkedin.openhouse.optimizer.service; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -82,19 +81,4 @@ List listTableStats( * @param limit maximum number of rows to return */ List getHistory(String tableUuid, int limit); - - /** - * List history rows matching the given filters, ordered newest first. Every parameter is optional - * — pass {@link Optional#empty()} to skip that filter. No filters returns all rows up to {@code - * limit}. 
- */ - List listHistory( - Optional databaseName, - Optional tableName, - Optional tableUuid, - Optional operationType, - Optional status, - Optional since, - Optional until, - int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 0c9af5107..93b9af2a0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -2,7 +2,6 @@ import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; @@ -177,32 +176,7 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { @Override public List getHistory(String tableUuid, int limit) { return historyRepository - .find(null, null, tableUuid, null, null, null, null, PageRequest.of(0, limit)).stream() - .map(mapper::toDto) - .collect(Collectors.toList()); - } - - @Override - public List listHistory( - Optional databaseName, - Optional tableName, - Optional tableUuid, - Optional operationType, - Optional status, - Optional since, - Optional until, - int limit) { - return historyRepository - .find( - databaseName.orElse(null), - tableName.orElse(null), - tableUuid.orElse(null), - operationType.map(OperationType::name).orElse(null), - status.map(HistoryStatus::name).orElse(null), - since.orElse(null), - until.orElse(null), - PageRequest.of(0, limit)) - .stream() + .findByTableUuidOrderByCompletedAtDesc(tableUuid, 
PageRequest.of(0, limit)).stream() .map(mapper::toDto) .collect(Collectors.toList()); } From b0898e3553eab6d54403793e9cda27aefa4309c9 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 10:10:52 -0700 Subject: [PATCH 041/104] refactor(optimizer-analyzer): depend on :services:optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `:apps:optimizer-data` module was removed in the prior optimizer-2 forward commit. The analyzer's JPA + model classes now live in `:services:optimizer`; package FQNs are unchanged, so no Java imports need updating — only the gradle dep. --- apps/optimizer-analyzer/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/optimizer-analyzer/build.gradle b/apps/optimizer-analyzer/build.gradle index 84cc38857..f66ecc608 100644 --- a/apps/optimizer-analyzer/build.gradle +++ b/apps/optimizer-analyzer/build.gradle @@ -4,7 +4,7 @@ plugins { } dependencies { - implementation project(':apps:optimizer-data') + implementation project(':services:optimizer') implementation 'org.springframework.boot:spring-boot-starter:2.7.8' implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' From 9a129a8ca7848bf1db15dbf05f847bc7c593e8ef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:00:04 -0700 Subject: [PATCH 042/104] =?UTF-8?q?refactor(optimizer):=20align=20data=20m?= =?UTF-8?q?odel=20=E2=80=94=20rename=20HistoryStatus;=20String=20at=20JPA?= =?UTF-8?q?=20boundary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward fix in response to review feedback that data-model decisions belong in this PR (optimizer-0), not in downstream stack layers. Brings the data-model end-state to where optimizer-1+ already are, so the optimizer-0..optimizer-1 diff is just repositories + wiring. 
- Rename api/model/OperationHistoryStatus → HistoryStatus. - Move api/model/TableStats → model/TableStats (the in-memory stats domain type is used by both entities and DTOs, so it lives in a neutral package rather than under api/model/). - Delete config/JobResultConverter. Entities now store the JobResult as a JSON String column directly; serialization happens at the wire-API boundary via OptimizerMapper helpers. - Switch the operation/status columns on TableOperationsRow and TableOperationsHistoryRow from JPA-bound enums to String. Keeps the entity layer decoupled from wire-API enum identity. - Add String↔OperationType, String↔OperationStatus, String↔HistoryStatus, and String↔JobResult default helpers to OptimizerMapper so MapStruct can bridge entity (String) and DTO (typed) columns. - Update DTO/entity imports to follow the renamed/moved types. --- .../optimizer/api/mapper/OptimizerMapper.java | 60 +++++++++++++++++++ .../api/model/CompleteOperationRequest.java | 2 +- ...nHistoryStatus.java => HistoryStatus.java} | 2 +- .../api/model/TableOperationsHistoryDto.java | 2 +- .../optimizer/api/model/TableStatsDto.java | 1 + .../api/model/TableStatsHistoryDto.java | 1 + .../api/model/UpsertTableStatsRequest.java | 1 + .../optimizer/config/JobResultConverter.java | 39 ------------ .../entity/TableOperationsHistoryRow.java | 25 ++++---- .../optimizer/entity/TableOperationsRow.java | 15 +++-- .../entity/TableStatsHistoryRow.java | 2 +- .../optimizer/entity/TableStatsRow.java | 2 +- .../optimizer/{api => }/model/TableStats.java | 2 +- 13 files changed, 86 insertions(+), 68 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationHistoryStatus.java => HistoryStatus.java} (78%) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/{api => }/model/TableStats.java (96%) diff --git 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java index 8c0b17462..36d4b5f4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java @@ -1,5 +1,11 @@ package com.linkedin.openhouse.optimizer.api.mapper; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.api.model.JobResult; +import com.linkedin.openhouse.optimizer.api.model.OperationStatus; +import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; @@ -14,10 +20,16 @@ * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. * *

Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. + * + *

Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and + * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's + * typed enums and nested objects. */ @Mapper(componentModel = "spring") public interface OptimizerMapper { + ObjectMapper JSON = new ObjectMapper(); + /** Map a {@link TableOperationsRow} to its DTO. */ TableOperationsDto toDto(TableOperationsRow row); @@ -29,4 +41,52 @@ public interface OptimizerMapper { /** Map a {@link TableStatsHistoryRow} to its DTO. */ TableStatsHistoryDto toDto(TableStatsHistoryRow row); + + // --- entity String ↔ wire enum/object helpers --- + + default OperationType toOperationType(String value) { + return value == null ? null : OperationType.valueOf(value); + } + + default String fromOperationType(OperationType value) { + return value == null ? null : value.name(); + } + + default OperationStatus toOperationStatus(String value) { + return value == null ? null : OperationStatus.valueOf(value); + } + + default String fromOperationStatus(OperationStatus value) { + return value == null ? null : value.name(); + } + + default HistoryStatus toHistoryStatus(String value) { + return value == null ? null : HistoryStatus.valueOf(value); + } + + default String fromHistoryStatus(HistoryStatus value) { + return value == null ? 
null : value.name(); + } + + default JobResult toJobResult(String json) { + if (json == null) { + return null; + } + try { + return JSON.readValue(json, JobResult.class); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); + } + } + + default String fromJobResult(JobResult value) { + if (value == null) { + return null; + } + try { + return JSON.writeValueAsString(value); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Failed to serialize JobResult to JSON", e); + } + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 35f7ba782..4f3f6535a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -18,7 +18,7 @@ public class CompleteOperationRequest { /** Outcome of the operation. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Error details on failure; {@code null} on success. 
*/ private JobResult result; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java similarity index 78% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 791d910a6..2fbcf6235 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationHistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum OperationHistoryStatus { +public enum HistoryStatus { SUCCESS, FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 2a901ad2b..a7a9d9dc6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,7 +27,7 @@ public class TableOperationsHistoryDto { private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private OperationHistoryStatus status; + private HistoryStatus status; /** Job ID from the Jobs Service. 
*/ private String jobId; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..4aad1e18f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..6d515a543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..8bb317676 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import 
lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java deleted file mode 100644 index 4c9bfbe76..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/config/JobResultConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.linkedin.openhouse.optimizer.config; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import java.io.IOException; -import javax.persistence.AttributeConverter; -import javax.persistence.Converter; - -/** JPA {@link AttributeConverter} that serializes {@link JobResult} to/from a JSON string. */ -@Converter -public class JobResultConverter implements AttributeConverter { - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Override - public String convertToDatabaseColumn(JobResult attribute) { - if (attribute == null) { - return null; - } - try { - return OBJECT_MAPPER.writeValueAsString(attribute); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } - - @Override - public JobResult convertToEntityAttribute(String dbData) { - if (dbData == null) { - return null; - } - try { - return OBJECT_MAPPER.readValue(dbData, JobResult.class); - } catch (IOException e) { - throw new IllegalStateException("Failed to deserialize JobResult from JSON: " + dbData, e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java index 3b6ced892..8303a4579 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java @@ -1,15 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationHistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.config.JobResultConverter; import java.time.Instant; import javax.persistence.Column; -import javax.persistence.Convert; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -27,6 +20,11 @@ * originating {@code table_operations.id}, tying each history entry back to the operation cycle * that produced it. Multiple runs of the same operation on the same table produce multiple rows * (each cycle gets a new UUID from the Analyzer). + * + *

{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} + * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and + * structured-result types. The wire layer is responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -60,25 +58,22 @@ public class TableOperationsHistoryRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationHistoryStatus status; + private String status; /** Spark job ID; indexed for job → result lookups. */ @Column(name = "job_id", length = 255) private String jobId; - /** Job result: error details on failure, both fields null on success. */ - @Convert(converter = JobResultConverter.class) - @Column(name = "result") - private JobResult result; + /** Job result JSON blob: error details on failure, both fields null on success. 
*/ + @Column(name = "result", columnDefinition = "TEXT") + private String result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java index 43778495a..5d90f3d12 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java @@ -1,12 +1,8 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; import java.time.Instant; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.EnumType; -import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.Table; @@ -26,6 +22,11 @@ * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, * operation_type)} at a time. + * + *

{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound + * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is + * responsible for converting at the boundary via {@link + * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. */ @Entity @Table( @@ -59,13 +60,11 @@ public class TableOperationsRow { @Column(name = "table_name", nullable = false, length = 128) private String tableName; - @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) - private OperationType operationType; + private String operationType; - @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) - private OperationStatus status; + private String status; /** When the Analyzer first created this row. Set by the service on insert; never updated. */ @Column(name = "created_at", nullable = false) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index b0d92fc81..6ead5e42c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import javax.persistence.Column; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index f682a3485..2a1414567 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.entity; -import com.linkedin.openhouse.optimizer.api.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.vladmihalcea.hibernate.type.json.JsonStringType; import java.time.Instant; import java.util.Map; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 64c99061a..3b56196ea 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; From dfb910291443bcfe4b6adfb724808dadcb0c8c5a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:07:53 -0700 Subject: [PATCH 043/104] refactor(optimizer): realign entity shapes with optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R7-1 imported the looser apps-side variant of TableStatsRow, TableStatsHistoryRow, and model/TableStats into the services-side paths, regressing the locked-down shape that optimizer-0 had. R8-1's git mv carried the regression forward. 
This commit makes optimizer-1's HEAD match optimizer-0's canonical shape so the optimizer-0..optimizer-1 diff no longer shows ghost model edits. - TableStatsRow: restore @EqualsAndHashCode, AccessLevel.PROTECTED on NoArgsConstructor + AllArgsConstructor, and toBuilder=true on @Builder. Drop @Setter (no callers — repo tests and downstream consumers use the builder). - TableStatsHistoryRow: restore the dropped "can" in the javadoc. - model/TableStats: restore @JsonIgnoreProperties(ignoreUnknown = true) on the outer class + both inner classes, and restore the CommitDelta.addedSizeBytes field that R7-1 dropped. --- .../optimizer/entity/TableStatsHistoryRow.java | 2 +- .../optimizer/entity/TableStatsRow.java | 18 +++++++++++------- .../openhouse/optimizer/model/TableStats.java | 9 +++++++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java index 6f41881d6..6ead5e42c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java @@ -21,7 +21,7 @@ * Append-only record of per-commit stats reported by the Tables Service. * *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can * query this table to reconstruct change rates over arbitrary time windows. */ @TypeDef(name = "json", typeClass = JsonStringType.class) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java index bc647d86e..2a1414567 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java @@ -8,26 +8,29 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.Table; +import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; -import lombok.Setter; import org.hibernate.annotations.Type; import org.hibernate.annotations.TypeDef; /** - * JPA entity for the optimizer {@code table_stats} table. Written by the Tables Service on every - * Iceberg commit; read by the Analyzer and Scheduler directly via JPA. + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @Table(name = "table_stats") @Getter -@Setter -@Builder -@NoArgsConstructor -@AllArgsConstructor +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsRow { @Id @@ -48,6 +51,7 @@ public class TableStatsRow { @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ @Column(name = "updated_at", nullable = false) private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 5e0f51468..3b56196ea 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,15 +1,17 @@ package com.linkedin.openhouse.optimizer.model; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table in {@code table_stats}. */ +/** Combined stats payload stored as a single JSON blob per table. */ @Data @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { /** Snapshot fields — overwritten on every upsert. 
*/ @@ -23,6 +25,7 @@ public class TableStats { @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { private String clusterId; private String tableVersion; @@ -32,14 +35,16 @@ public static class SnapshotMetrics { private Long numCurrentFiles; } - /** Per-commit incremental counters accumulated across all recorded commit events. */ + /** Per-commit incremental counters; accumulated across all recorded commit events. */ @Data @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { private Long numFilesAdded; private Long numFilesDeleted; + private Long addedSizeBytes; private Long deletedSizeBytes; } } From 681407ef6a1a1d2dc34dee2a4ca308c5d008ca3f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:18:39 -0700 Subject: [PATCH 044/104] feat(optimizer): add internal model layer Per-layer types: wire-API enums (api/model/), DB-side String at JPA boundary, and an internal in-memory model layer that is what the analyzer and scheduler operate on. The wire and DB sides convert at their boundary; consumers of the optimizer library work in the internal types. - model/HistoryStatus, model/OperationStatus, model/OperationType: internal enums mirroring the wire-API counterparts. Decoupled so the analyzer/scheduler can evolve their state machines without churning the wire or DB shapes. - model/Table: an OpenHouse table enriched with stats + properties. Built from a TableStatsRow. - model/TableOperation: analyzer's decision-to-schedule + scheduler's unit of work. Constructed from TableOperationsRow or from a Table; converts back via toRow(). 
--- .../optimizer/model/HistoryStatus.java | 13 +++ .../optimizer/model/OperationStatus.java | 15 +++ .../optimizer/model/OperationType.java | 10 ++ .../openhouse/optimizer/model/Table.java | 41 +++++++ .../optimizer/model/TableOperation.java | 106 ++++++++++++++++++ 5 files changed, 185 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java new file mode 100644 index 000000000..d29c88719 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle outcomes for a completed operation. Mirrors the values written to {@code + * table_operations_history.status}; parsed at the boundary so callers switch on a typed value + * instead of comparing strings. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java new file mode 100644 index 000000000..66f213c73 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -0,0 +1,15 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal lifecycle states for an operation. The analyzer writes {@link #PENDING}; the scheduler + * transitions through {@link #SCHEDULING} and {@link #SCHEDULED}. {@link #CANCELED} marks + * deduplicated PENDING rows. + * + *

Intentionally separate from the wire-API and DB representations. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java new file mode 100644 index 000000000..bea44018b --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -0,0 +1,10 @@ +package com.linkedin.openhouse.optimizer.model; + +/** + * Internal enum for the operation types the analyzer and scheduler know about. Intentionally + * separate from the wire-API and DB representations so the internal model can evolve its set of + * supported operations without churning either boundary. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java new file mode 100644 index 000000000..e232803dd --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -0,0 +1,41 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed + * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * stats for bin-packing). 
+ */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class Table { + + private String tableUuid; + private String databaseName; + private String tableId; + + @Builder.Default private Map tableProperties = Collections.emptyMap(); + + private TableStats stats; + + /** Build a {@code Table} from a {@code table_stats} row. */ + public static Table from(TableStatsRow row) { + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(row.getStats()) + .build(); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java new file mode 100644 index 000000000..d49625a57 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -0,0 +1,106 @@ +package com.linkedin.openhouse.optimizer.model; + +import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import java.time.Instant; +import java.util.Comparator; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks + * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current + * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA + * row via {@link #toRow()}. + * + *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., + * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not + * carry it. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperation { + + /** Unique operation ID (UUID). */ + private String id; + + /** The table this operation targets. */ + private String tableUuid; + + /** Database name. */ + private String databaseName; + + /** Table name. */ + private String tableName; + + /** Operation type. */ + private OperationType operationType; + + /** Current lifecycle status. */ + private OperationStatus status; + + /** When this operation record was created. */ + private Instant createdAt; + + /** When the scheduler last submitted a job for this operation. */ + private Instant scheduledAt; + + /** + * Number of current data files on the table at evaluation time. Non-persisted enrichment; + * populated by consumers that need it. Null when not enriched. + */ + private Long fileCount; + + /** Build a {@code TableOperation} from an existing JPA row. */ + public static TableOperation from(TableOperationsRow row) { + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.valueOf(row.getOperationType())) + .status(OperationStatus.valueOf(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + /** Create a new PENDING operation for the given table and operation type. 
*/ + public static TableOperation pending(Table table, OperationType operationType) { + return TableOperation.builder() + .id(UUID.randomUUID().toString()) + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .operationType(operationType) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + /** Convert to a JPA entity for persistence. */ + public TableOperationsRow toRow() { + return TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType.name()) + .status(status.name()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .version(0L) + .build(); + } + + /** Return the more recently created of two operations. */ + public static TableOperation mostRecent(TableOperation a, TableOperation b) { + Comparator<TableOperation> byCreatedAt = + Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); + return byCreatedAt.compare(a, b) >= 0 ? a : b; + } +} From d7767e89330b7af78009cfe25bb44d1de7adc14a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:22:28 -0700 Subject: [PATCH 045/104] fix(optimizer-analyzer): rewrite AnalyzerRunnerTest to use entity builders TableStatsRow and TableOperationsRow had their constructors locked down (protected) and @Setter removed when the entity shape was realigned with optimizer-0. The analyzer tests still used `new Entity()` + setters, which no longer compiles. Switch to the builder pattern; behaviour-equivalent. 
--- .../analyzer/AnalyzerRunnerTest.java | 54 +++++++++---------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index a62be5622..6ff8739fa 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -49,10 +49,8 @@ void setUp() { @Test void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); - statsEntity.setTableName("tbl1"); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); @@ -79,20 +77,20 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { @Test void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); - statsEntity.setTableName("tbl1"); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); - TableOperationsRow existingEntity = new TableOperationsRow(); - existingEntity.setId("existing-op-id"); - existingEntity.setStatus("PENDING"); - existingEntity.setTableUuid("uuid-1"); - existingEntity.setOperationType(OFD); - existingEntity.setCreatedAt(Instant.now()); + TableOperationsRow existingEntity = + TableOperationsRow.builder() + .id("existing-op-id") + .status("PENDING") + .tableUuid("uuid-1") + .operationType(OFD) 
+ .createdAt(Instant.now()) + .build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(existingEntity)); @@ -110,9 +108,8 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { @Test void analyze_skipsTable_whenNotEnabled() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); @@ -128,18 +125,19 @@ void analyze_skipsTable_whenNotEnabled() { @Test void analyze_skipsTable_whenShouldScheduleReturnsFalse() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid("uuid-1"); - statsEntity.setDatabaseName(DB); + TableStatsRow statsEntity = + TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); - TableOperationsRow scheduled = new TableOperationsRow(); - scheduled.setId("op-id"); - scheduled.setStatus("SCHEDULED"); - scheduled.setTableUuid("uuid-1"); - scheduled.setOperationType(OFD); - scheduled.setCreatedAt(Instant.now()); + TableOperationsRow scheduled = + TableOperationsRow.builder() + .id("op-id") + .status("SCHEDULED") + .tableUuid("uuid-1") + .operationType(OFD) + .createdAt(Instant.now()) + .build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(scheduled)); @@ -157,9 +155,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { @Test void analyze_skipsTable_whenTableUuidIsNull() { - TableStatsRow statsEntity = new TableStatsRow(); - statsEntity.setTableUuid(null); - statsEntity.setDatabaseName(DB); + TableStatsRow statsEntity = TableStatsRow.builder().databaseName(DB).build(); 
+ when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); + when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); From e3fb7770613e8635bb4f68bded2945e1845d7510 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 12:26:51 -0700 Subject: [PATCH 046/104] perf(optimizer): index table_operations_history for findLatestPerTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add idx_toph_optype_uuid_completed on table_operations_history. Drives the correlated MAX(completed_at) subquery in TableOperationsHistoryRepository.findLatestPerTable (introduced in optimizer-1), turning it into an index-only lookup per (operation_type, table_uuid) instead of an O(N²) scan. Lands with the schema in optimizer-0 since the index is part of the data model definition; the query that depends on it lands with the repository in optimizer-1. --- .../optimizer/src/main/resources/db/optimizer-schema.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 4c2d9604b..322f3bf92 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -48,5 +48,9 @@ CREATE TABLE IF NOT EXISTS table_operations_history ( job_id VARCHAR(255), result TEXT, PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name) + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationsHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. 
+ INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) ); From d3e17262f5ec8b0e97b54d8312da746278680a6f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:11:11 -0700 Subject: [PATCH 047/104] refactor(optimizer): enforce layer boundaries in api/ + model/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make data types in api/ and model/ self-contained — no cross-layer imports between them and no references into the DB layer. The internal model layer owns conversion to the api edge via a new mapper sub-package. api/ changes: - Add api/model/TableStats (api-layer copy of the JSON payload). - Drop cross-layer imports from TableStatsDto, TableStatsHistoryDto, UpsertTableStatsRequest; they pick up TableStats from the same package. model/ changes: - Add model/JobResult (internal copy of the result payload). - Add model/TableOperationsHistory (internal container mirroring the history-row field set in typed form). - Remove cross-layer factory methods Table.from(TableStatsRow), TableOperation.from(TableOperationsRow), and TableOperation.toRow(). Construction at the DB boundary moves to a future model/mapper/ ModelDbMapper that ships with the db/ rename on optimizer-1. - Add model/mapper/ApiModelMapper — converts api/ DTOs ↔ model/ types. Only place inside model/ where api/ types appear. Per-PR ownership: - api/ and model/ live on this PR. - db/ (currently entity/) and its boundary-side mapper (model/mapper/ModelDbMapper) land on optimizer-1. - The existing api/mapper/OptimizerMapper still references entity/ on this branch; it gets retired on optimizer-2 once the service routes through the new mappers. 
--- .../optimizer/api/model/TableStats.java | 55 ++++ .../optimizer/api/model/TableStatsDto.java | 1 - .../api/model/TableStatsHistoryDto.java | 1 - .../api/model/UpsertTableStatsRequest.java | 1 - .../openhouse/optimizer/model/JobResult.java | 25 ++ .../openhouse/optimizer/model/Table.java | 16 +- .../optimizer/model/TableOperation.java | 38 +-- .../model/TableOperationsHistory.java | 47 ++++ .../model/mapper/ApiModelMapper.java | 234 ++++++++++++++++++ 9 files changed, 369 insertions(+), 49 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java new file mode 100644 index 000000000..de268ffe7 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.api.model; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Combined stats payload exposed on the optimizer wire API. + * + *

API-layer copy of the stats payload — self-contained, evolved only when the wire contract + * changes. + */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. 
*/ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 4aad1e18f..81dd6b802 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import java.util.Map; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 6d515a543..4a994fdb3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 8bb317676..02290bad5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java 
+++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.TableStats; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java new file mode 100644 index 000000000..7e48dd0ef --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java @@ -0,0 +1,25 @@ +package com.linkedin.openhouse.optimizer.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model result payload for a completed Spark maintenance job. + * + *

Internal-layer copy of the structured result. Both fields are {@code null} on success; + * populated on failure. Intentionally separate from the wire-API and DB representations. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class JobResult { + + /** Human-readable error message; {@code null} if the job succeeded. */ + private String errorMessage; + + /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ + private String errorType; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index e232803dd..c8bede225 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -12,6 +11,9 @@ * An OpenHouse table enriched with stats and properties, built by combining data sources. Consumed * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads * stats for bin-packing). + * + *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link + * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. */ @Data @Builder @@ -26,16 +28,4 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; - - /** Build a {@code Table} from a {@code table_stats} row. */ - public static Table from(TableStatsRow row) { - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(row.getStats()) - .build(); - } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index d49625a57..1f14dddff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.model; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -11,9 +10,11 @@ /** * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks - * up and submits. Built either from an existing {@link TableOperationsRow} (when loading current - * state) or from a {@link Table} (when creating a new PENDING operation). Converts back to a JPA - * row via {@link #toRow()}. + * up and submits. + * + *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction + * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or + * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). * *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not @@ -55,20 +56,6 @@ public class TableOperation { */ private Long fileCount; - /** Build a {@code TableOperation} from an existing JPA row. */ - public static TableOperation from(TableOperationsRow row) { - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(OperationType.valueOf(row.getOperationType())) - .status(OperationStatus.valueOf(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() @@ -82,21 +69,6 @@ public static TableOperation pending(Table table, OperationType operationType) { .build(); } - /** Convert to a JPA entity for persistence. */ - public TableOperationsRow toRow() { - return TableOperationsRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .operationType(operationType.name()) - .status(status.name()) - .createdAt(createdAt) - .scheduledAt(scheduledAt) - .version(0L) - .build(); - } - /** Return the more recently created of two operations. 
*/ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator<TableOperation> byCreatedAt = diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java new file mode 100644 index 000000000..64e0d57b3 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -0,0 +1,47 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of a completed operation history record. + * + *

Mirrors the field set of the underlying history row but in internal types only. Used by + * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableOperationsHistory { + + /** Same UUID as the originating live-operations row. */ + private String id; + + /** Stable table identity from the Tables Service. */ + private String tableUuid; + + /** Denormalized database name. */ + private String databaseName; + + /** Denormalized table name. */ + private String tableName; + + /** Operation type for this completed run. */ + private OperationType operationType; + + /** When the operation completed, as recorded by the complete endpoint. */ + private Instant completedAt; + + /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ + private HistoryStatus status; + + /** Spark job ID for the run that produced this record. */ + private String jobId; + + /** Job result payload; both inner fields {@code null} on success. 
*/ + private JobResult result; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java new file mode 100644 index 000000000..2ae477e0d --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -0,0 +1,234 @@ +package com.linkedin.openhouse.optimizer.model.mapper; + +import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.JobResult; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; +import org.springframework.stereotype.Component; + +/** + * Converts between wire-API DTOs and internal {@code model/} domain objects. + * + *

The only place inside {@code model/} where {@code api/} types are referenced — this is the + * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} + * stay free of any api-side imports. + * + *

API-layer enums + payloads are intentionally separate Java types from the internal-model + * counterparts; the two sides evolve independently. This mapper translates by name. + */ +@Component +public class ApiModelMapper { + + // --- TableOperationsDto <-> TableOperation --- + + public TableOperation toOperation(TableOperationsDto dto) { + if (dto == null) { + return null; + } + return TableOperation.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .status(toModelOperationStatus(dto.getStatus())) + .createdAt(dto.getCreatedAt()) + .scheduledAt(dto.getScheduledAt()) + .build(); + } + + public TableOperationsDto toDto(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(toApiOperationType(op.getOperationType())) + .status(toApiOperationStatus(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } + + // --- TableOperationsHistoryDto <-> TableOperationsHistory --- + + public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { + if (dto == null) { + return null; + } + return TableOperationsHistory.builder() + .id(dto.getId()) + .tableUuid(dto.getTableUuid()) + .databaseName(dto.getDatabaseName()) + .tableName(dto.getTableName()) + .operationType(toModelOperationType(dto.getOperationType())) + .completedAt(dto.getCompletedAt()) + .status(toModelHistoryStatus(dto.getStatus())) + .jobId(dto.getJobId()) + .result(toModelJobResult(dto.getResult())) + .build(); + } + + public TableOperationsHistoryDto toDto(TableOperationsHistory history) { + if (history == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + 
.databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .operationType(toApiOperationType(history.getOperationType())) + .completedAt(history.getCompletedAt()) + .status(toApiHistoryStatus(history.getStatus())) + .jobId(history.getJobId()) + .result(toApiJobResult(history.getResult())) + .build(); + } + + // --- TableStats payload --- + + public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { + if (apiStats == null) { + return null; + } + return TableStats.builder() + .snapshot(toModelSnapshot(apiStats.getSnapshot())) + .delta(toModelDelta(apiStats.getDelta())) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { + if (modelStats == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() + .snapshot(toApiSnapshot(modelStats.getSnapshot())) + .delta(toApiDelta(modelStats.getDelta())) + .build(); + } + + // --- enum helpers --- + + public OperationType toModelOperationType( + com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { + return apiValue == null ? null : OperationType.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( + OperationType modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); + } + + public OperationStatus toModelOperationStatus( + com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { + return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( + OperationStatus modelValue) { + return modelValue == null + ? 
null + : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); + } + + public HistoryStatus toModelHistoryStatus( + com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { + return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); + } + + public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( + HistoryStatus modelValue) { + return modelValue == null + ? null + : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); + } + + // --- JobResult --- + + public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { + if (apiValue == null) { + return null; + } + return JobResult.builder() + .errorMessage(apiValue.getErrorMessage()) + .errorType(apiValue.getErrorType()) + .build(); + } + + public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() + .errorMessage(modelValue.getErrorMessage()) + .errorType(modelValue.getErrorType()) + .build(); + } + + // --- TableStats inner classes --- + + private TableStats.SnapshotMetrics toModelSnapshot( + com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.SnapshotMetrics.builder() + .clusterId(apiValue.getClusterId()) + .tableVersion(apiValue.getTableVersion()) + .tableLocation(apiValue.getTableLocation()) + .tableSizeBytes(apiValue.getTableSizeBytes()) + .numCurrentFiles(apiValue.getNumCurrentFiles()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( + TableStats.SnapshotMetrics modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() + .clusterId(modelValue.getClusterId()) + 
.tableVersion(modelValue.getTableVersion()) + .tableLocation(modelValue.getTableLocation()) + .tableSizeBytes(modelValue.getTableSizeBytes()) + .numCurrentFiles(modelValue.getNumCurrentFiles()) + .build(); + } + + private TableStats.CommitDelta toModelDelta( + com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { + if (apiValue == null) { + return null; + } + return TableStats.CommitDelta.builder() + .numFilesAdded(apiValue.getNumFilesAdded()) + .numFilesDeleted(apiValue.getNumFilesDeleted()) + .addedSizeBytes(apiValue.getAddedSizeBytes()) + .deletedSizeBytes(apiValue.getDeletedSizeBytes()) + .build(); + } + + private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( + TableStats.CommitDelta modelValue) { + if (modelValue == null) { + return null; + } + return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() + .numFilesAdded(modelValue.getNumFilesAdded()) + .numFilesDeleted(modelValue.getNumFilesDeleted()) + .addedSizeBytes(modelValue.getAddedSizeBytes()) + .deletedSizeBytes(modelValue.getDeletedSizeBytes()) + .build(); + } +} From 1d469a72fdb68133c95cd8def12027f428ab2acd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:17:49 -0700 Subject: [PATCH 048/104] refactor(optimizer): remove db-layer types from optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DB layer (entities + api↔db mapper) belongs to optimizer-1, not optimizer-0. optimizer-0 owns only the wire-API surface and the internal model. Delete from this PR: - entity/ package (TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow, package-info). - api/mapper/OptimizerMapper — was the api↔entity bridge. With the entity files moving out of this PR and the new model/mapper/ taking over conversion duties, this mapper is no longer needed here. 
optimizer-1 will re-introduce these as db/ (renamed) with db-side per-layer types and a model/mapper/ModelDbMapper. --- .../optimizer/api/mapper/OptimizerMapper.java | 92 ------------------- .../entity/TableOperationsHistoryRow.java | 79 ---------------- .../optimizer/entity/TableOperationsRow.java | 87 ------------------ .../entity/TableStatsHistoryRow.java | 61 ------------ .../optimizer/entity/TableStatsRow.java | 57 ------------ .../optimizer/entity/package-info.java | 2 - 6 files changed, 378 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java deleted file mode 100644 index 36d4b5f4b..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/mapper/OptimizerMapper.java +++ /dev/null @@ -1,92 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.mapper; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.JobResult; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import 
com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import org.mapstruct.Mapper; - -/** - * MapStruct mapper for converting between optimizer JPA entities and their corresponding DTOs. - * - *

Spring-instantiated at compile time. Inject via {@code @Autowired} or constructor injection. - * - *

Type-conversion helpers bridge the entity's raw String/JSON shape (the entities keep enum and - * structured-result columns as Strings to stay decoupled from wire-API identity) and the wire DTO's - * typed enums and nested objects. - */ -@Mapper(componentModel = "spring") -public interface OptimizerMapper { - - ObjectMapper JSON = new ObjectMapper(); - - /** Map a {@link TableOperationsRow} to its DTO. */ - TableOperationsDto toDto(TableOperationsRow row); - - /** Map a {@link TableOperationsHistoryRow} to its DTO. */ - TableOperationsHistoryDto toDto(TableOperationsHistoryRow row); - - /** Map a {@link TableStatsRow} to its DTO. */ - TableStatsDto toDto(TableStatsRow row); - - /** Map a {@link TableStatsHistoryRow} to its DTO. */ - TableStatsHistoryDto toDto(TableStatsHistoryRow row); - - // --- entity String ↔ wire enum/object helpers --- - - default OperationType toOperationType(String value) { - return value == null ? null : OperationType.valueOf(value); - } - - default String fromOperationType(OperationType value) { - return value == null ? null : value.name(); - } - - default OperationStatus toOperationStatus(String value) { - return value == null ? null : OperationStatus.valueOf(value); - } - - default String fromOperationStatus(OperationStatus value) { - return value == null ? null : value.name(); - } - - default HistoryStatus toHistoryStatus(String value) { - return value == null ? null : HistoryStatus.valueOf(value); - } - - default String fromHistoryStatus(HistoryStatus value) { - return value == null ? 
null : value.name(); - } - - default JobResult toJobResult(String json) { - if (json == null) { - return null; - } - try { - return JSON.readValue(json, JobResult.class); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to parse JobResult JSON from DB", e); - } - } - - default String fromJobResult(JobResult value) { - if (value == null) { - return null; - } - try { - return JSON.writeValueAsString(value); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Failed to serialize JobResult to JSON", e); - } - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java deleted file mode 100644 index 8303a4579..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsHistoryRow.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * Append-only record of a completed maintenance operation. - * - *

Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the - * originating {@code table_operations.id}, tying each history entry back to the operation cycle - * that produced it. Multiple runs of the same operation on the same table produce multiple rows - * (each cycle gets a new UUID from the Analyzer). - * - *

{@code operationType}, {@code status}, and {@code result} are stored as plain {@code String} - * (the last as a JSON blob) so the entity layer stays decoupled from the wire-API enum and - * structured-result types. The wire layer is responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations_history", - indexes = { - @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), - @Index(name = "idx_op_type_hist", columnList = "operation_type"), - @Index(name = "idx_completed_at", columnList = "completed_at"), - @Index(name = "idx_status_hist", columnList = "status"), - @Index(name = "idx_job_id", columnList = "job_id"), - @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsHistoryRow { - - /** Same UUID as the originating {@code table_operations.id}. Set by the caller; not generated. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - /** When the operation completed, as recorded by the complete endpoint. */ - @Column(name = "completed_at", nullable = false) - private Instant completedAt; - - /** {@code SUCCESS} or {@code FAILED}. */ - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** Spark job ID; indexed for job → result lookups. 
*/ - @Column(name = "job_id", length = 255) - private String jobId; - - /** Job result JSON blob: error details on failure, both fields null on success. */ - @Column(name = "result", columnDefinition = "TEXT") - private String result; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java deleted file mode 100644 index 5d90f3d12..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableOperationsRow.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; - -/** - * JPA entity representing an Analyzer recommendation for a table maintenance operation. - * - *

Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row - * when it first recommends an operation for a table, or when re-recommending after a prior terminal - * state (SUCCESS/FAILED). Old terminal rows accumulate — they serve as implicit history. {@code - * table_uuid} is the stable identity for the table (survives renames; rotates on drop+recreate). - * The application enforces one active (PENDING or SCHEDULED) row per {@code (table_uuid, - * operation_type)} at a time. - * - *

{@code operationType} and {@code status} are stored as {@code String} rather than JPA-bound - * enums so the entity layer stays decoupled from the wire-API enum identity. The wire layer is - * responsible for converting at the boundary via {@link - * com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper}. - */ -@Entity -@Table( - name = "table_operations", - indexes = { - @Index(name = "idx_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_op_type", columnList = "operation_type"), - @Index(name = "idx_status", columnList = "status"), - @Index(name = "idx_created_at", columnList = "created_at"), - @Index(name = "idx_scheduled_at", columnList = "scheduled_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableOperationsRow { - - /** Client-generated UUID identifying this specific operation recommendation. */ - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Column(name = "operation_type", nullable = false, length = 50) - private String operationType; - - @Column(name = "status", nullable = false, length = 20) - private String status; - - /** When the Analyzer first created this row. Set by the service on insert; never updated. */ - @Column(name = "created_at", nullable = false) - private Instant createdAt; - - /** Set when the operation is claimed; {@code null} while {@code PENDING}. 
*/ - @Column(name = "scheduled_at") - private Instant scheduledAt; - - /** Job ID returned by the Jobs Service after successful submission. */ - @Column(name = "job_id", length = 255) - private String jobId; - - /** - * Manual optimistic lock for the Scheduler claim. Incremented by the raw {@code claimOperation} - * UPDATE query; must NOT use JPA {@code @Version} since the claim bypasses JPA entity management. - */ - @Column(name = "version") - private Long version; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java deleted file mode 100644 index 6ead5e42c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsHistoryRow.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * Append-only record of per-commit stats reported by the Tables Service. - * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table( - name = "table_stats_history", - indexes = { - @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), - @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") - }) -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsHistoryRow { - - @Id - @Column(name = "id", nullable = false, length = 36) - private String id; - - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Column(name = "recorded_at", nullable = false) - private Instant recordedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java deleted file mode 100644 index 2a1414567..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/TableStatsRow.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.entity; - -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.vladmihalcea.hibernate.type.json.JsonStringType; -import java.time.Instant; -import java.util.Map; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.Id; -import 
javax.persistence.Table; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NoArgsConstructor; -import org.hibernate.annotations.Type; -import org.hibernate.annotations.TypeDef; - -/** - * JPA entity representing a per-table stats snapshot in the optimizer DB. - * - *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA - * to enumerate tables and check scheduling eligibility. - */ -@TypeDef(name = "json", typeClass = JsonStringType.class) -@Entity -@Table(name = "table_stats") -@Getter -@EqualsAndHashCode -@Builder(toBuilder = true) -@NoArgsConstructor(access = AccessLevel.PROTECTED) -@AllArgsConstructor(access = AccessLevel.PROTECTED) -public class TableStatsRow { - - @Id - @Column(name = "table_uuid", nullable = false, length = 36) - private String tableUuid; - - @Column(name = "database_name", nullable = false, length = 128) - private String databaseName; - - @Column(name = "table_name", nullable = false, length = 128) - private String tableName; - - @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; - - @Type(type = "json") - @Column(name = "table_properties", columnDefinition = "TEXT") - private Map tableProperties; - - /** Set on every upsert. Used for stats pipeline staleness monitoring. */ - @Column(name = "updated_at", nullable = false) - private Instant updatedAt; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java deleted file mode 100644 index 7c0ca1f67..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/entity/package-info.java +++ /dev/null @@ -1,2 +0,0 @@ -/** JPA entities for the optimizer service. */ -package com.linkedin.openhouse.optimizer.entity; From eee8ecae794fecdc7676e02c0fb286cd3c98e9fa Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:20:28 -0700 Subject: [PATCH 049/104] refactor(optimizer): remove DB schema + schema-init properties The DDL is part of the db/ layer's ownership (optimizer-1). Move the schema file and its schema-init properties out of optimizer-0 so this PR is purely api/ + model/. 
Delete: - src/main/resources/db/optimizer-schema.sql. - spring.sql.init.mode, spring.sql.init.schema-locations, and spring.jpa.defer-datasource-initialization from application.properties (they reference the deleted schema file). optimizer-1 re-introduces these alongside the db/ entities and repositories. --- .../src/main/resources/application.properties | 4 -- .../main/resources/db/optimizer-schema.sql | 56 ------------------- 2 files changed, 60 deletions(-) delete mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index c6c3f8437..00982d80e 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -2,10 +2,6 @@ spring.application.name=openhouse-optimizer-service server.port=8080 spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql - spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect spring.jpa.properties.hibernate.show_sql=false spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql deleted file mode 100644 index 322f3bf92..000000000 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ /dev/null @@ -1,56 +0,0 @@ --- Optimizer Service Schema --- Compatible with MySQL (production) and H2 in MySQL mode (tests). 
-CREATE TABLE IF NOT EXISTS table_operations ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - status VARCHAR(20) NOT NULL, - created_at TIMESTAMP(6) NOT NULL, - scheduled_at TIMESTAMP(6), - job_id VARCHAR(255), - version BIGINT, - -- TODO: per-operation metric columns will be added as operations are onboarded. - PRIMARY KEY (id) -); - -CREATE TABLE IF NOT EXISTS table_stats ( - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - table_properties TEXT, - updated_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (table_uuid) -); - -CREATE TABLE IF NOT EXISTS table_stats_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - stats TEXT, - recorded_at TIMESTAMP(6) NOT NULL, - PRIMARY KEY (id), - INDEX idx_tsh_table_uuid (table_uuid), - INDEX idx_tsh_recorded_at (recorded_at) -); - -CREATE TABLE IF NOT EXISTS table_operations_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - completed_at TIMESTAMP(6) NOT NULL, - status VARCHAR(20) NOT NULL, - job_id VARCHAR(255), - result TEXT, - PRIMARY KEY (id), - INDEX idx_toph_db_table (database_name, table_name), - -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated - -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, - -- table_uuid) instead of an O(N²) scan. 
- INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) -); From 328e5b91b4c1db0f5abf22e37a4dea787d351bef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:23:10 -0700 Subject: [PATCH 050/104] refactor(optimizer): scrub MySQL / JPA / datasource references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DB-layer dependencies belong to optimizer-1. With entities, schema, and the api/mapper deleted from this PR, the JPA + MySQL stack is unused — remove the dependency declarations and configuration that referenced them. build.gradle: - Drop spring-boot-starter-data-jpa, mysql-connector-java, the vladmihalcea hibernate-types JSON serializer, and the h2 test runtime. application.properties: - Drop spring.jpa.* and spring.datasource.* lines. Delete services/optimizer/src/test/resources/application-test.properties (H2 test datasource config — re-introduced on optimizer-1 alongside the repositories and repo tests). 
--- services/optimizer/build.gradle | 4 ---- .../src/main/resources/application.properties | 11 ----------- .../src/test/resources/application-test.properties | 12 ------------ 3 files changed, 27 deletions(-) delete mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..2de8fd5c7 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,11 +4,7 @@ plugins { } dependencies { - implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' - implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' - implementation 'mysql:mysql-connector-java:8.+' - testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 00982d80e..64c40d1f2 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -1,16 +1,5 @@ spring.application.name=openhouse-optimizer-service server.port=8080 -spring.jpa.hibernate.ddl-auto=none -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect -spring.jpa.properties.hibernate.show_sql=false -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver -spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} -spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} -spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} -spring.datasource.hikari.maximum-pool-size=20 - management.endpoints.web.exposure.include=health,prometheus 
management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties deleted file mode 100644 index 97b7841dc..000000000 --- a/services/optimizer/src/test/resources/application-test.properties +++ /dev/null @@ -1,12 +0,0 @@ -spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 -spring.datasource.driver-class-name=org.h2.Driver -spring.datasource.username=sa -spring.datasource.password= - -spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.jpa.defer-datasource-initialization=true -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect -spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl - -spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql From f7a5d208e106cb5c1c051bc450f14833be1bb093 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:25:41 -0700 Subject: [PATCH 051/104] refactor(optimizer): drop UpsertTableOperationsRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No external system creates table operations — operations are written by the in-process analyzer directly through the model layer. The request type has no wire consumer and no internal consumer, so it's dead code. Delete services/optimizer/.../api/model/UpsertTableOperationsRequest.java. 
--- .../model/UpsertTableOperationsRequest.java | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java deleted file mode 100644 index 21174c337..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableOperationsRequest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * PUT request body for {@code /v1/table-operations/{id}}. - * - *

The Analyzer supplies the operation {@code id} (client-generated UUID) in the path and all - * table-identifying fields in this body. The service creates the row on first call. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class UpsertTableOperationsRequest { - - /** Stable Iceberg table UUID identifying the target table. */ - private String tableUuid; - - /** Denormalized database name for display. */ - private String databaseName; - - /** Denormalized table name for display. */ - private String tableName; - - /** The type of maintenance operation to create. */ - private OperationType operationType; -} From 2a532b577ed51507c72e836ea4d8778967f43062 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:47:42 -0700 Subject: [PATCH 052/104] refactor(optimizer): drop JobResult from the wire and internal model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JobResult is removed from the optimizer API. CompleteOperationRequest (user-edited) now carries only operationId + status — the failure detail abstraction has been retired. The internal model and DTOs no longer carry it either, and the type itself is deleted from both api/ and model/. CompleteOperationRequest: - operationId moved from path to body (user manual edit). - jobId field removed. - result field removed. api/model/TableOperationsHistoryDto: - Drop jobId and result fields. model/TableOperationsHistory: - Drop jobId and result fields. model/mapper/ApiModelMapper: - Remove toModelJobResult / toApiJobResult helpers + JobResult import. - toHistory()/toDto() no longer touch jobId or result. Delete: - services/optimizer/.../api/model/JobResult.java - services/optimizer/.../model/JobResult.java Downstream propagation: opt-2's service signature changes (completeOperation now takes only the request body); db/HistoryStatus remains needed on opt-1 but db/JobResult no longer is. 
See memory/tasks/mkuchenb-optimizer-3-fixes.md for the full propagation list. --- .../api/model/CompleteOperationRequest.java | 15 ++++++----- .../optimizer/api/model/JobResult.java | 25 ----------------- .../api/model/TableOperationsHistoryDto.java | 6 ----- .../openhouse/optimizer/model/JobResult.java | 25 ----------------- .../model/TableOperationsHistory.java | 6 ----- .../model/mapper/ApiModelMapper.java | 27 ------------------- 6 files changed, 9 insertions(+), 95 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 4f3f6535a..30648d497 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -8,8 +8,12 @@ /** * Request body for {@code POST /v1/table-operations/{id}/complete}. * - *

Reports the outcome of a completed operation. The backend looks up the operation row by {@code - * id} and writes a history entry with the operation's table metadata and the supplied result. + *

Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle + * operation UUID — the service looks up that one row and writes a history entry for it. + * + *

A single Spark job typically processes N tables and yields N independent (status, result) + * pairs — one per operation. Callers issue one complete request per operation; the service does not + * bulk-complete by job. */ @Data @Builder @@ -17,9 +21,8 @@ @AllArgsConstructor public class CompleteOperationRequest { - /** Outcome of the operation. */ - private HistoryStatus status; + private String operationId; - /** Error details on failure; {@code null} on success. */ - private JobResult result; + /** Terminal outcome for this single operation. */ + private HistoryStatus status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java deleted file mode 100644 index 74942243c..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Result payload for a completed Spark maintenance job. - * - *

Stored as JSON in the {@code result} column of {@code table_operations_history}. Both fields - * are {@code null} on success; populated on failure. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index a7a9d9dc6..d9fa1f387 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -28,10 +28,4 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; - - /** Job ID from the Jobs Service. */ - private String jobId; - - /** Job result payload; both fields null on success. */ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java deleted file mode 100644 index 7e48dd0ef..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/JobResult.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.linkedin.openhouse.optimizer.model; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Internal-model result payload for a completed Spark maintenance job. - * - *
<p>
Internal-layer copy of the structured result. Both fields are {@code null} on success; - * populated on failure. Intentionally separate from the wire-API and DB representations. - */ -@Data -@Builder -@NoArgsConstructor -@AllArgsConstructor -public class JobResult { - - /** Human-readable error message; {@code null} if the job succeeded. */ - private String errorMessage; - - /** Error category (e.g., {@code OOM}, {@code TIMEOUT}); {@code null} if the job succeeded. */ - private String errorType; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 64e0d57b3..fe5bee5f7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -38,10 +38,4 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; - - /** Spark job ID for the run that produced this record. */ - private String jobId; - - /** Job result payload; both inner fields {@code null} on success. 
*/ - private JobResult result; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 2ae477e0d..35af7fb25 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -3,7 +3,6 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.JobResult; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.TableOperation; @@ -72,8 +71,6 @@ public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { .operationType(toModelOperationType(dto.getOperationType())) .completedAt(dto.getCompletedAt()) .status(toModelHistoryStatus(dto.getStatus())) - .jobId(dto.getJobId()) - .result(toModelJobResult(dto.getResult())) .build(); } @@ -89,8 +86,6 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .operationType(toApiOperationType(history.getOperationType())) .completedAt(history.getCompletedAt()) .status(toApiHistoryStatus(history.getStatus())) - .jobId(history.getJobId()) - .result(toApiJobResult(history.getResult())) .build(); } @@ -154,28 +149,6 @@ public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStat : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); } - // --- JobResult --- - - public JobResult toModelJobResult(com.linkedin.openhouse.optimizer.api.model.JobResult apiValue) { - if (apiValue == null) { - return null; - } - return JobResult.builder() - 
.errorMessage(apiValue.getErrorMessage()) - .errorType(apiValue.getErrorType()) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.JobResult toApiJobResult(JobResult modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.JobResult.builder() - .errorMessage(modelValue.getErrorMessage()) - .errorType(modelValue.getErrorType()) - .build(); - } - // --- TableStats inner classes --- private TableStats.SnapshotMetrics toModelSnapshot( From 2e3a2316295d67105802f4a4c73032396048be9d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:52:15 -0700 Subject: [PATCH 053/104] feat(optimizer): add debug echo fields to CompleteOperationRequest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tableUuid, databaseName, tableName, and operationType to the complete request body. They're debug-only — the server keys lookup off operationId — but preserving them on logs and traces helps an operator diagnose a failing complete call without joining back to the operation row. --- .../api/model/CompleteOperationRequest.java | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java index 30648d497..0add634b5 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java @@ -6,14 +6,20 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/{id}/complete}. + * Request body for {@code POST /v1/table-operations/complete}. * - *
<p>
Reports the outcome of a single completed operation. The path's {@code id} is the per-cycle - * operation UUID — the service looks up that one row and writes a history entry for it. + *
<p>
Reports the outcome of a single completed operation. The service looks up the operation row by + * {@link #operationId} and writes a history entry for it. * - *
<p>
A single Spark job typically processes N tables and yields N independent (status, result) - * pairs — one per operation. Callers issue one complete request per operation; the service does not + *
<p>
A single Spark job typically processes N tables and yields N independent (status) outcomes — + * one per operation. Callers issue one complete request per operation; the service does not * bulk-complete by job. + * + *
<p>
The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link + * #operationType}) are debug-only echo information. The server does not key off them; they are + * preserved on log lines and traces so an operator looking at a failing complete call can see which + * (db, table, operation) the caller believed it was completing without joining back to the + * operation row. */ @Data @Builder @@ -21,8 +27,21 @@ @AllArgsConstructor public class CompleteOperationRequest { + /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ private HistoryStatus status; + + /** Debug echo: stable table identity the caller believed it was completing. */ + private String tableUuid; + + /** Debug echo: database name. */ + private String databaseName; + + /** Debug echo: table name. */ + private String tableName; + + /** Debug echo: operation type. */ + private OperationType operationType; } From db5eb2959a0fbbfba5d821ee36f00435248f9f5c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 13:53:16 -0700 Subject: [PATCH 054/104] refactor(optimizer): move application.properties out of optimizer-0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every line in application.properties is run-time config (server.port, spring.application.name, actuator endpoints). optimizer-0 has no controllers and no endpoint to serve — the file is doing nothing here. The first PR that actually runs a web service is optimizer-2. Delete the file from this PR. optimizer-2 will re-introduce it alongside the REST controllers. The OptimizerServiceApplication @SpringBootApplication shell stays on this branch — optimizer-1's repository tests use @SpringBootTest and need an application class to discover. 
--- services/optimizer/src/main/resources/application.properties | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 services/optimizer/src/main/resources/application.properties diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties deleted file mode 100644 index 64c40d1f2..000000000 --- a/services/optimizer/src/main/resources/application.properties +++ /dev/null @@ -1,5 +0,0 @@ -spring.application.name=openhouse-optimizer-service -server.port=8080 - -management.endpoints.web.exposure.include=health,prometheus -management.endpoint.health.enabled=true From ac3abc06fec4b3cb1433649e16536a3e0008a4a2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:04:51 -0700 Subject: [PATCH 055/104] feat(optimizer): introduce db/ layer with per-layer types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit optimizer-0 retired entity/, the schema, JPA/MySQL deps, and the api/mapper. This PR brings the DB layer back as db/ with its own self-contained types and a model↔db boundary mapper. db/ package: - TableOperationsRow, TableOperationsHistoryRow, TableStatsRow, TableStatsHistoryRow — JPA entities (same field set as the pre-deletion entity/ versions, with two exceptions: enum fields on the operations rows are now typed db/-side enums via @Enumerated(STRING), and TableOperationsHistoryRow loses the jobId/result columns since they were removed from the wire on optimizer-0). - OperationType, OperationStatus, HistoryStatus — db-layer enums. - TableStats (+ inner SnapshotMetrics, CommitDelta) — db-layer JSON payload, mirrors the model/ + api/ counterparts in shape but is its own class. model/mapper/ModelDbMapper: - Translates between model/ domain objects and db/ rows. - Lives in model/ per the boundary rule (model/ owns conversions to both edges; api/, model/, db/ data types are self-contained). 
Repositories: imports switched to db/; find() and findLatestPerTable take typed db enums instead of String. Repository tests: builders pass typed db enums; remove jobId/result fields no longer on TableOperationsHistoryRow. Schema (db/optimizer-schema.sql): restored. table_operations_history no longer has job_id / result columns. The idx_toph_optype_uuid_completed index for findLatestPerTable is preserved. build.gradle: restore spring-boot-starter-data-jpa, hibernate-types, mysql-connector-java, h2 dependencies. application-test.properties: restored (H2 test datasource). --- services/optimizer/build.gradle | 4 + .../openhouse/optimizer/db/HistoryStatus.java | 11 + .../optimizer/db/OperationStatus.java | 13 + .../openhouse/optimizer/db/OperationType.java | 12 + .../db/TableOperationsHistoryRow.java | 69 +++++ .../optimizer/db/TableOperationsRow.java | 87 +++++++ .../openhouse/optimizer/db/TableStats.java | 55 ++++ .../optimizer/db/TableStatsHistoryRow.java | 63 +++++ .../openhouse/optimizer/db/TableStatsRow.java | 59 +++++ .../optimizer/model/mapper/ModelDbMapper.java | 235 ++++++++++++++++++ .../TableOperationsHistoryRepository.java | 6 +- .../repository/TableOperationsRepository.java | 8 +- .../TableStatsHistoryRepository.java | 2 +- .../repository/TableStatsRepository.java | 2 +- .../main/resources/db/optimizer-schema.sql | 54 ++++ .../TableOperationsHistoryRepositoryTest.java | 47 ++-- .../TableOperationsRepositoryTest.java | 44 ++-- .../TableStatsHistoryRepositoryTest.java | 4 +- .../repository/TableStatsRepositoryTest.java | 4 +- .../resources/application-test.properties | 12 + 20 files changed, 734 insertions(+), 57 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java create mode 
100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java create mode 100644 services/optimizer/src/main/resources/db/optimizer-schema.sql create mode 100644 services/optimizer/src/test/resources/application-test.properties diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index 2de8fd5c7..c05c7f9c3 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -4,7 +4,11 @@ plugins { } dependencies { + implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' + implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'mysql:mysql-connector-java:8.+' + testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java new file mode 100644 index 000000000..94e573968 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java @@ -0,0 +1,11 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the {@code status} column of {@code table_operations_history}. + * + *
<p>
Self-contained: no references to api/ or model/ types. + */ +public enum HistoryStatus { + SUCCESS, + FAILED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java new file mode 100644 index 000000000..4e9161693 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java @@ -0,0 +1,13 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the {@code status} column of {@code table_operations}. + * + *
<p>
Self-contained: no references to api/ or model/ types. + */ +public enum OperationStatus { + PENDING, + SCHEDULING, + SCHEDULED, + CANCELED +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java new file mode 100644 index 000000000..3a896e415 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -0,0 +1,12 @@ +package com.linkedin.openhouse.optimizer.db; + +/** + * DB-layer enum for the operation types persisted in {@code table_operations.operation_type} and + * {@code table_operations_history.operation_type}. + * + *
<p>
Self-contained: no references to api/ or model/ types. JPA binds this via + * {@code @Enumerated(EnumType.STRING)}. + */ +public enum OperationType { + ORPHAN_FILES_DELETION +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java new file mode 100644 index 000000000..2e1230181 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java @@ -0,0 +1,69 @@ +package com.linkedin.openhouse.optimizer.db; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * Append-only record of a completed maintenance operation. + * + *
<p>
Written when the operation-complete endpoint is called. The {@code id} is the same UUID as the + * originating live-operations row, tying each history entry back to the operation cycle that + * produced it. Multiple runs of the same operation on the same table produce multiple rows. + * + *
<p>
Self-contained DB-layer type: enums are {@link OperationType} / {@link HistoryStatus} from the + * same package, JPA-bound as strings. + */ +@Entity +@Table( + name = "table_operations_history", + indexes = { + @Index(name = "idx_table_uuid_hist", columnList = "table_uuid"), + @Index(name = "idx_op_type_hist", columnList = "operation_type"), + @Index(name = "idx_completed_at", columnList = "completed_at"), + @Index(name = "idx_status_hist", columnList = "status"), + @Index(name = "idx_toph_db_table", columnList = "database_name, table_name") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsHistoryRow { + + /** Same UUID as the originating live-operations row. Set by the caller; not generated. */ + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Column(name = "completed_at", nullable = false) + private Instant completedAt; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private HistoryStatus status; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java new file mode 100644 index 000000000..9652214d3 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java @@ -0,0 +1,87 @@ +package com.linkedin.openhouse.optimizer.db; + +import 
java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * JPA entity representing an Analyzer recommendation for a table maintenance operation. + * + *
<p>
Each row is identified by a client-generated UUID ({@code id}). The Analyzer creates a new row + * when it first recommends an operation for a table, or when re-recommending after a prior terminal + * state. {@code table_uuid} is the stable identity for the table (survives renames; rotates on + * drop+recreate). The application enforces one active (PENDING / SCHEDULING / SCHEDULED) row per + * {@code (table_uuid, operation_type)} at a time. + * + *
<p>
Self-contained DB-layer type: enums are {@link OperationType} / {@link OperationStatus} from + * the same package, JPA-bound as strings. + */ +@Entity +@Table( + name = "table_operations", + indexes = { + @Index(name = "idx_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_op_type", columnList = "operation_type"), + @Index(name = "idx_status", columnList = "status"), + @Index(name = "idx_created_at", columnList = "created_at"), + @Index(name = "idx_scheduled_at", columnList = "scheduled_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableOperationsRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Enumerated(EnumType.STRING) + @Column(name = "operation_type", nullable = false, length = 50) + private OperationType operationType; + + @Enumerated(EnumType.STRING) + @Column(name = "status", nullable = false, length = 20) + private OperationStatus status; + + @Column(name = "created_at", nullable = false) + private Instant createdAt; + + @Column(name = "scheduled_at") + private Instant scheduledAt; + + /** Spark job ID written by the scheduler at claim time. Internal-only; never exposed on wire. */ + @Column(name = "job_id", length = 255) + private String jobId; + + /** + * Monotonically-increasing version for application-level optimistic concurrency control. The + * scheduler's batch CAS transitions match this in the WHERE clause and bump it by one on UPDATE, + * ensuring two scheduler instances can't both move the same row out of PENDING. 
Not managed by + * JPA optimistic locking — kept as a plain column so the WHERE-clause-based CAS pattern works + * portably across MySQL and H2. + */ + @Column(name = "version") + private Long version; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java new file mode 100644 index 000000000..ceebb5ad5 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java @@ -0,0 +1,55 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * DB-layer stats payload — stored as a JSON blob in the {@code stats} column of {@code table_stats} + * and {@code table_stats_history}. + * + *
<p>
Self-contained: no references to api/ or model/ types. + */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class TableStats { + + /** Snapshot fields — overwritten on every upsert. */ + private SnapshotMetrics snapshot; + + /** Delta fields — accumulated across commit events. */ + private CommitDelta delta; + + /** Point-in-time metadata read from Iceberg at scan time. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SnapshotMetrics { + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot. */ + private Long numCurrentFiles; + } + + /** Per-commit incremental counters; accumulated across all recorded commit events. */ + @Data + @Builder(toBuilder = true) + @NoArgsConstructor + @AllArgsConstructor + @JsonIgnoreProperties(ignoreUnknown = true) + public static class CommitDelta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java new file mode 100644 index 000000000..2b7628de1 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -0,0 +1,63 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import 
lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; +import org.hibernate.annotations.TypeDef; + +/** + * Append-only record of per-commit stats reported by the Tables Service. + * + *
<p>
Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot + * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can + * query this table to reconstruct change rates over arbitrary time windows. + * + *
<p>
Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same + * package. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table( + name = "table_stats_history", + indexes = { + @Index(name = "idx_tsh_table_uuid", columnList = "table_uuid"), + @Index(name = "idx_tsh_recorded_at", columnList = "recorded_at") + }) +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsHistoryRow { + + @Id + @Column(name = "id", nullable = false, length = 36) + private String id; + + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Column(name = "recorded_at", nullable = false) + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java new file mode 100644 index 000000000..950cf5327 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -0,0 +1,59 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.vladmihalcea.hibernate.type.json.JsonStringType; +import java.time.Instant; +import java.util.Map; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Id; +import javax.persistence.Table; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NoArgsConstructor; +import org.hibernate.annotations.Type; 
+import org.hibernate.annotations.TypeDef; + +/** + * JPA entity representing a per-table stats snapshot in the optimizer DB. + * + *
<p>
Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA + * to enumerate tables and check scheduling eligibility. + * + *
<p>
Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same + * package. + */ +@TypeDef(name = "json", typeClass = JsonStringType.class) +@Entity +@Table(name = "table_stats") +@Getter +@EqualsAndHashCode +@Builder(toBuilder = true) +@NoArgsConstructor(access = AccessLevel.PROTECTED) +@AllArgsConstructor(access = AccessLevel.PROTECTED) +public class TableStatsRow { + + @Id + @Column(name = "table_uuid", nullable = false, length = 36) + private String tableUuid; + + @Column(name = "database_name", nullable = false, length = 128) + private String databaseName; + + @Column(name = "table_name", nullable = false, length = 128) + private String tableName; + + @Type(type = "json") + @Column(name = "stats", columnDefinition = "TEXT") + private TableStats stats; + + @Type(type = "json") + @Column(name = "table_properties", columnDefinition = "TEXT") + private Map tableProperties; + + /** Set on every upsert. Used for stats pipeline staleness monitoring. */ + @Column(name = "updated_at", nullable = false) + private Instant updatedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java new file mode 100644 index 000000000..f77773928 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -0,0 +1,235 @@ +package com.linkedin.openhouse.optimizer.model.mapper; + +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import 
com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; +import java.util.Collections; +import org.springframework.stereotype.Component; + +/** + * Converts between internal {@code model/} domain objects and database row entities. + * + *
<p>
The only place inside {@code model/} where {@code db/} types are referenced — this is the + * boundary at which the internal model meets the database layer. Pure data types under {@code + * model/} stay free of any DB-side imports. + * + *
<p>
Each layer carries its own per-layer enum + payload types. This mapper translates between + * model/-side and db/-side counterparts by name. + */ +@Component +public class ModelDbMapper { + + // --- TableOperationsRow <-> TableOperation --- + + public TableOperation toOperation(TableOperationsRow row) { + if (row == null) { + return null; + } + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(toModelOperationType(row.getOperationType())) + .status(toModelOperationStatus(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } + + public TableOperationsRow toRow(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsRow.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(toDbOperationType(op.getOperationType())) + .status(toDbOperationStatus(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .version(0L) + .build(); + } + + // --- TableOperationsHistoryRow <-> TableOperationsHistory --- + + public TableOperationsHistory toHistory(TableOperationsHistoryRow row) { + if (row == null) { + return null; + } + return TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(toModelOperationType(row.getOperationType())) + .completedAt(row.getCompletedAt()) + .status(toModelHistoryStatus(row.getStatus())) + .build(); + } + + public TableOperationsHistoryRow toRow(TableOperationsHistory history) { + if (history == null) { + return null; + } + return TableOperationsHistoryRow.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + 
.operationType(toDbOperationType(history.getOperationType())) + .completedAt(history.getCompletedAt()) + .status(toDbHistoryStatus(history.getStatus())) + .build(); + } + + // --- TableStatsRow -> Table --- + + public Table toTable(TableStatsRow row) { + if (row == null) { + return null; + } + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .stats(toModelStats(row.getStats())) + .build(); + } + + // --- TableStats payload --- + + public TableStats toModelStats(com.linkedin.openhouse.optimizer.db.TableStats dbStats) { + if (dbStats == null) { + return null; + } + return TableStats.builder() + .snapshot(toModelSnapshot(dbStats.getSnapshot())) + .delta(toModelDelta(dbStats.getDelta())) + .build(); + } + + public com.linkedin.openhouse.optimizer.db.TableStats toDbStats(TableStats modelStats) { + if (modelStats == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.builder() + .snapshot(toDbSnapshot(modelStats.getSnapshot())) + .delta(toDbDelta(modelStats.getDelta())) + .build(); + } + + public TableStatsHistoryRow toStatsHistoryRow( + String id, + String tableUuid, + String databaseName, + String tableName, + TableStats stats, + java.time.Instant recordedAt) { + return TableStatsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .stats(toDbStats(stats)) + .recordedAt(recordedAt) + .build(); + } + + // --- enum helpers --- + + public OperationType toModelOperationType(com.linkedin.openhouse.optimizer.db.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.OperationType toDbOperationType(OperationType v) { + return v == null ? 
null : com.linkedin.openhouse.optimizer.db.OperationType.valueOf(v.name()); + } + + public OperationStatus toModelOperationStatus( + com.linkedin.openhouse.optimizer.db.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.OperationStatus toDbOperationStatus( + OperationStatus v) { + return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(v.name()); + } + + public HistoryStatus toModelHistoryStatus(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); + } + + public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(HistoryStatus v) { + return v == null ? null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); + } + + // --- TableStats inner classes --- + + private TableStats.SnapshotMetrics toModelSnapshot( + com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics v) { + if (v == null) { + return null; + } + return TableStats.SnapshotMetrics.builder() + .clusterId(v.getClusterId()) + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } + + private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnapshot( + TableStats.SnapshotMetrics v) { + if (v == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics.builder() + .clusterId(v.getClusterId()) + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } + + private TableStats.CommitDelta toModelDelta( + com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta v) { + if (v == null) { + return null; + } + return TableStats.CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + 
.numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } + + private com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta toDbDelta( + TableStats.CommitDelta v) { + if (v == null) { + return null; + } + return com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + .numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index ba2ce35a8..5faf349e3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -1,6 +1,7 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.util.List; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; @@ -36,5 +37,6 @@ List findByTableUuidOrderByCompletedAtDesc( + "AND r.completedAt = (" + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable(@Param("operationType") String operationType); + List findLatestPerTable( + @Param("operationType") OperationType operationType); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index c7a08cabc..e9bc1c8b3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.OperationStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; @@ -21,8 +23,8 @@ public interface TableOperationsRepository extends JpaRepository find( - @Param("operationType") String operationType, - @Param("status") String status, + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, @Param("tableName") String tableName); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index aaa1b0050..6f9595275 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.util.List; import 
org.springframework.data.domain.Pageable; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index 4215237bc..dbf1de0ae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.repository; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql new file mode 100644 index 000000000..92e79976b --- /dev/null +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -0,0 +1,54 @@ +-- Optimizer Service Schema +-- Compatible with MySQL (production) and H2 in MySQL mode (tests). +CREATE TABLE IF NOT EXISTS table_operations ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL, + created_at TIMESTAMP(6) NOT NULL, + scheduled_at TIMESTAMP(6), + job_id VARCHAR(255), + version BIGINT, + -- TODO: per-operation metric columns will be added as operations are onboarded. 
+ PRIMARY KEY (id) +); + +CREATE TABLE IF NOT EXISTS table_stats ( + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + stats TEXT, + table_properties TEXT, + updated_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (table_uuid) +); + +CREATE TABLE IF NOT EXISTS table_stats_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + stats TEXT, + recorded_at TIMESTAMP(6) NOT NULL, + PRIMARY KEY (id), + INDEX idx_tsh_table_uuid (table_uuid), + INDEX idx_tsh_recorded_at (recorded_at) +); + +CREATE TABLE IF NOT EXISTS table_operations_history ( + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + PRIMARY KEY (id), + INDEX idx_toph_db_table (database_name, table_name), + -- Drives TableOperationsHistoryRepository.findLatestPerTable: the correlated + -- MAX(completed_at) subquery becomes an index-only lookup per (operation_type, + -- table_uuid) instead of an O(N²) scan. 
+ INDEX idx_toph_optype_uuid_completed (operation_type, table_uuid, completed_at) +); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 436d08066..706ecd877 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -2,9 +2,9 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.HistoryStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.time.Instant; import java.util.List; import java.util.UUID; @@ -27,38 +27,37 @@ void findByTableUuid_returnsRowsNewestFirst() { Instant t1 = Instant.parse("2024-01-01T10:00:00Z"); Instant t2 = Instant.parse("2024-01-02T10:00:00Z"); String tableUuid = UUID.randomUUID().toString(); + String idOlder = UUID.randomUUID().toString(); + String idNewer = UUID.randomUUID().toString(); repository.save( TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) + .id(idOlder) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) - .jobId("job-001") + .status(HistoryStatus.SUCCESS) .build()); repository.save( TableOperationsHistoryRow.builder() - .id(UUID.randomUUID().toString()) + .id(idNewer) 
.tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t2) - .status(HistoryStatus.FAILED.name()) - .jobId("job-002") - .result("{\"errorMessage\":\"out of memory\",\"errorType\":\"OOM\"}") + .status(HistoryStatus.FAILED) .build()); List rows = repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); - assertThat(rows.get(0).getJobId()).isEqualTo("job-002"); - assertThat(rows.get(1).getJobId()).isEqualTo("job-001"); + assertThat(rows.get(0).getId()).isEqualTo(idNewer); + assertThat(rows.get(1).getId()).isEqualTo(idOlder); } @Test @@ -72,9 +71,9 @@ void findByTableUuid_respectsLimit() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl3") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(now.plusSeconds(i)) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); } @@ -96,9 +95,9 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -106,9 +105,9 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t2) - .status(HistoryStatus.FAILED.name()) + .status(HistoryStatus.FAILED) .build()); repository.save( TableOperationsHistoryRow.builder() @@ -116,18 +115,18 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .tableUuid(otherUuid) 
.databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) .completedAt(t1) - .status(HistoryStatus.SUCCESS.name()) + .status(HistoryStatus.SUCCESS) .build()); List latest = - repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION.name()); + repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION); assertThat(latest).hasSize(2); TableOperationsHistoryRow forTarget = latest.stream().filter(r -> r.getTableUuid().equals(tableUuid)).findFirst().orElseThrow(); assertThat(forTarget.getCompletedAt()).isEqualTo(t2); - assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED.name()); + assertThat(forTarget.getStatus()).isEqualTo(HistoryStatus.FAILED); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 2ca8dc61e..44a03ba9e 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -2,9 +2,9 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.OperationStatus; +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.List; import java.util.Optional; @@ -32,8 +32,8 @@ void saveAndFindById() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - 
.operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build(); @@ -41,7 +41,7 @@ void saveAndFindById() { Optional found = repository.findById(id); assertThat(found).isPresent(); - assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING.name()); + assertThat(found.get().getStatus()).isEqualTo(OperationStatus.PENDING); } @Test @@ -52,8 +52,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -62,8 +62,8 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) .createdAt(Instant.now()) .build()); @@ -79,8 +79,8 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -89,20 +89,20 @@ void find_byStatus() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) .createdAt(Instant.now()) .build()); List pending = - 
repository.find(null, OperationStatus.PENDING.name(), null, null, null); + repository.find(null, OperationStatus.PENDING, null, null, null); assertThat(pending).hasSize(1); - assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING.name()); + assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.find(null, OperationStatus.SCHEDULED.name(), null, null, null); + repository.find(null, OperationStatus.SCHEDULED, null, null, null); assertThat(scheduled).hasSize(1); - assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED.name()); + assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test @@ -113,8 +113,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); repository.save( @@ -123,8 +123,8 @@ void find_byDatabaseAndTable() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.PENDING.name()) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) .createdAt(Instant.now()) .build()); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 475196630..18241ce8d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,8 +2,8 @@ import static 
org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 240d512ef..e70704f51 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,8 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Map; import java.util.Optional; diff --git a/services/optimizer/src/test/resources/application-test.properties b/services/optimizer/src/test/resources/application-test.properties new file mode 100644 index 000000000..97b7841dc --- /dev/null +++ b/services/optimizer/src/test/resources/application-test.properties @@ -0,0 +1,12 @@ +spring.datasource.url=jdbc:h2:mem:optimizer_test;MODE=MySQL;DATABASE_TO_LOWER=TRUE;DB_CLOSE_DELAY=-1 +spring.datasource.driver-class-name=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect 
+spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql From e79eec7b01dd0890df975c3e3ac311f2ef2cc96c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:25:33 -0700 Subject: [PATCH 056/104] refactor(optimizer): split TableStats envelope into snapshot + delta columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DB layer no longer mirrors the wire-side TableStats JSON envelope. Instead the two structurally-separate concepts inside it — point-in-time snapshot metrics and per-commit delta counters — are persisted as two independent JSON columns. Per-layer decoupling: the api/ envelope can evolve without forcing the DB column shape to change in lockstep. Tables and class names are unchanged: table_stats / table_stats_history on the SQL side; TableStatsRow / TableStatsHistoryRow on the Java side. Changes: - Delete db/TableStats (the envelope wrapper is no longer needed). - Add db/SnapshotMetrics (plain POJO; serialized into the `snapshot` JSON column). - Add db/CommitDeltaMetrics (plain POJO; serialized into the `delta` JSON column). - TableStatsRow: replace `stats: TableStats` with `snapshot: SnapshotMetrics` and `delta: CommitDeltaMetrics`. - TableStatsHistoryRow: same split. - Schema: replace `stats TEXT` with `snapshot TEXT` and `delta TEXT` on both tables. - ModelDbMapper: split/join at the boundary. New helpers `toDbSnapshot`, `toDbDelta`, `joinStats` translate between the single model-layer TableStats and the two DB columns. `toStatsHistoryRow` projects a TableStats into the two-column row. - Repository tests: build rows with the new two-field shape. 
--- .../optimizer/db/CommitDeltaMetrics.java | 21 +++++++ .../optimizer/db/SnapshotMetrics.java | 24 ++++++++ .../openhouse/optimizer/db/TableStats.java | 55 ------------------ .../optimizer/db/TableStatsHistoryRow.java | 18 +++--- .../openhouse/optimizer/db/TableStatsRow.java | 13 +++-- .../optimizer/model/mapper/ModelDbMapper.java | 57 ++++++++++--------- .../main/resources/db/optimizer-schema.sql | 6 +- .../TableStatsHistoryRepositoryTest.java | 26 ++++----- .../repository/TableStatsRepositoryTest.java | 51 ++++++----------- 9 files changed, 125 insertions(+), 146 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java new file mode 100644 index 000000000..8094d28b8 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java @@ -0,0 +1,21 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Per-commit incremental counters. Serialized as JSON into the {@code delta} column. 
*/ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class CommitDeltaMetrics { + + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java new file mode 100644 index 000000000..22d222172 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java @@ -0,0 +1,24 @@ +package com.linkedin.openhouse.optimizer.db; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** Point-in-time snapshot fields. Serialized as JSON into the {@code snapshot} column. */ +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) +public class SnapshotMetrics { + + private String clusterId; + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + + /** Total number of data files as of the latest snapshot — used for bin-packing. 
*/ + private Long numCurrentFiles; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java deleted file mode 100644 index ceebb5ad5..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStats.java +++ /dev/null @@ -1,55 +0,0 @@ -package com.linkedin.openhouse.optimizer.db; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * DB-layer stats payload — stored as a JSON blob in the {@code stats} column of {@code table_stats} - * and {@code table_stats_history}. - * - *

Self-contained: no references to api/ or model/ types. - */ -@Data -@Builder(toBuilder = true) -@NoArgsConstructor -@AllArgsConstructor -@JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { - - /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; - - /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; - - /** Point-in-time metadata read from Iceberg at scan time. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { - private String clusterId; - private String tableVersion; - private String tableLocation; - private Long tableSizeBytes; - /** Total number of data files as of the latest snapshot. */ - private Long numCurrentFiles; - } - - /** Per-commit incremental counters; accumulated across all recorded commit events. */ - @Data - @Builder(toBuilder = true) - @NoArgsConstructor - @AllArgsConstructor - @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { - private Long numFilesAdded; - private Long numFilesDeleted; - private Long addedSizeBytes; - private Long deletedSizeBytes; - } -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java index 2b7628de1..71c17b582 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -19,12 +19,12 @@ /** * Append-only record of per-commit stats reported by the Tables Service. * - *

Each Iceberg commit produces one row. The {@code stats} JSON contains both the snapshot - * metrics (point-in-time) and the commit delta (files added/deleted in this commit). Consumers can - * query this table to reconstruct change rates over arbitrary time windows. + *

Each Iceberg commit produces one row. Consumers can query this table to reconstruct change + * rates over arbitrary time windows. * - *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same - * package. + *

Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link + * SnapshotMetrics} (point-in-time fields at commit time) and {@link CommitDeltaMetrics} (per-commit + * counters). */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -55,8 +55,12 @@ public class TableStatsHistoryRow { private String tableName; @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; + @Column(name = "snapshot", columnDefinition = "TEXT") + private SnapshotMetrics snapshot; + + @Type(type = "json") + @Column(name = "delta", columnDefinition = "TEXT") + private CommitDeltaMetrics delta; @Column(name = "recorded_at", nullable = false) private Instant recordedAt; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 950cf5327..2566763ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -22,8 +22,9 @@ *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA * to enumerate tables and check scheduling eligibility. * - *

Self-contained DB-layer type: the JSON payload type is {@link TableStats} from the same - * package. + *

Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link + * SnapshotMetrics} (point-in-time fields, overwritten each commit) and {@link CommitDeltaMetrics} + * (per-commit counters). */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -46,8 +47,12 @@ public class TableStatsRow { private String tableName; @Type(type = "json") - @Column(name = "stats", columnDefinition = "TEXT") - private TableStats stats; + @Column(name = "snapshot", columnDefinition = "TEXT") + private SnapshotMetrics snapshot; + + @Type(type = "json") + @Column(name = "delta", columnDefinition = "TEXT") + private CommitDeltaMetrics delta; @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index f77773928..0ae9167e1 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.model.mapper; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; @@ -21,8 +23,9 @@ * boundary at which the internal model meets the database layer. Pure data types under {@code * model/} stay free of any DB-side imports. * - *

Each layer carries its own per-layer enum + payload types. This mapper translates between - * model/-side and db/-side counterparts by name. + *

Each layer carries its own per-layer enum + payload types. The DB layer flattens the wire-side + * {@code TableStats} envelope into two separate columns ({@code snapshot} and {@code delta}); this + * mapper joins / splits them at the boundary. */ @Component public class ModelDbMapper { @@ -106,30 +109,31 @@ public Table toTable(TableStatsRow row) { .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(toModelStats(row.getStats())) + .stats(joinStats(row.getSnapshot(), row.getDelta())) .build(); } - // --- TableStats payload --- + // --- TableStats payload <-> (snapshot, delta) --- - public TableStats toModelStats(com.linkedin.openhouse.optimizer.db.TableStats dbStats) { - if (dbStats == null) { + /** Join the two DB-side columns into a single internal-model {@link TableStats}. */ + public TableStats joinStats(SnapshotMetrics dbSnapshot, CommitDeltaMetrics dbDelta) { + if (dbSnapshot == null && dbDelta == null) { return null; } return TableStats.builder() - .snapshot(toModelSnapshot(dbStats.getSnapshot())) - .delta(toModelDelta(dbStats.getDelta())) + .snapshot(toModelSnapshot(dbSnapshot)) + .delta(toModelDelta(dbDelta)) .build(); } - public com.linkedin.openhouse.optimizer.db.TableStats toDbStats(TableStats modelStats) { - if (modelStats == null) { - return null; - } - return com.linkedin.openhouse.optimizer.db.TableStats.builder() - .snapshot(toDbSnapshot(modelStats.getSnapshot())) - .delta(toDbDelta(modelStats.getDelta())) - .build(); + /** Project the internal-model {@link TableStats#getSnapshot()} side. */ + public SnapshotMetrics toDbSnapshot(TableStats modelStats) { + return modelStats == null ? null : toDbSnapshot(modelStats.getSnapshot()); + } + + /** Project the internal-model {@link TableStats#getDelta()} side. */ + public CommitDeltaMetrics toDbDelta(TableStats modelStats) { + return modelStats == null ? 
null : toDbDelta(modelStats.getDelta()); } public TableStatsHistoryRow toStatsHistoryRow( @@ -144,7 +148,8 @@ public TableStatsHistoryRow toStatsHistoryRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .stats(toDbStats(stats)) + .snapshot(toDbSnapshot(stats)) + .delta(toDbDelta(stats)) .recordedAt(recordedAt) .build(); } @@ -177,10 +182,9 @@ public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(Histo return v == null ? null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); } - // --- TableStats inner classes --- + // --- inner-payload field copies --- - private TableStats.SnapshotMetrics toModelSnapshot( - com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics v) { + private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { if (v == null) { return null; } @@ -193,12 +197,11 @@ private TableStats.SnapshotMetrics toModelSnapshot( .build(); } - private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnapshot( - TableStats.SnapshotMetrics v) { + private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { if (v == null) { return null; } - return com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics.builder() + return SnapshotMetrics.builder() .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) @@ -207,8 +210,7 @@ private com.linkedin.openhouse.optimizer.db.TableStats.SnapshotMetrics toDbSnaps .build(); } - private TableStats.CommitDelta toModelDelta( - com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta v) { + private TableStats.CommitDelta toModelDelta(CommitDeltaMetrics v) { if (v == null) { return null; } @@ -220,12 +222,11 @@ private TableStats.CommitDelta toModelDelta( .build(); } - private com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta toDbDelta( - TableStats.CommitDelta v) { + private CommitDeltaMetrics toDbDelta(TableStats.CommitDelta v) { if (v == null) { return 
null; } - return com.linkedin.openhouse.optimizer.db.TableStats.CommitDelta.builder() + return CommitDeltaMetrics.builder() .numFilesAdded(v.getNumFilesAdded()) .numFilesDeleted(v.getNumFilesDeleted()) .addedSizeBytes(v.getAddedSizeBytes()) diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 92e79976b..3f3d11629 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -19,7 +19,8 @@ CREATE TABLE IF NOT EXISTS table_stats ( table_uuid VARCHAR(36) NOT NULL, database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, - stats TEXT, + snapshot TEXT, + delta TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (table_uuid) @@ -30,7 +31,8 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( table_uuid VARCHAR(36) NOT NULL, database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, - stats TEXT, + snapshot TEXT, + delta TEXT, recorded_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (id), INDEX idx_tsh_table_uuid (table_uuid), diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 18241ce8d..dbd8cc686 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -2,7 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import 
java.time.Instant; import java.time.temporal.ChronoUnit; @@ -35,8 +36,8 @@ void saveAndFind() { assertThat(rows).hasSize(3); // newest first - assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); - assertThat(rows.get(2).getStats().getDelta().getNumFilesAdded()).isEqualTo(10L); + assertThat(rows.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(2).getDelta().getNumFilesAdded()).isEqualTo(10L); } @Test @@ -67,7 +68,7 @@ void find_withSince_filtersOlderRows() { // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); - assertThat(rows.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(rows.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); } @Test @@ -131,18 +132,11 @@ private static TableStatsHistoryRow buildRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .stats( - TableStats.builder() - .snapshot( - TableStats.SnapshotMetrics.builder() - .clusterId("cl1") - .tableSizeBytes(1024L) - .build()) - .delta( - TableStats.CommitDelta.builder() - .numFilesAdded(numFilesAdded) - .numFilesDeleted(numFilesDeleted) - .build()) + .snapshot(SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .delta( + CommitDeltaMetrics.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) .build()) .recordedAt(recordedAt) .build(); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index e70704f51..5f6a4ef4f 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,7 +2,8 @@ import static org.assertj.core.api.Assertions.assertThat; -import 
com.linkedin.openhouse.optimizer.db.TableStats; +import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; +import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Map; @@ -24,19 +25,18 @@ class TableStatsRepositoryTest { @Test void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); - TableStats stats = - TableStats.builder() - .snapshot( - TableStats.SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(1L).build()) - .build(); + SnapshotMetrics snapshot = + SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); + CommitDeltaMetrics delta = + CommitDeltaMetrics.builder().numFilesAdded(3L).numFilesDeleted(1L).build(); repository.save( TableStatsRow.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats(stats) + .snapshot(snapshot) + .delta(delta) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .updatedAt(Instant.now()) .build()); @@ -44,7 +44,8 @@ void saveAndFindById() { Optional found = repository.findById(tableUuid); assertThat(found).isPresent(); assertThat(found.get().getDatabaseName()).isEqualTo("db1"); - assertThat(found.get().getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(found.get().getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); } @@ -58,10 +59,7 @@ void upsert_overwritesPreviousStats() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) 
.updatedAt(Instant.now()) .build()); @@ -70,15 +68,12 @@ void upsert_overwritesPreviousStats() { .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); assertThat(repository.findAll()).hasSize(1); - assertThat(repository.findById(tableUuid).get().getStats().getSnapshot().getTableSizeBytes()) + assertThat(repository.findById(tableUuid).get().getSnapshot().getTableSizeBytes()) .isEqualTo(200L); } @@ -89,10 +84,7 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) .updatedAt(Instant.now()) .build()); repository.save( @@ -100,10 +92,7 @@ void find_noParams_returnsAll() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); @@ -117,10 +106,7 @@ void find_byDatabase() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(100L).build()) .updatedAt(Instant.now()) .build()); repository.save( @@ -128,10 +114,7 @@ void find_byDatabase() { .tableUuid(UUID.randomUUID().toString()) .databaseName("db2") .tableName("tbl2") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - 
.build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(200L).build()) .updatedAt(Instant.now()) .build()); From f955ded61892180eefdc562ecc48a0b5cbffa391 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:34:25 -0700 Subject: [PATCH 057/104] fix(optimizer): drop CommitDeltaMetrics from TableStatsRow table_stats is the current-state row (one per table). Per-commit deltas are an append-only history concern and belong only to TableStatsHistoryRow. Storing a delta on the current-state row implied an aggregation that isn't actually performed. - TableStatsRow: remove the `delta` field. - table_stats schema: drop the `delta` column. - ModelDbMapper.toTable: project only snapshot to model.TableStats; history-only deltas remain in TableStatsHistoryRow. - TableStatsRepositoryTest: drop .delta(...) builder usage. --- .../linkedin/openhouse/optimizer/db/TableStatsRow.java | 9 ++------- .../openhouse/optimizer/model/mapper/ModelDbMapper.java | 3 ++- .../optimizer/src/main/resources/db/optimizer-schema.sql | 1 - .../optimizer/repository/TableStatsRepositoryTest.java | 5 ----- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 2566763ce..8d869ff1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -22,9 +22,8 @@ *

Written by the Tables Service on every Iceberg commit. Read by the Analyzer directly via JPA * to enumerate tables and check scheduling eligibility. * - *

Self-contained DB-layer type. The stats payload is split across two JSON columns — {@link - * SnapshotMetrics} (point-in-time fields, overwritten each commit) and {@link CommitDeltaMetrics} - * (per-commit counters). + *

Self-contained DB-layer type. Holds only the point-in-time {@link SnapshotMetrics} — + * per-commit deltas live exclusively on {@link TableStatsHistoryRow} and are not aggregated here. */ @TypeDef(name = "json", typeClass = JsonStringType.class) @Entity @@ -50,10 +49,6 @@ public class TableStatsRow { @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; - @Type(type = "json") - @Column(name = "delta", columnDefinition = "TEXT") - private CommitDeltaMetrics delta; - @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 0ae9167e1..755b38400 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -109,7 +109,8 @@ public Table toTable(TableStatsRow row) { .tableId(row.getTableName()) .tableProperties( row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - .stats(joinStats(row.getSnapshot(), row.getDelta())) + // table_stats holds only the snapshot — deltas live on the history table. 
+ .stats(joinStats(row.getSnapshot(), null)) .build(); } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 3f3d11629..24b367549 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -20,7 +20,6 @@ CREATE TABLE IF NOT EXISTS table_stats ( database_name VARCHAR(128) NOT NULL, table_name VARCHAR(128) NOT NULL, snapshot TEXT, - delta TEXT, table_properties TEXT, updated_at TIMESTAMP(6) NOT NULL, PRIMARY KEY (table_uuid) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 5f6a4ef4f..493eb88b6 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -2,7 +2,6 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; @@ -27,8 +26,6 @@ void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); SnapshotMetrics snapshot = SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); - CommitDeltaMetrics delta = - CommitDeltaMetrics.builder().numFilesAdded(3L).numFilesDeleted(1L).build(); repository.save( TableStatsRow.builder() @@ -36,7 +33,6 @@ void saveAndFindById() { .databaseName("db1") .tableName("tbl1") .snapshot(snapshot) - .delta(delta) .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) .updatedAt(Instant.now()) .build()); @@ -45,7 +41,6 @@ void saveAndFindById() { 
assertThat(found).isPresent(); assertThat(found.get().getDatabaseName()).isEqualTo("db1"); assertThat(found.get().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); - assertThat(found.get().getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(found.get().getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); } From 969949d98b935443017e1264aa69216c63429001 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:42:21 -0700 Subject: [PATCH 058/104] refactor(optimizer): rewire service layer onto api/model/db mappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt the REST service layer to the new architecture introduced on optimizer-0 and optimizer-1: - api/ and db/ data types are self-contained per layer. - model/mapper/ApiModelMapper and model/mapper/ModelDbMapper own all cross-layer translation. - The old api/mapper/OptimizerMapper is gone. - JobResult is removed from the wire entirely. - TableStats is split on the DB side: TableStatsRow holds only the snapshot; TableStatsHistoryRow holds snapshot + delta per commit. Changes: OptimizerDataServiceImpl rewrite: - Inject ApiModelMapper + ModelDbMapper instead of OptimizerMapper. - Operations: list/get/complete/append go db row → ModelDbMapper → model object → ApiModelMapper → wire DTO. Enum filters on list() translate api → model → db. - completeOperation: signature is now (CompleteOperationRequest) only; operationId lives in the body. No jobId / result on the written history row. - Stats: split api.TableStats into snapshot (current-state row) and snapshot+delta (history row) at write time. Join back to the wire TableStats at read time (current-state has snapshot only; history has both). OptimizerDataService interface: - completeOperation(CompleteOperationRequest) — drop the String id path-style parameter. TableOperationsController: - POST endpoint moves from /{id}/complete to /complete. 
operationId is read from the request body. application.properties: - Re-introduced with production runtime config (server.port, application name, actuator) and JPA/MySQL datasource + schema-init pointing at the schema added on optimizer-1. OptimizerDataServiceImplTest: rewritten to use api/ + db/ types, new completeOperation signature, and the split snapshot/delta on stats; drop JobResult-dependent assertions. --- .../controller/TableOperationsController.java | 13 ++- .../service/OptimizerDataService.java | 9 +- .../service/OptimizerDataServiceImpl.java | 106 +++++++++++++----- .../src/main/resources/application.properties | 20 ++++ .../service/OptimizerDataServiceImplTest.java | 73 +++++++++--- 5 files changed, 162 insertions(+), 59 deletions(-) create mode 100644 services/optimizer/src/main/resources/application.properties diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index adc4d7a85..e48043a35 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -28,15 +28,16 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report that an operation has completed. The backend looks up the operation row, writes a - * history entry with the operation's table metadata and the supplied result. Returns 201 Created - * with the history row, or 404 if the operation does not exist. + * Report that an operation has completed. The body carries the {@code operationId} the caller is + * completing along with its terminal status. 
The backend looks up the operation row, writes a + * history entry with the operation's table metadata, and returns 201 Created with the history + * row, or 404 if the operation does not exist. */ - @PostMapping("/{id}/complete") + @PostMapping("/complete") public ResponseEntity completeOperation( - @PathVariable String id, @RequestBody CompleteOperationRequest request) { + @RequestBody CompleteOperationRequest request) { return service - .completeOperation(id, request) + .completeOperation(request) .map(dto -> ResponseEntity.status(HttpStatus.CREATED).body(dto)) .orElse(ResponseEntity.notFound().build()); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 6f71c708e..c3988f668 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -29,12 +29,11 @@ List listTableOperations( Optional tableUuid); /** - * Complete an operation by writing a history entry. Looks up the operation row by {@code id}, - * copies its table metadata into a new history row, and saves it. Returns the history DTO, or - * empty if the operation does not exist. + * Complete an operation by writing a history entry. Looks up the operation row by {@code + * request.operationId}, copies its table metadata into a new history row, and saves it. Returns + * the history DTO, or empty if the operation does not exist. */ - Optional completeOperation( - String id, CompleteOperationRequest request); + Optional completeOperation(CompleteOperationRequest request); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. 
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 93b9af2a0..21802a84f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -1,17 +1,19 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.api.mapper.OptimizerMapper; import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import 
com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; @@ -35,7 +37,8 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableOperationsHistoryRepository historyRepository; private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - private final OptimizerMapper mapper; + private final ApiModelMapper apiMapper; + private final ModelDbMapper dbMapper; // --- TableOperations --- @@ -48,22 +51,26 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType.map(OperationType::name).orElse(null), - status.map(OperationStatus::name).orElse(null), + operationType + .map(t -> dbMapper.toDbOperationType(apiMapper.toModelOperationType(t))) + .orElse(null), + status + .map(s -> dbMapper.toDbOperationStatus(apiMapper.toModelOperationStatus(s))) + .orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() - .map(mapper::toDto) + .map(dbMapper::toOperation) + .map(apiMapper::toDto) .collect(Collectors.toList()); } @Override @Transactional - public Optional completeOperation( - String id, CompleteOperationRequest request) { + public Optional completeOperation(CompleteOperationRequest request) { return operationsRepository - .findById(id) + .findById(request.getOperationId()) .map( row -> { TableOperationsHistoryRow historyRow = @@ -74,17 +81,17 @@ public Optional completeOperation( .tableName(row.getTableName()) .operationType(row.getOperationType()) .completedAt(Instant.now()) - .status(request.getStatus().name()) - .jobId(row.getJobId()) - .result(mapper.fromJobResult(request.getResult())) + .status( + dbMapper.toDbHistoryStatus( + apiMapper.toModelHistoryStatus(request.getStatus()))) .build(); - return mapper.toDto(historyRepository.save(historyRow)); + return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(historyRow))); }); } @Override public Optional 
getTableOperation(String id) { - return operationsRepository.findById(id).map(mapper::toDto); + return operationsRepository.findById(id).map(dbMapper::toOperation).map(apiMapper::toDto); } // --- TableStats --- @@ -93,6 +100,9 @@ public Optional getTableOperation(String id) { @Transactional public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request) { Instant now = Instant.now(); + com.linkedin.openhouse.optimizer.model.TableStats modelStats = + apiMapper.toModelStats(request.getStats()); + TableStatsRow row = statsRepository .findById(tableUuid) @@ -102,7 +112,7 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .toBuilder() .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) - .stats(request.getStats()) + .snapshot(dbMapper.toDbSnapshot(modelStats)) .tableProperties(request.getTableProperties()) .updatedAt(now) .build()) @@ -111,11 +121,11 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .tableUuid(tableUuid) .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) - .stats(request.getStats()) + .snapshot(dbMapper.toDbSnapshot(modelStats)) .tableProperties(request.getTableProperties()) .updatedAt(now) .build()); - TableStatsDto saved = mapper.toDto(statsRepository.save(row)); + TableStatsRow saved = statsRepository.save(row); statsHistoryRepository.save( TableStatsHistoryRow.builder() @@ -123,16 +133,17 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest .tableUuid(tableUuid) .databaseName(request.getDatabaseName()) .tableName(request.getTableName()) - .stats(request.getStats()) + .snapshot(dbMapper.toDbSnapshot(modelStats)) + .delta(dbMapper.toDbDelta(modelStats)) .recordedAt(now) .build()); - return saved; + return toTableStatsDto(saved); } @Override public Optional getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(mapper::toDto); + return 
statsRepository.findById(tableUuid).map(this::toTableStatsDto); } @Override @@ -140,7 +151,7 @@ public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(mapper::toDto) + .map(this::toTableStatsDto) .collect(Collectors.toList()); } @@ -149,7 +160,7 @@ public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(mapper::toDto) + .map(this::toTableStatsHistoryDto) .collect(Collectors.toList()); } @@ -164,20 +175,55 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { .tableUuid(dto.getTableUuid()) .databaseName(dto.getDatabaseName()) .tableName(dto.getTableName()) - .operationType(dto.getOperationType() != null ? dto.getOperationType().name() : null) + .operationType( + dbMapper.toDbOperationType(apiMapper.toModelOperationType(dto.getOperationType()))) .completedAt(dto.getCompletedAt() != null ? dto.getCompletedAt() : Instant.now()) - .status(dto.getStatus() != null ? dto.getStatus().name() : null) - .jobId(dto.getJobId()) - .result(mapper.fromJobResult(dto.getResult())) + .status(dbMapper.toDbHistoryStatus(apiMapper.toModelHistoryStatus(dto.getStatus()))) .build(); - return mapper.toDto(historyRepository.save(row)); + return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(row))); } @Override public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() - .map(mapper::toDto) + .map(dbMapper::toHistory) + .map(apiMapper::toDto) .collect(Collectors.toList()); } + + // --- private helpers --- + + /** + * Assemble a wire {@link TableStatsDto} from a {@link TableStatsRow}. 
The current-state row holds + * only the snapshot — deltas live exclusively on history rows. + */ + private TableStatsDto toTableStatsDto(TableStatsRow row) { + com.linkedin.openhouse.optimizer.model.TableStats modelStats = + dbMapper.joinStats(row.getSnapshot(), null); + TableStats apiStats = apiMapper.toApiStats(modelStats); + return TableStatsDto.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(apiStats) + .tableProperties(row.getTableProperties()) + .updatedAt(row.getUpdatedAt()) + .build(); + } + + /** Assemble a wire {@link TableStatsHistoryDto} from a {@link TableStatsHistoryRow}. */ + private TableStatsHistoryDto toTableStatsHistoryDto(TableStatsHistoryRow row) { + com.linkedin.openhouse.optimizer.model.TableStats modelStats = + dbMapper.joinStats(row.getSnapshot(), row.getDelta()); + TableStats apiStats = apiMapper.toApiStats(modelStats); + return TableStatsHistoryDto.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(apiStats) + .recordedAt(row.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties new file mode 100644 index 000000000..c6c3f8437 --- /dev/null +++ b/services/optimizer/src/main/resources/application.properties @@ -0,0 +1,20 @@ +spring.application.name=openhouse-optimizer-service +server.port=8080 + +spring.jpa.hibernate.ddl-auto=none +spring.sql.init.mode=always +spring.jpa.defer-datasource-initialization=true +spring.sql.init.schema-locations=classpath:db/optimizer-schema.sql + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQL8Dialect +spring.jpa.properties.hibernate.show_sql=false +spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl + 
+spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver +spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} +spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} +spring.datasource.hikari.maximum-pool-size=20 + +management.endpoints.web.exposure.include=health,prometheus +management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 17ab55278..29374cbfc 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -4,15 +4,14 @@ import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.JobResult; import com.linkedin.openhouse.optimizer.api.model.OperationStatus; import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStats; import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import com.linkedin.openhouse.optimizer.entity.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import 
com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -42,16 +41,16 @@ class OptimizerDataServiceImplTest { @Test void completeOperation_writesHistoryFromOperationRow() { - String id = UUID.randomUUID().toString(); + String operationId = UUID.randomUUID().toString(); String tableUuid = UUID.randomUUID().toString(); operationsRepository.save( TableOperationsRow.builder() - .id(id) + .id(operationId) .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION.name()) - .status(OperationStatus.SCHEDULED.name()) + .operationType(com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) .createdAt(Instant.now()) .scheduledAt(Instant.now()) .jobId("spark-job-123") @@ -59,12 +58,14 @@ void completeOperation_writesHistoryFromOperationRow() { Optional result = service.completeOperation( - id, CompleteOperationRequest.builder().status(HistoryStatus.SUCCESS).build()); + CompleteOperationRequest.builder() + .operationId(operationId) + .status(HistoryStatus.SUCCESS) + .build()); assertThat(result).isPresent(); assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); - assertThat(result.get().getJobId()).isEqualTo("spark-job-123"); assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); assertThat(result.get().getDatabaseName()).isEqualTo("db1"); assertThat(result.get().getCompletedAt()).isNotNull(); @@ -74,11 +75,9 @@ void completeOperation_writesHistoryFromOperationRow() { void completeOperation_notFound_returnsEmpty() { Optional result = service.completeOperation( - UUID.randomUUID().toString(), CompleteOperationRequest.builder() + .operationId(UUID.randomUUID().toString()) .status(HistoryStatus.FAILED) - .result( - 
JobResult.builder().errorMessage("boom").errorType("RuntimeException").build()) .build()); assertThat(result).isEmpty(); @@ -141,16 +140,54 @@ void upsertTableStats_updatesExistingRow_andAppendsHistory() { .stats(secondStats) .build()); - // Current row reflects the latest upsert + // Current row reflects the latest upsert's snapshot. assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); assertThat(statsRepository.findAll()).hasSize(1); - // History has one row per upsert with the raw delta from each call + // History has one row per upsert with the raw delta from each call. List history = statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(history).hasSize(2); - // Newest first - assertThat(history.get(0).getStats().getDelta().getNumFilesAdded()).isEqualTo(3L); - assertThat(history.get(1).getStats().getDelta().getNumFilesAdded()).isEqualTo(5L); + // Newest first. + assertThat(history.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); + assertThat(history.get(1).getDelta().getNumFilesAdded()).isEqualTo(5L); + } + + // --- list filters touch the operations enum mapping path --- + + @Test + void listTableOperations_filtersByOperationTypeAndStatus() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(pendingId) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING) + .createdAt(Instant.now()) + .build()); + operationsRepository.save( + TableOperationsRow.builder() + .id(scheduledId) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl2") + .operationType(com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION) + 
.status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .build()); + + assertThat( + service.listTableOperations( + Optional.of(OperationType.ORPHAN_FILES_DELETION), + Optional.of(OperationStatus.PENDING), + Optional.empty(), + Optional.empty(), + Optional.empty())) + .extracting(dto -> dto.getId()) + .containsExactly(pendingId); } } From 861b584c3cd41ff03db336c85cb0cde4bc063fe4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 14:55:57 -0700 Subject: [PATCH 059/104] feat(optimizer): extend model layer for service-only types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare model/ for a service-layer rewrite that returns only model/ types (no api/ DTO leakage into the service interface). - model/Table: add `Instant updatedAt`. The service stamps it on every upsert; controllers read it when assembling the wire DTO. - model/TableStatsHistory: new internal-model counterpart to db.TableStatsHistoryRow. Fields mirror the row in internal types (id, tableUuid, databaseName, tableName, stats, recordedAt). - ApiModelMapper: add the missing api↔model conversions that controllers will own once the service drops api/ knowledge — Table ↔ TableStatsDto, TableStatsHistory ↔ TableStatsHistoryDto, and toTable(tableUuid, UpsertTableStatsRequest). 
--- .../openhouse/optimizer/model/Table.java | 4 ++ .../optimizer/model/TableStatsHistory.java | 33 +++++++++++ .../model/mapper/ApiModelMapper.java | 58 +++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index c8bede225..dc0a16a0c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import java.time.Instant; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -28,4 +29,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); private TableStats stats; + + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java new file mode 100644 index 000000000..5cdad1918 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.optimizer.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Internal-model view of an append-only per-commit stats history record. + * + *

<p>One per Iceberg commit. {@link #stats} carries both the snapshot at commit time and the commit + * delta — consumers can reconstruct change rates over arbitrary time windows. + * + *

Pure internal-model type — no references to wire-API or DB types. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TableStatsHistory { + + private String id; + private String tableUuid; + private String databaseName; + private String tableName; + + /** Snapshot + delta for this commit event. */ + private TableStats stats; + + /** When this history row was recorded. */ + private Instant recordedAt; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index 35af7fb25..d77b3a253 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -2,12 +2,18 @@ import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import java.util.Collections; import org.springframework.stereotype.Component; /** @@ -89,6 +95,58 @@ public TableOperationsHistoryDto toDto(TableOperationsHistory history) { .build(); } + // --- Table <-> TableStatsDto / 
UpsertTableStatsRequest --- + + /** + * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} + * is intentionally left null — the service stamps it server-side at write time. + */ + public Table toTable(String tableUuid, UpsertTableStatsRequest request) { + if (request == null) { + return null; + } + return Table.builder() + .tableUuid(tableUuid) + .databaseName(request.getDatabaseName()) + .tableId(request.getTableName()) + .tableProperties( + request.getTableProperties() != null + ? request.getTableProperties() + : Collections.emptyMap()) + .stats(toModelStats(request.getStats())) + .build(); + } + + public TableStatsDto toDto(Table table) { + if (table == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(table.getTableUuid()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .stats(toApiStats(table.getStats())) + .tableProperties(table.getTableProperties()) + .updatedAt(table.getUpdatedAt()) + .build(); + } + + // --- TableStatsHistory <-> TableStatsHistoryDto --- + + public TableStatsHistoryDto toDto(TableStatsHistory history) { + if (history == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .stats(toApiStats(history.getStats())) + .recordedAt(history.getRecordedAt()) + .build(); + } + // --- TableStats payload --- public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { From b60a3bfc1d51a6f60ac42baba3669bf90e71683f Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:00:13 -0700 Subject: [PATCH 060/104] feat(optimizer): extend ModelDbMapper for service-only types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round out the model↔db boundary for the upcoming service-layer rewrite that returns only internal-model 
types: - toTable: stamp model.Table.updatedAt from the row's updated_at column so the model carries the freshness needed by callers without leaking the row. - toStatsHistory: new — db.TableStatsHistoryRow → model.TableStatsHistory. Joins the row's snapshot + delta columns into the model's single TableStats payload. --- .../optimizer/model/mapper/ModelDbMapper.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 755b38400..7a454c78c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -13,6 +13,7 @@ import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.util.Collections; import org.springframework.stereotype.Component; @@ -111,6 +112,23 @@ public Table toTable(TableStatsRow row) { row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) // table_stats holds only the snapshot — deltas live on the history table. 
.stats(joinStats(row.getSnapshot(), null)) + .updatedAt(row.getUpdatedAt()) + .build(); + } + + // --- TableStatsHistoryRow -> TableStatsHistory --- + + public TableStatsHistory toStatsHistory(TableStatsHistoryRow row) { + if (row == null) { + return null; + } + return TableStatsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(joinStats(row.getSnapshot(), row.getDelta())) + .recordedAt(row.getRecordedAt()) .build(); } From b80b2e503f97d318675f2b39e387e1245e28db1e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:03:13 -0700 Subject: [PATCH 061/104] refactor(optimizer): service layer returns only model/ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Push the api/model boundary out of the service entirely. After this commit, calling into OptimizerDataService never returns or accepts a wire DTO; controllers (or any future CLI / in-process consumer) own the marshalling at their own edge. Service interface: - All return types and parameters are model/ types or primitives. - completeOperation(String operationId, model.HistoryStatus status). - upsertTableStats(model.Table table) — caller supplies a Table; the service stamps Table.updatedAt and returns the updated Table. - listTableOperations / getStatsHistory / etc. return Lists of model types. Service impl: - Drop ApiModelMapper injection. Only depends on ModelDbMapper. - All conversions are db row → ModelDbMapper → model. The new toStatsHistory mapper method (landed on optimizer-1) handles the history-row case. The updated toTable now stamps Table.updatedAt from the row. Controllers (api/controller/*): - TableOperationsController, TableOperationsHistoryController, TableStatsController now inject ApiModelMapper and do api↔model conversion at the boundary. 
Each controller method takes api request types, converts to model, calls the service, converts the returned model back to api DTOs. - TableOperationsController.complete continues to take the operationId from the request body. Test: - OptimizerDataServiceImplTest now exercises the service in model types: builders create model.Table, assertions read model.HistoryStatus / model.OperationType / model.TableStats etc. Verification: `git grep "import com.linkedin.openhouse.optimizer.api" -- services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/` returns empty. --- .../controller/TableOperationsController.java | 28 ++-- .../TableOperationsHistoryController.java | 12 +- .../api/controller/TableStatsController.java | 28 +++- .../service/OptimizerDataService.java | 51 +++--- .../service/OptimizerDataServiceImpl.java | 152 +++++++----------- .../service/OptimizerDataServiceImplTest.java | 127 +++++++-------- 6 files changed, 195 insertions(+), 203 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index e48043a35..2c2483c1b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -5,9 +5,11 @@ import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import 
org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -26,6 +28,7 @@ public class TableOperationsController { private final OptimizerDataService service; + private final ApiModelMapper apiMapper; /** * Report that an operation has completed. The body carries the {@code operationId} the caller is @@ -37,8 +40,9 @@ public class TableOperationsController { public ResponseEntity completeOperation( @RequestBody CompleteOperationRequest request) { return service - .completeOperation(request) - .map(dto -> ResponseEntity.status(HttpStatus.CREATED).body(dto)) + .completeOperation( + request.getOperationId(), apiMapper.toModelHistoryStatus(request.getStatus())) + .map(history -> ResponseEntity.status(HttpStatus.CREATED).body(apiMapper.toDto(history))) .orElse(ResponseEntity.notFound().build()); } @@ -47,6 +51,7 @@ public ResponseEntity completeOperation( public ResponseEntity getTableOperation(@PathVariable String id) { return service .getTableOperation(id) + .map(apiMapper::toDto) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -62,12 +67,17 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok( - service.listTableOperations( - Optional.ofNullable(operationType), - Optional.ofNullable(status), - Optional.ofNullable(databaseName), - Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid))); + List result = + service + .listTableOperations( + Optional.ofNullable(operationType).map(apiMapper::toModelOperationType), + Optional.ofNullable(status).map(apiMapper::toModelOperationStatus), + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid)) + .stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } } diff --git 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 17dc0670a..df7cabeff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,8 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -21,18 +23,24 @@ public class TableOperationsHistoryController { private final OptimizerDataService service; + private final ApiModelMapper apiMapper; /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */ @PostMapping public ResponseEntity appendHistory( @RequestBody TableOperationsHistoryDto dto) { - return ResponseEntity.status(HttpStatus.CREATED).body(service.appendHistory(dto)); + return ResponseEntity.status(HttpStatus.CREATED) + .body(apiMapper.toDto(service.appendHistory(apiMapper.toHistory(dto)))); } /** Return the most recent history for a table, newest first, up to {@code limit} rows. 
*/ @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok(service.getHistory(tableUuid, limit)); + List result = + service.getHistory(tableUuid, limit).stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index ef57598e8..2b738a6c3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -3,10 +3,12 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -24,6 +26,7 @@ public class TableStatsController { private final OptimizerDataService service; + private final ApiModelMapper apiMapper; /** * Create or overwrite the stats row for {@code tableUuid}. 
Called by the Tables Service on every @@ -32,7 +35,8 @@ public class TableStatsController { @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { - return ResponseEntity.ok(service.upsertTableStats(tableUuid, request)); + return ResponseEntity.ok( + apiMapper.toDto(service.upsertTableStats(apiMapper.toTable(tableUuid, request)))); } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. */ @@ -40,6 +44,7 @@ public ResponseEntity upsertTableStats( public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service .getTableStats(tableUuid) + .map(apiMapper::toDto) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -53,11 +58,16 @@ public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - return ResponseEntity.ok( - service.listTableStats( - Optional.ofNullable(databaseName), - Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid))); + List result = + service + .listTableStats( + Optional.ofNullable(databaseName), + Optional.ofNullable(tableName), + Optional.ofNullable(tableUuid)) + .stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } /** @@ -69,6 +79,10 @@ public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, @RequestParam(defaultValue = "100") int limit) { - return ResponseEntity.ok(service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit)); + List result = + service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() + .map(apiMapper::toDto) + .collect(Collectors.toList()); + return ResponseEntity.ok(result); } } diff --git 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index c3988f668..e8a4da86e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,18 +1,23 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import java.util.List; import java.util.Optional; -/** Service interface for optimizer data operations. */ +/** + * Service interface for optimizer data operations. + * + *

The service is the boundary between the wire-API surface and the database. Inputs and outputs + * are internal-model types only — callers (controllers, future CLI, in-process consumers) + * convert at their own edge. No api/-package types appear here. + */ public interface OptimizerDataService { // --- TableOperations --- @@ -21,7 +26,7 @@ public interface OptimizerDataService { * List operations matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableOperations( + List listTableOperations( Optional operationType, Optional status, Optional databaseName, @@ -30,33 +35,35 @@ List listTableOperations( /** * Complete an operation by writing a history entry. Looks up the operation row by {@code - * request.operationId}, copies its table metadata into a new history row, and saves it. Returns - * the history DTO, or empty if the operation does not exist. + * operationId}, copies its table metadata into a new history row with the supplied terminal + * {@code status}, and saves it. Returns the history record, or empty if the operation does not + * exist. */ - Optional completeOperation(CompleteOperationRequest request); + Optional completeOperation(String operationId, HistoryStatus status); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. * Used to poll a specific operation (e.g. waiting for SUCCESS after a Spark job completes). */ - Optional getTableOperation(String id); + Optional getTableOperation(String id); // --- TableStats --- /** - * Create or update the stats row for {@code tableUuid}. Fully idempotent: the same call - * overwrites the previous snapshot with the latest commit values. + * Create or update the stats row for {@code table.getTableUuid()}. Fully idempotent: the same + * call overwrites the previous snapshot with the latest commit values. 
The service stamps {@link + * Table#getUpdatedAt()} server-side and returns the resulting {@link Table}. */ - TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request); + Table upsertTableStats(Table table); /** Return the stats row for {@code tableUuid}, or empty if none exists. */ - Optional getTableStats(String tableUuid); + Optional

getTableStats(String tableUuid); /** * List stats rows matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableStats( + List
listTableStats( Optional databaseName, Optional tableName, Optional tableUuid); /** @@ -66,12 +73,12 @@ List listTableStats( * @param since if present, only return rows recorded at or after this instant * @param limit maximum number of rows to return */ - List getStatsHistory(String tableUuid, Optional since, int limit); + List getStatsHistory(String tableUuid, Optional since, int limit); // --- TableOperationsHistory --- /** Append a completed-job result record. */ - TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto); + TableOperationsHistory appendHistory(TableOperationsHistory history); /** * Return the most recent history rows for a table UUID, newest first. @@ -79,5 +86,5 @@ List listTableStats( * @param tableUuid the stable table UUID * @param limit maximum number of rows to return */ - List getHistory(String tableUuid, int limit); + List getHistory(String tableUuid, int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 21802a84f..47143118c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -1,18 +1,15 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import 
com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; +import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; @@ -28,7 +25,12 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; -/** Implementation of {@link OptimizerDataService}. */ +/** + * Implementation of {@link OptimizerDataService}. + * + *

Operates purely on model/ and db/ types. The model↔db boundary is the {@link ModelDbMapper}. + * No api/-package types appear in this class. + */ @Service @RequiredArgsConstructor public class OptimizerDataServiceImpl implements OptimizerDataService { @@ -37,13 +39,12 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableOperationsHistoryRepository historyRepository; private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - private final ApiModelMapper apiMapper; private final ModelDbMapper dbMapper; // --- TableOperations --- @Override - public List listTableOperations( + public List listTableOperations( Optional operationType, Optional status, Optional databaseName, @@ -51,26 +52,22 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType - .map(t -> dbMapper.toDbOperationType(apiMapper.toModelOperationType(t))) - .orElse(null), - status - .map(s -> dbMapper.toDbOperationStatus(apiMapper.toModelOperationStatus(s))) - .orElse(null), + operationType.map(dbMapper::toDbOperationType).orElse(null), + status.map(dbMapper::toDbOperationStatus).orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() .map(dbMapper::toOperation) - .map(apiMapper::toDto) .collect(Collectors.toList()); } @Override @Transactional - public Optional completeOperation(CompleteOperationRequest request) { + public Optional completeOperation( + String operationId, HistoryStatus status) { return operationsRepository - .findById(request.getOperationId()) + .findById(operationId) .map( row -> { TableOperationsHistoryRow historyRow = @@ -81,27 +78,24 @@ public Optional completeOperation(CompleteOperationRe .tableName(row.getTableName()) .operationType(row.getOperationType()) .completedAt(Instant.now()) - .status( - dbMapper.toDbHistoryStatus( - apiMapper.toModelHistoryStatus(request.getStatus()))) + 
.status(dbMapper.toDbHistoryStatus(status)) .build(); - return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(historyRow))); + return dbMapper.toHistory(historyRepository.save(historyRow)); }); } @Override - public Optional getTableOperation(String id) { - return operationsRepository.findById(id).map(dbMapper::toOperation).map(apiMapper::toDto); + public Optional getTableOperation(String id) { + return operationsRepository.findById(id).map(dbMapper::toOperation); } // --- TableStats --- @Override @Transactional - public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest request) { + public Table upsertTableStats(Table table) { Instant now = Instant.now(); - com.linkedin.openhouse.optimizer.model.TableStats modelStats = - apiMapper.toModelStats(request.getStats()); + String tableUuid = table.getTableUuid(); TableStatsRow row = statsRepository @@ -110,19 +104,19 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest existing -> existing .toBuilder() - .databaseName(request.getDatabaseName()) - .tableName(request.getTableName()) - .snapshot(dbMapper.toDbSnapshot(modelStats)) - .tableProperties(request.getTableProperties()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .tableProperties(table.getTableProperties()) .updatedAt(now) .build()) .orElse( TableStatsRow.builder() .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableName(request.getTableName()) - .snapshot(dbMapper.toDbSnapshot(modelStats)) - .tableProperties(request.getTableProperties()) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .tableProperties(table.getTableProperties()) .updatedAt(now) .build()); TableStatsRow saved = statsRepository.save(row); @@ -131,36 +125,36 @@ public TableStatsDto upsertTableStats(String tableUuid, UpsertTableStatsRequest 
TableStatsHistoryRow.builder() .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableName(request.getTableName()) - .snapshot(dbMapper.toDbSnapshot(modelStats)) - .delta(dbMapper.toDbDelta(modelStats)) + .databaseName(table.getDatabaseName()) + .tableName(table.getTableId()) + .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .delta(dbMapper.toDbDelta(table.getStats())) .recordedAt(now) .build()); - return toTableStatsDto(saved); + return dbMapper.toTable(saved); } @Override - public Optional getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(this::toTableStatsDto); + public Optional

getTableStats(String tableUuid) { + return statsRepository.findById(tableUuid).map(dbMapper::toTable); } @Override - public List listTableStats( + public List
listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(this::toTableStatsDto) + .map(dbMapper::toTable) .collect(Collectors.toList()); } @Override - public List getStatsHistory( + public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(this::toTableStatsHistoryDto) + .map(dbMapper::toStatsHistory) .collect(Collectors.toList()); } @@ -168,62 +162,26 @@ public List getStatsHistory( @Override @Transactional - public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto dto) { + public TableOperationsHistory appendHistory(TableOperationsHistory history) { TableOperationsHistoryRow row = TableOperationsHistoryRow.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType( - dbMapper.toDbOperationType(apiMapper.toModelOperationType(dto.getOperationType()))) - .completedAt(dto.getCompletedAt() != null ? dto.getCompletedAt() : Instant.now()) - .status(dbMapper.toDbHistoryStatus(apiMapper.toModelHistoryStatus(dto.getStatus()))) + .id(history.getId()) + .tableUuid(history.getTableUuid()) + .databaseName(history.getDatabaseName()) + .tableName(history.getTableName()) + .operationType(dbMapper.toDbOperationType(history.getOperationType())) + .completedAt( + history.getCompletedAt() != null ? 
history.getCompletedAt() : Instant.now()) + .status(dbMapper.toDbHistoryStatus(history.getStatus())) .build(); - return apiMapper.toDto(dbMapper.toHistory(historyRepository.save(row))); + return dbMapper.toHistory(historyRepository.save(row)); } @Override - public List getHistory(String tableUuid, int limit) { + public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() .map(dbMapper::toHistory) - .map(apiMapper::toDto) .collect(Collectors.toList()); } - - // --- private helpers --- - - /** - * Assemble a wire {@link TableStatsDto} from a {@link TableStatsRow}. The current-state row holds - * only the snapshot — deltas live exclusively on history rows. - */ - private TableStatsDto toTableStatsDto(TableStatsRow row) { - com.linkedin.openhouse.optimizer.model.TableStats modelStats = - dbMapper.joinStats(row.getSnapshot(), null); - TableStats apiStats = apiMapper.toApiStats(modelStats); - return TableStatsDto.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .stats(apiStats) - .tableProperties(row.getTableProperties()) - .updatedAt(row.getUpdatedAt()) - .build(); - } - - /** Assemble a wire {@link TableStatsHistoryDto} from a {@link TableStatsHistoryRow}. 
*/ - private TableStatsHistoryDto toTableStatsHistoryDto(TableStatsHistoryRow row) { - com.linkedin.openhouse.optimizer.model.TableStats modelStats = - dbMapper.joinStats(row.getSnapshot(), row.getDelta()); - TableStats apiStats = apiMapper.toApiStats(modelStats); - return TableStatsHistoryDto.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .stats(apiStats) - .recordedAt(row.getRecordedAt()) - .build(); - } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 29374cbfc..9d653e21d 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -2,16 +2,14 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStats; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; +import com.linkedin.openhouse.optimizer.model.HistoryStatus; +import com.linkedin.openhouse.optimizer.model.OperationStatus; +import com.linkedin.openhouse.optimizer.model.OperationType; +import com.linkedin.openhouse.optimizer.model.Table; 
+import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -56,12 +54,8 @@ void completeOperation_writesHistoryFromOperationRow() { .jobId("spark-job-123") .build()); - Optional result = - service.completeOperation( - CompleteOperationRequest.builder() - .operationId(operationId) - .status(HistoryStatus.SUCCESS) - .build()); + Optional result = + service.completeOperation(operationId, HistoryStatus.SUCCESS); assertThat(result).isPresent(); assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); @@ -73,12 +67,8 @@ void completeOperation_writesHistoryFromOperationRow() { @Test void completeOperation_notFound_returnsEmpty() { - Optional result = - service.completeOperation( - CompleteOperationRequest.builder() - .operationId(UUID.randomUUID().toString()) - .status(HistoryStatus.FAILED) - .build()); + Optional result = + service.completeOperation(UUID.randomUUID().toString(), HistoryStatus.FAILED); assertThat(result).isEmpty(); } @@ -88,67 +78,72 @@ void completeOperation_notFound_returnsEmpty() { @Test void upsertTableStats_createsNewRow() { String tableUuid = UUID.randomUUID().toString(); - TableStats stats = - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + Table input = + Table.builder() + .tableUuid(tableUuid) + .databaseName("db1") + .tableId("tbl1") + .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + .build()) .build(); - TableStatsDto dto = - service.upsertTableStats( - tableUuid, - UpsertTableStatsRequest.builder() - .databaseName("db1") - .tableName("tbl1") - 
.stats(stats) - .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) - .build()); - - assertThat(dto.getTableUuid()).isEqualTo(tableUuid); - assertThat(dto.getDatabaseName()).isEqualTo("db1"); - assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); - assertThat(dto.getTableProperties()).containsEntry("maintenance.optimizer.ofd.enabled", "true"); + Table result = service.upsertTableStats(input); + + assertThat(result.getTableUuid()).isEqualTo(tableUuid); + assertThat(result.getDatabaseName()).isEqualTo("db1"); + assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(result.getTableProperties()) + .containsEntry("maintenance.optimizer.ofd.enabled", "true"); + assertThat(result.getUpdatedAt()).isNotNull(); assertThat(statsRepository.findById(tableUuid)).isPresent(); } @Test void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - TableStats firstStats = - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) + Table first = + Table.builder() + .tableUuid(tableUuid) + .databaseName("db1") + .tableId("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta( + TableStats.CommitDelta.builder() + .numFilesAdded(5L) + .numFilesDeleted(1L) + .build()) + .build()) .build(); - TableStats secondStats = - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) + Table second = + Table.builder() + .tableUuid(tableUuid) + .databaseName("db1") + .tableId("tbl1") + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta( + 
TableStats.CommitDelta.builder() + .numFilesAdded(3L) + .numFilesDeleted(0L) + .build()) + .build()) .build(); - service.upsertTableStats( - tableUuid, - UpsertTableStatsRequest.builder() - .databaseName("db1") - .tableName("tbl1") - .stats(firstStats) - .build()); - TableStatsDto dto = - service.upsertTableStats( - tableUuid, - UpsertTableStatsRequest.builder() - .databaseName("db1") - .tableName("tbl1") - .stats(secondStats) - .build()); - - // Current row reflects the latest upsert's snapshot. - assertThat(dto.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + service.upsertTableStats(first); + Table result = service.upsertTableStats(second); + + assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); assertThat(statsRepository.findAll()).hasSize(1); - // History has one row per upsert with the raw delta from each call. List history = statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); assertThat(history).hasSize(2); - // Newest first. assertThat(history.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(history.get(1).getDelta().getNumFilesAdded()).isEqualTo(5L); } @@ -187,7 +182,7 @@ void listTableOperations_filtersByOperationTypeAndStatus() { Optional.empty(), Optional.empty(), Optional.empty())) - .extracting(dto -> dto.getId()) + .extracting(op -> op.getId()) .containsExactly(pendingId); } } From ad11533166f07999d64d89ec61b96336b64d8fdd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:07:18 -0700 Subject: [PATCH 062/104] refactor(optimizer-analyzer): consume model/ types and ModelDbMapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt the analyzer to the entity→db rename and the removal of factory methods on model/ data types. 
The analyzer's strategy interface and policy now work in pure model/ types (TableOperationsHistory instead of TableOperationsHistoryRow); AnalyzerRunner uses ModelDbMapper to bridge db rows to those types. - OperationAnalyzer.shouldSchedule: parameter type changes from db.TableOperationsHistoryRow to model.TableOperationsHistory. - CadencePolicy: same parameter change; remove String→HistoryStatus parsing — the model carries it as a typed enum. - CadenceBasedOrphanFilesDeletionAnalyzer: signature update. - AnalyzerRunner: inject ModelDbMapper. Imports switch from entity/ to db/. Repository find / findLatestPerTable now take db.OperationType (translated via dbMapper.toDbOperationType). Row → model translation uses dbMapper.toTable / .toOperation / .toHistory. Persisting a new PENDING operation goes through dbMapper.toRow(op) instead of the removed TableOperation.toRow(). - Tests rewritten with the new types and pass a real ModelDbMapper into the runner-under-test. --- .../openhouse/analyzer/AnalyzerRunner.java | 37 ++++++------ ...denceBasedOrphanFilesDeletionAnalyzer.java | 4 +- .../openhouse/analyzer/CadencePolicy.java | 9 ++- .../openhouse/analyzer/OperationAnalyzer.java | 4 +- .../analyzer/AnalyzerRunnerTest.java | 60 ++++++++++--------- ...eBasedOrphanFilesDeletionAnalyzerTest.java | 23 +++---- 6 files changed, 71 insertions(+), 66 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 7b729ab9c..0be4a5a34 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; import 
com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -36,6 +36,7 @@ public class AnalyzerRunner { private final TableStatsRepository statsRepo; private final TableOperationsRepository operationsRepo; private final TableOperationsHistoryRepository historyRepo; + private final ModelDbMapper dbMapper; /** * Run the analysis loop for {@code operationType} across all databases, with no filters. @@ -75,33 +76,35 @@ private void analyzeDatabase( Optional tableName, Optional tableUuid) { - String operationType = analyzer.getOperationType().name(); + com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = + dbMapper.toDbOperationType(analyzer.getOperationType()); // Pre-load the small sides of the joins — bounded by tables in this database. 
Map currentOps = operationsRepo - .find(operationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) + .find( + dbOperationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) .stream() .filter(e -> e.getTableUuid() != null) + .map(dbMapper::toOperation) .collect( Collectors.toMap( - TableOperationsRow::getTableUuid, - TableOperation::from, - TableOperation::mostRecent)); + TableOperation::getTableUuid, op -> op, TableOperation::mostRecent)); - Map latestHistory = - historyRepo.findLatestPerTable(operationType).stream() + Map latestHistory = + historyRepo.findLatestPerTable(dbOperationType).stream() .filter(r -> r.getTableUuid() != null) + .map(dbMapper::toHistory) .collect( Collectors.toMap( - TableOperationsHistoryRow::getTableUuid, - r -> r, + TableOperationsHistory::getTableUuid, + h -> h, AnalyzerRunner::moreRecentHistory)); List
tables = statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() .filter(row -> row.getTableUuid() != null) - .map(Table::from) + .map(dbMapper::toTable) .collect(Collectors.toList()); /* @@ -122,11 +125,11 @@ private void analyzeDatabase( } Optional currentOp = Optional.ofNullable(currentOps.get(table.getTableUuid())); - Optional entry = + Optional entry = Optional.ofNullable(latestHistory.get(table.getTableUuid())); if (analyzer.shouldSchedule(table, currentOp, entry)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); - operationsRepo.save(op.toRow()); + operationsRepo.save(dbMapper.toRow(op)); log.info( "Created PENDING {} operation for table {}.{}", analyzer.getOperationType(), @@ -136,9 +139,9 @@ private void analyzeDatabase( }); } - private static TableOperationsHistoryRow moreRecentHistory( - TableOperationsHistoryRow a, TableOperationsHistoryRow b) { - Comparator byCompletedAt = + private static TableOperationsHistory moreRecentHistory( + TableOperationsHistory a, TableOperationsHistory b) { + Comparator byCompletedAt = Comparator.comparing(r -> r.getCompletedAt() != null ? r.getCompletedAt() : Instant.EPOCH); return byCompletedAt.compare(a, b) >= 0 ? 
a : b; } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index 8c7aef286..394b77eca 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Duration; import java.util.Optional; import org.springframework.beans.factory.annotation.Autowired; @@ -45,7 +45,7 @@ public boolean isEnabled(Table table) { public boolean shouldSchedule( Table table, Optional currentOp, - Optional latestHistory) { + Optional latestHistory) { return cadencePolicy.shouldSchedule(currentOp, latestHistory); } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index bec541bfc..6ce2db80c 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import 
com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Duration; import java.time.Instant; import java.util.Optional; @@ -42,17 +42,16 @@ public class CadencePolicy { * @param latestHistory the most recent history entry for this (table, type), or empty */ public boolean shouldSchedule( - Optional currentOp, Optional latestHistory) { + Optional currentOp, Optional latestHistory) { if (currentOp.isPresent() && currentOp.get().getStatus() != OperationStatus.CANCELED) { return false; } return latestHistory.map(this::readyAfterHistoryEntry).orElse(true); } - private boolean readyAfterHistoryEntry(TableOperationsHistoryRow entry) { - HistoryStatus status = HistoryStatus.valueOf(entry.getStatus()); + private boolean readyAfterHistoryEntry(TableOperationsHistory entry) { Duration interval = - status == HistoryStatus.FAILED ? failureRetryInterval : successRetryInterval; + entry.getStatus() == HistoryStatus.FAILED ? 
failureRetryInterval : successRetryInterval; return Duration.between(entry.getCompletedAt(), Instant.now()).compareTo(interval) > 0; } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index a7792c7ac..ba64f558a 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.util.Optional; /** @@ -37,5 +37,5 @@ public interface OperationAnalyzer { boolean shouldSchedule( Table table, Optional currentOp, - Optional latestHistory); + Optional latestHistory); } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 6ff8739fa..fbd2fecbf 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -2,16 +2,16 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.linkedin.openhouse.optimizer.entity.TableOperationsRow; -import 
com.linkedin.openhouse.optimizer.entity.TableStatsRow; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -30,7 +30,8 @@ class AnalyzerRunnerTest { private static final OperationType OFD_TYPE = OperationType.ORPHAN_FILES_DELETION; - private static final String OFD = OFD_TYPE.name(); + private static final com.linkedin.openhouse.optimizer.db.OperationType OFD_DB = + com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION; private static final String DB = "db1"; @Mock private TableStatsRepository statsRepo; @@ -38,11 +39,13 @@ class AnalyzerRunnerTest { @Mock private TableOperationsHistoryRepository historyRepo; @Mock private OperationAnalyzer analyzer; + private final ModelDbMapper dbMapper = new ModelDbMapper(); private AnalyzerRunner runner; @BeforeEach void setUp() { - runner = new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo); + runner = + new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo, dbMapper); when(analyzer.getOperationType()).thenReturn(OFD_TYPE); when(statsRepo.findDistinctDatabaseNames()).thenReturn(List.of(DB)); } @@ -52,12 +55,11 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); + Table expectedTable = 
dbMapper.toTable(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) .thenReturn(true); @@ -70,8 +72,9 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { assertThat(saved.getTableUuid()).isEqualTo("uuid-1"); assertThat(saved.getDatabaseName()).isEqualTo(DB); assertThat(saved.getTableName()).isEqualTo("tbl1"); - assertThat(saved.getOperationType()).isEqualTo(OFD); - assertThat(saved.getStatus()).isEqualTo("PENDING"); + assertThat(saved.getOperationType()).isEqualTo(OFD_DB); + assertThat(saved.getStatus()) + .isEqualTo(com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING); assertThat(saved.getId()).isNotNull(); } @@ -80,24 +83,23 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = - Table.builder().tableUuid("uuid-1").databaseName(DB).tableId("tbl1").build(); + Table expectedTable = dbMapper.toTable(statsEntity); TableOperationsRow existingEntity = TableOperationsRow.builder() .id("existing-op-id") - .status("PENDING") + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING) .tableUuid("uuid-1") - .operationType(OFD) + .operationType(OFD_DB) .createdAt(Instant.now()) .build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(existingEntity)); - 
when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(List.of(existingEntity)); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation existingOp = TableOperation.from(existingEntity); + TableOperation existingOp = dbMapper.toOperation(existingEntity); when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); @@ -111,11 +113,11 @@ void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); + Table expectedTable = dbMapper.toTable(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); runner.analyze(OFD_TYPE); @@ -128,23 +130,23 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = Table.builder().tableUuid("uuid-1").databaseName(DB).build(); + Table expectedTable = dbMapper.toTable(statsEntity); TableOperationsRow scheduled = TableOperationsRow.builder() .id("op-id") - .status("SCHEDULED") + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) .tableUuid("uuid-1") - .operationType(OFD) + .operationType(OFD_DB) .createdAt(Instant.now()) .build(); when(statsRepo.find(DB, null, 
null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(List.of(scheduled)); - when(historyRepo.findLatestPerTable(OFD)).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(List.of(scheduled)); + when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation scheduledOp = TableOperation.from(scheduled); + TableOperation scheduledOp = dbMapper.toOperation(scheduled); when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); @@ -158,8 +160,8 @@ void analyze_skipsTable_whenTableUuidIsNull() { TableStatsRow statsEntity = TableStatsRow.builder().databaseName(DB).build(); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(anyString())).thenReturn(Collections.emptyList()); + when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); + when(historyRepo.findLatestPerTable(any())).thenReturn(Collections.emptyList()); runner.analyze(OFD_TYPE); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 30030a2fb..633c9dceb 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -2,10 +2,11 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.entity.TableOperationsHistoryRow; +import 
com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Duration; import java.time.Instant; import java.util.Collections; @@ -67,7 +68,7 @@ void shouldSchedule_noOp_successHistoryAfterCooldown_returnsTrue() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("SUCCESS", longAgo)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, longAgo)))) .isTrue(); } @@ -78,7 +79,7 @@ void shouldSchedule_noOp_successHistoryBeforeCooldown_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("SUCCESS", recent)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, recent)))) .isFalse(); } @@ -89,7 +90,7 @@ void shouldSchedule_noOp_failedHistoryAfterRetry_returnsTrue() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("FAILED", longAgo)))) + Optional.of(historyWithStatus(HistoryStatus.FAILED, longAgo)))) .isTrue(); } @@ -100,7 +101,7 @@ void shouldSchedule_noOp_failedHistoryBeforeRetry_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus("FAILED", recent)))) + Optional.of(historyWithStatus(HistoryStatus.FAILED, recent)))) .isFalse(); } @@ -133,7 +134,7 @@ void shouldSchedule_scheduled_returnsFalse_regardlessOfHistory() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.of(opWithStatus(OperationStatus.SCHEDULED)), - Optional.of(historyWithStatus("SUCCESS", historyAt)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, historyAt)))) .isFalse(); } @@ -146,7 +147,7 @@ void shouldSchedule_canceled_successHistoryAfterCooldown_returnsTrue() { analyzer.shouldSchedule( 
tableWithProperty("true"), Optional.of(opWithStatus(OperationStatus.CANCELED)), - Optional.of(historyWithStatus("SUCCESS", longAgo)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, longAgo)))) .isTrue(); } @@ -157,7 +158,7 @@ void shouldSchedule_canceled_successHistoryBeforeCooldown_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.of(opWithStatus(OperationStatus.CANCELED)), - Optional.of(historyWithStatus("SUCCESS", recent)))) + Optional.of(historyWithStatus(HistoryStatus.SUCCESS, recent)))) .isFalse(); } @@ -190,11 +191,11 @@ private TableOperation opWithStatus(OperationStatus status) { return TableOperation.builder().status(status).build(); } - private TableOperationsHistoryRow historyWithStatus(String status, Instant completedAt) { - return TableOperationsHistoryRow.builder() + private TableOperationsHistory historyWithStatus(HistoryStatus status, Instant completedAt) { + return TableOperationsHistory.builder() .id("hist-id") .tableUuid("test-uuid") - .operationType("ORPHAN_FILES_DELETION") + .operationType(com.linkedin.openhouse.optimizer.model.OperationType.ORPHAN_FILES_DELETION) .completedAt(completedAt) .status(status) .build(); From 25d98aaacc7ffd4c506b1f43a896210725b83f9a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:10:16 -0700 Subject: [PATCH 063/104] feat(optimizer): restore batch CAS methods on TableOperationsRepository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimizer-1 db/ rewrite accidentally dropped the batch CAS helpers used by the scheduler. Restore them with db/-typed enum parameters and JPQL queries that compare against fully-qualified db.OperationStatus constants. - markSchedulingBatch(ids, scheduledAt): PENDING → SCHEDULING. - markScheduledBatch(ids, jobId): SCHEDULING → SCHEDULED. - markPendingBatch(ids): SCHEDULING → PENDING (job-launch failure retry). 
- cancelDuplicatePendingBatch(operationType, keepIds): drop dupe PENDING rows for an operation type, keeping the supplied IDs. --- .../repository/TableOperationsRepository.java | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index e9bc1c8b3..962a108a2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -3,8 +3,10 @@ import com.linkedin.openhouse.optimizer.db.OperationStatus; import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import java.time.Instant; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; @@ -28,4 +30,59 @@ List find( @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, @Param("tableName") String tableName); + + /** + * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of + * rows transitioned. Rows already claimed by another instance are skipped silently; callers must + * re-query if they need the precise list. 
+ */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," + + " r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") + int markSchedulingBatch( + @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + + /** + * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in + * SCHEDULING. Returns the number of rows transitioned. + */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," + + " r.jobId = :jobId, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") + int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); + + /** + * Batch transition: SCHEDULING → PENDING for every {@code id} still in SCHEDULING. Used by the + * scheduler to release claimed rows when job submission fails so the next pass can retry. Returns + * the number of rows reverted. + */ + @Modifying + @Query( + "UPDATE TableOperationsRow r " + + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," + + " r.scheduledAt = NULL, r.version = r.version + 1 " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") + int markPendingBatch(@Param("ids") List ids); + + /** + * Batch-delete duplicate PENDING rows for the given operation type, keeping only the IDs in + * {@code keepIds}. Used by the scheduler to deduplicate before claiming. 
+ */ + @Modifying + @Query( + "DELETE FROM TableOperationsRow r " + + "WHERE r.operationType = :operationType " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING " + + "AND r.id NOT IN :keepIds") + int cancelDuplicatePendingBatch( + @Param("operationType") OperationType operationType, @Param("keepIds") List keepIds); } From 188713d7479b0d1c0425b9e753e0da2df25915b4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:20:50 -0700 Subject: [PATCH 064/104] docs(optimizer): comment every field on opt-0 api/ and model/ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several fields under api/model/ and model/ were left undocumented in the earlier per-layer-types passes. Audit + fill them in: api/model/TableOperationsHistoryDto: databaseName, tableName, operationType — add display/role docs. api/model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. api/model/TableStats inner classes: - SnapshotMetrics: clusterId, tableVersion, tableLocation, tableSizeBytes — add field docs. - CommitDelta: numFilesAdded, numFilesDeleted, addedSizeBytes, deletedSizeBytes — add field docs. model/Table: tableUuid, databaseName, tableId, tableProperties, stats — add field docs. model/TableStats: same field-doc additions on SnapshotMetrics and CommitDelta as the api/ counterpart. model/OperationStatus: PENDING, SCHEDULING, SCHEDULED, CANCELED — add enum-value docs. model/OperationType: ORPHAN_FILES_DELETION — add enum-value doc. model/HistoryStatus: SUCCESS, FAILED — add enum-value docs. model/TableStatsHistory: id, tableUuid, databaseName, tableName — add field docs. 
--- .../optimizer/api/model/HistoryStatus.java | 4 ++++ .../api/model/TableOperationsHistoryDto.java | 5 +++++ .../optimizer/api/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/HistoryStatus.java | 4 ++++ .../optimizer/model/OperationStatus.java | 8 ++++++++ .../optimizer/model/OperationType.java | 2 ++ .../openhouse/optimizer/model/Table.java | 7 +++++++ .../openhouse/optimizer/model/TableStats.java | 17 +++++++++++++++++ .../optimizer/model/TableStatsHistory.java | 7 +++++++ 9 files changed, 71 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index 2fbcf6235..dc52f863e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -2,6 +2,10 @@ /** Terminal states for a completed Spark maintenance job. */ public enum HistoryStatus { + + /** The Spark job for this operation completed successfully. */ SUCCESS, + + /** The Spark job for this operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index d9fa1f387..4e247c7ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -19,8 +19,13 @@ public class TableOperationsHistoryDto { /** Stable table identity from the Tables Service. */ private String tableUuid; + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. 
*/ private String tableName; + + /** The type of maintenance operation this history row records. */ private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index de268ffe7..dcb360330 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -32,10 +32,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -47,9 +56,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. 
*/ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java index d29c88719..97b8e2992 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -8,6 +8,10 @@ *

<p>Intentionally separate from the wire-API and DB representations. */ public enum HistoryStatus { + + /** The operation completed successfully. */ SUCCESS, + + /** The operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java index 66f213c73..f284fedaf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -8,8 +8,16 @@ *

<p>Intentionally separate from the wire-API and DB representations. */ public enum OperationStatus { + + /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, + + /** Scheduler has claimed the row and is launching a job; jobId not yet recorded. */ SCHEDULING, + + /** Job has been submitted to the Jobs Service; the row carries a {@code jobId}. */ SCHEDULED, + + /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ CANCELED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index bea44018b..8f4fe35a8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -6,5 +6,7 @@ * supported operations without churning either boundary. */ public enum OperationType { + + /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index dc0a16a0c..bca7e2420 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -22,12 +22,19 @@ @AllArgsConstructor public class Table { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; + + /** Database the table lives in. */ private String databaseName; + + /** Iceberg table identifier (table name, not UUID). */ private String tableId; + /** Current table-property map (e.g. maintenance opt-in flags). Never null.
*/ @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ private TableStats stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 3b56196ea..94d0a1655 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -27,10 +27,19 @@ public class TableStats { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { + + /** Cluster the table lives on. */ private String clusterId; + + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; + /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; } @@ -42,9 +51,17 @@ public static class SnapshotMetrics { @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public static class CommitDelta { + + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. 
*/ private Long deletedSizeBytes; } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java index 5cdad1918..53bb54d1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -20,9 +20,16 @@ @AllArgsConstructor public class TableStatsHistory { + /** UUID primary key — set by the caller, not generated server-side. */ private String id; + + /** Stable table identity from the Tables Service. */ private String tableUuid; + + /** Denormalized database name for display. */ private String databaseName; + + /** Denormalized table name for display. */ private String tableName; /** Snapshot + delta for this commit event. */ From 8d642732244b002f1f7926ae81e98b27f95b1881 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:28:57 -0700 Subject: [PATCH 065/104] refactor(optimizer): remove clusterId from SnapshotMetrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clusterId is per-table-immutable in OpenHouse — it never changes after the table is created — so persisting and transmitting it on every snapshot is dead weight. Remove from the wire and internal representations. - api/model/TableStats.SnapshotMetrics: drop clusterId. - model/TableStats.SnapshotMetrics: drop clusterId. - model/mapper/ApiModelMapper: drop the clusterId hop in toModelSnapshot and toApiSnapshot. 
--- .../com/linkedin/openhouse/optimizer/api/model/TableStats.java | 3 --- .../com/linkedin/openhouse/optimizer/model/TableStats.java | 3 --- .../openhouse/optimizer/model/mapper/ApiModelMapper.java | 2 -- 3 files changed, 8 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index dcb360330..096eecd1e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -33,9 +33,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 94d0a1655..56291e510 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -28,9 +28,6 @@ public class TableStats { @JsonIgnoreProperties(ignoreUnknown = true) public static class SnapshotMetrics { - /** Cluster the table lives on. */ - private String clusterId; - /** Iceberg metadata version pointer for this snapshot. 
*/ private String tableVersion; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java index d77b3a253..31141ff44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java @@ -215,7 +215,6 @@ private TableStats.SnapshotMetrics toModelSnapshot( return null; } return TableStats.SnapshotMetrics.builder() - .clusterId(apiValue.getClusterId()) .tableVersion(apiValue.getTableVersion()) .tableLocation(apiValue.getTableLocation()) .tableSizeBytes(apiValue.getTableSizeBytes()) @@ -229,7 +228,6 @@ private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics to return null; } return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .clusterId(modelValue.getClusterId()) .tableVersion(modelValue.getTableVersion()) .tableLocation(modelValue.getTableLocation()) .tableSizeBytes(modelValue.getTableSizeBytes()) From c1ad24615aaae6dd5c5012f2fdd591f8c0c01712 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:34:17 -0700 Subject: [PATCH 066/104] refactor(optimizer): comment every db/ field; drop clusterId and version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two cleanups on the DB layer, plus a doc audit. clusterId removal: - db/SnapshotMetrics: drop clusterId. - model/mapper/ModelDbMapper: drop clusterId from toModelSnapshot and toDbSnapshot. - Repository tests: drop .clusterId("cl1") from builders. (The api/ and model/ copies were retired in the prior optimizer-0 commit; this completes the removal at the db edge.) version removal: - db/TableOperationsRow: drop the `version` field. 
The batch CAS pattern's atomicity comes from filtering on `status` (PENDING → SCHEDULING is unambiguous on status alone); the version bump was decorative. - table_operations schema: drop the `version BIGINT` column. - TableOperationsRepository: remove `r.version = r.version + 1` from markSchedulingBatch / markScheduledBatch / markPendingBatch query strings. - model/mapper/ModelDbMapper.toRow: stop initializing version on the row builder. Doc audit on db/: - db/SnapshotMetrics, db/CommitDeltaMetrics: doc every field. - db/HistoryStatus, db/OperationStatus, db/OperationType: doc every enum value. - db/TableOperationsRow, db/TableOperationsHistoryRow, db/TableStatsRow, db/TableStatsHistoryRow: doc every field. --- .../optimizer/db/CommitDeltaMetrics.java | 7 +++++++ .../openhouse/optimizer/db/HistoryStatus.java | 4 ++++ .../optimizer/db/OperationStatus.java | 8 ++++++++ .../openhouse/optimizer/db/OperationType.java | 2 ++ .../optimizer/db/SnapshotMetrics.java | 6 +++++- .../db/TableOperationsHistoryRow.java | 6 ++++++ .../optimizer/db/TableOperationsRow.java | 18 ++++++++---------- .../optimizer/db/TableStatsHistoryRow.java | 7 +++++++ .../openhouse/optimizer/db/TableStatsRow.java | 5 +++++ .../optimizer/model/mapper/ModelDbMapper.java | 3 --- .../repository/TableOperationsRepository.java | 6 +++--- .../src/main/resources/db/optimizer-schema.sql | 1 - .../TableStatsHistoryRepositoryTest.java | 2 +- .../repository/TableStatsRepositoryTest.java | 3 +-- 14 files changed, 57 insertions(+), 21 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java index 8094d28b8..5a30c9afd 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/CommitDeltaMetrics.java @@ -14,8 +14,15 @@ 
@JsonIgnoreProperties(ignoreUnknown = true) public class CommitDeltaMetrics { + /** Number of data files this commit added to the table. */ private Long numFilesAdded; + + /** Number of data files this commit removed from the table. */ private Long numFilesDeleted; + + /** Total bytes added by this commit. */ private Long addedSizeBytes; + + /** Total bytes removed by this commit. */ private Long deletedSizeBytes; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java index 94e573968..3680735f4 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/HistoryStatus.java @@ -6,6 +6,10 @@ *

<p>Self-contained: no references to api/ or model/ types. */ public enum HistoryStatus { + + /** The Spark job for this operation completed successfully. */ SUCCESS, + + /** The Spark job for this operation failed. */ FAILED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java index 4e9161693..0a2e07483 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationStatus.java @@ -6,8 +6,16 @@ *

<p>Self-contained: no references to api/ or model/ types. */ public enum OperationStatus { + + /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, + + /** Scheduler has claimed the row and is launching a job; jobId not yet recorded. */ SCHEDULING, + + /** Job has been submitted to the Jobs Service; the row carries a {@code jobId}. */ SCHEDULED, + + /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ CANCELED } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java index 3a896e415..e4caf549b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/OperationType.java @@ -8,5 +8,7 @@ * {@code @Enumerated(EnumType.STRING)}. */ public enum OperationType { + + /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java index 22d222172..452b35097 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/SnapshotMetrics.java @@ -14,9 +14,13 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class SnapshotMetrics { - private String clusterId; + /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; + + /** Filesystem path (or URI) of the table's storage root. */ private String tableLocation; + + /** Total on-disk size of the table at this snapshot, in bytes. */ private Long tableSizeBytes; /** Total number of data files as of the latest snapshot — used for bin-packing.
*/ diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java index 2e1230181..5f4a598d9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java @@ -47,22 +47,28 @@ public class TableOperationsHistoryRow { @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable table identity from the Tables Service. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** The type of maintenance operation this history row records. */ @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; + /** When the operation completed, as recorded by the complete endpoint. */ @Column(name = "completed_at", nullable = false) private Instant completedAt; + /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. 
*/ @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) private HistoryStatus status; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java index 9652214d3..dfe40d402 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsRow.java @@ -44,44 +44,42 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableOperationsRow { + /** Client-generated UUID identifying this specific operation recommendation. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** The type of maintenance operation this row recommends. */ @Enumerated(EnumType.STRING) @Column(name = "operation_type", nullable = false, length = 50) private OperationType operationType; + /** Lifecycle state — drives the scheduler's CAS claim and the analyzer's eligibility check. */ @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) private OperationStatus status; + /** When the analyzer first created this row. Set on insert; never updated. */ @Column(name = "created_at", nullable = false) private Instant createdAt; + /** When the scheduler last submitted a job for this row. {@code null} while {@code PENDING}. 
*/ @Column(name = "scheduled_at") private Instant scheduledAt; /** Spark job ID written by the scheduler at claim time. Internal-only; never exposed on wire. */ @Column(name = "job_id", length = 255) private String jobId; - - /** - * Monotonically-increasing version for application-level optimistic concurrency control. The - * scheduler's batch CAS transitions match this in the WHERE clause and bump it by one on UPDATE, - * ensuring two scheduler instances can't both move the same row out of PENDING. Not managed by - * JPA optimistic locking — kept as a plain column so the WHERE-clause-based CAS pattern works - * portably across MySQL and H2. - */ - @Column(name = "version") - private Long version; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java index 71c17b582..4eaee2a6f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsHistoryRow.java @@ -41,27 +41,34 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsHistoryRow { + /** UUID primary key — set by the caller, not generated server-side. */ @Id @Column(name = "id", nullable = false, length = 36) private String id; + /** Stable Iceberg table UUID. */ @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** Snapshot fields at commit time. Stored as a JSON blob in the {@code snapshot} column. 
*/ @Type(type = "json") @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; + /** Per-commit delta counters. Stored as a JSON blob in the {@code delta} column. */ @Type(type = "json") @Column(name = "delta", columnDefinition = "TEXT") private CommitDeltaMetrics delta; + /** When this history row was recorded (commit time). */ @Column(name = "recorded_at", nullable = false) private Instant recordedAt; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java index 8d869ff1e..165247b6a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableStatsRow.java @@ -35,20 +35,25 @@ @AllArgsConstructor(access = AccessLevel.PROTECTED) public class TableStatsRow { + /** Stable Iceberg table UUID. Primary key. */ @Id @Column(name = "table_uuid", nullable = false, length = 36) private String tableUuid; + /** Denormalized database name. */ @Column(name = "database_name", nullable = false, length = 128) private String databaseName; + /** Denormalized table name. */ @Column(name = "table_name", nullable = false, length = 128) private String tableName; + /** Latest snapshot fields. Stored as a JSON blob in the {@code snapshot} column. */ @Type(type = "json") @Column(name = "snapshot", columnDefinition = "TEXT") private SnapshotMetrics snapshot; + /** Current table-property map (e.g. maintenance opt-in flags). Stored as JSON. 
*/ @Type(type = "json") @Column(name = "table_properties", columnDefinition = "TEXT") private Map tableProperties; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java index 7a454c78c..59d7e8680 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java @@ -62,7 +62,6 @@ public TableOperationsRow toRow(TableOperation op) { .status(toDbOperationStatus(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) - .version(0L) .build(); } @@ -208,7 +207,6 @@ private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { return null; } return TableStats.SnapshotMetrics.builder() - .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) .tableSizeBytes(v.getTableSizeBytes()) @@ -221,7 +219,6 @@ private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { return null; } return SnapshotMetrics.builder() - .clusterId(v.getClusterId()) .tableVersion(v.getTableVersion()) .tableLocation(v.getTableLocation()) .tableSizeBytes(v.getTableSizeBytes()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 962a108a2..8baddfe42 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -40,7 +40,7 @@ List find( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," - + " 
r.scheduledAt = :scheduledAt, r.version = r.version + 1 " + + " r.scheduledAt = :scheduledAt " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") int markSchedulingBatch( @@ -54,7 +54,7 @@ int markSchedulingBatch( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," - + " r.jobId = :jobId, r.version = r.version + 1 " + + " r.jobId = :jobId " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); @@ -68,7 +68,7 @@ int markSchedulingBatch( @Query( "UPDATE TableOperationsRow r " + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," - + " r.scheduledAt = NULL, r.version = r.version + 1 " + + " r.scheduledAt = NULL " + "WHERE r.id IN :ids " + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") int markPendingBatch(@Param("ids") List ids); diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 24b367549..892c1c55f 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -10,7 +10,6 @@ CREATE TABLE IF NOT EXISTS table_operations ( created_at TIMESTAMP(6) NOT NULL, scheduled_at TIMESTAMP(6), job_id VARCHAR(255), - version BIGINT, -- TODO: per-operation metric columns will be added as operations are onboarded. 
PRIMARY KEY (id) ); diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index dbd8cc686..536b72e35 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -132,7 +132,7 @@ private static TableStatsHistoryRow buildRow( .tableUuid(tableUuid) .databaseName(databaseName) .tableName(tableName) - .snapshot(SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build()) + .snapshot(SnapshotMetrics.builder().tableSizeBytes(1024L).build()) .delta( CommitDeltaMetrics.builder() .numFilesAdded(numFilesAdded) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index 493eb88b6..f9cc28d57 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -24,8 +24,7 @@ class TableStatsRepositoryTest { @Test void saveAndFindById() { String tableUuid = UUID.randomUUID().toString(); - SnapshotMetrics snapshot = - SnapshotMetrics.builder().clusterId("cl1").tableSizeBytes(1024L).build(); + SnapshotMetrics snapshot = SnapshotMetrics.builder().tableSizeBytes(1024L).build(); repository.save( TableStatsRow.builder() From c72aae8ed9e324591b88cf54f993400370f087b3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 15:58:31 -0700 Subject: [PATCH 067/104] =?UTF-8?q?refactor(optimizer):=20move=20api?= 
=?UTF-8?q?=E2=86=94model=20conversion=20onto=20api=20types;=20delete=20Ap?= =?UTF-8?q?iModelMapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the api/model boundary mapper with conversion methods on the types themselves. The api layer now imports model/ directly via to/from methods — controllers and other api-edge callers no longer inject a mapper bean. The dependency direction is a strict downward chain: api → model → db api types know about model types (and call model methods); model types know about db types (next round). db remains import-free. No central mapper, no risk of a cycle through a hub class. api/model/* changes (each gets a `toModel()` instance method + a static `fromModel(...)` factory): - TableOperationsDto ↔ model.TableOperation. - TableOperationsHistoryDto ↔ model.TableOperationsHistory. - TableStatsDto ↔ model.Table. - TableStatsHistoryDto ↔ model.TableStatsHistory. - UpsertTableStatsRequest → model.Table (one-way; takes the path-var tableUuid; updatedAt is server-stamped). - TableStats (+ SnapshotMetrics + CommitDelta inner) ↔ model.TableStats. - OperationType / OperationStatus / HistoryStatus (api enums) ↔ model enums. CompleteOperationRequest keeps its fields plain — callers extract `operationId` and `status.toModel()` directly; no wrapper needed. Delete services/optimizer/.../model/mapper/ApiModelMapper.java. 
--- .../optimizer/api/model/HistoryStatus.java | 12 +- .../optimizer/api/model/OperationStatus.java | 13 +- .../optimizer/api/model/OperationType.java | 12 +- .../api/model/TableOperationsDto.java | 32 +++ .../api/model/TableOperationsHistoryDto.java | 30 ++ .../optimizer/api/model/TableStats.java | 67 +++++ .../optimizer/api/model/TableStatsDto.java | 29 ++ .../api/model/TableStatsHistoryDto.java | 28 ++ .../api/model/UpsertTableStatsRequest.java | 17 ++ .../model/mapper/ApiModelMapper.java | 263 ------------------ 10 files changed, 237 insertions(+), 266 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java index dc52f863e..0c9ff95da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java @@ -7,5 +7,15 @@ public enum HistoryStatus { SUCCESS, /** The Spark job for this operation failed. */ - FAILED + FAILED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? 
null : HistoryStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java index c97be441b..300c28263 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java @@ -17,5 +17,16 @@ public enum OperationStatus { * operation_type)}. Only the most-recent PENDING row is claimed; older duplicates are CANCELED * before the claim step. */ - CANCELED + CANCELED; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java index 8507bae12..5f325e712 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java @@ -3,5 +3,15 @@ /** Maintenance operation types supported by the continuous optimizer. */ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION; + + /** Convert to the internal-model counterpart. 
*/ + public com.linkedin.openhouse.optimizer.model.OperationType toModel() { + return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + } + + /** Build the api-layer enum from the internal-model counterpart. */ + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index d41bd6906..db8ef1039 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -39,4 +40,35 @@ public class TableOperationsDto { /** Job ID returned by the Jobs Service after successful submission. */ private String jobId; + + /** Convert to the internal-model counterpart. */ + public TableOperation toModel() { + return TableOperation.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .status(status == null ? null : status.toModel()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableOperationsDto fromModel(TableOperation op) { + if (op == null) { + return null; + } + return TableOperationsDto.builder() + .id(op.getId()) + .tableUuid(op.getTableUuid()) + .databaseName(op.getDatabaseName()) + .tableName(op.getTableName()) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) + .createdAt(op.getCreatedAt()) + .scheduledAt(op.getScheduledAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 4e247c7ce..935435040 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -33,4 +34,33 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatus status; + + /** Convert to the internal-model counterpart. */ + public TableOperationsHistory toModel() { + return TableOperationsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toModel()) + .completedAt(completedAt) + .status(status == null ? null : status.toModel()) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + if (h == null) { + return null; + } + return TableOperationsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .operationType(OperationType.fromModel(h.getOperationType())) + .completedAt(h.getCompletedAt()) + .status(HistoryStatus.fromModel(h.getStatus())) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java index 096eecd1e..c75d21d75 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java @@ -25,6 +25,25 @@ public class TableStats { /** Delta fields — accumulated across commit events. */ private CommitDelta delta; + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.builder() + .snapshot(snapshot == null ? null : snapshot.toModel()) + .delta(delta == null ? null : delta.toModel()) + .build(); + } + + /** Build the api-layer payload from the internal-model counterpart. */ + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { + return null; + } + return TableStats.builder() + .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(CommitDelta.fromModel(m.getDelta())) + .build(); + } + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) @@ -44,6 +63,30 @@ public static class SnapshotMetrics { /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; + + /** Convert to the internal-model counterpart. 
*/ + public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + .tableVersion(tableVersion) + .tableLocation(tableLocation) + .tableSizeBytes(tableSizeBytes) + .numCurrentFiles(numCurrentFiles) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. */ + public static SnapshotMetrics fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + if (m == null) { + return null; + } + return SnapshotMetrics.builder() + .tableVersion(m.getTableVersion()) + .tableLocation(m.getTableLocation()) + .tableSizeBytes(m.getTableSizeBytes()) + .numCurrentFiles(m.getNumCurrentFiles()) + .build(); + } } /** Per-commit incremental counters; accumulated across all recorded commit events. */ @@ -65,5 +108,29 @@ public static class CommitDelta { /** Total bytes removed by this commit. */ private Long deletedSizeBytes; + + /** Convert to the internal-model counterpart. */ + public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .addedSizeBytes(addedSizeBytes) + .deletedSizeBytes(deletedSizeBytes) + .build(); + } + + /** Build the api-layer inner object from the internal-model counterpart. 
*/ + public static CommitDelta fromModel( + com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + if (m == null) { + return null; + } + return CommitDelta.builder() + .numFilesAdded(m.getNumFilesAdded()) + .numFilesDeleted(m.getNumFilesDeleted()) + .addedSizeBytes(m.getAddedSizeBytes()) + .deletedSizeBytes(m.getDeletedSizeBytes()) + .build(); + } } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 81dd6b802..82dc552c2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,8 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -31,4 +33,31 @@ public class TableStatsDto { /** When this row was last written. Used for staleness monitoring. */ private Instant updatedAt; + + /** Convert to the internal-model counterpart. */ + public Table toModel() { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? null : stats.toModel()) + .updatedAt(updatedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableStatsDto fromModel(Table t) { + if (t == null) { + return null; + } + return TableStatsDto.builder() + .tableUuid(t.getTableUuid()) + .databaseName(t.getDatabaseName()) + .tableName(t.getTableId()) + .stats(TableStats.fromModel(t.getStats())) + .tableProperties(t.getTableProperties()) + .updatedAt(t.getUpdatedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index 4a994fdb3..b5f971bbf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -30,4 +31,31 @@ public class TableStatsHistoryDto { /** When this history row was recorded. */ private Instant recordedAt; + + /** Convert to the internal-model counterpart. */ + public TableStatsHistory toModel() { + return TableStatsHistory.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .stats(stats == null ? null : stats.toModel()) + .recordedAt(recordedAt) + .build(); + } + + /** Build a wire DTO from the internal-model counterpart. 
*/ + public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + if (h == null) { + return null; + } + return TableStatsHistoryDto.builder() + .id(h.getId()) + .tableUuid(h.getTableUuid()) + .databaseName(h.getDatabaseName()) + .tableName(h.getTableName()) + .stats(TableStats.fromModel(h.getStats())) + .recordedAt(h.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 02290bad5..13476543f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; +import com.linkedin.openhouse.optimizer.model.Table; +import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; @@ -29,4 +31,19 @@ public class UpsertTableStatsRequest { /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; + + /** + * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from + * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service + * stamps it server-side at write time. + */ + public Table toModel(String tableUuid) { + return Table.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableId(tableName) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .stats(stats == null ? 
null : stats.toModel()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java deleted file mode 100644 index 31141ff44..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ApiModelMapper.java +++ /dev/null @@ -1,263 +0,0 @@ -package com.linkedin.openhouse.optimizer.model.mapper; - -import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import java.util.Collections; -import org.springframework.stereotype.Component; - -/** - * Converts between wire-API DTOs and internal {@code model/} domain objects. - * - *

The only place inside {@code model/} where {@code api/} types are referenced — this is the - * boundary at which the internal model meets the wire-API. Pure data types under {@code model/} - * stay free of any api-side imports. - * - *

API-layer enums + payloads are intentionally separate Java types from the internal-model - * counterparts; the two sides evolve independently. This mapper translates by name. - */ -@Component -public class ApiModelMapper { - - // --- TableOperationsDto <-> TableOperation --- - - public TableOperation toOperation(TableOperationsDto dto) { - if (dto == null) { - return null; - } - return TableOperation.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .status(toModelOperationStatus(dto.getStatus())) - .createdAt(dto.getCreatedAt()) - .scheduledAt(dto.getScheduledAt()) - .build(); - } - - public TableOperationsDto toDto(TableOperation op) { - if (op == null) { - return null; - } - return TableOperationsDto.builder() - .id(op.getId()) - .tableUuid(op.getTableUuid()) - .databaseName(op.getDatabaseName()) - .tableName(op.getTableName()) - .operationType(toApiOperationType(op.getOperationType())) - .status(toApiOperationStatus(op.getStatus())) - .createdAt(op.getCreatedAt()) - .scheduledAt(op.getScheduledAt()) - .build(); - } - - // --- TableOperationsHistoryDto <-> TableOperationsHistory --- - - public TableOperationsHistory toHistory(TableOperationsHistoryDto dto) { - if (dto == null) { - return null; - } - return TableOperationsHistory.builder() - .id(dto.getId()) - .tableUuid(dto.getTableUuid()) - .databaseName(dto.getDatabaseName()) - .tableName(dto.getTableName()) - .operationType(toModelOperationType(dto.getOperationType())) - .completedAt(dto.getCompletedAt()) - .status(toModelHistoryStatus(dto.getStatus())) - .build(); - } - - public TableOperationsHistoryDto toDto(TableOperationsHistory history) { - if (history == null) { - return null; - } - return TableOperationsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - 
.tableName(history.getTableName()) - .operationType(toApiOperationType(history.getOperationType())) - .completedAt(history.getCompletedAt()) - .status(toApiHistoryStatus(history.getStatus())) - .build(); - } - - // --- Table <-> TableStatsDto / UpsertTableStatsRequest --- - - /** - * Build an internal-model {@link Table} from a wire upsert request. {@link Table#getUpdatedAt()} - * is intentionally left null — the service stamps it server-side at write time. - */ - public Table toTable(String tableUuid, UpsertTableStatsRequest request) { - if (request == null) { - return null; - } - return Table.builder() - .tableUuid(tableUuid) - .databaseName(request.getDatabaseName()) - .tableId(request.getTableName()) - .tableProperties( - request.getTableProperties() != null - ? request.getTableProperties() - : Collections.emptyMap()) - .stats(toModelStats(request.getStats())) - .build(); - } - - public TableStatsDto toDto(Table table) { - if (table == null) { - return null; - } - return TableStatsDto.builder() - .tableUuid(table.getTableUuid()) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .stats(toApiStats(table.getStats())) - .tableProperties(table.getTableProperties()) - .updatedAt(table.getUpdatedAt()) - .build(); - } - - // --- TableStatsHistory <-> TableStatsHistoryDto --- - - public TableStatsHistoryDto toDto(TableStatsHistory history) { - if (history == null) { - return null; - } - return TableStatsHistoryDto.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .stats(toApiStats(history.getStats())) - .recordedAt(history.getRecordedAt()) - .build(); - } - - // --- TableStats payload --- - - public TableStats toModelStats(com.linkedin.openhouse.optimizer.api.model.TableStats apiStats) { - if (apiStats == null) { - return null; - } - return TableStats.builder() - .snapshot(toModelSnapshot(apiStats.getSnapshot())) - 
.delta(toModelDelta(apiStats.getDelta())) - .build(); - } - - public com.linkedin.openhouse.optimizer.api.model.TableStats toApiStats(TableStats modelStats) { - if (modelStats == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.builder() - .snapshot(toApiSnapshot(modelStats.getSnapshot())) - .delta(toApiDelta(modelStats.getDelta())) - .build(); - } - - // --- enum helpers --- - - public OperationType toModelOperationType( - com.linkedin.openhouse.optimizer.api.model.OperationType apiValue) { - return apiValue == null ? null : OperationType.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationType toApiOperationType( - OperationType modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationType.valueOf(modelValue.name()); - } - - public OperationStatus toModelOperationStatus( - com.linkedin.openhouse.optimizer.api.model.OperationStatus apiValue) { - return apiValue == null ? null : OperationStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.OperationStatus toApiOperationStatus( - OperationStatus modelValue) { - return modelValue == null - ? null - : com.linkedin.openhouse.optimizer.api.model.OperationStatus.valueOf(modelValue.name()); - } - - public HistoryStatus toModelHistoryStatus( - com.linkedin.openhouse.optimizer.api.model.HistoryStatus apiValue) { - return apiValue == null ? null : HistoryStatus.valueOf(apiValue.name()); - } - - public com.linkedin.openhouse.optimizer.api.model.HistoryStatus toApiHistoryStatus( - HistoryStatus modelValue) { - return modelValue == null - ? 
null - : com.linkedin.openhouse.optimizer.api.model.HistoryStatus.valueOf(modelValue.name()); - } - - // --- TableStats inner classes --- - - private TableStats.SnapshotMetrics toModelSnapshot( - com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.SnapshotMetrics.builder() - .tableVersion(apiValue.getTableVersion()) - .tableLocation(apiValue.getTableLocation()) - .tableSizeBytes(apiValue.getTableSizeBytes()) - .numCurrentFiles(apiValue.getNumCurrentFiles()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics toApiSnapshot( - TableStats.SnapshotMetrics modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.SnapshotMetrics.builder() - .tableVersion(modelValue.getTableVersion()) - .tableLocation(modelValue.getTableLocation()) - .tableSizeBytes(modelValue.getTableSizeBytes()) - .numCurrentFiles(modelValue.getNumCurrentFiles()) - .build(); - } - - private TableStats.CommitDelta toModelDelta( - com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta apiValue) { - if (apiValue == null) { - return null; - } - return TableStats.CommitDelta.builder() - .numFilesAdded(apiValue.getNumFilesAdded()) - .numFilesDeleted(apiValue.getNumFilesDeleted()) - .addedSizeBytes(apiValue.getAddedSizeBytes()) - .deletedSizeBytes(apiValue.getDeletedSizeBytes()) - .build(); - } - - private com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta toApiDelta( - TableStats.CommitDelta modelValue) { - if (modelValue == null) { - return null; - } - return com.linkedin.openhouse.optimizer.api.model.TableStats.CommitDelta.builder() - .numFilesAdded(modelValue.getNumFilesAdded()) - .numFilesDeleted(modelValue.getNumFilesDeleted()) - .addedSizeBytes(modelValue.getAddedSizeBytes()) - .deletedSizeBytes(modelValue.getDeletedSizeBytes()) - .build(); - } -} From 
8ae8777422a940e3b730ede226f8801db5618619 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:02:28 -0700 Subject: [PATCH 068/104] =?UTF-8?q?refactor(optimizer):=20move=20model?= =?UTF-8?q?=E2=86=94db=20conversion=20onto=20model=20types;=20delete=20Mod?= =?UTF-8?q?elDbMapper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the model/db boundary mapper with conversion methods on the model types themselves. Same pattern that opt-0 just applied at the api↔model boundary — each layer's type carries the to/from methods for the layer below. The dependency chain after this commit: api → model → db api/* → model/* (added on opt-0). model/* → db/* (this commit). db/* still imports nothing — bottom of the chain. model/* changes (each gets a `toRow()` instance method + a static `fromRow(...)` factory): - Table ↔ db.TableStatsRow (current-state row; snapshot only, delta lives on history rows). - TableOperation ↔ db.TableOperationsRow. - TableOperationsHistory ↔ db.TableOperationsHistoryRow. - TableStatsHistory ↔ db.TableStatsHistoryRow (joins/splits the snapshot + delta columns). - TableStats inner: SnapshotMetrics ↔ db.SnapshotMetrics, CommitDelta ↔ db.CommitDeltaMetrics. TableStats itself exposes toSnapshotRow() / toDeltaRow() for the split-write side and a static fromRows(snapshot, delta) for the join-read side. - OperationType / OperationStatus / HistoryStatus (model enums) ↔ db enums. Delete services/optimizer/.../model/mapper/ModelDbMapper.java. 
--- .../optimizer/model/HistoryStatus.java | 12 +- .../optimizer/model/OperationStatus.java | 12 +- .../optimizer/model/OperationType.java | 12 +- .../openhouse/optimizer/model/Table.java | 37 ++- .../optimizer/model/TableOperation.java | 37 ++- .../model/TableOperationsHistory.java | 30 +++ .../openhouse/optimizer/model/TableStats.java | 71 +++++ .../optimizer/model/TableStatsHistory.java | 31 ++- .../optimizer/model/mapper/ModelDbMapper.java | 252 ------------------ 9 files changed, 232 insertions(+), 262 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java index 97b8e2992..e6321873d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java @@ -13,5 +13,15 @@ public enum HistoryStatus { SUCCESS, /** The operation failed. */ - FAILED + FAILED; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.HistoryStatus toDb() { + return com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. */ + public static HistoryStatus fromDb(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { + return v == null ? 
null : HistoryStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java index f284fedaf..137d97902 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java @@ -19,5 +19,15 @@ public enum OperationStatus { SCHEDULED, /** Scheduler marked this row as a duplicate of another PENDING row; not claimable. */ - CANCELED + CANCELED; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.OperationStatus toDb() { + return com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. */ + public static OperationStatus fromDb(com.linkedin.openhouse.optimizer.db.OperationStatus v) { + return v == null ? null : OperationStatus.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java index 8f4fe35a8..13c7e9c61 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java @@ -8,5 +8,15 @@ public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ - ORPHAN_FILES_DELETION + ORPHAN_FILES_DELETION; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.OperationType toDb() { + return com.linkedin.openhouse.optimizer.db.OperationType.valueOf(name()); + } + + /** Build the internal-model enum from the DB-layer counterpart. 
*/ + public static OperationType fromDb(com.linkedin.openhouse.optimizer.db.OperationType v) { + return v == null ? null : OperationType.valueOf(v.name()); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index bca7e2420..659dd18da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -13,8 +14,8 @@ * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads * stats for bin-packing). * - *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link - * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. + *

 Converts only to/from db/ rows; the api↔model conversion lives on the api/ types. db/ + * types know nothing about model/ or api/. */ @Data @Builder @@ -39,4 +40,36 @@ public class Table { /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; + + /** + * Project to the current-state DB row. {@code table_stats} carries the snapshot only — per-commit + * deltas live on {@code table_stats_history} (see {@link TableStatsHistory#toRow()}). + */ + public TableStatsRow toRow() { + return TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableId) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .tableProperties(tableProperties) + .updatedAt(updatedAt) + .build(); + } + + /** Build a {@link Table} from a current-state DB row. */ + public static Table fromRow(TableStatsRow row) { + if (row == null) { + return null; + } + return Table.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableId(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + // table_stats holds only the snapshot — deltas live on the history table. 
+ .stats(TableStats.fromRows(row.getSnapshot(), null)) + .updatedAt(row.getUpdatedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index 1f14dddff..81f97f1de 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.Comparator; import java.util.UUID; @@ -12,9 +13,8 @@ * An operation the analyzer has decided to schedule for a table, and that the scheduler later picks * up and submits. * - *

Pure internal-model type — no references to wire-API or DB types. Cross-layer construction - * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or - * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). + *

 Converts only to/from db/ rows; the api↔model conversion lives on the api/ types. db/ + * types know nothing about model/ or api/. * *

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not @@ -75,4 +75,35 @@ public static TableOperation mostRecent(TableOperation a, TableOperation b) { Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? a : b; } + + /** Convert to the corresponding DB row. */ + public TableOperationsRow toRow() { + return TableOperationsRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toDb()) + .status(status == null ? null : status.toDb()) + .createdAt(createdAt) + .scheduledAt(scheduledAt) + .build(); + } + + /** Build a {@link TableOperation} from a DB row. */ + public static TableOperation fromRow(TableOperationsRow row) { + if (row == null) { + return null; + } + return TableOperation.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .status(OperationStatus.fromDb(row.getStatus())) + .createdAt(row.getCreatedAt()) + .scheduledAt(row.getScheduledAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index fe5bee5f7..42a48479a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import java.time.Instant; import lombok.AllArgsConstructor; 
import lombok.Builder; @@ -38,4 +39,33 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; + + /** Convert to the corresponding DB row. */ + public TableOperationsHistoryRow toRow() { + return TableOperationsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .operationType(operationType == null ? null : operationType.toDb()) + .completedAt(completedAt) + .status(status == null ? null : status.toDb()) + .build(); + } + + /** Build a {@link TableOperationsHistory} from a DB row. */ + public static TableOperationsHistory fromRow(TableOperationsHistoryRow row) { + if (row == null) { + return null; + } + return TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .completedAt(row.getCompletedAt()) + .status(HistoryStatus.fromDb(row.getStatus())) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 56291e510..212390af9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -20,6 +20,31 @@ public class TableStats { /** Delta fields — accumulated across commit events. */ private CommitDelta delta; + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ + public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { + return snapshot == null ? null : snapshot.toDb(); + } + + /** + * Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics} object. 
+ */ + public com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics toDeltaRow() { + return delta == null ? null : delta.toDb(); + } + + /** Join the two DB-side columns back into a single internal-model {@link TableStats}. */ + public static TableStats fromRows( + com.linkedin.openhouse.optimizer.db.SnapshotMetrics dbSnapshot, + com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics dbDelta) { + if (dbSnapshot == null && dbDelta == null) { + return null; + } + return TableStats.builder() + .snapshot(SnapshotMetrics.fromDb(dbSnapshot)) + .delta(CommitDelta.fromDb(dbDelta)) + .build(); + } + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) @@ -39,6 +64,29 @@ public static class SnapshotMetrics { /** Total number of data files as of the latest snapshot — used for bin-packing. */ private Long numCurrentFiles; + + /** Convert to the DB-layer counterpart. */ + public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toDb() { + return com.linkedin.openhouse.optimizer.db.SnapshotMetrics.builder() + .tableVersion(tableVersion) + .tableLocation(tableLocation) + .tableSizeBytes(tableSizeBytes) + .numCurrentFiles(numCurrentFiles) + .build(); + } + + /** Build the internal-model inner object from the DB-layer counterpart. */ + public static SnapshotMetrics fromDb(com.linkedin.openhouse.optimizer.db.SnapshotMetrics v) { + if (v == null) { + return null; + } + return SnapshotMetrics.builder() + .tableVersion(v.getTableVersion()) + .tableLocation(v.getTableLocation()) + .tableSizeBytes(v.getTableSizeBytes()) + .numCurrentFiles(v.getNumCurrentFiles()) + .build(); + } } /** Per-commit incremental counters; accumulated across all recorded commit events. */ @@ -60,5 +108,28 @@ public static class CommitDelta { /** Total bytes removed by this commit. */ private Long deletedSizeBytes; + + /** Convert to the DB-layer counterpart. 
*/ + public com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics toDb() { + return com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics.builder() + .numFilesAdded(numFilesAdded) + .numFilesDeleted(numFilesDeleted) + .addedSizeBytes(addedSizeBytes) + .deletedSizeBytes(deletedSizeBytes) + .build(); + } + + /** Build the internal-model inner object from the DB-layer counterpart. */ + public static CommitDelta fromDb(com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics v) { + if (v == null) { + return null; + } + return CommitDelta.builder() + .numFilesAdded(v.getNumFilesAdded()) + .numFilesDeleted(v.getNumFilesDeleted()) + .addedSizeBytes(v.getAddedSizeBytes()) + .deletedSizeBytes(v.getDeletedSizeBytes()) + .build(); + } } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java index 53bb54d1e..f7f111151 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java @@ -1,5 +1,6 @@ package com.linkedin.openhouse.optimizer.model; +import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -11,8 +12,6 @@ * *

One per Iceberg commit. {@link #stats} carries both the snapshot at commit time and the commit * delta — consumers can reconstruct change rates over arbitrary time windows. - * - *

Pure internal-model type — no references to wire-API or DB types. */ @Data @Builder @@ -37,4 +36,32 @@ public class TableStatsHistory { /** When this history row was recorded. */ private Instant recordedAt; + + /** Convert to the corresponding DB row. */ + public TableStatsHistoryRow toRow() { + return TableStatsHistoryRow.builder() + .id(id) + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .delta(stats == null ? null : stats.toDeltaRow()) + .recordedAt(recordedAt) + .build(); + } + + /** Build a {@link TableStatsHistory} from a DB row. */ + public static TableStatsHistory fromRow(TableStatsHistoryRow row) { + if (row == null) { + return null; + } + return TableStatsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .stats(TableStats.fromRows(row.getSnapshot(), row.getDelta())) + .recordedAt(row.getRecordedAt()) + .build(); + } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java deleted file mode 100644 index 59d7e8680..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/mapper/ModelDbMapper.java +++ /dev/null @@ -1,252 +0,0 @@ -package com.linkedin.openhouse.optimizer.model.mapper; - -import com.linkedin.openhouse.optimizer.db.CommitDeltaMetrics; -import com.linkedin.openhouse.optimizer.db.SnapshotMetrics; -import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; -import com.linkedin.openhouse.optimizer.db.TableOperationsRow; -import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import 
com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import java.util.Collections; -import org.springframework.stereotype.Component; - -/** - * Converts between internal {@code model/} domain objects and database row entities. - * - *

The only place inside {@code model/} where {@code db/} types are referenced — this is the - * boundary at which the internal model meets the database layer. Pure data types under {@code - * model/} stay free of any DB-side imports. - * - *

Each layer carries its own per-layer enum + payload types. The DB layer flattens the wire-side - * {@code TableStats} envelope into two separate columns ({@code snapshot} and {@code delta}); this - * mapper joins / splits them at the boundary. - */ -@Component -public class ModelDbMapper { - - // --- TableOperationsRow <-> TableOperation --- - - public TableOperation toOperation(TableOperationsRow row) { - if (row == null) { - return null; - } - return TableOperation.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(toModelOperationType(row.getOperationType())) - .status(toModelOperationStatus(row.getStatus())) - .createdAt(row.getCreatedAt()) - .scheduledAt(row.getScheduledAt()) - .build(); - } - - public TableOperationsRow toRow(TableOperation op) { - if (op == null) { - return null; - } - return TableOperationsRow.builder() - .id(op.getId()) - .tableUuid(op.getTableUuid()) - .databaseName(op.getDatabaseName()) - .tableName(op.getTableName()) - .operationType(toDbOperationType(op.getOperationType())) - .status(toDbOperationStatus(op.getStatus())) - .createdAt(op.getCreatedAt()) - .scheduledAt(op.getScheduledAt()) - .build(); - } - - // --- TableOperationsHistoryRow <-> TableOperationsHistory --- - - public TableOperationsHistory toHistory(TableOperationsHistoryRow row) { - if (row == null) { - return null; - } - return TableOperationsHistory.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(toModelOperationType(row.getOperationType())) - .completedAt(row.getCompletedAt()) - .status(toModelHistoryStatus(row.getStatus())) - .build(); - } - - public TableOperationsHistoryRow toRow(TableOperationsHistory history) { - if (history == null) { - return null; - } - return TableOperationsHistoryRow.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - 
.databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .operationType(toDbOperationType(history.getOperationType())) - .completedAt(history.getCompletedAt()) - .status(toDbHistoryStatus(history.getStatus())) - .build(); - } - - // --- TableStatsRow -> Table --- - - public Table toTable(TableStatsRow row) { - if (row == null) { - return null; - } - return Table.builder() - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableId(row.getTableName()) - .tableProperties( - row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) - // table_stats holds only the snapshot — deltas live on the history table. - .stats(joinStats(row.getSnapshot(), null)) - .updatedAt(row.getUpdatedAt()) - .build(); - } - - // --- TableStatsHistoryRow -> TableStatsHistory --- - - public TableStatsHistory toStatsHistory(TableStatsHistoryRow row) { - if (row == null) { - return null; - } - return TableStatsHistory.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .stats(joinStats(row.getSnapshot(), row.getDelta())) - .recordedAt(row.getRecordedAt()) - .build(); - } - - // --- TableStats payload <-> (snapshot, delta) --- - - /** Join the two DB-side columns into a single internal-model {@link TableStats}. */ - public TableStats joinStats(SnapshotMetrics dbSnapshot, CommitDeltaMetrics dbDelta) { - if (dbSnapshot == null && dbDelta == null) { - return null; - } - return TableStats.builder() - .snapshot(toModelSnapshot(dbSnapshot)) - .delta(toModelDelta(dbDelta)) - .build(); - } - - /** Project the internal-model {@link TableStats#getSnapshot()} side. */ - public SnapshotMetrics toDbSnapshot(TableStats modelStats) { - return modelStats == null ? null : toDbSnapshot(modelStats.getSnapshot()); - } - - /** Project the internal-model {@link TableStats#getDelta()} side. 
*/ - public CommitDeltaMetrics toDbDelta(TableStats modelStats) { - return modelStats == null ? null : toDbDelta(modelStats.getDelta()); - } - - public TableStatsHistoryRow toStatsHistoryRow( - String id, - String tableUuid, - String databaseName, - String tableName, - TableStats stats, - java.time.Instant recordedAt) { - return TableStatsHistoryRow.builder() - .id(id) - .tableUuid(tableUuid) - .databaseName(databaseName) - .tableName(tableName) - .snapshot(toDbSnapshot(stats)) - .delta(toDbDelta(stats)) - .recordedAt(recordedAt) - .build(); - } - - // --- enum helpers --- - - public OperationType toModelOperationType(com.linkedin.openhouse.optimizer.db.OperationType v) { - return v == null ? null : OperationType.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.OperationType toDbOperationType(OperationType v) { - return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationType.valueOf(v.name()); - } - - public OperationStatus toModelOperationStatus( - com.linkedin.openhouse.optimizer.db.OperationStatus v) { - return v == null ? null : OperationStatus.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.OperationStatus toDbOperationStatus( - OperationStatus v) { - return v == null ? null : com.linkedin.openhouse.optimizer.db.OperationStatus.valueOf(v.name()); - } - - public HistoryStatus toModelHistoryStatus(com.linkedin.openhouse.optimizer.db.HistoryStatus v) { - return v == null ? null : HistoryStatus.valueOf(v.name()); - } - - public com.linkedin.openhouse.optimizer.db.HistoryStatus toDbHistoryStatus(HistoryStatus v) { - return v == null ? 
null : com.linkedin.openhouse.optimizer.db.HistoryStatus.valueOf(v.name()); - } - - // --- inner-payload field copies --- - - private TableStats.SnapshotMetrics toModelSnapshot(SnapshotMetrics v) { - if (v == null) { - return null; - } - return TableStats.SnapshotMetrics.builder() - .tableVersion(v.getTableVersion()) - .tableLocation(v.getTableLocation()) - .tableSizeBytes(v.getTableSizeBytes()) - .numCurrentFiles(v.getNumCurrentFiles()) - .build(); - } - - private SnapshotMetrics toDbSnapshot(TableStats.SnapshotMetrics v) { - if (v == null) { - return null; - } - return SnapshotMetrics.builder() - .tableVersion(v.getTableVersion()) - .tableLocation(v.getTableLocation()) - .tableSizeBytes(v.getTableSizeBytes()) - .numCurrentFiles(v.getNumCurrentFiles()) - .build(); - } - - private TableStats.CommitDelta toModelDelta(CommitDeltaMetrics v) { - if (v == null) { - return null; - } - return TableStats.CommitDelta.builder() - .numFilesAdded(v.getNumFilesAdded()) - .numFilesDeleted(v.getNumFilesDeleted()) - .addedSizeBytes(v.getAddedSizeBytes()) - .deletedSizeBytes(v.getDeletedSizeBytes()) - .build(); - } - - private CommitDeltaMetrics toDbDelta(TableStats.CommitDelta v) { - if (v == null) { - return null; - } - return CommitDeltaMetrics.builder() - .numFilesAdded(v.getNumFilesAdded()) - .numFilesDeleted(v.getNumFilesDeleted()) - .addedSizeBytes(v.getAddedSizeBytes()) - .deletedSizeBytes(v.getDeletedSizeBytes()) - .build(); - } -} From bb8aa4d2c4685caa141f830ffa165b8f0ab75a26 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:07:15 -0700 Subject: [PATCH 069/104] refactor(optimizer): service + controllers use type to/from methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The api↔model and model↔db boundaries no longer go through injected mapper beans. Switch every call site to the to/from methods that now live on the types themselves. OptimizerDataServiceImpl: - Drop the ModelDbMapper field. 
No DI at all (only repositories). - Row → model via TableOperation::fromRow, TableOperationsHistory::fromRow, Table::fromRow, TableStatsHistory::fromRow. - Model → row via instance methods: history.toRow(), table.toBuilder() ...build().toRow(), and TableStats stats.toSnapshotRow() / .toDeltaRow(). - Enum filters on list() use OperationType::toDb / OperationStatus::toDb method references. Controllers (TableOperationsController, TableOperationsHistoryController, TableStatsController): - Drop the ApiModelMapper field. - api → model on the way in: dto.toModel(), request.toModel(uuid), request.getStatus().toModel(), apiEnum.toModel(). - model → api on the way out: Dto.fromModel(modelObj). --- .../controller/TableOperationsController.java | 18 +++-- .../TableOperationsHistoryController.java | 6 +- .../api/controller/TableStatsController.java | 10 +-- .../openhouse/optimizer/model/Table.java | 2 +- .../model/TableOperationsHistory.java | 2 +- .../service/OptimizerDataServiceImpl.java | 81 ++++++++----------- 6 files changed, 51 insertions(+), 68 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 2c2483c1b..19e878910 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -5,7 +5,6 @@ import com.linkedin.openhouse.optimizer.api.model.OperationType; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; @@ 
-28,7 +27,6 @@ public class TableOperationsController { private final OptimizerDataService service; - private final ApiModelMapper apiMapper; /** * Report that an operation has completed. The body carries the {@code operationId} the caller is @@ -41,8 +39,12 @@ public ResponseEntity completeOperation( @RequestBody CompleteOperationRequest request) { return service .completeOperation( - request.getOperationId(), apiMapper.toModelHistoryStatus(request.getStatus())) - .map(history -> ResponseEntity.status(HttpStatus.CREATED).body(apiMapper.toDto(history))) + request.getOperationId(), + request.getStatus() == null ? null : request.getStatus().toModel()) + .map( + history -> + ResponseEntity.status(HttpStatus.CREATED) + .body(TableOperationsHistoryDto.fromModel(history))) .orElse(ResponseEntity.notFound().build()); } @@ -51,7 +53,7 @@ public ResponseEntity completeOperation( public ResponseEntity getTableOperation(@PathVariable String id) { return service .getTableOperation(id) - .map(apiMapper::toDto) + .map(TableOperationsDto::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -70,13 +72,13 @@ public ResponseEntity> listTableOperations( List result = service .listTableOperations( - Optional.ofNullable(operationType).map(apiMapper::toModelOperationType), - Optional.ofNullable(status).map(apiMapper::toModelOperationStatus), + Optional.ofNullable(operationType).map(OperationType::toModel), + Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(apiMapper::toDto) + .map(TableOperationsDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 
df7cabeff..0c6f4834c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,7 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; @@ -23,14 +22,13 @@ public class TableOperationsHistoryController { private final OptimizerDataService service; - private final ApiModelMapper apiMapper; /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */ @PostMapping public ResponseEntity appendHistory( @RequestBody TableOperationsHistoryDto dto) { return ResponseEntity.status(HttpStatus.CREATED) - .body(apiMapper.toDto(service.appendHistory(apiMapper.toHistory(dto)))); + .body(TableOperationsHistoryDto.fromModel(service.appendHistory(dto.toModel()))); } /** Return the most recent history for a table, newest first, up to {@code limit} rows. 
*/ @@ -39,7 +37,7 @@ public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { List result = service.getHistory(tableUuid, limit).stream() - .map(apiMapper::toDto) + .map(TableOperationsHistoryDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 2b738a6c3..aa299b015 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -3,7 +3,6 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; -import com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; @@ -26,7 +25,6 @@ public class TableStatsController { private final OptimizerDataService service; - private final ApiModelMapper apiMapper; /** * Create or overwrite the stats row for {@code tableUuid}. Called by the Tables Service on every @@ -36,7 +34,7 @@ public class TableStatsController { public ResponseEntity upsertTableStats( @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( - apiMapper.toDto(service.upsertTableStats(apiMapper.toTable(tableUuid, request)))); + TableStatsDto.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. 
*/ @@ -44,7 +42,7 @@ public ResponseEntity upsertTableStats( public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service .getTableStats(tableUuid) - .map(apiMapper::toDto) + .map(TableStatsDto::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -65,7 +63,7 @@ public ResponseEntity> listTableStats( Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(apiMapper::toDto) + .map(TableStatsDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } @@ -81,7 +79,7 @@ public ResponseEntity> getStatsHistory( @RequestParam(defaultValue = "100") int limit) { List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() - .map(apiMapper::toDto) + .map(TableStatsHistoryDto::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index 659dd18da..149128f44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -18,7 +18,7 @@ * types know nothing about model/ or api/. 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 42a48479a..8cbfb6ff7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -14,7 +14,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 47143118c..87f300192 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.db.TableOperationsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; import com.linkedin.openhouse.optimizer.model.HistoryStatus; @@ -9,8 +8,8 @@ import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; 
import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; @@ -28,8 +27,9 @@ /** * Implementation of {@link OptimizerDataService}. * - *

Operates purely on model/ and db/ types. The model↔db boundary is the {@link ModelDbMapper}. - * No api/-package types appear in this class. + *

Operates purely on model/ and db/ types. Conversion happens via the {@code toRow()} / {@code + * fromRow(...)} methods on the model types themselves — no injected mapper. No api/-package types + * appear in this class. */ @Service @RequiredArgsConstructor @@ -39,7 +39,6 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableOperationsHistoryRepository historyRepository; private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - private final ModelDbMapper dbMapper; // --- TableOperations --- @@ -52,13 +51,13 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - operationType.map(dbMapper::toDbOperationType).orElse(null), - status.map(dbMapper::toDbOperationStatus).orElse(null), + operationType.map(OperationType::toDb).orElse(null), + status.map(OperationStatus::toDb).orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() - .map(dbMapper::toOperation) + .map(TableOperation::fromRow) .collect(Collectors.toList()); } @@ -69,24 +68,22 @@ public Optional completeOperation( return operationsRepository .findById(operationId) .map( - row -> { - TableOperationsHistoryRow historyRow = - TableOperationsHistoryRow.builder() - .id(row.getId()) - .tableUuid(row.getTableUuid()) - .databaseName(row.getDatabaseName()) - .tableName(row.getTableName()) - .operationType(row.getOperationType()) - .completedAt(Instant.now()) - .status(dbMapper.toDbHistoryStatus(status)) - .build(); - return dbMapper.toHistory(historyRepository.save(historyRow)); - }); + row -> + TableOperationsHistory.builder() + .id(row.getId()) + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .operationType(OperationType.fromDb(row.getOperationType())) + .completedAt(Instant.now()) + .status(status) + .build()) + .map(history -> 
TableOperationsHistory.fromRow(historyRepository.save(history.toRow()))); } @Override public Optional getTableOperation(String id) { - return operationsRepository.findById(id).map(dbMapper::toOperation); + return operationsRepository.findById(id).map(TableOperation::fromRow); } // --- TableStats --- @@ -96,6 +93,7 @@ public Optional getTableOperation(String id) { public Table upsertTableStats(Table table) { Instant now = Instant.now(); String tableUuid = table.getTableUuid(); + TableStats stats = table.getStats(); TableStatsRow row = statsRepository @@ -106,19 +104,11 @@ public Table upsertTableStats(Table table) { .toBuilder() .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) - .snapshot(dbMapper.toDbSnapshot(table.getStats())) + .snapshot(stats == null ? null : stats.toSnapshotRow()) .tableProperties(table.getTableProperties()) .updatedAt(now) .build()) - .orElse( - TableStatsRow.builder() - .tableUuid(tableUuid) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .snapshot(dbMapper.toDbSnapshot(table.getStats())) - .tableProperties(table.getTableProperties()) - .updatedAt(now) - .build()); + .orElse(table.toBuilder().updatedAt(now).build().toRow()); TableStatsRow saved = statsRepository.save(row); statsHistoryRepository.save( @@ -127,17 +117,17 @@ public Table upsertTableStats(Table table) { .tableUuid(tableUuid) .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) - .snapshot(dbMapper.toDbSnapshot(table.getStats())) - .delta(dbMapper.toDbDelta(table.getStats())) + .snapshot(stats == null ? null : stats.toSnapshotRow()) + .delta(stats == null ? null : stats.toDeltaRow()) .recordedAt(now) .build()); - return dbMapper.toTable(saved); + return Table.fromRow(saved); } @Override public Optional

getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(dbMapper::toTable); + return statsRepository.findById(tableUuid).map(Table::fromRow); } @Override @@ -145,7 +135,7 @@ public List
listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(dbMapper::toTable) + .map(Table::fromRow) .collect(Collectors.toList()); } @@ -154,7 +144,7 @@ public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(dbMapper::toStatsHistory) + .map(TableStatsHistory::fromRow) .collect(Collectors.toList()); } @@ -163,25 +153,20 @@ public List getStatsHistory( @Override @Transactional public TableOperationsHistory appendHistory(TableOperationsHistory history) { - TableOperationsHistoryRow row = - TableOperationsHistoryRow.builder() - .id(history.getId()) - .tableUuid(history.getTableUuid()) - .databaseName(history.getDatabaseName()) - .tableName(history.getTableName()) - .operationType(dbMapper.toDbOperationType(history.getOperationType())) + TableOperationsHistory toWrite = + history + .toBuilder() .completedAt( history.getCompletedAt() != null ? 
history.getCompletedAt() : Instant.now()) - .status(dbMapper.toDbHistoryStatus(history.getStatus())) .build(); - return dbMapper.toHistory(historyRepository.save(row)); + return TableOperationsHistory.fromRow(historyRepository.save(toWrite.toRow())); } @Override public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() - .map(dbMapper::toHistory) + .map(TableOperationsHistory::fromRow) .collect(Collectors.toList()); } } From 95456bef1c35584dd2feebcf1e67e36421b53cac Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 16:10:37 -0700 Subject: [PATCH 070/104] refactor(optimizer-analyzer): use type to/from methods; drop ModelDbMapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AnalyzerRunner no longer injects ModelDbMapper. Conversion goes through the model types' static factories / instance methods: - Row → model: Table.fromRow, TableOperation.fromRow, TableOperationsHistory.fromRow. - Model → row: op.toRow(). - model.OperationType → db.OperationType via operationType.toDb(). Test: drop the ModelDbMapper field; use static factories directly. 
--- .../openhouse/analyzer/AnalyzerRunner.java | 12 +++++------- .../openhouse/analyzer/AnalyzerRunnerTest.java | 17 +++++++---------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 0be4a5a34..265b9d303 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -4,7 +4,6 @@ import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -36,7 +35,6 @@ public class AnalyzerRunner { private final TableStatsRepository statsRepo; private final TableOperationsRepository operationsRepo; private final TableOperationsHistoryRepository historyRepo; - private final ModelDbMapper dbMapper; /** * Run the analysis loop for {@code operationType} across all databases, with no filters. @@ -77,7 +75,7 @@ private void analyzeDatabase( Optional tableUuid) { com.linkedin.openhouse.optimizer.db.OperationType dbOperationType = - dbMapper.toDbOperationType(analyzer.getOperationType()); + analyzer.getOperationType().toDb(); // Pre-load the small sides of the joins — bounded by tables in this database. 
Map currentOps = @@ -86,7 +84,7 @@ private void analyzeDatabase( dbOperationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) .stream() .filter(e -> e.getTableUuid() != null) - .map(dbMapper::toOperation) + .map(TableOperation::fromRow) .collect( Collectors.toMap( TableOperation::getTableUuid, op -> op, TableOperation::mostRecent)); @@ -94,7 +92,7 @@ private void analyzeDatabase( Map latestHistory = historyRepo.findLatestPerTable(dbOperationType).stream() .filter(r -> r.getTableUuid() != null) - .map(dbMapper::toHistory) + .map(TableOperationsHistory::fromRow) .collect( Collectors.toMap( TableOperationsHistory::getTableUuid, @@ -104,7 +102,7 @@ private void analyzeDatabase( List
tables = statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() .filter(row -> row.getTableUuid() != null) - .map(dbMapper::toTable) + .map(Table::fromRow) .collect(Collectors.toList()); /* @@ -129,7 +127,7 @@ private void analyzeDatabase( Optional.ofNullable(latestHistory.get(table.getTableUuid())); if (analyzer.shouldSchedule(table, currentOp, entry)) { TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); - operationsRepo.save(dbMapper.toRow(op)); + operationsRepo.save(op.toRow()); log.info( "Created PENDING {} operation for table {}.{}", analyzer.getOperationType(), diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index fbd2fecbf..fe9561eb9 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -11,7 +11,6 @@ import com.linkedin.openhouse.optimizer.model.OperationType; import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -39,13 +38,11 @@ class AnalyzerRunnerTest { @Mock private TableOperationsHistoryRepository historyRepo; @Mock private OperationAnalyzer analyzer; - private final ModelDbMapper dbMapper = new ModelDbMapper(); private AnalyzerRunner runner; @BeforeEach void setUp() { - runner = - new AnalyzerRunner(List.of(analyzer), statsRepo, operationsRepo, historyRepo, dbMapper); + runner = new AnalyzerRunner(List.of(analyzer), 
statsRepo, operationsRepo, historyRepo); when(analyzer.getOperationType()).thenReturn(OFD_TYPE); when(statsRepo.findDistinctDatabaseNames()).thenReturn(List.of(DB)); } @@ -55,7 +52,7 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); @@ -83,7 +80,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); TableOperationsRow existingEntity = TableOperationsRow.builder() @@ -99,7 +96,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation existingOp = dbMapper.toOperation(existingEntity); + TableOperation existingOp = TableOperation.fromRow(existingEntity); when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); @@ -113,7 +110,7 @@ void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); @@ -130,7 +127,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow 
statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = dbMapper.toTable(statsEntity); + Table expectedTable = Table.fromRow(statsEntity); TableOperationsRow scheduled = TableOperationsRow.builder() @@ -146,7 +143,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation scheduledOp = dbMapper.toOperation(scheduled); + TableOperation scheduledOp = TableOperation.fromRow(scheduled); when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); From af23d5ef63ff1e44a483392e6a364c507d4cae34 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:02:51 -0700 Subject: [PATCH 071/104] fix(optimizer): make TableStats self-describing; route DTO conversion to TableStats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableStats now carries its own identity (tableUuid, databaseName, tableName) and metadata (tableProperties, updatedAt) alongside the snapshot + delta payload. Consumers no longer need an outer wrapper to know which table the stats belong to. api.TableStatsDto.toModel() and api.UpsertTableStatsRequest.toModel() now return model.TableStats (was model.Table). The two types only happened to have the same shape — semantically a DTO for stats is stats, not a table. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/api/model/TableStatsDto.java | 31 +++++++++++-------- .../api/model/UpsertTableStatsRequest.java | 17 +++++----- .../openhouse/optimizer/model/TableStats.java | 31 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 82dc552c2..244050b04 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -35,29 +34,35 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. */ - public Table toModel() { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .updatedAt(updatedAt) .build(); } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsDto fromModel(Table t) { - if (t == null) { + public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { return null; } return TableStatsDto.builder() - .tableUuid(t.getTableUuid()) - .databaseName(t.getDatabaseName()) - .tableName(t.getTableId()) - .stats(TableStats.fromModel(t.getStats())) - .tableProperties(t.getTableProperties()) - .updatedAt(t.getUpdatedAt()) + .tableUuid(m.getTableUuid()) + .databaseName(m.getDatabaseName()) + .tableName(m.getTableName()) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + .build()) + .tableProperties(m.getTableProperties()) + .updatedAt(m.getUpdatedAt()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 13476543f..08b42050f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -33,17 +32,19 @@ public class UpsertTableStatsRequest { private Map tableProperties; /** - * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from - * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service - * stamps it server-side at write time. + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by + * this request. {@code tableUuid} comes from the URL path, not the body. 
{@code updatedAt} is + * left {@code null}; the service stamps it server-side at write time. */ - public Table toModel(String tableUuid) { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 56291e510..906d01669 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,12 +1,24 @@ package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table. */ +/** + * Self-describing per-table stats record. Carries the table's identity and metadata alongside the + * snapshot + delta payload so consumers don't need an outer wrapper to know which table the stats + * belong to. + * + *

Identity ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}) and metadata ({@link + * #tableProperties}, {@link #updatedAt}) are populated when read from a current-state row. When + * this record is built from a per-commit history row, {@link #delta} is populated and {@link + * #tableProperties} / {@link #updatedAt} are typically {@code null}. + */ @Data @Builder(toBuilder = true) @NoArgsConstructor @@ -14,12 +26,27 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + private String tableUuid; + + /** Database the table lives in. */ + private String databaseName; + + /** Iceberg table name (the human-readable identifier, not the UUID). */ + private String tableName; + + /** Current table-property map (e.g. maintenance opt-in flags). Never null. */ + @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetrics snapshot; - /** Delta fields — accumulated across commit events. */ + /** Delta fields — accumulated across commit events. Null when read from a current-state row. */ private CommitDelta delta; + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; + /** Point-in-time metadata read from Iceberg at scan time. */ @Data @Builder(toBuilder = true) From 3864e4257d0476333cbd1d78f87207dc1c46b16e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:04:24 -0700 Subject: [PATCH 072/104] chore(optimizer): cascade self-describing TableStats from opt-0 to opt-1 Enriches model.TableStats with identity (tableUuid, databaseName, tableName) and metadata (tableProperties, updatedAt), and reroutes the api DTOs' toModel/fromModel pair to model.TableStats. opt-1's existing toSnapshotRow / toDeltaRow / fromRows helpers are preserved. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../optimizer/api/model/TableStatsDto.java | 31 +++++++++++-------- .../api/model/UpsertTableStatsRequest.java | 17 +++++----- .../openhouse/optimizer/model/TableStats.java | 31 +++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 82dc552c2..244050b04 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.time.Instant; import java.util.Collections; import java.util.Map; @@ -35,29 +34,35 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. */ - public Table toModel() { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel() { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .updatedAt(updatedAt) .build(); } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsDto fromModel(Table t) { - if (t == null) { + public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + if (m == null) { return null; } return TableStatsDto.builder() - .tableUuid(t.getTableUuid()) - .databaseName(t.getDatabaseName()) - .tableName(t.getTableId()) - .stats(TableStats.fromModel(t.getStats())) - .tableProperties(t.getTableProperties()) - .updatedAt(t.getUpdatedAt()) + .tableUuid(m.getTableUuid()) + .databaseName(m.getDatabaseName()) + .tableName(m.getTableName()) + .stats( + TableStats.builder() + .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) + .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + .build()) + .tableProperties(m.getTableProperties()) + .updatedAt(m.getUpdatedAt()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java index 13476543f..08b42050f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java @@ -1,6 +1,5 @@ package com.linkedin.openhouse.optimizer.api.model; -import com.linkedin.openhouse.optimizer.model.Table; import java.util.Collections; import java.util.Map; import lombok.AllArgsConstructor; @@ -33,17 +32,19 @@ public class UpsertTableStatsRequest { private Map tableProperties; /** - * Build the internal-model {@link Table} described by this request. {@code tableUuid} comes from - * the URL path, not the body. {@link Table#getUpdatedAt()} is left {@code null}; the service - * stamps it server-side at write time. + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by + * this request. {@code tableUuid} comes from the URL path, not the body. 
{@code updatedAt} is + * left {@code null}; the service stamps it server-side at write time. */ - public Table toModel(String tableUuid) { - return Table.builder() + public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStats payload = + stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + return payload + .toBuilder() .tableUuid(tableUuid) .databaseName(databaseName) - .tableId(tableName) + .tableName(tableName) .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) - .stats(stats == null ? null : stats.toModel()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index 212390af9..eb11c9d25 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -1,12 +1,24 @@ package com.linkedin.openhouse.optimizer.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.time.Instant; +import java.util.Collections; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; import lombok.NoArgsConstructor; -/** Combined stats payload stored as a single JSON blob per table. */ +/** + * Self-describing per-table stats record. Carries the table's identity and metadata alongside the + * snapshot + delta payload so consumers don't need an outer wrapper to know which table the stats + * belong to. + * + *

Identity ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}) and metadata ({@link + * #tableProperties}, {@link #updatedAt}) are populated when read from a current-state row. When + * this record is built from a per-commit history row, {@link #delta} is populated and {@link + * #tableProperties} / {@link #updatedAt} are typically {@code null}. + */ @Data @Builder(toBuilder = true) @NoArgsConstructor @@ -14,12 +26,27 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class TableStats { + /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ + private String tableUuid; + + /** Database the table lives in. */ + private String databaseName; + + /** Iceberg table name (the human-readable identifier, not the UUID). */ + private String tableName; + + /** Current table-property map (e.g. maintenance opt-in flags). Never null. */ + @Builder.Default private Map tableProperties = Collections.emptyMap(); + /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetrics snapshot; - /** Delta fields — accumulated across commit events. */ + /** Delta fields — accumulated across commit events. Null when read from a current-state row. */ private CommitDelta delta; + /** When the current snapshot was last written. Stamped server-side on every upsert. */ + private Instant updatedAt; + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { return snapshot == null ? 
null : snapshot.toDb(); From a6045b5534b39b14299b4eb36a61c3f872ef3ab6 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:06:06 -0700 Subject: [PATCH 073/104] =?UTF-8?q?feat(optimizer):=20add=20TableStats?= =?UTF-8?q?=E2=86=94TableStatsRow=20conversion=20on=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TableStats.toRow() / fromRow() let the service operate purely on the self-describing model.TableStats type instead of going through Table. Existing toSnapshotRow / toDeltaRow / fromRows helpers are preserved for the history path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/model/TableStats.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java index eb11c9d25..847f5a00e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java @@ -47,6 +47,38 @@ public class TableStats { /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; + /** + * Project to the current-state {@code table_stats} row. Snapshot only; deltas live on history. + */ + public com.linkedin.openhouse.optimizer.db.TableStatsRow toRow() { + return com.linkedin.openhouse.optimizer.db.TableStatsRow.builder() + .tableUuid(tableUuid) + .databaseName(databaseName) + .tableName(tableName) + .snapshot(snapshot == null ? null : snapshot.toDb()) + .tableProperties(tableProperties != null ? tableProperties : Collections.emptyMap()) + .updatedAt(updatedAt) + .build(); + } + + /** + * Build a {@link TableStats} from a current-state DB row. {@link #delta} is left {@code null}. 
+ */ + public static TableStats fromRow(com.linkedin.openhouse.optimizer.db.TableStatsRow row) { + if (row == null) { + return null; + } + return TableStats.builder() + .tableUuid(row.getTableUuid()) + .databaseName(row.getDatabaseName()) + .tableName(row.getTableName()) + .tableProperties( + row.getTableProperties() != null ? row.getTableProperties() : Collections.emptyMap()) + .snapshot(SnapshotMetrics.fromDb(row.getSnapshot())) + .updatedAt(row.getUpdatedAt()) + .build(); + } + /** Project to the DB-layer {@link com.linkedin.openhouse.optimizer.db.SnapshotMetrics} object. */ public com.linkedin.openhouse.optimizer.db.SnapshotMetrics toSnapshotRow() { return snapshot == null ? null : snapshot.toDb(); From db5921e038d22f5e2191ee5766c3ca8aefac5bfd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:07:59 -0700 Subject: [PATCH 074/104] refactor(optimizer): service stats methods take/return TableStats, not Table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OptimizerDataService.upsertTableStats / getTableStats / listTableStats now operate on model.TableStats. The service stays decoupled from Table — stats are stats, not tables. Conversion to TableStatsRow goes through TableStats.toRow / fromRow. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/OptimizerDataService.java | 12 ++--- .../service/OptimizerDataServiceImpl.java | 34 ++++++------ .../service/OptimizerDataServiceImplTest.java | 54 +++++++------------ 3 files changed, 40 insertions(+), 60 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index e8a4da86e..5d5edaee2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -3,9 +3,9 @@ import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; import java.util.List; @@ -50,20 +50,20 @@ List listTableOperations( // --- TableStats --- /** - * Create or update the stats row for {@code table.getTableUuid()}. Fully idempotent: the same + * Create or update the stats row for {@code stats.getTableUuid()}. Fully idempotent: the same * call overwrites the previous snapshot with the latest commit values. The service stamps {@link - * Table#getUpdatedAt()} server-side and returns the resulting {@link Table}. + * TableStats#getUpdatedAt()} server-side and returns the resulting {@link TableStats}. */ - Table upsertTableStats(Table table); + TableStats upsertTableStats(TableStats stats); /** Return the stats row for {@code tableUuid}, or empty if none exists. 
*/ - Optional

<Table> getTableStats(String tableUuid); + Optional<TableStats> getTableStats(String tableUuid); /** * List stats rows matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List<Table>
listTableStats( + List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid); /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 87f300192..633411e98 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -5,7 +5,6 @@ import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperation; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; @@ -90,10 +89,9 @@ public Optional getTableOperation(String id) { @Override @Transactional - public Table upsertTableStats(Table table) { + public TableStats upsertTableStats(TableStats stats) { Instant now = Instant.now(); - String tableUuid = table.getTableUuid(); - TableStats stats = table.getStats(); + String tableUuid = stats.getTableUuid(); TableStatsRow row = statsRepository @@ -102,40 +100,40 @@ public Table upsertTableStats(Table table) { existing -> existing .toBuilder() - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .snapshot(stats == null ? 
null : stats.toSnapshotRow()) - .tableProperties(table.getTableProperties()) + .databaseName(stats.getDatabaseName()) + .tableName(stats.getTableName()) + .snapshot(stats.toSnapshotRow()) + .tableProperties(stats.getTableProperties()) .updatedAt(now) .build()) - .orElse(table.toBuilder().updatedAt(now).build().toRow()); + .orElse(stats.toBuilder().updatedAt(now).build().toRow()); TableStatsRow saved = statsRepository.save(row); statsHistoryRepository.save( TableStatsHistoryRow.builder() .id(UUID.randomUUID().toString()) .tableUuid(tableUuid) - .databaseName(table.getDatabaseName()) - .tableName(table.getTableId()) - .snapshot(stats == null ? null : stats.toSnapshotRow()) - .delta(stats == null ? null : stats.toDeltaRow()) + .databaseName(stats.getDatabaseName()) + .tableName(stats.getTableName()) + .snapshot(stats.toSnapshotRow()) + .delta(stats.toDeltaRow()) .recordedAt(now) .build()); - return Table.fromRow(saved); + return TableStats.fromRow(saved); } @Override - public Optional
<Table> getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(Table::fromRow); + public Optional<TableStats> getTableStats(String tableUuid) { + return statsRepository.findById(tableUuid).map(TableStats::fromRow); } @Override - public List<Table>
listTableStats( + public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(Table::fromRow) + .map(TableStats::fromRow) .collect(Collectors.toList()); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 9d653e21d..b329459ad 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -7,7 +7,6 @@ import com.linkedin.openhouse.optimizer.model.HistoryStatus; import com.linkedin.openhouse.optimizer.model.OperationStatus; import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import com.linkedin.openhouse.optimizer.model.TableStats; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; @@ -78,23 +77,20 @@ void completeOperation_notFound_returnsEmpty() { @Test void upsertTableStats_createsNewRow() { String tableUuid = UUID.randomUUID().toString(); - Table input = - Table.builder() + TableStats input = + TableStats.builder() .tableUuid(tableUuid) .databaseName("db1") - .tableId("tbl1") + .tableName("tbl1") .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) - .build()) + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) .build(); - Table result = service.upsertTableStats(input); + TableStats result = service.upsertTableStats(input); 
assertThat(result.getTableUuid()).isEqualTo(tableUuid); assertThat(result.getDatabaseName()).isEqualTo("db1"); - assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(1024L); + assertThat(result.getSnapshot().getTableSizeBytes()).isEqualTo(1024L); assertThat(result.getTableProperties()) .containsEntry("maintenance.optimizer.ofd.enabled", "true"); assertThat(result.getUpdatedAt()).isNotNull(); @@ -104,41 +100,27 @@ void upsertTableStats_createsNewRow() { @Test void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - Table first = - Table.builder() + TableStats first = + TableStats.builder() .tableUuid(tableUuid) .databaseName("db1") - .tableId("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .delta( - TableStats.CommitDelta.builder() - .numFilesAdded(5L) - .numFilesDeleted(1L) - .build()) - .build()) + .tableName("tbl1") + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) .build(); - Table second = - Table.builder() + TableStats second = + TableStats.builder() .tableUuid(tableUuid) .databaseName("db1") - .tableId("tbl1") - .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .delta( - TableStats.CommitDelta.builder() - .numFilesAdded(3L) - .numFilesDeleted(0L) - .build()) - .build()) + .tableName("tbl1") + .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) .build(); service.upsertTableStats(first); - Table result = service.upsertTableStats(second); + TableStats result = service.upsertTableStats(second); - assertThat(result.getStats().getSnapshot().getTableSizeBytes()).isEqualTo(200L); + 
assertThat(result.getSnapshot().getTableSizeBytes()).isEqualTo(200L); assertThat(statsRepository.findAll()).hasSize(1); List history = From 3aebf64b743fb88b2d92a7d623ed70b5dbdee981 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:32:00 -0700 Subject: [PATCH 075/104] chore(optimizer): enable toBuilder on model.Table and model.TableOperationsHistory Moved down from opt-2. The service-layer code (opt-2) uses .toBuilder() on both types; the lombok annotation that enables it belongs on the PR that owns model/. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/com/linkedin/openhouse/optimizer/model/Table.java | 2 +- .../openhouse/optimizer/model/TableOperationsHistory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index bca7e2420..089a52982 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -17,7 +17,7 @@ * com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper#toTable} at the DB boundary. */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index fe5bee5f7..c8950ee26 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -13,7 +13,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { From bf30f86e18a8f53f185b2c00fb4b0880847a976d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 14 May 2026 17:33:19 -0700 Subject: [PATCH 076/104] chore(optimizer): cascade toBuilder annotations from opt-0 to opt-1 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../main/java/com/linkedin/openhouse/optimizer/model/Table.java | 2 +- .../openhouse/optimizer/model/TableOperationsHistory.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java index 659dd18da..149128f44 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java @@ -18,7 +18,7 @@ * types know nothing about model/ or api/. */ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class Table { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 42a48479a..8cbfb6ff7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -14,7 +14,7 @@ * components that need to reason about completed operations (e.g., scheduling-cadence analyzers). 
*/ @Data -@Builder +@Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor public class TableOperationsHistory { From b6c7f42774a61214cdabe6d01384b89c685cda35 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 10:32:28 -0700 Subject: [PATCH 077/104] refactor(optimizer): drop fileCount enrichment from model.TableOperation TableOperation becomes a pure operation record. Consumers (scheduler) look up TableStats at the point they need it, rather than carrying enrichment data on the model type. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/optimizer/model/TableOperation.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index 1f14dddff..fe91c38d0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -15,10 +15,6 @@ *

<p>Pure internal-model type — no references to wire-API or DB types. Cross-layer construction * happens via {@link com.linkedin.openhouse.optimizer.model.mapper.ModelDbMapper} (DB boundary) or * {@link com.linkedin.openhouse.optimizer.model.mapper.ApiModelMapper} (API boundary). - * - * <p>

{@link #fileCount} is a non-persisted enrichment populated by consumers that need it (e.g., - * the OFD scheduler reads it from {@code table_stats} for bin-packing). The DB column does not - * carry it. */ @Data @Builder @@ -50,12 +46,6 @@ public class TableOperation { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; - /** - * Number of current data files on the table at evaluation time. Non-persisted enrichment; - * populated by consumers that need it. Null when not enriched. - */ - private Long fileCount; - /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() From 7f5136017e19967d659c7283bc052181a039afa8 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 10:38:17 -0700 Subject: [PATCH 078/104] chore(analyzer): add TODOs for scale tests and query-builder migration Captures two open items from PR review on AnalyzerRunner: - scale test: empirically validate the 10k-tables-per-db working-set bound. - query-builder: migrate the optional-filter find(...) calls off raw JPQL to Criteria API or jOOQ once the scaffolding stabilizes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../com/linkedin/openhouse/analyzer/AnalyzerRunner.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 265b9d303..69b2f43d0 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -25,6 +25,10 @@ * into maps once per database before the table loop. 
This is correct at small scale (≤~100k * tables); past that the per-db query shape and projection need further tuning. Scale-up work is * tracked in BDP-102182. + * + *

// TODO(scale-test): benchmark the per-db working set at up to 10k tables and measure JVM heap + * residency for the three intermediate maps; per-db iteration bounds memory by tables-per-db rather + * than tables-total, but the upper bound still needs empirical validation. */ @Slf4j @Component @@ -78,6 +82,10 @@ private void analyzeDatabase( analyzer.getOperationType().toDb(); // Pre-load the small sides of the joins — bounded by tables in this database. + // TODO(query-builder): the JPQL optional-filter shape used by these find(...) calls gets + // unwieldy as the filter count grows. Migrate to Criteria API or jOOQ once the scaffolding + // stabilizes — applies to operationsRepo.find, historyRepo.findLatestPerTable, and + // statsRepo.find below. Map currentOps = operationsRepo .find( From 2b06c92e0cb3f5eaf0ab8f205dcb141eb9c47650 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Mon, 18 May 2026 14:44:35 -0700 Subject: [PATCH 079/104] feat(repo): add findClaimedIds for transactional batch-claim verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit markSchedulingBatch returns only a count of rows transitioned; callers that need to know *which* rows they own must re-query. findClaimedIds takes the same id list + scheduledAt watermark passed to the UPDATE and returns the subset whose SCHEDULING transition matches that watermark — i.e. the rows this caller actually claimed in this call. Used by the scheduler to subset its bin to actually-claimed operations before submitting the Spark job; without this the scheduler can launch a job for ids another instance already owns and then incorrectly mark all of them SCHEDULED. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../repository/TableOperationsRepository.java | 19 +++++- .../TableOperationsRepositoryTest.java | 58 +++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 8baddfe42..513006bf6 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -33,8 +33,9 @@ List find( /** * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of - * rows transitioned. Rows already claimed by another instance are skipped silently; callers must - * re-query if they need the precise list. + * rows transitioned. Rows already claimed by another instance are skipped silently; pair this + * call with {@link #findClaimedIds(List, Instant)} (using the same {@code scheduledAt}) to get + * the precise list of rows this caller now owns. */ @Modifying @Query( @@ -46,6 +47,20 @@ List find( int markSchedulingBatch( @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + /** + * Return the subset of {@code ids} that are currently {@code SCHEDULING} with the given {@code + * scheduledAt} watermark. Used after {@link #markSchedulingBatch(List, Instant)} to determine + * which rows this caller actually claimed (vs. rows another instance owns or rows that no longer + * exist). 
+ */ + @Query( + "SELECT r.id FROM TableOperationsRow r " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING " + + "AND r.scheduledAt = :scheduledAt") + List findClaimedIds( + @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + /** * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in * SCHEDULING. Returns the number of rows transitioned. diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 44a03ba9e..bfe3fc437 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -105,6 +105,64 @@ void find_byStatus() { assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } + @Test + void findClaimedIds_returnsOnlyClaimedSubset() { + String idA = UUID.randomUUID().toString(); + String idB = UUID.randomUUID().toString(); + String idC = UUID.randomUUID().toString(); + repository.save(pending(idA)); + repository.save(pending(idB)); + // idC is already SCHEDULING with a different scheduledAt — must NOT appear. 
+ repository.save( + TableOperationsRow.builder() + .id(idC) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_c") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULING) + .createdAt(Instant.now()) + .scheduledAt(Instant.now().minusSeconds(60)) + .build()); + + Instant now = Instant.now(); + repository.markSchedulingBatch(List.of(idA, idB, idC), now); + + List claimed = repository.findClaimedIds(List.of(idA, idB, idC), now); + assertThat(claimed).containsExactlyInAnyOrder(idA, idB); + } + + @Test + void findClaimedIds_emptyWhenNothingClaimed() { + String id = UUID.randomUUID().toString(); + repository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_x") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .build()); + + List claimed = repository.findClaimedIds(List.of(id), Instant.now()); + assertThat(claimed).isEmpty(); + } + + private TableOperationsRow pending(String id) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl_" + id) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + @Test void find_byDatabaseAndTable() { repository.save( From 437a0ed84a2fa7a53ea827b241404f60d20ac230 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:35:27 -0700 Subject: [PATCH 080/104] refactor(optimizer): add Dto suffix to all api/model classes (PR #527 review) Per @abhisheknath2011 review comment 3262776356: > "We could change all the internal model add Dto suffix something like > TableOperationsDto. This aligns with the existing services codebase." 
Renames (suffix added): - CompleteOperationRequest -> CompleteOperationRequestDto - UpsertTableStatsRequest -> UpsertTableStatsRequestDto - OperationType (enum) -> OperationTypeDto - OperationStatus (enum) -> OperationStatusDto - HistoryStatus (enum) -> HistoryStatusDto - TableStats (inner payload) -> TableStatsPayloadDto - TableStats.SnapshotMetrics -> TableStatsPayloadDto.SnapshotMetricsDto - TableStats.CommitDelta -> TableStatsPayloadDto.CommitDeltaDto Cross-reference updates inside api/model. Internal model layer (services/optimizer/.../model/) is intentionally unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- ....java => CompleteOperationRequestDto.java} | 6 ++--- ...storyStatus.java => HistoryStatusDto.java} | 6 ++--- ...ionStatus.java => OperationStatusDto.java} | 6 ++--- ...erationType.java => OperationTypeDto.java} | 6 ++--- .../api/model/TableOperationsDto.java | 8 +++--- .../api/model/TableOperationsHistoryDto.java | 8 +++--- .../optimizer/api/model/TableStatsDto.java | 8 +++--- .../api/model/TableStatsHistoryDto.java | 4 +-- ...leStats.java => TableStatsPayloadDto.java} | 27 ++++++++++--------- ...t.java => UpsertTableStatsRequestDto.java} | 4 +-- 10 files changed, 42 insertions(+), 41 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{CompleteOperationRequest.java => CompleteOperationRequestDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{HistoryStatus.java => HistoryStatusDto.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationStatus.java => OperationStatusDto.java} (87%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{OperationType.java => OperationTypeDto.java} (72%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{TableStats.java => TableStatsPayloadDto.java} (86%) rename 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/{UpsertTableStatsRequest.java => UpsertTableStatsRequestDto.java} (95%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java index 0add634b5..0db7a8a37 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class CompleteOperationRequestDto { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequest { private String tableName; /** Debug echo: operation type. 
*/ - private OperationType operationType; + private OperationTypeDto operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java index 0c9ff95da..5a4421332 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Terminal states for a completed Spark maintenance job. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The Spark job for this operation completed successfully. */ SUCCESS, @@ -15,7 +15,7 @@ public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatus.valueOf(v.name()); + public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { + return v == null ? 
null : HistoryStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java similarity index 87% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java index 300c28263..89fa9f1b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Recommended by the Analyzer but not yet claimed by the Scheduler. */ PENDING, @@ -25,8 +25,8 @@ public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatus fromModel( + public static OperationStatusDto fromModel( com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatus.valueOf(v.name()); + return v == null ? 
null : OperationStatusDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java similarity index 72% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java index 5f325e712..210010eb0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.model; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; @@ -11,7 +11,7 @@ public com.linkedin.openhouse.optimizer.model.OperationType toModel() { } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationType.valueOf(v.name()); + public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { + return v == null ? 
null : OperationTypeDto.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java index db8ef1039..880fe7926 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationType operationType; + private OperationTypeDto operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatus status; + private OperationStatusDto status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -65,8 +65,8 @@ public static TableOperationsDto fromModel(TableOperation op) { .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationType.fromModel(op.getOperationType())) - .status(OperationStatus.fromModel(op.getStatus())) + .operationType(OperationTypeDto.fromModel(op.getOperationType())) + .status(OperationStatusDto.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java index 935435040..652a58b3f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java @@ -27,13 +27,13 @@ public 
class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatus status; + private HistoryStatusDto status; /** Convert to the internal-model counterpart. */ public TableOperationsHistory toModel() { @@ -58,9 +58,9 @@ public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationType.fromModel(h.getOperationType())) + .operationType(OperationTypeDto.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatus.fromModel(h.getStatus())) + .status(HistoryStatusDto.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java index 244050b04..6852081ab 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). 
*/ private Map tableProperties; @@ -57,9 +57,9 @@ public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.Tab .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStats.builder() - .snapshot(TableStats.SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(TableStats.CommitDelta.fromModel(m.getDelta())) + TableStatsPayloadDto.builder() + .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java index b5f971bbf..bac3782ff 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java @@ -27,7 +27,7 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** When this history row was recorded. 
*/ private Instant recordedAt; @@ -54,7 +54,7 @@ public static TableStatsHistoryDto fromModel(TableStatsHistory h) { .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStats.fromModel(h.getStats())) + .stats(TableStatsPayloadDto.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java similarity index 86% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java index c75d21d75..692cb7247 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java @@ -17,13 +17,13 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsPayloadDto { /** Snapshot fields — overwritten on every upsert. */ - private SnapshotMetrics snapshot; + private SnapshotMetricsDto snapshot; /** Delta fields — accumulated across commit events. */ - private CommitDelta delta; + private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { @@ -34,13 +34,14 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build the api-layer payload from the internal-model counterpart. 
*/ - public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayloadDto fromModel( + com.linkedin.openhouse.optimizer.model.TableStats m) { if (m == null) { return null; } - return TableStats.builder() - .snapshot(SnapshotMetrics.fromModel(m.getSnapshot())) - .delta(CommitDelta.fromModel(m.getDelta())) + return TableStatsPayloadDto.builder() + .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); } @@ -50,7 +51,7 @@ public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableS @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class SnapshotMetrics { + public static class SnapshotMetricsDto { /** Iceberg metadata version pointer for this snapshot. */ private String tableVersion; @@ -75,12 +76,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel } /** Build the api-layer inner object from the internal-model counterpart. */ - public static SnapshotMetrics fromModel( + public static SnapshotMetricsDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { if (m == null) { return null; } - return SnapshotMetrics.builder() + return SnapshotMetricsDto.builder() .tableVersion(m.getTableVersion()) .tableLocation(m.getTableLocation()) .tableSizeBytes(m.getTableSizeBytes()) @@ -95,7 +96,7 @@ public static SnapshotMetrics fromModel( @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) - public static class CommitDelta { + public static class CommitDeltaDto { /** Number of data files this commit added to the table. */ private Long numFilesAdded; @@ -120,12 +121,12 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { } /** Build the api-layer inner object from the internal-model counterpart. 
*/ - public static CommitDelta fromModel( + public static CommitDeltaDto fromModel( com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { if (m == null) { return null; } - return CommitDelta.builder() + return CommitDeltaDto.builder() .numFilesAdded(m.getNumFilesAdded()) .numFilesDeleted(m.getNumFilesDeleted()) .addedSizeBytes(m.getAddedSizeBytes()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java index 08b42050f..75753fa69 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequest { +public class UpsertTableStatsRequestDto { /** Denormalized database name for display. */ private String databaseName; @@ -26,7 +26,7 @@ public class UpsertTableStatsRequest { private String tableName; /** Combined snapshot + delta stats payload from this commit. */ - private TableStats stats; + private TableStatsPayloadDto stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; From eedf6d09f619c5ca153dc0bc0490be26bfb25673 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:37:56 -0700 Subject: [PATCH 081/104] refactor(optimizer): update controllers for renamed api/model Dto types Follow-up to opt-0 Dto rename: controllers now import the renamed types. 
- TableOperationsController: CompleteOperationRequest -> CompleteOperationRequestDto, OperationType -> OperationTypeDto, OperationStatus -> OperationStatusDto. - TableStatsController: UpsertTableStatsRequest -> UpsertTableStatsRequestDto. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../controller/TableOperationsController.java | 16 ++++++++-------- .../api/controller/TableStatsController.java | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 19e878910..f963380da 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,8 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequest; -import com.linkedin.openhouse.optimizer.api.model.OperationStatus; -import com.linkedin.openhouse.optimizer.api.model.OperationType; +import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequestDto; +import com.linkedin.openhouse.optimizer.api.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.api.model.OperationTypeDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; @@ -36,7 +36,7 @@ public class TableOperationsController { */ @PostMapping("/complete") public ResponseEntity completeOperation( - @RequestBody CompleteOperationRequest request) { + @RequestBody CompleteOperationRequestDto request) { return service .completeOperation( request.getOperationId(), @@ -64,16 
+64,16 @@ public ResponseEntity getTableOperation(@PathVariable String */ @GetMapping public ResponseEntity> listTableOperations( - @RequestParam(required = false) OperationType operationType, - @RequestParam(required = false) OperationStatus status, + @RequestParam(required = false) OperationTypeDto operationType, + @RequestParam(required = false) OperationStatusDto status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { List result = service .listTableOperations( - Optional.ofNullable(operationType).map(OperationType::toModel), - Optional.ofNullable(status).map(OperationStatus::toModel), + Optional.ofNullable(operationType).map(OperationTypeDto::toModel), + Optional.ofNullable(status).map(OperationStatusDto::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index aa299b015..469170d0a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -2,7 +2,7 @@ import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequest; +import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequestDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; @@ -32,7 +32,7 @@ public class TableStatsController { */ @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( - @PathVariable String 
tableUuid, @RequestBody UpsertTableStatsRequest request) { + @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequestDto request) { return ResponseEntity.ok( TableStatsDto.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } From 4f98c228b6ea661291fb924ed870d41e82757159 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:56:57 -0700 Subject: [PATCH 082/104] refactor(optimizer): rename api.model package to api.spec (PR #527 review) Per @abhisheknath2011 review comment 3262769497: > "Can we change the client side API to api.spec instead of api.model? > This also aligns with existing services." Mechanical package rename. The 10 api wire types move from services/optimizer/.../api/model/ to services/optimizer/.../api/spec/. No type or signature changes. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/{model => spec}/CompleteOperationRequestDto.java | 2 +- .../optimizer/api/{model => spec}/HistoryStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationStatusDto.java | 2 +- .../optimizer/api/{model => spec}/OperationTypeDto.java | 2 +- .../optimizer/api/{model => spec}/TableOperationsDto.java | 2 +- .../api/{model => spec}/TableOperationsHistoryDto.java | 2 +- .../openhouse/optimizer/api/{model => spec}/TableStatsDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsHistoryDto.java | 2 +- .../optimizer/api/{model => spec}/TableStatsPayloadDto.java | 2 +- .../api/{model => spec}/UpsertTableStatsRequestDto.java | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/CompleteOperationRequestDto.java (96%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/HistoryStatusDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationStatusDto.java (95%) rename 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/OperationTypeDto.java (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableOperationsHistoryDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsDto.java (97%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsHistoryDto.java (96%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/TableStatsPayloadDto.java (98%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/{model => spec}/UpsertTableStatsRequestDto.java (96%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java index 0db7a8a37..9dca54a8e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import lombok.AllArgsConstructor; import lombok.Builder; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java index 5a4421332..034be4cf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Terminal states for a completed Spark maintenance job. */ public enum HistoryStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java index 89fa9f1b0..f02ee2815 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. 
*/ public enum OperationStatusDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java index 210010eb0..4e057b232 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. */ public enum OperationTypeDto { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java index 880fe7926..496f59f42 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperation; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java index 652a58b3f..8b508bf36 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java similarity index 97% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java index 6852081ab..165ae47dc 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.time.Instant; import java.util.Collections; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java similarity index 96% rename from 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java index bac3782ff..9e7c44c56 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.linkedin.openhouse.optimizer.model.TableStatsHistory; import java.time.Instant; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java similarity index 98% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java index 692cb7247..761471f91 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.AllArgsConstructor; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java similarity index 96% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java rename to 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java index 75753fa69..3e1fe4764 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/model/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java @@ -1,4 +1,4 @@ -package com.linkedin.openhouse.optimizer.api.model; +package com.linkedin.openhouse.optimizer.api.spec; import java.util.Collections; import java.util.Map; From 231efde0ec369c12699a23684d8a38fc3ec5566d Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 13:58:15 -0700 Subject: [PATCH 083/104] refactor(optimizer): update controller imports for api.model -> api.spec rename Follow-up to opt-0 package rename: controllers now import from com.linkedin.openhouse.optimizer.api.spec instead of api.model. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../api/controller/TableOperationsController.java | 10 +++++----- .../controller/TableOperationsHistoryController.java | 2 +- .../optimizer/api/controller/TableStatsController.java | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index f963380da..accf6d543 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.CompleteOperationRequestDto; -import com.linkedin.openhouse.optimizer.api.model.OperationStatusDto; -import com.linkedin.openhouse.optimizer.api.model.OperationTypeDto; 
-import com.linkedin.openhouse.optimizer.api.model.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequestDto; +import com.linkedin.openhouse.optimizer.api.spec.OperationStatusDto; +import com.linkedin.openhouse.optimizer.api.spec.OperationTypeDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 0c6f4834c..124697f10 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 469170d0a..19dcbabb9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -1,8 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.model.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.model.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.model.UpsertTableStatsRequestDto; +import com.linkedin.openhouse.optimizer.api.spec.TableStatsDto; +import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequestDto; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; From f1c500b98cf96fd4619132f250569909fdb7c1ec Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Tue, 19 May 2026 14:27:30 -0700 Subject: [PATCH 084/104] refactor(optimizer): hard-fail in AnalyzerRunner if no analyzer is registered (PR #533 review) Per @mkuchenbecker review comment 3245105977: > "impossible this is false" The analyzers list is Spring-injected from registered @Component beans; if a caller passes an OperationType with no matching analyzer, that's a bug, not a runtime condition to log-and-skip. Replaced the isEmpty()/return guard with orElseThrow(IllegalStateException). Mirrors how SchedulerRunner.schedule (opt-4) handles a missing BinPacker. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openhouse/analyzer/AnalyzerRunner.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 69b2f43d0..2deb053c6 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -59,13 +59,14 @@ public void analyze( Optional databaseName, Optional tableName, Optional tableUuid) { - Optional analyzerOpt = - analyzers.stream().filter(a -> a.getOperationType() == operationType).findFirst(); - if (analyzerOpt.isEmpty()) { - log.warn("No analyzer registered for operation type {}; skipping", operationType); - return; - } - OperationAnalyzer analyzer = analyzerOpt.get(); + OperationAnalyzer analyzer = + analyzers.stream() + .filter(a -> a.getOperationType() == operationType) + .findFirst() + .orElseThrow( + () -> + new IllegalStateException( + "No analyzer registered for operation type " + operationType)); List dbs = databaseName.map(List::of).orElseGet(statsRepo::findDistinctDatabaseNames); log.info("Analyzing {} across {} database(s)", operationType, dbs.size()); dbs.forEach(db -> analyzeDatabase(analyzer, db, tableName, tableUuid)); From f788ab6ccaf514a20d5dbafa2beede576830b7c4 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 09:39:35 -0700 Subject: [PATCH 085/104] fix(analyzer): point @EntityScan at the actual db package AnalyzerApplication scanned com.linkedin.openhouse.optimizer.entity for JPA-managed entities, but the entities live in com.linkedin.openhouse.optimizer.db. The app crashed at startup against a real database (MySQL backend in docker) with "Not a managed type: class ...db.TableStatsRow". 
Unit tests didn't catch it because they configure JPA differently. Co-Authored-By: Claude Opus 4.7 --- .../com/linkedin/openhouse/analyzer/AnalyzerApplication.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java index edee9c02e..1b6250c5e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java @@ -10,7 +10,7 @@ /** Entry point for the Optimizer Analyzer application. */ @SpringBootApplication -@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.entity") +@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.db") @EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository") public class AnalyzerApplication { From b31decf8a6cb93351ce5fd153b2740f1ea0329e3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:51:19 -0700 Subject: [PATCH 086/104] refactor(optimizer): move Dto suffix from api/spec to model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reversal of an earlier inconsistency surfaced by abhisheknath2011 in the PR #527 review thread on api/spec/HistoryStatusDto.java. The api wire types are the canonical contract; they should carry the canonical name. The internal-model types are transfer objects between layers and now carry the Dto suffix. 
api/spec/ — Dto stripped from class + filename (10 files): CompleteOperationRequestDto -> CompleteOperationRequest HistoryStatusDto -> HistoryStatus OperationStatusDto -> OperationStatus OperationTypeDto -> OperationType TableOperationsDto -> TableOperations TableOperationsHistoryDto -> TableOperationsHistory TableStatsDto -> TableStats TableStatsHistoryDto -> TableStatsHistory TableStatsPayloadDto -> TableStatsPayload UpsertTableStatsRequestDto -> UpsertTableStatsRequest model/ — Dto added to class + filename (8 files): HistoryStatus -> HistoryStatusDto OperationStatus -> OperationStatusDto OperationType -> OperationTypeDto Table -> TableDto TableOperation -> TableOperationDto TableOperationsHistory -> TableOperationsHistoryDto TableStats -> TableStatsDto TableStatsHistory -> TableStatsHistoryDto Both renames land on opt-0 because opt-0 owns api/spec/ and model/. The renames cascade up the stack in follow-up commits. Out of scope here: HistoryStatus enum value additions (CANCELED, QUEUED), also raised in the same review thread; that is a separate semantic change.
Co-Authored-By: Claude Opus 4.7 --- ...Dto.java => CompleteOperationRequest.java} | 6 ++--- ...storyStatusDto.java => HistoryStatus.java} | 10 ++++---- ...ionStatusDto.java => OperationStatus.java} | 12 +++++----- ...erationTypeDto.java => OperationType.java} | 10 ++++---- ...perationsDto.java => TableOperations.java} | 20 ++++++++-------- ...ryDto.java => TableOperationsHistory.java} | 20 ++++++++-------- .../{TableStatsDto.java => TableStats.java} | 22 +++++++++-------- ...HistoryDto.java => TableStatsHistory.java} | 16 ++++++------- ...PayloadDto.java => TableStatsPayload.java} | 24 +++++++++---------- ...tDto.java => UpsertTableStatsRequest.java} | 16 +++++++------ ...storyStatus.java => HistoryStatusDto.java} | 2 +- ...ionStatus.java => OperationStatusDto.java} | 2 +- ...erationType.java => OperationTypeDto.java} | 2 +- .../model/{Table.java => TableDto.java} | 6 ++--- ...eOperation.java => TableOperationDto.java} | 16 ++++++------- ...ry.java => TableOperationsHistoryDto.java} | 8 +++---- .../{TableStats.java => TableStatsDto.java} | 2 +- ...History.java => TableStatsHistoryDto.java} | 4 ++-- 18 files changed, 101 insertions(+), 97 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequestDto.java => CompleteOperationRequest.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{HistoryStatusDto.java => HistoryStatus.java} (52%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationStatusDto.java => OperationStatus.java} (73%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{OperationTypeDto.java => OperationType.java} (50%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsDto.java => TableOperations.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableOperationsHistoryDto.java => 
TableOperationsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsDto.java => TableStats.java} (70%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsHistoryDto.java => TableStatsHistory.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{TableStatsPayloadDto.java => TableStatsPayload.java} (81%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{UpsertTableStatsRequestDto.java => UpsertTableStatsRequest.java} (71%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{HistoryStatus.java => HistoryStatusDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationStatus.java => OperationStatusDto.java} (95%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{OperationType.java => OperationTypeDto.java} (92%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{Table.java => TableDto.java} (93%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperation.java => TableOperationDto.java} (80%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableOperationsHistory.java => TableOperationsHistoryDto.java} (82%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStats.java => TableStatsDto.java} (99%) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/{TableStatsHistory.java => TableStatsHistoryDto.java} (94%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java similarity index 92% rename from 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java index 9dca54a8e..15112882d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java @@ -25,13 +25,13 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequestDto { +public class CompleteOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId; /** Terminal outcome for this single operation. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; @@ -43,5 +43,5 @@ public class CompleteOperationRequestDto { private String tableName; /** Debug echo: operation type. */ - private OperationTypeDto operationType; + private OperationType operationType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java similarity index 52% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java index 034be4cf2..1d799818f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/HistoryStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Terminal states for a completed Spark maintenance job. 
*/ -public enum HistoryStatusDto { +public enum HistoryStatus { /** The Spark job for this operation completed successfully. */ SUCCESS, @@ -10,12 +10,12 @@ public enum HistoryStatusDto { FAILED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.HistoryStatus toModel() { - return com.linkedin.openhouse.optimizer.model.HistoryStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.HistoryStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.HistoryStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static HistoryStatusDto fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatus v) { - return v == null ? null : HistoryStatusDto.valueOf(v.name()); + public static HistoryStatus fromModel(com.linkedin.openhouse.optimizer.model.HistoryStatusDto v) { + return v == null ? null : HistoryStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java similarity index 73% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java index f02ee2815..b1cbe42b0 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatusDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationStatus.java @@ -1,7 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Lifecycle states for a table operation recommendation. */ -public enum OperationStatusDto { +public enum OperationStatus { /** Recommended by the Analyzer but not yet claimed by the Scheduler. 
*/ PENDING, @@ -20,13 +20,13 @@ public enum OperationStatusDto { CANCELED; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.OperationStatus toModel() { - return com.linkedin.openhouse.optimizer.model.OperationStatus.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationStatusDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationStatusDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationStatusDto fromModel( - com.linkedin.openhouse.optimizer.model.OperationStatus v) { - return v == null ? null : OperationStatusDto.valueOf(v.name()); + public static OperationStatus fromModel( + com.linkedin.openhouse.optimizer.model.OperationStatusDto v) { + return v == null ? null : OperationStatus.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java similarity index 50% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java index 4e057b232..ea6d2797c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationTypeDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/OperationType.java @@ -1,17 +1,17 @@ package com.linkedin.openhouse.optimizer.api.spec; /** Maintenance operation types supported by the continuous optimizer. */ -public enum OperationTypeDto { +public enum OperationType { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.OperationType toModel() { - return com.linkedin.openhouse.optimizer.model.OperationType.valueOf(name()); + public com.linkedin.openhouse.optimizer.model.OperationTypeDto toModel() { + return com.linkedin.openhouse.optimizer.model.OperationTypeDto.valueOf(name()); } /** Build the api-layer enum from the internal-model counterpart. */ - public static OperationTypeDto fromModel(com.linkedin.openhouse.optimizer.model.OperationType v) { - return v == null ? null : OperationTypeDto.valueOf(v.name()); + public static OperationType fromModel(com.linkedin.openhouse.optimizer.model.OperationTypeDto v) { + return v == null ? null : OperationType.valueOf(v.name()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 496f59f42..60f2c3dd8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsDto { +public class TableOperations { /** Client-generated UUID identifying this specific operation recommendation. 
*/ private String id; @@ -27,10 +27,10 @@ public class TableOperationsDto { private String tableName; /** The type of maintenance operation (e.g. ORPHAN_FILES_DELETION). */ - private OperationTypeDto operationType; + private OperationType operationType; /** {@code PENDING} or {@code SCHEDULED}. Defaults to {@code PENDING} on creation. */ - private OperationStatusDto status; + private OperationStatus status; /** Server-set when the row is first created by the Analyzer. */ private Instant createdAt; @@ -42,8 +42,8 @@ public class TableOperationsDto { private String jobId; /** Convert to the internal-model counterpart. */ - public TableOperation toModel() { - return TableOperation.builder() + public TableOperationDto toModel() { + return TableOperationDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -56,17 +56,17 @@ public TableOperation toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableOperationsDto fromModel(TableOperation op) { + public static TableOperations fromModel(TableOperationDto op) { if (op == null) { return null; } - return TableOperationsDto.builder() + return TableOperations.builder() .id(op.getId()) .tableUuid(op.getTableUuid()) .databaseName(op.getDatabaseName()) .tableName(op.getTableName()) - .operationType(OperationTypeDto.fromModel(op.getOperationType())) - .status(OperationStatusDto.fromModel(op.getStatus())) + .operationType(OperationType.fromModel(op.getOperationType())) + .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) .build(); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java rename to 
services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java index 8b508bf36..7a000f840 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistoryDto { +public class TableOperationsHistory { /** Same UUID as the originating {@code table_operations.id}; supplied by the caller. */ private String id; @@ -27,17 +27,17 @@ public class TableOperationsHistoryDto { private String tableName; /** The type of maintenance operation this history row records. */ - private OperationTypeDto operationType; + private OperationType operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; /** {@code SUCCESS} or {@code FAILED}. */ - private HistoryStatusDto status; + private HistoryStatus status; /** Convert to the internal-model counterpart. */ - public TableOperationsHistory toModel() { - return TableOperationsHistory.builder() + public TableOperationsHistoryDto toModel() { + return TableOperationsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -49,18 +49,18 @@ public TableOperationsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { + public static TableOperationsHistory fromModel(TableOperationsHistoryDto h) { if (h == null) { return null; } - return TableOperationsHistoryDto.builder() + return TableOperationsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .operationType(OperationTypeDto.fromModel(h.getOperationType())) + .operationType(OperationType.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) - .status(HistoryStatusDto.fromModel(h.getStatus())) + .status(HistoryStatus.fromModel(h.getStatus())) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java index 165ae47dc..41f44f763 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStats.java @@ -13,7 +13,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsDto { +public class TableStats { /** Stable Iceberg table UUID. Primary key of the stats row. */ private String tableUuid; @@ -25,7 +25,7 @@ public class TableStatsDto { private String tableName; /** Combined snapshot + delta stats payload, stored as JSON. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; @@ -34,9 +34,11 @@ public class TableStatsDto { private Instant updatedAt; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) @@ -48,18 +50,18 @@ public com.linkedin.openhouse.optimizer.model.TableStats toModel() { } /** Build a wire DTO from the internal-model counterpart. */ - public static TableStatsDto fromModel(com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStats fromModel(com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsDto.builder() + return TableStats.builder() .tableUuid(m.getTableUuid()) .databaseName(m.getDatabaseName()) .tableName(m.getTableName()) .stats( - TableStatsPayloadDto.builder() - .snapshot(TableStatsPayloadDto.SnapshotMetricsDto.fromModel(m.getSnapshot())) - .delta(TableStatsPayloadDto.CommitDeltaDto.fromModel(m.getDelta())) + TableStatsPayload.builder() + .snapshot(TableStatsPayload.SnapshotMetricsDto.fromModel(m.getSnapshot())) + .delta(TableStatsPayload.CommitDeltaDto.fromModel(m.getDelta())) .build()) .tableProperties(m.getTableProperties()) .updatedAt(m.getUpdatedAt()) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java index 9e7c44c56..5508aca27 100644 --- 
a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsHistory.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.spec; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import java.time.Instant; import lombok.AllArgsConstructor; import lombok.Builder; @@ -12,7 +12,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistoryDto { +public class TableStatsHistory { /** UUID primary key set by the caller. */ private String id; @@ -27,14 +27,14 @@ public class TableStatsHistoryDto { private String tableName; /** Snapshot + delta stats from this commit event. */ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** When this history row was recorded. */ private Instant recordedAt; /** Convert to the internal-model counterpart. */ - public TableStatsHistory toModel() { - return TableStatsHistory.builder() + public TableStatsHistoryDto toModel() { + return TableStatsHistoryDto.builder() .id(id) .tableUuid(tableUuid) .databaseName(databaseName) @@ -45,16 +45,16 @@ public TableStatsHistory toModel() { } /** Build a wire DTO from the internal-model counterpart. 
*/ - public static TableStatsHistoryDto fromModel(TableStatsHistory h) { + public static TableStatsHistory fromModel(TableStatsHistoryDto h) { if (h == null) { return null; } - return TableStatsHistoryDto.builder() + return TableStatsHistory.builder() .id(h.getId()) .tableUuid(h.getTableUuid()) .databaseName(h.getDatabaseName()) .tableName(h.getTableName()) - .stats(TableStatsPayloadDto.fromModel(h.getStats())) + .stats(TableStatsPayload.fromModel(h.getStats())) .recordedAt(h.getRecordedAt()) .build(); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java similarity index 81% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java index 761471f91..c347bf385 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayloadDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableStatsPayload.java @@ -17,7 +17,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStatsPayloadDto { +public class TableStatsPayload { /** Snapshot fields — overwritten on every upsert. */ private SnapshotMetricsDto snapshot; @@ -26,20 +26,20 @@ public class TableStatsPayloadDto { private CommitDeltaDto delta; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.builder() .snapshot(snapshot == null ? null : snapshot.toModel()) .delta(delta == null ? 
null : delta.toModel()) .build(); } /** Build the api-layer payload from the internal-model counterpart. */ - public static TableStatsPayloadDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats m) { + public static TableStatsPayload fromModel( + com.linkedin.openhouse.optimizer.model.TableStatsDto m) { if (m == null) { return null; } - return TableStatsPayloadDto.builder() + return TableStatsPayload.builder() .snapshot(SnapshotMetricsDto.fromModel(m.getSnapshot())) .delta(CommitDeltaDto.fromModel(m.getDelta())) .build(); @@ -66,8 +66,8 @@ public static class SnapshotMetricsDto { private Long numCurrentFiles; /** Convert to the internal-model counterpart. */ - public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics.builder() .tableVersion(tableVersion) .tableLocation(tableLocation) .tableSizeBytes(tableSizeBytes) @@ -77,7 +77,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics toModel /** Build the api-layer inner object from the internal-model counterpart. */ public static SnapshotMetricsDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.SnapshotMetrics m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.SnapshotMetrics m) { if (m == null) { return null; } @@ -111,8 +111,8 @@ public static class CommitDeltaDto { private Long deletedSizeBytes; /** Convert to the internal-model counterpart. 
*/ - public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { - return com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta.builder() + public com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta toModel() { + return com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta.builder() .numFilesAdded(numFilesAdded) .numFilesDeleted(numFilesDeleted) .addedSizeBytes(addedSizeBytes) @@ -122,7 +122,7 @@ public com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta toModel() { /** Build the api-layer inner object from the internal-model counterpart. */ public static CommitDeltaDto fromModel( - com.linkedin.openhouse.optimizer.model.TableStats.CommitDelta m) { + com.linkedin.openhouse.optimizer.model.TableStatsDto.CommitDelta m) { if (m == null) { return null; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java similarity index 71% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index 3e1fe4764..d1b4a5fe2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -17,7 +17,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class UpsertTableStatsRequestDto { +public class UpsertTableStatsRequest { /** Denormalized database name for display. */ private String databaseName; @@ -26,19 +26,21 @@ public class UpsertTableStatsRequestDto { private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ - private TableStatsPayloadDto stats; + private TableStatsPayload stats; /** Current table properties snapshot (e.g. maintenance opt-in flags). */ private Map tableProperties; /** - * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStats} described by - * this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is + * Build the internal-model {@link com.linkedin.openhouse.optimizer.model.TableStatsDto} described + * by this request. {@code tableUuid} comes from the URL path, not the body. {@code updatedAt} is * left {@code null}; the service stamps it server-side at write time. */ - public com.linkedin.openhouse.optimizer.model.TableStats toModel(String tableUuid) { - com.linkedin.openhouse.optimizer.model.TableStats payload = - stats == null ? new com.linkedin.openhouse.optimizer.model.TableStats() : stats.toModel(); + public com.linkedin.openhouse.optimizer.model.TableStatsDto toModel(String tableUuid) { + com.linkedin.openhouse.optimizer.model.TableStatsDto payload = + stats == null + ? new com.linkedin.openhouse.optimizer.model.TableStatsDto() + : stats.toModel(); return payload .toBuilder() .tableUuid(tableUuid) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java index 97b8e2992..463c62605 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/HistoryStatusDto.java @@ -7,7 +7,7 @@ * *

Intentionally separate from the wire-API and DB representations. */ -public enum HistoryStatus { +public enum HistoryStatusDto { /** The operation completed successfully. */ SUCCESS, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java similarity index 95% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java index f284fedaf..b766f7dbe 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatus.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationStatusDto.java @@ -7,7 +7,7 @@ * *

Intentionally separate from the wire-API and DB representations. */ -public enum OperationStatus { +public enum OperationStatusDto { /** Analyzer has written the row; not yet claimed by the scheduler. */ PENDING, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java similarity index 92% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java index 8f4fe35a8..39b299806 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationType.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/OperationTypeDto.java @@ -5,7 +5,7 @@ * separate from the wire-API and DB representations so the internal model can evolve its set of * supported operations without churning either boundary. */ -public enum OperationType { +public enum OperationTypeDto { /** Removes orphaned data files no longer referenced by table metadata. */ ORPHAN_FILES_DELETION diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java similarity index 93% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java index 089a52982..408bc4fc7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/Table.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableDto.java @@ -10,7 +10,7 @@ /** * An OpenHouse table enriched with stats and properties, built by combining data sources. 
Consumed - * by the analyzer (decides whether to produce a {@link TableOperation}) and the scheduler (reads + * by the analyzer (decides whether to produce a {@link TableOperationDto}) and the scheduler (reads * stats for bin-packing). * *

Pure internal-model type — no references to wire-API or DB types. Construct via {@link @@ -20,7 +20,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class Table { +public class TableDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; @@ -35,7 +35,7 @@ public class Table { @Builder.Default private Map tableProperties = Collections.emptyMap(); /** Latest snapshot stats for this table. Delta is null when read from the current-state row. */ - private TableStats stats; + private TableStatsDto stats; /** When the current snapshot was last written. Stamped server-side on every upsert. */ private Instant updatedAt; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java similarity index 80% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index fe91c38d0..8809a1b62 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -20,7 +20,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableOperation { +public class TableOperationDto { /** Unique operation ID (UUID). */ private String id; @@ -35,10 +35,10 @@ public class TableOperation { private String tableName; /** Operation type. */ - private OperationType operationType; + private OperationTypeDto operationType; /** Current lifecycle status. */ - private OperationStatus status; + private OperationStatusDto status; /** When this operation record was created. 
*/ private Instant createdAt; @@ -47,21 +47,21 @@ public class TableOperation { private Instant scheduledAt; /** Create a new PENDING operation for the given table and operation type. */ - public static TableOperation pending(Table table, OperationType operationType) { - return TableOperation.builder() + public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { + return TableOperationDto.builder() .id(UUID.randomUUID().toString()) .tableUuid(table.getTableUuid()) .databaseName(table.getDatabaseName()) .tableName(table.getTableId()) .operationType(operationType) - .status(OperationStatus.PENDING) + .status(OperationStatusDto.PENDING) .createdAt(Instant.now()) .build(); } /** Return the more recently created of two operations. */ - public static TableOperation mostRecent(TableOperation a, TableOperation b) { - Comparator byCreatedAt = + public static TableOperationDto mostRecent(TableOperationDto a, TableOperationDto b) { + Comparator byCreatedAt = Comparator.comparing(r -> r.getCreatedAt() != null ? r.getCreatedAt() : Instant.EPOCH); return byCreatedAt.compare(a, b) >= 0 ? 
a : b; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java similarity index 82% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java index c8950ee26..e05bb641e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistoryDto.java @@ -16,7 +16,7 @@ @Builder(toBuilder = true) @NoArgsConstructor @AllArgsConstructor -public class TableOperationsHistory { +public class TableOperationsHistoryDto { /** Same UUID as the originating live-operations row. */ private String id; @@ -31,11 +31,11 @@ public class TableOperationsHistory { private String tableName; /** Operation type for this completed run. */ - private OperationType operationType; + private OperationTypeDto operationType; /** When the operation completed, as recorded by the complete endpoint. */ private Instant completedAt; - /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ - private HistoryStatus status; + /** Terminal outcome: {@link HistoryStatusDto#SUCCESS} or {@link HistoryStatusDto#FAILED}. 
*/ + private HistoryStatusDto status; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java similarity index 99% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java index 906d01669..d142dcc8b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStats.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsDto.java @@ -24,7 +24,7 @@ @NoArgsConstructor @AllArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) -public class TableStats { +public class TableStatsDto { /** Stable table identity from the Tables Service. Survives renames; rotates on drop+recreate. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java similarity index 94% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java index 53bb54d1e..5579c95ed 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableStatsHistoryDto.java @@ -18,7 +18,7 @@ @Builder @NoArgsConstructor @AllArgsConstructor -public class TableStatsHistory { +public class TableStatsHistoryDto { /** UUID primary key — set by the caller, not generated server-side. */ private String id; @@ -33,7 +33,7 @@ public class TableStatsHistory { private String tableName; /** Snapshot + delta for this commit event. 
*/ - private TableStats stats; + private TableStatsDto stats; /** When this history row was recorded. */ private Instant recordedAt; From 91e89efa44712cd0483cfd8ca0748e60e174b1f9 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:57:05 -0700 Subject: [PATCH 087/104] refactor(optimizer): update controller + service refs after Dto suffix swap Follow-up to the api/spec strip + model add-Dto rename on opt-0 (commit b31decf8). On opt-2 the merged content compiles after updating the consumer-side refs: api controllers (3 files): strip Dto from api type names (now canonical: TableOperations, CompleteOperationRequest, etc.) service interface + impl + test: add Dto to model type names (now: TableOperationDto, TableStatsDto, HistoryStatusDto, etc.) Co-Authored-By: Claude Opus 4.7 --- .../controller/TableOperationsController.java | 34 ++++----- .../TableOperationsHistoryController.java | 14 ++-- .../api/controller/TableStatsController.java | 28 ++++---- .../service/OptimizerDataService.java | 43 ++++++------ .../service/OptimizerDataServiceImpl.java | 70 +++++++++---------- .../service/OptimizerDataServiceImplTest.java | 54 +++++++------- 6 files changed, 123 insertions(+), 120 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index accf6d543..6f9d6a177 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequestDto; -import com.linkedin.openhouse.optimizer.api.spec.OperationStatusDto; -import 
com.linkedin.openhouse.optimizer.api.spec.OperationTypeDto; -import com.linkedin.openhouse.optimizer.api.spec.TableOperationsDto; -import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequest; +import com.linkedin.openhouse.optimizer.api.spec.OperationStatus; +import com.linkedin.openhouse.optimizer.api.spec.OperationType; +import com.linkedin.openhouse.optimizer.api.spec.TableOperations; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; @@ -35,8 +35,8 @@ public class TableOperationsController { * row, or 404 if the operation does not exist. */ @PostMapping("/complete") - public ResponseEntity completeOperation( - @RequestBody CompleteOperationRequestDto request) { + public ResponseEntity completeOperation( + @RequestBody CompleteOperationRequest request) { return service .completeOperation( request.getOperationId(), @@ -44,16 +44,16 @@ public ResponseEntity completeOperation( .map( history -> ResponseEntity.status(HttpStatus.CREATED) - .body(TableOperationsHistoryDto.fromModel(history))) + .body(TableOperationsHistory.fromModel(history))) .orElse(ResponseEntity.notFound().build()); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. */ @GetMapping("/{id}") - public ResponseEntity getTableOperation(@PathVariable String id) { + public ResponseEntity getTableOperation(@PathVariable String id) { return service .getTableOperation(id) - .map(TableOperationsDto::fromModel) + .map(TableOperations::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -63,22 +63,22 @@ public ResponseEntity getTableOperation(@PathVariable String * every row. 
*/ @GetMapping - public ResponseEntity> listTableOperations( - @RequestParam(required = false) OperationTypeDto operationType, - @RequestParam(required = false) OperationStatusDto status, + public ResponseEntity> listTableOperations( + @RequestParam(required = false) OperationType operationType, + @RequestParam(required = false) OperationStatus status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - List result = + List result = service .listTableOperations( - Optional.ofNullable(operationType).map(OperationTypeDto::toModel), - Optional.ofNullable(status).map(OperationStatusDto::toModel), + Optional.ofNullable(operationType).map(OperationType::toModel), + Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(TableOperationsDto::fromModel) + .map(TableOperations::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 124697f10..36c422623 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -1,6 +1,6 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; @@ -25,19 +25,19 @@ public 
class TableOperationsHistoryController { /** Append a completed-job result. Called by the SparkJob after each run (success or failure). */ @PostMapping - public ResponseEntity appendHistory( - @RequestBody TableOperationsHistoryDto dto) { + public ResponseEntity appendHistory( + @RequestBody TableOperationsHistory dto) { return ResponseEntity.status(HttpStatus.CREATED) - .body(TableOperationsHistoryDto.fromModel(service.appendHistory(dto.toModel()))); + .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } /** Return the most recent history for a table, newest first, up to {@code limit} rows. */ @GetMapping("/{tableUuid}") - public ResponseEntity> getHistory( + public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { - List result = + List result = service.getHistory(tableUuid, limit).stream() - .map(TableOperationsHistoryDto::fromModel) + .map(TableOperationsHistory::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 19dcbabb9..7cb745250 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -1,8 +1,8 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.TableStatsDto; -import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistoryDto; -import com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequestDto; +import com.linkedin.openhouse.optimizer.api.spec.TableStats; +import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistory; +import 
com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.time.Instant; import java.util.List; @@ -31,18 +31,18 @@ public class TableStatsController { * Iceberg commit. Idempotent. */ @PutMapping("/{tableUuid}") - public ResponseEntity upsertTableStats( - @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequestDto request) { + public ResponseEntity upsertTableStats( + @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( - TableStatsDto.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); + TableStats.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. */ @GetMapping("/{tableUuid}") - public ResponseEntity getTableStats(@PathVariable String tableUuid) { + public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service .getTableStats(tableUuid) - .map(TableStatsDto::fromModel) + .map(TableStats::fromModel) .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().build()); } @@ -52,18 +52,18 @@ public ResponseEntity getTableStats(@PathVariable String tableUui * every row. */ @GetMapping - public ResponseEntity> listTableStats( + public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, @RequestParam(required = false) String tableUuid) { - List result = + List result = service .listTableStats( Optional.ofNullable(databaseName), Optional.ofNullable(tableName), Optional.ofNullable(tableUuid)) .stream() - .map(TableStatsDto::fromModel) + .map(TableStats::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } @@ -73,13 +73,13 @@ public ResponseEntity> listTableStats( * {@code since} (inclusive) and cap at {@code limit} rows. 
*/ @GetMapping("/{tableUuid}/history") - public ResponseEntity> getStatsHistory( + public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, @RequestParam(defaultValue = "100") int limit) { - List result = + List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() - .map(TableStatsHistoryDto::fromModel) + .map(TableStatsHistory::fromModel) .collect(Collectors.toList()); return ResponseEntity.ok(result); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 5d5edaee2..c32a67bae 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -1,12 +1,12 @@ package com.linkedin.openhouse.optimizer.service; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; -import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import java.time.Instant; import java.util.List; 
import java.util.Optional; @@ -26,9 +26,9 @@ public interface OptimizerDataService { * List operations matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. */ - List listTableOperations( - Optional operationType, - Optional status, + List listTableOperations( + Optional operationType, + Optional status, Optional databaseName, Optional tableName, Optional tableUuid); @@ -39,31 +39,32 @@ List listTableOperations( * {@code status}, and saves it. Returns the history record, or empty if the operation does not * exist. */ - Optional completeOperation(String operationId, HistoryStatus status); + Optional completeOperation( + String operationId, HistoryStatusDto status); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. * Used to poll a specific operation (e.g. waiting for SUCCESS after a Spark job completes). */ - Optional getTableOperation(String id); + Optional getTableOperation(String id); - // --- TableStats --- + // --- TableStatsDto --- /** * Create or update the stats row for {@code stats.getTableUuid()}. Fully idempotent: the same * call overwrites the previous snapshot with the latest commit values. The service stamps {@link - * TableStats#getUpdatedAt()} server-side and returns the resulting {@link TableStats}. + * TableStatsDto#getUpdatedAt()} server-side and returns the resulting {@link TableStatsDto}. */ - TableStats upsertTableStats(TableStats stats); + TableStatsDto upsertTableStats(TableStatsDto stats); /** Return the stats row for {@code tableUuid}, or empty if none exists. */ - Optional getTableStats(String tableUuid); + Optional getTableStats(String tableUuid); /** * List stats rows matching the given filters. Every parameter is optional — pass {@link * Optional#empty()} to skip that filter. No filters returns all rows. 
*/ - List listTableStats( + List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid); /** @@ -73,12 +74,12 @@ List listTableStats( * @param since if present, only return rows recorded at or after this instant * @param limit maximum number of rows to return */ - List getStatsHistory(String tableUuid, Optional since, int limit); + List getStatsHistory(String tableUuid, Optional since, int limit); - // --- TableOperationsHistory --- + // --- TableOperationsHistoryDto --- /** Append a completed-job result record. */ - TableOperationsHistory appendHistory(TableOperationsHistory history); + TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto history); /** * Return the most recent history rows for a table UUID, newest first. @@ -86,5 +87,5 @@ List listTableStats( * @param tableUuid the stable table UUID * @param limit maximum number of rows to return */ - List getHistory(String tableUuid, int limit); + List getHistory(String tableUuid, int limit); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 633411e98..a9ead77ce 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -2,13 +2,13 @@ import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import com.linkedin.openhouse.optimizer.model.TableStats; 
-import com.linkedin.openhouse.optimizer.model.TableStatsHistory; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; +import com.linkedin.openhouse.optimizer.model.TableStatsHistoryDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; @@ -42,54 +42,54 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { // --- TableOperations --- @Override - public List listTableOperations( - Optional operationType, - Optional status, + public List listTableOperations( + Optional operationType, + Optional status, Optional databaseName, Optional tableName, Optional tableUuid) { return operationsRepository .find( - operationType.map(OperationType::toDb).orElse(null), - status.map(OperationStatus::toDb).orElse(null), + operationType.map(OperationTypeDto::toDb).orElse(null), + status.map(OperationStatusDto::toDb).orElse(null), tableUuid.orElse(null), databaseName.orElse(null), tableName.orElse(null)) .stream() - .map(TableOperation::fromRow) + .map(TableOperationDto::fromRow) .collect(Collectors.toList()); } @Override @Transactional - public Optional completeOperation( - String operationId, HistoryStatus status) { + public Optional completeOperation( + String operationId, HistoryStatusDto status) { return operationsRepository .findById(operationId) .map( row -> - TableOperationsHistory.builder() + TableOperationsHistoryDto.builder() .id(row.getId()) .tableUuid(row.getTableUuid()) .databaseName(row.getDatabaseName()) 
.tableName(row.getTableName()) - .operationType(OperationType.fromDb(row.getOperationType())) + .operationType(OperationTypeDto.fromDb(row.getOperationType())) .completedAt(Instant.now()) .status(status) .build()) - .map(history -> TableOperationsHistory.fromRow(historyRepository.save(history.toRow()))); + .map(history -> TableOperationsHistoryDto.fromRow(historyRepository.save(history.toRow()))); } @Override - public Optional getTableOperation(String id) { - return operationsRepository.findById(id).map(TableOperation::fromRow); + public Optional getTableOperation(String id) { + return operationsRepository.findById(id).map(TableOperationDto::fromRow); } - // --- TableStats --- + // --- TableStatsDto --- @Override @Transactional - public TableStats upsertTableStats(TableStats stats) { + public TableStatsDto upsertTableStats(TableStatsDto stats) { Instant now = Instant.now(); String tableUuid = stats.getTableUuid(); @@ -120,51 +120,51 @@ public TableStats upsertTableStats(TableStats stats) { .recordedAt(now) .build()); - return TableStats.fromRow(saved); + return TableStatsDto.fromRow(saved); } @Override - public Optional getTableStats(String tableUuid) { - return statsRepository.findById(tableUuid).map(TableStats::fromRow); + public Optional getTableStats(String tableUuid) { + return statsRepository.findById(tableUuid).map(TableStatsDto::fromRow); } @Override - public List listTableStats( + public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { return statsRepository .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() - .map(TableStats::fromRow) + .map(TableStatsDto::fromRow) .collect(Collectors.toList()); } @Override - public List getStatsHistory( + public List getStatsHistory( String tableUuid, Optional since, int limit) { return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) .stream() - .map(TableStatsHistory::fromRow) + 
.map(TableStatsHistoryDto::fromRow) .collect(Collectors.toList()); } - // --- TableOperationsHistory --- + // --- TableOperationsHistoryDto --- @Override @Transactional - public TableOperationsHistory appendHistory(TableOperationsHistory history) { - TableOperationsHistory toWrite = + public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto history) { + TableOperationsHistoryDto toWrite = history .toBuilder() .completedAt( history.getCompletedAt() != null ? history.getCompletedAt() : Instant.now()) .build(); - return TableOperationsHistory.fromRow(historyRepository.save(toWrite.toRow())); + return TableOperationsHistoryDto.fromRow(historyRepository.save(toWrite.toRow())); } @Override - public List getHistory(String tableUuid, int limit) { + public List getHistory(String tableUuid, int limit) { return historyRepository .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() - .map(TableOperationsHistory::fromRow) + .map(TableOperationsHistoryDto::fromRow) .collect(Collectors.toList()); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index b329459ad..8457949cd 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -4,11 +4,11 @@ import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; -import 
com.linkedin.openhouse.optimizer.model.TableStats; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; +import com.linkedin.openhouse.optimizer.model.TableStatsDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -53,21 +53,21 @@ void completeOperation_writesHistoryFromOperationRow() { .jobId("spark-job-123") .build()); - Optional result = - service.completeOperation(operationId, HistoryStatus.SUCCESS); + Optional result = + service.completeOperation(operationId, HistoryStatusDto.SUCCESS); assertThat(result).isPresent(); - assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); + assertThat(result.get().getStatus()).isEqualTo(HistoryStatusDto.SUCCESS); assertThat(result.get().getTableUuid()).isEqualTo(tableUuid); - assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); + assertThat(result.get().getOperationType()).isEqualTo(OperationTypeDto.ORPHAN_FILES_DELETION); assertThat(result.get().getDatabaseName()).isEqualTo("db1"); assertThat(result.get().getCompletedAt()).isNotNull(); } @Test void completeOperation_notFound_returnsEmpty() { - Optional result = - service.completeOperation(UUID.randomUUID().toString(), HistoryStatus.FAILED); + Optional result = + service.completeOperation(UUID.randomUUID().toString(), HistoryStatusDto.FAILED); assertThat(result).isEmpty(); } @@ -77,16 +77,16 @@ void completeOperation_notFound_returnsEmpty() { @Test void upsertTableStats_createsNewRow() { String tableUuid = UUID.randomUUID().toString(); - TableStats input = - TableStats.builder() + TableStatsDto input = + 
TableStatsDto.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") .tableProperties(Map.of("maintenance.optimizer.ofd.enabled", "true")) - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) + .snapshot(TableStatsDto.SnapshotMetrics.builder().tableSizeBytes(1024L).build()) .build(); - TableStats result = service.upsertTableStats(input); + TableStatsDto result = service.upsertTableStats(input); assertThat(result.getTableUuid()).isEqualTo(tableUuid); assertThat(result.getDatabaseName()).isEqualTo("db1"); @@ -100,25 +100,27 @@ void upsertTableStats_createsNewRow() { @Test void upsertTableStats_updatesExistingRow_andAppendsHistory() { String tableUuid = UUID.randomUUID().toString(); - TableStats first = - TableStats.builder() + TableStatsDto first = + TableStatsDto.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(100L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) + .snapshot(TableStatsDto.SnapshotMetrics.builder().tableSizeBytes(100L).build()) + .delta( + TableStatsDto.CommitDelta.builder().numFilesAdded(5L).numFilesDeleted(1L).build()) .build(); - TableStats second = - TableStats.builder() + TableStatsDto second = + TableStatsDto.builder() .tableUuid(tableUuid) .databaseName("db1") .tableName("tbl1") - .snapshot(TableStats.SnapshotMetrics.builder().tableSizeBytes(200L).build()) - .delta(TableStats.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) + .snapshot(TableStatsDto.SnapshotMetrics.builder().tableSizeBytes(200L).build()) + .delta( + TableStatsDto.CommitDelta.builder().numFilesAdded(3L).numFilesDeleted(0L).build()) .build(); service.upsertTableStats(first); - TableStats result = service.upsertTableStats(second); + TableStatsDto result = service.upsertTableStats(second); assertThat(result.getSnapshot().getTableSizeBytes()).isEqualTo(200L); 
assertThat(statsRepository.findAll()).hasSize(1); @@ -159,8 +161,8 @@ void listTableOperations_filtersByOperationTypeAndStatus() { assertThat( service.listTableOperations( - Optional.of(OperationType.ORPHAN_FILES_DELETION), - Optional.of(OperationStatus.PENDING), + Optional.of(OperationTypeDto.ORPHAN_FILES_DELETION), + Optional.of(OperationStatusDto.PENDING), Optional.empty(), Optional.empty(), Optional.empty())) From c305aa9c1b803c12b41daa1972c5f3675ec20c13 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 14:58:10 -0700 Subject: [PATCH 088/104] refactor(analyzer): update model refs after Dto suffix swap Adds Dto suffix to model type refs in the analyzer app: TableOperationDto, TableStatsDto, OperationTypeDto, etc. Mechanical follow-up to the opt-0 rename (b31decf8). Co-Authored-By: Claude Opus 4.7 --- .../openhouse/analyzer/AnalyzerRunner.java | 42 +++++++-------- ...denceBasedOrphanFilesDeletionAnalyzer.java | 22 ++++---- .../openhouse/analyzer/CadencePolicy.java | 16 +++--- .../openhouse/analyzer/OperationAnalyzer.java | 18 +++---- .../analyzer/AnalyzerRunnerTest.java | 20 +++---- ...eBasedOrphanFilesDeletionAnalyzerTest.java | 54 ++++++++++--------- 6 files changed, 87 insertions(+), 85 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index 2deb053c6..e9a7f69b8 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import 
com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -42,10 +42,10 @@ public class AnalyzerRunner { /** * Run the analysis loop for {@code operationType} across all databases, with no filters. - * Equivalent to {@link #analyze(OperationType, Optional, Optional, Optional)} with all-empty + * Equivalent to {@link #analyze(OperationTypeDto, Optional, Optional, Optional)} with all-empty * filters. */ - public void analyze(OperationType operationType) { + public void analyze(OperationTypeDto operationType) { analyze(operationType, Optional.empty(), Optional.empty(), Optional.empty()); } @@ -55,7 +55,7 @@ public void analyze(OperationType operationType) { * tables-per-db, not tables-total. */ public void analyze( - OperationType operationType, + OperationTypeDto operationType, Optional databaseName, Optional tableName, Optional tableUuid) { @@ -87,31 +87,31 @@ private void analyzeDatabase( // unwieldy as the filter count grows. Migrate to Criteria API or jOOQ once the scaffolding // stabilizes — applies to operationsRepo.find, historyRepo.findLatestPerTable, and // statsRepo.find below. 
- Map currentOps = + Map currentOps = operationsRepo .find( dbOperationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) .stream() .filter(e -> e.getTableUuid() != null) - .map(TableOperation::fromRow) + .map(TableOperationDto::fromRow) .collect( Collectors.toMap( - TableOperation::getTableUuid, op -> op, TableOperation::mostRecent)); + TableOperationDto::getTableUuid, op -> op, TableOperationDto::mostRecent)); - Map latestHistory = + Map latestHistory = historyRepo.findLatestPerTable(dbOperationType).stream() .filter(r -> r.getTableUuid() != null) - .map(TableOperationsHistory::fromRow) + .map(TableOperationsHistoryDto::fromRow) .collect( Collectors.toMap( - TableOperationsHistory::getTableUuid, + TableOperationsHistoryDto::getTableUuid, h -> h, AnalyzerRunner::moreRecentHistory)); - List

tables = + List tables = statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() .filter(row -> row.getTableUuid() != null) - .map(Table::fromRow) + .map(TableDto::fromRow) .collect(Collectors.toList()); /* @@ -130,12 +130,12 @@ private void analyzeDatabase( if (!analyzer.isEnabled(table)) { return; } - Optional currentOp = + Optional currentOp = Optional.ofNullable(currentOps.get(table.getTableUuid())); - Optional entry = + Optional entry = Optional.ofNullable(latestHistory.get(table.getTableUuid())); if (analyzer.shouldSchedule(table, currentOp, entry)) { - TableOperation op = TableOperation.pending(table, analyzer.getOperationType()); + TableOperationDto op = TableOperationDto.pending(table, analyzer.getOperationType()); operationsRepo.save(op.toRow()); log.info( "Created PENDING {} operation for table {}.{}", @@ -146,9 +146,9 @@ private void analyzeDatabase( }); } - private static TableOperationsHistory moreRecentHistory( - TableOperationsHistory a, TableOperationsHistory b) { - Comparator byCompletedAt = + private static TableOperationsHistoryDto moreRecentHistory( + TableOperationsHistoryDto a, TableOperationsHistoryDto b) { + Comparator byCompletedAt = Comparator.comparing(r -> r.getCompletedAt() != null ? r.getCompletedAt() : Instant.EPOCH); return byCompletedAt.compare(a, b) >= 0 ? 
a : b; } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index 394b77eca..302669dbe 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -1,16 +1,16 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.time.Duration; import java.util.Optional; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -/** Analyzer for the {@link OperationType#ORPHAN_FILES_DELETION} operation type. */ +/** Analyzer for the {@link OperationTypeDto#ORPHAN_FILES_DELETION} operation type. 
*/ @Component public class CadenceBasedOrphanFilesDeletionAnalyzer implements OperationAnalyzer { @@ -32,20 +32,20 @@ public CadenceBasedOrphanFilesDeletionAnalyzer( } @Override - public OperationType getOperationType() { - return OperationType.ORPHAN_FILES_DELETION; + public OperationTypeDto getOperationType() { + return OperationTypeDto.ORPHAN_FILES_DELETION; } @Override - public boolean isEnabled(Table table) { + public boolean isEnabled(TableDto table) { return "true".equals(table.getTableProperties().get(OFD_ENABLED_PROPERTY)); } @Override public boolean shouldSchedule( - Table table, - Optional currentOp, - Optional latestHistory) { + TableDto table, + Optional currentOp, + Optional latestHistory) { return cadencePolicy.shouldSchedule(currentOp, latestHistory); } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java index 6ce2db80c..b4461e6c6 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/CadencePolicy.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.time.Duration; import java.time.Instant; import java.util.Optional; @@ -42,16 +42,16 @@ public class CadencePolicy { * @param latestHistory the most recent history entry for this (table, type), or 
empty */ public boolean shouldSchedule( - Optional currentOp, Optional latestHistory) { - if (currentOp.isPresent() && currentOp.get().getStatus() != OperationStatus.CANCELED) { + Optional currentOp, Optional latestHistory) { + if (currentOp.isPresent() && currentOp.get().getStatus() != OperationStatusDto.CANCELED) { return false; } return latestHistory.map(this::readyAfterHistoryEntry).orElse(true); } - private boolean readyAfterHistoryEntry(TableOperationsHistory entry) { + private boolean readyAfterHistoryEntry(TableOperationsHistoryDto entry) { Duration interval = - entry.getStatus() == HistoryStatus.FAILED ? failureRetryInterval : successRetryInterval; + entry.getStatus() == HistoryStatusDto.FAILED ? failureRetryInterval : successRetryInterval; return Duration.between(entry.getCompletedAt(), Instant.now()).compareTo(interval) > 0; } } diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java index ba64f558a..ab69386e4 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/OperationAnalyzer.java @@ -1,9 +1,9 @@ package com.linkedin.openhouse.analyzer; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.util.Optional; /** @@ -19,13 +19,13 @@ public interface OperationAnalyzer { /** The operation type this analyzer handles. 
*/ - OperationType getOperationType(); + OperationTypeDto getOperationType(); /** * Returns {@code true} if this operation is opted-in for the given table. Tables that return * {@code false} are skipped entirely — no upsert is issued. */ - boolean isEnabled(Table table); + boolean isEnabled(TableDto table); /** * Returns {@code true} if a new or refreshed operation record should be upserted. @@ -35,7 +35,7 @@ public interface OperationAnalyzer { * @param latestHistory the most recent history entry for this (table, type), or empty */ boolean shouldSchedule( - Table table, - Optional currentOp, - Optional latestHistory); + TableDto table, + Optional currentOp, + Optional latestHistory); } diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index fe9561eb9..450ae3e13 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -8,9 +8,9 @@ import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.db.TableStatsRow; -import com.linkedin.openhouse.optimizer.model.OperationType; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; +import com.linkedin.openhouse.optimizer.model.OperationTypeDto; +import com.linkedin.openhouse.optimizer.model.TableDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; import com.linkedin.openhouse.optimizer.repository.TableOperationsHistoryRepository; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; import com.linkedin.openhouse.optimizer.repository.TableStatsRepository; @@ -28,7 +28,7 @@ @ExtendWith(MockitoExtension.class) class AnalyzerRunnerTest { - private static final OperationType OFD_TYPE 
= OperationType.ORPHAN_FILES_DELETION; + private static final OperationTypeDto OFD_TYPE = OperationTypeDto.ORPHAN_FILES_DELETION; private static final com.linkedin.openhouse.optimizer.db.OperationType OFD_DB = com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION; private static final String DB = "db1"; @@ -52,7 +52,7 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = Table.fromRow(statsEntity); + TableDto expectedTable = TableDto.fromRow(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); @@ -80,7 +80,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).tableName("tbl1").build(); - Table expectedTable = Table.fromRow(statsEntity); + TableDto expectedTable = TableDto.fromRow(statsEntity); TableOperationsRow existingEntity = TableOperationsRow.builder() @@ -96,7 +96,7 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation existingOp = TableOperation.fromRow(existingEntity); + TableOperationDto existingOp = TableOperationDto.fromRow(existingEntity); when(analyzer.shouldSchedule(expectedTable, Optional.of(existingOp), Optional.empty())) .thenReturn(false); @@ -110,7 +110,7 @@ void analyze_skipsTable_whenNotEnabled() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = Table.fromRow(statsEntity); + TableDto expectedTable = TableDto.fromRow(statsEntity); when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); 
when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); @@ -127,7 +127,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { TableStatsRow statsEntity = TableStatsRow.builder().tableUuid("uuid-1").databaseName(DB).build(); - Table expectedTable = Table.fromRow(statsEntity); + TableDto expectedTable = TableDto.fromRow(statsEntity); TableOperationsRow scheduled = TableOperationsRow.builder() @@ -143,7 +143,7 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); - TableOperation scheduledOp = TableOperation.fromRow(scheduled); + TableOperationDto scheduledOp = TableOperationDto.fromRow(scheduled); when(analyzer.shouldSchedule(expectedTable, Optional.of(scheduledOp), Optional.empty())) .thenReturn(false); diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java index 633c9dceb..e50bb694d 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java +++ b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/CadenceBasedOrphanFilesDeletionAnalyzerTest.java @@ -2,11 +2,11 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.linkedin.openhouse.optimizer.model.HistoryStatus; -import com.linkedin.openhouse.optimizer.model.OperationStatus; -import com.linkedin.openhouse.optimizer.model.Table; -import com.linkedin.openhouse.optimizer.model.TableOperation; -import com.linkedin.openhouse.optimizer.model.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.model.HistoryStatusDto; +import com.linkedin.openhouse.optimizer.model.OperationStatusDto; +import 
com.linkedin.openhouse.optimizer.model.TableDto; +import com.linkedin.openhouse.optimizer.model.TableOperationDto; +import com.linkedin.openhouse.optimizer.model.TableOperationsHistoryDto; import java.time.Duration; import java.time.Instant; import java.util.Collections; @@ -48,7 +48,7 @@ void isEnabled_returnsFalse_whenPropertyFalse() { @Test void isEnabled_returnsFalse_whenTablePropertiesEmpty() { - Table table = Table.builder().tableUuid("uuid").build(); + TableDto table = TableDto.builder().tableUuid("uuid").build(); assertThat(analyzer.isEnabled(table)).isFalse(); } @@ -68,7 +68,7 @@ void shouldSchedule_noOp_successHistoryAfterCooldown_returnsTrue() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus(HistoryStatus.SUCCESS, longAgo)))) + Optional.of(historyWithStatus(HistoryStatusDto.SUCCESS, longAgo)))) .isTrue(); } @@ -79,7 +79,7 @@ void shouldSchedule_noOp_successHistoryBeforeCooldown_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus(HistoryStatus.SUCCESS, recent)))) + Optional.of(historyWithStatus(HistoryStatusDto.SUCCESS, recent)))) .isFalse(); } @@ -90,7 +90,7 @@ void shouldSchedule_noOp_failedHistoryAfterRetry_returnsTrue() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus(HistoryStatus.FAILED, longAgo)))) + Optional.of(historyWithStatus(HistoryStatusDto.FAILED, longAgo)))) .isTrue(); } @@ -101,7 +101,7 @@ void shouldSchedule_noOp_failedHistoryBeforeRetry_returnsFalse() { analyzer.shouldSchedule( tableWithProperty("true"), Optional.empty(), - Optional.of(historyWithStatus(HistoryStatus.FAILED, recent)))) + Optional.of(historyWithStatus(HistoryStatusDto.FAILED, recent)))) .isFalse(); } @@ -112,7 +112,7 @@ void shouldSchedule_pending_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.PENDING)), + 
Optional.of(opWithStatus(OperationStatusDto.PENDING)), Optional.empty())) .isFalse(); } @@ -122,7 +122,7 @@ void shouldSchedule_scheduling_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULING)), + Optional.of(opWithStatus(OperationStatusDto.SCHEDULING)), Optional.empty())) .isFalse(); } @@ -133,8 +133,8 @@ void shouldSchedule_scheduled_returnsFalse_regardlessOfHistory() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.SCHEDULED)), - Optional.of(historyWithStatus(HistoryStatus.SUCCESS, historyAt)))) + Optional.of(opWithStatus(OperationStatusDto.SCHEDULED)), + Optional.of(historyWithStatus(HistoryStatusDto.SUCCESS, historyAt)))) .isFalse(); } @@ -146,8 +146,8 @@ void shouldSchedule_canceled_successHistoryAfterCooldown_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.CANCELED)), - Optional.of(historyWithStatus(HistoryStatus.SUCCESS, longAgo)))) + Optional.of(opWithStatus(OperationStatusDto.CANCELED)), + Optional.of(historyWithStatus(HistoryStatusDto.SUCCESS, longAgo)))) .isTrue(); } @@ -157,8 +157,8 @@ void shouldSchedule_canceled_successHistoryBeforeCooldown_returnsFalse() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.CANCELED)), - Optional.of(historyWithStatus(HistoryStatus.SUCCESS, recent)))) + Optional.of(opWithStatus(OperationStatusDto.CANCELED)), + Optional.of(historyWithStatus(HistoryStatusDto.SUCCESS, recent)))) .isFalse(); } @@ -167,19 +167,19 @@ void shouldSchedule_canceled_noHistory_returnsTrue() { assertThat( analyzer.shouldSchedule( tableWithProperty("true"), - Optional.of(opWithStatus(OperationStatus.CANCELED)), + Optional.of(opWithStatus(OperationStatusDto.CANCELED)), Optional.empty())) .isTrue(); } // --- helpers --- - private Table tableWithProperty(String value) { + private 
TableDto tableWithProperty(String value) { Map props = value == null ? Collections.emptyMap() : Map.of(CadenceBasedOrphanFilesDeletionAnalyzer.OFD_ENABLED_PROPERTY, value); - return Table.builder() + return TableDto.builder() .tableUuid("test-uuid") .databaseName("db1") .tableId("tbl1") @@ -187,15 +187,17 @@ private Table tableWithProperty(String value) { .build(); } - private TableOperation opWithStatus(OperationStatus status) { - return TableOperation.builder().status(status).build(); + private TableOperationDto opWithStatus(OperationStatusDto status) { + return TableOperationDto.builder().status(status).build(); } - private TableOperationsHistory historyWithStatus(HistoryStatus status, Instant completedAt) { - return TableOperationsHistory.builder() + private TableOperationsHistoryDto historyWithStatus( + HistoryStatusDto status, Instant completedAt) { + return TableOperationsHistoryDto.builder() .id("hist-id") .tableUuid("test-uuid") - .operationType(com.linkedin.openhouse.optimizer.model.OperationType.ORPHAN_FILES_DELETION) + .operationType( + com.linkedin.openhouse.optimizer.model.OperationTypeDto.ORPHAN_FILES_DELETION) .completedAt(completedAt) .status(status) .build(); From 4e86569ce2e4327665b0d8885276c6b2e048612a Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:16:10 -0700 Subject: [PATCH 089/104] feat(optimizer): propagate jobId through model + api conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit model.TableOperationDto grows a jobId field; api.TableOperations conversions copy it across the api ↔ model boundary. The api DTO already had the field; the model side was missing it. Relocated from opt-5 to its proper owner per the model-layer rule. Model ↔ db plumbing for the same field lands on opt-1 in a follow-up. 
Co-Authored-By: Claude Opus 4.7 --- .../linkedin/openhouse/optimizer/api/spec/TableOperations.java | 2 ++ .../linkedin/openhouse/optimizer/model/TableOperationDto.java | 3 +++ 2 files changed, 5 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java index 60f2c3dd8..0bca95734 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperations.java @@ -52,6 +52,7 @@ public TableOperationDto toModel() { .status(status == null ? null : status.toModel()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -69,6 +70,7 @@ public static TableOperations fromModel(TableOperationDto op) { .status(OperationStatus.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) + .jobId(op.getJobId()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index 8809a1b62..4cac14187 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -46,6 +46,9 @@ public class TableOperationDto { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; + /** Job ID returned by the Jobs Service after the scheduler submitted; null until SCHEDULED. */ + private String jobId; + /** Create a new PENDING operation for the given table and operation type. 
*/ public static TableOperationDto pending(TableDto table, OperationTypeDto operationType) { return TableOperationDto.builder() From efcceeaa9d4656fe6ec2028c72a60fe7d92f59a3 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:17:06 -0700 Subject: [PATCH 090/104] =?UTF-8?q?feat(optimizer):=20propagate=20jobId=20?= =?UTF-8?q?through=20model=20=E2=86=94=20db=20conversions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion to the opt-0 jobId field addition: now that model.TableOperationDto carries jobId, wire it through toRow/fromRow so the db row's job_id column round-trips through the model layer. Relocated from opt-5. Co-Authored-By: Claude Opus 4.7 --- .../linkedin/openhouse/optimizer/model/TableOperationDto.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java index c39a71ecd..18d57ce66 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationDto.java @@ -80,6 +80,7 @@ public TableOperationsRow toRow() { .status(status == null ? 
null : status.toDb()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -97,6 +98,7 @@ public static TableOperationDto fromRow(TableOperationsRow row) { .status(OperationStatusDto.fromDb(row.getStatus())) .createdAt(row.getCreatedAt()) .scheduledAt(row.getScheduledAt()) + .jobId(row.getJobId()) .build(); } } From c00f20188d0ccf76d17f8698fc6cf7aaf6ae3bda Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:17:42 -0700 Subject: [PATCH 091/104] =?UTF-8?q?chore(optimizer):=20rename=20OPTIMIZER?= =?UTF-8?q?=5FDB=5FUSERNAME=20=E2=86=92=20OPTIMIZER=5FDB=5FUSER?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The analyzer and scheduler application.properties (introduced on opt-3 / opt-4) both already use OPTIMIZER_DB_USER. The service was the odd one out. Normalize so all three JVMs read the same env var name. Relocated from opt-5. Co-Authored-By: Claude Opus 4.7 --- services/optimizer/src/main/resources/application.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index c6c3f8437..e78745d00 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -12,7 +12,7 @@ spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.mode spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} -spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 From 1fe71f043260d2c5b57c6556cb69ea051f5fafbe Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:25:48 -0700 Subject: [PATCH 092/104] 
=?UTF-8?q?refactor(optimizer):=20rename=20Complet?= =?UTF-8?q?eOperationRequest=20=E2=86=92=20UpdateOperationRequest?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbol rename only. The HistoryStatus enum (SUCCESS/FAILED) and the once-terminal semantics are unchanged; the endpoint's behavior is the same. Future broadening (CANCELED/QUEUED, idempotency, mid-lifecycle status changes) is a separate concern. Method names + URL path will follow on opt-2; Spark-app caller + docs follow on opt-5. Co-Authored-By: Claude Opus 4.7 --- ...nRequest.java => UpdateOperationRequest.java} | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) rename services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/{CompleteOperationRequest.java => UpdateOperationRequest.java} (70%) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java similarity index 70% rename from services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java rename to services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index 15112882d..a216e9db3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -6,26 +6,26 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/complete}. + * Request body for {@code POST /v1/table-operations/update}. * - *

Reports the outcome of a single completed operation. The service looks up the operation row by + *

Reports the outcome of a single operation update. The service looks up the operation row by * {@link #operationId} and writes a history entry for it. * *

A single Spark job typically processes N tables and yields N independent (status) outcomes — - * one per operation. Callers issue one complete request per operation; the service does not - * bulk-complete by job. + * one per operation. Callers issue one update request per operation; the service does not + * bulk-update by job. * *

The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link * #operationType}) are debug-only echo information. The server does not key off them; they are - * preserved on log lines and traces so an operator looking at a failing complete call can see which - * (db, table, operation) the caller believed it was completing without joining back to the - * operation row. + * preserved on log lines and traces so an operator looking at a failing update call can see which + * (db, table, operation) the caller believed it was updating without joining back to the operation + * row. */ @Data @Builder @NoArgsConstructor @AllArgsConstructor -public class CompleteOperationRequest { +public class UpdateOperationRequest { /** Operation row's UUID — the primary lookup key. */ private String operationId; From 947bedfb69869fc8843c766e9dba24f04db58042 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 15:27:48 -0700 Subject: [PATCH 093/104] =?UTF-8?q?refactor(optimizer):=20rename=20complet?= =?UTF-8?q?eOperation=20=E2=86=92=20updateOperation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symbol rename across the controller, service interface, service impl, and tests. URL path POST /v1/optimizer/operations/complete → /update. Symbol-rename only; behavior, semantics, and HistoryStatus values (SUCCESS/FAILED) are unchanged. Spark-app caller + docs follow on opt-5. 
Co-Authored-By: Claude Opus 4.7 --- .../api/controller/TableOperationsController.java | 14 +++++++------- .../optimizer/service/OptimizerDataService.java | 5 ++--- .../service/OptimizerDataServiceImpl.java | 2 +- .../service/OptimizerDataServiceImplTest.java | 6 +++--- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 6f9d6a177..c28002bf7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -1,10 +1,10 @@ package com.linkedin.openhouse.optimizer.api.controller; -import com.linkedin.openhouse.optimizer.api.spec.CompleteOperationRequest; import com.linkedin.openhouse.optimizer.api.spec.OperationStatus; import com.linkedin.openhouse.optimizer.api.spec.OperationType; import com.linkedin.openhouse.optimizer.api.spec.TableOperations; import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; +import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.Optional; @@ -29,16 +29,16 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report that an operation has completed. The body carries the {@code operationId} the caller is - * completing along with its terminal status. The backend looks up the operation row, writes a + * Report an update to an operation. The body carries the {@code operationId} the caller is + * updating along with its terminal status. 
The backend looks up the operation row, writes a * history entry with the operation's table metadata, and returns 201 Created with the history * row, or 404 if the operation does not exist. */ - @PostMapping("/complete") - public ResponseEntity completeOperation( - @RequestBody CompleteOperationRequest request) { + @PostMapping("/update") + public ResponseEntity updateOperation( + @RequestBody UpdateOperationRequest request) { return service - .completeOperation( + .updateOperation( request.getOperationId(), request.getStatus() == null ? null : request.getStatus().toModel()) .map( diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index c32a67bae..0529d3608 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -34,13 +34,12 @@ List listTableOperations( Optional tableUuid); /** - * Complete an operation by writing a history entry. Looks up the operation row by {@code + * Update an operation by writing a history entry. Looks up the operation row by {@code * operationId}, copies its table metadata into a new history row with the supplied terminal * {@code status}, and saves it. Returns the history record, or empty if the operation does not * exist. */ - Optional completeOperation( - String operationId, HistoryStatusDto status); + Optional updateOperation(String operationId, HistoryStatusDto status); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. 
diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index a9ead77ce..1ca9c7777 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -62,7 +62,7 @@ public List listTableOperations( @Override @Transactional - public Optional completeOperation( + public Optional updateOperation( String operationId, HistoryStatusDto status) { return operationsRepository .findById(operationId) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 8457949cd..e817e3fd5 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -34,7 +34,7 @@ class OptimizerDataServiceImplTest { @Autowired TableStatsRepository statsRepository; @Autowired TableStatsHistoryRepository statsHistoryRepository; - // --- completeOperation --- + // --- updateOperation --- @Test void completeOperation_writesHistoryFromOperationRow() { @@ -54,7 +54,7 @@ void completeOperation_writesHistoryFromOperationRow() { .build()); Optional result = - service.completeOperation(operationId, HistoryStatusDto.SUCCESS); + service.updateOperation(operationId, HistoryStatusDto.SUCCESS); assertThat(result).isPresent(); assertThat(result.get().getStatus()).isEqualTo(HistoryStatusDto.SUCCESS); @@ -67,7 +67,7 @@ void completeOperation_writesHistoryFromOperationRow() { @Test void completeOperation_notFound_returnsEmpty() { Optional result = - 
service.completeOperation(UUID.randomUUID().toString(), HistoryStatusDto.FAILED); + service.updateOperation(UUID.randomUUID().toString(), HistoryStatusDto.FAILED); assertThat(result).isEmpty(); } From d65b511d472ff27f08ad12e86d393cf877457c51 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 17:19:47 -0700 Subject: [PATCH 094/104] refactor(optimizer-repo): unify find/updateBatch with Optional params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Repo public API now: - find(...) with Optional filters + required Pageable, on all four repos - updateBatch(ids, fromStatus, toStatus, Optional scheduledAt, Optional jobId) — replaces markSchedulingBatch, markScheduledBatch, markPendingBatch - cancel(ids) — replaces cancelDuplicatePendingBatch; deletes by-id with a defensive PENDING-only gate - findLatest(opType, Pageable) — was findLatestPerTable - history.find(tableUuid, Pageable) — was findByTableUuidOrderByCompletedAtDesc Side-effect columns on updateBatch use COALESCE with Optional.empty() → leave-unchanged. scheduledAt is not cleared on SCHEDULING → PENDING revert; status is the source of truth and the watermark is overwritten on the next claim. @Modifying queries get flushAutomatically + clearAutomatically so the L1 cache reflects the change immediately (caught by the unit tests). Spring Data @Query can't share an "IS NULL OR IN :list" pattern (Hibernate expands the list inline and the IS NULL check turns ungrammatical). The find path uses two internal queries dispatched by the default method — one with the ids predicate, one without. Callers (service, analyzer, scheduler) update on opt-2..opt-4 in follow-up commits. 
Co-Authored-By: Claude Opus 4.7 --- .../TableOperationsHistoryRepository.java | 15 +- .../repository/TableOperationsRepository.java | 181 +++++++---- .../TableStatsHistoryRepository.java | 19 +- .../repository/TableStatsRepository.java | 37 ++- .../TableOperationsHistoryRepositoryTest.java | 8 +- .../TableOperationsRepositoryTest.java | 307 ++++++++++++------ .../TableStatsHistoryRepositoryTest.java | 14 +- .../repository/TableStatsRepositoryTest.java | 17 +- 8 files changed, 396 insertions(+), 202 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java index 5faf349e3..6c08f844a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepository.java @@ -13,11 +13,14 @@ public interface TableOperationsHistoryRepository extends JpaRepository { /** - * Return history rows for a single {@code tableUuid}, newest first. Used by the service-layer - * {@code getHistory} endpoint. + * Return history rows for a single {@code tableUuid}, newest first. {@code pageable} is required; + * callers pick the row cap (default limit lives in {@code optimizer.repo.default-limit}). 
*/ - List findByTableUuidOrderByCompletedAtDesc( - String tableUuid, Pageable pageable); + @Query( + "SELECT r FROM TableOperationsHistoryRow r " + + "WHERE r.tableUuid = :tableUuid " + + "ORDER BY r.completedAt DESC") + List find(@Param("tableUuid") String tableUuid, Pageable pageable); /** * Return the most-recent history row per {@code (table_uuid, operation_type)}, filtered to a @@ -37,6 +40,6 @@ List findByTableUuidOrderByCompletedAtDesc( + "AND r.completedAt = (" + " SELECT MAX(r2.completedAt) FROM TableOperationsHistoryRow r2 " + " WHERE r2.tableUuid = r.tableUuid AND r2.operationType = r.operationType)") - List findLatestPerTable( - @Param("operationType") OperationType operationType); + List findLatest( + @Param("operationType") OperationType operationType, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java index 513006bf6..e0df2cd21 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepository.java @@ -5,6 +5,8 @@ import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; import java.util.List; +import java.util.Optional; +import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; @@ -14,90 +16,131 @@ public interface TableOperationsRepository extends JpaRepository { /** - * Return operations matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. + * Find operation rows matching the given filters. 
Every filter is optional ({@link + * Optional#empty()} to skip). {@code pageable} is required; callers pick the row cap (default + * limit lives in {@code optimizer.repo.default-limit}). */ + default List find( + Optional operationType, + Optional status, + Optional tableUuid, + Optional databaseName, + Optional tableName, + Optional scheduledAt, + Optional> ids, + Pageable pageable) { + // List parameters can't share an :ids IS NULL pattern with the IN clause — + // Hibernate expands the list inline and the IS NULL check turns ungrammatical. + // Two internal queries; dispatch by presence. + if (ids.isPresent()) { + return findInternalWithIds( + operationType.orElse(null), + status.orElse(null), + tableUuid.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + scheduledAt.orElse(null), + ids.get(), + pageable); + } + return findInternal( + operationType.orElse(null), + status.orElse(null), + tableUuid.orElse(null), + databaseName.orElse(null), + tableName.orElse(null), + scheduledAt.orElse(null), + pageable); + } + + /** + * Batch CAS: transition rows from {@code fromStatus} to {@code toStatus} for every id in {@code + * ids} that is still in {@code fromStatus}. Rows in a different status are skipped silently. + * Returns the number of rows transitioned. + * + *

<p>Side-effect columns use COALESCE — {@link Optional#empty()} means "leave unchanged". The + * underlying transitions are: + * + * <ul>
+ * <li>PENDING → SCHEDULING: pass {@code scheduledAt = Optional.of(claimedAt)}; the watermark + * lets {@link #find} resolve the precise set of rows this caller claimed.
+ * <li>SCHEDULING → SCHEDULED: pass {@code jobId = Optional.of(...)}.
+ * <li>SCHEDULING → PENDING: pass both empty; {@code scheduledAt} stays at the prior claim's + * watermark (overwritten on the next claim) and {@code jobId} stays null.
+ * </ul>
+ */ + default int updateBatch( + List ids, + OperationStatus fromStatus, + OperationStatus toStatus, + Optional scheduledAt, + Optional jobId) { + return updateBatchInternal( + ids, fromStatus, toStatus, scheduledAt.orElse(null), jobId.orElse(null)); + } + + /** + * Delete the specified rows, but only if they are still {@code PENDING}. The status gate is + * defensive — never drop a row another instance has claimed. Returns the number of rows actually + * removed. + */ + @Modifying(flushAutomatically = true, clearAutomatically = true) + @Query( + "DELETE FROM TableOperationsRow r " + + "WHERE r.id IN :ids " + + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") + int cancel(@Param("ids") List ids); + + // ---- Internals. Use the Optional-typed default methods above. ---- + @Query( "SELECT r FROM TableOperationsRow r " + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + "AND (:status IS NULL OR r.status = :status) " + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName)") - List find( + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:scheduledAt IS NULL OR r.scheduledAt = :scheduledAt)") + List findInternal( @Param("operationType") OperationType operationType, @Param("status") OperationStatus status, @Param("tableUuid") String tableUuid, @Param("databaseName") String databaseName, - @Param("tableName") String tableName); - - /** - * Batch CAS: PENDING → SCHEDULING for every {@code id} still in PENDING. Returns the number of - * rows transitioned. Rows already claimed by another instance are skipped silently; pair this - * call with {@link #findClaimedIds(List, Instant)} (using the same {@code scheduledAt}) to get - * the precise list of rows this caller now owns. 
- */ - @Modifying - @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING," - + " r.scheduledAt = :scheduledAt " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING") - int markSchedulingBatch( - @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); - - /** - * Return the subset of {@code ids} that are currently {@code SCHEDULING} with the given {@code - * scheduledAt} watermark. Used after {@link #markSchedulingBatch(List, Instant)} to determine - * which rows this caller actually claimed (vs. rows another instance owns or rows that no longer - * exist). - */ - @Query( - "SELECT r.id FROM TableOperationsRow r " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING " - + "AND r.scheduledAt = :scheduledAt") - List findClaimedIds( - @Param("ids") List ids, @Param("scheduledAt") Instant scheduledAt); + @Param("tableName") String tableName, + @Param("scheduledAt") Instant scheduledAt, + Pageable pageable); - /** - * Batch CAS: SCHEDULING → SCHEDULED with the given {@code jobId} for every {@code id} still in - * SCHEDULING. Returns the number of rows transitioned. 
- */ - @Modifying @Query( - "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED," - + " r.jobId = :jobId " - + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") - int markScheduledBatch(@Param("ids") List ids, @Param("jobId") String jobId); + "SELECT r FROM TableOperationsRow r " + + "WHERE (:operationType IS NULL OR r.operationType = :operationType) " + + "AND (:status IS NULL OR r.status = :status) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid) " + + "AND (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:scheduledAt IS NULL OR r.scheduledAt = :scheduledAt) " + + "AND r.id IN :ids") + List findInternalWithIds( + @Param("operationType") OperationType operationType, + @Param("status") OperationStatus status, + @Param("tableUuid") String tableUuid, + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("scheduledAt") Instant scheduledAt, + @Param("ids") List ids, + Pageable pageable); - /** - * Batch transition: SCHEDULING → PENDING for every {@code id} still in SCHEDULING. Used by the - * scheduler to release claimed rows when job submission fails so the next pass can retry. Returns - * the number of rows reverted. 
- */ - @Modifying + @Modifying(flushAutomatically = true, clearAutomatically = true) @Query( "UPDATE TableOperationsRow r " - + "SET r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING," - + " r.scheduledAt = NULL " + + "SET r.status = :toStatus, " + + " r.scheduledAt = COALESCE(:scheduledAt, r.scheduledAt), " + + " r.jobId = COALESCE(:jobId, r.jobId) " + "WHERE r.id IN :ids " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULING") - int markPendingBatch(@Param("ids") List ids); - - /** - * Batch-delete duplicate PENDING rows for the given operation type, keeping only the IDs in - * {@code keepIds}. Used by the scheduler to deduplicate before claiming. - */ - @Modifying - @Query( - "DELETE FROM TableOperationsRow r " - + "WHERE r.operationType = :operationType " - + "AND r.status = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING " - + "AND r.id NOT IN :keepIds") - int cancelDuplicatePendingBatch( - @Param("operationType") OperationType operationType, @Param("keepIds") List keepIds); + + "AND r.status = :fromStatus") + int updateBatchInternal( + @Param("ids") List ids, + @Param("fromStatus") OperationStatus fromStatus, + @Param("toStatus") OperationStatus toStatus, + @Param("scheduledAt") Instant scheduledAt, + @Param("jobId") String jobId); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java index 6f9595275..9b603f265 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepository.java @@ -3,6 +3,7 @@ import com.linkedin.openhouse.optimizer.db.TableStatsHistoryRow; import java.time.Instant; import java.util.List; +import java.util.Optional; import 
org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; @@ -12,18 +13,22 @@ public interface TableStatsHistoryRepository extends JpaRepository { /** - * Return history rows for a table, newest first. Pass {@code null} for {@code since} to skip the - * time filter. - * - * @param tableUuid the stable table UUID - * @param since inclusive lower bound on recorded_at; {@code null} to skip - * @param pageable use {@code PageRequest.of(0, limit)} to cap results + * Return history rows for a table, newest first. {@code since} is optional ({@link + * Optional#empty()} to skip the time filter). {@code pageable} is required; callers pick the row + * cap (default limit lives in {@code optimizer.repo.default-limit}). */ + default List find( + String tableUuid, Optional since, Pageable pageable) { + return findInternal(tableUuid, since.orElse(null), pageable); + } + + // ---- Internals. Use the Optional-typed default method above. 
---- + @Query( "SELECT r FROM TableStatsHistoryRow r " + "WHERE r.tableUuid = :tableUuid " + "AND (:since IS NULL OR r.recordedAt >= :since) " + "ORDER BY r.recordedAt DESC") - List find( + List findInternal( @Param("tableUuid") String tableUuid, @Param("since") Instant since, Pageable pageable); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java index dbf1de0ae..1123c0e7a 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepository.java @@ -2,6 +2,8 @@ import com.linkedin.openhouse.optimizer.db.TableStatsRow; import java.util.List; +import java.util.Optional; +import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; @@ -10,18 +12,18 @@ public interface TableStatsRepository extends JpaRepository { /** - * Return stats rows matching the given filters. Every parameter is optional — pass {@code null} - * to skip that filter. + * Return stats rows matching the given filters. Every filter is optional ({@link + * Optional#empty()} to skip). {@code pageable} is required; callers pick the row cap (default + * limit lives in {@code optimizer.repo.default-limit}). 
*/ - @Query( - "SELECT r FROM TableStatsRow r " - + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " - + "AND (:tableName IS NULL OR r.tableName = :tableName) " - + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") - List find( - @Param("databaseName") String databaseName, - @Param("tableName") String tableName, - @Param("tableUuid") String tableUuid); + default List find( + Optional databaseName, + Optional tableName, + Optional tableUuid, + Pageable pageable) { + return findInternal( + databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null), pageable); + } /** * Return the distinct {@code database_name} values present in {@code table_stats}. Used by the @@ -30,4 +32,17 @@ List find( */ @Query("SELECT DISTINCT r.databaseName FROM TableStatsRow r") List findDistinctDatabaseNames(); + + // ---- Internals. Use the Optional-typed default methods above. ---- + + @Query( + "SELECT r FROM TableStatsRow r " + + "WHERE (:databaseName IS NULL OR r.databaseName = :databaseName) " + + "AND (:tableName IS NULL OR r.tableName = :tableName) " + + "AND (:tableUuid IS NULL OR r.tableUuid = :tableUuid)") + List findInternal( + @Param("databaseName") String databaseName, + @Param("tableName") String tableName, + @Param("tableUuid") String tableUuid, + Pageable pageable); } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java index 706ecd877..9f1de0c0c 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsHistoryRepositoryTest.java @@ -52,8 +52,7 @@ void findByTableUuid_returnsRowsNewestFirst() { .status(HistoryStatus.FAILED) .build()); - List rows = - 
repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 10)); + List rows = repository.find(tableUuid, PageRequest.of(0, 10)); assertThat(rows).hasSize(2); assertThat(rows.get(0).getId()).isEqualTo(idNewer); @@ -77,8 +76,7 @@ void findByTableUuid_respectsLimit() { .build()); } - List rows = - repository.findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, 3)); + List rows = repository.find(tableUuid, PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @@ -121,7 +119,7 @@ void findLatestPerTable_returnsOneRowPerTableUuid() { .build()); List latest = - repository.findLatestPerTable(OperationType.ORPHAN_FILES_DELETION); + repository.findLatest(OperationType.ORPHAN_FILES_DELETION, PageRequest.of(0, 10_000)); assertThat(latest).hasSize(2); TableOperationsHistoryRow forTarget = diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index bfe3fc437..8f46af1bf 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -9,9 +9,12 @@ import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.stream.Collectors; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.test.context.ActiveProfiles; import org.springframework.transaction.annotation.Transactional; @@ -20,24 +23,15 @@ @Transactional class TableOperationsRepositoryTest { + private static final Pageable PAGE = PageRequest.of(0, 10_000); + @Autowired 
TableOperationsRepository repository; @Test void saveAndFindById() { String id = UUID.randomUUID().toString(); - TableOperationsRow row = - TableOperationsRow.builder() - .id(id) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); - - repository.save(row); + repository.save(pendingRow(id, "tbl1")); Optional found = repository.findById(id); assertThat(found).isPresent(); @@ -45,74 +39,103 @@ void saveAndFindById() { } @Test - void find_noParams_returnsAll() { - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) - .createdAt(Instant.now()) - .build()); + void find_noFilters_returnsAll() { + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1")); + repository.save(scheduledRow(UUID.randomUUID().toString(), "tbl2")); - List rows = repository.find(null, null, null, null, null); + List rows = + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(rows).hasSize(2); } @Test void find_byStatus() { - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl1") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - 
.createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) - .createdAt(Instant.now()) - .build()); + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1")); + repository.save(scheduledRow(UUID.randomUUID().toString(), "tbl2")); List pending = - repository.find(null, OperationStatus.PENDING, null, null, null); + repository.find( + Optional.empty(), + Optional.of(OperationStatus.PENDING), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(pending).hasSize(1); assertThat(pending.get(0).getStatus()).isEqualTo(OperationStatus.PENDING); List scheduled = - repository.find(null, OperationStatus.SCHEDULED, null, null, null); + repository.find( + Optional.empty(), + Optional.of(OperationStatus.SCHEDULED), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE); assertThat(scheduled).hasSize(1); assertThat(scheduled.get(0).getStatus()).isEqualTo(OperationStatus.SCHEDULED); } @Test - void findClaimedIds_returnsOnlyClaimedSubset() { + void find_byDatabaseAndTable() { + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl1", "db1")); + repository.save(pendingRow(UUID.randomUUID().toString(), "tbl2", "db2")); + + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of("db1"), + Optional.empty(), + Optional.empty(), + Optional.empty(), + PAGE)) + .hasSize(1); + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of("db2"), + Optional.of("tbl2"), + Optional.empty(), + Optional.empty(), + PAGE)) + .hasSize(1); + assertThat( + repository.find( + Optional.empty(), + Optional.empty(), + Optional.empty(), + 
Optional.of("db1"), + Optional.of("tbl2"), + Optional.empty(), + Optional.empty(), + PAGE)) + .isEmpty(); + } + + @Test + void find_byScheduledAtAndIds_resolvesClaimedSubset() { String idA = UUID.randomUUID().toString(); String idB = UUID.randomUUID().toString(); String idC = UUID.randomUUID().toString(); - repository.save(pending(idA)); - repository.save(pending(idB)); - // idC is already SCHEDULING with a different scheduledAt — must NOT appear. + repository.save(pendingRow(idA, "tbl_a")); + repository.save(pendingRow(idB, "tbl_b")); + // idC is already SCHEDULING with an older watermark — must NOT appear. repository.save( TableOperationsRow.builder() .id(idC) @@ -126,68 +149,160 @@ void findClaimedIds_returnsOnlyClaimedSubset() { .build()); Instant now = Instant.now(); - repository.markSchedulingBatch(List.of(idA, idB, idC), now); + int transitioned = + repository.updateBatch( + List.of(idA, idB, idC), + OperationStatus.PENDING, + OperationStatus.SCHEDULING, + Optional.of(now), + Optional.empty()); + assertThat(transitioned).isEqualTo(2); - List claimed = repository.findClaimedIds(List.of(idA, idB, idC), now); - assertThat(claimed).containsExactlyInAnyOrder(idA, idB); + List claimedIds = + repository + .find( + Optional.empty(), + Optional.of(OperationStatus.SCHEDULING), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.of(now), + Optional.of(List.of(idA, idB, idC)), + PAGE) + .stream() + .map(TableOperationsRow::getId) + .collect(Collectors.toList()); + assertThat(claimedIds).containsExactlyInAnyOrder(idA, idB); } @Test - void findClaimedIds_emptyWhenNothingClaimed() { + void updateBatch_schedulingToScheduled_setsJobIdAndPreservesScheduledAt() { String id = UUID.randomUUID().toString(); + Instant claimedAt = Instant.parse("2026-05-20T16:42:43Z"); repository.save( TableOperationsRow.builder() .id(id) .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") - .tableName("tbl_x") + .tableName("tbl1") 
.operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.SCHEDULED) + .status(OperationStatus.SCHEDULING) .createdAt(Instant.now()) - .scheduledAt(Instant.now()) + .scheduledAt(claimedAt) .build()); - List claimed = repository.findClaimedIds(List.of(id), Instant.now()); - assertThat(claimed).isEmpty(); - } + int updated = + repository.updateBatch( + List.of(id), + OperationStatus.SCHEDULING, + OperationStatus.SCHEDULED, + Optional.empty(), + Optional.of("job-123")); + assertThat(updated).isEqualTo(1); - private TableOperationsRow pending(String id) { - return TableOperationsRow.builder() - .id(id) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db1") - .tableName("tbl_" + id) - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build(); + TableOperationsRow row = repository.findById(id).orElseThrow(); + assertThat(row.getStatus()).isEqualTo(OperationStatus.SCHEDULED); + assertThat(row.getJobId()).isEqualTo("job-123"); + assertThat(row.getScheduledAt()).isEqualTo(claimedAt); } @Test - void find_byDatabaseAndTable() { + void updateBatch_schedulingToPending_leavesScheduledAtUntouched() { + // scheduledAt is intentionally NOT cleared on revert. Status is the source of truth; the + // stale watermark gets overwritten on the next PENDING → SCHEDULING transition. 
+ String id = UUID.randomUUID().toString(); + Instant claimedAt = Instant.parse("2026-05-20T16:42:43Z"); repository.save( TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) + .id(id) .tableUuid(UUID.randomUUID().toString()) .databaseName("db1") .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) - .createdAt(Instant.now()) - .build()); - repository.save( - TableOperationsRow.builder() - .id(UUID.randomUUID().toString()) - .tableUuid(UUID.randomUUID().toString()) - .databaseName("db2") - .tableName("tbl2") - .operationType(OperationType.ORPHAN_FILES_DELETION) - .status(OperationStatus.PENDING) + .status(OperationStatus.SCHEDULING) .createdAt(Instant.now()) + .scheduledAt(claimedAt) .build()); - assertThat(repository.find(null, null, null, "db1", null)).hasSize(1); - assertThat(repository.find(null, null, null, "db2", "tbl2")).hasSize(1); - assertThat(repository.find(null, null, null, "db1", "tbl2")).isEmpty(); + int reverted = + repository.updateBatch( + List.of(id), + OperationStatus.SCHEDULING, + OperationStatus.PENDING, + Optional.empty(), + Optional.empty()); + assertThat(reverted).isEqualTo(1); + + TableOperationsRow row = repository.findById(id).orElseThrow(); + assertThat(row.getStatus()).isEqualTo(OperationStatus.PENDING); + assertThat(row.getScheduledAt()).isEqualTo(claimedAt); + } + + @Test + void updateBatch_skipsRowsNotInFromStatus() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + repository.save(pendingRow(pendingId, "tbl_a")); + repository.save(scheduledRow(scheduledId, "tbl_b")); + + int transitioned = + repository.updateBatch( + List.of(pendingId, scheduledId), + OperationStatus.PENDING, + OperationStatus.SCHEDULING, + Optional.of(Instant.now()), + Optional.empty()); + assertThat(transitioned).isEqualTo(1); + + assertThat(repository.findById(pendingId).orElseThrow().getStatus()) + .isEqualTo(OperationStatus.SCHEDULING); + 
assertThat(repository.findById(scheduledId).orElseThrow().getStatus()) + .isEqualTo(OperationStatus.SCHEDULED); + } + + @Test + void cancel_deletesOnlyPendingRows() { + String pendingId = UUID.randomUUID().toString(); + String scheduledId = UUID.randomUUID().toString(); + repository.save(pendingRow(pendingId, "tbl_p")); + repository.save(scheduledRow(scheduledId, "tbl_s")); + + int deleted = repository.cancel(List.of(pendingId, scheduledId)); + assertThat(deleted).isEqualTo(1); + + assertThat(repository.findById(pendingId)).isEmpty(); + assertThat(repository.findById(scheduledId)).isPresent(); + } + + // --- helpers --- + + private TableOperationsRow pendingRow(String id, String tableName) { + return pendingRow(id, tableName, "db1"); + } + + private TableOperationsRow pendingRow(String id, String tableName, String databaseName) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName(databaseName) + .tableName(tableName) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.PENDING) + .createdAt(Instant.now()) + .build(); + } + + private TableOperationsRow scheduledRow(String id, String tableName) { + return TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName(tableName) + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("job-" + id) + .build(); } } diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java index 536b72e35..cddec50c9 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java +++ 
b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsHistoryRepositoryTest.java @@ -8,6 +8,7 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.List; +import java.util.Optional; import java.util.UUID; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -32,7 +33,8 @@ void saveAndFind() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.find(tableUuid, null, PageRequest.of(0, 100)); + List rows = + repository.find(tableUuid, Optional.empty(), PageRequest.of(0, 100)); assertThat(rows).hasSize(3); // newest first @@ -49,7 +51,8 @@ void find_respectsLimit() { repository.save(buildRow(tableUuid, "db1", "tbl1", i, 0L, now.minus(i, ChronoUnit.HOURS))); } - List rows = repository.find(tableUuid, null, PageRequest.of(0, 3)); + List rows = + repository.find(tableUuid, Optional.empty(), PageRequest.of(0, 3)); assertThat(rows).hasSize(3); } @@ -64,7 +67,8 @@ void find_withSince_filtersOlderRows() { repository.save(buildRow(tableUuid, "db1", "tbl1", 5L, 1L, now.minus(1, ChronoUnit.HOURS))); repository.save(buildRow(tableUuid, "db1", "tbl1", 3L, 0L, now)); - List rows = repository.find(tableUuid, cutoff, PageRequest.of(0, 100)); + List rows = + repository.find(tableUuid, Optional.of(cutoff), PageRequest.of(0, 100)); // only the 2 rows within the last 90 minutes assertThat(rows).hasSize(2); @@ -80,8 +84,8 @@ void find_isolatesByTableUuid() { repository.save(buildRow(uuid1, "db1", "tbl1", 10L, 0L, now)); repository.save(buildRow(uuid2, "db2", "tbl2", 20L, 0L, now)); - assertThat(repository.find(uuid1, null, PageRequest.of(0, 100))).hasSize(1); - assertThat(repository.find(uuid2, null, PageRequest.of(0, 100))).hasSize(1); + assertThat(repository.find(uuid1, Optional.empty(), PageRequest.of(0, 100))).hasSize(1); + 
assertThat(repository.find(uuid2, Optional.empty(), PageRequest.of(0, 100))).hasSize(1); } @Test diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java index f9cc28d57..e73ac0cb4 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableStatsRepositoryTest.java @@ -11,6 +11,8 @@ import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.test.context.ActiveProfiles; import org.springframework.transaction.annotation.Transactional; @@ -19,6 +21,8 @@ @Transactional class TableStatsRepositoryTest { + private static final Pageable PAGE = PageRequest.of(0, 10_000); + @Autowired TableStatsRepository repository; @Test @@ -90,7 +94,8 @@ void find_noParams_returnsAll() { .updatedAt(Instant.now()) .build()); - assertThat(repository.find(null, null, null)).hasSize(2); + assertThat(repository.find(Optional.empty(), Optional.empty(), Optional.empty(), PAGE)) + .hasSize(2); } @Test @@ -112,7 +117,13 @@ void find_byDatabase() { .updatedAt(Instant.now()) .build()); - assertThat(repository.find("db1", null, null)).hasSize(1); - assertThat(repository.find("db1", null, null).get(0).getDatabaseName()).isEqualTo("db1"); + assertThat(repository.find(Optional.of("db1"), Optional.empty(), Optional.empty(), PAGE)) + .hasSize(1); + assertThat( + repository + .find(Optional.of("db1"), Optional.empty(), Optional.empty(), PAGE) + .get(0) + .getDatabaseName()) + .isEqualTo("db1"); } } From 49e43bc5aeb063e734b062c4223c2fd247fa1ef6 Mon Sep 17 00:00:00 
2001 From: mkuchenbecker Date: Wed, 20 May 2026 17:22:27 -0700 Subject: [PATCH 095/104] refactor(optimizer-service): use Optional repo API + configurable limit OptimizerDataServiceImpl pipes Optional filters straight through to the repo (no .orElse(null) at the boundary). Adds the optimizer.repo.default-limit config property and threads it into the list-shaped calls. Service-impl test updates the one direct statsHistoryRepository.find(...) call to pass Optional.empty(). Co-Authored-By: Claude Opus 4.7 --- .../service/OptimizerDataServiceImpl.java | 27 +++++++++++-------- .../src/main/resources/application.properties | 2 ++ .../service/OptimizerDataServiceImplTest.java | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 1ca9c7777..4f820e1b8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -19,6 +19,7 @@ import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -39,6 +40,9 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; + @Value("${optimizer.repo.default-limit:10000}") + private int defaultLimit; + // --- TableOperations --- @Override @@ -50,11 +54,14 @@ public List listTableOperations( Optional tableUuid) { return operationsRepository .find( - 
operationType.map(OperationTypeDto::toDb).orElse(null), - status.map(OperationStatusDto::toDb).orElse(null), - tableUuid.orElse(null), - databaseName.orElse(null), - tableName.orElse(null)) + operationType.map(OperationTypeDto::toDb), + status.map(OperationStatusDto::toDb), + tableUuid, + databaseName, + tableName, + Optional.empty(), + Optional.empty(), + PageRequest.of(0, defaultLimit)) .stream() .map(TableOperationDto::fromRow) .collect(Collectors.toList()); @@ -131,8 +138,8 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( Optional databaseName, Optional tableName, Optional tableUuid) { - return statsRepository - .find(databaseName.orElse(null), tableName.orElse(null), tableUuid.orElse(null)).stream() + return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, defaultLimit)) + .stream() .map(TableStatsDto::fromRow) .collect(Collectors.toList()); } @@ -140,8 +147,7 @@ public List listTableStats( @Override public List getStatsHistory( String tableUuid, Optional since, int limit) { - return statsHistoryRepository.find(tableUuid, since.orElse(null), PageRequest.of(0, limit)) - .stream() + return statsHistoryRepository.find(tableUuid, since, PageRequest.of(0, limit)).stream() .map(TableStatsHistoryDto::fromRow) .collect(Collectors.toList()); } @@ -162,8 +168,7 @@ public TableOperationsHistoryDto appendHistory(TableOperationsHistoryDto history @Override public List getHistory(String tableUuid, int limit) { - return historyRepository - .findByTableUuidOrderByCompletedAtDesc(tableUuid, PageRequest.of(0, limit)).stream() + return historyRepository.find(tableUuid, PageRequest.of(0, limit)).stream() .map(TableOperationsHistoryDto::fromRow) .collect(Collectors.toList()); } diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index e78745d00..1b7eb1a40 100644 --- a/services/optimizer/src/main/resources/application.properties 
+++ b/services/optimizer/src/main/resources/application.properties @@ -16,5 +16,7 @@ spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 +optimizer.repo.default-limit=10000 + management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index e817e3fd5..8db14c4d6 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -126,7 +126,7 @@ void upsertTableStats_updatesExistingRow_andAppendsHistory() { assertThat(statsRepository.findAll()).hasSize(1); List history = - statsHistoryRepository.find(tableUuid, null, PageRequest.of(0, 100)); + statsHistoryRepository.find(tableUuid, Optional.empty(), PageRequest.of(0, 100)); assertThat(history).hasSize(2); assertThat(history.get(0).getDelta().getNumFilesAdded()).isEqualTo(3L); assertThat(history.get(1).getDelta().getNumFilesAdded()).isEqualTo(5L); From 040046e0ee31cc6a1c40c20f50e971d110256ee7 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 17:25:33 -0700 Subject: [PATCH 096/104] refactor(analyzer): switch to Optional repo API + configurable limit AnalyzerRunner now passes Optional filters to the repos and a PageRequest backed by optimizer.repo.default-limit. findLatestPerTable becomes findLatest with a Pageable. Test stubs updated to use ArgumentMatchers (eq/any) for the Optional-typed params. defaultLimit is initialised inline so pure-Mockito tests (no Spring context) get a sane value when @Value isn't injected. 
Co-Authored-By: Claude Opus 4.7 --- .../openhouse/analyzer/AnalyzerRunner.java | 23 ++++-- .../src/main/resources/application.properties | 1 + .../analyzer/AnalyzerRunnerTest.java | 81 +++++++++++++++---- 3 files changed, 83 insertions(+), 22 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java index e9a7f69b8..313e41514 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerRunner.java @@ -15,6 +15,8 @@ import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Component; /** @@ -40,6 +42,9 @@ public class AnalyzerRunner { private final TableOperationsRepository operationsRepo; private final TableOperationsHistoryRepository historyRepo; + @Value("${optimizer.repo.default-limit:10000}") + private int defaultLimit = 10_000; + /** * Run the analysis loop for {@code operationType} across all databases, with no filters. * Equivalent to {@link #analyze(OperationTypeDto, Optional, Optional, Optional)} with all-empty @@ -83,14 +88,18 @@ private void analyzeDatabase( analyzer.getOperationType().toDb(); // Pre-load the small sides of the joins — bounded by tables in this database. - // TODO(query-builder): the JPQL optional-filter shape used by these find(...) calls gets - // unwieldy as the filter count grows. Migrate to Criteria API or jOOQ once the scaffolding - // stabilizes — applies to operationsRepo.find, historyRepo.findLatestPerTable, and - // statsRepo.find below. 
+ PageRequest page = PageRequest.of(0, defaultLimit); Map currentOps = operationsRepo .find( - dbOperationType, null, tableUuid.orElse(null), databaseName, tableName.orElse(null)) + Optional.of(dbOperationType), + Optional.empty(), + tableUuid, + Optional.of(databaseName), + tableName, + Optional.empty(), + Optional.empty(), + page) .stream() .filter(e -> e.getTableUuid() != null) .map(TableOperationDto::fromRow) @@ -99,7 +108,7 @@ private void analyzeDatabase( TableOperationDto::getTableUuid, op -> op, TableOperationDto::mostRecent)); Map latestHistory = - historyRepo.findLatestPerTable(dbOperationType).stream() + historyRepo.findLatest(dbOperationType, page).stream() .filter(r -> r.getTableUuid() != null) .map(TableOperationsHistoryDto::fromRow) .collect( @@ -109,7 +118,7 @@ private void analyzeDatabase( AnalyzerRunner::moreRecentHistory)); List tables = - statsRepo.find(databaseName, tableName.orElse(null), tableUuid.orElse(null)).stream() + statsRepo.find(Optional.of(databaseName), tableName, tableUuid, page).stream() .filter(row -> row.getTableUuid() != null) .map(TableDto::fromRow) .collect(Collectors.toList()); diff --git a/apps/optimizer-analyzer/src/main/resources/application.properties b/apps/optimizer-analyzer/src/main/resources/application.properties index 1df0bea15..4ee825c55 100644 --- a/apps/optimizer-analyzer/src/main/resources/application.properties +++ b/apps/optimizer-analyzer/src/main/resources/application.properties @@ -6,3 +6,4 @@ spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} spring.jpa.hibernate.ddl-auto=none ofd.success-retry-hours=24 ofd.failure-retry-hours=1 +optimizer.repo.default-limit=10000 diff --git a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java index 450ae3e13..546279d64 100644 --- a/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java +++ 
b/apps/optimizer-analyzer/src/test/java/com/linkedin/openhouse/analyzer/AnalyzerRunnerTest.java @@ -2,6 +2,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -54,9 +55,19 @@ void analyze_insertsNewRow_forEligibleTableWithNoExistingOp() { TableDto expectedTable = TableDto.fromRow(statsEntity); - when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); + when(statsRepo.find(eq(Optional.of(DB)), eq(Optional.empty()), eq(Optional.empty()), any())) + .thenReturn(List.of(statsEntity)); + when(operationsRepo.find( + eq(Optional.of(OFD_DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.of(DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + any())) + .thenReturn(Collections.emptyList()); + when(historyRepo.findLatest(eq(OFD_DB), any())).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); when(analyzer.shouldSchedule(expectedTable, Optional.empty(), Optional.empty())) .thenReturn(true); @@ -91,9 +102,19 @@ void analyze_noOp_whenCadencePolicyReturnsFalseForPending() { .createdAt(Instant.now()) .build(); - when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(List.of(existingEntity)); - when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); + when(statsRepo.find(eq(Optional.of(DB)), eq(Optional.empty()), eq(Optional.empty()), any())) + .thenReturn(List.of(statsEntity)); + when(operationsRepo.find( + eq(Optional.of(OFD_DB)), + eq(Optional.empty()), + 
eq(Optional.empty()), + eq(Optional.of(DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + any())) + .thenReturn(List.of(existingEntity)); + when(historyRepo.findLatest(eq(OFD_DB), any())).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); TableOperationDto existingOp = TableOperationDto.fromRow(existingEntity); @@ -112,9 +133,19 @@ void analyze_skipsTable_whenNotEnabled() { TableDto expectedTable = TableDto.fromRow(statsEntity); - when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); + when(statsRepo.find(eq(Optional.of(DB)), eq(Optional.empty()), eq(Optional.empty()), any())) + .thenReturn(List.of(statsEntity)); + when(operationsRepo.find( + eq(Optional.of(OFD_DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.of(DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + any())) + .thenReturn(Collections.emptyList()); + when(historyRepo.findLatest(eq(OFD_DB), any())).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(false); runner.analyze(OFD_TYPE); @@ -138,9 +169,19 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { .createdAt(Instant.now()) .build(); - when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(List.of(scheduled)); - when(historyRepo.findLatestPerTable(OFD_DB)).thenReturn(Collections.emptyList()); + when(statsRepo.find(eq(Optional.of(DB)), eq(Optional.empty()), eq(Optional.empty()), any())) + .thenReturn(List.of(statsEntity)); + when(operationsRepo.find( + eq(Optional.of(OFD_DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.of(DB)), + eq(Optional.empty()), + eq(Optional.empty()), + 
eq(Optional.empty()), + any())) + .thenReturn(List.of(scheduled)); + when(historyRepo.findLatest(eq(OFD_DB), any())).thenReturn(Collections.emptyList()); when(analyzer.isEnabled(expectedTable)).thenReturn(true); TableOperationDto scheduledOp = TableOperationDto.fromRow(scheduled); @@ -156,9 +197,19 @@ void analyze_skipsTable_whenShouldScheduleReturnsFalse() { void analyze_skipsTable_whenTableUuidIsNull() { TableStatsRow statsEntity = TableStatsRow.builder().databaseName(DB).build(); - when(statsRepo.find(DB, null, null)).thenReturn(List.of(statsEntity)); - when(operationsRepo.find(OFD_DB, null, null, DB, null)).thenReturn(Collections.emptyList()); - when(historyRepo.findLatestPerTable(any())).thenReturn(Collections.emptyList()); + when(statsRepo.find(eq(Optional.of(DB)), eq(Optional.empty()), eq(Optional.empty()), any())) + .thenReturn(List.of(statsEntity)); + when(operationsRepo.find( + eq(Optional.of(OFD_DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.of(DB)), + eq(Optional.empty()), + eq(Optional.empty()), + eq(Optional.empty()), + any())) + .thenReturn(Collections.emptyList()); + when(historyRepo.findLatest(any(), any())).thenReturn(Collections.emptyList()); runner.analyze(OFD_TYPE); From b69e09a511e684e30dc9a5adb1b8e26951c7190e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 19:47:22 -0700 Subject: [PATCH 097/104] test(optimizer-repo): truncate Instant to micros for CI precision Instant.now() on Linux CI carries nanoseconds; MySQL TIMESTAMP(6) and H2 in MySQL mode store microseconds. The scheduledAt = :scheduledAt predicate in find(...) compared nano-resolution param against micro-resolution stored value and missed. Local (macOS, micro-only) hid the bug. Truncate to ChronoUnit.MICROS at write time in the one repo test that exercises the watermark round-trip. 
Co-Authored-By: Claude Opus 4.7 --- .../optimizer/repository/TableOperationsRepositoryTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java index 8f46af1bf..072be5fd9 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/repository/TableOperationsRepositoryTest.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.List; import java.util.Optional; import java.util.UUID; @@ -148,7 +149,10 @@ void find_byScheduledAtAndIds_resolvesClaimedSubset() { .scheduledAt(Instant.now().minusSeconds(60)) .build()); - Instant now = Instant.now(); + // Truncate to microseconds — MySQL TIMESTAMP(6) (and H2 in MySQL mode) stores microseconds, + // so a nano-precision now() round-trips lossily. On Linux CI Instant.now() carries nanos; + // truncating here keeps the watermark comparison exact across platforms. + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); int transitioned = repository.updateBatch( List.of(idA, idB, idC), From a89e037dd41b9096271425099701b5011effb804 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Thu, 21 May 2026 16:36:45 -0700 Subject: [PATCH 098/104] feat(optimizer): require limit on list-API endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All four list-style endpoints now require a caller-supplied limit. No server-side default, no max — getting the API contract right comes first; bounds can land separately. 
- TableOperationsController.listTableOperations: @RequestParam int limit - TableStatsController.listTableStats: @RequestParam int limit - TableStatsController.getStatsHistory: drop defaultValue="100" - TableOperationsHistoryController.getHistory: drop defaultValue="100" - OptimizerDataService: listTableOperations / listTableStats gain int limit - OptimizerDataServiceImpl: drop @Value("${optimizer.repo.default-limit}") and the defaultLimit field; thread caller-supplied limit straight to PageRequest.of(0, limit), which cascades to MySQL LIMIT n. - application.properties: remove now-unused optimizer.repo.default-limit. Co-Authored-By: Claude Opus 4.7 --- .../controller/TableOperationsController.java | 10 ++++++---- .../TableOperationsHistoryController.java | 7 +++++-- .../api/controller/TableStatsController.java | 16 +++++++++------- .../optimizer/service/OptimizerDataService.java | 16 ++++++++++------ .../service/OptimizerDataServiceImpl.java | 16 ++++++++-------- .../src/main/resources/application.properties | 2 -- .../service/OptimizerDataServiceImplTest.java | 3 ++- 7 files changed, 40 insertions(+), 30 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index c28002bf7..5db7d31ed 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -59,8 +59,8 @@ public ResponseEntity getTableOperation(@PathVariable String id } /** - * List operations matching the given filters. All parameters are optional — omit all to return - * every row. + * List operations matching the given filters, capped at {@code limit} rows. 
Every filter is + * optional; {@code limit} is required so callers always state how much they want back. */ @GetMapping public ResponseEntity> listTableOperations( @@ -68,7 +68,8 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) OperationStatus status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableOperations( @@ -76,7 +77,8 @@ public ResponseEntity> listTableOperations( Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableOperations::fromModel) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 36c422623..9a1b6d303 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -31,10 +31,13 @@ public ResponseEntity appendHistory( .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } - /** Return the most recent history for a table, newest first, up to {@code limit} rows. */ + /** + * Return the most recent history for a table, newest first, capped at {@code limit} rows. {@code + * limit} is required. 
+ */ @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( - @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { + @PathVariable String tableUuid, @RequestParam int limit) { List result = service.getHistory(tableUuid, limit).stream() .map(TableOperationsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 7cb745250..049516110 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -48,20 +48,22 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) } /** - * List stats rows matching the given filters. All parameters are optional — omit all to return - * every row. + * List stats rows matching the given filters, capped at {@code limit} rows. Every filter is + * optional; {@code limit} is required so callers always state how much they want back. */ @GetMapping public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableStats( Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableStats::fromModel) .collect(Collectors.toList()); @@ -69,14 +71,14 @@ public ResponseEntity> listTableStats( } /** - * Return per-commit stats history for {@code tableUuid}, newest first. Optionally filter by - * {@code since} (inclusive) and cap at {@code limit} rows. 
+ * Return per-commit stats history for {@code tableUuid}, newest first, capped at {@code limit} + * rows. Optional {@code since} filter (inclusive). {@code limit} is required. */ @GetMapping("/{tableUuid}/history") public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, - @RequestParam(defaultValue = "100") int limit) { + @RequestParam int limit) { List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() .map(TableStatsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 0529d3608..c20ae7bf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -23,15 +23,16 @@ public interface OptimizerDataService { // --- TableOperations --- /** - * List operations matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. + * List operations matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. */ List listTableOperations( Optional operationType, Optional status, Optional databaseName, Optional tableName, - Optional tableUuid); + Optional tableUuid, + int limit); /** * Update an operation by writing a history entry. Looks up the operation row by {@code @@ -60,11 +61,14 @@ List listTableOperations( Optional getTableStats(String tableUuid); /** - * List stats rows matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. 
+ * List stats rows matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. */ List listTableStats( - Optional databaseName, Optional tableName, Optional tableUuid); + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit); /** * Return per-commit stats history for {@code tableUuid}, newest first. diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 4f820e1b8..29fd0eeee 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -19,7 +19,6 @@ import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; -import org.springframework.beans.factory.annotation.Value; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -40,9 +39,6 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - @Value("${optimizer.repo.default-limit:10000}") - private int defaultLimit; - // --- TableOperations --- @Override @@ -51,7 +47,8 @@ public List listTableOperations( Optional status, Optional databaseName, Optional tableName, - Optional tableUuid) { + Optional tableUuid, + int limit) { return operationsRepository .find( operationType.map(OperationTypeDto::toDb), @@ -61,7 +58,7 @@ public List listTableOperations( tableName, Optional.empty(), Optional.empty(), - PageRequest.of(0, defaultLimit)) + PageRequest.of(0, limit)) .stream() 
.map(TableOperationDto::fromRow) .collect(Collectors.toList()); @@ -137,8 +134,11 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( - Optional databaseName, Optional tableName, Optional tableUuid) { - return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, defaultLimit)) + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit) { + return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, limit)) .stream() .map(TableStatsDto::fromRow) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 1b7eb1a40..e78745d00 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -16,7 +16,5 @@ spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 -optimizer.repo.default-limit=10000 - management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 8db14c4d6..2a3c1e676 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -165,7 +165,8 @@ void listTableOperations_filtersByOperationTypeAndStatus() { Optional.of(OperationStatusDto.PENDING), Optional.empty(), Optional.empty(), - Optional.empty())) + Optional.empty(), + 100)) .extracting(op -> op.getId()) .containsExactly(pendingId); } From 
1e361afc3647c4b4570f828daeba73491ba0647c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 10:11:53 -0700 Subject: [PATCH 099/104] feat(optimizer): basic error-code handling across controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every optimizer endpoint now returns a uniform ApiError body ({code, message, path}) on non-2xx. Reshape updateOperation so the operationId lives in the URL (and is repeated in the body for self-describing payloads). - api/error/ApiError.java — DTO shape. - api/error/GlobalExceptionHandler.java — @RestControllerAdvice mapping framework exceptions to {VALIDATION_ERROR, INVALID_PARAMETER, MISSING_PARAMETER, MALFORMED_REQUEST, INTERNAL_ERROR}. Reason of ResponseStatusException is parsed as "CODE: message" for endpoint-specific 404s. - Controllers: orElseThrow(ResponseStatusException) replaces bare 404. TableOperationsController moves updateOperation to POST /v1/optimizer/operations/{id}/update; rejects with 400 PATH_BODY_MISMATCH when body.operationId != path.id. - UpdateOperationRequest: @NotBlank operationId, @NotNull status. - UpsertTableStatsRequest: @NotBlank databaseName, tableName. - spring-boot-starter-validation dep added. - New ControllerErrorHandlingTest: 13 MockMvc cases covering 404 / 400 validation / 400 type-mismatch / 400 missing-param / 400 malformed-body / 400 path-body-mismatch + happy-path sanity. 
Co-Authored-By: Claude Opus 4.7 --- services/optimizer/build.gradle | 1 + .../controller/TableOperationsController.java | 39 +++- .../TableOperationsHistoryController.java | 3 +- .../api/controller/TableStatsController.java | 11 +- .../optimizer/api/error/ApiError.java | 27 +++ .../api/error/GlobalExceptionHandler.java | 102 +++++++++ .../api/spec/UpdateOperationRequest.java | 15 +- .../api/spec/UpsertTableStatsRequest.java | 9 +- .../ControllerErrorHandlingTest.java | 216 ++++++++++++++++++ 9 files changed, 401 insertions(+), 22 deletions(-) create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java create mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c05c7f9c3..c208cf330 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -7,6 +7,7 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' + implementation 'org.springframework.boot:spring-boot-starter-validation:2.7.8' implementation 'mysql:mysql-connector-java:8.+' testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 5db7d31ed..25fd8ab6c 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ 
b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -19,6 +20,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; /** REST controller for {@code table_operations}. */ @RestController @@ -29,23 +31,34 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report an update to an operation. The body carries the {@code operationId} the caller is - * updating along with its terminal status. The backend looks up the operation row, writes a - * history entry with the operation's table metadata, and returns 201 Created with the history - * row, or 404 if the operation does not exist. + * Report an update to an operation. {@code id} comes from the URL; the body's {@code operationId} + * must match (the controller rejects mismatched requests with 400). The backend looks up the + * operation row, writes a history entry with the operation's table metadata, and returns 201 + * Created with the history row, or 404 if the operation does not exist. 
*/ - @PostMapping("/update") + @PostMapping("/{id}/update") public ResponseEntity updateOperation( - @RequestBody UpdateOperationRequest request) { + @PathVariable String id, @Valid @RequestBody UpdateOperationRequest request) { + if (!id.equals(request.getOperationId())) { + throw new ResponseStatusException( + HttpStatus.BAD_REQUEST, + "PATH_BODY_MISMATCH: operationId in body ('" + + request.getOperationId() + + "') does not match path id ('" + + id + + "')"); + } return service - .updateOperation( - request.getOperationId(), - request.getStatus() == null ? null : request.getStatus().toModel()) + .updateOperation(id, request.getStatus().toModel()) .map( history -> ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(history))) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, + "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. 
*/ @@ -55,7 +68,11 @@ public ResponseEntity getTableOperation(@PathVariable String id .getTableOperation(id) .map(TableOperations::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, + "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 9a1b6d303..7a457d9cf 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -4,6 +4,7 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; +import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -26,7 +27,7 @@ public class TableOperationsHistoryController { /** Append a completed-job result. Called by the SparkJob after each run (success or failure). 
*/ @PostMapping public ResponseEntity appendHistory( - @RequestBody TableOperationsHistory dto) { + @Valid @RequestBody TableOperationsHistory dto) { return ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 049516110..976d05e7f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -8,7 +8,9 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import javax.validation.Valid; import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; @@ -17,6 +19,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; /** REST controller for managing per-table stats in the optimizer DB. 
*/ @RestController @@ -32,7 +35,7 @@ public class TableStatsController { */ @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( - @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { + @PathVariable String tableUuid, @Valid @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( TableStats.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } @@ -44,7 +47,11 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) .getTableStats(tableUuid) .map(TableStats::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, + "STATS_NOT_FOUND: no stats for tableUuid '" + tableUuid + "'")); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java new file mode 100644 index 000000000..9018e1bbe --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java @@ -0,0 +1,27 @@ +package com.linkedin.openhouse.optimizer.api.error; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Uniform error response body returned by every optimizer endpoint on a non-2xx status. + * + *

<p>Shape: + * + * <ul> + *   <li>{@code code} — machine-readable identifier (e.g. {@code OPERATION_NOT_FOUND}). + *   <li>{@code message} — human-readable explanation. + *   <li>{@code path} — the request URI that triggered the error. + * </ul>
+ */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class ApiError { + private String code; + private String message; + private String path; +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java new file mode 100644 index 000000000..00baf5bd9 --- /dev/null +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java @@ -0,0 +1,102 @@ +package com.linkedin.openhouse.optimizer.api.error; + +import javax.servlet.http.HttpServletRequest; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.http.converter.HttpMessageNotReadableException; +import org.springframework.web.bind.MethodArgumentNotValidException; +import org.springframework.web.bind.MissingServletRequestParameterException; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.bind.annotation.RestControllerAdvice; +import org.springframework.web.method.annotation.MethodArgumentTypeMismatchException; +import org.springframework.web.server.ResponseStatusException; + +/** + * Maps framework + service exceptions to {@link ApiError} bodies with consistent HTTP status codes + * across every optimizer endpoint. + * + *

Codes used: {@code VALIDATION_ERROR}, {@code INVALID_PARAMETER}, {@code MISSING_PARAMETER}, + * {@code MALFORMED_REQUEST}, {@code OPERATION_NOT_FOUND}, {@code STATS_NOT_FOUND}, {@code + * INTERNAL_ERROR}. Endpoint-specific 404 codes are passed through via {@link + * ResponseStatusException}'s {@code reason} field. + */ +@Slf4j +@RestControllerAdvice +public class GlobalExceptionHandler { + + @ExceptionHandler(MethodArgumentNotValidException.class) + public ResponseEntity handleValidation( + MethodArgumentNotValidException e, HttpServletRequest req) { + String message = + e.getBindingResult().getFieldErrors().stream() + .map(fe -> fe.getField() + ": " + fe.getDefaultMessage()) + .reduce((a, b) -> a + "; " + b) + .orElse(e.getMessage()); + return error(HttpStatus.BAD_REQUEST, "VALIDATION_ERROR", message, req); + } + + @ExceptionHandler(MethodArgumentTypeMismatchException.class) + public ResponseEntity handleTypeMismatch( + MethodArgumentTypeMismatchException e, HttpServletRequest req) { + String type = e.getRequiredType() == null ? "?" 
: e.getRequiredType().getSimpleName(); + return error( + HttpStatus.BAD_REQUEST, + "INVALID_PARAMETER", + "Parameter '" + + e.getName() + + "' has invalid value '" + + e.getValue() + + "' (expected " + + type + + ")", + req); + } + + @ExceptionHandler(MissingServletRequestParameterException.class) + public ResponseEntity handleMissingParam( + MissingServletRequestParameterException e, HttpServletRequest req) { + return error( + HttpStatus.BAD_REQUEST, + "MISSING_PARAMETER", + "Required parameter '" + e.getParameterName() + "' is missing", + req); + } + + @ExceptionHandler(HttpMessageNotReadableException.class) + public ResponseEntity handleMalformedBody( + HttpMessageNotReadableException e, HttpServletRequest req) { + return error( + HttpStatus.BAD_REQUEST, "MALFORMED_REQUEST", "Request body is missing or malformed", req); + } + + @ExceptionHandler(ResponseStatusException.class) + public ResponseEntity handleResponseStatus( + ResponseStatusException e, HttpServletRequest req) { + HttpStatus status = HttpStatus.resolve(e.getStatus().value()); + if (status == null) { + status = HttpStatus.INTERNAL_SERVER_ERROR; + } + String reason = e.getReason() == null ? status.getReasonPhrase() : e.getReason(); + // Convention: when callers throw ResponseStatusException, they pack a "CODE: human message" + // into the reason. If no colon is present, the whole reason becomes the message and the code + // defaults to the status name (e.g. NOT_FOUND). + int sep = reason.indexOf(':'); + String code = sep > 0 ? reason.substring(0, sep).trim() : status.name(); + String message = sep > 0 ? 
reason.substring(sep + 1).trim() : reason; + return error(status, code, message, req); + } + + @ExceptionHandler(Exception.class) + public ResponseEntity handleUncaught(Exception e, HttpServletRequest req) { + log.warn("Unhandled exception on {}: {}", req.getRequestURI(), e.toString(), e); + return error( + HttpStatus.INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", "An unexpected error occurred", req); + } + + private static ResponseEntity error( + HttpStatus status, String code, String message, HttpServletRequest req) { + return ResponseEntity.status(status) + .body(ApiError.builder().code(code).message(message).path(req.getRequestURI()).build()); + } +} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index a216e9db3..fe5bee516 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -1,5 +1,7 @@ package com.linkedin.openhouse.optimizer.api.spec; +import javax.validation.constraints.NotBlank; +import javax.validation.constraints.NotNull; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -27,11 +29,16 @@ @AllArgsConstructor public class UpdateOperationRequest { - /** Operation row's UUID — the primary lookup key. */ - private String operationId; + /** + * Operation row's UUID. Required. Must match the {@code {id}} path variable on {@code POST + * /v1/optimizer/operations/{id}/update} — the controller rejects mismatched requests with 400. + * Carrying it in the body keeps the payload self-describing for trace/log consumers that may not + * see the URL. + */ + @NotBlank private String operationId; - /** Terminal outcome for this single operation. 
*/ - private HistoryStatus status; + /** Terminal outcome for this single operation. Required. */ + @NotNull private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index d1b4a5fe2..9d2dadb0e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -2,6 +2,7 @@ import java.util.Collections; import java.util.Map; +import javax.validation.constraints.NotBlank; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -19,11 +20,11 @@ @AllArgsConstructor public class UpsertTableStatsRequest { - /** Denormalized database name for display. */ - private String databaseName; + /** Denormalized database name for display. Required. */ + @NotBlank private String databaseName; - /** Denormalized table name for display. */ - private String tableName; + /** Denormalized table name for display. Required. */ + @NotBlank private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ private TableStatsPayload stats; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java new file mode 100644 index 000000000..97b63b06f --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -0,0 +1,216 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.http.MediaType; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.transaction.annotation.Transactional; + +/** + * Exercises the {@code GlobalExceptionHandler} contract across all three controllers — every + * non-2xx response carries an {@link com.linkedin.openhouse.optimizer.api.error.ApiError} body with + * {@code code}, {@code message}, and {@code path}. 
+ */ +@SpringBootTest +@AutoConfigureMockMvc +@ActiveProfiles("test") +@Transactional +class ControllerErrorHandlingTest { + + @Autowired MockMvc mockMvc; + @Autowired TableOperationsRepository operationsRepository; + + // --- /operations/{id}/update --- + + @Test + void updateOperation_notFound_returns404WithCode() throws Exception { + String id = UUID.randomUUID().toString(); + String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isNotFound()) + .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString(id))) + .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id + "/update")); + } + + @Test + void updateOperation_pathBodyMismatch_returns400() throws Exception { + String pathId = UUID.randomUUID().toString(); + String bodyId = UUID.randomUUID().toString(); + String body = "{\"operationId\":\"" + bodyId + "\",\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("PATH_BODY_MISMATCH")); + } + + @Test + void updateOperation_missingStatus_returns400Validation() throws Exception { + String id = UUID.randomUUID().toString(); + String body = "{\"operationId\":\"" + id + "\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("status"))); + } + + @Test + void updateOperation_missingOperationId_returns400Validation() throws Exception { + String pathId = 
UUID.randomUUID().toString(); + String body = "{\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) + .andExpect( + jsonPath("$.message").value(org.hamcrest.Matchers.containsString("operationId"))); + } + + @Test + void updateOperation_malformedJson_returns400Malformed() throws Exception { + String pathId = UUID.randomUUID().toString(); + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content("not json")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("MALFORMED_REQUEST")); + } + + // --- /operations/{id} --- + + @Test + void getTableOperation_notFound_returns404WithCode() throws Exception { + String id = UUID.randomUUID().toString(); + mockMvc + .perform(get("/v1/optimizer/operations/" + id)) + .andExpect(status().isNotFound()) + .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id)); + } + + // --- /operations (list) --- + + @Test + void listOperations_missingLimit_returns400Missing() throws Exception { + mockMvc + .perform(get("/v1/optimizer/operations")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("MISSING_PARAMETER")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); + } + + @Test + void listOperations_badLimit_returns400TypeMismatch() throws Exception { + mockMvc + .perform(get("/v1/optimizer/operations").param("limit", "abc")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")) + .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); + } + + @Test + void listOperations_badEnum_returns400() throws Exception { + mockMvc + 
.perform(get("/v1/optimizer/operations").param("status", "BOGUS").param("limit", "10")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); + } + + // --- /stats/{tableUuid} --- + + @Test + void getTableStats_notFound_returns404WithCode() throws Exception { + String uuid = UUID.randomUUID().toString(); + mockMvc + .perform(get("/v1/optimizer/stats/" + uuid)) + .andExpect(status().isNotFound()) + .andExpect(jsonPath("$.code").value("STATS_NOT_FOUND")); + } + + // --- /stats (upsert) --- + + @Test + void upsertStats_missingRequiredField_returns400Validation() throws Exception { + String uuid = UUID.randomUUID().toString(); + String body = "{\"tableName\":\"tbl1\"}"; // databaseName missing + mockMvc + .perform( + put("/v1/optimizer/stats/" + uuid) + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) + .andExpect( + jsonPath("$.message").value(org.hamcrest.Matchers.containsString("databaseName"))); + } + + // --- /stats/{tableUuid}/history --- + + @Test + void getStatsHistory_badSince_returns400() throws Exception { + String uuid = UUID.randomUUID().toString(); + mockMvc + .perform( + get("/v1/optimizer/stats/" + uuid + "/history") + .param("since", "not-a-date") + .param("limit", "10")) + .andExpect(status().isBadRequest()) + .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); + } + + // --- happy path sanity --- + + @Test + void updateOperation_happyPath_stillReturns201() throws Exception { + String id = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) + .createdAt(java.time.Instant.now()) + .scheduledAt(java.time.Instant.now()) + .jobId("job-x") + 
.build()); + String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isCreated()) + .andExpect(jsonPath("$.status").value("SUCCESS")); + } +} From a37169d8a977fc4faec0212c14e855b0ab07d348 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 10:47:01 -0700 Subject: [PATCH 100/104] refactor(optimizer): simplify error handling per PR review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review comments on PR #596 — minimal diff, no bean validation, no per-framework-exception handlers, scoped to the optimizer controllers. - @RestControllerAdvice scoped to the three optimizer controllers via assignableTypes; no longer global. - Two handlers only: ResponseStatusException → ApiError with code = status.name() + reason as message; Exception → 500 INTERNAL_ERROR. Drop MethodArgumentNotValidException, MethodArgumentTypeMismatchException, MissingServletRequestParameterException, HttpMessageNotReadableException — Spring's defaults handle those. - Drop the "CODE: message" reason-parsing convention. - Drop @NotBlank / @NotNull on UpdateOperationRequest and UpsertTableStatsRequest; drop @Valid on controllers; drop spring-boot-starter-validation dep. Validate operationId / status server- side in TableOperationsController.updateOperation — loose-coupling so relaxing required fields later doesn't break wire callers. - String.format throughout; no message concatenation. - ControllerErrorHandlingTest trimmed from 13 cases to 7: only what the controllers actually own (404s, server-side validation on updateOperation, happy-path sanity). Framework-level 4xx left to Spring. 
Co-Authored-By: Claude Opus 4.7 --- services/optimizer/build.gradle | 1 - .../controller/TableOperationsController.java | 27 ++-- .../TableOperationsHistoryController.java | 3 +- .../api/controller/TableStatsController.java | 6 +- .../api/error/GlobalExceptionHandler.java | 103 ++++--------- .../api/spec/UpdateOperationRequest.java | 13 +- .../api/spec/UpsertTableStatsRequest.java | 9 +- .../ControllerErrorHandlingTest.java | 135 ++++-------------- 8 files changed, 86 insertions(+), 211 deletions(-) diff --git a/services/optimizer/build.gradle b/services/optimizer/build.gradle index c208cf330..c05c7f9c3 100644 --- a/services/optimizer/build.gradle +++ b/services/optimizer/build.gradle @@ -7,7 +7,6 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-data-jpa:2.7.8' implementation 'com.vladmihalcea:hibernate-types-55:2.21.1' implementation 'org.springframework.boot:spring-boot-starter-web:2.7.8' - implementation 'org.springframework.boot:spring-boot-starter-validation:2.7.8' implementation 'mysql:mysql-connector-java:8.+' testImplementation 'com.h2database:h2:2.2.224' testImplementation 'org.springframework.boot:spring-boot-starter-test:2.7.8' diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 25fd8ab6c..2f6f62e4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -7,12 +7,13 @@ import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; -import 
javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; +import org.springframework.util.StringUtils; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; @@ -38,15 +39,19 @@ public class TableOperationsController { */ @PostMapping("/{id}/update") public ResponseEntity updateOperation( - @PathVariable String id, @Valid @RequestBody UpdateOperationRequest request) { - if (!id.equals(request.getOperationId())) { + @PathVariable String id, @RequestBody UpdateOperationRequest request) { + if (!StringUtils.hasText(request.getOperationId())) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "operationId is required"); + } + if (!Objects.equals(id, request.getOperationId())) { throw new ResponseStatusException( HttpStatus.BAD_REQUEST, - "PATH_BODY_MISMATCH: operationId in body ('" - + request.getOperationId() - + "') does not match path id ('" - + id - + "')"); + String.format( + "operationId in body (%s) does not match path id (%s)", + request.getOperationId(), id)); + } + if (request.getStatus() == null) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "status is required"); } return service .updateOperation(id, request.getStatus().toModel()) @@ -57,8 +62,7 @@ public ResponseEntity updateOperation( .orElseThrow( () -> new ResponseStatusException( - HttpStatus.NOT_FOUND, - "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. 
*/ @@ -71,8 +75,7 @@ public ResponseEntity getTableOperation(@PathVariable String id .orElseThrow( () -> new ResponseStatusException( - HttpStatus.NOT_FOUND, - "OPERATION_NOT_FOUND: no operation with id '" + id + "'")); + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 7a457d9cf..9a1b6d303 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -4,7 +4,6 @@ import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; import java.util.stream.Collectors; -import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -27,7 +26,7 @@ public class TableOperationsHistoryController { /** Append a completed-job result. Called by the SparkJob after each run (success or failure). 
*/ @PostMapping public ResponseEntity appendHistory( - @Valid @RequestBody TableOperationsHistory dto) { + @RequestBody TableOperationsHistory dto) { return ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 976d05e7f..ca8db4d51 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -8,7 +8,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; -import javax.validation.Valid; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -35,7 +34,7 @@ public class TableStatsController { */ @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( - @PathVariable String tableUuid, @Valid @RequestBody UpsertTableStatsRequest request) { + @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { return ResponseEntity.ok( TableStats.fromModel(service.upsertTableStats(request.toModel(tableUuid)))); } @@ -50,8 +49,7 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) .orElseThrow( () -> new ResponseStatusException( - HttpStatus.NOT_FOUND, - "STATS_NOT_FOUND: no stats for tableUuid '" + tableUuid + "'")); + HttpStatus.NOT_FOUND, String.format("no stats for tableUuid %s", tableUuid))); } /** diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java index 00baf5bd9..d47dd3911 
100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java @@ -1,75 +1,31 @@ package com.linkedin.openhouse.optimizer.api.error; +import com.linkedin.openhouse.optimizer.api.controller.TableOperationsController; +import com.linkedin.openhouse.optimizer.api.controller.TableOperationsHistoryController; +import com.linkedin.openhouse.optimizer.api.controller.TableStatsController; import javax.servlet.http.HttpServletRequest; import lombok.extern.slf4j.Slf4j; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; -import org.springframework.http.converter.HttpMessageNotReadableException; -import org.springframework.web.bind.MethodArgumentNotValidException; -import org.springframework.web.bind.MissingServletRequestParameterException; import org.springframework.web.bind.annotation.ExceptionHandler; import org.springframework.web.bind.annotation.RestControllerAdvice; -import org.springframework.web.method.annotation.MethodArgumentTypeMismatchException; import org.springframework.web.server.ResponseStatusException; /** - * Maps framework + service exceptions to {@link ApiError} bodies with consistent HTTP status codes - * across every optimizer endpoint. - * - *

<p>Codes used: {@code VALIDATION_ERROR}, {@code INVALID_PARAMETER}, {@code MISSING_PARAMETER}, - * {@code MALFORMED_REQUEST}, {@code OPERATION_NOT_FOUND}, {@code STATS_NOT_FOUND}, {@code - * INTERNAL_ERROR}. Endpoint-specific 404 codes are passed through via {@link - * ResponseStatusException}'s {@code reason} field. + * Scoped to the optimizer REST controllers. Two cases only: pass through any {@link + * ResponseStatusException} that a controller threw, and convert any other uncaught exception into a + * 500. Framework-level 4xx responses (missing query param, malformed body, etc.) are left to + * Spring's defaults — this advice intentionally does not blanket every possible exception type. */ @Slf4j -@RestControllerAdvice +@RestControllerAdvice( + assignableTypes = { + TableOperationsController.class, + TableOperationsHistoryController.class, + TableStatsController.class + }) public class GlobalExceptionHandler { - @ExceptionHandler(MethodArgumentNotValidException.class) - public ResponseEntity handleValidation( - MethodArgumentNotValidException e, HttpServletRequest req) { - String message = - e.getBindingResult().getFieldErrors().stream() - .map(fe -> fe.getField() + ": " + fe.getDefaultMessage()) - .reduce((a, b) -> a + "; " + b) - .orElse(e.getMessage()); - return error(HttpStatus.BAD_REQUEST, "VALIDATION_ERROR", message, req); - } - - @ExceptionHandler(MethodArgumentTypeMismatchException.class) - public ResponseEntity handleTypeMismatch( - MethodArgumentTypeMismatchException e, HttpServletRequest req) { - String type = e.getRequiredType() == null ? "?" 
: e.getRequiredType().getSimpleName(); - return error( - HttpStatus.BAD_REQUEST, - "INVALID_PARAMETER", - "Parameter '" - + e.getName() - + "' has invalid value '" - + e.getValue() - + "' (expected " - + type - + ")", - req); - } - - @ExceptionHandler(MissingServletRequestParameterException.class) - public ResponseEntity handleMissingParam( - MissingServletRequestParameterException e, HttpServletRequest req) { - return error( - HttpStatus.BAD_REQUEST, - "MISSING_PARAMETER", - "Required parameter '" + e.getParameterName() + "' is missing", - req); - } - - @ExceptionHandler(HttpMessageNotReadableException.class) - public ResponseEntity handleMalformedBody( - HttpMessageNotReadableException e, HttpServletRequest req) { - return error( - HttpStatus.BAD_REQUEST, "MALFORMED_REQUEST", "Request body is missing or malformed", req); - } - @ExceptionHandler(ResponseStatusException.class) public ResponseEntity handleResponseStatus( ResponseStatusException e, HttpServletRequest req) { @@ -77,26 +33,25 @@ public ResponseEntity handleResponseStatus( if (status == null) { status = HttpStatus.INTERNAL_SERVER_ERROR; } - String reason = e.getReason() == null ? status.getReasonPhrase() : e.getReason(); - // Convention: when callers throw ResponseStatusException, they pack a "CODE: human message" - // into the reason. If no colon is present, the whole reason becomes the message and the code - // defaults to the status name (e.g. NOT_FOUND). - int sep = reason.indexOf(':'); - String code = sep > 0 ? reason.substring(0, sep).trim() : status.name(); - String message = sep > 0 ? reason.substring(sep + 1).trim() : reason; - return error(status, code, message, req); + String message = e.getReason() == null ? 
status.getReasonPhrase() : e.getReason(); + return ResponseEntity.status(status) + .body( + ApiError.builder() + .code(status.name()) + .message(message) + .path(req.getRequestURI()) + .build()); } @ExceptionHandler(Exception.class) public ResponseEntity handleUncaught(Exception e, HttpServletRequest req) { - log.warn("Unhandled exception on {}: {}", req.getRequestURI(), e.toString(), e); - return error( - HttpStatus.INTERNAL_SERVER_ERROR, "INTERNAL_ERROR", "An unexpected error occurred", req); - } - - private static ResponseEntity error( - HttpStatus status, String code, String message, HttpServletRequest req) { - return ResponseEntity.status(status) - .body(ApiError.builder().code(code).message(message).path(req.getRequestURI()).build()); + log.warn(String.format("Unhandled exception on %s", req.getRequestURI()), e); + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) + .body( + ApiError.builder() + .code("INTERNAL_ERROR") + .message("An unexpected error occurred") + .path(req.getRequestURI()) + .build()); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index fe5bee516..fcae718ad 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -1,7 +1,5 @@ package com.linkedin.openhouse.optimizer.api.spec; -import javax.validation.constraints.NotBlank; -import javax.validation.constraints.NotNull; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -30,15 +28,16 @@ public class UpdateOperationRequest { /** - * Operation row's UUID. Required. Must match the {@code {id}} path variable on {@code POST + * Operation row's UUID. 
Must match the {@code {id}} path variable on {@code POST * /v1/optimizer/operations/{id}/update} — the controller rejects mismatched requests with 400. * Carrying it in the body keeps the payload self-describing for trace/log consumers that may not - * see the URL. + * see the URL. Validated server-side (no bean-validation annotation) so that future relaxation + * does not break clients on the wire contract. */ - @NotBlank private String operationId; + private String operationId; - /** Terminal outcome for this single operation. Required. */ - @NotNull private HistoryStatus status; + /** Terminal outcome for this single operation. Validated server-side. */ + private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java index 9d2dadb0e..d1b4a5fe2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java @@ -2,7 +2,6 @@ import java.util.Collections; import java.util.Map; -import javax.validation.constraints.NotBlank; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -20,11 +19,11 @@ @AllArgsConstructor public class UpsertTableStatsRequest { - /** Denormalized database name for display. Required. */ - @NotBlank private String databaseName; + /** Denormalized database name for display. */ + private String databaseName; - /** Denormalized table name for display. Required. */ - @NotBlank private String tableName; + /** Denormalized table name for display. */ + private String tableName; /** Combined snapshot + delta stats payload from this commit. 
*/ private TableStatsPayload stats; diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java index 97b63b06f..59d793441 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -2,13 +2,13 @@ import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; -import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; import com.linkedin.openhouse.optimizer.db.OperationType; import com.linkedin.openhouse.optimizer.db.TableOperationsRow; import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import java.time.Instant; import java.util.UUID; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -20,9 +20,9 @@ import org.springframework.transaction.annotation.Transactional; /** - * Exercises the {@code GlobalExceptionHandler} contract across all three controllers — every - * non-2xx response carries an {@link com.linkedin.openhouse.optimizer.api.error.ApiError} body with - * {@code code}, {@code message}, and {@code path}. + * Exercises what the controllers own: server-side validation on {@code updateOperation}, 404 on + * missing rows, and the {@code ApiError} body shape. Framework-level 4xx (missing query param, + * malformed JSON, etc.) is left to Spring's defaults and not asserted here. 
*/ @SpringBootTest @AutoConfigureMockMvc @@ -33,19 +33,17 @@ class ControllerErrorHandlingTest { @Autowired MockMvc mockMvc; @Autowired TableOperationsRepository operationsRepository; - // --- /operations/{id}/update --- - @Test - void updateOperation_notFound_returns404WithCode() throws Exception { + void updateOperation_notFound_returns404WithApiError() throws Exception { String id = UUID.randomUUID().toString(); - String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); mockMvc .perform( post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.code").value("NOT_FOUND")) .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString(id))) .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id + "/update")); } @@ -54,32 +52,21 @@ void updateOperation_notFound_returns404WithCode() throws Exception { void updateOperation_pathBodyMismatch_returns400() throws Exception { String pathId = UUID.randomUUID().toString(); String bodyId = UUID.randomUUID().toString(); - String body = "{\"operationId\":\"" + bodyId + "\",\"status\":\"SUCCESS\"}"; + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", bodyId); mockMvc .perform( post("/v1/optimizer/operations/" + pathId + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("PATH_BODY_MISMATCH")); - } - - @Test - void updateOperation_missingStatus_returns400Validation() throws Exception { - String id = UUID.randomUUID().toString(); - String body = "{\"operationId\":\"" + id + "\"}"; - mockMvc - .perform( - post("/v1/optimizer/operations/" + id + "/update") - .contentType(MediaType.APPLICATION_JSON) - .content(body)) - 
.andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("status"))); + .andExpect(jsonPath("$.code").value("BAD_REQUEST")) + .andExpect( + jsonPath("$.message") + .value(org.hamcrest.Matchers.containsString("does not match path id"))); } @Test - void updateOperation_missingOperationId_returns400Validation() throws Exception { + void updateOperation_missingOperationId_returns400() throws Exception { String pathId = UUID.randomUUID().toString(); String body = "{\"status\":\"SUCCESS\"}"; mockMvc @@ -88,107 +75,43 @@ void updateOperation_missingOperationId_returns400Validation() throws Exception .contentType(MediaType.APPLICATION_JSON) .content(body)) .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) - .andExpect( - jsonPath("$.message").value(org.hamcrest.Matchers.containsString("operationId"))); + .andExpect(jsonPath("$.code").value("BAD_REQUEST")) + .andExpect(jsonPath("$.message").value("operationId is required")); } @Test - void updateOperation_malformedJson_returns400Malformed() throws Exception { - String pathId = UUID.randomUUID().toString(); + void updateOperation_missingStatus_returns400() throws Exception { + String id = UUID.randomUUID().toString(); + String body = String.format("{\"operationId\":\"%s\"}", id); mockMvc .perform( - post("/v1/optimizer/operations/" + pathId + "/update") + post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) - .content("not json")) + .content(body)) .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("MALFORMED_REQUEST")); + .andExpect(jsonPath("$.code").value("BAD_REQUEST")) + .andExpect(jsonPath("$.message").value("status is required")); } - // --- /operations/{id} --- - @Test - void getTableOperation_notFound_returns404WithCode() throws Exception { + void 
getTableOperation_notFound_returns404WithApiError() throws Exception { String id = UUID.randomUUID().toString(); mockMvc .perform(get("/v1/optimizer/operations/" + id)) .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("OPERATION_NOT_FOUND")) + .andExpect(jsonPath("$.code").value("NOT_FOUND")) .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id)); } - // --- /operations (list) --- - - @Test - void listOperations_missingLimit_returns400Missing() throws Exception { - mockMvc - .perform(get("/v1/optimizer/operations")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("MISSING_PARAMETER")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); - } - - @Test - void listOperations_badLimit_returns400TypeMismatch() throws Exception { - mockMvc - .perform(get("/v1/optimizer/operations").param("limit", "abc")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString("limit"))); - } - - @Test - void listOperations_badEnum_returns400() throws Exception { - mockMvc - .perform(get("/v1/optimizer/operations").param("status", "BOGUS").param("limit", "10")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); - } - - // --- /stats/{tableUuid} --- - @Test - void getTableStats_notFound_returns404WithCode() throws Exception { + void getTableStats_notFound_returns404WithApiError() throws Exception { String uuid = UUID.randomUUID().toString(); mockMvc .perform(get("/v1/optimizer/stats/" + uuid)) .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("STATS_NOT_FOUND")); - } - - // --- /stats (upsert) --- - - @Test - void upsertStats_missingRequiredField_returns400Validation() throws Exception { - String uuid = UUID.randomUUID().toString(); - String body = "{\"tableName\":\"tbl1\"}"; // databaseName missing - 
mockMvc - .perform( - put("/v1/optimizer/stats/" + uuid) - .contentType(MediaType.APPLICATION_JSON) - .content(body)) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("VALIDATION_ERROR")) - .andExpect( - jsonPath("$.message").value(org.hamcrest.Matchers.containsString("databaseName"))); - } - - // --- /stats/{tableUuid}/history --- - - @Test - void getStatsHistory_badSince_returns400() throws Exception { - String uuid = UUID.randomUUID().toString(); - mockMvc - .perform( - get("/v1/optimizer/stats/" + uuid + "/history") - .param("since", "not-a-date") - .param("limit", "10")) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("INVALID_PARAMETER")); + .andExpect(jsonPath("$.code").value("NOT_FOUND")); } - // --- happy path sanity --- - @Test void updateOperation_happyPath_stillReturns201() throws Exception { String id = UUID.randomUUID().toString(); @@ -200,11 +123,11 @@ void updateOperation_happyPath_stillReturns201() throws Exception { .tableName("tbl1") .operationType(OperationType.ORPHAN_FILES_DELETION) .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) - .createdAt(java.time.Instant.now()) - .scheduledAt(java.time.Instant.now()) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) .jobId("job-x") .build()); - String body = "{\"operationId\":\"" + id + "\",\"status\":\"SUCCESS\"}"; + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); mockMvc .perform( post("/v1/optimizer/operations/" + id + "/update") From 6416c9dfce21ed02561d8ab104802eb1b760d043 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 11:41:38 -0700 Subject: [PATCH 101/104] refactor(optimizer): drop GlobalExceptionHandler + ApiError; use Spring defaults The custom advice was producing a body shape (ApiError {code, message, path}) that duplicated Spring Boot's default error JSON ({timestamp, status, error, message, path}). 
The only substantive difference was that Spring Boot 2.7 omits the `message` field by default. Replace the custom advice with a one-line config: server.error.include-message=always Now ResponseStatusException reasons (e.g. "no operation with id X") reach the caller via Spring's default error body, no custom code. - Delete api/error/GlobalExceptionHandler.java - Delete api/error/ApiError.java - application.properties: server.error.include-message=always - ControllerErrorHandlingTest assertions trimmed to status-code-only (MockMvc does not trigger Spring's error-dispatch to BasicErrorController, so body assertions cannot be made in tests even though the body is populated on real HTTP requests). Co-Authored-By: Claude Opus 4.7 --- .../optimizer/api/error/ApiError.java | 27 --------- .../api/error/GlobalExceptionHandler.java | 57 ------------------- .../src/main/resources/application.properties | 5 ++ .../ControllerErrorHandlingTest.java | 47 ++++++--------- 4 files changed, 21 insertions(+), 115 deletions(-) delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java delete mode 100644 services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java deleted file mode 100644 index 9018e1bbe..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/ApiError.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.error; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; -import lombok.NoArgsConstructor; - -/** - * Uniform error response body returned by every optimizer endpoint on a non-2xx status. - * - *

<p>Shape: - * - * <ul> - * <li>{@code code} — machine-readable identifier (e.g. {@code OPERATION_NOT_FOUND}). - * <li>{@code message} — human-readable explanation. - * <li>{@code path} — the request URI that triggered the error. - * </ul>
- */ -@Data -@Builder -@AllArgsConstructor -@NoArgsConstructor -public class ApiError { - private String code; - private String message; - private String path; -} diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java deleted file mode 100644 index d47dd3911..000000000 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/error/GlobalExceptionHandler.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.openhouse.optimizer.api.error; - -import com.linkedin.openhouse.optimizer.api.controller.TableOperationsController; -import com.linkedin.openhouse.optimizer.api.controller.TableOperationsHistoryController; -import com.linkedin.openhouse.optimizer.api.controller.TableStatsController; -import javax.servlet.http.HttpServletRequest; -import lombok.extern.slf4j.Slf4j; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.ExceptionHandler; -import org.springframework.web.bind.annotation.RestControllerAdvice; -import org.springframework.web.server.ResponseStatusException; - -/** - * Scoped to the optimizer REST controllers. Two cases only: pass through any {@link - * ResponseStatusException} that a controller threw, and convert any other uncaught exception into a - * 500. Framework-level 4xx responses (missing query param, malformed body, etc.) are left to - * Spring's defaults — this advice intentionally does not blanket every possible exception type. 
- */ -@Slf4j -@RestControllerAdvice( - assignableTypes = { - TableOperationsController.class, - TableOperationsHistoryController.class, - TableStatsController.class - }) -public class GlobalExceptionHandler { - - @ExceptionHandler(ResponseStatusException.class) - public ResponseEntity handleResponseStatus( - ResponseStatusException e, HttpServletRequest req) { - HttpStatus status = HttpStatus.resolve(e.getStatus().value()); - if (status == null) { - status = HttpStatus.INTERNAL_SERVER_ERROR; - } - String message = e.getReason() == null ? status.getReasonPhrase() : e.getReason(); - return ResponseEntity.status(status) - .body( - ApiError.builder() - .code(status.name()) - .message(message) - .path(req.getRequestURI()) - .build()); - } - - @ExceptionHandler(Exception.class) - public ResponseEntity handleUncaught(Exception e, HttpServletRequest req) { - log.warn(String.format("Unhandled exception on %s", req.getRequestURI()), e); - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR) - .body( - ApiError.builder() - .code("INTERNAL_ERROR") - .message("An unexpected error occurred") - .path(req.getRequestURI()) - .build()); - } -} diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index e78745d00..e7f082b47 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -18,3 +18,8 @@ spring.datasource.hikari.maximum-pool-size=20 management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true + +# Include ResponseStatusException.reason in the default error response body. Without this, Spring +# Boot 2.7 omits the `message` field, and the human-readable detail from a thrown +# ResponseStatusException never reaches the caller. 
+server.error.include-message=always diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java index 59d793441..b9c8dc3dc 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -20,9 +20,12 @@ import org.springframework.transaction.annotation.Transactional; /** - * Exercises what the controllers own: server-side validation on {@code updateOperation}, 404 on - * missing rows, and the {@code ApiError} body shape. Framework-level 4xx (missing query param, - * malformed JSON, etc.) is left to Spring's defaults and not asserted here. + * Exercises what the controllers own: server-side validation on {@code updateOperation} (path/body + * mismatch, missing fields) and 404s on missing rows. Assertions are status-code-only: MockMvc does + * not trigger Spring's error-dispatch to {@code BasicErrorController}, so the response body of a + * {@link org.springframework.web.server.ResponseStatusException} is empty in tests even though it + * is populated in production (with {@code server.error.include-message=always}). Framework-level + * 4xx (missing query param, malformed JSON, etc.) is left to Spring's defaults and not asserted. 
*/ @SpringBootTest @AutoConfigureMockMvc @@ -34,7 +37,7 @@ class ControllerErrorHandlingTest { @Autowired TableOperationsRepository operationsRepository; @Test - void updateOperation_notFound_returns404WithApiError() throws Exception { + void updateOperation_notFound_returns404() throws Exception { String id = UUID.randomUUID().toString(); String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); mockMvc @@ -42,10 +45,7 @@ void updateOperation_notFound_returns404WithApiError() throws Exception { post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("NOT_FOUND")) - .andExpect(jsonPath("$.message").value(org.hamcrest.Matchers.containsString(id))) - .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id + "/update")); + .andExpect(status().isNotFound()); } @Test @@ -58,11 +58,7 @@ void updateOperation_pathBodyMismatch_returns400() throws Exception { post("/v1/optimizer/operations/" + pathId + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("BAD_REQUEST")) - .andExpect( - jsonPath("$.message") - .value(org.hamcrest.Matchers.containsString("does not match path id"))); + .andExpect(status().isBadRequest()); } @Test @@ -74,9 +70,7 @@ void updateOperation_missingOperationId_returns400() throws Exception { post("/v1/optimizer/operations/" + pathId + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("BAD_REQUEST")) - .andExpect(jsonPath("$.message").value("operationId is required")); + .andExpect(status().isBadRequest()); } @Test @@ -88,32 +82,23 @@ void updateOperation_missingStatus_returns400() throws Exception { post("/v1/optimizer/operations/" + id + "/update") .contentType(MediaType.APPLICATION_JSON) .content(body)) - 
.andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.code").value("BAD_REQUEST")) - .andExpect(jsonPath("$.message").value("status is required")); + .andExpect(status().isBadRequest()); } @Test - void getTableOperation_notFound_returns404WithApiError() throws Exception { + void getTableOperation_notFound_returns404() throws Exception { String id = UUID.randomUUID().toString(); - mockMvc - .perform(get("/v1/optimizer/operations/" + id)) - .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("NOT_FOUND")) - .andExpect(jsonPath("$.path").value("/v1/optimizer/operations/" + id)); + mockMvc.perform(get("/v1/optimizer/operations/" + id)).andExpect(status().isNotFound()); } @Test - void getTableStats_notFound_returns404WithApiError() throws Exception { + void getTableStats_notFound_returns404() throws Exception { String uuid = UUID.randomUUID().toString(); - mockMvc - .perform(get("/v1/optimizer/stats/" + uuid)) - .andExpect(status().isNotFound()) - .andExpect(jsonPath("$.code").value("NOT_FOUND")); + mockMvc.perform(get("/v1/optimizer/stats/" + uuid)).andExpect(status().isNotFound()); } @Test - void updateOperation_happyPath_stillReturns201() throws Exception { + void updateOperation_happyPath_returns201() throws Exception { String id = UUID.randomUUID().toString(); operationsRepository.save( TableOperationsRow.builder() From bbef386ae56acf32ae9d8d31be1a7b50a2720c1c Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 12:16:57 -0700 Subject: [PATCH 102/104] refactor(optimizer): revert UpdateOperationRequest doc edits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review — no change needed on this file. The path/body validation lives in the controller; the DTO carries the same fields as before with the existing javadoc. 
Co-Authored-By: Claude Opus 4.7 --- .../optimizer/api/spec/UpdateOperationRequest.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java index fcae718ad..a216e9db3 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpdateOperationRequest.java @@ -27,16 +27,10 @@ @AllArgsConstructor public class UpdateOperationRequest { - /** - * Operation row's UUID. Must match the {@code {id}} path variable on {@code POST - * /v1/optimizer/operations/{id}/update} — the controller rejects mismatched requests with 400. - * Carrying it in the body keeps the payload self-describing for trace/log consumers that may not - * see the URL. Validated server-side (no bean-validation annotation) so that future relaxation - * does not break clients on the wire contract. - */ + /** Operation row's UUID — the primary lookup key. */ private String operationId; - /** Terminal outcome for this single operation. Validated server-side. */ + /** Terminal outcome for this single operation. */ private HistoryStatus status; /** Debug echo: stable table identity the caller believed it was completing. */ From 02bbc5c5af7f1d984fd7360758fc058873818bb7 Mon Sep 17 00:00:00 2001 From: mkuchenbecker <34144575+mkuchenbecker@users.noreply.github.com> Date: Fri, 22 May 2026 12:18:47 -0700 Subject: [PATCH 103/104] (wip) feat(optimizer): basic error-code handling across controllers (#596) Error handling. 
--- .../controller/TableOperationsController.java | 52 +++++--- .../TableOperationsHistoryController.java | 7 +- .../api/controller/TableStatsController.java | 23 ++-- .../service/OptimizerDataService.java | 16 ++- .../service/OptimizerDataServiceImpl.java | 16 +-- .../src/main/resources/application.properties | 7 +- .../ControllerErrorHandlingTest.java | 124 ++++++++++++++++++ .../service/OptimizerDataServiceImplTest.java | 3 +- 8 files changed, 206 insertions(+), 42 deletions(-) create mode 100644 services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index c28002bf7..2f6f62e4b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -7,11 +7,13 @@ import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; +import org.springframework.util.StringUtils; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; @@ -19,6 +21,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import 
org.springframework.web.server.ResponseStatusException; /** REST controller for {@code table_operations}. */ @RestController @@ -29,23 +32,37 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report an update to an operation. The body carries the {@code operationId} the caller is - * updating along with its terminal status. The backend looks up the operation row, writes a - * history entry with the operation's table metadata, and returns 201 Created with the history - * row, or 404 if the operation does not exist. + * Report an update to an operation. {@code id} comes from the URL; the body's {@code operationId} + * must match (the controller rejects mismatched requests with 400). The backend looks up the + * operation row, writes a history entry with the operation's table metadata, and returns 201 + * Created with the history row, or 404 if the operation does not exist. */ - @PostMapping("/update") + @PostMapping("/{id}/update") public ResponseEntity updateOperation( - @RequestBody UpdateOperationRequest request) { + @PathVariable String id, @RequestBody UpdateOperationRequest request) { + if (!StringUtils.hasText(request.getOperationId())) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "operationId is required"); + } + if (!Objects.equals(id, request.getOperationId())) { + throw new ResponseStatusException( + HttpStatus.BAD_REQUEST, + String.format( + "operationId in body (%s) does not match path id (%s)", + request.getOperationId(), id)); + } + if (request.getStatus() == null) { + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "status is required"); + } return service - .updateOperation( - request.getOperationId(), - request.getStatus() == null ? 
null : request.getStatus().toModel()) + .updateOperation(id, request.getStatus().toModel()) .map( history -> ResponseEntity.status(HttpStatus.CREATED) .body(TableOperationsHistory.fromModel(history))) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** Fetch a single operation row by its ID, regardless of status. Returns 404 if not found. */ @@ -55,12 +72,15 @@ public ResponseEntity getTableOperation(@PathVariable String id .getTableOperation(id) .map(TableOperations::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, String.format("no operation with id %s", id))); } /** - * List operations matching the given filters. All parameters are optional — omit all to return - * every row. + * List operations matching the given filters, capped at {@code limit} rows. Every filter is + * optional; {@code limit} is required so callers always state how much they want back. 
*/ @GetMapping public ResponseEntity> listTableOperations( @@ -68,7 +88,8 @@ public ResponseEntity> listTableOperations( @RequestParam(required = false) OperationStatus status, @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableOperations( @@ -76,7 +97,8 @@ public ResponseEntity> listTableOperations( Optional.ofNullable(status).map(OperationStatus::toModel), Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableOperations::fromModel) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 36c422623..9a1b6d303 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -31,10 +31,13 @@ public ResponseEntity appendHistory( .body(TableOperationsHistory.fromModel(service.appendHistory(dto.toModel()))); } - /** Return the most recent history for a table, newest first, up to {@code limit} rows. */ + /** + * Return the most recent history for a table, newest first, capped at {@code limit} rows. {@code + * limit} is required. 
+ */ @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( - @PathVariable String tableUuid, @RequestParam(defaultValue = "100") int limit) { + @PathVariable String tableUuid, @RequestParam int limit) { List result = service.getHistory(tableUuid, limit).stream() .map(TableOperationsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index 7cb745250..ca8db4d51 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -9,6 +9,7 @@ import java.util.Optional; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; +import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; @@ -17,6 +18,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.server.ResponseStatusException; /** REST controller for managing per-table stats in the optimizer DB. */ @RestController @@ -44,24 +46,29 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) .getTableStats(tableUuid) .map(TableStats::fromModel) .map(ResponseEntity::ok) - .orElse(ResponseEntity.notFound().build()); + .orElseThrow( + () -> + new ResponseStatusException( + HttpStatus.NOT_FOUND, String.format("no stats for tableUuid %s", tableUuid))); } /** - * List stats rows matching the given filters. All parameters are optional — omit all to return - * every row. 
+ * List stats rows matching the given filters, capped at {@code limit} rows. Every filter is + * optional; {@code limit} is required so callers always state how much they want back. */ @GetMapping public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @RequestParam(required = false) String tableName, - @RequestParam(required = false) String tableUuid) { + @RequestParam(required = false) String tableUuid, + @RequestParam int limit) { List result = service .listTableStats( Optional.ofNullable(databaseName), Optional.ofNullable(tableName), - Optional.ofNullable(tableUuid)) + Optional.ofNullable(tableUuid), + limit) .stream() .map(TableStats::fromModel) .collect(Collectors.toList()); @@ -69,14 +76,14 @@ public ResponseEntity> listTableStats( } /** - * Return per-commit stats history for {@code tableUuid}, newest first. Optionally filter by - * {@code since} (inclusive) and cap at {@code limit} rows. + * Return per-commit stats history for {@code tableUuid}, newest first, capped at {@code limit} + * rows. Optional {@code since} filter (inclusive). {@code limit} is required. 
*/ @GetMapping("/{tableUuid}/history") public ResponseEntity> getStatsHistory( @PathVariable String tableUuid, @RequestParam(required = false) Instant since, - @RequestParam(defaultValue = "100") int limit) { + @RequestParam int limit) { List result = service.getStatsHistory(tableUuid, Optional.ofNullable(since), limit).stream() .map(TableStatsHistory::fromModel) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 0529d3608..c20ae7bf2 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -23,15 +23,16 @@ public interface OptimizerDataService { // --- TableOperations --- /** - * List operations matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. + * List operations matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. */ List listTableOperations( Optional operationType, Optional status, Optional databaseName, Optional tableName, - Optional tableUuid); + Optional tableUuid, + int limit); /** * Update an operation by writing a history entry. Looks up the operation row by {@code @@ -60,11 +61,14 @@ List listTableOperations( Optional getTableStats(String tableUuid); /** - * List stats rows matching the given filters. Every parameter is optional — pass {@link - * Optional#empty()} to skip that filter. No filters returns all rows. + * List stats rows matching the given filters, capped at {@code limit} rows. Every filter + * parameter is optional — pass {@link Optional#empty()} to skip that filter. 
*/ List listTableStats( - Optional databaseName, Optional tableName, Optional tableUuid); + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit); /** * Return per-commit stats history for {@code tableUuid}, newest first. diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 4f820e1b8..29fd0eeee 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -19,7 +19,6 @@ import java.util.UUID; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; -import org.springframework.beans.factory.annotation.Value; import org.springframework.data.domain.PageRequest; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -40,9 +39,6 @@ public class OptimizerDataServiceImpl implements OptimizerDataService { private final TableStatsRepository statsRepository; private final TableStatsHistoryRepository statsHistoryRepository; - @Value("${optimizer.repo.default-limit:10000}") - private int defaultLimit; - // --- TableOperations --- @Override @@ -51,7 +47,8 @@ public List listTableOperations( Optional status, Optional databaseName, Optional tableName, - Optional tableUuid) { + Optional tableUuid, + int limit) { return operationsRepository .find( operationType.map(OperationTypeDto::toDb), @@ -61,7 +58,7 @@ public List listTableOperations( tableName, Optional.empty(), Optional.empty(), - PageRequest.of(0, defaultLimit)) + PageRequest.of(0, limit)) .stream() .map(TableOperationDto::fromRow) .collect(Collectors.toList()); @@ -137,8 +134,11 @@ public Optional getTableStats(String tableUuid) { @Override public List listTableStats( - Optional 
databaseName, Optional tableName, Optional tableUuid) { - return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, defaultLimit)) + Optional databaseName, + Optional tableName, + Optional tableUuid, + int limit) { + return statsRepository.find(databaseName, tableName, tableUuid, PageRequest.of(0, limit)) .stream() .map(TableStatsDto::fromRow) .collect(Collectors.toList()); diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index 1b7eb1a40..e7f082b47 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -16,7 +16,10 @@ spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 -optimizer.repo.default-limit=10000 - management.endpoints.web.exposure.include=health,prometheus management.endpoint.health.enabled=true + +# Include ResponseStatusException.reason in the default error response body. Without this, Spring +# Boot 2.7 omits the `message` field, and the human-readable detail from a thrown +# ResponseStatusException never reaches the caller. 
+server.error.include-message=always diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java new file mode 100644 index 000000000..b9c8dc3dc --- /dev/null +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/api/controller/ControllerErrorHandlingTest.java @@ -0,0 +1,124 @@ +package com.linkedin.openhouse.optimizer.api.controller; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import com.linkedin.openhouse.optimizer.db.OperationType; +import com.linkedin.openhouse.optimizer.db.TableOperationsRow; +import com.linkedin.openhouse.optimizer.repository.TableOperationsRepository; +import java.time.Instant; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.http.MediaType; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.web.servlet.MockMvc; +import org.springframework.transaction.annotation.Transactional; + +/** + * Exercises what the controllers own: server-side validation on {@code updateOperation} (path/body + * mismatch, missing fields) and 404s on missing rows. 
Assertions are status-code-only: MockMvc does + * not trigger Spring's error-dispatch to {@code BasicErrorController}, so the response body of a + * {@link org.springframework.web.server.ResponseStatusException} is empty in tests even though it + * is populated in production (with {@code server.error.include-message=always}). Framework-level + * 4xx (missing query param, malformed JSON, etc.) is left to Spring's defaults and not asserted. + */ +@SpringBootTest +@AutoConfigureMockMvc +@ActiveProfiles("test") +@Transactional +class ControllerErrorHandlingTest { + + @Autowired MockMvc mockMvc; + @Autowired TableOperationsRepository operationsRepository; + + @Test + void updateOperation_notFound_returns404() throws Exception { + String id = UUID.randomUUID().toString(); + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isNotFound()); + } + + @Test + void updateOperation_pathBodyMismatch_returns400() throws Exception { + String pathId = UUID.randomUUID().toString(); + String bodyId = UUID.randomUUID().toString(); + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", bodyId); + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()); + } + + @Test + void updateOperation_missingOperationId_returns400() throws Exception { + String pathId = UUID.randomUUID().toString(); + String body = "{\"status\":\"SUCCESS\"}"; + mockMvc + .perform( + post("/v1/optimizer/operations/" + pathId + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()); + } + + @Test + void updateOperation_missingStatus_returns400() throws Exception { + String id = UUID.randomUUID().toString(); + String body = 
String.format("{\"operationId\":\"%s\"}", id); + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isBadRequest()); + } + + @Test + void getTableOperation_notFound_returns404() throws Exception { + String id = UUID.randomUUID().toString(); + mockMvc.perform(get("/v1/optimizer/operations/" + id)).andExpect(status().isNotFound()); + } + + @Test + void getTableStats_notFound_returns404() throws Exception { + String uuid = UUID.randomUUID().toString(); + mockMvc.perform(get("/v1/optimizer/stats/" + uuid)).andExpect(status().isNotFound()); + } + + @Test + void updateOperation_happyPath_returns201() throws Exception { + String id = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(id) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("job-x") + .build()); + String body = String.format("{\"operationId\":\"%s\",\"status\":\"SUCCESS\"}", id); + mockMvc + .perform( + post("/v1/optimizer/operations/" + id + "/update") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isCreated()) + .andExpect(jsonPath("$.status").value("SUCCESS")); + } +} diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index 8db14c4d6..2a3c1e676 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -165,7 +165,8 @@ void 
listTableOperations_filtersByOperationTypeAndStatus() { Optional.of(OperationStatusDto.PENDING), Optional.empty(), Optional.empty(), - Optional.empty())) + Optional.empty(), + 100)) .extracting(op -> op.getId()) .containsExactly(pendingId); } From 6ef7964017101ce7ee028ca3c93e66f023856d76 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Fri, 22 May 2026 13:55:14 -0700 Subject: [PATCH 104/104] docs(optimizer): add @ApiResponses to controllers for OpenAPI spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review on PR #531 — each endpoint lists the HTTP response codes it actually returns, in the same "Resource ACTION: STATUS" style used by services/tables. Codes per endpoint: - POST /operations/{id}/update — 201, 400, 404 - GET /operations/{id} — 200, 404 - GET /operations — 200, 400 - POST /operations-history — 201 - GET /operations-history/{u} — 200, 400 - PUT /stats/{u} — 200 - GET /stats/{u} — 200, 404 - GET /stats — 200, 400 - GET /stats/{u}/history — 200, 400 Annotations only — no runtime behavior change, no new tests required. swagger-annotations 2.1.11 is already on the optimizer classpath via openhouse.springboot-conventions. 
Co-Authored-By: Claude Opus 4.7 --- .../controller/TableOperationsController.java | 18 ++++++++++++++++++ .../TableOperationsHistoryController.java | 11 +++++++++++ .../api/controller/TableStatsController.java | 18 ++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index 2f6f62e4b..2ee40802f 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -6,6 +6,8 @@ import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.responses.ApiResponses; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -37,6 +39,12 @@ public class TableOperationsController { * operation row, writes a history entry with the operation's table metadata, and returns 201 * Created with the history row, or 404 if the operation does not exist. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "201", description = "Operation UPDATE: CREATED"), + @ApiResponse(responseCode = "400", description = "Operation UPDATE: BAD_REQUEST"), + @ApiResponse(responseCode = "404", description = "Operation UPDATE: NOT_FOUND") + }) @PostMapping("/{id}/update") public ResponseEntity updateOperation( @PathVariable String id, @RequestBody UpdateOperationRequest request) { @@ -66,6 +74,11 @@ public ResponseEntity updateOperation( } /** Fetch a single operation row by its ID, regardless of status. 
Returns 404 if not found. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Operation GET: OK"), + @ApiResponse(responseCode = "404", description = "Operation GET: NOT_FOUND") + }) @GetMapping("/{id}") public ResponseEntity getTableOperation(@PathVariable String id) { return service @@ -82,6 +95,11 @@ public ResponseEntity getTableOperation(@PathVariable String id * List operations matching the given filters, capped at {@code limit} rows. Every filter is * optional; {@code limit} is required so callers always state how much they want back. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Operation SEARCH: OK"), + @ApiResponse(responseCode = "400", description = "Operation SEARCH: BAD_REQUEST") + }) @GetMapping public ResponseEntity> listTableOperations( @RequestParam(required = false) OperationType operationType, diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java index 9a1b6d303..873d51d2e 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsHistoryController.java @@ -2,6 +2,8 @@ import com.linkedin.openhouse.optimizer.api.spec.TableOperationsHistory; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.responses.ApiResponses; import java.util.List; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; @@ -24,6 +26,10 @@ public class TableOperationsHistoryController { private final OptimizerDataService service; /** Append a completed-job result. 
Called by the SparkJob after each run (success or failure). */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "201", description = "OperationsHistory CREATE: CREATED") + }) @PostMapping public ResponseEntity appendHistory( @RequestBody TableOperationsHistory dto) { @@ -35,6 +41,11 @@ public ResponseEntity appendHistory( * Return the most recent history for a table, newest first, capped at {@code limit} rows. {@code * limit} is required. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "OperationsHistory GET: OK"), + @ApiResponse(responseCode = "400", description = "OperationsHistory GET: BAD_REQUEST") + }) @GetMapping("/{tableUuid}") public ResponseEntity> getHistory( @PathVariable String tableUuid, @RequestParam int limit) { diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index ca8db4d51..b119dd1c7 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -4,6 +4,8 @@ import com.linkedin.openhouse.optimizer.api.spec.TableStatsHistory; import com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequest; import com.linkedin.openhouse.optimizer.service.OptimizerDataService; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.responses.ApiResponses; import java.time.Instant; import java.util.List; import java.util.Optional; @@ -32,6 +34,7 @@ public class TableStatsController { * Create or overwrite the stats row for {@code tableUuid}. Called by the Tables Service on every * Iceberg commit. Idempotent. 
*/ + @ApiResponses(value = {@ApiResponse(responseCode = "200", description = "Stats PUT: OK")}) @PutMapping("/{tableUuid}") public ResponseEntity upsertTableStats( @PathVariable String tableUuid, @RequestBody UpsertTableStatsRequest request) { @@ -40,6 +43,11 @@ public ResponseEntity upsertTableStats( } /** Fetch the stats row for {@code tableUuid}. Returns 404 if no stats have been written yet. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Stats GET: OK"), + @ApiResponse(responseCode = "404", description = "Stats GET: NOT_FOUND") + }) @GetMapping("/{tableUuid}") public ResponseEntity getTableStats(@PathVariable String tableUuid) { return service @@ -56,6 +64,11 @@ public ResponseEntity getTableStats(@PathVariable String tableUuid) * List stats rows matching the given filters, capped at {@code limit} rows. Every filter is * optional; {@code limit} is required so callers always state how much they want back. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "Stats SEARCH: OK"), + @ApiResponse(responseCode = "400", description = "Stats SEARCH: BAD_REQUEST") + }) @GetMapping public ResponseEntity> listTableStats( @RequestParam(required = false) String databaseName, @@ -79,6 +92,11 @@ public ResponseEntity> listTableStats( * Return per-commit stats history for {@code tableUuid}, newest first, capped at {@code limit} * rows. Optional {@code since} filter (inclusive). {@code limit} is required. */ + @ApiResponses( + value = { + @ApiResponse(responseCode = "200", description = "StatsHistory GET: OK"), + @ApiResponse(responseCode = "400", description = "StatsHistory GET: BAD_REQUEST") + }) @GetMapping("/{tableUuid}/history") public ResponseEntity> getStatsHistory( @PathVariable String tableUuid,