Skip to content

Commit fccc2fc

Browse files
author
deepshekhardas
committed
Fix error handling gaps in recent releases
1 parent 9c3a802 commit fccc2fc

4 files changed

Lines changed: 284 additions & 3 deletions

File tree

apps/webapp/app/entry.server.tsx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,11 @@ Worker.init().catch((error) => {
247247
logError(error);
248248
});
249249

250+
import { initMollifierDrainerWorker } from "~/v3/mollifierDrainerWorker.server";
251+
initMollifierDrainerWorker().catch((error) => {
252+
logger.error("Mollifier drainer initialization failed", { error });
253+
});
254+
250255
bootstrap().catch((error) => {
251256
logError(error);
252257
});
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import { Logger } from "@trigger.dev/core/logger";
2+
3+
/**
 * Policy for reacting to a lost/broken replication stream.
 *
 * - "reconnect": retry with exponential backoff (defaults applied by
 *   createReplicationErrorRecovery: 1s initial, 60s cap, unlimited attempts).
 * - "exit": log, wait exitDelayMs (default 5s), then process.exit(exitCode ?? 1)
 *   so a supervisor can restart the process.
 * - "log": no recovery action is scheduled.
 */
export type ReplicationErrorRecoveryStrategy =
  | {
      type: "reconnect";
      /** Delay before the first reconnect attempt; doubles on each retry. */
      initialDelayMs?: number;
      /** Upper bound on the backoff delay. */
      maxDelayMs?: number;
      /** Give up after this many attempts; 0 or unset means unlimited. */
      maxAttempts?: number;
    }
  | {
      type: "exit";
      /** Grace period before process.exit (e.g. to let logs flush). */
      exitDelayMs?: number;
      /** Process exit status; defaults to 1. */
      exitCode?: number;
    }
  | { type: "log" };

/** Collaborators injected into createReplicationErrorRecovery. */
export type ReplicationErrorRecoveryDeps = {
  strategy: ReplicationErrorRecoveryStrategy;
  logger: Logger;
  /** Re-establishes the replication stream; a rejection re-schedules a retry. */
  reconnect: () => Promise<void>;
  /** True once shutdown has begun; suppresses all recovery actions. */
  isShuttingDown: () => boolean;
};

/** Handle returned by createReplicationErrorRecovery. */
export type ReplicationErrorRecovery = {
  /** React to a stream error according to the configured strategy. */
  handle(error: unknown): void;
  /** Call when the stream (re)starts successfully; resets the backoff counter. */
  notifyStreamStarted(): void;
  /** Treat loss of leader election like a stream error (reconnect strategy only). */
  notifyLeaderElectionLost(error: unknown): void;
  /** Cancel any pending reconnect/exit timers. */
  dispose(): void;
};
30+
31+
export function createReplicationErrorRecovery(
32+
deps: ReplicationErrorRecoveryDeps
33+
): ReplicationErrorRecovery {
34+
const { strategy, logger, reconnect, isShuttingDown } = deps;
35+
let attempt = 0;
36+
let pendingReconnect: NodeJS.Timeout | null = null;
37+
let pendingExit: NodeJS.Timeout | null = null;
38+
39+
function scheduleReconnect(error: unknown): void {
40+
if (strategy.type !== "reconnect") return;
41+
if (pendingReconnect) return;
42+
43+
attempt += 1;
44+
const maxAttempts = strategy.maxAttempts ?? 0;
45+
if (maxAttempts > 0 && attempt > maxAttempts) {
46+
logger.error("Replication reconnect exceeded maxAttempts; giving up", {
47+
attempt,
48+
maxAttempts,
49+
error,
50+
});
51+
return;
52+
}
53+
54+
const initialDelay = strategy.initialDelayMs ?? 1_000;
55+
const maxDelay = strategy.maxDelayMs ?? 60_000;
56+
const delay = Math.min(initialDelay * Math.pow(2, attempt - 1), maxDelay);
57+
58+
logger.error("Replication stream lost — scheduling reconnect", {
59+
attempt,
60+
delayMs: delay,
61+
error,
62+
});
63+
64+
pendingReconnect = setTimeout(async () => {
65+
pendingReconnect = null;
66+
67+
if (isShuttingDown()) {
68+
logger.info("Replication reconnect skipped — shutting down");
69+
return;
70+
}
71+
72+
try {
73+
await reconnect();
74+
} catch (err) {
75+
logger.error("Replication reconnect failed", { error: err });
76+
scheduleReconnect(err);
77+
}
78+
}, delay);
79+
}
80+
81+
function scheduleExit(): void {
82+
if (strategy.type !== "exit") return;
83+
if (pendingExit) return;
84+
85+
const delay = strategy.exitDelayMs ?? 5_000;
86+
const exitCode = strategy.exitCode ?? 1;
87+
88+
logger.error("Replication stream lost — exiting", { delayMs: delay, exitCode });
89+
90+
pendingExit = setTimeout(() => {
91+
process.exit(exitCode);
92+
}, delay);
93+
}
94+
95+
return {
96+
handle(error: unknown) {
97+
if (isShuttingDown()) return;
98+
99+
switch (strategy.type) {
100+
case "log":
101+
return;
102+
case "exit":
103+
return scheduleExit();
104+
case "reconnect":
105+
return scheduleReconnect(error);
106+
}
107+
},
108+
notifyStreamStarted() {
109+
if (attempt > 0) {
110+
logger.info("Replication reconnect succeeded", { attempt });
111+
attempt = 0;
112+
}
113+
},
114+
notifyLeaderElectionLost(error: unknown) {
115+
if (isShuttingDown()) return;
116+
if (strategy.type !== "reconnect") return;
117+
scheduleReconnect(error);
118+
},
119+
dispose() {
120+
if (pendingReconnect) {
121+
clearTimeout(pendingReconnect);
122+
pendingReconnect = null;
123+
}
124+
if (pendingExit) {
125+
clearTimeout(pendingExit);
126+
pendingExit = null;
127+
}
128+
},
129+
};
130+
}
131+
132+
export type ReplicationErrorRecoveryEnv = {
133+
strategy: "reconnect" | "exit" | "log";
134+
reconnectInitialDelayMs?: number;
135+
reconnectMaxDelayMs?: number;
136+
reconnectMaxAttempts?: number;
137+
exitDelayMs?: number;
138+
exitCode?: number;
139+
};
140+
141+
export function strategyFromEnv(
142+
env: ReplicationErrorRecoveryEnv
143+
): ReplicationErrorRecoveryStrategy {
144+
switch (env.strategy) {
145+
case "exit":
146+
return {
147+
type: "exit",
148+
exitDelayMs: env.exitDelayMs,
149+
exitCode: env.exitCode,
150+
};
151+
case "log":
152+
return { type: "log" };
153+
case "reconnect":
154+
default:
155+
return {
156+
type: "reconnect",
157+
initialDelayMs: env.reconnectInitialDelayMs,
158+
maxDelayMs: env.reconnectMaxDelayMs,
159+
maxAttempts: env.reconnectMaxAttempts,
160+
};
161+
}
162+
}

apps/webapp/app/v3/dynamicFlushScheduler.server.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,14 +193,13 @@ export class DynamicFlushScheduler<T> {
193193

194194
if (batchesToFlush.length === 0) return;
195195

196-
// Schedule all batches for concurrent processing
196+
// Schedule all batches for concurrent processing
197197
const flushPromises = batchesToFlush.map((batch) =>
198198
this.limiter(async () => {
199-
const itemCount = batch.length;
200-
201199
const self = this;
202200

203201
async function tryFlush(flushId: string, batchToFlush: T[], attempt: number = 1) {
202+
const itemCount = batchToFlush.length;
204203
try {
205204
const startTime = Date.now();
206205
await self.callback(flushId, batchToFlush);
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
export type AttributeValue = string | number | boolean | undefined;
2+
export type AttributeMap = Record<string, AttributeValue>;
3+
4+
export type AttributeKeyOverride = { prefix: string; limit: number };
5+
6+
export type SpanAttributeLimits = {
7+
defaultValueLengthLimit: number;
8+
aiContentValueLengthLimit: number;
9+
totalAttributesLengthLimit: number;
10+
};
11+
12+
export const AI_CONTENT_KEY_OVERRIDES = (limit: number): AttributeKeyOverride[] => [
13+
{ prefix: "ai.prompt", limit },
14+
{ prefix: "ai.response.text", limit },
15+
{ prefix: "ai.response.object", limit },
16+
{ prefix: "ai.response.toolCalls", limit },
17+
{ prefix: "ai.response.reasoning", limit },
18+
{ prefix: "ai.response.reasoningDetails", limit },
19+
{ prefix: "gen_ai.prompt", limit },
20+
{ prefix: "gen_ai.completion", limit },
21+
{ prefix: "gen_ai.request.messages", limit },
22+
{ prefix: "gen_ai.response.text", limit },
23+
];
24+
25+
export const AI_CONTENT_DROP_PRIORITY: string[] = [
26+
"ai.prompt.messages",
27+
"ai.prompt",
28+
"ai.response.object",
29+
"ai.response.text",
30+
"ai.response.toolCalls",
31+
"ai.response.reasoning",
32+
"ai.response.reasoningDetails",
33+
"gen_ai.prompt",
34+
"gen_ai.completion",
35+
"gen_ai.request.messages",
36+
"gen_ai.response.text",
37+
];
38+
39+
function matchPrefix(key: string, prefix: string): boolean {
40+
return key === prefix || key.startsWith(prefix + ".");
41+
}
42+
43+
function getMatchingOverride(key: string, overrides: AttributeKeyOverride[]): number | null {
44+
for (const { prefix, limit } of overrides) {
45+
if (matchPrefix(key, prefix)) return limit;
46+
}
47+
return null;
48+
}
49+
50+
function truncateValue(value: string, limit: number): string {
51+
if (value.length <= limit) return value;
52+
return value.slice(0, limit);
53+
}
54+
55+
export function truncateAttributes(
56+
attributes: AttributeMap,
57+
limits: SpanAttributeLimits,
58+
overrides: AttributeKeyOverride[]
59+
): AttributeMap {
60+
const result: AttributeMap = {};
61+
62+
for (const [key, value] of Object.entries(attributes)) {
63+
if (typeof value !== "string") {
64+
result[key] = value;
65+
continue;
66+
}
67+
68+
const override = getMatchingOverride(key, overrides);
69+
const limit = override ?? limits.defaultValueLengthLimit;
70+
result[key] = truncateValue(value, limit);
71+
}
72+
73+
return result;
74+
}
75+
76+
export function applyTotalSizeBackstop(
77+
attributes: AttributeMap,
78+
limits: SpanAttributeLimits,
79+
dropPriority: string[]
80+
): AttributeMap {
81+
const json = JSON.stringify(attributes);
82+
if (json.length <= limits.totalAttributesLengthLimit) return attributes;
83+
84+
const result: AttributeMap = { ...attributes };
85+
const aiKeys = new Set<string>();
86+
87+
for (const key of Object.keys(result)) {
88+
for (const prefix of dropPriority) {
89+
if (matchPrefix(key, prefix)) {
90+
aiKeys.add(key);
91+
break;
92+
}
93+
}
94+
}
95+
96+
const sortedAiKeys = dropPriority.filter((k) => aiKeys.has(k));
97+
98+
for (const key of sortedAiKeys) {
99+
delete result[key];
100+
const remainingJson = JSON.stringify(result);
101+
if (remainingJson.length <= limits.totalAttributesLengthLimit) break;
102+
}
103+
104+
return result;
105+
}
106+
107+
export function truncateSpanAttributes(
108+
attributes: AttributeMap,
109+
limits: SpanAttributeLimits
110+
): AttributeMap {
111+
const overrides = AI_CONTENT_KEY_OVERRIDES(limits.aiContentValueLengthLimit);
112+
let result = truncateAttributes(attributes, limits, overrides);
113+
result = applyTotalSizeBackstop(result, limits, AI_CONTENT_DROP_PRIORITY);
114+
return result;
115+
}

0 commit comments

Comments
 (0)