Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions applications/mlflow/Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ vars:
PORT: 5000

# Chart configuration
CHARTS: mlflow infra
CHARTS: mlflow infra postgres-support

# Environment detection
CI:
Expand All @@ -39,7 +39,9 @@ vars:
sh: helm show chart ./charts/mlflow | grep '^version:' | cut -d ' ' -f 2
INFRA_VERSION:
sh: helm show chart ./charts/infra | grep '^version:' | cut -d ' ' -f 2

POSTGRES_SUPPORT_VERSION:
sh: helm show chart ./charts/postgres-support | grep '^version:' | cut -d ' ' -f 2

# Release configuration
# APP_NAME can be overridden by setting REPLICATED_APP environment variable
APP_NAME: '{{.REPLICATED_APP | default "diamon-mlflow"}}'
Expand Down Expand Up @@ -338,10 +340,12 @@ tasks:
# Get chart versions
MLFLOW_VERSION="{{.MLFLOW_VERSION}}"
INFRA_VERSION="{{.INFRA_VERSION}}"

POSTGRES_SUPPORT_VERSION="{{.POSTGRES_SUPPORT_VERSION}}"

echo "Working with chart versions:"
echo "MLflow chart version: $MLFLOW_VERSION"
echo "Infra chart version: $INFRA_VERSION"
echo "Postgres Support chart version: $POSTGRES_SUPPORT_VERSION"

# Update MLflow HelmChart manifest
MLFLOW_HELMCHART="{{.RELEASE_DIR}}/mlflow-chart.yaml"
Expand Down Expand Up @@ -374,7 +378,21 @@ tasks:
else
echo "⚠️ Infra HelmChart not found at $INFRA_HELMCHART"
fi


# Update Postgres Support HelmChart manifest: write POSTGRES_SUPPORT_VERSION
# (read from ./charts/postgres-support via `helm show chart`) into the
# manifest's chartVersion field.
PG_SUPPORT_HELMCHART="{{.RELEASE_DIR}}/postgres-support-chart.yaml"
if [ -f "$PG_SUPPORT_HELMCHART" ]; then
echo "Updating version in $PG_SUPPORT_HELMCHART to $POSTGRES_SUPPORT_VERSION..."

# Prefer yq when it is installed: it edits only .spec.chart.chartVersion, in place.
if command -v yq &> /dev/null; then
yq eval ".spec.chart.chartVersion = \"$POSTGRES_SUPPORT_VERSION\"" -i "$PG_SUPPORT_HELMCHART"
else
# Fallback without yq: sed rewrites every line containing "chartVersion:"
# (including anything after it on that line), then the .bak backup that
# -i.bak created is removed.
sed -i.bak "s/chartVersion:.*/chartVersion: \"$POSTGRES_SUPPORT_VERSION\"/" "$PG_SUPPORT_HELMCHART" && rm "${PG_SUPPORT_HELMCHART}.bak"
fi
else
# A missing manifest only produces a warning; the script carries on.
echo "⚠️ Postgres Support HelmChart not found at $PG_SUPPORT_HELMCHART"
fi

echo "✅ Release manifest versions updated successfully."
- cmd: task versions:verify || echo "⚠️ Version check failed after update. Please verify manually."
ignore_error: true
Expand Down
5 changes: 5 additions & 0 deletions applications/mlflow/charts/postgres-support/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Helm chart metadata for the postgres-support chart.
apiVersion: v2
type: application
name: postgres-support
# Keep `version` unquoted and on a line of its own: the Taskfile extracts it
# by grepping `helm show chart` output for a line starting with "version:".
version: 0.1.0
description: Support bundle specs for CloudnativePG PostgreSQL
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{{- /*
postgres-support.supportbundle

Renders a Troubleshoot SupportBundle spec for a CloudNativePG-managed
PostgreSQL cluster running in the release namespace. It is meant to be
embedded with `include` (the caller is responsible for indentation).

Collectors: operator and cluster pod logs, a pg_isready probe and a psql
status query on the primary, cluster resources, and /controller/run.json
copied from the primary's postgres container.

Analyzers: text matches over the collected logs and exec output.

Path conventions the analyzers rely on (per the Troubleshoot collector docs):
  logs -> [name]/[pod-name]/[container-name].log
  exec -> [name]/[namespace]/[pod-name]/[collectorName]-stdout.txt
Comments inside the template use left-trimmed template comments so they never
appear in the rendered spec.
*/ -}}
{{- define "postgres-support.supportbundle" -}}
apiVersion: troubleshoot.sh/v1beta2
kind: SupportBundle
metadata:
  name: postgres-supportbundle
spec:
  collectors:
    - logs:
        name: cnpg-operator-logs
        namespace: {{ .Release.Namespace | quote }}
        selector:
          - app.kubernetes.io/name=cloudnative-pg
        limits:
          maxAge: 720h
          maxLines: 10000
    {{- /* A bare label key selects every pod that carries the label, whatever its value. */}}
    - logs:
        name: postgres-cluster-logs
        namespace: {{ .Release.Namespace | quote }}
        selector:
          - cnpg.io/cluster
        limits:
          maxAge: 720h
          maxLines: 10000
    {{- /* collectorName fixes the output file name so the analyzer glob below can find it. */}}
    - exec:
        name: pg-isready-check
        collectorName: pg_isready
        namespace: {{ .Release.Namespace | quote }}
        selector:
          - cnpg.io/cluster
          - role=primary
        command: ["pg_isready"]
        args: ["-U", "postgres"]
        timeout: 10s
    {{- /*
      One -c per statement: psql releases before 15 print only the LAST result
      of a multi-statement -c string, which would hide pg_is_in_recovery().
    */}}
    - exec:
        name: cnpg-cluster-status
        collectorName: psql
        namespace: {{ .Release.Namespace | quote }}
        selector:
          - cnpg.io/cluster
          - role=primary
        command: ["psql"]
        args:
          - "-U"
          - "postgres"
          - "-c"
          - "SELECT version();"
          - "-c"
          - "SELECT pg_is_in_recovery();"
          - "-c"
          - "SELECT count(*) AS active_connections FROM pg_stat_activity;"
        timeout: 10s
    - clusterResources: {}
    - copy:
        name: postgres-config
        namespace: {{ .Release.Namespace | quote }}
        selector:
          - cnpg.io/cluster
          - role=primary
        containerPath: /controller/run.json
        containerName: postgres
  analyzers:
    {{- /* Log files sit one directory (the pod name) below the collector name. */}}
    - textAnalyze:
        checkName: CloudnativePG Operator Running
        fileName: cnpg-operator-logs/*/*.log
        regex: "Starting manager"
        outcomes:
          - pass:
              when: "true"
              message: CloudnativePG operator is running
          - fail:
              when: "false"
              message: CloudnativePG operator may not be running - check operator pod logs
    {{- /* Exec output sits two directories (namespace, pod name) below the collector name. */}}
    - textAnalyze:
        checkName: PostgreSQL Accepting Connections
        fileName: pg-isready-check/*/*/pg_isready-stdout.txt
        regex: "accepting connections"
        outcomes:
          - pass:
              when: "true"
              message: PostgreSQL is accepting connections
          - fail:
              when: "false"
              message: PostgreSQL is not accepting connections - check cluster pod logs
    {{- /*
      psql prints the column header, a dashed rule, then the value on separate
      lines. "." does not match newlines in Go regular expressions, so the
      pattern walks the line breaks explicitly with \s+ (single-quoted so YAML
      leaves the backslashes alone).
    */}}
    - textAnalyze:
        checkName: PostgreSQL Not in Recovery
        fileName: cnpg-cluster-status/*/*/psql-stdout.txt
        regex: 'pg_is_in_recovery\s+-+\s+f\b'
        outcomes:
          - pass:
              when: "true"
              message: Primary PostgreSQL instance is not in recovery mode
          - warn:
              when: "false"
              message: Primary PostgreSQL instance may be in recovery mode
    - textAnalyze:
        checkName: PostgreSQL WAL Errors
        fileName: postgres-cluster-logs/*/*.log
        regex: "FATAL|PANIC|could not write to WAL"
        outcomes:
          - fail:
              when: "true"
              message: PostgreSQL logs contain FATAL/PANIC errors or WAL write failures
          - pass:
              when: "false"
              message: No critical PostgreSQL errors detected in logs
    - textAnalyze:
        checkName: CNPG Failover Events
        fileName: cnpg-operator-logs/*/*.log
        regex: "Initiating failover|failover completed"
        outcomes:
          - warn:
              when: "true"
              message: CloudnativePG failover events detected - review operator logs for details
          - pass:
              when: "false"
              message: No failover events detected
{{- end -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{{- /*
Secret that carries the rendered support bundle spec. The
troubleshoot.sh/kind=support-bundle label marks it as a support bundle spec;
the spec itself is stored as a literal block under stringData.
*/ -}}
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: {{ .Release.Name }}-postgres-supportbundle
  labels:
    troubleshoot.sh/kind: support-bundle
stringData:
  support-bundle-spec: |
    {{- include "postgres-support.supportbundle" . | nindent 4 }}
12 changes: 12 additions & 0 deletions applications/mlflow/release/postgres-support-chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# KOTS HelmChart resource for the postgres-support chart.
apiVersion: kots.io/v1beta2
kind: HelmChart
metadata:
  name: postgres-support
spec:
  chart:
    name: postgres-support
    # The Taskfile release-update step rewrites this value from the chart's
    # Chart.yaml; keep any comment above the line, because the sed fallback
    # replaces the whole line.
    chartVersion: "0.1.0"
  # The chart is excluded when the `postgres_type` config option equals
  # `external_postgres`.
  exclude: 'repl{{ ConfigOptionEquals `postgres_type` `external_postgres` }}'
  weight: -5
  values: {}
  builder: {}
Loading