Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
2e52329
Remove IPAddress/IPAddressClaim; route single-address via IPPrefixClaim
scotwells May 21, 2026
526154d
Fix CI e2e: create ipam-system namespace before postgres; fix string …
scotwells May 22, 2026
9b2f08d
Use namespace component yaml instead of raw kubectl create namespace
scotwells May 22, 2026
be1b163
Simplify CI: apply overlay once, remove redundant postgres step
scotwells May 22, 2026
9e85f5b
Include secret and anonymous-rbac in test-infra overlay
scotwells May 22, 2026
b76a40b
Fix CI: imagePullPolicy Never, drop cert-manager-ca, better diagnostics
scotwells May 22, 2026
98b4900
Fix CI TLS: replace CSI driver with cert-manager Certificate + secret…
scotwells May 22, 2026
c0ff6e2
Fix TLS volume patch: use JSON 6902 to replace CSI volume with secret
scotwells May 22, 2026
dc09a45
Load Kyverno-normalized image name into kind
scotwells May 22, 2026
4fb0070
Fix image name references to use ghcr.io/milo-os/ipam consistently
scotwells May 22, 2026
4432cb3
Fix CI: create control-plane-ca configmap before deploying IPAM
scotwells May 22, 2026
6859f1d
Fix three failing e2e tests in PR #24
scotwells May 22, 2026
50bad02
Fix pool capacity tracking and cacher-based wait timeouts
scotwells May 22, 2026
2ed0165
Replace kubectl wait with polling scripts in e2e setup steps
scotwells May 22, 2026
438c412
Fix multi-tenant finally deletion race and prefix-exhaustion wait tim…
scotwells May 22, 2026
1495ed2
Replace kubectl wait with polling scripts in all e2e suites
scotwells May 22, 2026
f848888
Add missing timeout: 30s to verification scripts in multi-tenant suite
scotwells May 22, 2026
86aceec
Remove ASN resources from multi-tenant test; ASNPool/ASNClaim not yet…
scotwells May 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,124 @@ jobs:
echo " ok $f"
done

e2e:
name: End-to-end tests (Chainsaw)
runs-on: ubuntu-latest
env:
TASK_X_REMOTE_TASKFILES: "1"
# test-infra writes kubeconfig here when running outside the test-infra repo
KUBECONFIG: .test-infra/kubeconfig
steps:
- uses: actions/checkout@v6

- uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true

- name: Install Task
uses: arduino/setup-task@v2
with:
version: 3.x
repo-token: ${{ secrets.GITHUB_TOKEN }}

- name: Install kind
run: |
KIND_VERSION="v0.30.0"
curl -fsSL -o kind "https://github.com/kubernetes-sigs/kind/releases/download/${KIND_VERSION}/kind-linux-amd64"
chmod +x kind
sudo mv kind /usr/local/bin/kind

- name: Install kubectl
  run: |
    # Resolve the current stable release tag, then download that kubectl build.
    # -f makes curl exit non-zero on an HTTP error so an error page is never
    # captured as the version string and spliced into the download URL.
    KUBECTL_VERSION="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
    curl -fsSL -o kubectl "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl"
    chmod +x kubectl
    sudo mv kubectl /usr/local/bin/kubectl

- name: Install kustomize
  run: |
    # Download the pinned kustomize v5.4.3 release tarball and install the binary.
    # -f fails the step on an HTTP error instead of saving the error page as
    # the tarball; -S still prints the reason while -s hides the progress bar.
    curl -fsSLo /tmp/kustomize.tgz https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv5.4.3/kustomize_v5.4.3_linux_amd64.tar.gz
    tar -xzf /tmp/kustomize.tgz -C /tmp
    sudo mv /tmp/kustomize /usr/local/bin/

- name: Install Helm
run: |
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash

- name: Install Flux CLI
run: |
curl -fsSL https://fluxcd.io/install.sh | sudo bash

- name: Install Chainsaw
run: |
CHAINSAW_VERSION="v0.2.12"
curl -fsSL -o /tmp/chainsaw.tgz \
"https://github.com/kyverno/chainsaw/releases/download/${CHAINSAW_VERSION}/chainsaw_linux_amd64.tar.gz"
tar -xzf /tmp/chainsaw.tgz -C /tmp chainsaw
sudo mv /tmp/chainsaw /usr/local/bin/chainsaw

- name: Spin up kind cluster (test-infra)
run: task --yes test-infra:cluster-up

- name: Build IPAM container image
run: task --yes dev:build

- name: Load image into kind
run: task --yes dev:load

- name: Create control-plane-ca configmap
run: |
# The aggregated apiserver uses --requestheader-client-ca-file to verify
# the front proxy identity. In kind the cert lives in the
# extension-apiserver-authentication ConfigMap in kube-system.
kubectl create namespace ipam-system --dry-run=client -o yaml | kubectl apply -f -
kubectl get configmap extension-apiserver-authentication -n kube-system \
-o jsonpath='{.data.requestheader-client-ca-file}' > /tmp/requestheader-ca.crt
kubectl create configmap control-plane-ca \
-n ipam-system \
--from-file=ca.crt=/tmp/requestheader-ca.crt \
--dry-run=client -o yaml | kubectl apply -f -

- name: Deploy IPAM service
run: |
kubectl apply -k config/overlays/test-infra
# Wait for cert-manager to issue the TLS secret before the apiserver pods can mount it.
kubectl -n ipam-system wait certificate/ipam-tls \
--for=condition=Ready --timeout=120s
kubectl -n ipam-system wait helmrelease/postgres \
--for=condition=Ready --timeout=300s
kubectl -n ipam-system wait pod \
-l app.kubernetes.io/name=postgresql \
--for=condition=Ready --timeout=180s
kubectl wait --for=condition=Ready pod \
-l app=ipam-apiserver -n ipam-system --timeout=180s
kubectl wait --for=condition=Available \
apiservice/v1alpha1.ipam.miloapis.com --timeout=180s

- name: Run Chainsaw e2e suites
run: chainsaw test test/e2e/

- name: Dump diagnostics on failure
if: failure()
run: |
echo "=== Pods ==="
kubectl get pods -A
echo "=== IPAM pod describe ==="
kubectl describe pods -n ipam-system -l app=ipam-apiserver || true
echo "=== IPAM apiserver logs ==="
kubectl logs -n ipam-system -l app=ipam-apiserver --all-containers --tail=100 || true
echo "=== CertificateRequests ==="
kubectl get certificaterequests -n ipam-system -o wide || true
echo "=== Events ==="
kubectl get events -n ipam-system --sort-by='.lastTimestamp' | tail -60 || true
echo "=== APIService ==="
kubectl get apiservice v1alpha1.ipam.miloapis.com -o yaml || true

- name: Tear down kind cluster
if: always()
run: task --yes test-infra:cluster-down

observability:
name: Verify observability artifacts
runs-on: ubuntu-latest
Expand Down
5 changes: 3 additions & 2 deletions config/base/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ spec:
type: RuntimeDefault
initContainers:
- name: migrate
image: ghcr.io/datum-cloud/ipam-apiserver:latest
image: ghcr.io/milo-os/ipam:latest
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
Expand Down Expand Up @@ -86,7 +86,7 @@ spec:
memory: 256Mi
containers:
- name: apiserver
image: ghcr.io/datum-cloud/ipam-apiserver:latest
image: ghcr.io/milo-os/ipam:latest
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
Expand All @@ -111,6 +111,7 @@ spec:
- --authentication-skip-lookup=$(AUTHENTICATION_SKIP_LOOKUP)
- --authentication-tolerate-lookup-failure=$(AUTHENTICATION_TOLERATE_LOOKUP_FAILURE)
- --authorization-always-allow-paths=$(AUTHORIZATION_ALWAYS_ALLOW_PATHS)
- --requestheader-client-ca-file=/etc/kubernetes/pki/requestheader/ca.crt
- --requestheader-username-headers=$(REQUESTHEADER_USERNAME_HEADERS)
- --requestheader-group-headers=$(REQUESTHEADER_GROUP_HEADERS)
- --requestheader-uid-headers=$(REQUESTHEADER_UID_HEADERS)
Expand Down
2 changes: 1 addition & 1 deletion config/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ labels:

# Image (overlays override newTag for different environments)
images:
- name: ghcr.io/datum-cloud/ipam-apiserver
- name: ghcr.io/milo-os/ipam
newTag: latest

# rbac-auth-reader RoleBinding belongs in kube-system. The patch keeps the
Expand Down
2 changes: 1 addition & 1 deletion config/overlays/dev/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ components:
- ../../components/observability

images:
- name: ghcr.io/datum-cloud/ipam-apiserver
- name: ghcr.io/milo-os/ipam
newName: ipam-apiserver
newTag: dev

Expand Down
15 changes: 15 additions & 0 deletions config/overlays/test-infra/anonymous-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
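# Dev/test-only: bind the anonymous user (system:anonymous) to cluster-admin so
# kubectl proxy / curl loops work without bearer tokens. This grants
# unauthenticated callers full cluster access, not just read access.
# Do NOT apply outside disposable dev/test clusters.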
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: ipam-dev-anonymous
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: User
name: system:anonymous
14 changes: 12 additions & 2 deletions config/overlays/test-infra/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,30 @@ namespace: ipam-system

resources:
- ../../base
- secret.yaml
- anonymous-rbac.yaml
- tls-certificate.yaml

components:
- ../../components/namespace
- ../../components/api-registration
- ../../components/cert-manager-ca
- ../../components/postgres

images:
- name: ghcr.io/datum-cloud/ipam-apiserver
- name: ghcr.io/milo-os/ipam
newName: ipam-apiserver
newTag: dev

patches:
- path: patches/apiservice-patch.yaml
- path: patches/deployment-patch.yaml
target:
kind: Deployment
name: ipam-apiserver
- path: patches/tls-volume-patch.yaml
target:
kind: Deployment
name: ipam-apiserver

labels:
- includeSelectors: false
Expand Down
18 changes: 18 additions & 0 deletions config/overlays/test-infra/patches/deployment-patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# In kind, images are loaded via `kind load docker-image` and are never
# pulled from a registry. Set Never to prevent Kyverno or other admission
# controllers from mutating imagePullPolicy to Always, which would fail
# because ipam-apiserver:dev doesn't exist on any public registry.
apiVersion: apps/v1
kind: Deployment
metadata:
name: ipam-apiserver
spec:
template:
spec:
initContainers:
- name: migrate
imagePullPolicy: Never
containers:
- name: apiserver
imagePullPolicy: Never
6 changes: 6 additions & 0 deletions config/overlays/test-infra/patches/tls-volume-patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
- op: replace
path: /spec/template/spec/volumes/0
value:
name: tls-certs
secret:
secretName: ipam-tls
13 changes: 13 additions & 0 deletions config/overlays/test-infra/secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
apiVersion: v1
kind: Secret
metadata:
name: postgres-credentials
namespace: ipam-system
labels:
app.kubernetes.io/name: postgres
app.kubernetes.io/component: database
app.kubernetes.io/part-of: ipam.miloapis.com
type: Opaque
stringData:
dsn: "postgres://ipam:devpassword@postgres-postgresql.ipam-system.svc.cluster.local:5432/ipam?sslmode=disable"
password: "devpassword"
22 changes: 22 additions & 0 deletions config/overlays/test-infra/tls-certificate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# cert-manager Certificate (not CSI driver) so the built-in cert-manager
# approver auto-approves the CertificateRequest and writes the TLS secret.
# The CSI driver's CertificateRequests are not approved by the built-in
# approver, causing pods to hang in Init:0/1.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: ipam-tls
namespace: ipam-system
spec:
secretName: ipam-tls
duration: 24h
renewBefore: 1h
dnsNames:
- ipam-apiserver
- ipam-apiserver.ipam-system
- ipam-apiserver.ipam-system.svc
- ipam-apiserver.ipam-system.svc.cluster.local
issuerRef:
name: selfsigned-cluster-issuer
kind: ClusterIssuer
4 changes: 0 additions & 4 deletions internal/allocator/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,6 @@ type PrefixAllocator interface {
// pool identified by poolKey and returns its CIDR string.
AllocatePrefix(ctx context.Context, tx pgx.Tx, poolKey string, prefixLen int, ipFamily string, claimKey string, ownerProject string) (string, error)

// AllocateSingleAddress reserves a single host address within the pool
// identified by poolKey and returns its IP string (without prefix).
AllocateSingleAddress(ctx context.Context, tx pgx.Tx, poolKey string, ipFamily string, claimKey string, ownerProject string) (string, error)

// InsertObject writes a generic API object row into ipam_objects inside
// the supplied transaction and returns the assigned resource_version.
// Callers use the returned rv to populate metadata.resourceVersion on
Expand Down
83 changes: 37 additions & 46 deletions internal/allocator/prefix.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,58 +85,15 @@ func (a *PostgresPrefixAllocator) AllocatePrefix(ctx context.Context, tx pgx.Tx,
// set, so the post-allocation utilization can be computed from data
// already in scope without an extra DB round-trip.
updated := append(append([]net.IPNet(nil), existing...), *cidr)
if err := persistPoolCapacity(ctx, tx, pool, poolKey, parents, updated); err != nil {
return "", fmt.Errorf("update pool capacity after allocation: %w", err)
}
publishPrefixUtilization(poolKey, ipFamily, parents, updated)

klog.V(2).InfoS("Allocated prefix", "pool", poolKey, "cidr", cidr.String(), "claim", claimKey, "ownerProject", ownerProject)
return cidr.String(), nil
}

// AllocateSingleAddress implements PrefixAllocator.AllocateSingleAddress.
//
// It picks one host-sized block (/32, or /128 when ipFamily is "IPv6") out of
// the pool stored under poolKey, records it through insertPrefixAllocation
// using tx, and returns the block's IP without a prefix length.
// ErrPoolExhausted is returned when the search reports
// allocation.ErrPoolExhausted.
func (a *PostgresPrefixAllocator) AllocateSingleAddress(ctx context.Context, tx pgx.Tx, poolKey string, ipFamily string, claimKey string, ownerProject string) (string, error) {
	pool, err := lockAndDecodePool(ctx, tx, poolKey)
	if err != nil {
		return "", err
	}
	parents, err := parsePoolCIDR(pool)
	if err != nil {
		return "", err
	}
	taken, err := loadExistingAllocations(ctx, tx, poolKey)
	if err != nil {
		return "", err
	}

	// A single address is requested as a full-length prefix for its family.
	var hostBits int
	switch ipFamily {
	case "IPv6":
		hostBits = 128
	default:
		hostBits = 32
	}

	// An unset strategy on the pool falls back to first-fit.
	strategy := allocation.Strategy(pool.Spec.Allocation.Strategy)
	if strategy == "" {
		strategy = allocation.FirstFit
	}

	block, err := allocation.FindFirstAvailableBlock(parents, taken, hostBits, strategy)
	switch {
	case err == nil:
		// A free block was found; carry on with the insert below.
	case errors.Is(err, allocation.ErrPoolExhausted):
		return "", ErrPoolExhausted
	default:
		return "", fmt.Errorf("compute next address: %w", err)
	}

	if err := insertPrefixAllocation(ctx, tx, poolKey, block.String(), claimKey, ipFamily, false, ownerProject); err != nil {
		return "", err
	}

	// Publish utilization from a fresh slice that includes the new block so
	// the slice returned by loadExistingAllocations is never appended to.
	withNew := make([]net.IPNet, 0, len(taken)+1)
	withNew = append(withNew, taken...)
	withNew = append(withNew, *block)
	publishPrefixUtilization(poolKey, ipFamily, parents, withNew)

	addr := block.IP.String()
	klog.V(2).InfoS("Allocated single address", "pool", poolKey, "addr", addr, "claim", claimKey, "ownerProject", ownerProject)
	return addr, nil
}

// InsertObject implements PrefixAllocator.InsertObject.
func (a *PostgresPrefixAllocator) InsertObject(ctx context.Context, tx pgx.Tx, key, kind, namespace, name string, data []byte) (int64, error) {
return insertObject(ctx, tx, key, kind, namespace, name, data)
Expand Down Expand Up @@ -244,11 +201,45 @@ func (a *PostgresPrefixAllocator) Release(ctx context.Context, tx pgx.Tx, claimK
if perr != nil {
return fmt.Errorf("reload allocations after release: %w", perr)
}
if perr := persistPoolCapacity(ctx, tx, pool, r.poolKey, parents, remaining); perr != nil {
return fmt.Errorf("update pool capacity after release: %w", perr)
}
publishPrefixUtilization(r.poolKey, r.ipFamily, parents, remaining)
}
return nil
}

// persistPoolCapacity refreshes pool.Status.Capacity and writes the pool back
// through updateObject (ipam_objects + MODIFIED changelog) using tx.
//
// Total is the sum of allocation.CountAddresses over parents, Allocated is the
// same sum over allocations, and Available is their difference clamped at
// zero. The pool argument is mutated in place before it is marshalled.
//
// It must run inside the transaction that inserted or deleted the allocation
// row so the stored capacity stays consistent with that change.
func persistPoolCapacity(ctx context.Context, tx pgx.Tx, pool *ipamv1alpha1.IPPrefix, poolKey string, parents, allocations []net.IPNet) error {
	var totalAddrs int64
	for i := range parents {
		totalAddrs += allocation.CountAddresses(parents[i])
	}

	var usedAddrs int64
	for i := range allocations {
		usedAddrs += allocation.CountAddresses(allocations[i])
	}

	// Never report a negative remainder, even if allocations exceed the total.
	freeAddrs := totalAddrs - usedAddrs
	if freeAddrs < 0 {
		freeAddrs = 0
	}

	pool.Status.Capacity = ipamv1alpha1.PrefixCapacity{
		Total:     totalAddrs,
		Allocated: usedAddrs,
		Available: freeAddrs,
	}

	encoded, err := json.Marshal(pool)
	if err != nil {
		return fmt.Errorf("marshal pool: %w", err)
	}

	_, err = updateObject(ctx, tx, poolKey, encoded)
	if err != nil {
		return fmt.Errorf("write pool: %w", err)
	}
	return nil
}

// DeleteObject implements PrefixAllocator.DeleteObject.
func (a *PostgresPrefixAllocator) DeleteObject(ctx context.Context, tx pgx.Tx, key string) (int64, error) {
return deleteObject(ctx, tx, key)
Expand Down
Loading
Loading