Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0a9d14b
Fix flaky TestApplyCRDuringCRDFinalization test
aviralgarg05 Jan 13, 2026
bd39991
fake client-go: un-deprecate NewSimpleClientset
pohly Jan 23, 2026
2371267
add dockerized go cache to `make clean`
BenTheElder Feb 10, 2026
d05c506
Fix flake TestDeviceTaintRule test by adjusting event handler status …
Karthik-K-N Dec 5, 2025
1c0c911
kubeadm: do not add learner member to etcd client endpoints
pacoxu Feb 26, 2026
02115b6
Update CHANGELOG/CHANGELOG-1.35.md for v1.35.2
k8s-release-robot Feb 26, 2026
946b7f7
Merge pull request #136387 from vikasbolla/automated-cherry-pick-of-#…
k8s-ci-robot Mar 3, 2026
659d489
dra: stabilize DeviceBindingConditions BasicFlow by creating without-…
tsj-30 Feb 26, 2026
b9be60a
start scheduler after creating binding/non-binding slices
tsj-30 Feb 26, 2026
4e48345
Merge pull request #136929 from BenTheElder/1.35-clean-fix
k8s-ci-robot Mar 3, 2026
61003ad
drop publishing rules from dependencies.yaml on release branch
BenTheElder Mar 3, 2026
60ebb8a
Merge pull request #137366 from tsj-30/automated-cherry-pick-of-#1372…
k8s-ci-robot Mar 4, 2026
af6488c
Use localhost image reference in PodObservedGenerationTracking test
Chandan9112 Feb 26, 2026
a0e5f1a
Merge pull request #136903 from pohly/automated-cherry-pick-of-#13645…
k8s-ci-robot Mar 4, 2026
b57e146
cmd/kubeadm: ignore EINVAL error during unmount
fuweid Mar 6, 2026
d595aaf
Merge pull request #137047 from vikasbolla/automated-cherry-pick-of-#…
k8s-ci-robot Mar 11, 2026
77d8b63
Merge pull request #137257 from ahrtr/automated-cherry-pick-of-#13725…
k8s-ci-robot Mar 11, 2026
fe6a745
Merge pull request #137382 from BenTheElder/no-rules
k8s-ci-robot Mar 11, 2026
f503429
Merge pull request #137400 from Chandan9112/automated-cherry-pick-of-…
k8s-ci-robot Mar 11, 2026
b6f632b
Merge pull request #137569 from fuweid/cp-1374942-135
k8s-ci-robot Mar 11, 2026
6c1cd99
Release commit for Kubernetes v1.35.3
k8s-release-robot Mar 18, 2026
d305427
Merge tag 'v1.35.3' into release-4.22
jubittajohn Mar 20, 2026
39a9d96
UPSTREAM: <drop>: hack/update-vendor.sh, make update and update image
jubittajohn Mar 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 151 additions & 52 deletions CHANGELOG/CHANGELOG-1.35.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions build/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,9 @@ function kube::build::clean() {
if [[ -d "${LOCAL_OUTPUT_ROOT}/local/go/cache" ]]; then
chmod -R +w "${LOCAL_OUTPUT_ROOT}/local/go/cache"
fi
if [[ -d "${LOCAL_OUTPUT_ROOT}/dockerized/go/cache" ]]; then
chmod -R +w "${LOCAL_OUTPUT_ROOT}/dockerized/go/cache"
fi
rm -rf "${LOCAL_OUTPUT_ROOT}"
fi
}
Expand Down
9 changes: 0 additions & 9 deletions build/dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,6 @@ dependencies:
- path: cluster/images/etcd/Makefile
match: 'GOLANG_VERSION := \d+.\d+(alpha|beta|rc)?\.?(\d+)?'

# Golang
# TODO: this should really be eliminated and controlled by .go-version
- name: "golang: upstream version"
version: 1.25.7
refPaths:
- path: .go-version
- path: staging/publishing/rules.yaml
match: 'default-go-version\: \d+.\d+(alpha|beta|rc)?\.?(\d+)?'

# This should ideally be updated to match the golang version
# but we can dynamically fetch go if the base image is out of date.
# This allows us to ship go updates more quickly.
Expand Down
6 changes: 6 additions & 0 deletions cmd/kubeadm/app/cmd/phases/reset/unmount_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ func unmountKubeletDirectory(kubeletRunDirectory string, flags []string) error {
}
klog.V(5).Infof("[reset] Unmounting %q", m[1])
if err := syscall.Unmount(m[1], flagsInt); err != nil {
// EINVAL is expected here if a duplicate mount entry
// was already unmounted via its shared peer.
if err == syscall.EINVAL {
klog.Warningf("[reset] Ignoring EINVAL error while unmounting %q", m[1])
continue
}
errList = append(errList, errors.WithMessagef(err, "failed to unmount %q", m[1]))
}
}
Expand Down
6 changes: 4 additions & 2 deletions cmd/kubeadm/app/util/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,10 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem
ret = append(ret, Member{Name: memberName, PeerURL: m.PeerURLs[0]})
}

// Add the new member client address to the list of endpoints
c.Endpoints = append(c.Endpoints, GetClientURLByIP(parsedPeerAddrs.Hostname()))
if !isLearner {
// Add the new member client address to the list of endpoints
c.Endpoints = append(c.Endpoints, GetClientURLByIP(parsedPeerAddrs.Hostname()))
}

return ret, nil
}
Expand Down
2 changes: 1 addition & 1 deletion openshift-hack/images/hyperkube/Dockerfile.rhel
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ COPY --from=builder /tmp/build/* /usr/bin/
LABEL io.k8s.display-name="OpenShift Kubernetes Server Commands" \
io.k8s.description="OpenShift is a platform for developing, building, and deploying containerized applications." \
io.openshift.tags="openshift,hyperkube" \
io.openshift.build.versions="kubernetes=1.35.2"
io.openshift.build.versions="kubernetes=1.35.3"
28 changes: 21 additions & 7 deletions pkg/controller/devicetainteviction/device_taint_eviction.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ func (tc *Controller) maybeDeletePod(ctx context.Context, podRef tainteviction.N
// Doing this immediately is not useful because
// it would just race with the informers update
// (rule status reads from cache!).
tc.logger.V(5).Info("Adding delayed status update because of pod eviction", "deviceTaintRule", klog.KObj(reason.rule), "delay", ruleStatusPeriod)
tc.workqueue.AddAfter(workItemForRule(reason.rule), ruleStatusPeriod)
}
}
Expand Down Expand Up @@ -1016,7 +1017,14 @@ func (tc *Controller) Run(ctx context.Context, numWorkers int) error {
func (tc *Controller) evictPod(podRef tainteviction.NamespacedObject, eviction evictionAndReason) {
tc.deletePodAt[podRef] = eviction
now := time.Now()
tc.workqueue.AddAfter(workItem{podRef: podRef}, eviction.when.Sub(now))
delay := eviction.when.Sub(now)
if delay <= 0 {
tc.logger.V(3).Info("Adding immediate pod eviction", "pod", podRef, "eviction", eviction)
tc.workqueue.Add(workItem{podRef: podRef})
} else {
tc.logger.V(3).Info("Adding delayed pod eviction", "pod", podRef, "eviction", eviction, "delay", delay)
tc.workqueue.AddAfter(workItem{podRef: podRef}, delay)
}

if tc.evictPodHook != nil {
tc.evictPodHook(podRef, eviction)
Expand Down Expand Up @@ -1275,7 +1283,8 @@ func (tc *Controller) handleRuleChange(oldRule, newRule *resourcealpha.DeviceTai
}

if oldRule == nil {
// Update the status at least once.
// Update the status at least once, immediately and before evicting any pods.
tc.logger.V(5).Info("Adding immediate status update because of new rule", "deviceTaintRule", klog.KObj(newRule))
tc.workqueue.Add(workItemForRule(newRule))
Comment on lines +1286 to 1288
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Queueing the rule item first does not guarantee the status update runs first.

These lines only change enqueue order. Once Run() uses multiple workers, a pod work item can still execute before the rule-status item after the mutex is released, so the promised “before evicting any pods” transition remains racy. The test workaround in pkg/controller/devicetainteviction/device_taint_eviction_test.go Lines 2429-2431 confirms the same ordering problem. If that sequencing matters, it needs explicit serialization/prioritization instead of relying on shared-queue order alone.

Also applies to: 1301-1307

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@pkg/controller/devicetainteviction/device_taint_eviction.go` around lines
1286 - 1288, The current approach only enqueues the rule with
tc.workqueue.Add(workItemForRule(newRule)) but does not guarantee its processing
runs before pod eviction work items; to fix, perform the required rule-status
update synchronously (call the controller method that updates rule status
directly, e.g., tc.syncRuleStatus or the same status-update routine used by the
worker) before releasing the mutex/enqueuing pods, or implement explicit
prioritization/serialization (e.g., a dedicated rule-priority queue or a
blocking wait until the rule-status work item is processed) instead of relying
on workqueue order; update both places where workItemForRule(newRule) is used
(the block around the tc.logger.V(5).Info call and the similar section at the
later occurrence) to ensure the status transition happens deterministically
before any pod-eviction work runs.

}

Expand All @@ -1289,9 +1298,13 @@ func (tc *Controller) handleRuleChange(oldRule, newRule *resourcealpha.DeviceTai

if oldRule != nil &&
newRule != nil &&
oldRule.UID == newRule.UID &&
apiequality.Semantic.DeepEqual(&oldRule.Spec, &newRule.Spec) {
return
oldRule.UID == newRule.UID {
if apiequality.Semantic.DeepEqual(&oldRule.Spec, &newRule.Spec) {
return
}
// Update the status at least once, immediately and before evicting any pods.
tc.logger.V(5).Info("Adding immediate status update because of modified rule spec", "deviceTaintRule", klog.KObj(newRule))
tc.workqueue.Add(workItemForRule(newRule))
}

// Rule spec changes should be rare. Simply do a brute-force re-evaluation of all allocated claims.
Expand Down Expand Up @@ -1475,13 +1488,14 @@ func (tc *Controller) handlePod(pod *v1.Pod) {
return
}

tc.logger.V(3).Info("Going to evict pod", "pod", podRef, "eviction", eviction)
tc.evictPod(podRef, *eviction)

// If any reason is because of a taint, then eviction is in progress and the status may need to be updated.
// But don't do it immediately because more pod changes may be coming in.
for _, reason := range eviction.reason {
if reason.rule != nil {
tc.workqueue.Add(workItemForRule(reason.rule))
tc.logger.V(5).Info("Adding delayed status update because of pod change", "deviceTaintRule", klog.KObj(reason.rule), "delay", ruleStatusPeriod)
tc.workqueue.AddAfter(workItemForRule(reason.rule), ruleStatusPeriod)
}
}
}
Expand Down
Loading