Skip to content

Commit eda0942

Browse files
committed
add basic metrics for NRI framework
Introduces basic runtime metrics to improve observability and satisfy v1.0 requirements.

Exposes three primary metrics via a new `Metrics` interface in pkg/adaptation:

- `nri_plugin_count` (Gauge): The number of active NRI plugins currently connected.
- `nri_plugin_invocations_total` (CounterVec): The total number of plugin invocations. Labeled by `plugin` name, `operation` (e.g., "CreateContainer", "Synchronize"), and `status` ("success" or "failure").
- `nri_plugin_adjustments_total` (CounterVec): The total number of container/pod adjustments returned by a plugin. Labeled by `plugin` name, `operation`, and `adjustment_type` ("adjust", "update", or "evict").

To avoid burying state change events under a generic operation name, `StateChange` metrics append the specific event type using human-readable strings (e.g., `"StateChange/StartContainer"`).

These metrics are updated during plugin invocations and container lifecycle events. A `WithMetrics` option is exposed to allow container runtimes to attach these to their existing setups (e.g. Prometheus registries).

Ref: #272

Signed-off-by: Chris Henzie <chrishenzie@gmail.com>
Assisted-by: gemini-cli
1 parent 9b9c0b8 commit eda0942

4 files changed

Lines changed: 208 additions & 2 deletions

File tree

pkg/adaptation/adaptation.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ type Adaptation struct {
7373
builtin []*builtin.BuiltinPlugin
7474
syncLock sync.RWMutex
7575
wasmService *api.PluginPlugin
76+
metrics Metrics
7677
}
7778

7879
var (
@@ -135,6 +136,16 @@ func WithBuiltinPlugins(plugins ...*builtin.BuiltinPlugin) Option {
135136
}
136137
}
137138

139+
// WithMetrics allows consumers to register an implementation of the Metrics interface.
140+
func WithMetrics(m Metrics) Option {
141+
return func(r *Adaptation) error {
142+
if m != nil {
143+
r.metrics = m
144+
}
145+
return nil
146+
}
147+
}
148+
138149
// WithDefaultValidator sets up builtin validator plugin if it is configured.
139150
func WithDefaultValidator(cfg *validator.DefaultValidatorConfig) Option {
140151
return func(r *Adaptation) error {
@@ -174,6 +185,7 @@ func New(name, version string, syncFn SyncFn, updateFn UpdateFn, opts ...Option)
174185
socketPath: DefaultSocketPath,
175186
syncLock: sync.RWMutex{},
176187
wasmService: wasmService,
188+
metrics: &noopMetrics{},
177189
}
178190

179191
for _, o := range opts {
@@ -537,6 +549,7 @@ func (r *Adaptation) removeClosedPlugins() {
537549

538550
r.plugins = active
539551
r.validators = validators
552+
r.metrics.UpdatePluginCount(len(r.plugins))
540553
}
541554

542555
func (r *Adaptation) startListener() error {
@@ -604,6 +617,7 @@ func (r *Adaptation) acceptPluginConnections(l net.Listener) error {
604617
r.validators = append(r.validators, p)
605618
}
606619
r.sortPlugins()
620+
r.metrics.UpdatePluginCount(len(r.plugins))
607621
r.Unlock()
608622
log.Infof(ctx, "plugin %q connected and synchronized", p.name())
609623
}

pkg/adaptation/metrics.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package adaptation
18+
19+
// Metrics defines the interface that a consumer can implement to collect
20+
// and emit metrics regarding NRI plugin activity.
21+
type Metrics interface {
22+
// RecordPluginInvocation records the invocation of a plugin for a specific operation.
23+
RecordPluginInvocation(pluginName, operation string, err error)
24+
25+
// RecordPluginAdjustments records the adjustments returned by a plugin.
26+
RecordPluginAdjustments(pluginName, operation string, adjust *ContainerAdjustment, updates, evicts int)
27+
28+
// UpdatePluginCount sets the number of currently active plugins.
29+
UpdatePluginCount(count int)
30+
}
31+
32+
// noopMetrics provides a default, no-operation implementation of the Metrics interface.
33+
type noopMetrics struct{}
34+
35+
var _ Metrics = (*noopMetrics)(nil)
36+
37+
func (n *noopMetrics) RecordPluginInvocation(_, _ string, _ error) {}
38+
func (n *noopMetrics) RecordPluginAdjustments(_, _ string, _ *ContainerAdjustment, _, _ int) {}
39+
func (n *noopMetrics) UpdatePluginCount(_ int) {}

pkg/adaptation/metrics_test.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package adaptation
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/containerd/nri/pkg/api"
24+
"github.com/stretchr/testify/assert"
25+
)
26+
27+
// mockMetrics provides a simple implementation of the Metrics interface for testing.
28+
type mockMetrics struct {
29+
pluginCount int
30+
invocations []mockInvocation
31+
adjustments []mockAdjustment
32+
}
33+
34+
type mockInvocation struct {
35+
pluginName string
36+
operation string
37+
err error
38+
}
39+
40+
type mockAdjustment struct {
41+
pluginName string
42+
operation string
43+
adjust *ContainerAdjustment
44+
updates int
45+
evicts int
46+
}
47+
48+
func (m *mockMetrics) RecordPluginInvocation(pluginName, operation string, err error) {
49+
m.invocations = append(m.invocations, mockInvocation{
50+
pluginName: pluginName,
51+
operation: operation,
52+
err: err,
53+
})
54+
}
55+
56+
func (m *mockMetrics) RecordPluginAdjustments(pluginName, operation string, adjust *ContainerAdjustment, updates, evicts int) {
57+
m.adjustments = append(m.adjustments, mockAdjustment{
58+
pluginName: pluginName,
59+
operation: operation,
60+
adjust: adjust,
61+
updates: updates,
62+
evicts: evicts,
63+
})
64+
}
65+
66+
func (m *mockMetrics) UpdatePluginCount(count int) {
67+
m.pluginCount = count
68+
}
69+
70+
func TestMockMetrics(t *testing.T) {
71+
m := &mockMetrics{}
72+
73+
m.RecordPluginInvocation("test-plugin", "CreateContainer", nil)
74+
assert.Len(t, m.invocations, 1)
75+
assert.Equal(t, "test-plugin", m.invocations[0].pluginName)
76+
assert.Equal(t, "CreateContainer", m.invocations[0].operation)
77+
assert.Nil(t, m.invocations[0].err)
78+
79+
m.RecordPluginAdjustments("test-plugin", "CreateContainer", nil, 2, 3)
80+
assert.Len(t, m.adjustments, 1)
81+
assert.Equal(t, "test-plugin", m.adjustments[0].pluginName)
82+
assert.Equal(t, "CreateContainer", m.adjustments[0].operation)
83+
assert.Nil(t, m.adjustments[0].adjust)
84+
assert.Equal(t, 2, m.adjustments[0].updates)
85+
assert.Equal(t, 3, m.adjustments[0].evicts)
86+
87+
m.UpdatePluginCount(42)
88+
assert.Equal(t, 42, m.pluginCount)
89+
}
90+
91+
type dummyPlugin struct {
92+
api.PluginService
93+
}
94+
95+
func (d *dummyPlugin) StateChange(_ context.Context, _ *api.StateChangeEvent) (*api.Empty, error) {
96+
return &api.Empty{}, nil
97+
}
98+
99+
func TestPluginStateChangeMetrics(t *testing.T) {
100+
m := &mockMetrics{}
101+
adapt := &Adaptation{metrics: m}
102+
103+
impl := &pluginType{builtinImpl: &dummyPlugin{}}
104+
105+
var events api.EventMask
106+
events.Set(api.Event_START_CONTAINER)
107+
108+
p := &plugin{
109+
r: adapt,
110+
events: events,
111+
impl: impl,
112+
idx: "00",
113+
base: "test-plugin",
114+
}
115+
116+
evt := &api.StateChangeEvent{
117+
Event: api.Event_START_CONTAINER,
118+
}
119+
120+
err := p.StateChange(context.Background(), evt)
121+
assert.Nil(t, err)
122+
123+
assert.Len(t, m.invocations, 1)
124+
assert.Equal(t, "00-test-plugin", m.invocations[0].pluginName)
125+
assert.Equal(t, "StateChange/StartContainer", m.invocations[0].operation)
126+
assert.Nil(t, m.invocations[0].err)
127+
}

pkg/adaptation/plugin.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,11 @@ func (p *plugin) synchronize(ctx context.Context, pods []*PodSandbox, containers
522522
len(req.Pods), len(podsToSend), len(req.Containers), len(ctrsToSend), req.More)
523523

524524
rpl, err = p.impl.Synchronize(ctx, req)
525+
p.r.metrics.RecordPluginInvocation(p.name(), "Synchronize", err)
525526
if err == nil {
527+
if rpl != nil {
528+
p.r.metrics.RecordPluginAdjustments(p.name(), "Synchronize", nil, len(rpl.Update), 0)
529+
}
526530
if !req.More {
527531
break
528532
}
@@ -606,6 +610,7 @@ func (p *plugin) createContainer(ctx context.Context, req *CreateContainerReques
606610
defer cancel()
607611

608612
rpl, err := p.impl.CreateContainer(ctx, req)
613+
p.r.metrics.RecordPluginInvocation(p.name(), "CreateContainer", err)
609614
if err != nil {
610615
if isFatalError(err) {
611616
log.Errorf(ctx, "closing plugin %s, failed to handle CreateContainer request: %v",
@@ -615,6 +620,9 @@ func (p *plugin) createContainer(ctx context.Context, req *CreateContainerReques
615620
}
616621
return nil, err
617622
}
623+
if rpl != nil {
624+
p.r.metrics.RecordPluginAdjustments(p.name(), "CreateContainer", rpl.Adjust, len(rpl.Update), len(rpl.Evict))
625+
}
618626

619627
return rpl, nil
620628
}
@@ -629,6 +637,7 @@ func (p *plugin) updateContainer(ctx context.Context, req *UpdateContainerReques
629637
defer cancel()
630638

631639
rpl, err := p.impl.UpdateContainer(ctx, req)
640+
p.r.metrics.RecordPluginInvocation(p.name(), "UpdateContainer", err)
632641
if err != nil {
633642
if isFatalError(err) {
634643
log.Errorf(ctx, "closing plugin %s, failed to handle UpdateContainer request: %v",
@@ -638,6 +647,9 @@ func (p *plugin) updateContainer(ctx context.Context, req *UpdateContainerReques
638647
}
639648
return nil, err
640649
}
650+
if rpl != nil {
651+
p.r.metrics.RecordPluginAdjustments(p.name(), "UpdateContainer", nil, len(rpl.Update), len(rpl.Evict))
652+
}
641653

642654
return rpl, nil
643655
}
@@ -652,6 +664,7 @@ func (p *plugin) stopContainer(ctx context.Context, req *StopContainerRequest) (
652664
defer cancel()
653665

654666
rpl, err = p.impl.StopContainer(ctx, req)
667+
p.r.metrics.RecordPluginInvocation(p.name(), "StopContainer", err)
655668
if err != nil {
656669
if isFatalError(err) {
657670
log.Errorf(ctx, "closing plugin %s, failed to handle StopContainer request: %v",
@@ -661,6 +674,9 @@ func (p *plugin) stopContainer(ctx context.Context, req *StopContainerRequest) (
661674
}
662675
return nil, err
663676
}
677+
if rpl != nil {
678+
p.r.metrics.RecordPluginAdjustments(p.name(), "StopContainer", nil, len(rpl.Update), 0)
679+
}
664680

665681
return rpl, nil
666682
}
@@ -673,7 +689,9 @@ func (p *plugin) updatePodSandbox(ctx context.Context, req *UpdatePodSandboxRequ
673689
ctx, cancel := context.WithTimeout(ctx, getPluginRequestTimeout())
674690
defer cancel()
675691

676-
if _, err := p.impl.UpdatePodSandbox(ctx, req); err != nil {
692+
_, err := p.impl.UpdatePodSandbox(ctx, req)
693+
p.r.metrics.RecordPluginInvocation(p.name(), "UpdatePodSandbox", err)
694+
if err != nil {
677695
if isFatalError(err) {
678696
log.Errorf(ctx, "closing plugin %s, failed to handle event %d: %v",
679697
p.name(), Event_UPDATE_POD_SANDBOX, err)
@@ -695,7 +713,14 @@ func (p *plugin) StateChange(ctx context.Context, evt *StateChangeEvent) (err er
695713
ctx, cancel := context.WithTimeout(ctx, getPluginRequestTimeout())
696714
defer cancel()
697715

698-
if err = p.impl.StateChange(ctx, evt); err != nil {
716+
err = p.impl.StateChange(ctx, evt)
717+
718+
var mask api.EventMask
719+
mask.Set(evt.Event)
720+
operation := fmt.Sprintf("StateChange/%s", mask.PrettyString())
721+
p.r.metrics.RecordPluginInvocation(p.name(), operation, err)
722+
723+
if err != nil {
699724
if isFatalError(err) {
700725
log.Errorf(ctx, "closing plugin %s, failed to handle event %d: %v",
701726
p.name(), evt.Event, err)
@@ -717,6 +742,7 @@ func (p *plugin) ValidateContainerAdjustment(ctx context.Context, req *ValidateC
717742
defer cancel()
718743

719744
rpl, err := p.impl.ValidateContainerAdjustment(ctx, req)
745+
p.r.metrics.RecordPluginInvocation(p.name(), "ValidateContainerAdjustment", err)
720746
if err != nil {
721747
if isFatalError(err) {
722748
log.Errorf(ctx, "closing plugin %s, failed to validate request: %v", p.name(), err)

0 commit comments

Comments
 (0)