-
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathadmin_observability_prometheus.go
More file actions
125 lines (106 loc) · 4.81 KB
/
admin_observability_prometheus.go
File metadata and controls
125 lines (106 loc) · 4.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package worker
import (
"slices"
"strconv"
"strings"
)
const (
// adminMetricsPromContentType is a Content-Type value naming the Prometheus
// text exposition format, version 0.0.4, encoded as UTF-8.
// NOTE(review): presumably set on the HTTP response that serves Prometheus();
// the use site is not visible in this file — confirm.
adminMetricsPromContentType = "text/plain; version=0.0.4; charset=utf-8"
// adminMetricsInitialLines is the initial capacity of the line slice that
// Prometheus builds; the slice still grows past it when needed.
adminMetricsInitialLines = 128
)
// Prometheus renders the current observability snapshot in the Prometheus
// text exposition format.
//
// Output is grouped by metric family: each family's "# HELP" and "# TYPE"
// lines are immediately followed by every sample of that family, which is the
// layout the exposition format requires. HTTP and gRPC samples are emitted in
// the order returned by sortedKeys, so repeated calls over the same snapshot
// produce identical text. The returned string always ends with a newline.
func (collector *AdminObservability) Prometheus() string {
	snapshot := collector.Snapshot()
	lines := make([]string, 0, adminMetricsInitialLines)

	// header opens a metric family with its HELP and TYPE lines.
	header := func(name, help, kind string) {
		lines = append(lines, "# HELP "+name+" "+help, "# TYPE "+name+" "+kind)
	}
	// sample appends one sample line. labels is a pre-rendered, already
	// escaped label list, or "" for a metric without labels.
	sample := func(name, labels string, value int64) {
		if labels != "" {
			name += "{" + labels + "}"
		}
		lines = append(lines, name+" "+strconv.FormatInt(value, 10))
	}

	// keyedFamily describes one labelled metric family: its header fields and
	// how to read the sample value for a given snapshot map key.
	type keyedFamily struct {
		name, help, kind string
		value            func(key string) int64
	}
	// emit writes each family as one contiguous group: header first, then one
	// sample per key. labels[i] is the label list belonging to keys[i].
	emit := func(families []keyedFamily, keys, labels []string) {
		for _, fam := range families {
			header(fam.name, fam.help, fam.kind)
			for i, key := range keys {
				sample(fam.name, labels[i], fam.value(key))
			}
		}
	}

	header("worker_admin_uptime_seconds", "Admin service uptime in seconds.", "gauge")
	sample("worker_admin_uptime_seconds", "", snapshot.UptimeSec)

	// NOTE(review): last_code is part of the label set of the counters below,
	// so a change of the last status code starts a new time series for the
	// same route/method — confirm this is intended.
	httpKeys := sortedKeys(snapshot.HTTP)
	httpLabels := make([]string, len(httpKeys))
	for i, key := range httpKeys {
		method, route := splitHTTPMetricKey(key)
		httpLabels[i] = `method="` + promEscape(method) +
			`",route="` + promEscape(route) +
			`",last_code="` + promEscape(snapshot.HTTP[key].LastCode) + `"`
	}
	emit([]keyedFamily{
		{"worker_admin_http_calls_total", "Total HTTP requests per route.", "counter",
			func(key string) int64 { return snapshot.HTTP[key].Calls }},
		{"worker_admin_http_errors_total", "Total HTTP error responses per route.", "counter",
			func(key string) int64 { return snapshot.HTTP[key].Errors }},
		{"worker_admin_http_duration_milliseconds_sum", "Total HTTP latency in milliseconds per route.", "counter",
			func(key string) int64 { return snapshot.HTTP[key].TotalMs }},
		{"worker_admin_http_duration_milliseconds_max", "Maximum HTTP latency in milliseconds per route.", "gauge",
			func(key string) int64 { return snapshot.HTTP[key].MaxMs }},
	}, httpKeys, httpLabels)

	grpcKeys := sortedKeys(snapshot.GRPC)
	grpcLabels := make([]string, len(grpcKeys))
	for i, key := range grpcKeys {
		grpcLabels[i] = `method="` + promEscape(key) +
			`",last_code="` + promEscape(snapshot.GRPC[key].LastCode) + `"`
	}
	emit([]keyedFamily{
		{"worker_admin_grpc_calls_total", "Total gRPC requests per method.", "counter",
			func(key string) int64 { return snapshot.GRPC[key].Calls }},
		{"worker_admin_grpc_errors_total", "Total gRPC errors per method.", "counter",
			func(key string) int64 { return snapshot.GRPC[key].Errors }},
		{"worker_admin_grpc_duration_milliseconds_sum", "Total gRPC latency in milliseconds per method.", "counter",
			func(key string) int64 { return snapshot.GRPC[key].TotalMs }},
		{"worker_admin_grpc_duration_milliseconds_max", "Maximum gRPC latency in milliseconds per method.", "gauge",
			func(key string) int64 { return snapshot.GRPC[key].MaxMs }},
	}, grpcKeys, grpcLabels)

	// Job metrics carry no labels, so each family has exactly one sample.
	jobFamilies := []struct {
		name, help, kind string
		value            int64
	}{
		{"worker_admin_jobs_running", "Current number of running jobs.", "gauge", snapshot.Jobs.Running},
		{"worker_admin_jobs_completed_total", "Total completed jobs.", "counter", snapshot.Jobs.Completed},
		{"worker_admin_jobs_failed_total", "Total failed jobs.", "counter", snapshot.Jobs.Failed},
		{"worker_admin_jobs_duration_milliseconds_sum", "Total job runtime in milliseconds.", "counter", snapshot.Jobs.TotalMs},
		{"worker_admin_jobs_duration_milliseconds_max", "Maximum job runtime in milliseconds.", "gauge", snapshot.Jobs.MaxMs},
	}
	for _, fam := range jobFamilies {
		header(fam.name, fam.help, fam.kind)
		sample(fam.name, "", fam.value)
	}

	return strings.Join(lines, "\n") + "\n"
}
// sortedKeys returns the keys of values in ascending string order.
// The result is always a non-nil slice, even for a nil or empty map.
func sortedKeys[T any](values map[string]T) []string {
	names := make([]string, len(values))
	next := 0
	for name := range values {
		names[next] = name
		next++
	}
	slices.Sort(names)
	return names
}
// splitHTTPMetricKey splits an HTTP metric key of the form "METHOD route"
// into its method and route.
//
// Leading spaces are skipped before looking for the separator, so a key such
// as " GET /x" still yields method "GET". A key with no separating space is
// treated as a bare route. Both parts are trimmed of surrounding whitespace;
// an empty method is reported as "unknown" and an empty route as "/",
// regardless of which path produced them.
func splitHTTPMetricKey(key string) (method, route string) {
	// Strip leading spaces first: otherwise Cut would split at position 0 and
	// push the real method into the route.
	method, route, ok := strings.Cut(strings.TrimLeft(key, " "), " ")
	if !ok {
		// No separator: the whole key is the route and the method is unknown.
		method, route = "", method
	}
	method = strings.TrimSpace(method)
	route = strings.TrimSpace(route)
	if method == "" {
		method = "unknown"
	}
	if route == "" {
		route = "/"
	}
	return method, route
}
// promEscape escapes value for use inside a double-quoted Prometheus label
// value: backslash becomes \\, line feed becomes \n and double quote becomes
// \". All other bytes are copied through unchanged.
func promEscape(value string) string {
	var escaped strings.Builder
	escaped.Grow(len(value))
	// Walk bytes rather than runes: the three escaped characters are ASCII,
	// and every other byte must pass through untouched.
	for i := 0; i < len(value); i++ {
		switch c := value[i]; c {
		case '\\':
			escaped.WriteString(`\\`)
		case '\n':
			escaped.WriteString(`\n`)
		case '"':
			escaped.WriteString(`\"`)
		default:
			escaped.WriteByte(c)
		}
	}
	return escaped.String()
}