Skip to content

Commit 9b7f5ae

Browse files
Merge pull request cri-o#5487 from swghosh/metrics-refactor
metrics: introduce new metrics matching Prometheus best practices
2 parents cbcc270 + dba27ab commit 9b7f5ae

File tree

8 files changed

+374
-80
lines changed

8 files changed

+374
-80
lines changed

docs/crio.8.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ crio [GLOBAL OPTIONS] command [COMMAND OPTIONS] [ARGUMENTS...]
278278

279279
**--metrics-cert**="": Certificate for the secure metrics endpoint
280280

281-
**--metrics-collectors**="": Enabled metrics collectors (default: [operations operations_latency_microseconds_total operations_latency_microseconds operations_errors image_pulls_by_digest image_pulls_by_name image_pulls_by_name_skipped image_pulls_failures image_pulls_successes image_pulls_layer_size image_layer_reuse containers_oom_total containers_oom processes_defunct])
281+
**--metrics-collectors**="": Enabled metrics collectors (default: [operations operations_latency_microseconds_total operations_latency_microseconds operations_errors image_pulls_by_digest image_pulls_by_name image_pulls_by_name_skipped image_pulls_failures image_pulls_successes image_pulls_layer_size image_layer_reuse containers_oom_total containers_oom processes_defunct operations_total operations_latency_seconds operations_latency_seconds_total operations_errors_total image_pulls_bytes_total image_pulls_skipped_bytes_total image_pulls_failure_total image_pulls_success_total image_layer_reuse_total containers_oom_count_total])
282282

283283
**--metrics-key**="": Certificate key for the secure metrics endpoint
284284

internal/oci/runtime_oci.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -909,7 +909,7 @@ func (r *runtimeOCI) UpdateContainerStatus(ctx context.Context, c *Container) er
909909
metrics.Instance().MetricContainersOOMTotalInc()
910910

911911
// Collect metric by container name
912-
metrics.Instance().MetricContainersOOMInc(c.Name())
912+
metrics.Instance().MetricContainersOOMCountTotalInc(c.Name())
913913
}
914914

915915
return nil

internal/oci/runtime_vm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ func (r *runtimeVM) updateContainerStatus(ctx context.Context, c *Container) err
707707
metrics.Instance().MetricContainersOOMTotalInc()
708708

709709
// Collect metric by container name
710-
metrics.Instance().MetricContainersOOMInc(c.Name())
710+
metrics.Instance().MetricContainersOOMCountTotalInc(c.Name())
711711
}
712712
}
713713
return nil

server/image_pull.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ func (s *Server) pullImage(ctx context.Context, pullArgs *pullArguments) (string
164164
// Skipped bytes metrics
165165
if storedImage.Size != nil {
166166
metrics.Instance().MetricImagePullsByNameSkippedAdd(float64(*storedImage.Size), img)
167+
// Metrics for image pull skipped bytes
168+
metrics.Instance().MetricImagePullsSkippedBytesAdd(float64(*storedImage.Size))
167169
}
168170

169171
break
@@ -204,6 +206,13 @@ func (s *Server) pullImage(ctx context.Context, pullArgs *pullArguments) (string
204206
img, fmt.Sprintf("%d", imageSize(tmpImg)),
205207
)
206208

209+
// Metrics for image pull bytes
210+
metrics.Instance().MetricImagePullsBytesAdd(
211+
float64(p.OffsetUpdate),
212+
p.Artifact.MediaType,
213+
p.Artifact.Size,
214+
)
215+
207216
// Metrics for size histogram
208217
if p.Event == imageTypes.ProgressEventDone {
209218
metrics.Instance().MetricImagePullsLayerSizeObserve(p.Artifact.Size)

server/metrics/collectors/collectors.go

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,46 +17,87 @@ const (
1717
subsystemPrefix = Subsystem + "_"
1818

1919
// Operations is the key for CRI-O operation metrics.
20+
// Deprecated: in favour of OperationsTotal
2021
Operations Collector = crioPrefix + "operations"
2122

2223
// OperationsLatencyTotal is the key for the operation latency metrics.
24+
// Deprecated: in favour of OperationsLatencySecondsTotal
2325
OperationsLatencyTotal Collector = crioPrefix + "operations_latency_microseconds_total"
2426

2527
// OperationsLatency is the key for the operation latency metrics for each CRI call.
28+
// Deprecated: in favour of OperationsLatencySeconds
2629
OperationsLatency Collector = crioPrefix + "operations_latency_microseconds"
2730

2831
// OperationsErrors is the key for the operation error metrics.
32+
// Deprecated: in favour of OperationsErrorsTotal
2933
OperationsErrors Collector = crioPrefix + "operations_errors"
3034

3135
// ImagePullsByDigest is the key for CRI-O image pull metrics by digest.
36+
// Deprecated: in favour of ImagePullsBytesTotal
3237
ImagePullsByDigest Collector = crioPrefix + "image_pulls_by_digest"
3338

3439
// ImagePullsByName is the key for CRI-O image pull metrics by name.
40+
// Deprecated: in favour of ImagePullsBytesTotal
3541
ImagePullsByName Collector = crioPrefix + "image_pulls_by_name"
3642

3743
// ImagePullsByNameSkipped is the key for CRI-O skipped image pull metrics by name.
44+
// Deprecated: in favour of ImagePullsSkippedBytesTotal
3845
ImagePullsByNameSkipped Collector = crioPrefix + "image_pulls_by_name_skipped"
3946

4047
// ImagePullsFailures is the key for failed image downloads in CRI-O.
48+
// Deprecated: in favour of ImagePullsFailureTotal
4149
ImagePullsFailures Collector = crioPrefix + "image_pulls_failures"
4250

4351
// ImagePullsSuccesses is the key for successful image downloads in CRI-O.
52+
// Deprecated: in favour of ImagePullsSuccessTotal
4453
ImagePullsSuccesses Collector = crioPrefix + "image_pulls_successes"
4554

4655
// ImagePullsLayerSize is the key for CRI-O image pull metrics per layer.
4756
ImagePullsLayerSize Collector = crioPrefix + "image_pulls_layer_size"
4857

4958
// ImageLayerReuse is the key for the CRI-O image layer reuse metrics.
59+
// Deprecated: in favour of ImageLayerReuseTotal
5060
ImageLayerReuse Collector = crioPrefix + "image_layer_reuse"
5161

5262
// ContainersOOMTotal is the key for the total CRI-O container out of memory metrics.
5363
ContainersOOMTotal Collector = crioPrefix + "containers_oom_total"
5464

5565
// ContainersOOM is the key for the CRI-O container out of memory metrics per container name.
66+
// Deprecated: in favour of ContainersOOMCountTotal
5667
ContainersOOM Collector = crioPrefix + "containers_oom"
5768

5869
// ProcessesDefunct is the key for the total number of defunct processes in a node.
5970
ProcessesDefunct Collector = crioPrefix + "processes_defunct"
71+
72+
// OperationsTotal is the key for CRI-O operation metrics.
73+
OperationsTotal Collector = crioPrefix + "operations_total"
74+
75+
// OperationsLatencySeconds is the key for the operation latency metrics for each CRI call.
76+
OperationsLatencySeconds Collector = crioPrefix + "operations_latency_seconds"
77+
78+
// OperationsLatencySecondsTotal is the key for the operation latency metrics.
79+
OperationsLatencySecondsTotal Collector = crioPrefix + "operations_latency_seconds_total"
80+
81+
// OperationsErrorsTotal is the key for the operation error metrics.
82+
OperationsErrorsTotal Collector = crioPrefix + "operations_errors_total"
83+
84+
// ImagePullsBytesTotal is the key for the CRI-O image pull bytes metrics.
85+
ImagePullsBytesTotal Collector = crioPrefix + "image_pulls_bytes_total"
86+
87+
// ImagePullsSkippedBytesTotal is the key for the CRI-O skipped image pull bytes metrics.
88+
ImagePullsSkippedBytesTotal Collector = crioPrefix + "image_pulls_skipped_bytes_total"
89+
90+
// ImagePullsFailureTotal is the key for failed image downloads in CRI-O.
91+
ImagePullsFailureTotal Collector = crioPrefix + "image_pulls_failure_total"
92+
93+
// ImagePullsSuccessTotal is the key for successful image downloads in CRI-O.
94+
ImagePullsSuccessTotal Collector = crioPrefix + "image_pulls_success_total"
95+
96+
// ImageLayerReuseTotal is the key for the CRI-O image layer reuse metrics.
97+
ImageLayerReuseTotal Collector = crioPrefix + "image_layer_reuse_total"
98+
99+
// ContainersOOMCountTotal is the key for the CRI-O container out of memory metrics per container name.
100+
ContainersOOMCountTotal Collector = crioPrefix + "containers_oom_count_total"
60101
)
61102

62103
// FromSlice converts a string slice to a Collectors type.
@@ -79,20 +120,30 @@ func (c Collectors) ToSlice() (r []string) {
79120
// name key.
80121
func All() Collectors {
81122
return Collectors{
82-
Operations.Stripped(),
83-
OperationsLatencyTotal.Stripped(),
84-
OperationsLatency.Stripped(),
85-
OperationsErrors.Stripped(),
86-
ImagePullsByDigest.Stripped(),
87-
ImagePullsByName.Stripped(),
88-
ImagePullsByNameSkipped.Stripped(),
89-
ImagePullsFailures.Stripped(),
90-
ImagePullsSuccesses.Stripped(),
123+
Operations.Stripped(), // Deprecated: in favour of OperationsTotal
124+
OperationsLatencyTotal.Stripped(), // Deprecated: in favour of OperationsLatencySecondsTotal
125+
OperationsLatency.Stripped(), // Deprecated: in favour of OperationsLatencySeconds
126+
OperationsErrors.Stripped(), // Deprecated: in favour of OperationsErrorsTotal
127+
ImagePullsByDigest.Stripped(), // Deprecated: in favour of ImagePullsBytesTotal
128+
ImagePullsByName.Stripped(), // Deprecated: in favour of ImagePullsBytesTotal
129+
ImagePullsByNameSkipped.Stripped(), // Deprecated: in favour of ImagePullsSkippedBytesTotal
130+
ImagePullsFailures.Stripped(), // Deprecated: in favour of ImagePullsFailureTotal
131+
ImagePullsSuccesses.Stripped(), // Deprecated: in favour of ImagePullsSuccessTotal
91132
ImagePullsLayerSize.Stripped(),
92-
ImageLayerReuse.Stripped(),
133+
ImageLayerReuse.Stripped(), // Deprecated: in favour of ImageLayerReuseTotal
93134
ContainersOOMTotal.Stripped(),
94-
ContainersOOM.Stripped(),
135+
ContainersOOM.Stripped(), // Deprecated: in favour of ContainersOOMCountTotal
95136
ProcessesDefunct.Stripped(),
137+
OperationsTotal.Stripped(),
138+
OperationsLatencySeconds.Stripped(),
139+
OperationsLatencySecondsTotal.Stripped(),
140+
OperationsErrorsTotal.Stripped(),
141+
ImagePullsBytesTotal.Stripped(),
142+
ImagePullsSkippedBytesTotal.Stripped(),
143+
ImagePullsFailureTotal.Stripped(),
144+
ImagePullsSuccessTotal.Stripped(),
145+
ImageLayerReuseTotal.Stripped(),
146+
ContainersOOMCountTotal.Stripped(),
96147
}
97148
}
98149

server/metrics/collectors/collectors_test.go

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,25 +36,35 @@ var _ = t.Describe("Collectors", func() {
3636

3737
// When / Then
3838
for _, collector := range []collectors.Collector{
39-
collectors.Operations,
40-
collectors.OperationsLatencyTotal,
41-
collectors.OperationsLatency,
42-
collectors.OperationsErrors,
43-
collectors.ImagePullsByDigest,
44-
collectors.ImagePullsByName,
45-
collectors.ImagePullsByNameSkipped,
46-
collectors.ImagePullsFailures,
47-
collectors.ImagePullsSuccesses,
39+
collectors.Operations, // Deprecated: in favour of OperationsTotal
40+
collectors.OperationsLatencyTotal, // Deprecated: in favour of OperationsLatencySecondsTotal
41+
collectors.OperationsLatency, // Deprecated: in favour of OperationsLatencySeconds
42+
collectors.OperationsErrors, // Deprecated: in favour of OperationsErrorsTotal
43+
collectors.ImagePullsByDigest, // Deprecated: in favour of ImagePullsBytesTotal
44+
collectors.ImagePullsByName, // Deprecated: in favour of ImagePullsBytesTotal
45+
collectors.ImagePullsByNameSkipped, // Deprecated: in favour of ImagePullsSkippedBytesTotal
46+
collectors.ImagePullsFailures, // Deprecated: in favour of ImagePullsFailureTotal
47+
collectors.ImagePullsSuccesses, // Deprecated: in favour of ImagePullsSuccessTotal
4848
collectors.ImagePullsLayerSize,
49-
collectors.ImageLayerReuse,
49+
collectors.ImageLayerReuse, // Deprecated: in favour of ImageLayerReuseTotal
5050
collectors.ContainersOOMTotal,
51-
collectors.ContainersOOM,
51+
collectors.ContainersOOM, // Deprecated: in favour of ContainersOOMCountTotal
5252
collectors.ProcessesDefunct,
53+
collectors.OperationsTotal,
54+
collectors.OperationsLatencySeconds,
55+
collectors.OperationsLatencySecondsTotal,
56+
collectors.OperationsErrorsTotal,
57+
collectors.ImagePullsBytesTotal,
58+
collectors.ImagePullsSkippedBytesTotal,
59+
collectors.ImagePullsFailureTotal,
60+
collectors.ImagePullsSuccessTotal,
61+
collectors.ImageLayerReuseTotal,
62+
collectors.ContainersOOMCountTotal,
5363
} {
5464
Expect(all.Contains(collector)).To(BeTrue())
5565
}
5666

57-
Expect(all).To(HaveLen(14))
67+
Expect(all).To(HaveLen(24))
5868
})
5969
})
6070

0 commit comments

Comments
 (0)