Skip to content

Commit 2d66de3

Browse files
committed
feat: extend oci runtime to collect and manage disk metrics
Signed-off-by: Rehan Khan <[email protected]>
1 parent 7fda065 commit 2d66de3

40 files changed

+191
-3233
lines changed

go.mod

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ require (
3636
github.com/go-chi/chi/v5 v5.2.3
3737
github.com/go-logr/logr v1.4.3
3838
github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466
39-
github.com/google/cadvisor v0.53.0
4039
github.com/google/go-cmp v0.7.0
4140
github.com/google/renameio v1.0.1
4241
github.com/google/uuid v1.6.0
@@ -187,7 +186,6 @@ require (
187186
github.com/mattn/go-shellwords v1.0.12 // indirect
188187
github.com/mattn/go-sqlite3 v1.14.28 // indirect
189188
github.com/miekg/pkcs11 v1.1.1 // indirect
190-
github.com/mistifyio/go-zfs v2.1.1+incompatible // indirect
191189
github.com/mistifyio/go-zfs/v3 v3.0.1 // indirect
192190
github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c // indirect
193191
github.com/moby/docker-image-spec v1.3.1 // indirect

go.sum

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,6 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw
269269
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
270270
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
271271
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
272-
github.com/google/cadvisor v0.53.0 h1:pmveUw2VBlr/T2SBE9Fsp8gdLhKWyOBkECGbaas9mcI=
273-
github.com/google/cadvisor v0.53.0/go.mod h1:Tz3zf/exzFfdWd1T/U/9eNst0ZR2C6CIV62LJATj5tg=
274272
github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo=
275273
github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ=
276274
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@@ -378,8 +376,6 @@ github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3v
378376
github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A=
379377
github.com/miekg/pkcs11 v1.1.1 h1:Ugu9pdy6vAYku5DEpVWVFPYnzV+bxB+iRdbuFSu7TvU=
380378
github.com/miekg/pkcs11 v1.1.1/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
381-
github.com/mistifyio/go-zfs v2.1.1+incompatible h1:gAMO1HM9xBRONLHHYnu5iFsOJUiJdNZo6oqSENd4eW8=
382-
github.com/mistifyio/go-zfs v2.1.1+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
383379
github.com/mistifyio/go-zfs/v3 v3.0.1 h1:YaoXgBePoMA12+S1u/ddkv+QqxcfiZK4prI6HPnkFiU=
384380
github.com/mistifyio/go-zfs/v3 v3.0.1/go.mod h1:CzVgeB0RvF2EGzQnytKVvVSDwmKJXxkOTUGbNrTja/k=
385381
github.com/mitchellh/mapstructure v1.5.1-0.20231216201459-8508981c8b6c h1:cqn374mizHuIWj+OSJCajGr/phAmuMug9qIX3l9CflE=

internal/config/cgmgr/stats_linux.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@ import (
1616
// We could have used the libcontainer/cgroups.Stats object as a standard stats object for cri-o.
1717
// But due to its incompatibility with non-linux platforms,
1818
// we have to create our own object that can be moved around regardless of the runtime.
19-
type ContainerRuntimeStats struct {
20-
Cgroup *CgroupStats
21-
Disk *DiskMetrics
22-
}
23-
2419
type CgroupStats struct {
2520
Memory *MemoryStats
2621
CPU *CPUStats
@@ -74,13 +69,6 @@ type PidsStats struct {
7469
Limit uint64
7570
}
7671

77-
type DiskMetrics struct {
78-
UsageBytes uint64
79-
LimitBytes uint64
80-
InodesFree uint64
81-
InodesTotal uint64
82-
}
83-
8472
// MemLimitGivenSystem returns the memory limit for a given cgroup.
8573
// If the configured memory limit is larger than the total memory on the sys, the
8674
// physical system memory size is returned.

internal/config/cgmgr/stats_unsupported.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,6 @@ import (
66
"github.com/containers/storage/pkg/system"
77
)
88

9-
type ContainerRuntimeStats struct {
10-
Cgroup *CgroupStats
11-
Disk *DiskMetrics
12-
}
139
type CgroupStats struct {
1410
Memory *MemoryStats
1511
CPU *CPUStats
@@ -59,13 +55,6 @@ type PidsStats struct {
5955
Limit uint64
6056
}
6157

62-
type DiskMetrics struct {
63-
UsageBytes uint64
64-
LimitBytes uint64
65-
InodesFree uint64
66-
InodesTotal uint64
67-
}
68-
6958
// MemLimitGivenSystem returns the memory limit for a given cgroup.
7059
// If the configured memory limit is larger than the total memory on the sys, the
7160
// physical system memory size is returned

internal/lib/stats/descriptors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ var (
3737
)
3838

3939
// Disk metrics.
40+
// TODO: Add remaining container filesystem metrics.
4041
var (
4142
containerFsInodesFree = &types.MetricDescriptor{
4243
Name: "container_fs_inodes_free",

internal/lib/stats/disk_metrics.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@ package statsserver
33
import (
44
types "k8s.io/cri-api/pkg/apis/runtime/v1"
55

6-
"github.com/cri-o/cri-o/internal/config/cgmgr"
7-
"github.com/cri-o/cri-o/internal/lib/sandbox"
6+
"github.com/cri-o/cri-o/internal/oci"
87
)
98

109
// generateContainerDiskMetrics computes filesystem disk metrics from FilesystemMetrics for a container.
11-
func generateSandboxDiskMetrics(sb *sandbox.Sandbox, diskStats *cgmgr.DiskMetrics) []*types.Metric {
10+
func generateContainerDiskMetrics(ctr *oci.Container, diskStats *oci.FilesystemMetrics) []*types.Metric {
1211
diskMetrics := []*containerMetric{
1312
{
1413
desc: containerFsInodesFree,
@@ -36,5 +35,5 @@ func generateSandboxDiskMetrics(sb *sandbox.Sandbox, diskStats *cgmgr.DiskMetric
3635
},
3736
}
3837

39-
return computeSandboxMetrics(sb, diskMetrics, "disk")
38+
return computeContainerMetrics(ctr, diskMetrics, "disk")
4039
}

internal/lib/stats/stats_server_linux.go

Lines changed: 36 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@ import (
44
"errors"
55
"fmt"
66
"slices"
7-
<<<<<<< HEAD
8-
"syscall"
9-
"time"
10-
=======
11-
>>>>>>> 2cfc575ad (Extended ContainerStats to include disk metrics)
127

138
"github.com/containernetworking/plugins/pkg/ns"
149
"github.com/vishvananda/netlink"
@@ -75,11 +70,14 @@ func (ss *StatsServer) updateSandbox(sb *sandbox.Sandbox) *types.PodSandboxStats
7570
ctrStats, err := ss.Runtime().ContainerStats(ss.ctx, c, sb.CgroupParent())
7671
if err != nil {
7772
log.Errorf(ss.ctx, "Error getting container stats %s: %v", c.ID(), err)
73+
}
7874

79-
continue
75+
diskStats, err := ss.Runtime().DiskStats(ss.ctx, c, sb.CgroupParent())
76+
if err != nil {
77+
log.Errorf(ss.ctx, "Error getting disk stats %s: %v", c.ID(), err)
8078
}
8179
// Convert container stats (cgroup + disk) to CRI stats.
82-
cStats := containerCRIStats(ctrStats, c, ctrStats.Cgroup.SystemNano)
80+
cStats := containerCRIStats(ctrStats, diskStats, c, ctrStats.SystemNano)
8381
ss.populateWritableLayer(cStats, c)
8482

8583
if oldcStats, ok := ss.ctrStats[c.ID()]; ok {
@@ -89,7 +87,7 @@ func (ss *StatsServer) updateSandbox(sb *sandbox.Sandbox) *types.PodSandboxStats
8987
containerStats = append(containerStats, cStats)
9088

9189
// Convert cgroups stats to CRI metrics.
92-
cMetrics := ss.containerMetricsFromContainerStats(sb, c, ctrStats)
90+
cMetrics := ss.containerMetricsFromContainerStats(sb, c, ctrStats, *diskStats)
9391
containerMetrics = append(containerMetrics, cMetrics)
9492
}
9593

@@ -125,7 +123,14 @@ func (ss *StatsServer) updateContainerStats(c *oci.Container, sb *sandbox.Sandbo
125123
return nil
126124
}
127125

128-
cStats := containerCRIStats(ctrStats, c, ctrStats.Cgroup.SystemNano)
126+
diskStats, err := ss.Runtime().DiskStats(ss.ctx, c, sb.CgroupParent())
127+
if err != nil {
128+
log.Errorf(ss.ctx, "Error getting disk stats %s: %v", c.ID(), err)
129+
// Continue without disk stats
130+
diskStats = nil
131+
}
132+
133+
cStats := containerCRIStats(ctrStats, diskStats, c, ctrStats.SystemNano)
129134
ss.populateWritableLayer(cStats, c)
130135

131136
if oldcStats, ok := ss.ctrStats[c.ID()]; ok {
@@ -233,57 +238,35 @@ func (ss *StatsServer) GenerateSandboxContainerMetrics(sb *sandbox.Sandbox, c *o
233238
return nil
234239
}
235240

236-
return ss.containerMetricsFromContainerStats(sb, c, ctrStats)
241+
diskStats, err := ss.Runtime().DiskStats(ss.ctx, c, sb.CgroupParent())
242+
if err != nil {
243+
log.Errorf(ss.ctx, "Error getting disk stats %s: %v", c.ID(), err)
244+
245+
return nil
246+
}
247+
248+
return ss.containerMetricsFromContainerStats(sb, c, ctrStats, *diskStats)
237249
}
238250

239-
<<<<<<< HEAD
240-
func (ss *StatsServer) containerMetricsFromCgStats(sb *sandbox.Sandbox, c *oci.Container, cgstats *cgmgr.CgroupStats) *types.ContainerMetrics {
241-
metrics := computeContainerMetrics(c, []*containerMetric{
242-
{
243-
desc: containerLastSeen,
244-
valueFunc: func() metricValues {
245-
return metricValues{{
246-
value: uint64(time.Now().Unix()),
247-
metricType: types.MetricType_GAUGE,
248-
}}
249-
},
250-
},
251-
}, "")
252-
=======
253-
func (ss *StatsServer) containerMetricsFromContainerStats(sb *sandbox.Sandbox, c *oci.Container, containerStats *cgmgr.ContainerRuntimeStats) *types.ContainerMetrics {
251+
func (ss *StatsServer) containerMetricsFromContainerStats(sb *sandbox.Sandbox, c *oci.Container, containerStats *cgmgr.CgroupStats, diskstats oci.DiskMetrics) *types.ContainerMetrics {
254252
var metrics []*types.Metric
255-
>>>>>>> 2cfc575ad (Extended ContainerStats to include disk metrics)
256253

257254
for _, m := range ss.Config().IncludedPodMetrics {
258255
switch m {
259256
case CPUMetrics:
260-
<<<<<<< HEAD
261-
if cpuMetrics := generateContainerCPUMetrics(c, cgstats.CPU); cpuMetrics != nil {
262-
metrics = append(metrics, cpuMetrics...)
263-
}
264-
case HugetlbMetrics:
265-
if hugetlbMetrics := generateContainerHugetlbMetrics(c, cgstats.Hugetlb); hugetlbMetrics != nil {
266-
=======
267-
if cpuMetrics := generateSandboxCPUMetrics(sb, containerStats.Cgroup.CPU); cpuMetrics != nil {
257+
if cpuMetrics := generateContainerCPUMetrics(c, containerStats.CPU); cpuMetrics != nil {
268258
metrics = append(metrics, cpuMetrics...)
269259
}
270260
case HugetlbMetrics:
271-
if hugetlbMetrics := generateSandboxHugetlbMetrics(sb, containerStats.Cgroup.Hugetlb); hugetlbMetrics != nil {
272-
>>>>>>> 2cfc575ad (Extended ContainerStats to include disk metrics)
261+
if hugetlbMetrics := generateContainerHugetlbMetrics(c, containerStats.Hugetlb); hugetlbMetrics != nil {
273262
metrics = append(metrics, hugetlbMetrics...)
274263
}
275264
case DiskMetrics:
276-
if containerStats.Disk != nil {
277-
if diskMetrics := generateSandboxDiskMetrics(sb, containerStats.Disk); diskMetrics != nil {
278-
metrics = append(metrics, diskMetrics...)
279-
}
265+
if diskMetrics := generateContainerDiskMetrics(c, &diskstats.Filesystem); diskMetrics != nil {
266+
metrics = append(metrics, diskMetrics...)
280267
}
281268
case MemoryMetrics:
282-
<<<<<<< HEAD
283-
if memoryMetrics := generateContainerMemoryMetrics(c, cgstats.Memory); memoryMetrics != nil {
284-
=======
285-
if memoryMetrics := generateSandboxMemoryMetrics(sb, containerStats.Cgroup.Memory); memoryMetrics != nil {
286-
>>>>>>> 2cfc575ad (Extended ContainerStats to include disk metrics)
269+
if memoryMetrics := generateContainerMemoryMetrics(c, containerStats.Memory); memoryMetrics != nil {
287270
metrics = append(metrics, memoryMetrics...)
288271
}
289272
case OOMMetrics:
@@ -306,11 +289,7 @@ func (ss *StatsServer) containerMetricsFromContainerStats(sb *sandbox.Sandbox, c
306289
case NetworkMetrics:
307290
continue // Network metrics are collected at the pod level only.
308291
case ProcessMetrics:
309-
<<<<<<< HEAD
310-
if processMetrics := generateContainerProcessMetrics(c, cgstats.Pid); processMetrics != nil {
311-
=======
312-
if processMetrics := generateSandboxProcessMetrics(sb, containerStats.Cgroup.Pid); processMetrics != nil {
313-
>>>>>>> 2cfc575ad (Extended ContainerStats to include disk metrics)
292+
if processMetrics := generateContainerProcessMetrics(c, containerStats.Pid); processMetrics != nil {
314293
metrics = append(metrics, processMetrics...)
315294
}
316295
case SpecMetrics:
@@ -349,17 +328,17 @@ func linkToInterface(link netlink.Link) (*types.NetworkInterfaceUsage, error) {
349328
}, nil
350329
}
351330

352-
func containerCRIStats(stats *cgmgr.ContainerRuntimeStats, ctr *oci.Container, systemNano int64) *types.ContainerStats {
331+
func containerCRIStats(stats *cgmgr.CgroupStats, diskStats *oci.DiskMetrics, ctr *oci.Container, systemNano int64) *types.ContainerStats {
353332
criStats := &types.ContainerStats{
354333
Attributes: ctr.CRIAttributes(),
355334
}
356-
criStats.Cpu = criCPUStats(stats.Cgroup.CPU, systemNano)
357-
criStats.Memory = criMemStats(stats.Cgroup.Memory, systemNano)
358-
criStats.Swap = criSwapStats(stats.Cgroup.Memory, systemNano)
335+
criStats.Cpu = criCPUStats(stats.CPU, systemNano)
336+
criStats.Memory = criMemStats(stats.Memory, systemNano)
337+
criStats.Swap = criSwapStats(stats.Memory, systemNano)
359338

360339
// Add filesystem stats if available
361-
if stats.Disk != nil {
362-
criStats.WritableLayer = criFilesystemStats(stats.Disk, ctr, systemNano)
340+
if diskStats != nil {
341+
criStats.WritableLayer = criFilesystemStats(&diskStats.Filesystem, ctr, systemNano)
363342
}
364343

365344
return criStats
@@ -399,7 +378,7 @@ func criProcessStats(pStats *cgmgr.PidsStats, systemNano int64) *types.ProcessUs
399378
}
400379
}
401380

402-
func criFilesystemStats(diskStats *cgmgr.DiskMetrics, ctr *oci.Container, systemNano int64) *types.FilesystemUsage {
381+
func criFilesystemStats(diskStats *oci.FilesystemMetrics, ctr *oci.Container, systemNano int64) *types.FilesystemUsage {
403382
mountpoint := ctr.MountPoint()
404383
if mountpoint == "" {
405384
// Skip FS stats as mount point is unknown

internal/oci/disk_metrics.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
//go:build linux
2+
3+
package oci
4+
5+
import (
6+
"fmt"
7+
"syscall"
8+
9+
"github.com/cri-o/cri-o/utils"
10+
)
11+
12+
// DiskMetrics groups the disk statistics collected for a container.
// At present it carries a single section, the filesystem usage figures.
13+
type DiskMetrics struct {
14+
Filesystem FilesystemMetrics // byte and inode usage; see FilesystemMetrics
15+
}
16+
17+
// FilesystemMetrics represents filesystem usage statistics.
// The per-field notes describe how GetDiskUsageForPath fills the struct.
18+
type FilesystemMetrics struct {
19+
UsageBytes uint64 `json:"usage_bytes"` // first value returned by utils.GetDiskUsageStats for the path
20+
LimitBytes uint64 `json:"limit_bytes"` // statfs Blocks multiplied by Bsize for the path
21+
InodesFree uint64 `json:"inodes_free"` // statfs Ffree for the path
22+
InodesTotal uint64 `json:"inodes_total"` // statfs Files for the path
23+
}
24+
25+
// GetDiskUsageForPath returns disk usage statistics for a given path.
26+
func GetDiskUsageForPath(path string) (*DiskMetrics, error) {
27+
usageBytes, _, err := utils.GetDiskUsageStats(path)
28+
if err != nil {
29+
return nil, fmt.Errorf("failed to get disk usage stats: %w", err)
30+
}
31+
32+
var stat syscall.Statfs_t
33+
if err := syscall.Statfs(path, &stat); err != nil {
34+
return nil, fmt.Errorf("failed to get filesystem stats: %w", err)
35+
}
36+
37+
totalBytes := stat.Blocks * uint64(stat.Bsize)
38+
totalInodes := stat.Files
39+
freeInodes := stat.Ffree
40+
41+
return &DiskMetrics{
42+
Filesystem: FilesystemMetrics{
43+
UsageBytes: usageBytes,
44+
LimitBytes: totalBytes,
45+
InodesFree: freeInodes,
46+
InodesTotal: totalInodes,
47+
},
48+
}, nil
49+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//go:build !linux && !freebsd
2+
3+
package oci
4+
5+
import "fmt"
6+
7+
// DiskMetrics represents comprehensive disk statistics for a container.
// In this build (neither linux nor freebsd) GetDiskUsageForPath never
// produces a value of this type; it always returns nil and an error.
8+
type DiskMetrics struct {
9+
Filesystem FilesystemMetrics // filesystem usage section; see FilesystemMetrics
10+
}
11+
12+
// FilesystemMetrics represents filesystem usage statistics.
// Nothing in this build (neither linux nor freebsd) fills these fields.
13+
type FilesystemMetrics struct {
14+
UsageBytes uint64 `json:"usage_bytes"`
15+
LimitBytes uint64 `json:"limit_bytes"`
16+
InodesFree uint64 `json:"inodes_free"`
17+
InodesTotal uint64 `json:"inodes_total"`
18+
}
19+
20+
func GetDiskUsageForPath(path string) (*DiskMetrics, error) {
21+
return nil, fmt.Errorf("disk usage statistics not supported on this platform")
22+
}

internal/oci/oci.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ type RuntimeImpl interface {
7272
UpdateContainerStatus(context.Context, *Container) error
7373
PauseContainer(context.Context, *Container) error
7474
UnpauseContainer(context.Context, *Container) error
75-
ContainerStats(context.Context, *Container, string) (*cgmgr.ContainerRuntimeStats, error)
75+
ContainerStats(context.Context, *Container, string) (*cgmgr.CgroupStats, error)
76+
DiskStats(context.Context, *Container, string) (*DiskMetrics, error)
7677
SignalContainer(context.Context, *Container, syscall.Signal) error
7778
AttachContainer(context.Context, *Container, io.Reader, io.WriteCloser, io.WriteCloser,
7879
bool, <-chan remotecommand.TerminalSize) error
@@ -460,7 +461,7 @@ func (r *Runtime) UnpauseContainer(ctx context.Context, c *Container) error {
460461
}
461462

462463
// ContainerStats provides statistics of a container.
463-
func (r *Runtime) ContainerStats(ctx context.Context, c *Container, cgroup string) (*cgmgr.ContainerRuntimeStats, error) {
464+
func (r *Runtime) ContainerStats(ctx context.Context, c *Container, cgroup string) (*cgmgr.CgroupStats, error) {
464465
ctx, span := log.StartSpan(ctx)
465466
defer span.End()
466467

@@ -472,6 +473,19 @@ func (r *Runtime) ContainerStats(ctx context.Context, c *Container, cgroup strin
472473
return impl.ContainerStats(ctx, c, cgroup)
473474
}
474475

476+
// DiskStats provides disk statistics for a container.
477+
func (r *Runtime) DiskStats(ctx context.Context, c *Container, cgroup string) (*DiskMetrics, error) {
478+
ctx, span := log.StartSpan(ctx)
479+
defer span.End()
480+
481+
impl, err := r.RuntimeImpl(c)
482+
if err != nil {
483+
return nil, err
484+
}
485+
486+
return impl.DiskStats(ctx, c, cgroup)
487+
}
488+
475489
// SignalContainer sends a signal to a container process.
476490
func (r *Runtime) SignalContainer(ctx context.Context, c *Container, sig syscall.Signal) error {
477491
ctx, span := log.StartSpan(ctx)

0 commit comments

Comments
 (0)