Skip to content

Commit d4b5d78

Browse files
committed
cgmgr: create cgroups for systemd cgroup driver for dropped infra pods
The history here is a bit convoluted. Originally, runc created the cgroup for the infra container. cAdvisor was built to assume the cgroup for the infra container would be created, and it uses this to find the network metrics for the pod. When we dropped the infra container, cri-o needed to make this cgroup so cAdvisor could still find the network metrics. However, systemd didn't like the way we did it, and would remove the cgroup mid pod creation, which was fixed in #6196. This actually caused the cgroup to not be created at all, which then caused the networking metrics to not be gathered at all. Thus, we do need to create a cgroup underneath the systemd cgroup. Attempt to use a slice for this, as systemd won't require a process be underneath it. Signed-off-by: Peter Hunt <[email protected]>
1 parent b007cb6 commit d4b5d78

File tree

5 files changed

+115
-16
lines changed

5 files changed

+115
-16
lines changed

internal/config/cgmgr/cgmgr.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313

1414
"github.com/cri-o/cri-o/internal/config/node"
1515
libctr "github.com/opencontainers/runc/libcontainer/cgroups"
16+
libctrCgMgr "github.com/opencontainers/runc/libcontainer/cgroups/manager"
17+
cgcfgs "github.com/opencontainers/runc/libcontainer/configs"
1618
rspec "github.com/opencontainers/runtime-spec/specs-go"
1719
"github.com/pkg/errors"
1820
"github.com/sirupsen/logrus"
@@ -74,6 +76,9 @@ type CgroupManager interface {
7476
// CreateSandboxCgroup takes the sandbox parent, and sandbox ID.
7577
// It creates a new cgroup for that sandbox, which is useful when spoofing an infra container.
7678
CreateSandboxCgroup(sbParent, containerID string) error
79+
// RemoveSandboxCgroup takes the sandbox parent, and sandbox ID.
80+
// It removes the cgroup for that sandbox, which is useful when spoofing an infra container.
81+
RemoveSandboxCgroup(sbParent, containerID string) error
7782
}
7883

7984
// New creates a new CgroupManager with defaults
@@ -161,3 +166,41 @@ func MoveProcessToContainerCgroup(containerPid, commandPid int) error {
161166
}
162167
return nil
163168
}
169+
170+
// createSandboxCgroup takes the path of the sandbox parent and the desired containerCgroup
171+
// It creates a cgroup through cgroupfs (as opposed to systemd) at the location cgroupRoot/sbParent/containerCgroup.
172+
func createSandboxCgroup(sbParent, containerCgroup string) error {
173+
cg := &cgcfgs.Cgroup{
174+
Name: containerCgroup,
175+
Parent: sbParent,
176+
Resources: &cgcfgs.Resources{
177+
SkipDevices: true,
178+
},
179+
}
180+
mgr, err := libctrCgMgr.New(cg)
181+
if err != nil {
182+
return err
183+
}
184+
185+
return mgr.Apply(-1)
186+
}
187+
188+
func removeSandboxCgroup(sbParent, containerCgroup string) error {
189+
cg := &cgcfgs.Cgroup{
190+
Name: containerCgroup,
191+
Parent: sbParent,
192+
Resources: &cgcfgs.Resources{
193+
SkipDevices: true,
194+
},
195+
}
196+
mgr, err := libctrCgMgr.New(cg)
197+
if err != nil {
198+
return err
199+
}
200+
201+
return mgr.Destroy()
202+
}
203+
204+
func containerCgroupPath(id string) string {
205+
return crioPrefix + "-" + id
206+
}

internal/config/cgmgr/cgroupfs.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -180,18 +180,18 @@ func setWorkloadSettings(cgPath string, resources *rspec.LinuxResources) error {
180180
return mgr.Set(cg.Resources)
181181
}
182182

183-
// createSandboxCgroup takes the sandbox parent, and sandbox ID.
184-
// It creates a new cgroup for that sandbox, which is useful when spoofing an infra container.
185-
func createSandboxCgroup(sbParent, containerID string, mgr CgroupManager) error {
186-
cgroupAbsolutePath, err := mgr.ContainerCgroupAbsolutePath(sbParent, containerID)
187-
if err != nil {
188-
return err
189-
}
190-
_, err = cgroups.New(cgroupAbsolutePath, &rspec.LinuxResources{})
191-
return err
192-
}
193-
194183
// CreateSandboxCgroup calls the helper function createSandboxCgroup for this manager.
195184
func (m *CgroupfsManager) CreateSandboxCgroup(sbParent, containerID string) error {
196-
return createSandboxCgroup(sbParent, containerID, m)
185+
// prepend "/" to sbParent so the fs driver interprets it as an absolute path
186+
// and the cgroup isn't created as a relative path to the cgroups of the CRI-O process.
187+
// https://github.com/opencontainers/runc/blob/fd5debf3aa/libcontainer/cgroups/fs/paths.go#L156
188+
return createSandboxCgroup(filepath.Join("/", sbParent), containerCgroupPath(containerID))
189+
}
190+
191+
// RemoveSandboxCgroup calls the helper function removeSandboxCgroup for this manager.
192+
func (m *CgroupfsManager) RemoveSandboxCgroup(sbParent, containerID string) error {
193+
// prepend "/" to sbParent so the fs driver interprets it as an absolute path
194+
// and the cgroup isn't created as a relative path to the cgroups of the CRI-O process.
195+
// https://github.com/opencontainers/runc/blob/fd5debf3aa/libcontainer/cgroups/fs/paths.go#L156
196+
return removeSandboxCgroup(filepath.Join("/", sbParent), containerCgroupPath(containerID))
197197
}

internal/config/cgmgr/systemd.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func (m *SystemdManager) ContainerCgroupAbsolutePath(sbParent, containerID strin
8989
return "", errors.Wrapf(err, "error expanding systemd slice to get container %s stats", containerID)
9090
}
9191

92-
return filepath.Join(cgroup, crioPrefix+"-"+containerID+".scope"), nil
92+
return filepath.Join(cgroup, containerCgroupPath(containerID)+".scope"), nil
9393
}
9494

9595
// MoveConmonToCgroup takes the container ID, cgroup parent, conmon's cgroup (from the config) and conmon's PID
@@ -200,8 +200,37 @@ func convertCgroupFsNameToSystemd(cgroupfsName string) string {
200200
}
201201

202202
// CreateSandboxCgroup calls the helper function createSandboxCgroup for this manager.
203+
// Note: createSandboxCgroup will create a cgroupfs cgroup for the infra container underneath the pod slice.
204+
// It will not use dbus to create this cgroup, but instead call libcontainer's cgroupfs manager directly.
205+
// This is because a scope created here will not have a process within it (as it's usually for a dropped infra container),
206+
// and a slice cannot have the required `crio` prefix (while still being within the pod slice).
207+
// Ultimately, this cgroup is required for cAdvisor to be able to register the pod and collect network metrics for it.
208+
// This work will not be relevant when CRI-O is responsible for gathering pod metrics (KEP-2371), but is required until that's done.
203209
func (m *SystemdManager) CreateSandboxCgroup(sbParent, containerID string) error {
204-
// If we are running systemd as cgroup driver then we would rely on
205-
// systemd to create cgroups for us, there's nothing to do here in this case
206-
return nil
210+
// sbParent should always be specified by kubelet, but sometimes not by critest/crictl.
211+
// Skip creation in this case.
212+
if sbParent == "" {
213+
logrus.Infof("Not creating sandbox cgroup: sbParent is empty")
214+
return nil
215+
}
216+
expandedParent, err := systemd.ExpandSlice(sbParent)
217+
if err != nil {
218+
return err
219+
}
220+
return createSandboxCgroup(expandedParent, containerCgroupPath(containerID))
221+
}
222+
223+
// RemoveSandboxCgroup calls the helper function removeSandboxCgroup for this manager.
224+
func (m *SystemdManager) RemoveSandboxCgroup(sbParent, containerID string) error {
225+
// sbParent should always be specified by kubelet, but sometimes not by critest/crictl.
226+
// Skip creation in this case.
227+
if sbParent == "" {
228+
logrus.Infof("Not creating sandbox cgroup: sbParent is empty")
229+
return nil
230+
}
231+
expandedParent, err := systemd.ExpandSlice(sbParent)
232+
if err != nil {
233+
return err
234+
}
235+
return removeSandboxCgroup(expandedParent, containerCgroupPath(containerID))
207236
}

server/sandbox_remove.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ func (s *Server) removePodSandbox(ctx context.Context, sb *sandbox.Sandbox) erro
5252
if err := s.removeContainerInPod(ctx, sb, sb.InfraContainer()); err != nil {
5353
return err
5454
}
55+
if sb.InfraContainer().Spoofed() {
56+
if err := s.config.CgroupManager().RemoveSandboxCgroup(sb.CgroupParent(), sb.ID()); err != nil {
57+
return err
58+
}
59+
}
5560

5661
// Cleanup network resources for this pod
5762
if err := s.networkStop(ctx, sb); err != nil {

test/cgroups.bats

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,28 @@ function teardown() {
3838
[[ "$output" == *"customcrioconmon.slice"* ]]
3939
}
4040

41+
@test "conmon custom cgroup with no infra container" {
42+
parent="Burstablecriotest123"
43+
if [ "$CONTAINER_CGROUP_MANAGER" == "systemd" ]; then
44+
parent="$parent".slice
45+
fi
46+
cgroup_base="/sys/fs/cgroup"
47+
if ! is_cgroup_v2; then
48+
cgroup_base="$cgroup_base"/memory
49+
fi
50+
51+
CONTAINER_DROP_INFRA_CTR=true start_crio
52+
53+
jq --arg cg "$parent" ' .linux.cgroup_parent = $cg' \
54+
"$TESTDATA"/sandbox_config.json > "$TESTDIR"/sandbox_config_slice.json
55+
56+
pod_id=$(crictl runp "$TESTDIR"/sandbox_config_slice.json)
57+
ls "$cgroup_base"/"$parent"/crio-"$pod_id"*
58+
59+
crictl rmp -fa
60+
! ls "$cgroup_base"/"$parent"/crio-"$pod_id"*
61+
}
62+
4163
@test "ctr with swap should be configured" {
4264
if ! grep -v Filename < /proc/swaps; then
4365
skip "swap not enabled"

0 commit comments

Comments
 (0)