Skip to content

Commit 8bf967b

Browse files
Merge pull request #6949 from haircommander/infraless-create-cgroup-1.24
[1.24] cgmgr: create cgroups for systemd cgroup driver for dropped infra pods
2 parents 83c2c42 + 1edcf7f commit 8bf967b

File tree

6 files changed

+121
-27
lines changed

6 files changed

+121
-27
lines changed

internal/config/cgmgr/cgmgr.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,12 @@ import (
1111
"strconv"
1212
"strings"
1313

14+
"github.com/containers/podman/v3/pkg/rootless"
1415
"github.com/cri-o/cri-o/internal/config/node"
1516
libctr "github.com/opencontainers/runc/libcontainer/cgroups"
17+
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
18+
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
19+
cgcfgs "github.com/opencontainers/runc/libcontainer/configs"
1620
rspec "github.com/opencontainers/runtime-spec/specs-go"
1721
"github.com/pkg/errors"
1822
"github.com/sirupsen/logrus"
@@ -74,6 +78,9 @@ type CgroupManager interface {
7478
// CreateSandboxCgroup takes the sandbox parent, and sandbox ID.
7579
// It creates a new cgroup for that sandbox, which is useful when spoofing an infra container.
7680
CreateSandboxCgroup(sbParent, containerID string) error
81+
// RemoveSandboxCgroup takes the sandbox parent, and sandbox ID.
82+
// It removes the cgroup for that sandbox, which is useful when spoofing an infra container.
83+
RemoveSandboxCgroup(sbParent, containerID string) error
7784
}
7885

7986
// New creates a new CgroupManager with defaults
@@ -161,3 +168,39 @@ func MoveProcessToContainerCgroup(containerPid, commandPid int) error {
161168
}
162169
return nil
163170
}
171+
172+
// createSandboxCgroup takes the path of the sandbox parent and the desired containerCgroup
173+
// It creates a cgroup through cgroupfs (as opposed to systemd) at the location cgroupRoot/sbParent/containerCgroup.
174+
func createSandboxCgroup(sbParent, containerCgroup string) error {
175+
mgr, err := libctrCgroupManager(sbParent, containerCgroup)
176+
if err != nil {
177+
return err
178+
}
179+
return mgr.Apply(-1)
180+
}
181+
182+
func removeSandboxCgroup(sbParent, containerCgroup string) error {
183+
mgr, err := libctrCgroupManager(sbParent, containerCgroup)
184+
if err != nil {
185+
return err
186+
}
187+
return mgr.Destroy()
188+
}
189+
190+
func libctrCgroupManager(sbParent, containerCgroup string) (libctr.Manager, error) {
191+
cg := &cgcfgs.Cgroup{
192+
Name: containerCgroup,
193+
Parent: sbParent,
194+
Resources: &cgcfgs.Resources{
195+
SkipDevices: true,
196+
},
197+
}
198+
if node.CgroupIsV2() {
199+
return fs2.NewManager(cg, "", rootless.IsRootless())
200+
}
201+
return fs.NewManager(cg, nil, rootless.IsRootless()), nil
202+
}
203+
204+
func containerCgroupPath(id string) string {
205+
return crioPrefix + "-" + id
206+
}

internal/config/cgmgr/cgroupfs.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -180,18 +180,18 @@ func setWorkloadSettings(cgPath string, resources *rspec.LinuxResources) error {
180180
return mgr.Set(cg.Resources)
181181
}
182182

183-
// createSandboxCgroup takes the sandbox parent, and sandbox ID.
184-
// It creates a new cgroup for that sandbox, which is useful when spoofing an infra container.
185-
func createSandboxCgroup(sbParent, containerID string, mgr CgroupManager) error {
186-
cgroupAbsolutePath, err := mgr.ContainerCgroupAbsolutePath(sbParent, containerID)
187-
if err != nil {
188-
return err
189-
}
190-
_, err = cgroups.New(cgroupAbsolutePath, &rspec.LinuxResources{})
191-
return err
192-
}
193-
194183
// CreateSandboxCgroup calls the helper function createSandboxCgroup for this manager.
195184
func (m *CgroupfsManager) CreateSandboxCgroup(sbParent, containerID string) error {
196-
return createSandboxCgroup(sbParent, containerID, m)
185+
// prepend "/" to sbParent so the fs driver interprets it as an absolute path
186+
// and the cgroup isn't created as a relative path to the cgroups of the CRI-O process.
187+
// https://github.com/opencontainers/runc/blob/fd5debf3aa/libcontainer/cgroups/fs/paths.go#L156
188+
return createSandboxCgroup(filepath.Join("/", sbParent), containerCgroupPath(containerID))
189+
}
190+
191+
// RemoveSandboxCgroup calls the helper function removeSandboxCgroup for this manager.
192+
func (m *CgroupfsManager) RemoveSandboxCgroup(sbParent, containerID string) error {
193+
// prepend "/" to sbParent so the fs driver interprets it as an absolute path
194+
// and the cgroup isn't created as a relative path to the cgroups of the CRI-O process.
195+
// https://github.com/opencontainers/runc/blob/fd5debf3aa/libcontainer/cgroups/fs/paths.go#L156
196+
return removeSandboxCgroup(filepath.Join("/", sbParent), containerCgroupPath(containerID))
197197
}

internal/config/cgmgr/systemd.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func (m *SystemdManager) ContainerCgroupAbsolutePath(sbParent, containerID strin
8989
return "", errors.Wrapf(err, "error expanding systemd slice to get container %s stats", containerID)
9090
}
9191

92-
return filepath.Join(cgroup, crioPrefix+"-"+containerID+".scope"), nil
92+
return filepath.Join(cgroup, containerCgroupPath(containerID)+".scope"), nil
9393
}
9494

9595
// MoveConmonToCgroup takes the container ID, cgroup parent, conmon's cgroup (from the config) and conmon's PID
@@ -200,8 +200,37 @@ func convertCgroupFsNameToSystemd(cgroupfsName string) string {
200200
}
201201

202202
// CreateSandboxCgroup calls the helper function createSandboxCgroup for this manager.
203+
// Note: createSandboxCgroup will create a cgroupfs cgroup for the infra container underneath the pod slice.
204+
// It will not use dbus to create this cgroup, but instead call libcontainer's cgroupfs manager directly.
205+
// This is because a scope created here will not have a process within it (as it's usually for a dropped infra container),
206+
// and a slice cannot have the required `crio` prefix (while still being within the pod slice).
207+
// Ultimately, this cgroup is required for cAdvisor to be able to register the pod and collect network metrics for it.
208+
// This work will not be relevant when CRI-O is responsible for gathering pod metrics (KEP-2371), but is required until that's done.
203209
func (m *SystemdManager) CreateSandboxCgroup(sbParent, containerID string) error {
204-
// If we are running systemd as cgroup driver then we would rely on
205-
// systemd to create cgroups for us, there's nothing to do here in this case
206-
return nil
210+
// sbParent should always be specified by kubelet, but sometimes not by critest/crictl.
211+
// Skip creation in this case.
212+
if sbParent == "" {
213+
logrus.Infof("Not creating sandbox cgroup: sbParent is empty")
214+
return nil
215+
}
216+
expandedParent, err := systemd.ExpandSlice(sbParent)
217+
if err != nil {
218+
return err
219+
}
220+
return createSandboxCgroup(expandedParent, containerCgroupPath(containerID))
221+
}
222+
223+
// RemoveSandboxCgroup calls the helper function removeSandboxCgroup for this manager.
224+
func (m *SystemdManager) RemoveSandboxCgroup(sbParent, containerID string) error {
225+
// sbParent should always be specified by kubelet, but sometimes not by critest/crictl.
226+
// Skip creation in this case.
227+
if sbParent == "" {
228+
logrus.Infof("Not creating sandbox cgroup: sbParent is empty")
229+
return nil
230+
}
231+
expandedParent, err := systemd.ExpandSlice(sbParent)
232+
if err != nil {
233+
return err
234+
}
235+
return removeSandboxCgroup(expandedParent, containerCgroupPath(containerID))
207236
}

server/sandbox_remove.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ func (s *Server) removePodSandbox(ctx context.Context, sb *sandbox.Sandbox) erro
5252
if err := s.removeContainerInPod(ctx, sb, sb.InfraContainer()); err != nil {
5353
return err
5454
}
55+
if sb.InfraContainer().Spoofed() {
56+
if err := s.config.CgroupManager().RemoveSandboxCgroup(sb.CgroupParent(), sb.ID()); err != nil {
57+
return err
58+
}
59+
}
5560

5661
// Cleanup network resources for this pod
5762
if err := s.networkStop(ctx, sb); err != nil {

test/cgroups.bats

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,28 @@ function teardown() {
3838
[[ "$output" == *"customcrioconmon.slice"* ]]
3939
}
4040

41+
@test "conmon custom cgroup with no infra container" {
42+
parent="Burstablecriotest123"
43+
if [ "$CONTAINER_CGROUP_MANAGER" == "systemd" ]; then
44+
parent="$parent".slice
45+
fi
46+
cgroup_base="/sys/fs/cgroup"
47+
if ! is_cgroup_v2; then
48+
cgroup_base="$cgroup_base"/memory
49+
fi
50+
51+
CONTAINER_DROP_INFRA_CTR=true start_crio
52+
53+
jq --arg cg "$parent" ' .linux.cgroup_parent = $cg' \
54+
"$TESTDATA"/sandbox_config.json > "$TESTDIR"/sandbox_config_slice.json
55+
56+
pod_id=$(crictl runp "$TESTDIR"/sandbox_config_slice.json)
57+
ls "$cgroup_base"/"$parent"/crio-"$pod_id"*
58+
59+
crictl rmp -fa
60+
! ls "$cgroup_base"/"$parent"/crio-"$pod_id"*
61+
}
62+
4163
@test "ctr with swap should be configured" {
4264
if ! grep -v Filename < /proc/swaps; then
4365
skip "swap not enabled"

test/pod.bats

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -278,22 +278,17 @@ function teardown() {
278278
}
279279

280280
@test "kubernetes pod terminationGracePeriod passthru" {
281-
[ -v CIRCLECI ] && skip "runc v1.0.0-rc11 required" # TODO remove this
282-
# Make sure there is no XDG_RUNTIME_DIR set, otherwise the test might end up using the user instance.
283281
# There is an assumption in the test to use the system instance of systemd (systemctl show).
284-
CONTAINER_CGROUP_MANAGER="systemd" DBUS_SESSION_BUS_ADDRESS="" XDG_RUNTIME_DIR="" start_crio
285-
286-
# for systemd, cgroup_parent should not be set
287-
jq ' del(.linux.cgroup_parent)' \
288-
"$TESTDATA"/sandbox_config.json > "$TESTDIR"/sandbox.json
282+
if [[ "$CONTAINER_CGROUP_MANAGER" != "systemd" ]]; then
283+
skip "need systemd cgroup manager"
284+
fi
285+
# Make sure there is no XDG_RUNTIME_DIR set, otherwise the test might end up using the user instance.
286+
DBUS_SESSION_BUS_ADDRESS="" XDG_RUNTIME_DIR="" start_crio
289287

290288
jq ' .annotations += { "io.kubernetes.pod.terminationGracePeriod": "88" }' \
291289
"$TESTDATA"/container_sleep.json > "$TESTDIR"/ctr.json
292290

293-
pod_id=$(crictl runp "$TESTDIR"/sandbox.json)
294-
ctr_id=$(crictl create "$pod_id" "$TESTDIR"/ctr.json "$TESTDIR"/sandbox.json)
295-
296-
crictl start "$ctr_id"
291+
ctr_id=$(crictl run "$TESTDIR"/ctr.json "$TESTDATA"/sandbox_config.json)
297292

298293
output=$(systemctl show "crio-${ctr_id}.scope")
299294
echo "$output" | grep 'TimeoutStopUSec=' || true # show

0 commit comments

Comments
 (0)