@@ -84,6 +84,17 @@ type CgroupManager interface {
8484 // It creates a new cgroup for that sandbox if it does not already exist.
8585 // It returns the cgroup stats for that sandbox.
8686 SandboxCgroupStats (sbParent , sbID string ) (* CgroupStats , error )
87+ // ExecCgroupManager returns the cgroup manager for the exec cgroup used to place exec processes.
88+ // The cgroupPath parameter is the container's cgroup path from spec.Linux.CgroupsPath.
89+ // This is only supported on cgroup v2.
90+ ExecCgroupManager (cgroupPath string ) (cgroups.Manager , error )
91+ // PodAndContainerCgroupManagers returns the libcontainer cgroup managers for both the pod and container cgroups.
92+ // The sbParent is the sandbox parent cgroup, and containerID is the container's ID.
93+ // It returns:
94+ // - podManager: the cgroup manager for the pod cgroup
95+ // - containerManagers: a slice of cgroup managers for the container cgroup(s).
96+ // This may include an extra manager if crun creates a sub-cgroup of the container.
97+ PodAndContainerCgroupManagers (sbParent , containerID string ) (podManager cgroups.Manager , containerManagers []cgroups.Manager , err error )
8798}
8899
89100// New creates a new CgroupManager with defaults.
@@ -245,3 +256,77 @@ func removeSandboxCgroup(sbParent, containerCgroup string) error {
245256func containerCgroupPath (id string ) string {
246257 return CrioPrefix + "-" + id
247258}
259+
260+ // LibctrManager creates a libcontainer cgroup manager for the given cgroup.
261+ // The cgroup parameter is the name of the cgroup, parent is the parent path,
262+ // and systemd indicates whether to use systemd cgroup driver.
263+ func LibctrManager (cgroup , parent string , systemd bool ) (cgroups.Manager , error ) {
264+ if systemd {
265+ parent = filepath .Base (parent )
266+ if parent == "." {
267+ // libcontainer shorthand for root
268+ // see https://github.com/opencontainers/runc/blob/9fffadae8/libcontainer/cgroups/systemd/common.go#L71
269+ parent = "-.slice"
270+ }
271+ }
272+
273+ cg := & cgroups.Cgroup {
274+ Name : cgroup ,
275+ Parent : parent ,
276+ Resources : & cgroups.Resources {
277+ SkipDevices : true ,
278+ },
279+ Systemd : systemd ,
280+ // If the cgroup manager is systemd, then libcontainer
281+ // will construct the cgroup path (for scopes) as:
282+ // ScopePrefix-Name.scope. For slices, and for cgroupfs manager,
283+ // this will be ignored.
284+ // See: https://github.com/opencontainers/runc/tree/main/libcontainer/cgroups/systemd/common.go:getUnitName
285+ ScopePrefix : CrioPrefix ,
286+ }
287+
288+ return manager .New (cg )
289+ }
290+
291+ // crunContainerCgroupManager returns the cgroup manager for the actual container cgroup.
292+ // Some runtimes like crun create a sub-cgroup of the container to do the actual management,
293+ // to enforce systemd's single owner rule. This function checks for and handles that case.
294+ // If no sub-cgroup exists, it returns nil, nil.
295+ func crunContainerCgroupManager (expectedContainerCgroup string ) (cgroups.Manager , error ) {
296+ // HACK: There isn't really a better way to check if the actual container cgroup is in a child cgroup of the expected.
297+ // We could check /proc/$pid/cgroup, but we need to be able to query this after the container exits and the process is gone.
298+ // We know the source of this: crun creates a sub cgroup of the container to do the actual management, to enforce systemd's single
299+ // owner rule. Thus, we need to hardcode this check.
300+ actualContainerCgroup := filepath .Join (expectedContainerCgroup , "container" )
301+ // Choose cpuset as the cgroup to check, with little reason.
302+ cgroupRoot := CgroupMemoryPathV2
303+ if ! node .CgroupIsV2 () {
304+ cgroupRoot += "/cpuset"
305+ }
306+
307+ // Normalize the path so that we don't add duplicate prefix.
308+ cgroupPath := filepath .Join (cgroupRoot , strings .TrimPrefix (actualContainerCgroup , cgroupRoot ))
309+ if _ , err := os .Stat (cgroupPath ); err != nil {
310+ return nil , nil
311+ }
312+ // must be crun, make another LibctrManager. Regardless of cgroup driver, it will be treated as cgroupfs
313+ return LibctrManager (filepath .Base (actualContainerCgroup ), filepath .Dir (actualContainerCgroup ), false )
314+ }
315+
316+ // execCgroupManager creates an exec cgroup for placing exec processes.
317+ // containerCgroupAbsPath is the absolute path to the container's cgroup (without /sys/fs/cgroup prefix).
318+ // Returns the cgroup manager for the exec cgroup.
319+ //
320+ // The exec cgroup location depends on whether crun created a "container" child cgroup:
321+ // - If crun's "container" child exists: exec cgroup is created under it
322+ // - Otherwise: exec cgroup is created directly under the container cgroup
323+ func execCgroupManager (containerCgroupAbsPath string ) (cgroups.Manager , error ) {
324+ execCgroupParent := containerCgroupAbsPath
325+
326+ // Check if crun created a "container" child cgroup
327+ if mgr , err := crunContainerCgroupManager (containerCgroupAbsPath ); err == nil && mgr != nil {
328+ execCgroupParent = filepath .Join (containerCgroupAbsPath , "container" )
329+ }
330+
331+ return LibctrManager ("exec" , execCgroupParent , false )
332+ }
0 commit comments