Skip to content

Commit 6cc30d2

Browse files
Merge pull request #9328 from haircommander/living-readines-1.29
OCPBUGS-58833: [1.29] sandbox: use created/stopped instead of infra container for readiness
2 parents 9b9e68b + c38e5db commit 6cc30d2

File tree

6 files changed

+55
-32
lines changed

6 files changed

+55
-32
lines changed

internal/lib/container_server.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,11 @@ func (c *ContainerServer) LoadSandbox(ctx context.Context, id string) (sb *sandb
354354
}
355355

356356
sb.SetCreated()
357+
358+
if scontainer.State().Status == oci.ContainerStateStopped {
359+
sb.SetStopped(ctx, true)
360+
}
361+
357362
if err := label.ReserveLabel(processLabel); err != nil {
358363
return sb, err
359364
}

internal/lib/sandbox/sandbox.go

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,18 @@ type Sandbox struct {
5656
nsOpts *types.NamespaceOption
5757
dnsConfig *types.DNSConfig
5858
stopMutex sync.RWMutex
59-
created bool
60-
stopped bool
61-
networkStopped bool
62-
privileged bool
63-
hostNetwork bool
64-
usernsMode string
65-
containerEnvPath string
66-
podLinuxOverhead *types.LinuxContainerResources
67-
podLinuxResources *types.LinuxContainerResources
59+
// stateMutex protects the use of created, stopped and networkStopped bools
60+
// which are all fields that can change at runtime
61+
stateMutex sync.RWMutex
62+
created bool
63+
stopped bool
64+
networkStopped bool
65+
privileged bool
66+
hostNetwork bool
67+
usernsMode string
68+
containerEnvPath string
69+
podLinuxOverhead *types.LinuxContainerResources
70+
podLinuxResources *types.LinuxContainerResources
6871
}
6972

7073
// DefaultShmSize is the default shm size
@@ -352,6 +355,10 @@ func (s *Sandbox) RemoveInfraContainer() {
352355
func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) {
353356
ctx, span := log.StartSpan(ctx)
354357
defer span.End()
358+
359+
s.stateMutex.Lock()
360+
defer s.stateMutex.Unlock()
361+
355362
if s.stopped {
356363
return
357364
}
@@ -366,16 +373,24 @@ func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) {
366373
// Stopped returns whether the sandbox state has been
367374
// set to stopped.
368375
func (s *Sandbox) Stopped() bool {
376+
s.stateMutex.RLock()
377+
defer s.stateMutex.RUnlock()
378+
369379
return s.stopped
370380
}
371381

372382
// SetCreated sets the created status of sandbox to true
373383
func (s *Sandbox) SetCreated() {
384+
s.stateMutex.Lock()
385+
defer s.stateMutex.Unlock()
374386
s.created = true
375387
}
376388

377389
// NetworkStopped returns whether the network has been stopped
378390
func (s *Sandbox) NetworkStopped() bool {
391+
s.stateMutex.RLock()
392+
defer s.stateMutex.RUnlock()
393+
379394
return s.networkStopped
380395
}
381396

@@ -389,6 +404,10 @@ func (s *Sandbox) NetworkStopped() bool {
389404
func (s *Sandbox) SetNetworkStopped(ctx context.Context, createFile bool) error {
390405
ctx, span := log.StartSpan(ctx)
391406
defer span.End()
407+
408+
s.stateMutex.Lock()
409+
defer s.stateMutex.Unlock()
410+
392411
if s.networkStopped {
393412
return nil
394413
}
@@ -420,6 +439,7 @@ func (s *Sandbox) SetContainerEnvFile(ctx context.Context) error {
420439
return nil
421440
}
422441

442+
// This function assumes the state lock has been taken for this sandbox.
423443
func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) error {
424444
// If the sandbox is not yet created,
425445
// this function is being called when
@@ -445,6 +465,9 @@ func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) err
445465
}
446466

447467
func (s *Sandbox) RestoreStopped() {
468+
s.stateMutex.Lock()
469+
defer s.stateMutex.Unlock()
470+
448471
if s.fileExistsInInfraDir(sbStoppedFilename) {
449472
s.stopped = true
450473
}
@@ -467,11 +490,14 @@ func (s *Sandbox) fileExistsInInfraDir(filename string) bool {
467490

468491
// Created returns the created status of sandbox
469492
func (s *Sandbox) Created() bool {
493+
s.stateMutex.RLock()
494+
defer s.stateMutex.RUnlock()
495+
470496
return s.created
471497
}
472498

473499
func (s *Sandbox) State() types.PodSandboxState {
474-
if s.Ready(false) {
500+
if s.Ready() {
475501
return types.PodSandboxState_SANDBOX_READY
476502
}
477503
return types.PodSandboxState_SANDBOX_NOTREADY
@@ -482,24 +508,11 @@ func (s *Sandbox) State() types.PodSandboxState {
482508
// `takeLock` should be set if we need to take the lock to get the infra container's state.
483509
// If there is no infra container, it is never considered ready.
484510
// If the infra container is spoofed, the pod is considered ready when it has been created, but not stopped.
485-
func (s *Sandbox) Ready(takeLock bool) bool {
486-
podInfraContainer := s.InfraContainer()
487-
if podInfraContainer == nil {
488-
return false
489-
}
490-
if podInfraContainer.Spoofed() {
491-
return s.created && !s.stopped
492-
}
493-
// Assume the sandbox is ready, unless it has an infra container that
494-
// isn't running
495-
var cState *oci.ContainerState
496-
if takeLock {
497-
cState = podInfraContainer.State()
498-
} else {
499-
cState = podInfraContainer.StateNoLock()
500-
}
511+
func (s *Sandbox) Ready() bool {
512+
s.stateMutex.RLock()
513+
defer s.stateMutex.RUnlock()
501514

502-
return cState.Status == oci.ContainerStateRunning
515+
return s.created && !s.stopped
503516
}
504517

505518
// NeedsInfra is a function that returns whether the sandbox will need an infra container.

internal/lib/sandbox/sandbox_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ var _ = t.Describe("Sandbox", func() {
114114

115115
// Then
116116
Expect(testSandbox.Stopped()).To(BeTrue())
117+
Expect(testSandbox.Ready()).To(BeFalse())
117118
})
118119
})
119120

@@ -159,6 +160,7 @@ var _ = t.Describe("Sandbox", func() {
159160

160161
// Then
161162
Expect(testSandbox.Created()).To(BeTrue())
163+
Expect(testSandbox.Ready()).To(BeTrue())
162164
})
163165
})
164166

server/container_portforward.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ func (s StreamService) PortForward(ctx context.Context, podSandboxID string, por
4848
return fmt.Errorf("could not find sandbox %s", podSandboxID)
4949
}
5050

51-
if !sb.Ready(true) {
51+
if !sb.Ready() {
5252
return fmt.Errorf("sandbox %s is not running", podSandboxID)
5353
}
5454

server/sandbox_status.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *types.PodSandboxStat
2323
return nil, status.Errorf(codes.NotFound, "could not find pod %q: %v", req.PodSandboxId, err)
2424
}
2525

26-
rStatus := types.PodSandboxState_SANDBOX_NOTREADY
27-
if sb.Ready(true) {
28-
rStatus = types.PodSandboxState_SANDBOX_READY
29-
}
26+
rStatus := sb.State()
3027

3128
var linux *types.LinuxPodSandboxStatus
3229
if sb.NamespaceOptions() != nil {

server/server.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,12 @@ func (s *Server) handleExit(ctx context.Context, event fsnotify.Event) {
817817
}
818818
c = sb.InfraContainer()
819819
resource = "sandbox infra"
820+
// We discovered the infra container stopped (potentially unexpectedly).
821+
// Since sandboxes status is now being judged by the sb.stopped boolean,
822+
// rather than the infra container's status, we have to manually set stopped here.
823+
// It's likely we're doing double the work here, but that's better than missing it
824+
// if the infra container crashed.
825+
sb.SetStopped(ctx, true)
820826
} else {
821827
sb = s.GetSandbox(c.Sandbox())
822828
}

0 commit comments

Comments
 (0)