Skip to content

Commit abfadc8

Browse files
Merge pull request #9291 from openshift-cherrypick-robot/cherry-pick-9224-to-release-1.31
OCPBUGS-58201: [release-1.31] sandbox: use created/stopped instead of infra container for readiness
2 parents 0f69ddd + 0ae7583 commit abfadc8

File tree

7 files changed

+57
-33
lines changed

7 files changed

+57
-33
lines changed

internal/lib/container_server.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,11 @@ func (c *ContainerServer) LoadSandbox(ctx context.Context, id string) (sb *sandb
353353
}
354354

355355
sb.SetCreated()
356+
357+
if scontainer.State().Status == oci.ContainerStateStopped {
358+
sb.SetStopped(ctx, true)
359+
}
360+
356361
if err := label.ReserveLabel(processLabel); err != nil {
357362
return sb, err
358363
}

internal/lib/sandbox/sandbox.go

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,18 @@ type Sandbox struct {
5757
nsOpts *types.NamespaceOption
5858
dnsConfig *types.DNSConfig
5959
stopMutex sync.RWMutex
60-
created bool
61-
stopped bool
62-
networkStopped bool
63-
privileged bool
64-
hostNetwork bool
65-
usernsMode string
66-
containerEnvPath string
67-
podLinuxOverhead *types.LinuxContainerResources
68-
podLinuxResources *types.LinuxContainerResources
60+
// stateMutex protects the use of created, stopped and networkStopped bools
61+
// which are all fields that can change at runtime
62+
stateMutex sync.RWMutex
63+
created bool
64+
stopped bool
65+
networkStopped bool
66+
privileged bool
67+
hostNetwork bool
68+
usernsMode string
69+
containerEnvPath string
70+
podLinuxOverhead *types.LinuxContainerResources
71+
podLinuxResources *types.LinuxContainerResources
6972
}
7073

7174
// DefaultShmSize is the default shm size.
@@ -353,6 +356,10 @@ func (s *Sandbox) RemoveInfraContainer() {
353356
func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) {
354357
ctx, span := log.StartSpan(ctx)
355358
defer span.End()
359+
360+
s.stateMutex.Lock()
361+
defer s.stateMutex.Unlock()
362+
356363
if s.stopped {
357364
return
358365
}
@@ -367,16 +374,24 @@ func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) {
367374
// Stopped returns whether the sandbox state has been
368375
// set to stopped.
369376
func (s *Sandbox) Stopped() bool {
377+
s.stateMutex.RLock()
378+
defer s.stateMutex.RUnlock()
379+
370380
return s.stopped
371381
}
372382

373383
// SetCreated sets the created status of sandbox to true.
374384
func (s *Sandbox) SetCreated() {
385+
s.stateMutex.Lock()
386+
defer s.stateMutex.Unlock()
375387
s.created = true
376388
}
377389

378390
// NetworkStopped returns whether the network has been stopped.
379391
func (s *Sandbox) NetworkStopped() bool {
392+
s.stateMutex.RLock()
393+
defer s.stateMutex.RUnlock()
394+
380395
return s.networkStopped
381396
}
382397

@@ -390,6 +405,10 @@ func (s *Sandbox) NetworkStopped() bool {
390405
func (s *Sandbox) SetNetworkStopped(ctx context.Context, createFile bool) error {
391406
ctx, span := log.StartSpan(ctx)
392407
defer span.End()
408+
409+
s.stateMutex.Lock()
410+
defer s.stateMutex.Unlock()
411+
393412
if s.networkStopped {
394413
return nil
395414
}
@@ -421,6 +440,7 @@ func (s *Sandbox) SetContainerEnvFile(ctx context.Context) error {
421440
return nil
422441
}
423442

443+
// This function assumes the state lock has been taken for this sandbox.
424444
func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) error {
425445
// If the sandbox is not yet created,
426446
// this function is being called when
@@ -446,6 +466,9 @@ func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) err
446466
}
447467

448468
func (s *Sandbox) RestoreStopped() {
469+
s.stateMutex.Lock()
470+
defer s.stateMutex.Unlock()
471+
449472
if s.fileExistsInInfraDir(sbStoppedFilename) {
450473
s.stopped = true
451474
}
@@ -468,11 +491,14 @@ func (s *Sandbox) fileExistsInInfraDir(filename string) bool {
468491

469492
// Created returns the created status of sandbox.
470493
func (s *Sandbox) Created() bool {
494+
s.stateMutex.RLock()
495+
defer s.stateMutex.RUnlock()
496+
471497
return s.created
472498
}
473499

474500
func (s *Sandbox) State() types.PodSandboxState {
475-
if s.Ready(false) {
501+
if s.Ready() {
476502
return types.PodSandboxState_SANDBOX_READY
477503
}
478504
return types.PodSandboxState_SANDBOX_NOTREADY
@@ -483,22 +509,9 @@ func (s *Sandbox) State() types.PodSandboxState {
483509
// `takeLock` should be set if we need to take the lock to get the infra container's state.
484510
// If there is no infra container, it is never considered ready.
485511
// If the infra container is spoofed, the pod is considered ready when it has been created, but not stopped.
486-
func (s *Sandbox) Ready(takeLock bool) bool {
487-
podInfraContainer := s.InfraContainer()
488-
if podInfraContainer == nil {
489-
return false
490-
}
491-
if podInfraContainer.Spoofed() {
492-
return s.created && !s.stopped
493-
}
494-
// Assume the sandbox is ready, unless it has an infra container that
495-
// isn't running
496-
var cState *oci.ContainerState
497-
if takeLock {
498-
cState = podInfraContainer.State()
499-
} else {
500-
cState = podInfraContainer.StateNoLock()
501-
}
512+
func (s *Sandbox) Ready() bool {
513+
s.stateMutex.RLock()
514+
defer s.stateMutex.RUnlock()
502515

503-
return cState.Status == oci.ContainerStateRunning
516+
return s.created && !s.stopped
504517
}

internal/lib/sandbox/sandbox_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ var _ = t.Describe("Sandbox", func() {
115115

116116
// Then
117117
Expect(testSandbox.Stopped()).To(BeTrue())
118+
Expect(testSandbox.Ready()).To(BeFalse())
118119
})
119120
})
120121

@@ -160,6 +161,7 @@ var _ = t.Describe("Sandbox", func() {
160161

161162
// Then
162163
Expect(testSandbox.Created()).To(BeTrue())
164+
Expect(testSandbox.Ready()).To(BeTrue())
163165
})
164166
})
165167

server/container_portforward.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ func (s StreamService) PortForward(ctx context.Context, podSandboxID string, por
5050
return fmt.Errorf("could not find sandbox %s", podSandboxID)
5151
}
5252

53-
if !sb.Ready(true) {
53+
if !sb.Ready() {
5454
return fmt.Errorf("sandbox %s is not running", podSandboxID)
5555
}
5656

server/sandbox_list_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,9 @@ var _ = t.Describe("ListPodSandbox", func() {
9696
// Given
9797
mockDirs(testManifest)
9898
createDummyState()
99-
_, err := sut.LoadSandbox(context.Background(), sandboxID)
99+
sb, err := sut.LoadSandbox(context.Background(), sandboxID)
100100
Expect(err).ToNot(HaveOccurred())
101+
sb.SetStopped(context.Background(), false)
101102

102103
// When
103104
response, err := sut.ListPodSandbox(context.Background(),

server/sandbox_status.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *types.PodSandboxStat
2424
return nil, status.Errorf(codes.NotFound, "could not find pod %q: %v", req.PodSandboxId, err)
2525
}
2626

27-
rStatus := types.PodSandboxState_SANDBOX_NOTREADY
28-
if sb.Ready(true) {
29-
rStatus = types.PodSandboxState_SANDBOX_READY
30-
}
27+
rStatus := sb.State()
3128

3229
var linux *types.LinuxPodSandboxStatus
3330
if sb.NamespaceOptions() != nil {

server/server.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,12 @@ func (s *Server) handleExit(ctx context.Context, event fsnotify.Event) {
808808
}
809809
c = sb.InfraContainer()
810810
resource = "sandbox infra"
811+
// We discovered the infra container stopped (potentially unexpectedly).
812+
// Since sandboxes status is now being judged by the sb.stopped boolean,
813+
// rather than the infra container's status, we have to manually set stopped here.
814+
// It's likely we're doing double the work here, but that's better than missing it
815+
// if the infra container crashed.
816+
sb.SetStopped(ctx, true)
811817
} else {
812818
sb = s.GetSandbox(c.Sandbox())
813819
}

0 commit comments

Comments
 (0)