Skip to content

Commit b6af3c8

Browse files
Merge pull request #9310 from openshift-cherrypick-robot/cherry-pick-9291-to-release-1.30
OCPBUGS-58286: [release-1.30] : sandbox: use created/stopped instead of infra container for readiness
2 parents 0a197ae + 841de9a commit b6af3c8

File tree

7 files changed

+57
-33
lines changed

7 files changed

+57
-33
lines changed

internal/lib/container_server.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,11 @@ func (c *ContainerServer) LoadSandbox(ctx context.Context, id string) (sb *sandb
358358
}
359359

360360
sb.SetCreated()
361+
362+
if scontainer.State().Status == oci.ContainerStateStopped {
363+
sb.SetStopped(ctx, true)
364+
}
365+
361366
if err := label.ReserveLabel(processLabel); err != nil {
362367
return sb, err
363368
}

internal/lib/sandbox/sandbox.go

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,18 @@ type Sandbox struct {
5656
nsOpts *types.NamespaceOption
5757
dnsConfig *types.DNSConfig
5858
stopMutex sync.RWMutex
59-
created bool
60-
stopped bool
61-
networkStopped bool
62-
privileged bool
63-
hostNetwork bool
64-
usernsMode string
65-
containerEnvPath string
66-
podLinuxOverhead *types.LinuxContainerResources
67-
podLinuxResources *types.LinuxContainerResources
59+
// stateMutex protects the use of created, stopped and networkStopped bools
60+
// which are all fields that can change at runtime
61+
stateMutex sync.RWMutex
62+
created bool
63+
stopped bool
64+
networkStopped bool
65+
privileged bool
66+
hostNetwork bool
67+
usernsMode string
68+
containerEnvPath string
69+
podLinuxOverhead *types.LinuxContainerResources
70+
podLinuxResources *types.LinuxContainerResources
6871
}
6972

7073
// DefaultShmSize is the default shm size
@@ -352,6 +355,10 @@ func (s *Sandbox) RemoveInfraContainer() {
352355
func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) {
353356
ctx, span := log.StartSpan(ctx)
354357
defer span.End()
358+
359+
s.stateMutex.Lock()
360+
defer s.stateMutex.Unlock()
361+
355362
if s.stopped {
356363
return
357364
}
@@ -366,16 +373,24 @@ func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) {
366373
// Stopped returns whether the sandbox state has been
367374
// set to stopped.
368375
func (s *Sandbox) Stopped() bool {
376+
s.stateMutex.RLock()
377+
defer s.stateMutex.RUnlock()
378+
369379
return s.stopped
370380
}
371381

372382
// SetCreated sets the created status of sandbox to true
373383
func (s *Sandbox) SetCreated() {
384+
s.stateMutex.Lock()
385+
defer s.stateMutex.Unlock()
374386
s.created = true
375387
}
376388

377389
// NetworkStopped returns whether the network has been stopped
378390
func (s *Sandbox) NetworkStopped() bool {
391+
s.stateMutex.RLock()
392+
defer s.stateMutex.RUnlock()
393+
379394
return s.networkStopped
380395
}
381396

@@ -389,6 +404,10 @@ func (s *Sandbox) NetworkStopped() bool {
389404
func (s *Sandbox) SetNetworkStopped(ctx context.Context, createFile bool) error {
390405
ctx, span := log.StartSpan(ctx)
391406
defer span.End()
407+
408+
s.stateMutex.Lock()
409+
defer s.stateMutex.Unlock()
410+
392411
if s.networkStopped {
393412
return nil
394413
}
@@ -420,6 +439,7 @@ func (s *Sandbox) SetContainerEnvFile(ctx context.Context) error {
420439
return nil
421440
}
422441

442+
// This function assumes the state lock has been taken for this sandbox.
423443
func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) error {
424444
// If the sandbox is not yet created,
425445
// this function is being called when
@@ -445,6 +465,9 @@ func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) err
445465
}
446466

447467
func (s *Sandbox) RestoreStopped() {
468+
s.stateMutex.Lock()
469+
defer s.stateMutex.Unlock()
470+
448471
if s.fileExistsInInfraDir(sbStoppedFilename) {
449472
s.stopped = true
450473
}
@@ -467,11 +490,14 @@ func (s *Sandbox) fileExistsInInfraDir(filename string) bool {
467490

468491
// Created returns the created status of sandbox
469492
func (s *Sandbox) Created() bool {
493+
s.stateMutex.RLock()
494+
defer s.stateMutex.RUnlock()
495+
470496
return s.created
471497
}
472498

473499
func (s *Sandbox) State() types.PodSandboxState {
474-
if s.Ready(false) {
500+
if s.Ready() {
475501
return types.PodSandboxState_SANDBOX_READY
476502
}
477503
return types.PodSandboxState_SANDBOX_NOTREADY
@@ -482,22 +508,9 @@ func (s *Sandbox) State() types.PodSandboxState {
482508
// `takeLock` should be set if we need to take the lock to get the infra container's state.
483509
// If there is no infra container, it is never considered ready.
484510
// If the infra container is spoofed, the pod is considered ready when it has been created, but not stopped.
485-
func (s *Sandbox) Ready(takeLock bool) bool {
486-
podInfraContainer := s.InfraContainer()
487-
if podInfraContainer == nil {
488-
return false
489-
}
490-
if podInfraContainer.Spoofed() {
491-
return s.created && !s.stopped
492-
}
493-
// Assume the sandbox is ready, unless it has an infra container that
494-
// isn't running
495-
var cState *oci.ContainerState
496-
if takeLock {
497-
cState = podInfraContainer.State()
498-
} else {
499-
cState = podInfraContainer.StateNoLock()
500-
}
511+
func (s *Sandbox) Ready() bool {
512+
s.stateMutex.RLock()
513+
defer s.stateMutex.RUnlock()
501514

502-
return cState.Status == oci.ContainerStateRunning
515+
return s.created && !s.stopped
503516
}

internal/lib/sandbox/sandbox_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ var _ = t.Describe("Sandbox", func() {
114114

115115
// Then
116116
Expect(testSandbox.Stopped()).To(BeTrue())
117+
Expect(testSandbox.Ready()).To(BeFalse())
117118
})
118119
})
119120

@@ -159,6 +160,7 @@ var _ = t.Describe("Sandbox", func() {
159160

160161
// Then
161162
Expect(testSandbox.Created()).To(BeTrue())
163+
Expect(testSandbox.Ready()).To(BeTrue())
162164
})
163165
})
164166

server/container_portforward.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func (s StreamService) PortForward(ctx context.Context, podSandboxID string, por
4949
return fmt.Errorf("could not find sandbox %s", podSandboxID)
5050
}
5151

52-
if !sb.Ready(true) {
52+
if !sb.Ready() {
5353
return fmt.Errorf("sandbox %s is not running", podSandboxID)
5454
}
5555

server/sandbox_list_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,9 @@ var _ = t.Describe("ListPodSandbox", func() {
9595
// Given
9696
mockDirs(testManifest)
9797
createDummyState()
98-
_, err := sut.LoadSandbox(context.Background(), sandboxID)
98+
sb, err := sut.LoadSandbox(context.Background(), sandboxID)
9999
Expect(err).ToNot(HaveOccurred())
100+
sb.SetStopped(context.Background(), false)
100101

101102
// When
102103
response, err := sut.ListPodSandbox(context.Background(),

server/sandbox_status.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *types.PodSandboxStat
2323
return nil, status.Errorf(codes.NotFound, "could not find pod %q: %v", req.PodSandboxId, err)
2424
}
2525

26-
rStatus := types.PodSandboxState_SANDBOX_NOTREADY
27-
if sb.Ready(true) {
28-
rStatus = types.PodSandboxState_SANDBOX_READY
29-
}
26+
rStatus := sb.State()
3027

3128
var linux *types.LinuxPodSandboxStatus
3229
if sb.NamespaceOptions() != nil {

server/server.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,12 @@ func (s *Server) handleExit(ctx context.Context, event fsnotify.Event) {
802802
}
803803
c = sb.InfraContainer()
804804
resource = "sandbox infra"
805+
// We discovered the infra container stopped (potentially unexpectedly).
806+
// Since sandboxes status is now being judged by the sb.stopped boolean,
807+
// rather than the infra container's status, we have to manually set stopped here.
808+
// It's likely we're doing double the work here, but that's better than missing it
809+
// if the infra container crashed.
810+
sb.SetStopped(ctx, true)
805811
} else {
806812
sb = s.GetSandbox(c.Sandbox())
807813
}

0 commit comments

Comments
 (0)