Skip to content

Commit bfa7401

Browse files
committed
sandbox: use created/stopped instead of infra container for readiness
As we hit cases of deadlock when a sandbox is stuck stopping Signed-off-by: Peter Hunt <[email protected]>
1 parent 592e805 commit bfa7401

File tree

4 files changed

+32
-34
lines changed

4 files changed

+32
-34
lines changed

internal/lib/sandbox/sandbox.go

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,18 @@ type Sandbox struct {
5959
nsOpts *types.NamespaceOption
6060
dnsConfig *types.DNSConfig
6161
stopMutex sync.RWMutex
62-
created bool
63-
stopped bool
64-
networkStopped bool
65-
privileged bool
66-
hostNetwork bool
67-
usernsMode string
68-
containerEnvPath string
69-
podLinuxOverhead *types.LinuxContainerResources
70-
podLinuxResources *types.LinuxContainerResources
62+
// stateMutex protects the use of created, stopped and networkStopped bools
63+
// which are all fields that can change at runtime
64+
stateMutex sync.RWMutex
65+
created bool
66+
stopped bool
67+
networkStopped bool
68+
privileged bool
69+
hostNetwork bool
70+
usernsMode string
71+
containerEnvPath string
72+
podLinuxOverhead *types.LinuxContainerResources
73+
podLinuxResources *types.LinuxContainerResources
7174
}
7275

7376
// DefaultShmSize is the default shm size.
@@ -329,6 +332,8 @@ func (s *Sandbox) SetStopped(ctx context.Context, createFile bool) {
329332
ctx, span := log.StartSpan(ctx)
330333
defer span.End()
331334

335+
s.stateMutex.Lock()
336+
defer s.stateMutex.Unlock()
332337
if s.stopped {
333338
return
334339
}
@@ -349,11 +354,15 @@ func (s *Sandbox) Stopped() bool {
349354

350355
// SetCreated sets the created status of sandbox to true.
351356
func (s *Sandbox) SetCreated() {
357+
s.stateMutex.Lock()
358+
defer s.stateMutex.Unlock()
352359
s.created = true
353360
}
354361

355362
// NetworkStopped returns whether the network has been stopped.
356363
func (s *Sandbox) NetworkStopped() bool {
364+
s.stateMutex.Lock()
365+
defer s.stateMutex.Unlock()
357366
return s.networkStopped
358367
}
359368

@@ -368,6 +377,8 @@ func (s *Sandbox) SetNetworkStopped(ctx context.Context, createFile bool) error
368377
ctx, span := log.StartSpan(ctx)
369378
defer span.End()
370379

380+
s.stateMutex.Lock()
381+
defer s.stateMutex.Unlock()
371382
if s.networkStopped {
372383
return nil
373384
}
@@ -404,6 +415,7 @@ func (s *Sandbox) SetContainerEnvFile(ctx context.Context) error {
404415
return nil
405416
}
406417

418+
// This function assumes the state lock has been taken for this sandbox
407419
func (s *Sandbox) createFileInInfraDir(ctx context.Context, filename string) error {
408420
// If the sandbox is not yet created,
409421
// this function is being called when
@@ -459,11 +471,13 @@ func (s *Sandbox) fileExistsInInfraDir(filename string) bool {
459471

460472
// Created returns the created status of sandbox.
461473
func (s *Sandbox) Created() bool {
474+
s.stateMutex.Lock()
475+
defer s.stateMutex.Unlock()
462476
return s.created
463477
}
464478

465479
func (s *Sandbox) State() types.PodSandboxState {
466-
if s.Ready(false) {
480+
if s.Ready() {
467481
return types.PodSandboxState_SANDBOX_READY
468482
}
469483

@@ -475,23 +489,8 @@ func (s *Sandbox) State() types.PodSandboxState {
475489
// `takeLock` should be set if we need to take the lock to get the infra container's state.
476490
// If there is no infra container, it is never considered ready.
477491
// If the infra container is spoofed, the pod is considered ready when it has been created, but not stopped.
478-
func (s *Sandbox) Ready(takeLock bool) bool {
479-
podInfraContainer := s.InfraContainer()
480-
if podInfraContainer == nil {
481-
return false
482-
}
483-
484-
if podInfraContainer.Spoofed() {
485-
return s.created && !s.stopped
486-
}
487-
// Assume the sandbox is ready, unless it has an infra container that
488-
// isn't running
489-
var cState *oci.ContainerState
490-
if takeLock {
491-
cState = podInfraContainer.State()
492-
} else {
493-
cState = podInfraContainer.StateNoLock()
494-
}
495-
496-
return cState.Status == oci.ContainerStateRunning
492+
func (s *Sandbox) Ready() bool {
493+
s.stateMutex.Lock()
494+
defer s.stateMutex.Unlock()
495+
return s.created && !s.stopped
497496
}

internal/lib/sandbox/sandbox_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ var _ = t.Describe("Sandbox", func() {
139139

140140
// Then
141141
Expect(testSandbox.Stopped()).To(BeTrue())
142+
Expect(testSandbox.Ready()).To(BeFalse())
142143
})
143144
})
144145

@@ -184,6 +185,7 @@ var _ = t.Describe("Sandbox", func() {
184185

185186
// Then
186187
Expect(testSandbox.Created()).To(BeTrue())
188+
Expect(testSandbox.Ready()).To(BeTrue())
187189
})
188190
})
189191

server/container_portforward.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func (s *StreamService) PortForward(ctx context.Context, podSandboxID string, po
5151
return fmt.Errorf("could not find sandbox %s", podSandboxID)
5252
}
5353

54-
if !sb.Ready(true) {
54+
if !sb.Ready() {
5555
return fmt.Errorf("sandbox %s is not running", podSandboxID)
5656
}
5757

server/sandbox_status.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,7 @@ func (s *Server) PodSandboxStatus(ctx context.Context, req *types.PodSandboxStat
2525
return nil, status.Errorf(codes.NotFound, "could not find pod %q: %v", req.PodSandboxId, err)
2626
}
2727

28-
rStatus := types.PodSandboxState_SANDBOX_NOTREADY
29-
if sb.Ready(true) {
30-
rStatus = types.PodSandboxState_SANDBOX_READY
31-
}
28+
rStatus := sb.State()
3229

3330
var linux *types.LinuxPodSandboxStatus
3431
if sb.NamespaceOptions() != nil {

0 commit comments

Comments
 (0)