diff --git a/server/sandbox_network.go b/server/sandbox_network.go index 2f81724ff1e..c2c75922c3c 100644 --- a/server/sandbox_network.go +++ b/server/sandbox_network.go @@ -195,7 +195,16 @@ func (s *Server) networkStop(ctx context.Context, sb *sandbox.Sandbox) error { // to prevent IP leaks, but we'll mark the network as stopped regardless of the outcome. netnsValid := true - if podNetwork.NetNS != "" { + if podNetwork.NetNS == "" { + // Network namespace path is unexpectedly empty. This can happen when: + // 1) infra container process died + // 2) namespace not properly initialized + // 3) namespace was already cleaned up + log.Warnf(ctx, "Network namespace path is empty for pod sandbox %s(%s), attempting CNI teardown with cached info", + sb.Name(), sb.ID()) + + netnsValid = false + } else { if _, statErr := os.Stat(podNetwork.NetNS); statErr != nil { // Network namespace file doesn't exist, but we should still attempt CNI teardown log.Debugf(ctx, "Network namespace file %s does not exist for pod sandbox %s(%s), attempting CNI teardown with cached info", @@ -214,6 +223,16 @@ func (s *Server) networkStop(ctx context.Context, sb *sandbox.Sandbox) error { // Always attempt CNI teardown to prevent IP leaks, even if netns is invalid. if err := s.config.CNIPlugin().TearDownPodWithContext(stopCtx, podNetwork); err != nil { + if !netnsValid { + // This is expected when the network namespace is missing/invalid. + log.Debugf(ctx, "CNI teardown failed due to missing/invalid network namespace for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err) + + // Clean up CNI result files even when NetNS is invalid. + s.cleanupCNIResultFiles(ctx, sb.ID()) + + return sb.SetNetworkStopped(ctx, true) + } + log.Warnf(ctx, "Failed to destroy network for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err) // If the network namespace exists but CNI teardown failed, try to clean it up. diff --git a/server/sandbox_stop_test.go b/server/sandbox_stop_test.go index f7267e4b905..06e25fc7507 100644 --- a/server/sandbox_stop_test.go +++ b/server/sandbox_stop_test.go @@ -5,7 +5,6 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "go.uber.org/mock/gomock" types "k8s.io/cri-api/pkg/apis/runtime/v1" ) @@ -25,7 +24,7 @@ var _ = t.Describe("PodSandboxStatus", func() { // Given addContainerAndSandbox() testSandbox.SetStopped(ctx, false) - Expect(testSandbox.SetNetworkStopped(ctx, false)).To(Succeed()) + Expect(testSandbox.SetNetworkStopped(ctx, true)).To(Succeed()) // When _, err := sut.StopPodSandbox(context.Background(), @@ -45,22 +44,6 @@ var _ = t.Describe("PodSandboxStatus", func() { Expect(err).ToNot(HaveOccurred()) }) - It("should fail when container is not stopped", func() { - // Given - addContainerAndSandbox() - gomock.InOrder( - cniPluginMock.EXPECT().GetDefaultNetworkName().Return(""), - cniPluginMock.EXPECT().TearDownPodWithContext(gomock.Any(), gomock.Any()).Return(t.TestError), - ) - - // When - _, err := sut.StopPodSandbox(context.Background(), - &types.StopPodSandboxRequest{PodSandboxId: testSandbox.ID()}) - - // Then - Expect(err).To(HaveOccurred()) - }) - It("should fail with empty sandbox ID", func() { // Given // When diff --git a/test/network.bats b/test/network.bats index 6c4348e0f47..af6def8b08c 100644 --- a/test/network.bats +++ b/test/network.bats @@ -253,3 +253,31 @@ function check_networking() { # Verify the pod is gone. run ! crictl inspectp "$pod_id" } + +@test "pod deletion succeeds when NetNS path is missing" { + start_crio + + pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json) + ctr_id=$(crictl create "$pod_id" "$TESTDATA"/container_redis.json "$TESTDATA"/sandbox_config.json) + crictl start "$ctr_id" + + crictl ps --id "$ctr_id" | grep Running + + netns_path=$(crictl inspectp "$pod_id" | jq -r '.status.network.namespace_path') + + # Simulating the scenario where infra container dies and NetNS becomes invalid + # by removing the network namespace file. + if [[ -n "$netns_path" && -f "$netns_path" ]]; then + rm -f "$netns_path" + fi + + crictl stop "$ctr_id" + crictl rm "$ctr_id" + + crictl stopp "$pod_id" + + # Pod deletion should succeed even with missing NetNS + crictl rmp "$pod_id" + + run ! crictl inspectp "$pod_id" +}