Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 48 additions & 10 deletions server/sandbox_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"fmt"
"math"
"os"
"path/filepath"
"strings"
"time"

cnitypes "github.com/containernetworking/cni/pkg/types"
Expand All @@ -18,6 +20,10 @@ import (
"github.com/cri-o/cri-o/server/metrics"
)

const (
cacheDir = "/var/lib/cni/results"
)

// networkStart sets up the sandbox's network and returns the pod IP on success
// or an error.
func (s *Server) networkStart(ctx context.Context, sb *sandbox.Sandbox) (podIPs []string, result cnitypes.Result, retErr error) {
Expand Down Expand Up @@ -185,43 +191,75 @@ func (s *Server) networkStop(ctx context.Context, sb *sandbox.Sandbox) error {
}

// Check if the network namespace file exists and is valid before attempting CNI teardown.
// If the file doesn't exist or is invalid, skip CNI teardown and mark network as stopped.
// If the file doesn't exist or is invalid, we should still attempt CNI teardown using cached information
// to prevent IP leaks, but we'll mark the network as stopped regardless of the outcome.
netnsValid := true

if podNetwork.NetNS != "" {
if _, statErr := os.Stat(podNetwork.NetNS); statErr != nil {
// Network namespace file doesn't exist, mark network as stopped and return success
log.Debugf(ctx, "Network namespace file %s does not exist for pod sandbox %s(%s), skipping CNI teardown",
// Network namespace file doesn't exist, but we should still attempt CNI teardown
log.Debugf(ctx, "Network namespace file %s does not exist for pod sandbox %s(%s), attempting CNI teardown with cached info",
podNetwork.NetNS, sb.Name(), sb.ID())

return sb.SetNetworkStopped(ctx, true)
}

if validateErr := s.validateNetworkNamespace(podNetwork.NetNS); validateErr != nil {
netnsValid = false
} else if validateErr := s.validateNetworkNamespace(podNetwork.NetNS); validateErr != nil {
// Network namespace file exists but is invalid (e.g., corrupted or fake file)
log.Warnf(ctx, "Network namespace file %s is invalid for pod sandbox %s(%s): %v, removing and skipping CNI teardown",
log.Warnf(ctx, "Network namespace file %s is invalid for pod sandbox %s(%s): %v, removing and attempting CNI teardown with cached info",
podNetwork.NetNS, sb.Name(), sb.ID(), validateErr)
s.cleanupNetns(ctx, podNetwork.NetNS, sb)

return sb.SetNetworkStopped(ctx, true)
netnsValid = false
}
}

// Always attempt CNI teardown to prevent IP leaks, even if netns is invalid.
if err := s.config.CNIPlugin().TearDownPodWithContext(stopCtx, podNetwork); err != nil {
log.Warnf(ctx, "Failed to destroy network for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err)

// If the network namespace exists but CNI teardown failed, try to clean it up.
if podNetwork.NetNS != "" {
if podNetwork.NetNS != "" && netnsValid {
if _, statErr := os.Stat(podNetwork.NetNS); statErr == nil {
// Clean up the netns file since CNI teardown failed.
s.cleanupNetns(ctx, podNetwork.NetNS, sb)
}
}

// Clean up CNI result files if CNI teardown failed.
s.cleanupCNIResultFiles(ctx, sb.ID())

// Even if CNI teardown failed, mark network as stopped to prevent retry loops.
if setErr := sb.SetNetworkStopped(ctx, true); setErr != nil {
log.Warnf(ctx, "Failed to set network stopped for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), setErr)
}

return fmt.Errorf("network teardown failed for pod sandbox %s(%s): %w", sb.Name(), sb.ID(), err)
}

return sb.SetNetworkStopped(ctx, true)
}

// cleanupCNIResultFiles removes CNI result files for a given container ID.
// This is called when CNI teardown fails to prevent stale result files from accumulating.
func (s *Server) cleanupCNIResultFiles(ctx context.Context, containerID string) {
entries, err := os.ReadDir(cacheDir)
if err != nil {
log.Warnf(ctx, "Failed to read CNI cache directory %s: %v", cacheDir, err)

return
}

for _, entry := range entries {
if !entry.IsDir() && strings.Contains(entry.Name(), containerID) {
filePath := filepath.Join(cacheDir, entry.Name())
if err := os.Remove(filePath); err != nil {
log.Warnf(ctx, "Failed to remove CNI result file %s: %v", filePath, err)
} else {
log.Infof(ctx, "Cleaned up CNI result file %s for container %s", entry.Name(), containerID)
}
}
}
}

func (s *Server) newPodNetwork(ctx context.Context, sb *sandbox.Sandbox) (ocicni.PodNetwork, error) {
_, span := log.StartSpan(ctx)
defer span.End()
Expand Down
29 changes: 29 additions & 0 deletions test/network.bats
Original file line number Diff line number Diff line change
Expand Up @@ -224,3 +224,32 @@ function check_networking() {
crictl stopp "$new_pod_id"
crictl rmp "$new_pod_id"
}

@test "CNI teardown called even with missing or invalid netns to prevent IP leaks" {
start_crio

pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json)

# Get the network namespace path
NETNS_PATH=/var/run/netns/
NS=$(crictl inspectp "$pod_id" |
jq -er '.info.runtimeSpec.linux.namespaces[] | select(.type == "network").path | sub("'$NETNS_PATH'"; "")')

output=$(crictl inspectp "$pod_id" | jq -r '.status.state')
[[ "$output" == "SANDBOX_READY" ]]

# Stop the pod first to release the network namespace.
crictl stopp "$pod_id"

# Now remove the network namespace file to simulate the issue.
rm -f "$NETNS_PATH$NS"

# Remove the pod - this should still call CNI teardown.
crictl rmp "$pod_id"

# Verify CNI teardown was called by checking logs.
grep -q "Deleting pod.*from CNI network" "$CRIO_LOG"

# Verify the pod is gone.
run ! crictl inspectp "$pod_id"
}
Loading