Skip to content

Commit 22d4f06

Browse files
Merge pull request #9382 from openshift-cherrypick-robot/cherry-pick-9372-to-release-1.33
[release-1.33] server: ensure CNI teardown prevents IP leaks with missing netns
2 parents 94372b0 + a3336a1 commit 22d4f06

File tree

2 files changed

+77
-10
lines changed

2 files changed

+77
-10
lines changed

server/sandbox_network.go

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"fmt"
66
"math"
77
"os"
8+
"path/filepath"
9+
"strings"
810
"time"
911

1012
cnitypes "github.com/containernetworking/cni/pkg/types"
@@ -18,6 +20,10 @@ import (
1820
"github.com/cri-o/cri-o/server/metrics"
1921
)
2022

23+
const (
24+
cacheDir = "/var/lib/cni/results"
25+
)
26+
2127
// networkStart sets up the sandbox's network and returns the pod IP on success
2228
// or an error.
2329
func (s *Server) networkStart(ctx context.Context, sb *sandbox.Sandbox) (podIPs []string, result cnitypes.Result, retErr error) {
@@ -185,43 +191,75 @@ func (s *Server) networkStop(ctx context.Context, sb *sandbox.Sandbox) error {
185191
}
186192

187193
// Check if the network namespace file exists and is valid before attempting CNI teardown.
188-
// If the file doesn't exist or is invalid, skip CNI teardown and mark network as stopped.
194+
// If the file doesn't exist or is invalid, we should still attempt CNI teardown using cached information
195+
// to prevent IP leaks, but we'll mark the network as stopped regardless of the outcome.
196+
netnsValid := true
197+
189198
if podNetwork.NetNS != "" {
190199
if _, statErr := os.Stat(podNetwork.NetNS); statErr != nil {
191-
// Network namespace file doesn't exist, mark network as stopped and return success
192-
log.Debugf(ctx, "Network namespace file %s does not exist for pod sandbox %s(%s), skipping CNI teardown",
200+
// Network namespace file doesn't exist, but we should still attempt CNI teardown
201+
log.Debugf(ctx, "Network namespace file %s does not exist for pod sandbox %s(%s), attempting CNI teardown with cached info",
193202
podNetwork.NetNS, sb.Name(), sb.ID())
194203

195-
return sb.SetNetworkStopped(ctx, true)
196-
}
197-
198-
if validateErr := s.validateNetworkNamespace(podNetwork.NetNS); validateErr != nil {
204+
netnsValid = false
205+
} else if validateErr := s.validateNetworkNamespace(podNetwork.NetNS); validateErr != nil {
199206
// Network namespace file exists but is invalid (e.g., corrupted or fake file)
200-
log.Warnf(ctx, "Network namespace file %s is invalid for pod sandbox %s(%s): %v, removing and skipping CNI teardown",
207+
log.Warnf(ctx, "Network namespace file %s is invalid for pod sandbox %s(%s): %v, removing and attempting CNI teardown with cached info",
201208
podNetwork.NetNS, sb.Name(), sb.ID(), validateErr)
202209
s.cleanupNetns(ctx, podNetwork.NetNS, sb)
203210

204-
return sb.SetNetworkStopped(ctx, true)
211+
netnsValid = false
205212
}
206213
}
207214

215+
// Always attempt CNI teardown to prevent IP leaks, even if netns is invalid.
208216
if err := s.config.CNIPlugin().TearDownPodWithContext(stopCtx, podNetwork); err != nil {
209217
log.Warnf(ctx, "Failed to destroy network for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
210218

211219
// If the network namespace exists but CNI teardown failed, try to clean it up.
212-
if podNetwork.NetNS != "" {
220+
if podNetwork.NetNS != "" && netnsValid {
213221
if _, statErr := os.Stat(podNetwork.NetNS); statErr == nil {
214222
// Clean up the netns file since CNI teardown failed.
215223
s.cleanupNetns(ctx, podNetwork.NetNS, sb)
216224
}
217225
}
218226

227+
// Clean up CNI result files if CNI teardown failed.
228+
s.cleanupCNIResultFiles(ctx, sb.ID())
229+
230+
// Even if CNI teardown failed, mark network as stopped to prevent retry loops.
231+
if setErr := sb.SetNetworkStopped(ctx, true); setErr != nil {
232+
log.Warnf(ctx, "Failed to set network stopped for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), setErr)
233+
}
234+
219235
return fmt.Errorf("network teardown failed for pod sandbox %s(%s): %w", sb.Name(), sb.ID(), err)
220236
}
221237

222238
return sb.SetNetworkStopped(ctx, true)
223239
}
224240

241+
// cleanupCNIResultFiles removes CNI result files for a given container ID.
242+
// This is called when CNI teardown fails to prevent stale result files from accumulating.
243+
func (s *Server) cleanupCNIResultFiles(ctx context.Context, containerID string) {
244+
entries, err := os.ReadDir(cacheDir)
245+
if err != nil {
246+
log.Warnf(ctx, "Failed to read CNI cache directory %s: %v", cacheDir, err)
247+
248+
return
249+
}
250+
251+
for _, entry := range entries {
252+
if !entry.IsDir() && strings.Contains(entry.Name(), containerID) {
253+
filePath := filepath.Join(cacheDir, entry.Name())
254+
if err := os.Remove(filePath); err != nil {
255+
log.Warnf(ctx, "Failed to remove CNI result file %s: %v", filePath, err)
256+
} else {
257+
log.Infof(ctx, "Cleaned up CNI result file %s for container %s", entry.Name(), containerID)
258+
}
259+
}
260+
}
261+
}
262+
225263
func (s *Server) newPodNetwork(ctx context.Context, sb *sandbox.Sandbox) (ocicni.PodNetwork, error) {
226264
_, span := log.StartSpan(ctx)
227265
defer span.End()

test/network.bats

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,3 +224,32 @@ function check_networking() {
224224
crictl stopp "$new_pod_id"
225225
crictl rmp "$new_pod_id"
226226
}
227+
228+
@test "CNI teardown called even with missing or invalid netns to prevent IP leaks" {
229+
start_crio
230+
231+
pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json)
232+
233+
# Get the network namespace path
234+
NETNS_PATH=/var/run/netns/
235+
NS=$(crictl inspectp "$pod_id" |
236+
jq -er '.info.runtimeSpec.linux.namespaces[] | select(.type == "network").path | sub("'$NETNS_PATH'"; "")')
237+
238+
output=$(crictl inspectp "$pod_id" | jq -r '.status.state')
239+
[[ "$output" == "SANDBOX_READY" ]]
240+
241+
# Stop the pod first to release the network namespace.
242+
crictl stopp "$pod_id"
243+
244+
# Now remove the network namespace file to simulate the issue.
245+
rm -f "$NETNS_PATH$NS"
246+
247+
# Remove the pod - this should still call CNI teardown.
248+
crictl rmp "$pod_id"
249+
250+
# Verify CNI teardown was called by checking logs.
251+
grep -q "Deleting pod.*from CNI network" "$CRIO_LOG"
252+
253+
# Verify the pod is gone.
254+
run ! crictl inspectp "$pod_id"
255+
}

0 commit comments

Comments
 (0)