Skip to content

Commit aaefa6b

Browse files
Merge pull request cri-o#5093 from haircommander/exec-inotify-1.21
[1.21] oci: wait until pidfile to be written before starting timer
2 parents e929d72 + fd6dcf3 commit aaefa6b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+4954
-52
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ require (
4747
github.com/pkg/errors v0.9.1
4848
github.com/prometheus/client_golang v1.10.0
4949
github.com/psampaz/go-mod-outdated v0.7.0
50+
github.com/rjeczalik/notify v0.9.2
5051
github.com/sirupsen/logrus v1.8.1
5152
github.com/soheilhy/cmux v0.1.5
5253
github.com/stretchr/testify v1.7.0

go.sum

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uY
11391139
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
11401140
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
11411141
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
1142+
github.com/rjeczalik/notify v0.9.2 h1:MiTWrPj55mNDHEiIX5YUSKefw/+lCQVoAFmD6oQm5w8=
1143+
github.com/rjeczalik/notify v0.9.2/go.mod h1:aErll2f0sUX9PXZnVNyeiObbmTlk5jnMoCa4QEjJeqM=
11421144
github.com/robfig/cron v1.1.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k=
11431145
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
11441146
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
@@ -1519,6 +1521,7 @@ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJ
15191521
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15201522
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15211523
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
1524+
golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15221525
golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15231526
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15241527
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=

internal/oci/runtime_oci.go

Lines changed: 71 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ import (
2121
"github.com/cri-o/cri-o/server/cri/types"
2222
"github.com/cri-o/cri-o/server/metrics"
2323
"github.com/cri-o/cri-o/utils"
24-
"github.com/fsnotify/fsnotify"
2524
json "github.com/json-iterator/go"
2625
rspec "github.com/opencontainers/runtime-spec/specs-go"
2726
"github.com/pkg/errors"
27+
"github.com/rjeczalik/notify"
2828
"github.com/sirupsen/logrus"
2929
"golang.org/x/net/context"
3030
"golang.org/x/sys/unix"
@@ -343,18 +343,41 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
343343
cmd.Stdout = &stdoutBuf
344344
cmd.Stderr = &stderrBuf
345345

346-
err = cmd.Start()
346+
pidFileCreatedDone := make(chan struct{}, 1)
347+
pidFileCreatedCh, err := WatchForFile(pidFile, pidFileCreatedDone, notify.InModify, notify.InMovedTo)
347348
if err != nil {
349+
return nil, errors.Wrapf(err, "failed to watch %s", pidFile)
350+
}
351+
352+
doneErr := cmd.Start()
353+
if doneErr != nil {
348354
return nil, err
349355
}
350356

351357
// wait till the command is done
352358
done := make(chan error, 1)
353359
go func() {
354360
done <- cmd.Wait()
361+
close(done)
355362
}()
356363

357-
if timeout > 0 {
364+
// First, wait for the pid file to be created.
365+
// When it is, the timer begins for the exec process.
366+
// If the command fails before that happens, however,
367+
// that needs to be caught.
368+
select {
369+
case <-pidFileCreatedCh:
370+
case doneErr = <-done:
371+
}
372+
close(pidFileCreatedDone)
373+
374+
switch {
375+
case doneErr != nil:
376+
// If we've already gotten an error from done
377+
// the runtime finished before writing the pid file
378+
// (probably because the command didn't exist).
379+
case timeout > 0:
380+
// If there's a timeout, wait for that timeout duration.
358381
select {
359382
case <-time.After(time.Second * time.Duration(timeout)):
360383
// Ensure the process is not left behind
@@ -370,17 +393,18 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
370393
Stderr: []byte(conmonconfig.TimedOutMessage),
371394
ExitCode: -1,
372395
}, nil
373-
case err = <-done:
396+
case doneErr = <-done:
374397
break
375398
}
376-
} else {
377-
err = <-done
399+
default:
400+
// If no timeout, just wait until the command finishes.
401+
doneErr = <-done
378402
}
379403

380404
// gather exit code from err
381405
exitCode := int32(0)
382-
if err != nil {
383-
if exitError, ok := err.(*exec.ExitError); ok {
406+
if doneErr != nil {
407+
if exitError, ok := doneErr.(*exec.ExitError); ok {
384408
exitCode = int32(exitError.ExitCode())
385409
}
386410
}
@@ -1017,58 +1041,22 @@ func (r *runtimeOCI) ReopenContainerLog(ctx context.Context, c *Container) error
10171041
}
10181042
defer controlFile.Close()
10191043

1020-
watcher, err := fsnotify.NewWatcher()
1044+
done := make(chan struct{}, 1)
1045+
ch, err := WatchForFile(c.LogPath(), done, notify.InCreate, notify.InModify)
10211046
if err != nil {
1022-
return fmt.Errorf("failed to create new watch: %v", err)
1023-
}
1024-
defer watcher.Close()
1025-
1026-
done := make(chan struct{})
1027-
doneClosed := false
1028-
errorCh := make(chan error)
1029-
go func() {
1030-
for {
1031-
select {
1032-
case event := <-watcher.Events:
1033-
log.Debugf(ctx, "event: %v", event)
1034-
if event.Op&fsnotify.Create == fsnotify.Create || event.Op&fsnotify.Write == fsnotify.Write {
1035-
log.Debugf(ctx, "file created %s", event.Name)
1036-
if event.Name == c.LogPath() {
1037-
log.Debugf(ctx, "expected log file created")
1038-
done <- struct{}{}
1039-
return
1040-
}
1041-
}
1042-
case err := <-watcher.Errors:
1043-
errorCh <- fmt.Errorf("watch error for container log reopen %v: %v", c.ID(), err)
1044-
close(errorCh)
1045-
return
1046-
}
1047-
}
1048-
}()
1049-
cLogDir := filepath.Dir(c.LogPath())
1050-
if err := watcher.Add(cLogDir); err != nil {
1051-
log.Errorf(ctx, "watcher.Add(%q) failed: %s", cLogDir, err)
1052-
close(done)
1053-
doneClosed = true
1047+
return errors.Wrapf(err, "failed to create watch for %s", c.LogPath())
10541048
}
10551049

10561050
if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 2, 0, 0); err != nil {
10571051
log.Debugf(ctx, "Failed to write to control file to reopen log file: %v", err)
10581052
}
1059-
10601053
select {
1061-
case err := <-errorCh:
1062-
if !doneClosed {
1063-
close(done)
1064-
}
1065-
return err
1066-
case <-done:
1067-
if !doneClosed {
1068-
close(done)
1069-
}
1070-
break
1054+
case <-ch:
1055+
case <-time.After(time.Minute * 3):
1056+
// Give up after 3 minutes, as something wrong probably happened
1057+
log.Errorf(ctx, "Failed to reopen log file for container %s: timed out", c.ID())
10711058
}
1059+
close(done)
10721060

10731061
return nil
10741062
}
@@ -1112,3 +1100,34 @@ func prepareProcessExec(c *Container, cmd []string, tty bool) (processFile strin
11121100
func (c *Container) conmonPidFilePath() string {
11131101
return filepath.Join(c.bundlePath, "conmon-pidfile")
11141102
}
1103+
1104+
// WatchForFile creates a watch on the parent directory of path, looking for events opsToWatch.
1105+
// It returns immediately with a channel to find when path had one of those events.
1106+
// done can be used to stop the watch.
1107+
// WatchForFile is responsible for closing all internal channels and the returned channel, but not for closing done.
1108+
func WatchForFile(path string, done chan struct{}, opsToWatch ...notify.Event) (chan struct{}, error) {
1109+
eiCh := make(chan notify.EventInfo, 1)
1110+
ch := make(chan struct{})
1111+
1112+
dir := filepath.Dir(path)
1113+
if err := notify.Watch(dir, eiCh, opsToWatch...); err != nil {
1114+
return nil, err
1115+
}
1116+
go func() {
1117+
defer close(ch)
1118+
defer close(eiCh)
1119+
defer notify.Stop(eiCh)
1120+
for {
1121+
select {
1122+
case ei := <-eiCh:
1123+
if ei.Path() == path {
1124+
ch <- struct{}{}
1125+
return
1126+
}
1127+
case <-done:
1128+
return
1129+
}
1130+
}
1131+
}()
1132+
return ch, nil
1133+
}

internal/oci/runtime_oci_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@ package oci_test
33
import (
44
"context"
55
"math/rand"
6+
"os"
67
"os/exec"
8+
"path/filepath"
79
"time"
810

911
"github.com/cri-o/cri-o/internal/oci"
1012
. "github.com/onsi/ginkgo"
1113
. "github.com/onsi/gomega"
1214
"github.com/pkg/errors"
15+
"github.com/rjeczalik/notify"
1316
)
1417

1518
const (
@@ -142,6 +145,71 @@ var _ = t.Describe("Oci", func() {
142145
})
143146
}
144147
})
148+
t.Describe("WatchForFile", func() {
149+
var notifyFile string
150+
var done chan struct{}
151+
BeforeEach(func() {
152+
notifyFile = filepath.Join(t.MustTempDir("watch"), "file")
153+
done = make(chan struct{}, 1)
154+
})
155+
It("should catch file creation", func() {
156+
// Given
157+
ch, err := oci.WatchForFile(notifyFile, done, notify.InCreate, notify.InModify)
158+
Expect(err).To(BeNil())
159+
160+
// When
161+
f, err := os.Create(notifyFile)
162+
Expect(err).To(BeNil())
163+
f.Close()
164+
165+
<-ch
166+
})
167+
It("should not catch file create if doesn't exist", func() {
168+
// Given
169+
ch, err := oci.WatchForFile(notifyFile, done, notify.InCreate, notify.InModify)
170+
Expect(err).To(BeNil())
171+
172+
// When
173+
f, err := os.Create(notifyFile + "-backup")
174+
Expect(err).To(BeNil())
175+
f.Close()
176+
checkChannelEmpty(ch)
177+
178+
// Then
179+
f, err = os.Create(notifyFile)
180+
Expect(err).To(BeNil())
181+
f.Close()
182+
183+
<-ch
184+
})
185+
It("should only catch file write", func() {
186+
// Given
187+
ch, err := oci.WatchForFile(notifyFile, done, notify.InModify)
188+
Expect(err).To(BeNil())
189+
190+
// When
191+
f, err := os.Create(notifyFile)
192+
Expect(err).To(BeNil())
193+
defer f.Close()
194+
195+
checkChannelEmpty(ch)
196+
197+
_, err = f.Write([]byte("hello"))
198+
Expect(err).To(BeNil())
199+
200+
<-ch
201+
})
202+
It("should give up after sending on done", func() {
203+
// Given
204+
ch, err := oci.WatchForFile(notifyFile, done, notify.InModify)
205+
Expect(err).To(BeNil())
206+
207+
// When
208+
checkChannelEmpty(ch)
209+
done <- struct{}{}
210+
<-ch
211+
})
212+
})
145213
})
146214

147215
func waitContainerStopAndFailAfterTimeout(ctx context.Context,
@@ -183,3 +251,12 @@ func verifyContainerNotStopped(sut *oci.Container, _ *exec.Cmd, waitError error)
183251
func inSeconds(d int64) time.Duration {
184252
return time.Duration(d) * time.Second
185253
}
254+
255+
func checkChannelEmpty(ch chan struct{}) {
256+
select {
257+
case <-ch:
258+
// We don't expect to get anything here
259+
Expect(true).To(Equal(false))
260+
case <-time.After(time.Second * 3):
261+
}
262+
}

0 commit comments

Comments
 (0)