Skip to content

Commit cd1c5a0

Browse files
Merge pull request #5067 from haircommander/exec-inotify
oci: wait until pidfile to be written before starting timer
2 parents 1309dd2 + 2297dba commit cd1c5a0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+4954
-52
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ require (
4949
github.com/pkg/errors v0.9.1
5050
github.com/prometheus/client_golang v1.11.0
5151
github.com/psampaz/go-mod-outdated v0.8.0
52+
github.com/rjeczalik/notify v0.9.2
5253
github.com/sirupsen/logrus v1.8.1
5354
github.com/soheilhy/cmux v0.1.5
5455
github.com/stretchr/testify v1.7.0

go.sum

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uY
11511151
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
11521152
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
11531153
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
1154+
github.com/rjeczalik/notify v0.9.2 h1:MiTWrPj55mNDHEiIX5YUSKefw/+lCQVoAFmD6oQm5w8=
1155+
github.com/rjeczalik/notify v0.9.2/go.mod h1:aErll2f0sUX9PXZnVNyeiObbmTlk5jnMoCa4QEjJeqM=
11541156
github.com/robfig/cron v1.1.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k=
11551157
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
11561158
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
@@ -1527,6 +1529,7 @@ golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5h
15271529
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15281530
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15291531
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
1532+
golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15301533
golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15311534
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
15321535
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=

internal/oci/runtime_oci.go

Lines changed: 71 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ import (
2121
"github.com/cri-o/cri-o/server/cri/types"
2222
"github.com/cri-o/cri-o/server/metrics"
2323
"github.com/cri-o/cri-o/utils"
24-
"github.com/fsnotify/fsnotify"
2524
json "github.com/json-iterator/go"
2625
rspec "github.com/opencontainers/runtime-spec/specs-go"
2726
"github.com/pkg/errors"
27+
"github.com/rjeczalik/notify"
2828
"github.com/sirupsen/logrus"
2929
"golang.org/x/net/context"
3030
"golang.org/x/sys/unix"
@@ -346,18 +346,41 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
346346
cmd.Stdout = &stdoutBuf
347347
cmd.Stderr = &stderrBuf
348348

349-
err = cmd.Start()
349+
pidFileCreatedDone := make(chan struct{}, 1)
350+
pidFileCreatedCh, err := WatchForFile(pidFile, pidFileCreatedDone, notify.InModify, notify.InMovedTo)
350351
if err != nil {
352+
return nil, errors.Wrapf(err, "failed to watch %s", pidFile)
353+
}
354+
355+
doneErr := cmd.Start()
356+
if doneErr != nil {
351357
return nil, err
352358
}
353359

354360
// wait till the command is done
355361
done := make(chan error, 1)
356362
go func() {
357363
done <- cmd.Wait()
364+
close(done)
358365
}()
359366

360-
if timeout > 0 {
367+
// First, wait for the pid file to be created.
368+
// When it is, the timer begins for the exec process.
369+
// If the command fails before that happens, however,
370+
// that needs to be caught.
371+
select {
372+
case <-pidFileCreatedCh:
373+
case doneErr = <-done:
374+
}
375+
close(pidFileCreatedDone)
376+
377+
switch {
378+
case doneErr != nil:
379+
// If we've already gotten an error from done
380+
// the runtime finished before writing the pid file
381+
// (probably because the command didn't exist).
382+
case timeout > 0:
383+
// If there's a timeout, wait for that timeout duration.
361384
select {
362385
case <-time.After(time.Second * time.Duration(timeout)):
363386
// Ensure the process is not left behind
@@ -373,17 +396,18 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
373396
Stderr: []byte(conmonconfig.TimedOutMessage),
374397
ExitCode: -1,
375398
}, nil
376-
case err = <-done:
399+
case doneErr = <-done:
377400
break
378401
}
379-
} else {
380-
err = <-done
402+
default:
403+
// If no timeout, just wait until the command finishes.
404+
doneErr = <-done
381405
}
382406

383407
// gather exit code from err
384408
exitCode := int32(0)
385-
if err != nil {
386-
if exitError, ok := err.(*exec.ExitError); ok {
409+
if doneErr != nil {
410+
if exitError, ok := doneErr.(*exec.ExitError); ok {
387411
exitCode = int32(exitError.ExitCode())
388412
}
389413
}
@@ -1004,58 +1028,22 @@ func (r *runtimeOCI) ReopenContainerLog(ctx context.Context, c *Container) error
10041028
}
10051029
defer controlFile.Close()
10061030

1007-
watcher, err := fsnotify.NewWatcher()
1031+
done := make(chan struct{}, 1)
1032+
ch, err := WatchForFile(c.LogPath(), done, notify.InCreate, notify.InModify)
10081033
if err != nil {
1009-
return fmt.Errorf("failed to create new watch: %v", err)
1010-
}
1011-
defer watcher.Close()
1012-
1013-
done := make(chan struct{})
1014-
doneClosed := false
1015-
errorCh := make(chan error)
1016-
go func() {
1017-
for {
1018-
select {
1019-
case event := <-watcher.Events:
1020-
log.Debugf(ctx, "Event: %v", event)
1021-
if event.Op&fsnotify.Create == fsnotify.Create || event.Op&fsnotify.Write == fsnotify.Write {
1022-
log.Debugf(ctx, "File created %s", event.Name)
1023-
if event.Name == c.LogPath() {
1024-
log.Debugf(ctx, "Expected log file created")
1025-
done <- struct{}{}
1026-
return
1027-
}
1028-
}
1029-
case err := <-watcher.Errors:
1030-
errorCh <- fmt.Errorf("watch error for container log reopen %v: %v", c.ID(), err)
1031-
close(errorCh)
1032-
return
1033-
}
1034-
}
1035-
}()
1036-
cLogDir := filepath.Dir(c.LogPath())
1037-
if err := watcher.Add(cLogDir); err != nil {
1038-
log.Errorf(ctx, "Watcher.Add(%q) failed: %s", cLogDir, err)
1039-
close(done)
1040-
doneClosed = true
1034+
return errors.Wrapf(err, "failed to create watch for %s", c.LogPath())
10411035
}
10421036

10431037
if _, err = fmt.Fprintf(controlFile, "%d %d %d\n", 2, 0, 0); err != nil {
10441038
log.Debugf(ctx, "Failed to write to control file to reopen log file: %v", err)
10451039
}
1046-
10471040
select {
1048-
case err := <-errorCh:
1049-
if !doneClosed {
1050-
close(done)
1051-
}
1052-
return err
1053-
case <-done:
1054-
if !doneClosed {
1055-
close(done)
1056-
}
1057-
break
1041+
case <-ch:
1042+
case <-time.After(time.Minute * 3):
1043+
// Give up after 3 minutes, as something wrong probably happened
1044+
log.Errorf(ctx, "Failed to reopen log file for container %s: timed out", c.ID())
10581045
}
1046+
close(done)
10591047

10601048
return nil
10611049
}
@@ -1099,3 +1087,34 @@ func prepareProcessExec(c *Container, cmd []string, tty bool) (processFile strin
10991087
func (c *Container) conmonPidFilePath() string {
11001088
return filepath.Join(c.bundlePath, "conmon-pidfile")
11011089
}
1090+
1091+
// WatchForFile creates a watch on the parent directory of path, looking for events opsToWatch.
1092+
// It returns immediately with a channel to find when path had one of those events.
1093+
// done can be used to stop the watch.
1094+
// WatchForFile is responsible for closing all internal channels and the returned channel, but not for closing done.
1095+
func WatchForFile(path string, done chan struct{}, opsToWatch ...notify.Event) (chan struct{}, error) {
1096+
eiCh := make(chan notify.EventInfo, 1)
1097+
ch := make(chan struct{})
1098+
1099+
dir := filepath.Dir(path)
1100+
if err := notify.Watch(dir, eiCh, opsToWatch...); err != nil {
1101+
return nil, err
1102+
}
1103+
go func() {
1104+
defer close(ch)
1105+
defer close(eiCh)
1106+
defer notify.Stop(eiCh)
1107+
for {
1108+
select {
1109+
case ei := <-eiCh:
1110+
if ei.Path() == path {
1111+
ch <- struct{}{}
1112+
return
1113+
}
1114+
case <-done:
1115+
return
1116+
}
1117+
}
1118+
}()
1119+
return ch, nil
1120+
}

internal/oci/runtime_oci_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@ package oci_test
33
import (
44
"context"
55
"math/rand"
6+
"os"
67
"os/exec"
8+
"path/filepath"
79
"time"
810

911
"github.com/cri-o/cri-o/internal/oci"
1012
. "github.com/onsi/ginkgo"
1113
. "github.com/onsi/gomega"
1214
"github.com/pkg/errors"
15+
"github.com/rjeczalik/notify"
1316
)
1417

1518
const (
@@ -142,6 +145,71 @@ var _ = t.Describe("Oci", func() {
142145
})
143146
}
144147
})
148+
t.Describe("WatchForFile", func() {
149+
var notifyFile string
150+
var done chan struct{}
151+
BeforeEach(func() {
152+
notifyFile = filepath.Join(t.MustTempDir("watch"), "file")
153+
done = make(chan struct{}, 1)
154+
})
155+
It("should catch file creation", func() {
156+
// Given
157+
ch, err := oci.WatchForFile(notifyFile, done, notify.InCreate, notify.InModify)
158+
Expect(err).To(BeNil())
159+
160+
// When
161+
f, err := os.Create(notifyFile)
162+
Expect(err).To(BeNil())
163+
f.Close()
164+
165+
<-ch
166+
})
167+
It("should not catch file create if doesn't exist", func() {
168+
// Given
169+
ch, err := oci.WatchForFile(notifyFile, done, notify.InCreate, notify.InModify)
170+
Expect(err).To(BeNil())
171+
172+
// When
173+
f, err := os.Create(notifyFile + "-backup")
174+
Expect(err).To(BeNil())
175+
f.Close()
176+
checkChannelEmpty(ch)
177+
178+
// Then
179+
f, err = os.Create(notifyFile)
180+
Expect(err).To(BeNil())
181+
f.Close()
182+
183+
<-ch
184+
})
185+
It("should only catch file write", func() {
186+
// Given
187+
ch, err := oci.WatchForFile(notifyFile, done, notify.InModify)
188+
Expect(err).To(BeNil())
189+
190+
// When
191+
f, err := os.Create(notifyFile)
192+
Expect(err).To(BeNil())
193+
defer f.Close()
194+
195+
checkChannelEmpty(ch)
196+
197+
_, err = f.Write([]byte("hello"))
198+
Expect(err).To(BeNil())
199+
200+
<-ch
201+
})
202+
It("should give up after sending on done", func() {
203+
// Given
204+
ch, err := oci.WatchForFile(notifyFile, done, notify.InModify)
205+
Expect(err).To(BeNil())
206+
207+
// When
208+
checkChannelEmpty(ch)
209+
done <- struct{}{}
210+
<-ch
211+
})
212+
})
145213
})
146214

147215
func waitContainerStopAndFailAfterTimeout(ctx context.Context,
@@ -183,3 +251,12 @@ func verifyContainerNotStopped(sut *oci.Container, _ *exec.Cmd, waitError error)
183251
func inSeconds(d int64) time.Duration {
184252
return time.Duration(d) * time.Second
185253
}
254+
255+
func checkChannelEmpty(ch chan struct{}) {
256+
select {
257+
case <-ch:
258+
// We don't expect to get anything here
259+
Expect(true).To(Equal(false))
260+
case <-time.After(time.Second * 3):
261+
}
262+
}

0 commit comments

Comments
 (0)