@@ -21,10 +21,10 @@ import (
2121 "github.com/cri-o/cri-o/server/cri/types"
2222 "github.com/cri-o/cri-o/server/metrics"
2323 "github.com/cri-o/cri-o/utils"
24- "github.com/fsnotify/fsnotify"
2524 json "github.com/json-iterator/go"
2625 rspec "github.com/opencontainers/runtime-spec/specs-go"
2726 "github.com/pkg/errors"
27+ "github.com/rjeczalik/notify"
2828 "github.com/sirupsen/logrus"
2929 "golang.org/x/net/context"
3030 "golang.org/x/sys/unix"
@@ -343,18 +343,41 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
343343 cmd .Stdout = & stdoutBuf
344344 cmd .Stderr = & stderrBuf
345345
346- err = cmd .Start ()
346+ pidFileCreatedDone := make (chan struct {}, 1 )
347+ pidFileCreatedCh , err := WatchForFile (pidFile , pidFileCreatedDone , notify .InModify , notify .InMovedTo )
347348 if err != nil {
349+ return nil , errors .Wrapf (err , "failed to watch %s" , pidFile )
350+ }
351+
352+ doneErr := cmd .Start ()
353+ if doneErr != nil {
348354 return nil , err
349355 }
350356
351357 // wait till the command is done
352358 done := make (chan error , 1 )
353359 go func () {
354360 done <- cmd .Wait ()
361+ close (done )
355362 }()
356363
357- if timeout > 0 {
364+ // First, wait for the pid file to be created.
365+ // When it is, the timer begins for the exec process.
366+ // If the command fails before that happens, however,
367+ // that needs to be caught.
368+ select {
369+ case <- pidFileCreatedCh :
370+ case doneErr = <- done :
371+ }
372+ close (pidFileCreatedDone )
373+
374+ switch {
375+ case doneErr != nil :
376+ // If we've already gotten an error from done
377+ // the runtime finished before writing the pid file
378+ // (probably because the command didn't exist).
379+ case timeout > 0 :
380+ // If there's a timeout, wait for that timeout duration.
358381 select {
359382 case <- time .After (time .Second * time .Duration (timeout )):
360383 // Ensure the process is not left behind
@@ -370,17 +393,18 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
370393 Stderr : []byte (conmonconfig .TimedOutMessage ),
371394 ExitCode : - 1 ,
372395 }, nil
373- case err = <- done :
396+ case doneErr = <- done :
374397 break
375398 }
376- } else {
377- err = <- done
399+ default :
400+ // If no timeout, just wait until the command finishes.
401+ doneErr = <- done
378402 }
379403
380404 // gather exit code from err
381405 exitCode := int32 (0 )
382- if err != nil {
383- if exitError , ok := err .(* exec.ExitError ); ok {
406+ if doneErr != nil {
407+ if exitError , ok := doneErr .(* exec.ExitError ); ok {
384408 exitCode = int32 (exitError .ExitCode ())
385409 }
386410 }
@@ -1017,58 +1041,22 @@ func (r *runtimeOCI) ReopenContainerLog(ctx context.Context, c *Container) error
10171041 }
10181042 defer controlFile .Close ()
10191043
1020- watcher , err := fsnotify .NewWatcher ()
1044+ done := make (chan struct {}, 1 )
1045+ ch , err := WatchForFile (c .LogPath (), done , notify .InCreate , notify .InModify )
10211046 if err != nil {
1022- return fmt .Errorf ("failed to create new watch: %v" , err )
1023- }
1024- defer watcher .Close ()
1025-
1026- done := make (chan struct {})
1027- doneClosed := false
1028- errorCh := make (chan error )
1029- go func () {
1030- for {
1031- select {
1032- case event := <- watcher .Events :
1033- log .Debugf (ctx , "event: %v" , event )
1034- if event .Op & fsnotify .Create == fsnotify .Create || event .Op & fsnotify .Write == fsnotify .Write {
1035- log .Debugf (ctx , "file created %s" , event .Name )
1036- if event .Name == c .LogPath () {
1037- log .Debugf (ctx , "expected log file created" )
1038- done <- struct {}{}
1039- return
1040- }
1041- }
1042- case err := <- watcher .Errors :
1043- errorCh <- fmt .Errorf ("watch error for container log reopen %v: %v" , c .ID (), err )
1044- close (errorCh )
1045- return
1046- }
1047- }
1048- }()
1049- cLogDir := filepath .Dir (c .LogPath ())
1050- if err := watcher .Add (cLogDir ); err != nil {
1051- log .Errorf (ctx , "watcher.Add(%q) failed: %s" , cLogDir , err )
1052- close (done )
1053- doneClosed = true
1047+ return errors .Wrapf (err , "failed to create watch for %s" , c .LogPath ())
10541048 }
10551049
10561050 if _ , err = fmt .Fprintf (controlFile , "%d %d %d\n " , 2 , 0 , 0 ); err != nil {
10571051 log .Debugf (ctx , "Failed to write to control file to reopen log file: %v" , err )
10581052 }
1059-
10601053 select {
1061- case err := <- errorCh :
1062- if ! doneClosed {
1063- close (done )
1064- }
1065- return err
1066- case <- done :
1067- if ! doneClosed {
1068- close (done )
1069- }
1070- break
1054+ case <- ch :
1055+ case <- time .After (time .Minute * 3 ):
1056+ // Give up after 3 minutes, as something wrong probably happened
1057+ log .Errorf (ctx , "Failed to reopen log file for container %s: timed out" , c .ID ())
10711058 }
1059+ close (done )
10721060
10731061 return nil
10741062}
@@ -1112,3 +1100,34 @@ func prepareProcessExec(c *Container, cmd []string, tty bool) (processFile strin
11121100func (c * Container ) conmonPidFilePath () string {
11131101 return filepath .Join (c .bundlePath , "conmon-pidfile" )
11141102}
1103+
1104+ // WatchForFile creates a watch on the parent directory of path, looking for events opsToWatch.
1105+ // It returns immediately with a channel to find when path had one of those events.
1106+ // done can be used to stop the watch.
1107+ // WatchForFile is responsible for closing all internal channels and the returned channel, but not for closing done.
1108+ func WatchForFile (path string , done chan struct {}, opsToWatch ... notify.Event ) (chan struct {}, error ) {
1109+ eiCh := make (chan notify.EventInfo , 1 )
1110+ ch := make (chan struct {})
1111+
1112+ dir := filepath .Dir (path )
1113+ if err := notify .Watch (dir , eiCh , opsToWatch ... ); err != nil {
1114+ return nil , err
1115+ }
1116+ go func () {
1117+ defer close (ch )
1118+ defer close (eiCh )
1119+ defer notify .Stop (eiCh )
1120+ for {
1121+ select {
1122+ case ei := <- eiCh :
1123+ if ei .Path () == path {
1124+ ch <- struct {}{}
1125+ return
1126+ }
1127+ case <- done :
1128+ return
1129+ }
1130+ }
1131+ }()
1132+ return ch , nil
1133+ }
0 commit comments