@@ -21,10 +21,10 @@ import (
2121 "github.com/cri-o/cri-o/server/cri/types"
2222 "github.com/cri-o/cri-o/server/metrics"
2323 "github.com/cri-o/cri-o/utils"
24- "github.com/fsnotify/fsnotify"
2524 json "github.com/json-iterator/go"
2625 rspec "github.com/opencontainers/runtime-spec/specs-go"
2726 "github.com/pkg/errors"
27+ "github.com/rjeczalik/notify"
2828 "github.com/sirupsen/logrus"
2929 "golang.org/x/net/context"
3030 "golang.org/x/sys/unix"
@@ -346,18 +346,41 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
346346 cmd .Stdout = & stdoutBuf
347347 cmd .Stderr = & stderrBuf
348348
349- err = cmd .Start ()
349+ pidFileCreatedDone := make (chan struct {}, 1 )
350+ pidFileCreatedCh , err := WatchForFile (pidFile , pidFileCreatedDone , notify .InModify , notify .InMovedTo )
350351 if err != nil {
352+ return nil , errors .Wrapf (err , "failed to watch %s" , pidFile )
353+ }
354+
355+ doneErr := cmd .Start ()
356+ if doneErr != nil {
351357 return nil , err
352358 }
353359
354360 // wait till the command is done
355361 done := make (chan error , 1 )
356362 go func () {
357363 done <- cmd .Wait ()
364+ close (done )
358365 }()
359366
360- if timeout > 0 {
367+ // First, wait for the pid file to be created.
368+ // When it is, the timer begins for the exec process.
369+ // If the command fails before that happens, however,
370+ // that needs to be caught.
371+ select {
372+ case <- pidFileCreatedCh :
373+ case doneErr = <- done :
374+ }
375+ close (pidFileCreatedDone )
376+
377+ switch {
378+ case doneErr != nil :
379+ // If we've already gotten an error from done
380+ // the runtime finished before writing the pid file
381+ // (probably because the command didn't exist).
382+ case timeout > 0 :
383+ // If there's a timeout, wait for that timeout duration.
361384 select {
362385 case <- time .After (time .Second * time .Duration (timeout )):
363386 // Ensure the process is not left behind
@@ -373,17 +396,18 @@ func (r *runtimeOCI) ExecSyncContainer(ctx context.Context, c *Container, comman
373396 Stderr : []byte (conmonconfig .TimedOutMessage ),
374397 ExitCode : - 1 ,
375398 }, nil
376- case err = <- done :
399+ case doneErr = <- done :
377400 break
378401 }
379- } else {
380- err = <- done
402+ default :
403+ // If no timeout, just wait until the command finishes.
404+ doneErr = <- done
381405 }
382406
383407 // gather exit code from err
384408 exitCode := int32 (0 )
385- if err != nil {
386- if exitError , ok := err .(* exec.ExitError ); ok {
409+ if doneErr != nil {
410+ if exitError , ok := doneErr .(* exec.ExitError ); ok {
387411 exitCode = int32 (exitError .ExitCode ())
388412 }
389413 }
@@ -1004,58 +1028,22 @@ func (r *runtimeOCI) ReopenContainerLog(ctx context.Context, c *Container) error
10041028 }
10051029 defer controlFile .Close ()
10061030
1007- watcher , err := fsnotify .NewWatcher ()
1031+ done := make (chan struct {}, 1 )
1032+ ch , err := WatchForFile (c .LogPath (), done , notify .InCreate , notify .InModify )
10081033 if err != nil {
1009- return fmt .Errorf ("failed to create new watch: %v" , err )
1010- }
1011- defer watcher .Close ()
1012-
1013- done := make (chan struct {})
1014- doneClosed := false
1015- errorCh := make (chan error )
1016- go func () {
1017- for {
1018- select {
1019- case event := <- watcher .Events :
1020- log .Debugf (ctx , "Event: %v" , event )
1021- if event .Op & fsnotify .Create == fsnotify .Create || event .Op & fsnotify .Write == fsnotify .Write {
1022- log .Debugf (ctx , "File created %s" , event .Name )
1023- if event .Name == c .LogPath () {
1024- log .Debugf (ctx , "Expected log file created" )
1025- done <- struct {}{}
1026- return
1027- }
1028- }
1029- case err := <- watcher .Errors :
1030- errorCh <- fmt .Errorf ("watch error for container log reopen %v: %v" , c .ID (), err )
1031- close (errorCh )
1032- return
1033- }
1034- }
1035- }()
1036- cLogDir := filepath .Dir (c .LogPath ())
1037- if err := watcher .Add (cLogDir ); err != nil {
1038- log .Errorf (ctx , "Watcher.Add(%q) failed: %s" , cLogDir , err )
1039- close (done )
1040- doneClosed = true
1034+ return errors .Wrapf (err , "failed to create watch for %s" , c .LogPath ())
10411035 }
10421036
10431037 if _ , err = fmt .Fprintf (controlFile , "%d %d %d\n " , 2 , 0 , 0 ); err != nil {
10441038 log .Debugf (ctx , "Failed to write to control file to reopen log file: %v" , err )
10451039 }
1046-
10471040 select {
1048- case err := <- errorCh :
1049- if ! doneClosed {
1050- close (done )
1051- }
1052- return err
1053- case <- done :
1054- if ! doneClosed {
1055- close (done )
1056- }
1057- break
1041+ case <- ch :
1042+ case <- time .After (time .Minute * 3 ):
1043+ // Give up after 3 minutes, as something wrong probably happened
1044+ log .Errorf (ctx , "Failed to reopen log file for container %s: timed out" , c .ID ())
10581045 }
1046+ close (done )
10591047
10601048 return nil
10611049}
@@ -1099,3 +1087,34 @@ func prepareProcessExec(c *Container, cmd []string, tty bool) (processFile strin
10991087func (c * Container ) conmonPidFilePath () string {
11001088 return filepath .Join (c .bundlePath , "conmon-pidfile" )
11011089}
1090+
1091+ // WatchForFile creates a watch on the parent directory of path, looking for events opsToWatch.
1092+ // It returns immediately with a channel to find when path had one of those events.
1093+ // done can be used to stop the watch.
1094+ // WatchForFile is responsible for closing all internal channels and the returned channel, but not for closing done.
1095+ func WatchForFile (path string , done chan struct {}, opsToWatch ... notify.Event ) (chan struct {}, error ) {
1096+ eiCh := make (chan notify.EventInfo , 1 )
1097+ ch := make (chan struct {})
1098+
1099+ dir := filepath .Dir (path )
1100+ if err := notify .Watch (dir , eiCh , opsToWatch ... ); err != nil {
1101+ return nil , err
1102+ }
1103+ go func () {
1104+ defer close (ch )
1105+ defer close (eiCh )
1106+ defer notify .Stop (eiCh )
1107+ for {
1108+ select {
1109+ case ei := <- eiCh :
1110+ if ei .Path () == path {
1111+ ch <- struct {}{}
1112+ return
1113+ }
1114+ case <- done :
1115+ return
1116+ }
1117+ }
1118+ }()
1119+ return ch , nil
1120+ }
0 commit comments