-
-
Notifications
You must be signed in to change notification settings - Fork 52
/
supervise.go
164 lines (155 loc) · 4.13 KB
/
supervise.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
package immortal
import (
"fmt"
"log"
"os"
"os/exec"
"sync/atomic"
"time"
)
// Supervisor keeps the process of a Daemon running: its Start loop waits for
// the process to exit and then restarts it, follows a pid file, or stops.
type Supervisor struct {
// daemon is the Daemon being supervised; its quit and run channels, lock
// and configuration drive the Start loop.
daemon *Daemon
// process is the process currently being watched; Start receives its exit
// error from process.errch and ReStart replaces it with a new one.
process *process
// pid is only assigned by FollowPid (from the followed pid file) and is
// used in log messages.
pid int
// wait is how long ReStart sleeps before launching a new process; it is
// set by Terminate and by ReStart itself after a failed launch.
wait time.Duration
}
// Supervise launches the daemon's process for the first time and then hands
// control to a Supervisor that keeps it up and running.
//
// It returns the error from the initial d.Run call if the process cannot be
// started; otherwise it blocks in Supervisor.Start and returns whatever
// Start returns.
func Supervise(d *Daemon) error {
	proc, err := d.Run(NewProcess(d.cfg))
	if err != nil {
		return err
	}
	// The supervisor starts with zero pid and wait; only the daemon and the
	// freshly started process are wired in.
	return (&Supervisor{daemon: d, process: proc}).Start()
}
// Start is the supervisor's event loop. It loops forever, reacting to three
// events:
//
//   - daemon.quit: wait for the daemon's WaitGroup and return an error
//     reporting the daemon's count.
//   - daemon.run: call ReStart.
//   - process.errch: the watched process ended; log its exit code, run the
//     optional post-exit command, then either terminate, follow a pid file
//     or restart.
//
// The only way out of the loop is the quit channel, so Start always returns a
// non-nil error.
func (s *Supervisor) Start() error {
	for {
		select {
		case <-s.daemon.quit:
			s.daemon.wg.Wait()
			return fmt.Errorf("supervisor stopped, count: %d", s.daemon.count)

		case <-s.daemon.run:
			s.ReStart()

		case exitErr := <-s.process.errch:
			// Derive the exit code; anything that is not an *exec.ExitError
			// (including a nil error) is reported as 0.
			// TODO check EXIT from kqueue since we don't know the exit code there
			code := 0
			if ee, isExit := exitErr.(*exec.ExitError); isExit {
				code = ee.ExitCode()
			}
			log.Printf("PID: %d exit code: %d", s.pid, code)

			// Run the configured post-exit command, passing the exit code as
			// its last argument. $SHELL is used when set, "sh" otherwise.
			if len(s.daemon.cfg.PostExit) > 0 {
				shell := os.Getenv("SHELL")
				if shell == "" {
					shell = "sh"
				}
				postExit := fmt.Sprintf("%s %d", s.daemon.cfg.PostExit, code)
				if runErr := exec.Command(shell, "-c", postExit).Run(); runErr != nil {
					log.Printf("post exit command failed: %s", runErr)
				}
			}

			// Terminate is always evaluated first; the remaining cases are
			// only considered when it reports that supervision continues.
			switch {
			case s.Terminate(exitErr):
				if s.daemon.cfg.cli || os.Getenv("IMMORTAL_EXIT") != "" {
					// Closing quit makes the next iteration return.
					close(s.daemon.quit)
				} else {
					// Stop without exiting: hold the lock so ReStart does
					// not launch a new process.
					atomic.StoreUint32(&s.daemon.lock, 1)
				}
			case s.daemon.cfg.Pid.Follow != "":
				// Follow the pid from the pid file instead of running the
				// command again, unless that pid is gone too.
				s.FollowPid(exitErr)
			default:
				s.ReStart()
			}
		}
	}
}
// ReStart sleeps for the current back-off delay (s.wait) and then launches a
// new process for the daemon, unless the daemon lock is set (non-zero), in
// which case it does nothing.
//
// If the launch fails, the new process's quit channel is closed, the error is
// logged, the back-off is set to one second and a signal is sent on
// daemon.run so that Start calls ReStart again. In that case s.process is
// left pointing at the previous process: the value returned alongside a
// non-nil error is not stored (presumably it is nil, which would make Start
// panic when it next selects on s.process.errch).
func (s *Supervisor) ReStart() {
	time.Sleep(s.wait)

	// The lock is written with atomic.StoreUint32, so it has to be read
	// atomically as well; a plain read is a data race.
	if atomic.LoadUint32(&s.daemon.lock) != 0 {
		return
	}

	np := NewProcess(s.daemon.cfg)
	p, err := s.daemon.Run(np)
	if err != nil {
		close(np.quit)
		log.Print(err)
		// loop again but wait 1 second before trying
		s.wait = time.Second
		// NOTE(review): this send blocks; it relies on daemon.run having
		// spare buffer capacity, since Start (the receiver) is the caller.
		s.daemon.run <- struct{}{}
		return
	}
	s.process = p
}
// Terminate records the end of the current process and reports whether the
// supervisor should stop restarting it.
//
// While holding the daemon mutex it stamps the process end time, resets the
// daemon's atomic lock to lockOnce and logs how the process ended. For
// anything other than an "EXIT" error it also sets s.wait, the delay used by
// the next ReStart: a process that ran for less than one second is delayed by
// the remainder of that second (avoids high CPU usage from a crash loop).
//
// The err parameter is the value received from the process error channel.
//
// It returns true when cfg.Retries is 0 (run only once), or when Retries is
// positive and daemon.count exceeds it. A negative Retries means run forever
// and always yields false.
func (s *Supervisor) Terminate(err error) bool {
	s.daemon.Lock()
	defer s.daemon.Unlock()

	// set end time
	s.process.eTime = time.Now()
	uptime := s.process.eTime.Sub(s.process.sTime)

	// unlock, or lock once
	atomic.StoreUint32(&s.daemon.lock, s.daemon.lockOnce)

	// WatchPid returns EXIT
	if err != nil && err.Error() == "EXIT" {
		log.Printf("PID: %d (%s) exited", s.pid, s.process.cmd.Path)
	} else {
		// ProcessState may be nil if waiting on the command failed; calling
		// Pid/UserTime/SystemTime on it would panic, so log what is known.
		if state := s.process.cmd.ProcessState; state != nil {
			log.Printf("PID %d (%s) terminated, %s [%v user %v sys %s up]\n",
				state.Pid(),
				s.process.cmd.Path,
				state,
				state.UserTime(),
				state.SystemTime(),
				uptime,
			)
		} else {
			log.Printf("(%s) terminated without process state: %v", s.process.cmd.Path, err)
		}

		// calculate time for next reboot (avoids high CPU usage)
		s.wait = 0
		if uptime < time.Second {
			s.wait = time.Second - uptime
		}
	}

	// behavior based on the retries:
	//   -1 run forever
	//    0 run only once (don't retry)
	//   +N run N times
	retries := s.daemon.cfg.Retries
	if retries < 0 {
		return false
	}
	return retries == 0 || s.daemon.count > retries
}
// FollowPid tries to keep supervising via the pid stored in the configured
// follow pid file (cfg.Pid.Follow) instead of launching another process.
//
// With the daemon mutex held it reads the pid file into s.pid. If the file
// cannot be read, or the pid is not greater than 1, equals the pid of the
// supervised process, or is not running, a signal is sent on daemon.run so
// the command is run again. Otherwise the daemon is flagged as following a
// pid, the command's process pid is overwritten with the followed pid and
// WatchPid is asked to report on the process error channel.
//
// The incoming err value is not inspected; the parameter is reused to hold
// the pid-file read error.
func (s *Supervisor) FollowPid(err error) {
	s.daemon.Lock()
	defer s.daemon.Unlock()

	pidFile := s.daemon.cfg.Pid.Follow

	s.pid, err = s.daemon.ReadPidFile(pidFile)
	if err != nil {
		log.Printf("Cannot read pidfile: %s, %s", pidFile, err)
		s.daemon.run <- struct{}{}
		return
	}

	// The pid in the file is only worth following when it is a real pid,
	// differs from the process we started, and is still alive.
	followable := s.pid > 1 && s.pid != s.process.Pid() && s.daemon.IsRunning(s.pid)
	if !followable {
		// The command exited or the process was killed: run it again.
		s.daemon.run <- struct{}{}
		return
	}

	log.Printf("Watching pid %d on file: %s", s.pid, pidFile)
	s.daemon.fpid = true
	// Replace the original (defunct) pid with the followed pid so that
	// signals can be sent to it.
	s.process.cmd.Process.Pid = s.pid
	s.daemon.WatchPid(s.pid, s.process.errch)
}