agent.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package main
  15. import (
  16. "fmt"
  17. "os"
  18. "os/exec"
  19. "path"
  20. "syscall"
  21. "time"
  22. "github.com/coreos/etcd/pkg/netutil"
  23. "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
  24. )
  // Lifecycle states of the etcd process as tracked by Agent.state.
  const (
  	// stateUninitialized is assigned by newAgent and restored by cleanup.
  	stateUninitialized = "uninitialized"
  	// stateStarted is assigned after the process was started or restarted.
  	stateStarted = "started"
  	// stateStopped is assigned after the process was stopped with a signal.
  	stateStopped = "stopped"
  	// stateTerminated is assigned after the process was stopped and its
  	// data dir removed.
  	stateTerminated = "terminated"
  )
  // Agent manages a single etcd process: the command used to launch it, the
  // file its output is written to, and the lifecycle state it is in.
  type Agent struct {
  	// state is one of the state* constants and describes the etcd process.
  	state string
  	// cmd is the command for the most recently configured etcd process.
  	cmd *exec.Cmd
  	// logfile receives stdout and stderr of the etcd process.
  	logfile *os.File
  	// etcdLogPath is the path at which logfile is created.
  	etcdLogPath string
  }
  37. func newAgent(etcd, etcdLogPath string) (*Agent, error) {
  38. // check if the file exists
  39. _, err := os.Stat(etcd)
  40. if err != nil {
  41. return nil, err
  42. }
  43. c := exec.Command(etcd)
  44. f, err := os.Create(etcdLogPath)
  45. if err != nil {
  46. return nil, err
  47. }
  48. return &Agent{state: stateUninitialized, cmd: c, logfile: f, etcdLogPath: etcdLogPath}, nil
  49. }
  50. // start starts a new etcd process with the given args.
  51. func (a *Agent) start(args ...string) error {
  52. a.cmd = exec.Command(a.cmd.Path, args...)
  53. a.cmd.Stdout = a.logfile
  54. a.cmd.Stderr = a.logfile
  55. err := a.cmd.Start()
  56. if err != nil {
  57. return err
  58. }
  59. a.state = stateStarted
  60. return nil
  61. }
  62. // stop stops the existing etcd process the agent started.
  63. func (a *Agent) stopWithSig(sig os.Signal) error {
  64. if a.state != stateStarted {
  65. return nil
  66. }
  67. err := stopWithSig(a.cmd, sig)
  68. if err != nil {
  69. return err
  70. }
  71. a.state = stateStopped
  72. return nil
  73. }
  // stopWithSig sends sig to the process behind cmd and waits for it to exit.
  // If the process has not exited 5 seconds after the signal was delivered, it
  // is killed and the wait continues until the process has been reaped.
  //
  // The returned error is the one from delivering the signal or from waiting
  // on the process. An error is also returned when cmd has no started process.
  func stopWithSig(cmd *exec.Cmd, sig os.Signal) error {
  	// cmd.Process is only set once Start has succeeded; guard against a nil
  	// dereference for commands that were never started.
  	if cmd == nil || cmd.Process == nil {
  		return fmt.Errorf("cannot signal process: not started")
  	}
  	if err := cmd.Process.Signal(sig); err != nil {
  		return err
  	}

  	// Buffered so the waiting goroutine can always deliver its result and exit.
  	errc := make(chan error, 1)
  	go func() {
  		_, werr := cmd.Process.Wait()
  		errc <- werr
  	}()

  	// Stop the timer on return so it does not linger after a prompt exit.
  	timer := time.NewTimer(5 * time.Second)
  	defer timer.Stop()

  	select {
  	case err := <-errc:
  		return err
  	case <-timer.C:
  		// The process ignored the signal for too long; force it down. A Kill
  		// failure (e.g. the process exited in the meantime) is deliberately
  		// ignored because the Wait result below is what gets reported.
  		_ = cmd.Process.Kill()
  	}
  	return <-errc
  }
  94. // restart restarts the stopped etcd process.
  95. func (a *Agent) restart() error {
  96. a.cmd = exec.Command(a.cmd.Path, a.cmd.Args[1:]...)
  97. a.cmd.Stdout = a.logfile
  98. a.cmd.Stderr = a.logfile
  99. err := a.cmd.Start()
  100. if err != nil {
  101. return err
  102. }
  103. a.state = stateStarted
  104. return nil
  105. }
  106. func (a *Agent) cleanup() error {
  107. // exit with stackstrace
  108. if err := a.stopWithSig(syscall.SIGQUIT); err != nil {
  109. return err
  110. }
  111. a.state = stateUninitialized
  112. a.logfile.Close()
  113. if err := archiveLogAndDataDir(a.etcdLogPath, a.dataDir()); err != nil {
  114. return err
  115. }
  116. f, err := os.Create(a.etcdLogPath)
  117. a.logfile = f
  118. if err != nil {
  119. return err
  120. }
  121. // https://www.kernel.org/doc/Documentation/sysctl/vm.txt
  122. // https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
  123. cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
  124. if err := cmd.Run(); err != nil {
  125. plog.Printf("error when cleaning page cache (%v)", err)
  126. }
  127. return nil
  128. }
  129. // terminate stops the exiting etcd process the agent started
  130. // and removes the data dir.
  131. func (a *Agent) terminate() error {
  132. err := a.stopWithSig(syscall.SIGTERM)
  133. if err != nil {
  134. return err
  135. }
  136. err = os.RemoveAll(a.dataDir())
  137. if err != nil {
  138. return err
  139. }
  140. a.state = stateTerminated
  141. return nil
  142. }
  143. func (a *Agent) dropPort(port int) error {
  144. return netutil.DropPort(port)
  145. }
  146. func (a *Agent) recoverPort(port int) error {
  147. return netutil.RecoverPort(port)
  148. }
  149. func (a *Agent) setLatency(ms, rv int) error {
  150. if ms == 0 {
  151. return netutil.RemoveLatency()
  152. }
  153. return netutil.SetLatency(ms, rv)
  154. }
  155. func (a *Agent) status() client.Status {
  156. return client.Status{State: a.state}
  157. }
  158. func (a *Agent) dataDir() string {
  159. datadir := path.Join(a.cmd.Path, "*.etcd")
  160. args := a.cmd.Args
  161. // only parse the simple case like "--data-dir /var/lib/etcd"
  162. for i, arg := range args {
  163. if arg == "--data-dir" {
  164. datadir = args[i+1]
  165. break
  166. }
  167. }
  168. return datadir
  169. }
  // existDir reports whether fpath exists and is a directory. Any os.Stat
  // error, whether "does not exist" or something else, yields false.
  func existDir(fpath string) bool {
  	info, statErr := os.Stat(fpath)
  	return statErr == nil && info.IsDir()
  }
  181. func archiveLogAndDataDir(log string, datadir string) error {
  182. dir := path.Join("failure_archive", fmt.Sprint(time.Now().Format(time.RFC3339)))
  183. if existDir(dir) {
  184. dir = path.Join("failure_archive", fmt.Sprint(time.Now().Add(time.Second).Format(time.RFC3339)))
  185. }
  186. if err := os.MkdirAll(dir, 0755); err != nil {
  187. return err
  188. }
  189. if err := os.Rename(log, path.Join(dir, path.Base(log))); err != nil {
  190. if !os.IsNotExist(err) {
  191. return err
  192. }
  193. }
  194. if err := os.Rename(datadir, path.Join(dir, path.Base(datadir))); err != nil {
  195. if !os.IsNotExist(err) {
  196. return err
  197. }
  198. }
  199. return nil
  200. }