agent.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package main
  15. import (
  16. "fmt"
  17. "os"
  18. "os/exec"
  19. "path/filepath"
  20. "syscall"
  21. "time"
  22. "github.com/coreos/etcd/pkg/fileutil"
  23. "github.com/coreos/etcd/pkg/netutil"
  24. "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
  25. )
// States recorded in Agent.state for the etcd process managed by an Agent.
const (
	// stateUninitialized is the state of a newly created Agent and the state
	// set by cleanup.
	stateUninitialized = "uninitialized"
	// stateStarted is set after start or restart successfully launches the
	// process.
	stateStarted = "started"
	// stateStopped is set after the Agent's stopWithSig method stops a
	// started process.
	stateStopped = "stopped"
	// stateTerminated is set by terminate once the process is stopped and
	// its data dir has been removed.
	stateTerminated = "terminated"
)
// Agent manages a single etcd process together with the log file that
// receives the process's output.
type Agent struct {
	state   string    // the state of etcd process; one of the state* constants
	cmd     *exec.Cmd // the etcd command; replaced on every start and restart
	logfile *os.File  // assigned to Stdout and Stderr of the etcd command
	logDir  string    // directory in which etcd.log is created
}
  38. func newAgent(etcd, logDir string) (*Agent, error) {
  39. // check if the file exists
  40. _, err := os.Stat(etcd)
  41. if err != nil {
  42. return nil, err
  43. }
  44. c := exec.Command(etcd)
  45. err = fileutil.TouchDirAll(logDir)
  46. if err != nil {
  47. return nil, err
  48. }
  49. var f *os.File
  50. f, err = os.Create(filepath.Join(logDir, "etcd.log"))
  51. if err != nil {
  52. return nil, err
  53. }
  54. return &Agent{state: stateUninitialized, cmd: c, logfile: f, logDir: logDir}, nil
  55. }
  56. // start starts a new etcd process with the given args.
  57. func (a *Agent) start(args ...string) error {
  58. a.cmd = exec.Command(a.cmd.Path, args...)
  59. a.cmd.Stdout = a.logfile
  60. a.cmd.Stderr = a.logfile
  61. err := a.cmd.Start()
  62. if err != nil {
  63. return err
  64. }
  65. a.state = stateStarted
  66. return nil
  67. }
  68. // stop stops the existing etcd process the agent started.
  69. func (a *Agent) stopWithSig(sig os.Signal) error {
  70. if a.state != stateStarted {
  71. return nil
  72. }
  73. err := stopWithSig(a.cmd, sig)
  74. if err != nil {
  75. return err
  76. }
  77. a.state = stateStopped
  78. return nil
  79. }
  80. func stopWithSig(cmd *exec.Cmd, sig os.Signal) error {
  81. err := cmd.Process.Signal(sig)
  82. if err != nil {
  83. return err
  84. }
  85. errc := make(chan error)
  86. go func() {
  87. _, ew := cmd.Process.Wait()
  88. errc <- ew
  89. close(errc)
  90. }()
  91. select {
  92. case <-time.After(5 * time.Second):
  93. cmd.Process.Kill()
  94. case e := <-errc:
  95. return e
  96. }
  97. err = <-errc
  98. return err
  99. }
  100. // restart restarts the stopped etcd process.
  101. func (a *Agent) restart() error {
  102. a.cmd = exec.Command(a.cmd.Path, a.cmd.Args[1:]...)
  103. a.cmd.Stdout = a.logfile
  104. a.cmd.Stderr = a.logfile
  105. err := a.cmd.Start()
  106. if err != nil {
  107. return err
  108. }
  109. a.state = stateStarted
  110. return nil
  111. }
  112. func (a *Agent) cleanup() error {
  113. // exit with stackstrace
  114. if err := a.stopWithSig(syscall.SIGQUIT); err != nil {
  115. return err
  116. }
  117. a.state = stateUninitialized
  118. a.logfile.Close()
  119. if err := archiveLogAndDataDir(a.logDir, a.dataDir()); err != nil {
  120. return err
  121. }
  122. if err := fileutil.TouchDirAll(a.logDir); err != nil {
  123. return err
  124. }
  125. f, err := os.Create(filepath.Join(a.logDir, "etcd.log"))
  126. if err != nil {
  127. return err
  128. }
  129. a.logfile = f
  130. // https://www.kernel.org/doc/Documentation/sysctl/vm.txt
  131. // https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
  132. cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
  133. if err := cmd.Run(); err != nil {
  134. plog.Printf("error when cleaning page cache (%v)", err)
  135. }
  136. return nil
  137. }
  138. // terminate stops the exiting etcd process the agent started
  139. // and removes the data dir.
  140. func (a *Agent) terminate() error {
  141. err := a.stopWithSig(syscall.SIGTERM)
  142. if err != nil {
  143. return err
  144. }
  145. err = os.RemoveAll(a.dataDir())
  146. if err != nil {
  147. return err
  148. }
  149. a.state = stateTerminated
  150. return nil
  151. }
// dropPort passes port to netutil.DropPort and returns its error unchanged.
// It does not read or modify any Agent state.
// NOTE(review): presumably this makes traffic on the port get dropped —
// confirm against the netutil package.
func (a *Agent) dropPort(port int) error {
	return netutil.DropPort(port)
}
// recoverPort passes port to netutil.RecoverPort and returns its error
// unchanged. It does not read or modify any Agent state.
// NOTE(review): presumably this undoes the effect of dropPort for the same
// port — confirm against the netutil package.
func (a *Agent) recoverPort(port int) error {
	return netutil.RecoverPort(port)
}
  158. func (a *Agent) setLatency(ms, rv int) error {
  159. if ms == 0 {
  160. return netutil.RemoveLatency()
  161. }
  162. return netutil.SetLatency(ms, rv)
  163. }
// status returns a client.Status whose State field is the agent's current
// state string.
func (a *Agent) status() client.Status {
	return client.Status{State: a.state}
}
  167. func (a *Agent) dataDir() string {
  168. datadir := filepath.Join(a.cmd.Path, "*.etcd")
  169. args := a.cmd.Args
  170. // only parse the simple case like "--data-dir /var/lib/etcd"
  171. for i, arg := range args {
  172. if arg == "--data-dir" {
  173. datadir = args[i+1]
  174. break
  175. }
  176. }
  177. return datadir
  178. }
  179. func existDir(fpath string) bool {
  180. st, err := os.Stat(fpath)
  181. if err != nil {
  182. if os.IsNotExist(err) {
  183. return false
  184. }
  185. } else {
  186. return st.IsDir()
  187. }
  188. return false
  189. }
  190. func archiveLogAndDataDir(logDir string, datadir string) error {
  191. dir := filepath.Join("failure_archive", fmt.Sprint(time.Now().Format(time.RFC3339)))
  192. if existDir(dir) {
  193. dir = filepath.Join("failure_archive", fmt.Sprint(time.Now().Add(time.Second).Format(time.RFC3339)))
  194. }
  195. if err := fileutil.TouchDirAll(dir); err != nil {
  196. return err
  197. }
  198. if err := os.Rename(logDir, filepath.Join(dir, filepath.Base(logDir))); err != nil {
  199. if !os.IsNotExist(err) {
  200. return err
  201. }
  202. }
  203. if err := os.Rename(datadir, filepath.Join(dir, filepath.Base(datadir))); err != nil {
  204. if !os.IsNotExist(err) {
  205. return err
  206. }
  207. }
  208. return nil
  209. }