agent.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package main
  15. import (
  16. "os"
  17. "os/exec"
  18. "path/filepath"
  19. "syscall"
  20. "time"
  21. "github.com/coreos/etcd/pkg/fileutil"
  22. "github.com/coreos/etcd/pkg/netutil"
  23. "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
  24. )
  // Lifecycle states of the etcd process managed by an Agent. The current
  // value is what Agent.status reports.
  const (
  	// stateUninitialized is the state of a freshly created agent and the
  	// state cleanup resets the agent to.
  	stateUninitialized = "uninitialized"
  	// stateStarted is set once start has launched the process.
  	stateStarted = "started"
  	// stateStopped is set after the process was stopped by a signal.
  	stateStopped = "stopped"
  	// stateTerminated is set by terminate after the data dir is removed.
  	stateTerminated = "terminated"
  )
  31. type Agent struct {
  32. state string // the state of etcd process
  33. cmd *exec.Cmd
  34. logfile *os.File
  35. cfg AgentConfig
  36. }
  // AgentConfig holds the settings an Agent is created with.
  type AgentConfig struct {
  	// EtcdPath is the path of the etcd binary to execute.
  	EtcdPath string
  	// LogDir is the directory holding "etcd.log", the "etcd.data" data dir
  	// and the "failure_archive" tree.
  	LogDir string
  	// FailpointAddr is handed to etcd through the GOFAIL_HTTP environment
  	// variable.
  	FailpointAddr string
  	// UseRoot enables the network fault operations (dropPort, recoverPort,
  	// setLatency); when false they are no-ops.
  	UseRoot bool
  }
  43. func newAgent(cfg AgentConfig) (*Agent, error) {
  44. // check if the file exists
  45. _, err := os.Stat(cfg.EtcdPath)
  46. if err != nil {
  47. return nil, err
  48. }
  49. c := exec.Command(cfg.EtcdPath)
  50. err = fileutil.TouchDirAll(cfg.LogDir)
  51. if err != nil {
  52. return nil, err
  53. }
  54. var f *os.File
  55. f, err = os.Create(filepath.Join(cfg.LogDir, "etcd.log"))
  56. if err != nil {
  57. return nil, err
  58. }
  59. return &Agent{state: stateUninitialized, cmd: c, logfile: f, cfg: cfg}, nil
  60. }
  61. // start starts a new etcd process with the given args.
  62. func (a *Agent) start(args ...string) error {
  63. args = append(args, "--data-dir", a.dataDir())
  64. a.cmd = exec.Command(a.cmd.Path, args...)
  65. a.cmd.Env = []string{"GOFAIL_HTTP=" + a.cfg.FailpointAddr}
  66. a.cmd.Stdout = a.logfile
  67. a.cmd.Stderr = a.logfile
  68. err := a.cmd.Start()
  69. if err != nil {
  70. return err
  71. }
  72. a.state = stateStarted
  73. return nil
  74. }
  75. // stop stops the existing etcd process the agent started.
  76. func (a *Agent) stopWithSig(sig os.Signal) error {
  77. if a.state != stateStarted {
  78. return nil
  79. }
  80. err := stopWithSig(a.cmd, sig)
  81. if err != nil {
  82. return err
  83. }
  84. a.state = stateStopped
  85. return nil
  86. }
  // stopWithSig sends sig to the process behind cmd and waits for it to exit.
  // If the process has not exited within 5 seconds it is killed, and the call
  // then waits for that to take effect.
  //
  // It returns os.ErrInvalid when cmd has no started process, the error from
  // delivering sig if that fails, and otherwise whatever waiting on the process
  // reported.
  func stopWithSig(cmd *exec.Cmd, sig os.Signal) error {
  	// Guard against a nil or never-started command instead of panicking.
  	if cmd == nil || cmd.Process == nil {
  		return os.ErrInvalid
  	}
  	if err := cmd.Process.Signal(sig); err != nil {
  		return err
  	}

  	// Buffered so the waiter can always hand over its result and finish.
  	waitc := make(chan error, 1)
  	go func() {
  		_, werr := cmd.Process.Wait()
  		waitc <- werr
  	}()

  	// A stoppable timer is released as soon as this function returns, rather
  	// than lingering for the full grace period like time.After would.
  	grace := time.NewTimer(5 * time.Second)
  	defer grace.Stop()

  	select {
  	case werr := <-waitc:
  		return werr
  	case <-grace.C:
  		// Grace period is over: force the process down. The Kill error is
  		// deliberately dropped; the wait result below is what gets reported.
  		_ = cmd.Process.Kill()
  	}
  	return <-waitc
  }
  107. // restart restarts the stopped etcd process.
  108. func (a *Agent) restart() error {
  109. return a.start(a.cmd.Args[1:]...)
  110. }
  111. func (a *Agent) cleanup() error {
  112. // exit with stackstrace
  113. if err := a.stopWithSig(syscall.SIGQUIT); err != nil {
  114. return err
  115. }
  116. a.state = stateUninitialized
  117. a.logfile.Close()
  118. if err := archiveLogAndDataDir(a.cfg.LogDir, a.dataDir()); err != nil {
  119. return err
  120. }
  121. if err := fileutil.TouchDirAll(a.cfg.LogDir); err != nil {
  122. return err
  123. }
  124. f, err := os.Create(filepath.Join(a.cfg.LogDir, "etcd.log"))
  125. if err != nil {
  126. return err
  127. }
  128. a.logfile = f
  129. // https://www.kernel.org/doc/Documentation/sysctl/vm.txt
  130. // https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
  131. cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
  132. if err := cmd.Run(); err != nil {
  133. plog.Infof("error when cleaning page cache (%v)", err)
  134. }
  135. return nil
  136. }
  137. // terminate stops the exiting etcd process the agent started
  138. // and removes the data dir.
  139. func (a *Agent) terminate() error {
  140. err := a.stopWithSig(syscall.SIGTERM)
  141. if err != nil {
  142. return err
  143. }
  144. err = os.RemoveAll(a.dataDir())
  145. if err != nil {
  146. return err
  147. }
  148. a.state = stateTerminated
  149. return nil
  150. }
  151. func (a *Agent) dropPort(port int) error {
  152. if !a.cfg.UseRoot {
  153. return nil
  154. }
  155. return netutil.DropPort(port)
  156. }
  157. func (a *Agent) recoverPort(port int) error {
  158. if !a.cfg.UseRoot {
  159. return nil
  160. }
  161. return netutil.RecoverPort(port)
  162. }
  163. func (a *Agent) setLatency(ms, rv int) error {
  164. if !a.cfg.UseRoot {
  165. return nil
  166. }
  167. if ms == 0 {
  168. return netutil.RemoveLatency()
  169. }
  170. return netutil.SetLatency(ms, rv)
  171. }
  172. func (a *Agent) status() client.Status {
  173. return client.Status{State: a.state}
  174. }
  175. func (a *Agent) dataDir() string {
  176. return filepath.Join(a.cfg.LogDir, "etcd.data")
  177. }
  // existDir reports whether fpath exists and is a directory. Any error from
  // os.Stat, not only "does not exist", yields false.
  func existDir(fpath string) bool {
  	info, err := os.Stat(fpath)
  	return err == nil && info.IsDir()
  }
  189. func archiveLogAndDataDir(logDir string, datadir string) error {
  190. dir := filepath.Join(logDir, "failure_archive", time.Now().Format(time.RFC3339))
  191. if existDir(dir) {
  192. dir = filepath.Join(logDir, "failure_archive", time.Now().Add(time.Second).Format(time.RFC3339))
  193. }
  194. if err := fileutil.TouchDirAll(dir); err != nil {
  195. return err
  196. }
  197. if err := os.Rename(filepath.Join(logDir, "etcd.log"), filepath.Join(dir, "etcd.log")); err != nil {
  198. if !os.IsNotExist(err) {
  199. return err
  200. }
  201. }
  202. if err := os.Rename(datadir, filepath.Join(dir, filepath.Base(datadir))); err != nil {
  203. if !os.IsNotExist(err) {
  204. return err
  205. }
  206. }
  207. return nil
  208. }