123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- // Copyright 2015 The etcd Authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package main
- import (
- "os"
- "os/exec"
- "path/filepath"
- "syscall"
- "time"
- "github.com/coreos/etcd/pkg/fileutil"
- "github.com/coreos/etcd/pkg/netutil"
- "github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
- )
// States of the etcd process managed by an Agent, as reported by status.
const (
	// stateUninitialized is the state of a freshly created agent and of an
	// agent that has just been cleaned up.
	stateUninitialized = "uninitialized"
	// stateStarted is set once the etcd process has been started.
	stateStarted = "started"
	// stateStopped is set after a started process was signalled and exited.
	stateStopped = "stopped"
	// stateTerminated is set after the process was stopped and its data
	// directory removed.
	stateTerminated = "terminated"
)
- type Agent struct {
- state string // the state of etcd process
- cmd *exec.Cmd
- logfile *os.File
- cfg AgentConfig
- }
// AgentConfig holds the settings an Agent is created with.
type AgentConfig struct {
	// EtcdPath is the path of the etcd binary the agent executes.
	EtcdPath string
	// LogDir is the directory that holds the etcd log file, the etcd data
	// directory and the failure archive.
	LogDir string
	// FailpointAddr is handed to the etcd process through the GOFAIL_HTTP
	// environment variable.
	FailpointAddr string
	// UseRoot enables the network fault operations (dropPort, recoverPort,
	// setLatency); when false those operations are no-ops.
	UseRoot bool
}
- func newAgent(cfg AgentConfig) (*Agent, error) {
- // check if the file exists
- _, err := os.Stat(cfg.EtcdPath)
- if err != nil {
- return nil, err
- }
- c := exec.Command(cfg.EtcdPath)
- err = fileutil.TouchDirAll(cfg.LogDir)
- if err != nil {
- return nil, err
- }
- var f *os.File
- f, err = os.Create(filepath.Join(cfg.LogDir, "etcd.log"))
- if err != nil {
- return nil, err
- }
- return &Agent{state: stateUninitialized, cmd: c, logfile: f, cfg: cfg}, nil
- }
- // start starts a new etcd process with the given args.
- func (a *Agent) start(args ...string) error {
- args = append(args, "--data-dir", a.dataDir())
- a.cmd = exec.Command(a.cmd.Path, args...)
- a.cmd.Env = []string{"GOFAIL_HTTP=" + a.cfg.FailpointAddr}
- a.cmd.Stdout = a.logfile
- a.cmd.Stderr = a.logfile
- err := a.cmd.Start()
- if err != nil {
- return err
- }
- a.state = stateStarted
- return nil
- }
- // stop stops the existing etcd process the agent started.
- func (a *Agent) stopWithSig(sig os.Signal) error {
- if a.state != stateStarted {
- return nil
- }
- err := stopWithSig(a.cmd, sig)
- if err != nil {
- return err
- }
- a.state = stateStopped
- return nil
- }
// stopWithSig sends sig to the process behind cmd and waits for it to exit.
//
// If the process has not exited within 5 seconds of the signal it is killed,
// and the call then waits for the process to be reaped. The returned error is
// the failure to deliver sig or, otherwise, the error reported by waiting on
// the process. os.ErrInvalid is returned when cmd is nil or has not been
// started, rather than panicking on the missing process handle.
func stopWithSig(cmd *exec.Cmd, sig os.Signal) error {
	if cmd == nil || cmd.Process == nil {
		return os.ErrInvalid
	}
	if err := cmd.Process.Signal(sig); err != nil {
		return err
	}

	// Buffered so the waiting goroutine can always hand over its result and
	// exit, regardless of which select branch is taken below.
	errc := make(chan error, 1)
	go func() {
		_, ew := cmd.Process.Wait()
		errc <- ew
	}()

	// An explicit timer (instead of time.After) is released as soon as the
	// process exits, not only after the full timeout.
	timer := time.NewTimer(5 * time.Second)
	defer timer.Stop()

	select {
	case err := <-errc:
		return err
	case <-timer.C:
		// The process ignored the signal for too long; force it down. The
		// Kill error is deliberately ignored: the usual failure is that the
		// process has exited in the meantime, and the wait result read
		// below is what the caller is interested in.
		_ = cmd.Process.Kill()
	}
	return <-errc
}
- // restart restarts the stopped etcd process.
- func (a *Agent) restart() error {
- return a.start(a.cmd.Args[1:]...)
- }
- func (a *Agent) cleanup() error {
- // exit with stackstrace
- if err := a.stopWithSig(syscall.SIGQUIT); err != nil {
- return err
- }
- a.state = stateUninitialized
- a.logfile.Close()
- if err := archiveLogAndDataDir(a.cfg.LogDir, a.dataDir()); err != nil {
- return err
- }
- if err := fileutil.TouchDirAll(a.cfg.LogDir); err != nil {
- return err
- }
- f, err := os.Create(filepath.Join(a.cfg.LogDir, "etcd.log"))
- if err != nil {
- return err
- }
- a.logfile = f
- // https://www.kernel.org/doc/Documentation/sysctl/vm.txt
- // https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
- cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
- if err := cmd.Run(); err != nil {
- plog.Infof("error when cleaning page cache (%v)", err)
- }
- return nil
- }
- // terminate stops the exiting etcd process the agent started
- // and removes the data dir.
- func (a *Agent) terminate() error {
- err := a.stopWithSig(syscall.SIGTERM)
- if err != nil {
- return err
- }
- err = os.RemoveAll(a.dataDir())
- if err != nil {
- return err
- }
- a.state = stateTerminated
- return nil
- }
- func (a *Agent) dropPort(port int) error {
- if !a.cfg.UseRoot {
- return nil
- }
- return netutil.DropPort(port)
- }
- func (a *Agent) recoverPort(port int) error {
- if !a.cfg.UseRoot {
- return nil
- }
- return netutil.RecoverPort(port)
- }
- func (a *Agent) setLatency(ms, rv int) error {
- if !a.cfg.UseRoot {
- return nil
- }
- if ms == 0 {
- return netutil.RemoveLatency()
- }
- return netutil.SetLatency(ms, rv)
- }
- func (a *Agent) status() client.Status {
- return client.Status{State: a.state}
- }
- func (a *Agent) dataDir() string {
- return filepath.Join(a.cfg.LogDir, "etcd.data")
- }
// existDir reports whether fpath can be stat'ed and is a directory. Any
// stat error, not only "does not exist", yields false.
func existDir(fpath string) bool {
	info, err := os.Stat(fpath)
	return err == nil && info.IsDir()
}
- func archiveLogAndDataDir(logDir string, datadir string) error {
- dir := filepath.Join(logDir, "failure_archive", time.Now().Format(time.RFC3339))
- if existDir(dir) {
- dir = filepath.Join(logDir, "failure_archive", time.Now().Add(time.Second).Format(time.RFC3339))
- }
- if err := fileutil.TouchDirAll(dir); err != nil {
- return err
- }
- if err := os.Rename(filepath.Join(logDir, "etcd.log"), filepath.Join(dir, "etcd.log")); err != nil {
- if !os.IsNotExist(err) {
- return err
- }
- }
- if err := os.Rename(datadir, filepath.Join(dir, filepath.Base(datadir))); err != nil {
- if !os.IsNotExist(err) {
- return err
- }
- }
- return nil
- }
|