handler.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783
  1. // Copyright 2018 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package agent
  15. import (
  16. "errors"
  17. "fmt"
  18. "io/ioutil"
  19. "net/url"
  20. "os"
  21. "os/exec"
  22. "path/filepath"
  23. "syscall"
  24. "time"
  25. "github.com/coreos/etcd/embed"
  26. "github.com/coreos/etcd/functional/rpcpb"
  27. "github.com/coreos/etcd/pkg/fileutil"
  28. "github.com/coreos/etcd/pkg/proxy"
  29. "go.uber.org/zap"
  30. )
  31. // return error for system errors (e.g. fail to create files)
  32. // return status error in response for wrong configuration/operation (e.g. start etcd twice)
  33. func (srv *Server) handleTesterRequest(req *rpcpb.Request) (resp *rpcpb.Response, err error) {
  34. defer func() {
  35. if err == nil && req != nil {
  36. srv.last = req.Operation
  37. srv.lg.Info("handler success", zap.String("operation", req.Operation.String()))
  38. }
  39. }()
  40. if req != nil {
  41. srv.Member = req.Member
  42. srv.Tester = req.Tester
  43. }
  44. switch req.Operation {
  45. case rpcpb.Operation_INITIAL_START_ETCD:
  46. return srv.handle_INITIAL_START_ETCD(req)
  47. case rpcpb.Operation_RESTART_ETCD:
  48. return srv.handle_RESTART_ETCD()
  49. case rpcpb.Operation_SIGTERM_ETCD:
  50. return srv.handle_SIGTERM_ETCD()
  51. case rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA:
  52. return srv.handle_SIGQUIT_ETCD_AND_REMOVE_DATA()
  53. case rpcpb.Operation_SAVE_SNAPSHOT:
  54. return srv.handle_SAVE_SNAPSHOT()
  55. case rpcpb.Operation_RESTORE_RESTART_FROM_SNAPSHOT:
  56. return srv.handle_RESTORE_RESTART_FROM_SNAPSHOT()
  57. case rpcpb.Operation_RESTART_FROM_SNAPSHOT:
  58. return srv.handle_RESTART_FROM_SNAPSHOT()
  59. case rpcpb.Operation_SIGQUIT_ETCD_AND_ARCHIVE_DATA:
  60. return srv.handle_SIGQUIT_ETCD_AND_ARCHIVE_DATA()
  61. case rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT:
  62. return srv.handle_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT()
  63. case rpcpb.Operation_BLACKHOLE_PEER_PORT_TX_RX:
  64. return srv.handle_BLACKHOLE_PEER_PORT_TX_RX()
  65. case rpcpb.Operation_UNBLACKHOLE_PEER_PORT_TX_RX:
  66. return srv.handle_UNBLACKHOLE_PEER_PORT_TX_RX()
  67. case rpcpb.Operation_DELAY_PEER_PORT_TX_RX:
  68. return srv.handle_DELAY_PEER_PORT_TX_RX()
  69. case rpcpb.Operation_UNDELAY_PEER_PORT_TX_RX:
  70. return srv.handle_UNDELAY_PEER_PORT_TX_RX()
  71. default:
  72. msg := fmt.Sprintf("operation not found (%v)", req.Operation)
  73. return &rpcpb.Response{Success: false, Status: msg}, errors.New(msg)
  74. }
  75. }
  76. func (srv *Server) createEtcdLogFile() error {
  77. var err error
  78. srv.etcdLogFile, err = os.Create(srv.Member.Etcd.LogOutput)
  79. if err != nil {
  80. return err
  81. }
  82. srv.lg.Info("created etcd log file", zap.String("path", srv.Member.Etcd.LogOutput))
  83. return nil
  84. }
  85. func (srv *Server) creatEtcd(fromSnapshot bool) error {
  86. if !fileutil.Exist(srv.Member.EtcdExec) && srv.Member.EtcdExec != "embed" {
  87. return fmt.Errorf("unknown etcd exec %q or path does not exist", srv.Member.EtcdExec)
  88. }
  89. if srv.Member.EtcdExec != "embed" {
  90. etcdPath, etcdFlags := srv.Member.EtcdExec, srv.Member.Etcd.Flags()
  91. if fromSnapshot {
  92. etcdFlags = srv.Member.EtcdOnSnapshotRestore.Flags()
  93. }
  94. u, _ := url.Parse(srv.Member.FailpointHTTPAddr)
  95. srv.lg.Info(
  96. "creating etcd command",
  97. zap.String("etcd-exec", etcdPath),
  98. zap.Strings("etcd-flags", etcdFlags),
  99. zap.String("failpoint-http-addr", srv.Member.FailpointHTTPAddr),
  100. zap.String("failpoint-addr", u.Host),
  101. )
  102. srv.etcdCmd = exec.Command(etcdPath, etcdFlags...)
  103. srv.etcdCmd.Env = []string{"GOFAIL_HTTP=" + u.Host}
  104. srv.etcdCmd.Stdout = srv.etcdLogFile
  105. srv.etcdCmd.Stderr = srv.etcdLogFile
  106. return nil
  107. }
  108. cfg, err := srv.Member.Etcd.EmbedConfig()
  109. if err != nil {
  110. return err
  111. }
  112. srv.lg.Info("starting embedded etcd", zap.String("name", cfg.Name))
  113. srv.etcdServer, err = embed.StartEtcd(cfg)
  114. if err != nil {
  115. return err
  116. }
  117. srv.lg.Info("started embedded etcd", zap.String("name", cfg.Name))
  118. return nil
  119. }
  120. // start but do not wait for it to complete
  121. func (srv *Server) runEtcd() error {
  122. errc := make(chan error)
  123. go func() {
  124. time.Sleep(5 * time.Second)
  125. // server advertise client/peer listener had to start first
  126. // before setting up proxy listener
  127. errc <- srv.startProxy()
  128. }()
  129. if srv.etcdCmd != nil {
  130. srv.lg.Info(
  131. "starting etcd command",
  132. zap.String("command-path", srv.etcdCmd.Path),
  133. )
  134. err := srv.etcdCmd.Start()
  135. perr := <-errc
  136. srv.lg.Info(
  137. "started etcd command",
  138. zap.String("command-path", srv.etcdCmd.Path),
  139. zap.Errors("errors", []error{err, perr}),
  140. )
  141. if err != nil {
  142. return err
  143. }
  144. return perr
  145. }
  146. select {
  147. case <-srv.etcdServer.Server.ReadyNotify():
  148. srv.lg.Info("embedded etcd is ready")
  149. case <-time.After(time.Minute):
  150. srv.etcdServer.Close()
  151. return fmt.Errorf("took too long to start %v", <-srv.etcdServer.Err())
  152. }
  153. return <-errc
  154. }
  155. // SIGQUIT to exit with stackstrace
  156. func (srv *Server) stopEtcd(sig os.Signal) error {
  157. srv.stopProxy()
  158. if srv.etcdCmd != nil {
  159. srv.lg.Info(
  160. "stopping etcd command",
  161. zap.String("command-path", srv.etcdCmd.Path),
  162. zap.String("signal", sig.String()),
  163. )
  164. err := srv.etcdCmd.Process.Signal(sig)
  165. if err != nil {
  166. return err
  167. }
  168. errc := make(chan error)
  169. go func() {
  170. _, ew := srv.etcdCmd.Process.Wait()
  171. errc <- ew
  172. close(errc)
  173. }()
  174. select {
  175. case <-time.After(5 * time.Second):
  176. srv.etcdCmd.Process.Kill()
  177. case e := <-errc:
  178. return e
  179. }
  180. err = <-errc
  181. srv.lg.Info(
  182. "stopped etcd command",
  183. zap.String("command-path", srv.etcdCmd.Path),
  184. zap.String("signal", sig.String()),
  185. zap.Error(err),
  186. )
  187. return err
  188. }
  189. srv.lg.Info("stopping embedded etcd")
  190. srv.etcdServer.Server.HardStop()
  191. srv.etcdServer.Close()
  192. srv.lg.Info("stopped embedded etcd")
  193. return nil
  194. }
  195. func (srv *Server) startProxy() error {
  196. if srv.Member.EtcdClientProxy {
  197. advertiseClientURL, advertiseClientURLPort, err := getURLAndPort(srv.Member.Etcd.AdvertiseClientURLs[0])
  198. if err != nil {
  199. return err
  200. }
  201. listenClientURL, _, err := getURLAndPort(srv.Member.Etcd.ListenClientURLs[0])
  202. if err != nil {
  203. return err
  204. }
  205. srv.lg.Info("starting proxy on client traffic", zap.String("url", advertiseClientURL.String()))
  206. srv.advertiseClientPortToProxy[advertiseClientURLPort] = proxy.NewServer(proxy.ServerConfig{
  207. Logger: srv.lg,
  208. From: *advertiseClientURL,
  209. To: *listenClientURL,
  210. })
  211. select {
  212. case err = <-srv.advertiseClientPortToProxy[advertiseClientURLPort].Error():
  213. return err
  214. case <-time.After(2 * time.Second):
  215. srv.lg.Info("started proxy on client traffic", zap.String("url", advertiseClientURL.String()))
  216. }
  217. }
  218. if srv.Member.EtcdPeerProxy {
  219. advertisePeerURL, advertisePeerURLPort, err := getURLAndPort(srv.Member.Etcd.AdvertisePeerURLs[0])
  220. if err != nil {
  221. return err
  222. }
  223. listenPeerURL, _, err := getURLAndPort(srv.Member.Etcd.ListenPeerURLs[0])
  224. if err != nil {
  225. return err
  226. }
  227. srv.lg.Info("starting proxy on peer traffic", zap.String("url", advertisePeerURL.String()))
  228. srv.advertisePeerPortToProxy[advertisePeerURLPort] = proxy.NewServer(proxy.ServerConfig{
  229. Logger: srv.lg,
  230. From: *advertisePeerURL,
  231. To: *listenPeerURL,
  232. })
  233. select {
  234. case err = <-srv.advertisePeerPortToProxy[advertisePeerURLPort].Error():
  235. return err
  236. case <-time.After(2 * time.Second):
  237. srv.lg.Info("started proxy on peer traffic", zap.String("url", advertisePeerURL.String()))
  238. }
  239. }
  240. return nil
  241. }
  242. func (srv *Server) stopProxy() {
  243. if srv.Member.EtcdClientProxy && len(srv.advertiseClientPortToProxy) > 0 {
  244. for port, px := range srv.advertiseClientPortToProxy {
  245. if err := px.Close(); err != nil {
  246. srv.lg.Warn("failed to close proxy", zap.Int("port", port))
  247. continue
  248. }
  249. select {
  250. case <-px.Done():
  251. // enough time to release port
  252. time.Sleep(time.Second)
  253. case <-time.After(time.Second):
  254. }
  255. srv.lg.Info("closed proxy",
  256. zap.Int("port", port),
  257. zap.String("from", px.From()),
  258. zap.String("to", px.To()),
  259. )
  260. }
  261. srv.advertiseClientPortToProxy = make(map[int]proxy.Server)
  262. }
  263. if srv.Member.EtcdPeerProxy && len(srv.advertisePeerPortToProxy) > 0 {
  264. for port, px := range srv.advertisePeerPortToProxy {
  265. if err := px.Close(); err != nil {
  266. srv.lg.Warn("failed to close proxy", zap.Int("port", port))
  267. continue
  268. }
  269. select {
  270. case <-px.Done():
  271. // enough time to release port
  272. time.Sleep(time.Second)
  273. case <-time.After(time.Second):
  274. }
  275. srv.lg.Info("closed proxy",
  276. zap.Int("port", port),
  277. zap.String("from", px.From()),
  278. zap.String("to", px.To()),
  279. )
  280. }
  281. srv.advertisePeerPortToProxy = make(map[int]proxy.Server)
  282. }
  283. }
  284. // if started with manual TLS, stores TLS assets
  285. // from tester/client to disk before starting etcd process
  286. func (srv *Server) saveTLSAssets() error {
  287. if srv.Member.PeerCertPath != "" {
  288. if srv.Member.PeerCertData == "" {
  289. return fmt.Errorf("got empty data for %q", srv.Member.PeerCertPath)
  290. }
  291. if err := ioutil.WriteFile(srv.Member.PeerCertPath, []byte(srv.Member.PeerCertData), 0644); err != nil {
  292. return err
  293. }
  294. }
  295. if srv.Member.PeerKeyPath != "" {
  296. if srv.Member.PeerKeyData == "" {
  297. return fmt.Errorf("got empty data for %q", srv.Member.PeerKeyPath)
  298. }
  299. if err := ioutil.WriteFile(srv.Member.PeerKeyPath, []byte(srv.Member.PeerKeyData), 0644); err != nil {
  300. return err
  301. }
  302. }
  303. if srv.Member.PeerTrustedCAPath != "" {
  304. if srv.Member.PeerTrustedCAData == "" {
  305. return fmt.Errorf("got empty data for %q", srv.Member.PeerTrustedCAPath)
  306. }
  307. if err := ioutil.WriteFile(srv.Member.PeerTrustedCAPath, []byte(srv.Member.PeerTrustedCAData), 0644); err != nil {
  308. return err
  309. }
  310. }
  311. if srv.Member.PeerCertPath != "" &&
  312. srv.Member.PeerKeyPath != "" &&
  313. srv.Member.PeerTrustedCAPath != "" {
  314. srv.lg.Info(
  315. "wrote",
  316. zap.String("peer-cert", srv.Member.PeerCertPath),
  317. zap.String("peer-key", srv.Member.PeerKeyPath),
  318. zap.String("peer-trusted-ca", srv.Member.PeerTrustedCAPath),
  319. )
  320. }
  321. if srv.Member.ClientCertPath != "" {
  322. if srv.Member.ClientCertData == "" {
  323. return fmt.Errorf("got empty data for %q", srv.Member.ClientCertPath)
  324. }
  325. if err := ioutil.WriteFile(srv.Member.ClientCertPath, []byte(srv.Member.ClientCertData), 0644); err != nil {
  326. return err
  327. }
  328. }
  329. if srv.Member.ClientKeyPath != "" {
  330. if srv.Member.ClientKeyData == "" {
  331. return fmt.Errorf("got empty data for %q", srv.Member.ClientKeyPath)
  332. }
  333. if err := ioutil.WriteFile(srv.Member.ClientKeyPath, []byte(srv.Member.ClientKeyData), 0644); err != nil {
  334. return err
  335. }
  336. }
  337. if srv.Member.ClientTrustedCAPath != "" {
  338. if srv.Member.ClientTrustedCAData == "" {
  339. return fmt.Errorf("got empty data for %q", srv.Member.ClientTrustedCAPath)
  340. }
  341. if err := ioutil.WriteFile(srv.Member.ClientTrustedCAPath, []byte(srv.Member.ClientTrustedCAData), 0644); err != nil {
  342. return err
  343. }
  344. }
  345. if srv.Member.ClientCertPath != "" &&
  346. srv.Member.ClientKeyPath != "" &&
  347. srv.Member.ClientTrustedCAPath != "" {
  348. srv.lg.Info(
  349. "wrote",
  350. zap.String("client-cert", srv.Member.ClientCertPath),
  351. zap.String("client-key", srv.Member.ClientKeyPath),
  352. zap.String("client-trusted-ca", srv.Member.ClientTrustedCAPath),
  353. )
  354. }
  355. return nil
  356. }
  357. func (srv *Server) loadAutoTLSAssets() error {
  358. if srv.Member.Etcd.PeerAutoTLS {
  359. // in case of slow disk
  360. time.Sleep(time.Second)
  361. fdir := filepath.Join(srv.Member.Etcd.DataDir, "fixtures", "peer")
  362. srv.lg.Info(
  363. "loading client auto TLS assets",
  364. zap.String("dir", fdir),
  365. zap.String("endpoint", srv.EtcdClientEndpoint),
  366. )
  367. certPath := filepath.Join(fdir, "cert.pem")
  368. if !fileutil.Exist(certPath) {
  369. return fmt.Errorf("cannot find %q", certPath)
  370. }
  371. certData, err := ioutil.ReadFile(certPath)
  372. if err != nil {
  373. return fmt.Errorf("cannot read %q (%v)", certPath, err)
  374. }
  375. srv.Member.PeerCertData = string(certData)
  376. keyPath := filepath.Join(fdir, "key.pem")
  377. if !fileutil.Exist(keyPath) {
  378. return fmt.Errorf("cannot find %q", keyPath)
  379. }
  380. keyData, err := ioutil.ReadFile(keyPath)
  381. if err != nil {
  382. return fmt.Errorf("cannot read %q (%v)", keyPath, err)
  383. }
  384. srv.Member.PeerKeyData = string(keyData)
  385. srv.lg.Info(
  386. "loaded peer auto TLS assets",
  387. zap.String("peer-cert-path", certPath),
  388. zap.Int("peer-cert-length", len(certData)),
  389. zap.String("peer-key-path", keyPath),
  390. zap.Int("peer-key-length", len(keyData)),
  391. )
  392. }
  393. if srv.Member.Etcd.ClientAutoTLS {
  394. // in case of slow disk
  395. time.Sleep(time.Second)
  396. fdir := filepath.Join(srv.Member.Etcd.DataDir, "fixtures", "client")
  397. srv.lg.Info(
  398. "loading client TLS assets",
  399. zap.String("dir", fdir),
  400. zap.String("endpoint", srv.EtcdClientEndpoint),
  401. )
  402. certPath := filepath.Join(fdir, "cert.pem")
  403. if !fileutil.Exist(certPath) {
  404. return fmt.Errorf("cannot find %q", certPath)
  405. }
  406. certData, err := ioutil.ReadFile(certPath)
  407. if err != nil {
  408. return fmt.Errorf("cannot read %q (%v)", certPath, err)
  409. }
  410. srv.Member.ClientCertData = string(certData)
  411. keyPath := filepath.Join(fdir, "key.pem")
  412. if !fileutil.Exist(keyPath) {
  413. return fmt.Errorf("cannot find %q", keyPath)
  414. }
  415. keyData, err := ioutil.ReadFile(keyPath)
  416. if err != nil {
  417. return fmt.Errorf("cannot read %q (%v)", keyPath, err)
  418. }
  419. srv.Member.ClientKeyData = string(keyData)
  420. srv.lg.Info(
  421. "loaded client TLS assets",
  422. zap.String("peer-cert-path", certPath),
  423. zap.Int("peer-cert-length", len(certData)),
  424. zap.String("peer-key-path", keyPath),
  425. zap.Int("peer-key-length", len(keyData)),
  426. )
  427. }
  428. return nil
  429. }
  430. func (srv *Server) handle_INITIAL_START_ETCD(req *rpcpb.Request) (*rpcpb.Response, error) {
  431. if srv.last != rpcpb.Operation_NOT_STARTED {
  432. return &rpcpb.Response{
  433. Success: false,
  434. Status: fmt.Sprintf("%q is not valid; last server operation was %q", rpcpb.Operation_INITIAL_START_ETCD.String(), srv.last.String()),
  435. Member: req.Member,
  436. }, nil
  437. }
  438. err := fileutil.TouchDirAll(srv.Member.BaseDir)
  439. if err != nil {
  440. return nil, err
  441. }
  442. srv.lg.Info("created base directory", zap.String("path", srv.Member.BaseDir))
  443. if srv.etcdServer == nil {
  444. if err = srv.createEtcdLogFile(); err != nil {
  445. return nil, err
  446. }
  447. }
  448. if err = srv.saveTLSAssets(); err != nil {
  449. return nil, err
  450. }
  451. if err = srv.creatEtcd(false); err != nil {
  452. return nil, err
  453. }
  454. if err = srv.runEtcd(); err != nil {
  455. return nil, err
  456. }
  457. if err = srv.loadAutoTLSAssets(); err != nil {
  458. return nil, err
  459. }
  460. return &rpcpb.Response{
  461. Success: true,
  462. Status: "start etcd PASS",
  463. Member: srv.Member,
  464. }, nil
  465. }
  466. func (srv *Server) handle_RESTART_ETCD() (*rpcpb.Response, error) {
  467. var err error
  468. if !fileutil.Exist(srv.Member.BaseDir) {
  469. err = fileutil.TouchDirAll(srv.Member.BaseDir)
  470. if err != nil {
  471. return nil, err
  472. }
  473. }
  474. if err = srv.saveTLSAssets(); err != nil {
  475. return nil, err
  476. }
  477. if err = srv.creatEtcd(false); err != nil {
  478. return nil, err
  479. }
  480. if err = srv.runEtcd(); err != nil {
  481. return nil, err
  482. }
  483. if err = srv.loadAutoTLSAssets(); err != nil {
  484. return nil, err
  485. }
  486. return &rpcpb.Response{
  487. Success: true,
  488. Status: "restart etcd PASS",
  489. Member: srv.Member,
  490. }, nil
  491. }
  492. func (srv *Server) handle_SIGTERM_ETCD() (*rpcpb.Response, error) {
  493. if err := srv.stopEtcd(syscall.SIGTERM); err != nil {
  494. return nil, err
  495. }
  496. if srv.etcdServer != nil {
  497. srv.etcdServer.GetLogger().Sync()
  498. } else {
  499. srv.etcdLogFile.Sync()
  500. }
  501. return &rpcpb.Response{
  502. Success: true,
  503. Status: "killed etcd",
  504. }, nil
  505. }
  506. func (srv *Server) handle_SIGQUIT_ETCD_AND_REMOVE_DATA() (*rpcpb.Response, error) {
  507. err := srv.stopEtcd(syscall.SIGQUIT)
  508. if err != nil {
  509. return nil, err
  510. }
  511. if srv.etcdServer != nil {
  512. srv.etcdServer.GetLogger().Sync()
  513. } else {
  514. srv.etcdLogFile.Sync()
  515. srv.etcdLogFile.Close()
  516. }
  517. // for debugging purposes, rename instead of removing
  518. if err = os.RemoveAll(srv.Member.BaseDir + ".backup"); err != nil {
  519. return nil, err
  520. }
  521. if err = os.Rename(srv.Member.BaseDir, srv.Member.BaseDir+".backup"); err != nil {
  522. return nil, err
  523. }
  524. srv.lg.Info(
  525. "renamed",
  526. zap.String("base-dir", srv.Member.BaseDir),
  527. zap.String("new-dir", srv.Member.BaseDir+".backup"),
  528. )
  529. // create a new log file for next new member restart
  530. if !fileutil.Exist(srv.Member.BaseDir) {
  531. err = fileutil.TouchDirAll(srv.Member.BaseDir)
  532. if err != nil {
  533. return nil, err
  534. }
  535. }
  536. return &rpcpb.Response{
  537. Success: true,
  538. Status: "killed etcd and removed base directory",
  539. }, nil
  540. }
  541. func (srv *Server) handle_SAVE_SNAPSHOT() (*rpcpb.Response, error) {
  542. err := srv.Member.SaveSnapshot(srv.lg)
  543. if err != nil {
  544. return nil, err
  545. }
  546. return &rpcpb.Response{
  547. Success: true,
  548. Status: "saved snapshot",
  549. SnapshotInfo: srv.Member.SnapshotInfo,
  550. }, nil
  551. }
  552. func (srv *Server) handle_RESTORE_RESTART_FROM_SNAPSHOT() (resp *rpcpb.Response, err error) {
  553. err = srv.Member.RestoreSnapshot(srv.lg)
  554. if err != nil {
  555. return nil, err
  556. }
  557. resp, err = srv.handle_RESTART_FROM_SNAPSHOT()
  558. if resp != nil && err == nil {
  559. resp.Status = "restored snapshot and " + resp.Status
  560. }
  561. return resp, err
  562. }
  563. func (srv *Server) handle_RESTART_FROM_SNAPSHOT() (resp *rpcpb.Response, err error) {
  564. if err = srv.saveTLSAssets(); err != nil {
  565. return nil, err
  566. }
  567. if err = srv.creatEtcd(true); err != nil {
  568. return nil, err
  569. }
  570. if err = srv.runEtcd(); err != nil {
  571. return nil, err
  572. }
  573. if err = srv.loadAutoTLSAssets(); err != nil {
  574. return nil, err
  575. }
  576. return &rpcpb.Response{
  577. Success: true,
  578. Status: "restarted etcd from snapshot",
  579. SnapshotInfo: srv.Member.SnapshotInfo,
  580. }, nil
  581. }
  582. func (srv *Server) handle_SIGQUIT_ETCD_AND_ARCHIVE_DATA() (*rpcpb.Response, error) {
  583. err := srv.stopEtcd(syscall.SIGQUIT)
  584. if err != nil {
  585. return nil, err
  586. }
  587. if srv.etcdServer != nil {
  588. srv.etcdServer.GetLogger().Sync()
  589. } else {
  590. srv.etcdLogFile.Sync()
  591. srv.etcdLogFile.Close()
  592. }
  593. // TODO: support separate WAL directory
  594. if err = archive(
  595. srv.Member.BaseDir,
  596. srv.Member.Etcd.LogOutput,
  597. srv.Member.Etcd.DataDir,
  598. ); err != nil {
  599. return nil, err
  600. }
  601. srv.lg.Info("archived data", zap.String("base-dir", srv.Member.BaseDir))
  602. if srv.etcdServer == nil {
  603. if err = srv.createEtcdLogFile(); err != nil {
  604. return nil, err
  605. }
  606. }
  607. srv.lg.Info("cleaning up page cache")
  608. if err := cleanPageCache(); err != nil {
  609. srv.lg.Warn("failed to clean up page cache", zap.String("error", err.Error()))
  610. }
  611. srv.lg.Info("cleaned up page cache")
  612. return &rpcpb.Response{
  613. Success: true,
  614. Status: "cleaned up etcd",
  615. }, nil
  616. }
  617. // stop proxy, etcd, delete data directory
  618. func (srv *Server) handle_SIGQUIT_ETCD_AND_REMOVE_DATA_AND_STOP_AGENT() (*rpcpb.Response, error) {
  619. err := srv.stopEtcd(syscall.SIGQUIT)
  620. if err != nil {
  621. return nil, err
  622. }
  623. if srv.etcdServer != nil {
  624. srv.etcdServer.GetLogger().Sync()
  625. } else {
  626. srv.etcdLogFile.Sync()
  627. srv.etcdLogFile.Close()
  628. }
  629. err = os.RemoveAll(srv.Member.BaseDir)
  630. if err != nil {
  631. return nil, err
  632. }
  633. srv.lg.Info("removed base directory", zap.String("dir", srv.Member.BaseDir))
  634. // stop agent server
  635. srv.Stop()
  636. return &rpcpb.Response{
  637. Success: true,
  638. Status: "destroyed etcd and agent",
  639. }, nil
  640. }
  641. func (srv *Server) handle_BLACKHOLE_PEER_PORT_TX_RX() (*rpcpb.Response, error) {
  642. for port, px := range srv.advertisePeerPortToProxy {
  643. srv.lg.Info("blackholing", zap.Int("peer-port", port))
  644. px.BlackholeTx()
  645. px.BlackholeRx()
  646. srv.lg.Info("blackholed", zap.Int("peer-port", port))
  647. }
  648. return &rpcpb.Response{
  649. Success: true,
  650. Status: "blackholed peer port tx/rx",
  651. }, nil
  652. }
  653. func (srv *Server) handle_UNBLACKHOLE_PEER_PORT_TX_RX() (*rpcpb.Response, error) {
  654. for port, px := range srv.advertisePeerPortToProxy {
  655. srv.lg.Info("unblackholing", zap.Int("peer-port", port))
  656. px.UnblackholeTx()
  657. px.UnblackholeRx()
  658. srv.lg.Info("unblackholed", zap.Int("peer-port", port))
  659. }
  660. return &rpcpb.Response{
  661. Success: true,
  662. Status: "unblackholed peer port tx/rx",
  663. }, nil
  664. }
  665. func (srv *Server) handle_DELAY_PEER_PORT_TX_RX() (*rpcpb.Response, error) {
  666. lat := time.Duration(srv.Tester.UpdatedDelayLatencyMs) * time.Millisecond
  667. rv := time.Duration(srv.Tester.DelayLatencyMsRv) * time.Millisecond
  668. for port, px := range srv.advertisePeerPortToProxy {
  669. srv.lg.Info("delaying",
  670. zap.Int("peer-port", port),
  671. zap.Duration("latency", lat),
  672. zap.Duration("random-variable", rv),
  673. )
  674. px.DelayTx(lat, rv)
  675. px.DelayRx(lat, rv)
  676. srv.lg.Info("delayed",
  677. zap.Int("peer-port", port),
  678. zap.Duration("latency", lat),
  679. zap.Duration("random-variable", rv),
  680. )
  681. }
  682. return &rpcpb.Response{
  683. Success: true,
  684. Status: "delayed peer port tx/rx",
  685. }, nil
  686. }
  687. func (srv *Server) handle_UNDELAY_PEER_PORT_TX_RX() (*rpcpb.Response, error) {
  688. for port, px := range srv.advertisePeerPortToProxy {
  689. srv.lg.Info("undelaying", zap.Int("peer-port", port))
  690. px.UndelayTx()
  691. px.UndelayRx()
  692. srv.lg.Info("undelayed", zap.Int("peer-port", port))
  693. }
  694. return &rpcpb.Response{
  695. Success: true,
  696. Status: "undelayed peer port tx/rx",
  697. }, nil
  698. }