// Source file: failpoint.go (4.1 KB)
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package main
  15. import (
  16. "fmt"
  17. "io/ioutil"
  18. "net/http"
  19. "strings"
  20. "sync"
  21. "time"
  22. )
  // failpointStats records how often each failpoint has crashed a member.
  type failpointStats struct {
  	// mu guards crashes.
  	mu sync.Mutex
  	// crashes maps a failpoint name to the number of crashes counted for it.
  	crashes map[string]int
  }

  // fpStats is the package-level crash counter updated by the failpoint
  // recovery functions.
  var fpStats failpointStats
  30. func failpointFailures(c *cluster, failpoints []string) (ret []failure, err error) {
  31. var fps []string
  32. fps, err = failpointPaths(c.Members[0].FailpointURL)
  33. if err != nil {
  34. return nil, err
  35. }
  36. // create failure objects for all failpoints
  37. for _, fp := range fps {
  38. if len(fp) == 0 {
  39. continue
  40. }
  41. fpFails := failuresFromFailpoint(fp, failpoints)
  42. // wrap in delays so failpoint has time to trigger
  43. for i, fpf := range fpFails {
  44. if strings.Contains(fp, "Snap") {
  45. // hack to trigger snapshot failpoints
  46. fpFails[i] = &failureUntilSnapshot{fpf}
  47. } else {
  48. fpFails[i] = &failureDelay{fpf, 3 * time.Second}
  49. }
  50. }
  51. ret = append(ret, fpFails...)
  52. }
  53. fpStats.crashes = make(map[string]int)
  54. return ret, err
  55. }
  // failpointPaths fetches the failpoint listing from endpoint and returns the
  // failpoint names.
  //
  // The response body is split on newlines and each line contributes the text
  // before its first "=" (or the whole line when there is no "="). Blank lines,
  // including the one after a trailing newline, therefore yield empty names;
  // callers are expected to skip them.
  //
  // An error is returned when the request fails, when the server answers with
  // a non-2xx status, or when the body cannot be read.
  func failpointPaths(endpoint string) ([]string, error) {
  	resp, err := http.Get(endpoint)
  	if err != nil {
  		return nil, err
  	}
  	defer resp.Body.Close()

  	// Without this check an error page would be parsed as failpoint names.
  	if resp.StatusCode/100 != 2 {
  		return nil, fmt.Errorf("failed to GET failpoints at %s (%v)", endpoint, resp.Status)
  	}

  	body, err := ioutil.ReadAll(resp.Body)
  	if err != nil {
  		return nil, err
  	}

  	lines := strings.Split(string(body), "\n")
  	fps := make([]string, 0, len(lines))
  	for _, line := range lines {
  		name := line
  		if i := strings.Index(line, "="); i >= 0 {
  			name = line[:i]
  		}
  		fps = append(fps, name)
  	}
  	return fps, nil
  }
  73. // failpoints follows FreeBSD KFAIL_POINT syntax.
  74. // e.g. panic("etcd-tester"),1*sleep(1000)->panic("etcd-tester")
  75. func failuresFromFailpoint(fp string, failpoints []string) (fs []failure) {
  76. recov := makeRecoverFailpoint(fp)
  77. for _, failpoint := range failpoints {
  78. inject := makeInjectFailpoint(fp, failpoint)
  79. fs = append(fs, []failure{
  80. &failureOne{
  81. description: description(fmt.Sprintf("failpoint %s (one: %s)", fp, failpoint)),
  82. injectMember: inject,
  83. recoverMember: recov,
  84. },
  85. &failureAll{
  86. description: description(fmt.Sprintf("failpoint %s (all: %s)", fp, failpoint)),
  87. injectMember: inject,
  88. recoverMember: recov,
  89. },
  90. &failureMajority{
  91. description: description(fmt.Sprintf("failpoint %s (majority: %s)", fp, failpoint)),
  92. injectMember: inject,
  93. recoverMember: recov,
  94. },
  95. &failureLeader{
  96. failureByFunc{
  97. description: description(fmt.Sprintf("failpoint %s (leader: %s)", fp, failpoint)),
  98. injectMember: inject,
  99. recoverMember: recov,
  100. },
  101. 0,
  102. },
  103. }...)
  104. }
  105. return fs
  106. }
  107. func makeInjectFailpoint(fp, val string) injectMemberFunc {
  108. return func(m *member) (err error) {
  109. return putFailpoint(m.FailpointURL, fp, val)
  110. }
  111. }
  112. func makeRecoverFailpoint(fp string) recoverMemberFunc {
  113. return func(m *member) error {
  114. if err := delFailpoint(m.FailpointURL, fp); err == nil {
  115. return nil
  116. }
  117. // node not responding, likely dead from fp panic; restart
  118. fpStats.mu.Lock()
  119. fpStats.crashes[fp]++
  120. fpStats.mu.Unlock()
  121. return recoverStop(m)
  122. }
  123. }
  // putFailpoint sets failpoint fp to val by sending an HTTP PUT with val as
  // the body to ep + "/" + fp.
  //
  // It returns an error when the request cannot be built (e.g. a malformed
  // endpoint), when the request fails, or when the server answers with a
  // non-2xx status.
  func putFailpoint(ep, fp, val string) error {
  	req, err := http.NewRequest(http.MethodPut, ep+"/"+fp, strings.NewReader(val))
  	if err != nil {
  		// A nil request would make Do panic, so surface the error instead.
  		return err
  	}
  	resp, err := http.DefaultClient.Do(req)
  	if err != nil {
  		return err
  	}
  	defer resp.Body.Close()
  	if resp.StatusCode/100 != 2 {
  		return fmt.Errorf("failed to PUT %s=%s at %s (%v)", fp, val, ep, resp.Status)
  	}
  	return nil
  }
  // delFailpoint clears failpoint fp by sending an HTTP DELETE to
  // ep + "/" + fp.
  //
  // It returns an error when the request cannot be built (e.g. a malformed
  // endpoint), when the request fails, or when the server answers with a
  // non-2xx status.
  func delFailpoint(ep, fp string) error {
  	// DELETE carries no payload, so no body reader is needed.
  	req, err := http.NewRequest(http.MethodDelete, ep+"/"+fp, nil)
  	if err != nil {
  		// A nil request would make Do panic, so surface the error instead.
  		return err
  	}
  	resp, err := http.DefaultClient.Do(req)
  	if err != nil {
  		return err
  	}
  	defer resp.Body.Close()
  	if resp.StatusCode/100 != 2 {
  		return fmt.Errorf("failed to DELETE %s at %s (%v)", fp, ep, resp.Status)
  	}
  	return nil
  }