etcd_test.go

/*
Copyright 2014 CoreOS Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package etcd

import (
	"fmt"
	"math/rand"
	"net/http"
	"net/http/httptest"
	"net/url"
	"testing"
	"time"

	"github.com/coreos/etcd/config"
	"github.com/coreos/etcd/store"
)
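
// TestMultipleNodes boots plain-HTTP clusters of 1, 3, 5, 9, and 11 nodes,
// waits for every member to register, and then stops the servers and their
// test listeners in reverse order.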
func TestMultipleNodes(t *testing.T) {
	tests := []int{1, 3, 5, 9, 11}

	for _, tt := range tests {
		es, hs := buildCluster(tt, false)
		waitCluster(t, es)
		for i := range es {
			es[len(es)-i-1].Stop()
		}
		for i := range hs {
			hs[len(hs)-i-1].Close()
		}
	}
	afterTest(t)
}
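
// TestMultipleTLSNodes does the same as TestMultipleNodes, but serves the
// members over TLS test listeners.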
func TestMultipleTLSNodes(t *testing.T) {
	tests := []int{1, 3, 5}

	for _, tt := range tests {
		es, hs := buildCluster(tt, true)
		waitCluster(t, es)
		for i := range es {
			es[len(es)-i-1].Stop()
		}
		for i := range hs {
			hs[len(hs)-i-1].Close()
		}
	}
	afterTest(t)
}
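
// TestV2Redirect issues a v2 PUT against the second server and expects a
// temporary redirect pointing at the same key on the bootstrap node.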
func TestV2Redirect(t *testing.T) {
	es, hs := buildCluster(3, false)
	waitCluster(t, es)

	u := hs[1].URL
	ru := fmt.Sprintf("%s%s", hs[0].URL, "/v2/keys/foo")
	tc := NewTestClient()

	v := url.Values{}
	v.Set("value", "XXX")
	resp, _ := tc.PutForm(fmt.Sprintf("%s%s", u, "/v2/keys/foo"), v)
	if resp.StatusCode != http.StatusTemporaryRedirect {
		t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusTemporaryRedirect)
	}
	location, err := resp.Location()
	if err != nil {
		t.Errorf("err = %v, want nil", err)
	}
	if location.String() != ru {
		t.Errorf("location = %v, want %v", location.String(), ru)
	}
	resp.Body.Close()

	for i := range es {
		es[len(es)-i-1].Stop()
	}
	for i := range hs {
		hs[len(hs)-i-1].Close()
	}
	afterTest(t)
}
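
// TestAdd grows a cluster one member at a time, retrying the add while the
// leader is unknown or temporarily unavailable, and then waits until every
// existing member has observed the new machine key.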
func TestAdd(t *testing.T) {
	tests := []int{3, 4, 5, 6}

	for _, tt := range tests {
		es := make([]*Server, tt)
		hs := make([]*httptest.Server, tt)
		for i := 0; i < tt; i++ {
			c := config.New()
			if i > 0 {
				c.Peers = []string{hs[0].URL}
			}
			es[i], hs[i] = initTestServer(c, int64(i), false)
		}

		go es[0].Run()
		waitMode(participantMode, es[0])

		for i := 1; i < tt; i++ {
			id := int64(i)
			for {
				lead := es[0].p.node.Leader()
				if lead == -1 {
					time.Sleep(defaultElection * es[0].tickDuration)
					continue
				}

				err := es[lead].p.add(id, es[id].raftPubAddr, es[id].pubAddr)
				if err == nil {
					break
				}
				switch err {
				case tmpErr:
					time.Sleep(defaultElection * es[0].tickDuration)
				case raftStopErr, stopErr:
					t.Fatalf("#%d on %d: unexpected stop", i, lead)
				default:
					t.Fatal(err)
				}
			}
			go es[i].Run()
			waitMode(participantMode, es[i])

			for j := 0; j <= i; j++ {
				p := fmt.Sprintf("%s/%d", v2machineKVPrefix, id)
				w, err := es[j].p.Watch(p, false, false, 1)
				if err != nil {
					t.Errorf("#%d on %d: %v", i, j, err)
					break
				}
				<-w.EventChan
			}
		}

		for i := range hs {
			es[len(hs)-i-1].Stop()
		}
		for i := range hs {
			hs[len(hs)-i-1].Close()
		}
	}
	afterTest(t)
}
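
// TestRemove shrinks a cluster by removing members until only two remain,
// checking that each removed server falls back to standby mode.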
func TestRemove(t *testing.T) {
	tests := []int{3, 4, 5, 6}

	for k, tt := range tests {
		es, hs := buildCluster(tt, false)
		waitCluster(t, es)

		lead, _ := waitLeader(es)
		config := config.NewClusterConfig()
		config.ActiveSize = 0
		if err := es[lead].p.setClusterConfig(config); err != nil {
			t.Fatalf("#%d: setClusterConfig err = %v", k, err)
		}

		// we don't remove the machine from 2-node cluster because it is
		// not 100 percent safe in our raft.
		// TODO(yichengq): improve it later.
		for i := 0; i < tt-2; i++ {
			id := int64(i)
			send := id
			for {
				send++
				if send > int64(tt-1) {
					send = id
				}

				lead := es[send].p.node.Leader()
				if lead == -1 {
					time.Sleep(defaultElection * 5 * time.Millisecond)
					continue
				}

				err := es[lead].p.remove(id)
				if err == nil {
					break
				}
				switch err {
				case tmpErr:
					time.Sleep(defaultElection * 5 * time.Millisecond)
				case raftStopErr, stopErr:
					if lead == id {
						break
					}
				default:
					t.Fatal(err)
				}
			}
			waitMode(standbyMode, es[i])
		}

		for i := range es {
			es[len(hs)-i-1].Stop()
		}
		for i := range hs {
			hs[len(hs)-i-1].Close()
		}
	}
	afterTest(t)
	// ensure that no goroutines are running
	TestGoroutinesRunning(t)
}
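
// TestBecomeStandby demotes a random follower by shrinking ActiveSize and
// verifies that the demoted server ends up in standby mode and still reports
// the current leader.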
func TestBecomeStandby(t *testing.T) {
	size := 5
	round := 1

	for j := 0; j < round; j++ {
		es, hs := buildCluster(size, false)
		waitCluster(t, es)

		lead, _ := waitActiveLeader(es)
		i := rand.Intn(size)
		// cluster only demotes follower
		if int64(i) == lead {
			i = (i + 1) % size
		}
		id := int64(i)

		config := config.NewClusterConfig()
		config.SyncInterval = 1000
		config.ActiveSize = size - 1
		if err := es[lead].p.setClusterConfig(config); err != nil {
			t.Fatalf("#%d: setClusterConfig err = %v", i, err)
		}
		for {
			err := es[lead].p.remove(id)
			if err == nil {
				break
			}
			switch err {
			case tmpErr:
				time.Sleep(defaultElection * 5 * time.Millisecond)
			default:
				t.Fatalf("#%d: remove err = %v", i, err)
			}
		}

		waitMode(standbyMode, es[i])

		var leader int64
		for k := 0; k < 3; k++ {
			leader, _ = es[i].s.leaderInfo()
			if leader != noneId {
				break
			}
			time.Sleep(50 * time.Millisecond)
		}
		if g := leader; g != lead {
			t.Errorf("#%d: lead = %d, want %d", i, g, lead)
		}

		for i := range hs {
			es[len(hs)-i-1].Stop()
		}
		for i := range hs {
			hs[len(hs)-i-1].Close()
		}
	}
	afterTest(t)
}

// TODO(yichengq): cannot handle previous msgDenial correctly now
func TestModeSwitch(t *testing.T) {
	t.Skip("not passed")
	size := 5
	round := 3

	for i := 0; i < size; i++ {
		es, hs := buildCluster(size, false)
		waitCluster(t, es)

		config := config.NewClusterConfig()
		config.SyncInterval = 0
		id := int64(i)
		for j := 0; j < round; j++ {
			lead, _ := waitActiveLeader(es)
			// cluster only demotes follower
			if lead == id {
				continue
			}

			config.ActiveSize = size - 1
			if err := es[lead].p.setClusterConfig(config); err != nil {
				t.Fatalf("#%d: setClusterConfig err = %v", i, err)
			}
			if err := es[lead].p.remove(id); err != nil {
				t.Fatalf("#%d: remove err = %v", i, err)
			}

			waitMode(standbyMode, es[i])

			for k := 0; k < 4; k++ {
				if es[i].s.leader != noneId {
					break
				}
				time.Sleep(20 * time.Millisecond)
			}
			if g := es[i].s.leader; g != lead {
				t.Errorf("#%d: lead = %d, want %d", i, g, lead)
			}

			config.ActiveSize = size
			if err := es[lead].p.setClusterConfig(config); err != nil {
				t.Fatalf("#%d: setClusterConfig err = %v", i, err)
			}

			waitMode(participantMode, es[i])

			if err := checkParticipant(i, es); err != nil {
				t.Errorf("#%d: check alive err = %v", i, err)
			}
		}

		for i := range hs {
			es[len(hs)-i-1].Stop()
		}
		for i := range hs {
			hs[len(hs)-i-1].Close()
		}
	}
	afterTest(t)
}
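
// buildCluster starts the given number of test servers, using the first one
// as the seed the others join through, and returns the etcd servers together
// with their httptest listeners.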
func buildCluster(number int, tls bool) ([]*Server, []*httptest.Server) {
	bootstrapper := 0
	es := make([]*Server, number)
	hs := make([]*httptest.Server, number)
	var seed string

	for i := range es {
		c := config.New()
		if seed != "" {
			c.Peers = []string{seed}
		}
		es[i], hs[i] = initTestServer(c, int64(i), tls)

		if i == bootstrapper {
			seed = hs[i].URL
		} else {
			// wait for the previous configuration change to be committed
			// or this configuration request might be dropped
			w, err := es[0].p.Watch(v2machineKVPrefix, true, false, uint64(i))
			if err != nil {
				panic(err)
			}
			<-w.EventChan
		}
		go es[i].Run()
		waitMode(participantMode, es[i])
	}
	return es, hs
}
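
// initTestServer creates a single etcd server with a short tick duration and
// exposes both its client and raft endpoints on one httptest server
// (optionally over TLS).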
func initTestServer(c *config.Config, id int64, tls bool) (e *Server, h *httptest.Server) {
	e = New(c, id)
	e.SetTick(time.Millisecond * 5)
	m := http.NewServeMux()
	m.Handle("/", e)
	m.Handle("/raft", e.RaftHandler())
	m.Handle("/raft/", e.RaftHandler())

	if tls {
		h = httptest.NewTLSServer(m)
	} else {
		h = httptest.NewServer(m)
	}

	e.raftPubAddr = h.URL
	e.pubAddr = h.URL
	return
}
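
// waitCluster blocks until every server has seen all machine registrations
// under v2machineKVPrefix, tolerating duplicate join entries caused by
// retries.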
func waitCluster(t *testing.T, es []*Server) {
	n := len(es)
	for i, e := range es {
		var index uint64
		for k := 0; k < n; k++ {
			index++
			w, err := e.p.Watch(v2machineKVPrefix, true, false, index)
			if err != nil {
				panic(err)
			}
			v := <-w.EventChan
			// join command may appear several times due to retry
			// when timeout
			if k > 0 {
				pw := fmt.Sprintf("%s/%d", v2machineKVPrefix, k-1)
				if v.Node.Key == pw {
					continue
				}
			}
			ww := fmt.Sprintf("%s/%d", v2machineKVPrefix, k)
			if v.Node.Key != ww {
				t.Errorf("#%d path = %v, want %v", i, v.Node.Key, ww)
			}
		}
	}
}
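
// waitMode polls until the server reports the given mode.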
func waitMode(mode int64, e *Server) {
	for {
		if e.mode.Get() == mode {
			return
		}
		time.Sleep(10 * time.Millisecond)
	}
}

// checkParticipant checks that the i-th server works well as a participant.
func checkParticipant(i int, es []*Server) error {
	lead, _ := waitActiveLeader(es)
	key := fmt.Sprintf("/%d", rand.Int31())
	ev, err := es[lead].p.Set(key, false, "bar", store.Permanent)
	if err != nil {
		return err
	}

	w, err := es[i].p.Watch(key, false, false, ev.Index())
	if err != nil {
		return err
	}
	select {
	case <-w.EventChan:
	case <-time.After(8 * defaultHeartbeat * es[i].tickDuration):
		return fmt.Errorf("watch timeout")
	}
	return nil
}