etcd_test.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. /*
  2. Copyright 2014 CoreOS Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package etcd
  14. import (
  15. "fmt"
  16. "io/ioutil"
  17. "math/rand"
  18. "net"
  19. "net/http"
  20. "net/http/httptest"
  21. "net/url"
  22. "os"
  23. "reflect"
  24. "strings"
  25. "testing"
  26. "time"
  27. "github.com/coreos/etcd/config"
  28. "github.com/coreos/etcd/store"
  29. )
  30. func TestMultipleNodes(t *testing.T) {
  31. tests := []int{1, 3, 5, 9, 11}
  32. for _, tt := range tests {
  33. es, hs := buildCluster(tt, false)
  34. waitCluster(t, es)
  35. destoryCluster(t, es, hs)
  36. }
  37. afterTest(t)
  38. }
  39. func TestMultipleTLSNodes(t *testing.T) {
  40. tests := []int{1, 3, 5}
  41. for _, tt := range tests {
  42. es, hs := buildCluster(tt, true)
  43. waitCluster(t, es)
  44. destoryCluster(t, es, hs)
  45. }
  46. afterTest(t)
  47. }
  48. func TestV2Redirect(t *testing.T) {
  49. es, hs := buildCluster(3, false)
  50. waitCluster(t, es)
  51. u := hs[1].URL
  52. ru := fmt.Sprintf("%s%s", hs[0].URL, "/v2/keys/foo")
  53. tc := NewTestClient()
  54. v := url.Values{}
  55. v.Set("value", "XXX")
  56. resp, _ := tc.PutForm(fmt.Sprintf("%s%s", u, "/v2/keys/foo"), v)
  57. if resp.StatusCode != http.StatusTemporaryRedirect {
  58. t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusTemporaryRedirect)
  59. }
  60. location, err := resp.Location()
  61. if err != nil {
  62. t.Errorf("want err = %, want nil", err)
  63. }
  64. if location.String() != ru {
  65. t.Errorf("location = %v, want %v", location.String(), ru)
  66. }
  67. resp.Body.Close()
  68. destoryCluster(t, es, hs)
  69. afterTest(t)
  70. }
  71. func TestAdd(t *testing.T) {
  72. tests := []int{3, 4, 5, 6}
  73. for _, tt := range tests {
  74. es := make([]*Server, tt)
  75. hs := make([]*httptest.Server, tt)
  76. for i := 0; i < tt; i++ {
  77. c := config.New()
  78. if i > 0 {
  79. c.Peers = []string{hs[0].URL}
  80. }
  81. es[i], hs[i] = initTestServer(c, int64(i), false)
  82. }
  83. go es[0].Run()
  84. waitMode(participantMode, es[0])
  85. for i := 1; i < tt; i++ {
  86. id := int64(i)
  87. for {
  88. lead := es[0].p.node.Leader()
  89. if lead == -1 {
  90. time.Sleep(defaultElection * es[0].tickDuration)
  91. continue
  92. }
  93. err := es[lead].p.add(id, es[id].raftPubAddr, es[id].pubAddr)
  94. if err == nil {
  95. break
  96. }
  97. switch err {
  98. case tmpErr:
  99. time.Sleep(defaultElection * es[0].tickDuration)
  100. case raftStopErr, stopErr:
  101. t.Fatalf("#%d on %d: unexpected stop", i, lead)
  102. default:
  103. t.Fatal(err)
  104. }
  105. }
  106. go es[i].Run()
  107. waitMode(participantMode, es[i])
  108. for j := 0; j <= i; j++ {
  109. p := fmt.Sprintf("%s/%d", v2machineKVPrefix, id)
  110. w, err := es[j].p.Watch(p, false, false, 1)
  111. if err != nil {
  112. t.Errorf("#%d on %d: %v", i, j, err)
  113. break
  114. }
  115. <-w.EventChan
  116. }
  117. }
  118. destoryCluster(t, es, hs)
  119. }
  120. afterTest(t)
  121. }
  122. func TestRemove(t *testing.T) {
  123. tests := []int{3, 4, 5, 6}
  124. for k, tt := range tests {
  125. es, hs := buildCluster(tt, false)
  126. waitCluster(t, es)
  127. lead, _ := waitLeader(es)
  128. config := config.NewClusterConfig()
  129. config.ActiveSize = 0
  130. if err := es[lead].p.setClusterConfig(config); err != nil {
  131. t.Fatalf("#%d: setClusterConfig err = %v", k, err)
  132. }
  133. // we don't remove the machine from 2-node cluster because it is
  134. // not 100 percent safe in our raft.
  135. // TODO(yichengq): improve it later.
  136. for i := 0; i < tt-2; i++ {
  137. id := int64(i)
  138. send := id
  139. for {
  140. send++
  141. if send > int64(tt-1) {
  142. send = id
  143. }
  144. lead := es[send].p.node.Leader()
  145. if lead == -1 {
  146. time.Sleep(defaultElection * 5 * time.Millisecond)
  147. continue
  148. }
  149. err := es[lead].p.remove(id)
  150. if err == nil {
  151. break
  152. }
  153. switch err {
  154. case tmpErr:
  155. time.Sleep(defaultElection * 5 * time.Millisecond)
  156. case raftStopErr, stopErr:
  157. if lead == id {
  158. break
  159. }
  160. default:
  161. t.Fatal(err)
  162. }
  163. }
  164. waitMode(standbyMode, es[i])
  165. }
  166. destoryCluster(t, es, hs)
  167. }
  168. afterTest(t)
  169. // ensure that no goroutines are running
  170. TestGoroutinesRunning(t)
  171. }
  172. func TestBecomeStandby(t *testing.T) {
  173. size := 5
  174. round := 1
  175. for j := 0; j < round; j++ {
  176. es, hs := buildCluster(size, false)
  177. waitCluster(t, es)
  178. lead, _ := waitActiveLeader(es)
  179. i := rand.Intn(size)
  180. // cluster only demotes follower
  181. if int64(i) == lead {
  182. i = (i + 1) % size
  183. }
  184. id := int64(i)
  185. config := config.NewClusterConfig()
  186. config.SyncInterval = 1000
  187. config.ActiveSize = size - 1
  188. if err := es[lead].p.setClusterConfig(config); err != nil {
  189. t.Fatalf("#%d: setClusterConfig err = %v", i, err)
  190. }
  191. for {
  192. err := es[lead].p.remove(id)
  193. if err == nil {
  194. break
  195. }
  196. switch err {
  197. case tmpErr:
  198. time.Sleep(defaultElection * 5 * time.Millisecond)
  199. default:
  200. t.Fatalf("#%d: remove err = %v", i, err)
  201. }
  202. }
  203. waitMode(standbyMode, es[i])
  204. var leader int64
  205. for k := 0; k < 3; k++ {
  206. leader, _ = es[i].s.leaderInfo()
  207. if leader != noneId {
  208. break
  209. }
  210. time.Sleep(50 * time.Millisecond)
  211. }
  212. if g := leader; g != lead {
  213. t.Errorf("#%d: lead = %d, want %d", i, g, lead)
  214. }
  215. destoryCluster(t, es, hs)
  216. }
  217. afterTest(t)
  218. }
  219. func TestReleaseVersion(t *testing.T) {
  220. es, hs := buildCluster(1, false)
  221. resp, err := http.Get(hs[0].URL + "/version")
  222. if err != nil {
  223. t.Fatal(err)
  224. }
  225. defer resp.Body.Close()
  226. g, err := ioutil.ReadAll(resp.Body)
  227. if err != nil {
  228. t.Error(err)
  229. }
  230. gs := string(g)
  231. w := fmt.Sprintf("etcd %s", releaseVersion)
  232. if gs != w {
  233. t.Errorf("version = %v, want %v", gs, w)
  234. }
  235. for i := range hs {
  236. es[len(hs)-i-1].Stop()
  237. }
  238. for i := range hs {
  239. hs[len(hs)-i-1].Close()
  240. }
  241. }
  242. func TestVersionCheck(t *testing.T) {
  243. es, hs := buildCluster(1, false)
  244. u := hs[0].URL
  245. currentVersion := 2
  246. tests := []struct {
  247. version int
  248. wStatus int
  249. }{
  250. {currentVersion - 1, http.StatusForbidden},
  251. {currentVersion, http.StatusOK},
  252. {currentVersion + 1, http.StatusForbidden},
  253. }
  254. for i, tt := range tests {
  255. resp, err := http.Get(fmt.Sprintf("%s/raft/version/%d/check", u, tt.version))
  256. if err != nil {
  257. t.Fatal(err)
  258. }
  259. resp.Body.Close()
  260. if resp.StatusCode != tt.wStatus {
  261. t.Fatal("#%d: status = %d, want %d", i, resp.StatusCode, tt.wStatus)
  262. }
  263. }
  264. for i := range hs {
  265. es[len(hs)-i-1].Stop()
  266. }
  267. for i := range hs {
  268. hs[len(hs)-i-1].Close()
  269. }
  270. }
  271. func TestSingleNodeRecovery(t *testing.T) {
  272. id := genId()
  273. dataDir, err := ioutil.TempDir(os.TempDir(), "etcd")
  274. if err != nil {
  275. panic(err)
  276. }
  277. c := config.New()
  278. c.DataDir = dataDir
  279. e, h, _ := buildServer(t, c, id)
  280. key := "/foo"
  281. ev, err := e.p.Set(key, false, "bar", time.Now().Add(time.Second*100))
  282. if err != nil {
  283. t.Fatal(err)
  284. }
  285. w, err := e.p.Watch(key, false, false, ev.Index())
  286. if err != nil {
  287. t.Fatal(err)
  288. }
  289. select {
  290. case v := <-w.EventChan:
  291. if v.Node.TTL < 95 {
  292. t.Errorf("ttl = %d, want >= 95", v.Node.TTL)
  293. }
  294. case <-time.After(8 * defaultHeartbeat * e.tickDuration):
  295. t.Fatal("watch timeout")
  296. }
  297. e.Stop()
  298. h.Close()
  299. time.Sleep(2 * time.Second)
  300. c = config.New()
  301. c.DataDir = dataDir
  302. e, h, _ = buildServer(t, c, id)
  303. waitLeader([]*Server{e})
  304. w, err = e.p.Watch(key, false, false, ev.Index())
  305. if err != nil {
  306. t.Fatal(err)
  307. }
  308. select {
  309. case v := <-w.EventChan:
  310. if v.Node.TTL > 99 {
  311. t.Errorf("ttl = %d, want <= 99", v.Node.TTL)
  312. }
  313. case <-time.After(8 * defaultHeartbeat * e.tickDuration):
  314. t.Fatal("2nd watch timeout")
  315. }
  316. destroyServer(t, e, h)
  317. }
  318. func TestTakingSnapshot(t *testing.T) {
  319. es, hs := buildCluster(1, false)
  320. for i := 0; i < defaultCompact; i++ {
  321. es[0].p.Set("/foo", false, "bar", store.Permanent)
  322. }
  323. snap := es[0].p.node.GetSnap()
  324. if snap.Index != defaultCompact {
  325. t.Errorf("snap.Index = %d, want %d", snap.Index, defaultCompact)
  326. }
  327. for i := range hs {
  328. es[len(hs)-i-1].Stop()
  329. }
  330. for i := range hs {
  331. hs[len(hs)-i-1].Close()
  332. }
  333. }
  334. func TestRestoreSnapshotFromLeader(t *testing.T) {
  335. es, hs := buildCluster(1, false)
  336. // let leader do snapshot
  337. for i := 0; i < defaultCompact; i++ {
  338. es[0].p.Set(fmt.Sprint("/foo", i), false, fmt.Sprint("bar", i), store.Permanent)
  339. }
  340. // create one to join the cluster
  341. c := config.New()
  342. c.Peers = []string{hs[0].URL}
  343. e, h := initTestServer(c, 1, false)
  344. go e.Run()
  345. waitMode(participantMode, e)
  346. // check new proposal could be submitted
  347. if _, err := es[0].p.Set("/foo", false, "bar", store.Permanent); err != nil {
  348. t.Fatal(err)
  349. }
  350. // check store is recovered
  351. for i := 0; i < defaultCompact; i++ {
  352. ev, err := e.p.Store.Get(fmt.Sprint("/foo", i), false, false)
  353. if err != nil {
  354. t.Errorf("get err = %v", err)
  355. continue
  356. }
  357. w := fmt.Sprint("bar", i)
  358. if g := *ev.Node.Value; g != w {
  359. t.Errorf("value = %v, want %v", g, w)
  360. }
  361. }
  362. // check new proposal could be committed in the new machine
  363. wch, err := e.p.Watch("/foo", false, false, defaultCompact)
  364. if err != nil {
  365. t.Errorf("watch err = %v", err)
  366. }
  367. <-wch.EventChan
  368. g := e.p.node.Nodes()
  369. w := es[0].p.node.Nodes()
  370. if !reflect.DeepEqual(g, w) {
  371. t.Errorf("nodes = %v, want %v", g, w)
  372. }
  373. e.Stop()
  374. es[0].Stop()
  375. h.Close()
  376. hs[0].Close()
  377. }
  378. func buildCluster(number int, tls bool) ([]*Server, []*httptest.Server) {
  379. bootstrapper := 0
  380. es := make([]*Server, number)
  381. hs := make([]*httptest.Server, number)
  382. var seed string
  383. for i := range es {
  384. c := config.New()
  385. if seed != "" {
  386. c.Peers = []string{seed}
  387. }
  388. es[i], hs[i] = initTestServer(c, int64(i), tls)
  389. if i == bootstrapper {
  390. seed = hs[i].URL
  391. } else {
  392. // wait for the previous configuration change to be committed
  393. // or this configuration request might be dropped
  394. w, err := es[0].p.Watch(v2machineKVPrefix, true, false, uint64(i))
  395. if err != nil {
  396. panic(err)
  397. }
  398. <-w.EventChan
  399. }
  400. go es[i].Run()
  401. waitMode(participantMode, es[i])
  402. }
  403. return es, hs
  404. }
  405. func initTestServer(c *config.Config, id int64, tls bool) (e *Server, h *httptest.Server) {
  406. if c.DataDir == "" {
  407. n, err := ioutil.TempDir(os.TempDir(), "etcd")
  408. if err != nil {
  409. panic(err)
  410. }
  411. c.DataDir = n
  412. }
  413. addr := c.Addr
  414. srv, err := New(c)
  415. if err != nil {
  416. panic(err)
  417. }
  418. e = srv
  419. e.setId(id)
  420. e.SetTick(time.Millisecond * 5)
  421. m := http.NewServeMux()
  422. m.Handle("/", e)
  423. m.Handle("/raft", e.RaftHandler())
  424. m.Handle("/raft/", e.RaftHandler())
  425. m.Handle("/v2/admin/", e.RaftHandler())
  426. if addr == "127.0.0.1:4001" {
  427. if tls {
  428. h = httptest.NewTLSServer(m)
  429. } else {
  430. h = httptest.NewServer(m)
  431. }
  432. } else {
  433. var l net.Listener
  434. var err error
  435. for {
  436. l, err = net.Listen("tcp", addr)
  437. if err == nil {
  438. break
  439. }
  440. if !strings.Contains(err.Error(), "address already in use") {
  441. panic(err)
  442. }
  443. time.Sleep(500 * time.Millisecond)
  444. }
  445. h = &httptest.Server{
  446. Listener: l,
  447. Config: &http.Server{Handler: m},
  448. }
  449. if tls {
  450. h.StartTLS()
  451. } else {
  452. h.Start()
  453. }
  454. }
  455. e.raftPubAddr = h.URL
  456. e.pubAddr = h.URL
  457. return
  458. }
  459. func destoryCluster(t *testing.T, es []*Server, hs []*httptest.Server) {
  460. for i := range es {
  461. e := es[len(es)-i-1]
  462. e.Stop()
  463. err := os.RemoveAll(e.config.DataDir)
  464. if err != nil {
  465. panic(err)
  466. t.Fatal(err)
  467. }
  468. }
  469. for i := range hs {
  470. hs[len(hs)-i-1].Close()
  471. }
  472. }
  473. func destroyServer(t *testing.T, e *Server, h *httptest.Server) {
  474. e.Stop()
  475. h.Close()
  476. err := os.RemoveAll(e.config.DataDir)
  477. if err != nil {
  478. panic(err)
  479. t.Fatal(err)
  480. }
  481. }
  482. func waitCluster(t *testing.T, es []*Server) {
  483. n := len(es)
  484. for _, e := range es {
  485. for k := 0; k < n; k++ {
  486. w, err := e.p.Watch(v2machineKVPrefix+fmt.Sprintf("/%d", es[k].id), true, false, 1)
  487. if err != nil {
  488. panic(err)
  489. }
  490. <-w.EventChan
  491. }
  492. }
  493. clusterId := es[0].p.node.ClusterId()
  494. for i, e := range es {
  495. if e.p.node.ClusterId() != clusterId {
  496. t.Errorf("#%d: clusterId = %x, want %x", i, e.p.node.ClusterId(), clusterId)
  497. }
  498. }
  499. }
  500. func waitMode(mode int64, e *Server) {
  501. for {
  502. if e.mode.Get() == mode {
  503. return
  504. }
  505. time.Sleep(10 * time.Millisecond)
  506. }
  507. }
  508. // checkParticipant checks the i-th server works well as participant.
  509. func checkParticipant(i int, es []*Server) error {
  510. lead, _ := waitActiveLeader(es)
  511. key := fmt.Sprintf("/%d", rand.Int31())
  512. ev, err := es[lead].p.Set(key, false, "bar", store.Permanent)
  513. if err != nil {
  514. return err
  515. }
  516. w, err := es[i].p.Watch(key, false, false, ev.Index())
  517. if err != nil {
  518. return err
  519. }
  520. select {
  521. case <-w.EventChan:
  522. case <-time.After(8 * defaultHeartbeat * es[i].tickDuration):
  523. return fmt.Errorf("watch timeout")
  524. }
  525. return nil
  526. }