etcd_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. /*
  2. Copyright 2014 CoreOS Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package etcd
  14. import (
  15. "fmt"
  16. "io/ioutil"
  17. "math/rand"
  18. "net"
  19. "net/http"
  20. "net/http/httptest"
  21. "net/url"
  22. "os"
  23. "strings"
  24. "testing"
  25. "time"
  26. "github.com/coreos/etcd/config"
  27. "github.com/coreos/etcd/store"
  28. )
  29. func TestMultipleNodes(t *testing.T) {
  30. tests := []int{1, 3, 5, 9, 11}
  31. for _, tt := range tests {
  32. es, hs := buildCluster(tt, false)
  33. waitCluster(t, es)
  34. destoryCluster(t, es, hs)
  35. }
  36. afterTest(t)
  37. }
  38. func TestMultipleTLSNodes(t *testing.T) {
  39. tests := []int{1, 3, 5}
  40. for _, tt := range tests {
  41. es, hs := buildCluster(tt, true)
  42. waitCluster(t, es)
  43. destoryCluster(t, es, hs)
  44. }
  45. afterTest(t)
  46. }
  47. func TestV2Redirect(t *testing.T) {
  48. es, hs := buildCluster(3, false)
  49. waitCluster(t, es)
  50. u := hs[1].URL
  51. ru := fmt.Sprintf("%s%s", hs[0].URL, "/v2/keys/foo")
  52. tc := NewTestClient()
  53. v := url.Values{}
  54. v.Set("value", "XXX")
  55. resp, _ := tc.PutForm(fmt.Sprintf("%s%s", u, "/v2/keys/foo"), v)
  56. if resp.StatusCode != http.StatusTemporaryRedirect {
  57. t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusTemporaryRedirect)
  58. }
  59. location, err := resp.Location()
  60. if err != nil {
  61. t.Errorf("want err = %, want nil", err)
  62. }
  63. if location.String() != ru {
  64. t.Errorf("location = %v, want %v", location.String(), ru)
  65. }
  66. resp.Body.Close()
  67. destoryCluster(t, es, hs)
  68. afterTest(t)
  69. }
  70. func TestAdd(t *testing.T) {
  71. tests := []int{3, 4, 5, 6}
  72. for _, tt := range tests {
  73. es := make([]*Server, tt)
  74. hs := make([]*httptest.Server, tt)
  75. for i := 0; i < tt; i++ {
  76. c := config.New()
  77. if i > 0 {
  78. c.Peers = []string{hs[0].URL}
  79. }
  80. es[i], hs[i] = initTestServer(c, int64(i), false)
  81. }
  82. go es[0].Run()
  83. waitMode(participantMode, es[0])
  84. for i := 1; i < tt; i++ {
  85. id := int64(i)
  86. for {
  87. lead := es[0].p.node.Leader()
  88. if lead == -1 {
  89. time.Sleep(defaultElection * es[0].tickDuration)
  90. continue
  91. }
  92. err := es[lead].p.add(id, es[id].raftPubAddr, es[id].pubAddr)
  93. if err == nil {
  94. break
  95. }
  96. switch err {
  97. case tmpErr:
  98. time.Sleep(defaultElection * es[0].tickDuration)
  99. case raftStopErr, stopErr:
  100. t.Fatalf("#%d on %d: unexpected stop", i, lead)
  101. default:
  102. t.Fatal(err)
  103. }
  104. }
  105. go es[i].Run()
  106. waitMode(participantMode, es[i])
  107. for j := 0; j <= i; j++ {
  108. p := fmt.Sprintf("%s/%d", v2machineKVPrefix, id)
  109. w, err := es[j].p.Watch(p, false, false, 1)
  110. if err != nil {
  111. t.Errorf("#%d on %d: %v", i, j, err)
  112. break
  113. }
  114. <-w.EventChan
  115. }
  116. }
  117. destoryCluster(t, es, hs)
  118. }
  119. afterTest(t)
  120. }
  121. func TestRemove(t *testing.T) {
  122. tests := []int{3, 4, 5, 6}
  123. for k, tt := range tests {
  124. es, hs := buildCluster(tt, false)
  125. waitCluster(t, es)
  126. lead, _ := waitLeader(es)
  127. config := config.NewClusterConfig()
  128. config.ActiveSize = 0
  129. if err := es[lead].p.setClusterConfig(config); err != nil {
  130. t.Fatalf("#%d: setClusterConfig err = %v", k, err)
  131. }
  132. // we don't remove the machine from 2-node cluster because it is
  133. // not 100 percent safe in our raft.
  134. // TODO(yichengq): improve it later.
  135. for i := 0; i < tt-2; i++ {
  136. id := int64(i)
  137. send := id
  138. for {
  139. send++
  140. if send > int64(tt-1) {
  141. send = id
  142. }
  143. lead := es[send].p.node.Leader()
  144. if lead == -1 {
  145. time.Sleep(defaultElection * 5 * time.Millisecond)
  146. continue
  147. }
  148. err := es[lead].p.remove(id)
  149. if err == nil {
  150. break
  151. }
  152. switch err {
  153. case tmpErr:
  154. time.Sleep(defaultElection * 5 * time.Millisecond)
  155. case raftStopErr, stopErr:
  156. if lead == id {
  157. break
  158. }
  159. default:
  160. t.Fatal(err)
  161. }
  162. }
  163. waitMode(standbyMode, es[i])
  164. }
  165. destoryCluster(t, es, hs)
  166. }
  167. afterTest(t)
  168. // ensure that no goroutines are running
  169. TestGoroutinesRunning(t)
  170. }
  171. func TestBecomeStandby(t *testing.T) {
  172. size := 5
  173. round := 1
  174. for j := 0; j < round; j++ {
  175. es, hs := buildCluster(size, false)
  176. waitCluster(t, es)
  177. lead, _ := waitActiveLeader(es)
  178. i := rand.Intn(size)
  179. // cluster only demotes follower
  180. if int64(i) == lead {
  181. i = (i + 1) % size
  182. }
  183. id := int64(i)
  184. config := config.NewClusterConfig()
  185. config.SyncInterval = 1000
  186. config.ActiveSize = size - 1
  187. if err := es[lead].p.setClusterConfig(config); err != nil {
  188. t.Fatalf("#%d: setClusterConfig err = %v", i, err)
  189. }
  190. for {
  191. err := es[lead].p.remove(id)
  192. if err == nil {
  193. break
  194. }
  195. switch err {
  196. case tmpErr:
  197. time.Sleep(defaultElection * 5 * time.Millisecond)
  198. default:
  199. t.Fatalf("#%d: remove err = %v", i, err)
  200. }
  201. }
  202. waitMode(standbyMode, es[i])
  203. var leader int64
  204. for k := 0; k < 3; k++ {
  205. leader, _ = es[i].s.leaderInfo()
  206. if leader != noneId {
  207. break
  208. }
  209. time.Sleep(50 * time.Millisecond)
  210. }
  211. if g := leader; g != lead {
  212. t.Errorf("#%d: lead = %d, want %d", i, g, lead)
  213. }
  214. destoryCluster(t, es, hs)
  215. }
  216. afterTest(t)
  217. }
  218. func TestReleaseVersion(t *testing.T) {
  219. es, hs := buildCluster(1, false)
  220. resp, err := http.Get(hs[0].URL + "/version")
  221. if err != nil {
  222. t.Fatal(err)
  223. }
  224. defer resp.Body.Close()
  225. g, err := ioutil.ReadAll(resp.Body)
  226. if err != nil {
  227. t.Error(err)
  228. }
  229. gs := string(g)
  230. w := fmt.Sprintf("etcd %s", releaseVersion)
  231. if gs != w {
  232. t.Errorf("version = %v, want %v", gs, w)
  233. }
  234. for i := range hs {
  235. es[len(hs)-i-1].Stop()
  236. }
  237. for i := range hs {
  238. hs[len(hs)-i-1].Close()
  239. }
  240. }
  241. func TestVersionCheck(t *testing.T) {
  242. es, hs := buildCluster(1, false)
  243. u := hs[0].URL
  244. currentVersion := 2
  245. tests := []struct {
  246. version int
  247. wStatus int
  248. }{
  249. {currentVersion - 1, http.StatusForbidden},
  250. {currentVersion, http.StatusOK},
  251. {currentVersion + 1, http.StatusForbidden},
  252. }
  253. for i, tt := range tests {
  254. resp, err := http.Get(fmt.Sprintf("%s/raft/version/%d/check", u, tt.version))
  255. if err != nil {
  256. t.Fatal(err)
  257. }
  258. resp.Body.Close()
  259. if resp.StatusCode != tt.wStatus {
  260. t.Fatal("#%d: status = %d, want %d", i, resp.StatusCode, tt.wStatus)
  261. }
  262. }
  263. for i := range hs {
  264. es[len(hs)-i-1].Stop()
  265. }
  266. for i := range hs {
  267. hs[len(hs)-i-1].Close()
  268. }
  269. }
  270. func TestSingleNodeRecovery(t *testing.T) {
  271. id := genId()
  272. dataDir, err := ioutil.TempDir(os.TempDir(), "etcd")
  273. if err != nil {
  274. panic(err)
  275. }
  276. c := config.New()
  277. c.DataDir = dataDir
  278. e, h, _ := buildServer(t, c, id)
  279. key := "/foo"
  280. ev, err := e.p.Set(key, false, "bar", time.Now().Add(time.Second*100))
  281. if err != nil {
  282. t.Fatal(err)
  283. }
  284. w, err := e.p.Watch(key, false, false, ev.Index())
  285. if err != nil {
  286. t.Fatal(err)
  287. }
  288. select {
  289. case v := <-w.EventChan:
  290. if v.Node.TTL < 95 {
  291. t.Errorf("ttl = %d, want >= 95", v.Node.TTL)
  292. }
  293. case <-time.After(8 * defaultHeartbeat * e.tickDuration):
  294. t.Fatal("watch timeout")
  295. }
  296. e.Stop()
  297. h.Close()
  298. time.Sleep(2 * time.Second)
  299. c = config.New()
  300. c.DataDir = dataDir
  301. e, h, _ = buildServer(t, c, id)
  302. waitLeader([]*Server{e})
  303. w, err = e.p.Watch(key, false, false, ev.Index())
  304. if err != nil {
  305. t.Fatal(err)
  306. }
  307. select {
  308. case v := <-w.EventChan:
  309. if v.Node.TTL > 99 {
  310. t.Errorf("ttl = %d, want <= 99", v.Node.TTL)
  311. }
  312. case <-time.After(8 * defaultHeartbeat * e.tickDuration):
  313. t.Fatal("2nd watch timeout")
  314. }
  315. destroyServer(t, e, h)
  316. }
  317. func buildCluster(number int, tls bool) ([]*Server, []*httptest.Server) {
  318. bootstrapper := 0
  319. es := make([]*Server, number)
  320. hs := make([]*httptest.Server, number)
  321. var seed string
  322. for i := range es {
  323. c := config.New()
  324. if seed != "" {
  325. c.Peers = []string{seed}
  326. }
  327. es[i], hs[i] = initTestServer(c, int64(i), tls)
  328. if i == bootstrapper {
  329. seed = hs[i].URL
  330. } else {
  331. // wait for the previous configuration change to be committed
  332. // or this configuration request might be dropped
  333. w, err := es[0].p.Watch(v2machineKVPrefix, true, false, uint64(i))
  334. if err != nil {
  335. panic(err)
  336. }
  337. <-w.EventChan
  338. }
  339. go es[i].Run()
  340. waitMode(participantMode, es[i])
  341. }
  342. return es, hs
  343. }
  344. func initTestServer(c *config.Config, id int64, tls bool) (e *Server, h *httptest.Server) {
  345. if c.DataDir == "" {
  346. n, err := ioutil.TempDir(os.TempDir(), "etcd")
  347. if err != nil {
  348. panic(err)
  349. }
  350. c.DataDir = n
  351. }
  352. addr := c.Addr
  353. srv, err := New(c)
  354. if err != nil {
  355. panic(err)
  356. }
  357. e = srv
  358. e.setId(id)
  359. e.SetTick(time.Millisecond * 5)
  360. m := http.NewServeMux()
  361. m.Handle("/", e)
  362. m.Handle("/raft", e.RaftHandler())
  363. m.Handle("/raft/", e.RaftHandler())
  364. if addr == "127.0.0.1:4001" {
  365. if tls {
  366. h = httptest.NewTLSServer(m)
  367. } else {
  368. h = httptest.NewServer(m)
  369. }
  370. } else {
  371. var l net.Listener
  372. var err error
  373. for {
  374. l, err = net.Listen("tcp", addr)
  375. if err == nil {
  376. break
  377. }
  378. if !strings.Contains(err.Error(), "address already in use") {
  379. panic(err)
  380. }
  381. time.Sleep(500 * time.Millisecond)
  382. }
  383. h = &httptest.Server{
  384. Listener: l,
  385. Config: &http.Server{Handler: m},
  386. }
  387. if tls {
  388. h.StartTLS()
  389. } else {
  390. h.Start()
  391. }
  392. }
  393. e.raftPubAddr = h.URL
  394. e.pubAddr = h.URL
  395. return
  396. }
  397. func destoryCluster(t *testing.T, es []*Server, hs []*httptest.Server) {
  398. for i := range es {
  399. e := es[len(es)-i-1]
  400. e.Stop()
  401. err := os.RemoveAll(e.config.DataDir)
  402. if err != nil {
  403. panic(err)
  404. t.Fatal(err)
  405. }
  406. }
  407. for i := range hs {
  408. hs[len(hs)-i-1].Close()
  409. }
  410. }
  411. func destroyServer(t *testing.T, e *Server, h *httptest.Server) {
  412. e.Stop()
  413. h.Close()
  414. err := os.RemoveAll(e.config.DataDir)
  415. if err != nil {
  416. panic(err)
  417. t.Fatal(err)
  418. }
  419. }
  420. func waitCluster(t *testing.T, es []*Server) {
  421. n := len(es)
  422. for _, e := range es {
  423. for k := 0; k < n; k++ {
  424. w, err := e.p.Watch(v2machineKVPrefix+fmt.Sprintf("/%d", es[k].id), true, false, 1)
  425. if err != nil {
  426. panic(err)
  427. }
  428. <-w.EventChan
  429. }
  430. }
  431. clusterId := es[0].p.node.ClusterId()
  432. for i, e := range es {
  433. if e.p.node.ClusterId() != clusterId {
  434. t.Errorf("#%d: clusterId = %x, want %x", i, e.p.node.ClusterId(), clusterId)
  435. }
  436. }
  437. }
  438. func waitMode(mode int64, e *Server) {
  439. for {
  440. if e.mode.Get() == mode {
  441. return
  442. }
  443. time.Sleep(10 * time.Millisecond)
  444. }
  445. }
  446. // checkParticipant checks the i-th server works well as participant.
  447. func checkParticipant(i int, es []*Server) error {
  448. lead, _ := waitActiveLeader(es)
  449. key := fmt.Sprintf("/%d", rand.Int31())
  450. ev, err := es[lead].p.Set(key, false, "bar", store.Permanent)
  451. if err != nil {
  452. return err
  453. }
  454. w, err := es[i].p.Watch(key, false, false, ev.Index())
  455. if err != nil {
  456. return err
  457. }
  458. select {
  459. case <-w.EventChan:
  460. case <-time.After(8 * defaultHeartbeat * es[i].tickDuration):
  461. return fmt.Errorf("watch timeout")
  462. }
  463. return nil
  464. }