etcd_test.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. /*
  2. Copyright 2013 CoreOS Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package main
  14. import (
  15. "fmt"
  16. "math/rand"
  17. "net/http"
  18. "net/http/httptest"
  19. "net/url"
  20. "os"
  21. "strconv"
  22. "strings"
  23. "testing"
  24. "time"
  25. "github.com/coreos/etcd/test"
  26. "github.com/coreos/go-etcd/etcd"
  27. )
  28. // Create a single node and try to set value
  29. func TestSingleNode(t *testing.T) {
  30. procAttr := new(os.ProcAttr)
  31. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  32. args := []string{"etcd", "-n=node1", "-f", "-d=/tmp/node1"}
  33. process, err := os.StartProcess("etcd", args, procAttr)
  34. if err != nil {
  35. t.Fatal("start process failed:" + err.Error())
  36. return
  37. }
  38. defer process.Kill()
  39. time.Sleep(time.Second)
  40. c := etcd.NewClient()
  41. c.SyncCluster()
  42. // Test Set
  43. result, err := c.Set("foo", "bar", 100)
  44. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL < 95 {
  45. if err != nil {
  46. t.Fatal(err)
  47. }
  48. t.Fatalf("Set 1 failed with %s %s %v", result.Key, result.Value, result.TTL)
  49. }
  50. time.Sleep(time.Second)
  51. result, err = c.Set("foo", "bar", 100)
  52. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.PrevValue != "bar" || result.TTL != 99 {
  53. if err != nil {
  54. t.Fatal(err)
  55. }
  56. t.Fatalf("Set 2 failed with %s %s %v", result.Key, result.Value, result.TTL)
  57. }
  58. // Add a test-and-set test
  59. // First, we'll test we can change the value if we get it write
  60. result, match, err := c.TestAndSet("foo", "bar", "foobar", 100)
  61. if err != nil || result.Key != "/foo" || result.Value != "foobar" || result.PrevValue != "bar" || result.TTL != 99 || !match {
  62. if err != nil {
  63. t.Fatal(err)
  64. }
  65. t.Fatalf("Set 3 failed with %s %s %v", result.Key, result.Value, result.TTL)
  66. }
  67. // Next, we'll make sure we can't set it without the correct prior value
  68. _, _, err = c.TestAndSet("foo", "bar", "foofoo", 100)
  69. if err == nil {
  70. t.Fatalf("Set 4 expecting error when setting key with incorrect previous value")
  71. }
  72. // Finally, we'll make sure a blank previous value still counts as a test-and-set and still has to match
  73. _, _, err = c.TestAndSet("foo", "", "barbar", 100)
  74. if err == nil {
  75. t.Fatalf("Set 5 expecting error when setting key with blank (incorrect) previous value")
  76. }
  77. }
  78. // TestInternalVersionFail will ensure that etcd does not come up if the internal raft
  79. // versions do not match.
  80. func TestInternalVersionFail(t *testing.T) {
  81. checkedVersion := false
  82. testMux := http.NewServeMux()
  83. testMux.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) {
  84. fmt.Fprintln(w, "This is not a version number")
  85. checkedVersion = true
  86. })
  87. testMux.HandleFunc("/join", func(w http.ResponseWriter, r *http.Request) {
  88. t.Fatal("should not attempt to join!")
  89. })
  90. ts := httptest.NewServer(testMux)
  91. defer ts.Close()
  92. fakeURL, _ := url.Parse(ts.URL)
  93. procAttr := new(os.ProcAttr)
  94. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  95. args := []string{"etcd", "-n=node1", "-f", "-d=/tmp/node1", "-vv", "-C=" + fakeURL.Host}
  96. process, err := os.StartProcess("etcd", args, procAttr)
  97. if err != nil {
  98. t.Fatal("start process failed:" + err.Error())
  99. return
  100. }
  101. defer process.Kill()
  102. time.Sleep(time.Second)
  103. _, err = http.Get("http://127.0.0.1:4001")
  104. if err == nil {
  105. t.Fatal("etcd node should not be up")
  106. return
  107. }
  108. if checkedVersion == false {
  109. t.Fatal("etcd did not check the version")
  110. return
  111. }
  112. }
  113. // This test creates a single node and then set a value to it.
  114. // Then this test kills the node and restart it and tries to get the value again.
  115. func TestSingleNodeRecovery(t *testing.T) {
  116. procAttr := new(os.ProcAttr)
  117. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  118. args := []string{"etcd", "-n=node1", "-d=/tmp/node1"}
  119. process, err := os.StartProcess("etcd", append(args, "-f"), procAttr)
  120. if err != nil {
  121. t.Fatal("start process failed:" + err.Error())
  122. return
  123. }
  124. time.Sleep(time.Second)
  125. c := etcd.NewClient()
  126. c.SyncCluster()
  127. // Test Set
  128. result, err := c.Set("foo", "bar", 100)
  129. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL < 95 {
  130. if err != nil {
  131. t.Fatal(err)
  132. }
  133. t.Fatalf("Set 1 failed with %s %s %v", result.Key, result.Value, result.TTL)
  134. }
  135. time.Sleep(time.Second)
  136. process.Kill()
  137. process, err = os.StartProcess("etcd", args, procAttr)
  138. defer process.Kill()
  139. if err != nil {
  140. t.Fatal("start process failed:" + err.Error())
  141. return
  142. }
  143. time.Sleep(time.Second)
  144. results, err := c.Get("foo")
  145. if err != nil {
  146. t.Fatal("get fail: " + err.Error())
  147. return
  148. }
  149. result = results[0]
  150. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL > 99 {
  151. if err != nil {
  152. t.Fatal(err)
  153. }
  154. t.Fatalf("Recovery Get failed with %s %s %v", result.Key, result.Value, result.TTL)
  155. }
  156. }
  157. // Create a three nodes and try to set value
  158. func templateTestSimpleMultiNode(t *testing.T, tls bool) {
  159. procAttr := new(os.ProcAttr)
  160. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  161. clusterSize := 3
  162. _, etcds, err := test.CreateCluster(clusterSize, procAttr, tls)
  163. if err != nil {
  164. t.Fatal("cannot create cluster")
  165. }
  166. defer test.DestroyCluster(etcds)
  167. time.Sleep(time.Second)
  168. c := etcd.NewClient()
  169. c.SyncCluster()
  170. // Test Set
  171. result, err := c.Set("foo", "bar", 100)
  172. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL < 95 {
  173. if err != nil {
  174. t.Fatal(err)
  175. }
  176. t.Fatalf("Set 1 failed with %s %s %v", result.Key, result.Value, result.TTL)
  177. }
  178. time.Sleep(time.Second)
  179. result, err = c.Set("foo", "bar", 100)
  180. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.PrevValue != "bar" || result.TTL != 99 {
  181. if err != nil {
  182. t.Fatal(err)
  183. }
  184. t.Fatalf("Set 2 failed with %s %s %v", result.Key, result.Value, result.TTL)
  185. }
  186. }
  187. func TestSimpleMultiNode(t *testing.T) {
  188. templateTestSimpleMultiNode(t, false)
  189. }
  190. func TestSimpleMultiNodeTls(t *testing.T) {
  191. templateTestSimpleMultiNode(t, true)
  192. }
  193. // Create a five nodes
  194. // Kill all the nodes and restart
  195. func TestMultiNodeKillAllAndRecovery(t *testing.T) {
  196. procAttr := new(os.ProcAttr)
  197. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  198. clusterSize := 5
  199. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  200. if err != nil {
  201. t.Fatal("cannot create cluster")
  202. }
  203. c := etcd.NewClient()
  204. c.SyncCluster()
  205. time.Sleep(time.Second)
  206. // send 10 commands
  207. for i := 0; i < 10; i++ {
  208. // Test Set
  209. _, err := c.Set("foo", "bar", 0)
  210. if err != nil {
  211. panic(err)
  212. }
  213. }
  214. time.Sleep(time.Second)
  215. // kill all
  216. test.DestroyCluster(etcds)
  217. time.Sleep(time.Second)
  218. stop := make(chan bool)
  219. leaderChan := make(chan string, 1)
  220. all := make(chan bool, 1)
  221. time.Sleep(time.Second)
  222. for i := 0; i < clusterSize; i++ {
  223. etcds[i], err = os.StartProcess("etcd", argGroup[i], procAttr)
  224. }
  225. go test.Monitor(clusterSize, 1, leaderChan, all, stop)
  226. <-all
  227. <-leaderChan
  228. result, err := c.Set("foo", "bar", 0)
  229. if err != nil {
  230. panic(err)
  231. }
  232. if result.Index != 18 {
  233. t.Fatalf("recovery failed! [%d/18]", result.Index)
  234. }
  235. // kill all
  236. test.DestroyCluster(etcds)
  237. }
  238. // Create a five nodes
  239. // Randomly kill one of the node and keep on sending set command to the cluster
  240. func TestMultiNodeKillOne(t *testing.T) {
  241. procAttr := new(os.ProcAttr)
  242. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  243. clusterSize := 5
  244. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  245. if err != nil {
  246. t.Fatal("cannot create cluster")
  247. }
  248. defer test.DestroyCluster(etcds)
  249. time.Sleep(2 * time.Second)
  250. c := etcd.NewClient()
  251. c.SyncCluster()
  252. stop := make(chan bool)
  253. // Test Set
  254. go test.Set(stop)
  255. for i := 0; i < 10; i++ {
  256. num := rand.Int() % clusterSize
  257. fmt.Println("kill node", num+1)
  258. // kill
  259. etcds[num].Kill()
  260. etcds[num].Release()
  261. time.Sleep(time.Second)
  262. // restart
  263. etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
  264. if err != nil {
  265. panic(err)
  266. }
  267. time.Sleep(time.Second)
  268. }
  269. fmt.Println("stop")
  270. stop <- true
  271. <-stop
  272. }
  273. // This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times.
  274. // It will print out the election time and the average election time.
  275. func TestKillLeader(t *testing.T) {
  276. procAttr := new(os.ProcAttr)
  277. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  278. clusterSize := 5
  279. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  280. if err != nil {
  281. t.Fatal("cannot create cluster")
  282. }
  283. defer test.DestroyCluster(etcds)
  284. stop := make(chan bool)
  285. leaderChan := make(chan string, 1)
  286. all := make(chan bool, 1)
  287. time.Sleep(time.Second)
  288. go test.Monitor(clusterSize, 1, leaderChan, all, stop)
  289. var totalTime time.Duration
  290. leader := "http://127.0.0.1:7001"
  291. for i := 0; i < clusterSize; i++ {
  292. fmt.Println("leader is ", leader)
  293. port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
  294. num := port - 7001
  295. fmt.Println("kill server ", num)
  296. etcds[num].Kill()
  297. etcds[num].Release()
  298. start := time.Now()
  299. for {
  300. newLeader := <-leaderChan
  301. if newLeader != leader {
  302. leader = newLeader
  303. break
  304. }
  305. }
  306. take := time.Now().Sub(start)
  307. totalTime += take
  308. avgTime := totalTime / (time.Duration)(i+1)
  309. fmt.Println("Leader election time is ", take, "with election timeout", ElectionTimeout)
  310. fmt.Println("Leader election time average is", avgTime, "with election timeout", ElectionTimeout)
  311. etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
  312. }
  313. stop <- true
  314. }
  315. // TestKillRandom kills random machines in the cluster and
  316. // restart them after all other machines agree on the same leader
  317. func TestKillRandom(t *testing.T) {
  318. procAttr := new(os.ProcAttr)
  319. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  320. clusterSize := 9
  321. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  322. if err != nil {
  323. t.Fatal("cannot create cluster")
  324. }
  325. defer test.DestroyCluster(etcds)
  326. stop := make(chan bool)
  327. leaderChan := make(chan string, 1)
  328. all := make(chan bool, 1)
  329. time.Sleep(3 * time.Second)
  330. go test.Monitor(clusterSize, 4, leaderChan, all, stop)
  331. toKill := make(map[int]bool)
  332. for i := 0; i < 20; i++ {
  333. fmt.Printf("TestKillRandom Round[%d/20]\n", i)
  334. j := 0
  335. for {
  336. r := rand.Int31n(9)
  337. if _, ok := toKill[int(r)]; !ok {
  338. j++
  339. toKill[int(r)] = true
  340. }
  341. if j > 3 {
  342. break
  343. }
  344. }
  345. for num, _ := range toKill {
  346. err := etcds[num].Kill()
  347. if err != nil {
  348. panic(err)
  349. }
  350. etcds[num].Wait()
  351. }
  352. time.Sleep(ElectionTimeout)
  353. <-leaderChan
  354. for num, _ := range toKill {
  355. etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
  356. }
  357. toKill = make(map[int]bool)
  358. <-all
  359. }
  360. stop <- true
  361. }
  362. // remove the node and node rejoin with previous log
  363. func TestRemoveNode(t *testing.T) {
  364. procAttr := new(os.ProcAttr)
  365. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  366. clusterSize := 3
  367. argGroup, etcds, _ := test.CreateCluster(clusterSize, procAttr, false)
  368. time.Sleep(time.Second)
  369. c := etcd.NewClient()
  370. c.SyncCluster()
  371. rmReq, _ := http.NewRequest("DELETE", "http://127.0.0.1:7001/remove/node3", nil)
  372. client := &http.Client{}
  373. for i := 0; i < 2; i++ {
  374. for i := 0; i < 2; i++ {
  375. client.Do(rmReq)
  376. etcds[2].Wait()
  377. resp, err := c.Get("_etcd/machines")
  378. if err != nil {
  379. panic(err)
  380. }
  381. if len(resp) != 2 {
  382. t.Fatal("cannot remove machine")
  383. }
  384. if i == 1 {
  385. // rejoin with log
  386. etcds[2], err = os.StartProcess("etcd", argGroup[2], procAttr)
  387. } else {
  388. // rejoin without log
  389. etcds[2], err = os.StartProcess("etcd", append(argGroup[2], "-f"), procAttr)
  390. }
  391. if err != nil {
  392. panic(err)
  393. }
  394. time.Sleep(time.Second)
  395. resp, err = c.Get("_etcd/machines")
  396. if err != nil {
  397. panic(err)
  398. }
  399. if len(resp) != 3 {
  400. t.Fatal("add machine fails")
  401. }
  402. }
  403. // first kill the node, then remove it, then add it back
  404. for i := 0; i < 2; i++ {
  405. etcds[2].Kill()
  406. etcds[2].Wait()
  407. client.Do(rmReq)
  408. resp, err := c.Get("_etcd/machines")
  409. if err != nil {
  410. panic(err)
  411. }
  412. if len(resp) != 2 {
  413. t.Fatal("cannot remove machine")
  414. }
  415. if i == 1 {
  416. // rejoin with log
  417. etcds[2], err = os.StartProcess("etcd", append(argGroup[2]), procAttr)
  418. } else {
  419. // rejoin without log
  420. etcds[2], err = os.StartProcess("etcd", append(argGroup[2], "-f"), procAttr)
  421. }
  422. if err != nil {
  423. panic(err)
  424. }
  425. time.Sleep(time.Second)
  426. resp, err = c.Get("_etcd/machines")
  427. if err != nil {
  428. panic(err)
  429. }
  430. if len(resp) != 3 {
  431. t.Fatal("add machine fails")
  432. }
  433. }
  434. }
  435. test.DestroyCluster(etcds)
  436. }
  437. func templateBenchmarkEtcdDirectCall(b *testing.B, tls bool) {
  438. procAttr := new(os.ProcAttr)
  439. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  440. clusterSize := 3
  441. _, etcds, _ := test.CreateCluster(clusterSize, procAttr, tls)
  442. defer test.DestroyCluster(etcds)
  443. time.Sleep(time.Second)
  444. b.ResetTimer()
  445. for i := 0; i < b.N; i++ {
  446. resp, _ := http.Get("http://127.0.0.1:4001/test/speed")
  447. resp.Body.Close()
  448. }
  449. }
  450. func BenchmarkEtcdDirectCall(b *testing.B) {
  451. templateBenchmarkEtcdDirectCall(b, false)
  452. }
  453. func BenchmarkEtcdDirectCallTls(b *testing.B) {
  454. templateBenchmarkEtcdDirectCall(b, true)
  455. }