etcd_test.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. package main
  2. import (
  3. "fmt"
  4. "math/rand"
  5. "net/http"
  6. "net/http/httptest"
  7. "net/url"
  8. "os"
  9. "strconv"
  10. "strings"
  11. "testing"
  12. "time"
  13. "github.com/coreos/etcd/server"
  14. "github.com/coreos/etcd/test"
  15. "github.com/coreos/go-etcd/etcd"
  16. )
  17. // Create a single node and try to set value
  18. func TestSingleNode(t *testing.T) {
  19. procAttr := new(os.ProcAttr)
  20. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  21. args := []string{"etcd", "-n=node1", "-f", "-d=/tmp/node1"}
  22. process, err := os.StartProcess("etcd", args, procAttr)
  23. if err != nil {
  24. t.Fatal("start process failed:" + err.Error())
  25. return
  26. }
  27. defer process.Kill()
  28. time.Sleep(time.Second)
  29. c := etcd.NewClient()
  30. c.SyncCluster()
  31. // Test Set
  32. result, err := c.Set("foo", "bar", 100)
  33. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL < 95 {
  34. if err != nil {
  35. t.Fatal("Set 1: ", err)
  36. }
  37. t.Fatalf("Set 1 failed with %s %s %v", result.Key, result.Value, result.TTL)
  38. }
  39. time.Sleep(time.Second)
  40. result, err = c.Set("foo", "bar", 100)
  41. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.PrevValue != "bar" || result.TTL != 100 {
  42. if err != nil {
  43. t.Fatal("Set 2: ", err)
  44. }
  45. t.Fatalf("Set 2 failed with %s %s %v", result.Key, result.Value, result.TTL)
  46. }
  47. // Add a test-and-set test
  48. // First, we'll test we can change the value if we get it write
  49. result, match, err := c.TestAndSet("foo", "bar", "foobar", 100)
  50. if err != nil || result.Key != "/foo" || result.Value != "foobar" || result.PrevValue != "bar" || result.TTL != 100 || !match {
  51. if err != nil {
  52. t.Fatal(err)
  53. }
  54. t.Fatalf("Set 3 failed with %s %s %v", result.Key, result.Value, result.TTL)
  55. }
  56. // Next, we'll make sure we can't set it without the correct prior value
  57. _, _, err = c.TestAndSet("foo", "bar", "foofoo", 100)
  58. if err == nil {
  59. t.Fatalf("Set 4 expecting error when setting key with incorrect previous value")
  60. }
  61. // Finally, we'll make sure a blank previous value still counts as a test-and-set and still has to match
  62. _, _, err = c.TestAndSet("foo", "", "barbar", 100)
  63. if err == nil {
  64. t.Fatalf("Set 5 expecting error when setting key with blank (incorrect) previous value")
  65. }
  66. }
  67. // TestInternalVersionFail will ensure that etcd does not come up if the internal raft
  68. // versions do not match.
  69. func TestInternalVersionFail(t *testing.T) {
  70. checkedVersion := false
  71. testMux := http.NewServeMux()
  72. testMux.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) {
  73. fmt.Fprintln(w, "This is not a version number")
  74. checkedVersion = true
  75. })
  76. testMux.HandleFunc("/join", func(w http.ResponseWriter, r *http.Request) {
  77. t.Fatal("should not attempt to join!")
  78. })
  79. ts := httptest.NewServer(testMux)
  80. defer ts.Close()
  81. fakeURL, _ := url.Parse(ts.URL)
  82. procAttr := new(os.ProcAttr)
  83. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  84. args := []string{"etcd", "-n=node1", "-f", "-d=/tmp/node1", "-C=" + fakeURL.Host}
  85. process, err := os.StartProcess("etcd", args, procAttr)
  86. if err != nil {
  87. t.Fatal("start process failed:" + err.Error())
  88. return
  89. }
  90. defer process.Kill()
  91. time.Sleep(time.Second)
  92. _, err = http.Get("http://127.0.0.1:4001")
  93. if err == nil {
  94. t.Fatal("etcd node should not be up")
  95. return
  96. }
  97. if checkedVersion == false {
  98. t.Fatal("etcd did not check the version")
  99. return
  100. }
  101. }
  102. // This test creates a single node and then set a value to it.
  103. // Then this test kills the node and restart it and tries to get the value again.
  104. func TestSingleNodeRecovery(t *testing.T) {
  105. procAttr := new(os.ProcAttr)
  106. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  107. args := []string{"etcd", "-n=node1", "-d=/tmp/node1"}
  108. process, err := os.StartProcess("etcd", append(args, "-f"), procAttr)
  109. if err != nil {
  110. t.Fatal("start process failed:" + err.Error())
  111. return
  112. }
  113. time.Sleep(time.Second)
  114. c := etcd.NewClient()
  115. c.SyncCluster()
  116. // Test Set
  117. result, err := c.Set("foo", "bar", 100)
  118. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL < 95 {
  119. if err != nil {
  120. t.Fatal(err)
  121. }
  122. t.Fatalf("Set 1 failed with %s %s %v", result.Key, result.Value, result.TTL)
  123. }
  124. time.Sleep(time.Second)
  125. process.Kill()
  126. process, err = os.StartProcess("etcd", args, procAttr)
  127. defer process.Kill()
  128. if err != nil {
  129. t.Fatal("start process failed:" + err.Error())
  130. return
  131. }
  132. time.Sleep(time.Second)
  133. results, err := c.Get("foo")
  134. if err != nil {
  135. t.Fatal("get fail: " + err.Error())
  136. return
  137. }
  138. result = results[0]
  139. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL > 99 {
  140. if err != nil {
  141. t.Fatal(err)
  142. }
  143. t.Fatalf("Recovery Get failed with %s %s %v", result.Key, result.Value, result.TTL)
  144. }
  145. }
  146. // Create a three nodes and try to set value
  147. func templateTestSimpleMultiNode(t *testing.T, tls bool) {
  148. procAttr := new(os.ProcAttr)
  149. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  150. clusterSize := 3
  151. _, etcds, err := test.CreateCluster(clusterSize, procAttr, tls)
  152. if err != nil {
  153. t.Fatal("cannot create cluster")
  154. }
  155. defer test.DestroyCluster(etcds)
  156. time.Sleep(time.Second)
  157. c := etcd.NewClient()
  158. c.SyncCluster()
  159. // Test Set
  160. result, err := c.Set("foo", "bar", 100)
  161. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.TTL < 95 {
  162. if err != nil {
  163. t.Fatal(err)
  164. }
  165. t.Fatalf("Set 1 failed with %s %s %v", result.Key, result.Value, result.TTL)
  166. }
  167. time.Sleep(time.Second)
  168. result, err = c.Set("foo", "bar", 100)
  169. if err != nil || result.Key != "/foo" || result.Value != "bar" || result.PrevValue != "bar" || result.TTL != 100 {
  170. if err != nil {
  171. t.Fatal(err)
  172. }
  173. t.Fatalf("Set 2 failed with %s %s %v", result.Key, result.Value, result.TTL)
  174. }
  175. }
  176. func TestSimpleMultiNode(t *testing.T) {
  177. templateTestSimpleMultiNode(t, false)
  178. }
  179. func TestSimpleMultiNodeTls(t *testing.T) {
  180. templateTestSimpleMultiNode(t, true)
  181. }
  182. // Create a five nodes
  183. // Kill all the nodes and restart
  184. func TestMultiNodeKillAllAndRecovery(t *testing.T) {
  185. procAttr := new(os.ProcAttr)
  186. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  187. clusterSize := 5
  188. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  189. defer test.DestroyCluster(etcds)
  190. if err != nil {
  191. t.Fatal("cannot create cluster")
  192. }
  193. c := etcd.NewClient()
  194. c.SyncCluster()
  195. time.Sleep(time.Second)
  196. // send 10 commands
  197. for i := 0; i < 10; i++ {
  198. // Test Set
  199. _, err := c.Set("foo", "bar", 0)
  200. if err != nil {
  201. panic(err)
  202. }
  203. }
  204. time.Sleep(time.Second)
  205. // kill all
  206. test.DestroyCluster(etcds)
  207. time.Sleep(time.Second)
  208. stop := make(chan bool)
  209. leaderChan := make(chan string, 1)
  210. all := make(chan bool, 1)
  211. time.Sleep(time.Second)
  212. for i := 0; i < clusterSize; i++ {
  213. etcds[i], err = os.StartProcess("etcd", argGroup[i], procAttr)
  214. }
  215. go test.Monitor(clusterSize, 1, leaderChan, all, stop)
  216. <-all
  217. <-leaderChan
  218. result, err := c.Set("foo", "bar", 0)
  219. if err != nil {
  220. t.Fatalf("Recovery error: %s", err)
  221. }
  222. if result.Index != 18 {
  223. t.Fatalf("recovery failed! [%d/18]", result.Index)
  224. }
  225. }
  226. // Create a five nodes
  227. // Randomly kill one of the node and keep on sending set command to the cluster
  228. func TestMultiNodeKillOne(t *testing.T) {
  229. procAttr := new(os.ProcAttr)
  230. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  231. clusterSize := 5
  232. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  233. if err != nil {
  234. t.Fatal("cannot create cluster")
  235. }
  236. defer test.DestroyCluster(etcds)
  237. time.Sleep(2 * time.Second)
  238. c := etcd.NewClient()
  239. c.SyncCluster()
  240. stop := make(chan bool)
  241. // Test Set
  242. go test.Set(stop)
  243. for i := 0; i < 10; i++ {
  244. num := rand.Int() % clusterSize
  245. fmt.Println("kill node", num+1)
  246. // kill
  247. etcds[num].Kill()
  248. etcds[num].Release()
  249. time.Sleep(time.Second)
  250. // restart
  251. etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
  252. if err != nil {
  253. panic(err)
  254. }
  255. time.Sleep(time.Second)
  256. }
  257. fmt.Println("stop")
  258. stop <- true
  259. <-stop
  260. }
  261. // This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times.
  262. // It will print out the election time and the average election time.
  263. func TestKillLeader(t *testing.T) {
  264. procAttr := new(os.ProcAttr)
  265. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  266. clusterSize := 5
  267. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  268. if err != nil {
  269. t.Fatal("cannot create cluster")
  270. }
  271. defer test.DestroyCluster(etcds)
  272. stop := make(chan bool)
  273. leaderChan := make(chan string, 1)
  274. all := make(chan bool, 1)
  275. time.Sleep(time.Second)
  276. go test.Monitor(clusterSize, 1, leaderChan, all, stop)
  277. var totalTime time.Duration
  278. leader := "http://127.0.0.1:7001"
  279. for i := 0; i < clusterSize; i++ {
  280. fmt.Println("leader is ", leader)
  281. port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
  282. num := port - 7001
  283. fmt.Println("kill server ", num)
  284. etcds[num].Kill()
  285. etcds[num].Release()
  286. start := time.Now()
  287. for {
  288. newLeader := <-leaderChan
  289. if newLeader != leader {
  290. leader = newLeader
  291. break
  292. }
  293. }
  294. take := time.Now().Sub(start)
  295. totalTime += take
  296. avgTime := totalTime / (time.Duration)(i+1)
  297. fmt.Println("Leader election time is ", take, "with election timeout", server.ElectionTimeout)
  298. fmt.Println("Leader election time average is", avgTime, "with election timeout", server.ElectionTimeout)
  299. etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
  300. }
  301. stop <- true
  302. }
  303. // TestKillRandom kills random machines in the cluster and
  304. // restart them after all other machines agree on the same leader
  305. func TestKillRandom(t *testing.T) {
  306. procAttr := new(os.ProcAttr)
  307. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  308. clusterSize := 9
  309. argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false)
  310. if err != nil {
  311. t.Fatal("cannot create cluster")
  312. }
  313. defer test.DestroyCluster(etcds)
  314. stop := make(chan bool)
  315. leaderChan := make(chan string, 1)
  316. all := make(chan bool, 1)
  317. time.Sleep(3 * time.Second)
  318. go test.Monitor(clusterSize, 4, leaderChan, all, stop)
  319. toKill := make(map[int]bool)
  320. for i := 0; i < 20; i++ {
  321. fmt.Printf("TestKillRandom Round[%d/20]\n", i)
  322. j := 0
  323. for {
  324. r := rand.Int31n(9)
  325. if _, ok := toKill[int(r)]; !ok {
  326. j++
  327. toKill[int(r)] = true
  328. }
  329. if j > 3 {
  330. break
  331. }
  332. }
  333. for num, _ := range toKill {
  334. err := etcds[num].Kill()
  335. if err != nil {
  336. panic(err)
  337. }
  338. etcds[num].Wait()
  339. }
  340. time.Sleep(server.ElectionTimeout)
  341. <-leaderChan
  342. for num, _ := range toKill {
  343. etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr)
  344. }
  345. toKill = make(map[int]bool)
  346. <-all
  347. }
  348. stop <- true
  349. }
  350. // remove the node and node rejoin with previous log
  351. func TestRemoveNode(t *testing.T) {
  352. procAttr := new(os.ProcAttr)
  353. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  354. clusterSize := 3
  355. argGroup, etcds, _ := test.CreateCluster(clusterSize, procAttr, false)
  356. defer test.DestroyCluster(etcds)
  357. time.Sleep(time.Second)
  358. c := etcd.NewClient()
  359. c.SyncCluster()
  360. rmReq, _ := http.NewRequest("DELETE", "http://127.0.0.1:7001/remove/node3", nil)
  361. client := &http.Client{}
  362. for i := 0; i < 2; i++ {
  363. for i := 0; i < 2; i++ {
  364. client.Do(rmReq)
  365. etcds[2].Wait()
  366. resp, err := c.Get("_etcd/machines")
  367. if err != nil {
  368. panic(err)
  369. }
  370. if len(resp) != 2 {
  371. t.Fatal("cannot remove machine")
  372. }
  373. if i == 1 {
  374. // rejoin with log
  375. etcds[2], err = os.StartProcess("etcd", argGroup[2], procAttr)
  376. } else {
  377. // rejoin without log
  378. etcds[2], err = os.StartProcess("etcd", append(argGroup[2], "-f"), procAttr)
  379. }
  380. if err != nil {
  381. panic(err)
  382. }
  383. time.Sleep(time.Second)
  384. resp, err = c.Get("_etcd/machines")
  385. if err != nil {
  386. panic(err)
  387. }
  388. if len(resp) != 3 {
  389. t.Fatalf("add machine fails #1 (%d != 3)", len(resp))
  390. }
  391. }
  392. // first kill the node, then remove it, then add it back
  393. for i := 0; i < 2; i++ {
  394. etcds[2].Kill()
  395. etcds[2].Wait()
  396. client.Do(rmReq)
  397. resp, err := c.Get("_etcd/machines")
  398. if err != nil {
  399. panic(err)
  400. }
  401. if len(resp) != 2 {
  402. t.Fatal("cannot remove machine")
  403. }
  404. if i == 1 {
  405. // rejoin with log
  406. etcds[2], err = os.StartProcess("etcd", append(argGroup[2]), procAttr)
  407. } else {
  408. // rejoin without log
  409. etcds[2], err = os.StartProcess("etcd", append(argGroup[2], "-f"), procAttr)
  410. }
  411. if err != nil {
  412. panic(err)
  413. }
  414. time.Sleep(time.Second)
  415. resp, err = c.Get("_etcd/machines")
  416. if err != nil {
  417. panic(err)
  418. }
  419. if len(resp) != 3 {
  420. t.Fatalf("add machine fails #2 (%d != 3)", len(resp))
  421. }
  422. }
  423. }
  424. }
  425. func templateBenchmarkEtcdDirectCall(b *testing.B, tls bool) {
  426. procAttr := new(os.ProcAttr)
  427. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  428. clusterSize := 3
  429. _, etcds, _ := test.CreateCluster(clusterSize, procAttr, tls)
  430. defer test.DestroyCluster(etcds)
  431. time.Sleep(time.Second)
  432. b.ResetTimer()
  433. for i := 0; i < b.N; i++ {
  434. resp, _ := http.Get("http://127.0.0.1:4001/test/speed")
  435. resp.Body.Close()
  436. }
  437. }
  438. func BenchmarkEtcdDirectCall(b *testing.B) {
  439. templateBenchmarkEtcdDirectCall(b, false)
  440. }
  441. func BenchmarkEtcdDirectCallTls(b *testing.B) {
  442. templateBenchmarkEtcdDirectCall(b, true)
  443. }