standby_test.go 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. package test
  2. import (
  3. "bytes"
  4. "fmt"
  5. "os"
  6. "testing"
  7. "time"
  8. "github.com/coreos/etcd/server"
  9. "github.com/coreos/etcd/store"
  10. "github.com/coreos/etcd/tests"
  11. "github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
  12. "github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert"
  13. )
  14. // Create a full cluster and then change the active size.
  15. func TestStandby(t *testing.T) {
  16. clusterSize := 15
  17. _, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
  18. if !assert.NoError(t, err) {
  19. t.Fatal("cannot create cluster")
  20. }
  21. defer DestroyCluster(etcds)
  22. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"syncInterval":1}`))
  23. if !assert.Equal(t, resp.StatusCode, 200) {
  24. t.FailNow()
  25. }
  26. time.Sleep(time.Second)
  27. c := etcd.NewClient(nil)
  28. c.SyncCluster()
  29. // Verify that we just have default machines.
  30. result, err := c.Get("_etcd/machines", false, true)
  31. assert.NoError(t, err)
  32. assert.Equal(t, len(result.Node.Nodes), 9)
  33. t.Log("Reconfigure with a smaller active size")
  34. resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7, "syncInterval":1}`))
  35. if !assert.Equal(t, resp.StatusCode, 200) {
  36. t.FailNow()
  37. }
  38. // Wait for two monitor cycles before checking for demotion.
  39. time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))
  40. // Verify that we now have seven peers.
  41. result, err = c.Get("_etcd/machines", false, true)
  42. assert.NoError(t, err)
  43. assert.Equal(t, len(result.Node.Nodes), 7)
  44. t.Log("Test the functionality of all servers")
  45. // Set key.
  46. time.Sleep(time.Second)
  47. if _, err := c.Set("foo", "bar", 0); err != nil {
  48. panic(err)
  49. }
  50. time.Sleep(time.Second)
  51. // Check that all peers and standbys have the value.
  52. for i := range etcds {
  53. resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
  54. if assert.NoError(t, err) {
  55. body := tests.ReadBodyJSON(resp)
  56. if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
  57. assert.Equal(t, node["value"], "bar")
  58. }
  59. }
  60. }
  61. t.Log("Reconfigure with larger active size and wait for join")
  62. resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":8, "syncInterval":1}`))
  63. if !assert.Equal(t, resp.StatusCode, 200) {
  64. t.FailNow()
  65. }
  66. time.Sleep((1 * time.Second) + (1 * time.Second))
  67. // Verify that exactly eight machines are in the cluster.
  68. result, err = c.Get("_etcd/machines", false, true)
  69. assert.NoError(t, err)
  70. assert.Equal(t, len(result.Node.Nodes), 8)
  71. }
  72. // Create a full cluster, disconnect a peer, wait for removal, wait for standby join.
  73. func TestStandbyAutoJoin(t *testing.T) {
  74. clusterSize := 5
  75. _, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
  76. if err != nil {
  77. t.Fatal("cannot create cluster")
  78. }
  79. defer func() {
  80. // Wrap this in a closure so that it picks up the updated version of
  81. // the "etcds" variable.
  82. DestroyCluster(etcds)
  83. }()
  84. c := etcd.NewClient(nil)
  85. c.SyncCluster()
  86. time.Sleep(1 * time.Second)
  87. // Verify that we have five machines.
  88. result, err := c.Get("_etcd/machines", false, true)
  89. assert.NoError(t, err)
  90. assert.Equal(t, len(result.Node.Nodes), 5)
  91. // Reconfigure with a short remove delay (2 second).
  92. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "removeDelay":2, "syncInterval":1}`))
  93. if !assert.Equal(t, resp.StatusCode, 200) {
  94. t.FailNow()
  95. }
  96. // Wait for a monitor cycle before checking for removal.
  97. time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
  98. // Verify that we now have four peers.
  99. result, err = c.Get("_etcd/machines", false, true)
  100. assert.NoError(t, err)
  101. assert.Equal(t, len(result.Node.Nodes), 4)
  102. // Remove peer.
  103. etcd := etcds[1]
  104. etcds = append(etcds[:1], etcds[2:]...)
  105. if err := etcd.Kill(); err != nil {
  106. panic(err.Error())
  107. }
  108. etcd.Release()
  109. // Wait for it to get dropped.
  110. time.Sleep(server.PeerActivityMonitorTimeout + (1 * time.Second))
  111. // Wait for the standby to join.
  112. time.Sleep((1 * time.Second) + (1 * time.Second))
  113. // Verify that we have 4 peers.
  114. result, err = c.Get("_etcd/machines", true, true)
  115. assert.NoError(t, err)
  116. assert.Equal(t, len(result.Node.Nodes), 4)
  117. // Verify that node2 is not one of those peers.
  118. _, err = c.Get("_etcd/machines/node2", false, false)
  119. assert.Error(t, err)
  120. }
  121. // Create a full cluster and then change the active size gradually.
  122. func TestStandbyGradualChange(t *testing.T) {
  123. clusterSize := 9
  124. _, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
  125. assert.NoError(t, err)
  126. defer DestroyCluster(etcds)
  127. if err != nil {
  128. t.Fatal("cannot create cluster")
  129. }
  130. time.Sleep(time.Second)
  131. c := etcd.NewClient(nil)
  132. c.SyncCluster()
  133. num := clusterSize
  134. for inc := 0; inc < 2; inc++ {
  135. for i := 0; i < 6; i++ {
  136. // Verify that we just have i machines.
  137. result, err := c.Get("_etcd/machines", false, true)
  138. assert.NoError(t, err)
  139. assert.Equal(t, len(result.Node.Nodes), num)
  140. if inc == 0 {
  141. num--
  142. } else {
  143. num++
  144. }
  145. t.Log("Reconfigure with active size", num)
  146. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
  147. if !assert.Equal(t, resp.StatusCode, 200) {
  148. t.FailNow()
  149. }
  150. if inc == 0 {
  151. // Wait for monitor cycles before checking for demotion.
  152. time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
  153. } else {
  154. time.Sleep(time.Second + (1 * time.Second))
  155. }
  156. // Verify that we now have peers.
  157. result, err = c.Get("_etcd/machines", false, true)
  158. assert.NoError(t, err)
  159. assert.Equal(t, len(result.Node.Nodes), num)
  160. t.Log("Test the functionality of all servers")
  161. // Set key.
  162. if _, err := c.Set("foo", "bar", 0); err != nil {
  163. panic(err)
  164. }
  165. time.Sleep(100 * time.Millisecond)
  166. // Check that all peers and standbys have the value.
  167. for i := range etcds {
  168. resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
  169. if assert.NoError(t, err) {
  170. body := tests.ReadBodyJSON(resp)
  171. if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
  172. assert.Equal(t, node["value"], "bar")
  173. }
  174. }
  175. }
  176. }
  177. }
  178. }
  179. // Create a full cluster and then change the active size dramatically.
  180. func TestStandbyDramaticChange(t *testing.T) {
  181. clusterSize := 9
  182. _, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
  183. assert.NoError(t, err)
  184. defer DestroyCluster(etcds)
  185. if err != nil {
  186. t.Fatal("cannot create cluster")
  187. }
  188. time.Sleep(time.Second)
  189. c := etcd.NewClient(nil)
  190. c.SyncCluster()
  191. num := clusterSize
  192. for i := 0; i < 3; i++ {
  193. for inc := 0; inc < 2; inc++ {
  194. // Verify that we just have i machines.
  195. result, err := c.Get("_etcd/machines", false, true)
  196. assert.NoError(t, err)
  197. assert.Equal(t, len(result.Node.Nodes), num)
  198. if inc == 0 {
  199. num -= 6
  200. } else {
  201. num += 6
  202. }
  203. t.Log("Reconfigure with active size", num)
  204. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
  205. if !assert.Equal(t, resp.StatusCode, 200) {
  206. t.FailNow()
  207. }
  208. if inc == 0 {
  209. // Wait for monitor cycles before checking for demotion.
  210. time.Sleep(6*server.ActiveMonitorTimeout + (1 * time.Second))
  211. } else {
  212. time.Sleep(time.Second + (1 * time.Second))
  213. }
  214. // Verify that we now have peers.
  215. result, err = c.Get("_etcd/machines", false, true)
  216. assert.NoError(t, err)
  217. assert.Equal(t, len(result.Node.Nodes), num)
  218. t.Log("Test the functionality of all servers")
  219. // Set key.
  220. if _, err := c.Set("foo", "bar", 0); err != nil {
  221. panic(err)
  222. }
  223. time.Sleep(100 * time.Millisecond)
  224. // Check that all peers and standbys have the value.
  225. for i := range etcds {
  226. resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
  227. if assert.NoError(t, err) {
  228. body := tests.ReadBodyJSON(resp)
  229. if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
  230. assert.Equal(t, node["value"], "bar")
  231. }
  232. }
  233. }
  234. }
  235. }
  236. }
  237. func TestStandbyJoinMiss(t *testing.T) {
  238. clusterSize := 2
  239. _, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
  240. if err != nil {
  241. t.Fatal("cannot create cluster")
  242. }
  243. defer DestroyCluster(etcds)
  244. c := etcd.NewClient(nil)
  245. c.SyncCluster()
  246. time.Sleep(1 * time.Second)
  247. // Verify that we have two machines.
  248. result, err := c.Get("_etcd/machines", false, true)
  249. assert.NoError(t, err)
  250. assert.Equal(t, len(result.Node.Nodes), clusterSize)
  251. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"removeDelay":4, "syncInterval":4}`))
  252. if !assert.Equal(t, resp.StatusCode, 200) {
  253. t.FailNow()
  254. }
  255. time.Sleep(time.Second)
  256. resp, _ = tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)
  257. if !assert.Equal(t, resp.StatusCode, 200) {
  258. t.FailNow()
  259. }
  260. // Wait for a monitor cycle before checking for removal.
  261. time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
  262. // Verify that we now have four peers.
  263. result, err = c.Get("_etcd/machines", false, true)
  264. assert.NoError(t, err)
  265. assert.Equal(t, len(result.Node.Nodes), 1)
  266. // Simulate the join failure
  267. _, err = server.NewClient(nil).AddMachine("http://localhost:7001",
  268. &server.JoinCommand{
  269. MinVersion: store.MinVersion(),
  270. MaxVersion: store.MaxVersion(),
  271. Name: "node2",
  272. RaftURL: "http://127.0.0.1:7002",
  273. EtcdURL: "http://127.0.0.1:4002",
  274. })
  275. assert.NoError(t, err)
  276. time.Sleep(6 * time.Second)
  277. go tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)
  278. time.Sleep(time.Second)
  279. result, err = c.Get("_etcd/machines", false, true)
  280. assert.NoError(t, err)
  281. assert.Equal(t, len(result.Node.Nodes), 1)
  282. }