// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. package integration
  15. import (
  16. "bytes"
  17. "testing"
  18. "time"
  19. "github.com/coreos/etcd/clientv3"
  20. "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  21. "github.com/coreos/etcd/integration"
  22. "github.com/coreos/etcd/pkg/testutil"
  23. "golang.org/x/net/context"
  24. )
  25. // TestBalancerUnderServerShutdownWatch expects that watch client
  26. // switch its endpoints when the member of the pinned endpoint fails.
  27. func TestBalancerUnderServerShutdownWatch(t *testing.T) {
  28. defer testutil.AfterTest(t)
  29. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  30. Size: 3,
  31. SkipCreatingClient: true,
  32. })
  33. defer clus.Terminate(t)
  34. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
  35. lead := clus.WaitLeader(t)
  36. // pin eps[lead]
  37. watchCli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[lead]}})
  38. if err != nil {
  39. t.Fatal(err)
  40. }
  41. defer watchCli.Close()
  42. // wait for eps[lead] to be pinned
  43. mustWaitPinReady(t, watchCli)
  44. // add all eps to list, so that when the original pined one fails
  45. // the client can switch to other available eps
  46. watchCli.SetEndpoints(eps...)
  47. key, val := "foo", "bar"
  48. wch := watchCli.Watch(context.Background(), key, clientv3.WithCreatedNotify())
  49. select {
  50. case <-wch:
  51. case <-time.After(3 * time.Second):
  52. t.Fatal("took too long to create watch")
  53. }
  54. donec := make(chan struct{})
  55. go func() {
  56. defer close(donec)
  57. // switch to others when eps[lead] is shut down
  58. select {
  59. case ev := <-wch:
  60. if werr := ev.Err(); werr != nil {
  61. t.Fatal(werr)
  62. }
  63. if len(ev.Events) != 1 {
  64. t.Fatalf("expected one event, got %+v", ev)
  65. }
  66. if !bytes.Equal(ev.Events[0].Kv.Value, []byte(val)) {
  67. t.Fatalf("expected %q, got %+v", val, ev.Events[0].Kv)
  68. }
  69. case <-time.After(7 * time.Second):
  70. t.Fatal("took too long to receive events")
  71. }
  72. }()
  73. // shut down eps[lead]
  74. clus.Members[lead].Terminate(t)
  75. // writes to eps[lead+1]
  76. putCli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[(lead+1)%3]}})
  77. if err != nil {
  78. t.Fatal(err)
  79. }
  80. defer putCli.Close()
  81. for {
  82. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  83. _, err = putCli.Put(ctx, key, val)
  84. cancel()
  85. if err == nil {
  86. break
  87. }
  88. if err == context.DeadlineExceeded || err == rpctypes.ErrTimeout || err == rpctypes.ErrTimeoutDueToLeaderFail {
  89. continue
  90. }
  91. t.Fatal(err)
  92. }
  93. select {
  94. case <-donec:
  95. case <-time.After(5 * time.Second): // enough time for balancer switch
  96. t.Fatal("took too long to receive events")
  97. }
  98. }
  99. func TestBalancerUnderServerShutdownPut(t *testing.T) {
  100. testBalancerUnderServerShutdownMutable(t, func(cli *clientv3.Client, ctx context.Context) error {
  101. _, err := cli.Put(ctx, "foo", "bar")
  102. return err
  103. })
  104. }
  105. func TestBalancerUnderServerShutdownDelete(t *testing.T) {
  106. testBalancerUnderServerShutdownMutable(t, func(cli *clientv3.Client, ctx context.Context) error {
  107. _, err := cli.Delete(ctx, "foo")
  108. return err
  109. })
  110. }
  111. func TestBalancerUnderServerShutdownTxn(t *testing.T) {
  112. testBalancerUnderServerShutdownMutable(t, func(cli *clientv3.Client, ctx context.Context) error {
  113. _, err := cli.Txn(ctx).
  114. If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
  115. Then(clientv3.OpPut("foo", "bar")).
  116. Else(clientv3.OpPut("foo", "baz")).Commit()
  117. return err
  118. })
  119. }
  120. // testBalancerUnderServerShutdownMutable expects that when the member of
  121. // the pinned endpoint is shut down, the balancer switches its endpoints
  122. // and all subsequent put/delete/txn requests succeed with new endpoints.
  123. func testBalancerUnderServerShutdownMutable(t *testing.T, op func(*clientv3.Client, context.Context) error) {
  124. defer testutil.AfterTest(t)
  125. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  126. Size: 3,
  127. SkipCreatingClient: true,
  128. })
  129. defer clus.Terminate(t)
  130. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
  131. // pin eps[0]
  132. cli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[0]}})
  133. if err != nil {
  134. t.Fatal(err)
  135. }
  136. defer cli.Close()
  137. // wait for eps[0] to be pinned
  138. mustWaitPinReady(t, cli)
  139. // add all eps to list, so that when the original pined one fails
  140. // the client can switch to other available eps
  141. cli.SetEndpoints(eps...)
  142. // shut down eps[0]
  143. clus.Members[0].Terminate(t)
  144. // switched to others when eps[0] was explicitly shut down
  145. // and following request should succeed
  146. // TODO: remove this (expose client connection state?)
  147. time.Sleep(time.Second)
  148. cctx, ccancel := context.WithTimeout(context.Background(), time.Second)
  149. err = op(cli, cctx)
  150. ccancel()
  151. if err != nil {
  152. t.Fatal(err)
  153. }
  154. }
  155. func TestBalancerUnderServerShutdownGetLinearizable(t *testing.T) {
  156. testBalancerUnderServerShutdownImmutable(t, func(cli *clientv3.Client, ctx context.Context) error {
  157. _, err := cli.Get(ctx, "foo")
  158. return err
  159. }, 7*time.Second) // give enough time for leader election, balancer switch
  160. }
  161. func TestBalancerUnderServerShutdownGetSerializable(t *testing.T) {
  162. testBalancerUnderServerShutdownImmutable(t, func(cli *clientv3.Client, ctx context.Context) error {
  163. _, err := cli.Get(ctx, "foo", clientv3.WithSerializable())
  164. return err
  165. }, 2*time.Second)
  166. }
  167. // testBalancerUnderServerShutdownImmutable expects that when the member of
  168. // the pinned endpoint is shut down, the balancer switches its endpoints
  169. // and all subsequent range requests succeed with new endpoints.
  170. func testBalancerUnderServerShutdownImmutable(t *testing.T, op func(*clientv3.Client, context.Context) error, timeout time.Duration) {
  171. defer testutil.AfterTest(t)
  172. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  173. Size: 3,
  174. SkipCreatingClient: true,
  175. })
  176. defer clus.Terminate(t)
  177. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
  178. // pin eps[0]
  179. cli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[0]}})
  180. if err != nil {
  181. t.Errorf("failed to create client: %v", err)
  182. }
  183. defer cli.Close()
  184. // wait for eps[0] to be pinned
  185. mustWaitPinReady(t, cli)
  186. // add all eps to list, so that when the original pined one fails
  187. // the client can switch to other available eps
  188. cli.SetEndpoints(eps...)
  189. // shut down eps[0]
  190. clus.Members[0].Terminate(t)
  191. // switched to others when eps[0] was explicitly shut down
  192. // and following request should succeed
  193. cctx, ccancel := context.WithTimeout(context.Background(), timeout)
  194. err = op(cli, cctx)
  195. ccancel()
  196. if err != nil {
  197. t.Errorf("failed to finish range request in time %v (timeout %v)", err, timeout)
  198. }
  199. }
  200. func TestBalancerUnderServerStopInflightLinearizableGetOnRestart(t *testing.T) {
  201. tt := []pinTestOpt{
  202. {pinLeader: true, stopPinFirst: true},
  203. {pinLeader: true, stopPinFirst: false},
  204. {pinLeader: false, stopPinFirst: true},
  205. {pinLeader: false, stopPinFirst: false},
  206. }
  207. for i := range tt {
  208. testBalancerUnderServerStopInflightRangeOnRestart(t, true, tt[i])
  209. }
  210. }
  211. func TestBalancerUnderServerStopInflightSerializableGetOnRestart(t *testing.T) {
  212. tt := []pinTestOpt{
  213. {pinLeader: true, stopPinFirst: true},
  214. {pinLeader: true, stopPinFirst: false},
  215. {pinLeader: false, stopPinFirst: true},
  216. {pinLeader: false, stopPinFirst: false},
  217. }
  218. for i := range tt {
  219. testBalancerUnderServerStopInflightRangeOnRestart(t, false, tt[i])
  220. }
  221. }
  // pinTestOpt selects one variant of the inflight-range-on-restart test.
  //
  // pinLeader: when true the client is pinned to the leader's endpoint,
  // otherwise to a different member.
  // stopPinFirst: when true the pinned member is stopped before the other
  // member, otherwise the other member is stopped first.
  type pinTestOpt struct {
  	pinLeader, stopPinFirst bool
  }
  226. // testBalancerUnderServerStopInflightRangeOnRestart expects
  227. // inflight range request reconnects on server restart.
  228. func testBalancerUnderServerStopInflightRangeOnRestart(t *testing.T, linearizable bool, opt pinTestOpt) {
  229. defer testutil.AfterTest(t)
  230. cfg := &integration.ClusterConfig{
  231. Size: 2,
  232. SkipCreatingClient: true,
  233. }
  234. if linearizable {
  235. cfg.Size = 3
  236. }
  237. clus := integration.NewClusterV3(t, cfg)
  238. defer clus.Terminate(t)
  239. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}
  240. if linearizable {
  241. eps = append(eps, clus.Members[2].GRPCAddr())
  242. }
  243. lead := clus.WaitLeader(t)
  244. target := lead
  245. if !opt.pinLeader {
  246. target = (target + 1) % 2
  247. }
  248. // pin eps[target]
  249. cli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[target]}})
  250. if err != nil {
  251. t.Errorf("failed to create client: %v", err)
  252. }
  253. defer cli.Close()
  254. // wait for eps[target] to be pinned
  255. mustWaitPinReady(t, cli)
  256. // add all eps to list, so that when the original pined one fails
  257. // the client can switch to other available eps
  258. cli.SetEndpoints(eps...)
  259. if opt.stopPinFirst {
  260. clus.Members[target].Stop(t)
  261. // give some time for balancer switch before stopping the other
  262. time.Sleep(time.Second)
  263. clus.Members[(target+1)%2].Stop(t)
  264. } else {
  265. clus.Members[(target+1)%2].Stop(t)
  266. // balancer cannot pin other member since it's already stopped
  267. clus.Members[target].Stop(t)
  268. }
  269. // 3-second is the minimum interval between endpoint being marked
  270. // as unhealthy and being removed from unhealthy, so possibly
  271. // takes >5-second to unpin and repin an endpoint
  272. // TODO: decrease timeout when balancer switch rewrite
  273. clientTimeout := 7 * time.Second
  274. var gops []clientv3.OpOption
  275. if !linearizable {
  276. gops = append(gops, clientv3.WithSerializable())
  277. }
  278. donec, readyc := make(chan struct{}), make(chan struct{}, 1)
  279. go func() {
  280. defer close(donec)
  281. ctx, cancel := context.WithTimeout(context.TODO(), clientTimeout)
  282. readyc <- struct{}{}
  283. _, err := cli.Get(ctx, "abc", gops...)
  284. cancel()
  285. if err != nil {
  286. t.Fatal(err)
  287. }
  288. }()
  289. <-readyc
  290. clus.Members[target].Restart(t)
  291. select {
  292. case <-time.After(clientTimeout + 3*time.Second):
  293. t.Fatalf("timed out waiting for Get [linearizable: %v, opt: %+v]", linearizable, opt)
  294. case <-donec:
  295. }
  296. }