network_partition_test.go 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. // Copyright 2017 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // +build !cluster_proxy
  15. package integration
  16. import (
  17. "context"
  18. "errors"
  19. "testing"
  20. "time"
  21. "github.com/coreos/etcd/clientv3"
  22. "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  23. "github.com/coreos/etcd/integration"
  24. "github.com/coreos/etcd/pkg/testutil"
  25. )
  // errExpected is the sentinel the partition tests use to mark a request
  // failure that they anticipated, as opposed to an unexpected error.
  var errExpected = errors.New("expected error")
  29. // TestBalancerUnderNetworkPartitionPut tests when one member becomes isolated,
  30. // first Put request fails, and following retry succeeds with client balancer
  31. // switching to others.
  32. func TestBalancerUnderNetworkPartitionPut(t *testing.T) {
  33. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  34. _, err := cli.Put(ctx, "a", "b")
  35. if err == context.DeadlineExceeded || err == rpctypes.ErrTimeout {
  36. return errExpected
  37. }
  38. return err
  39. })
  40. }
  41. // TestBalancerUnderNetworkPartitionGet tests when one member becomes isolated,
  42. // first Get request fails, and following retry succeeds with client balancer
  43. // switching to others.
  44. func TestBalancerUnderNetworkPartitionGet(t *testing.T) {
  45. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  46. _, err := cli.Get(ctx, "a")
  47. if err == context.DeadlineExceeded {
  48. return errExpected
  49. }
  50. return err
  51. })
  52. }
  53. func testBalancerUnderNetworkPartition(t *testing.T, op func(*clientv3.Client, context.Context) error) {
  54. defer testutil.AfterTest(t)
  55. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  56. Size: 3,
  57. GRPCKeepAliveMinTime: time.Millisecond, // avoid too_many_pings
  58. SkipCreatingClient: true,
  59. })
  60. defer clus.Terminate(t)
  61. // expect pin ep[0]
  62. ccfg := clientv3.Config{
  63. Endpoints: []string{clus.Members[0].GRPCAddr()},
  64. DialTimeout: 3 * time.Second,
  65. DialKeepAliveTime: 2 * time.Second,
  66. DialKeepAliveTimeout: 2 * time.Second,
  67. }
  68. cli, err := clientv3.New(ccfg)
  69. if err != nil {
  70. t.Fatal(err)
  71. }
  72. defer cli.Close()
  73. // wait for ep[0] to be pinned
  74. mustWaitPinReady(t, cli)
  75. // add other endpoints for later endpoint switch
  76. cli.SetEndpoints(clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr())
  77. clus.Members[0].InjectPartition(t, clus.Members[1:]...)
  78. for i := 0; i < 2; i++ {
  79. ctx, cancel := context.WithTimeout(context.Background(), time.Second)
  80. err = op(cli, ctx)
  81. cancel()
  82. if err == nil {
  83. break
  84. }
  85. if err != errExpected {
  86. t.Errorf("#%d: expected %v, got %v", i, errExpected, err)
  87. }
  88. // give enough time for endpoint switch
  89. // TODO: remove random sleep by syncing directly with balancer
  90. if i == 0 {
  91. time.Sleep(5 * time.Second)
  92. }
  93. }
  94. if err != nil {
  95. t.Errorf("balancer did not switch in time (%v)", err)
  96. }
  97. }
  98. func TestBalancerUnderNetworkPartitionWatchLeader(t *testing.T) {
  99. testBalancerUnderNetworkPartitionWatch(t, true)
  100. }
  101. func TestBalancerUnderNetworkPartitionWatchFollower(t *testing.T) {
  102. testBalancerUnderNetworkPartitionWatch(t, false)
  103. }
  104. // testBalancerUnderNetworkPartitionWatch ensures watch stream
  105. // to a partitioned node be closed when context requires leader.
  106. func testBalancerUnderNetworkPartitionWatch(t *testing.T, isolateLeader bool) {
  107. defer testutil.AfterTest(t)
  108. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  109. Size: 3,
  110. SkipCreatingClient: true,
  111. })
  112. defer clus.Terminate(t)
  113. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
  114. target := clus.WaitLeader(t)
  115. if !isolateLeader {
  116. target = (target + 1) % 3
  117. }
  118. // pin eps[target]
  119. watchCli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[target]}})
  120. if err != nil {
  121. t.Fatal(err)
  122. }
  123. defer watchCli.Close()
  124. // wait for eps[target] to be pinned
  125. mustWaitPinReady(t, watchCli)
  126. // add all eps to list, so that when the original pined one fails
  127. // the client can switch to other available eps
  128. watchCli.SetEndpoints(eps...)
  129. wch := watchCli.Watch(clientv3.WithRequireLeader(context.Background()), "foo", clientv3.WithCreatedNotify())
  130. select {
  131. case <-wch:
  132. case <-time.After(3 * time.Second):
  133. t.Fatal("took too long to create watch")
  134. }
  135. // isolate eps[target]
  136. clus.Members[target].InjectPartition(t,
  137. clus.Members[(target+1)%3],
  138. clus.Members[(target+2)%3],
  139. )
  140. select {
  141. case ev := <-wch:
  142. if len(ev.Events) != 0 {
  143. t.Fatal("expected no event")
  144. }
  145. if err = ev.Err(); err != rpctypes.ErrNoLeader {
  146. t.Fatalf("expected %v, got %v", rpctypes.ErrNoLeader, err)
  147. }
  148. case <-time.After(3 * time.Second): // enough time to detect leader lost
  149. t.Fatal("took too long to detect leader lost")
  150. }
  151. }