network_partition_test.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. // Copyright 2017 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // +build !cluster_proxy
  15. package integration
  16. import (
  17. "context"
  18. "errors"
  19. "testing"
  20. "time"
  21. "go.etcd.io/etcd/clientv3"
  22. "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
  23. "go.etcd.io/etcd/integration"
  24. "go.etcd.io/etcd/pkg/testutil"
  25. "google.golang.org/grpc"
  26. )
  27. var errExpected = errors.New("expected error")
  28. // TestBalancerUnderNetworkPartitionPut tests when one member becomes isolated,
  29. // first Put request fails, and following retry succeeds with client balancer
  30. // switching to others.
  31. func TestBalancerUnderNetworkPartitionPut(t *testing.T) {
  32. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  33. _, err := cli.Put(ctx, "a", "b")
  34. if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  35. return errExpected
  36. }
  37. return err
  38. }, time.Second)
  39. }
  40. func TestBalancerUnderNetworkPartitionDelete(t *testing.T) {
  41. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  42. _, err := cli.Delete(ctx, "a")
  43. if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  44. return errExpected
  45. }
  46. return err
  47. }, time.Second)
  48. }
  49. func TestBalancerUnderNetworkPartitionTxn(t *testing.T) {
  50. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  51. _, err := cli.Txn(ctx).
  52. If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
  53. Then(clientv3.OpPut("foo", "bar")).
  54. Else(clientv3.OpPut("foo", "baz")).Commit()
  55. if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  56. return errExpected
  57. }
  58. return err
  59. }, time.Second)
  60. }
  61. // TestBalancerUnderNetworkPartitionLinearizableGetWithLongTimeout tests
  62. // when one member becomes isolated, first quorum Get request succeeds
  63. // by switching endpoints within the timeout (long enough to cover endpoint switch).
  64. func TestBalancerUnderNetworkPartitionLinearizableGetWithLongTimeout(t *testing.T) {
  65. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  66. _, err := cli.Get(ctx, "a")
  67. if err == rpctypes.ErrTimeout {
  68. return errExpected
  69. }
  70. return err
  71. }, 7*time.Second)
  72. }
  73. // TestBalancerUnderNetworkPartitionLinearizableGetWithShortTimeout tests
  74. // when one member becomes isolated, first quorum Get request fails,
  75. // and following retry succeeds with client balancer switching to others.
  76. func TestBalancerUnderNetworkPartitionLinearizableGetWithShortTimeout(t *testing.T) {
  77. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  78. _, err := cli.Get(ctx, "a")
  79. if isClientTimeout(err) || isServerCtxTimeout(err) {
  80. return errExpected
  81. }
  82. return err
  83. }, time.Second)
  84. }
  85. func TestBalancerUnderNetworkPartitionSerializableGet(t *testing.T) {
  86. testBalancerUnderNetworkPartition(t, func(cli *clientv3.Client, ctx context.Context) error {
  87. _, err := cli.Get(ctx, "a", clientv3.WithSerializable())
  88. return err
  89. }, time.Second)
  90. }
  91. func testBalancerUnderNetworkPartition(t *testing.T, op func(*clientv3.Client, context.Context) error, timeout time.Duration) {
  92. defer testutil.AfterTest(t)
  93. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  94. Size: 3,
  95. SkipCreatingClient: true,
  96. })
  97. defer clus.Terminate(t)
  98. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
  99. // expect pin eps[0]
  100. ccfg := clientv3.Config{
  101. Endpoints: []string{eps[0]},
  102. DialTimeout: 3 * time.Second,
  103. DialOptions: []grpc.DialOption{grpc.WithBlock()},
  104. }
  105. cli, err := clientv3.New(ccfg)
  106. if err != nil {
  107. t.Fatal(err)
  108. }
  109. defer cli.Close()
  110. // wait for eps[0] to be pinned
  111. mustWaitPinReady(t, cli)
  112. // add other endpoints for later endpoint switch
  113. cli.SetEndpoints(eps...)
  114. time.Sleep(time.Second * 2)
  115. clus.Members[0].InjectPartition(t, clus.Members[1:]...)
  116. for i := 0; i < 5; i++ {
  117. ctx, cancel := context.WithTimeout(context.Background(), timeout)
  118. err = op(cli, ctx)
  119. cancel()
  120. if err == nil {
  121. break
  122. }
  123. if err != errExpected {
  124. t.Errorf("#%d: expected '%v', got '%v'", i, errExpected, err)
  125. }
  126. // give enough time for endpoint switch
  127. // TODO: remove random sleep by syncing directly with balancer
  128. if i == 0 {
  129. time.Sleep(5 * time.Second)
  130. }
  131. }
  132. if err != nil {
  133. t.Errorf("balancer did not switch in time (%v)", err)
  134. }
  135. }
  136. // TestBalancerUnderNetworkPartitionLinearizableGetLeaderElection ensures balancer
  137. // switches endpoint when leader fails and linearizable get requests returns
  138. // "etcdserver: request timed out".
  139. func TestBalancerUnderNetworkPartitionLinearizableGetLeaderElection(t *testing.T) {
  140. defer testutil.AfterTest(t)
  141. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  142. Size: 3,
  143. SkipCreatingClient: true,
  144. })
  145. defer clus.Terminate(t)
  146. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
  147. lead := clus.WaitLeader(t)
  148. timeout := 3 * clus.Members[(lead+1)%2].ServerConfig.ReqTimeout()
  149. cli, err := clientv3.New(clientv3.Config{
  150. Endpoints: []string{eps[(lead+1)%2]},
  151. DialTimeout: 2 * time.Second,
  152. DialOptions: []grpc.DialOption{grpc.WithBlock()},
  153. })
  154. if err != nil {
  155. t.Fatal(err)
  156. }
  157. defer cli.Close()
  158. // add all eps to list, so that when the original pined one fails
  159. // the client can switch to other available eps
  160. cli.SetEndpoints(eps[lead], eps[(lead+1)%2])
  161. // isolate leader
  162. clus.Members[lead].InjectPartition(t, clus.Members[(lead+1)%3], clus.Members[(lead+2)%3])
  163. // expects balancer to round robin to leader within two attempts
  164. for i := 0; i < 2; i++ {
  165. ctx, cancel := context.WithTimeout(context.TODO(), timeout)
  166. _, err = cli.Get(ctx, "a")
  167. cancel()
  168. if err == nil {
  169. break
  170. }
  171. }
  172. if err != nil {
  173. t.Fatal(err)
  174. }
  175. }
  176. func TestBalancerUnderNetworkPartitionWatchLeader(t *testing.T) {
  177. testBalancerUnderNetworkPartitionWatch(t, true)
  178. }
  179. func TestBalancerUnderNetworkPartitionWatchFollower(t *testing.T) {
  180. testBalancerUnderNetworkPartitionWatch(t, false)
  181. }
  182. // testBalancerUnderNetworkPartitionWatch ensures watch stream
  183. // to a partitioned node be closed when context requires leader.
  184. func testBalancerUnderNetworkPartitionWatch(t *testing.T, isolateLeader bool) {
  185. defer testutil.AfterTest(t)
  186. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  187. Size: 3,
  188. SkipCreatingClient: true,
  189. })
  190. defer clus.Terminate(t)
  191. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}
  192. target := clus.WaitLeader(t)
  193. if !isolateLeader {
  194. target = (target + 1) % 3
  195. }
  196. // pin eps[target]
  197. watchCli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[target]}})
  198. if err != nil {
  199. t.Fatal(err)
  200. }
  201. defer watchCli.Close()
  202. // wait for eps[target] to be pinned
  203. mustWaitPinReady(t, watchCli)
  204. // add all eps to list, so that when the original pined one fails
  205. // the client can switch to other available eps
  206. watchCli.SetEndpoints(eps...)
  207. wch := watchCli.Watch(clientv3.WithRequireLeader(context.Background()), "foo", clientv3.WithCreatedNotify())
  208. select {
  209. case <-wch:
  210. case <-time.After(integration.RequestWaitTimeout):
  211. t.Fatal("took too long to create watch")
  212. }
  213. // isolate eps[target]
  214. clus.Members[target].InjectPartition(t,
  215. clus.Members[(target+1)%3],
  216. clus.Members[(target+2)%3],
  217. )
  218. select {
  219. case ev := <-wch:
  220. if len(ev.Events) != 0 {
  221. t.Fatal("expected no event")
  222. }
  223. if err = ev.Err(); err != rpctypes.ErrNoLeader {
  224. t.Fatalf("expected %v, got %v", rpctypes.ErrNoLeader, err)
  225. }
  226. case <-time.After(integration.RequestWaitTimeout): // enough time to detect leader lost
  227. t.Fatal("took too long to detect leader lost")
  228. }
  229. }