black_hole_test.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. // Copyright 2017 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // +build !cluster_proxy
  15. package integration
  16. import (
  17. "context"
  18. "testing"
  19. "time"
  20. "go.etcd.io/etcd/clientv3"
  21. "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
  22. "go.etcd.io/etcd/integration"
  23. "go.etcd.io/etcd/pkg/testutil"
  24. "google.golang.org/grpc"
  25. )
  26. // TestBalancerUnderBlackholeKeepAliveWatch tests when watch discovers it cannot talk to
  27. // blackholed endpoint, client balancer switches to healthy one.
  28. // TODO: test server-to-client keepalive ping
  29. func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
  30. defer testutil.AfterTest(t)
  31. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  32. Size: 2,
  33. GRPCKeepAliveMinTime: time.Millisecond, // avoid too_many_pings
  34. })
  35. defer clus.Terminate(t)
  36. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}
  37. ccfg := clientv3.Config{
  38. Endpoints: []string{eps[0]},
  39. DialTimeout: time.Second,
  40. DialOptions: []grpc.DialOption{grpc.WithBlock()},
  41. DialKeepAliveTime: time.Second,
  42. DialKeepAliveTimeout: 500 * time.Millisecond,
  43. }
  44. // gRPC internal implementation related.
  45. pingInterval := ccfg.DialKeepAliveTime + ccfg.DialKeepAliveTimeout
  46. // 3s for slow machine to process watch and reset connections
  47. // TODO: only send healthy endpoint to gRPC so gRPC wont waste time to
  48. // dial for unhealthy endpoint.
  49. // then we can reduce 3s to 1s.
  50. timeout := pingInterval + integration.RequestWaitTimeout
  51. cli, err := clientv3.New(ccfg)
  52. if err != nil {
  53. t.Fatal(err)
  54. }
  55. defer cli.Close()
  56. wch := cli.Watch(context.Background(), "foo", clientv3.WithCreatedNotify())
  57. if _, ok := <-wch; !ok {
  58. t.Fatalf("watch failed on creation")
  59. }
  60. // endpoint can switch to eps[1] when it detects the failure of eps[0]
  61. cli.SetEndpoints(eps...)
  62. // give enough time for balancer resolution
  63. time.Sleep(5 * time.Second)
  64. clus.Members[0].Blackhole()
  65. if _, err = clus.Client(1).Put(context.TODO(), "foo", "bar"); err != nil {
  66. t.Fatal(err)
  67. }
  68. select {
  69. case <-wch:
  70. case <-time.After(timeout):
  71. t.Error("took too long to receive watch events")
  72. }
  73. clus.Members[0].Unblackhole()
  74. // waiting for moving eps[0] out of unhealthy, so that it can be re-pined.
  75. time.Sleep(ccfg.DialTimeout)
  76. clus.Members[1].Blackhole()
  77. // make sure client[0] can connect to eps[0] after remove the blackhole.
  78. if _, err = clus.Client(0).Get(context.TODO(), "foo"); err != nil {
  79. t.Fatal(err)
  80. }
  81. if _, err = clus.Client(0).Put(context.TODO(), "foo", "bar1"); err != nil {
  82. t.Fatal(err)
  83. }
  84. select {
  85. case <-wch:
  86. case <-time.After(timeout):
  87. t.Error("took too long to receive watch events")
  88. }
  89. }
  90. func TestBalancerUnderBlackholeNoKeepAlivePut(t *testing.T) {
  91. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  92. _, err := cli.Put(ctx, "foo", "bar")
  93. if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  94. return errExpected
  95. }
  96. return err
  97. })
  98. }
  99. func TestBalancerUnderBlackholeNoKeepAliveDelete(t *testing.T) {
  100. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  101. _, err := cli.Delete(ctx, "foo")
  102. if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  103. return errExpected
  104. }
  105. return err
  106. })
  107. }
  108. func TestBalancerUnderBlackholeNoKeepAliveTxn(t *testing.T) {
  109. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  110. _, err := cli.Txn(ctx).
  111. If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
  112. Then(clientv3.OpPut("foo", "bar")).
  113. Else(clientv3.OpPut("foo", "baz")).Commit()
  114. if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  115. return errExpected
  116. }
  117. return err
  118. })
  119. }
  120. func TestBalancerUnderBlackholeNoKeepAliveLinearizableGet(t *testing.T) {
  121. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  122. _, err := cli.Get(ctx, "a")
  123. if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  124. return errExpected
  125. }
  126. return err
  127. })
  128. }
  129. func TestBalancerUnderBlackholeNoKeepAliveSerializableGet(t *testing.T) {
  130. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  131. _, err := cli.Get(ctx, "a", clientv3.WithSerializable())
  132. if isClientTimeout(err) || isServerCtxTimeout(err) {
  133. return errExpected
  134. }
  135. return err
  136. })
  137. }
  138. // testBalancerUnderBlackholeNoKeepAlive ensures that first request to blackholed endpoint
  139. // fails due to context timeout, but succeeds on next try, with endpoint switch.
  140. func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Client, context.Context) error) {
  141. defer testutil.AfterTest(t)
  142. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  143. Size: 2,
  144. SkipCreatingClient: true,
  145. })
  146. defer clus.Terminate(t)
  147. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}
  148. ccfg := clientv3.Config{
  149. Endpoints: []string{eps[0]},
  150. DialTimeout: 1 * time.Second,
  151. DialOptions: []grpc.DialOption{grpc.WithBlock()},
  152. }
  153. cli, err := clientv3.New(ccfg)
  154. if err != nil {
  155. t.Fatal(err)
  156. }
  157. defer cli.Close()
  158. // wait for eps[0] to be pinned
  159. mustWaitPinReady(t, cli)
  160. // add all eps to list, so that when the original pined one fails
  161. // the client can switch to other available eps
  162. cli.SetEndpoints(eps...)
  163. // blackhole eps[0]
  164. clus.Members[0].Blackhole()
  165. // With round robin balancer, client will make a request to a healthy endpoint
  166. // within a few requests.
  167. // TODO: first operation can succeed
  168. // when gRPC supports better retry on non-delivered request
  169. for i := 0; i < 5; i++ {
  170. ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
  171. err = op(cli, ctx)
  172. cancel()
  173. if err == nil {
  174. break
  175. } else if err == errExpected {
  176. t.Logf("#%d: current error %v", i, err)
  177. } else {
  178. t.Errorf("#%d: failed with error %v", i, err)
  179. }
  180. }
  181. if err != nil {
  182. t.Fatal(err)
  183. }
  184. }