black_hole_test.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. // Copyright 2017 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
// limitations under the License.

// +build !cluster_proxy

package integration
  16. import (
  17. "context"
  18. "testing"
  19. "time"
  20. "github.com/coreos/etcd/clientv3"
  21. "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  22. "github.com/coreos/etcd/integration"
  23. "github.com/coreos/etcd/pkg/testutil"
  24. )
// TestBalancerUnderBlackholeKeepAliveWatch tests when watch discovers it cannot talk to
// the blackholed endpoint, the client balancer switches to a healthy one.
// TODO: test server-to-client keepalive ping
func TestBalancerUnderBlackholeKeepAliveWatch(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{
		Size:                 2,
		GRPCKeepAliveMinTime: 1 * time.Millisecond, // avoid too_many_pings
	})
	defer clus.Terminate(t)

	eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}

	// Start with only eps[0] so the watch is pinned to the member we blackhole.
	ccfg := clientv3.Config{
		Endpoints:            []string{eps[0]},
		DialTimeout:          1 * time.Second,
		DialKeepAliveTime:    1 * time.Second,
		DialKeepAliveTimeout: 500 * time.Millisecond,
	}

	// gRPC internal implementation related.
	pingInterval := ccfg.DialKeepAliveTime + ccfg.DialKeepAliveTimeout
	// 3s for slow machine to process watch and reset connections
	// TODO: only send healthy endpoint to gRPC so gRPC won't waste time to
	// dial for unhealthy endpoint.
	// then we can reduce 3s to 1s.
	timeout := pingInterval + integration.RequestWaitTimeout

	cli, err := clientv3.New(ccfg)
	if err != nil {
		t.Fatal(err)
	}
	defer cli.Close()

	wch := cli.Watch(context.Background(), "foo", clientv3.WithCreatedNotify())
	if _, ok := <-wch; !ok {
		t.Fatalf("watch failed on creation")
	}

	// endpoint can switch to eps[1] when it detects the failure of eps[0]
	cli.SetEndpoints(eps...)

	clus.Members[0].Blackhole()

	// Put via the second member; the watch must still deliver the event,
	// proving the balancer failed over away from the blackholed eps[0].
	if _, err = clus.Client(1).Put(context.TODO(), "foo", "bar"); err != nil {
		t.Fatal(err)
	}
	select {
	case <-wch:
	case <-time.After(timeout):
		t.Error("took too long to receive watch events")
	}

	clus.Members[0].Unblackhole()

	// waiting for moving eps[0] out of unhealthy, so that it can be re-pinned.
	time.Sleep(ccfg.DialTimeout)

	clus.Members[1].Blackhole()

	// make sure client[0] can connect to eps[0] after removing the blackhole.
	if _, err = clus.Client(0).Get(context.TODO(), "foo"); err != nil {
		t.Fatal(err)
	}
	if _, err = clus.Client(0).Put(context.TODO(), "foo", "bar1"); err != nil {
		t.Fatal(err)
	}

	select {
	case <-wch:
	case <-time.After(timeout):
		t.Error("took too long to receive watch events")
	}
}
  86. func TestBalancerUnderBlackholeNoKeepAlivePut(t *testing.T) {
  87. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  88. _, err := cli.Put(ctx, "foo", "bar")
  89. if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  90. return errExpected
  91. }
  92. return err
  93. })
  94. }
  95. func TestBalancerUnderBlackholeNoKeepAliveDelete(t *testing.T) {
  96. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  97. _, err := cli.Delete(ctx, "foo")
  98. if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  99. return errExpected
  100. }
  101. return err
  102. })
  103. }
  104. func TestBalancerUnderBlackholeNoKeepAliveTxn(t *testing.T) {
  105. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  106. _, err := cli.Txn(ctx).
  107. If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
  108. Then(clientv3.OpPut("foo", "bar")).
  109. Else(clientv3.OpPut("foo", "baz")).Commit()
  110. if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  111. return errExpected
  112. }
  113. return err
  114. })
  115. }
  116. func TestBalancerUnderBlackholeNoKeepAliveLinearizableGet(t *testing.T) {
  117. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  118. _, err := cli.Get(ctx, "a")
  119. if err == context.DeadlineExceeded || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout {
  120. return errExpected
  121. }
  122. return err
  123. })
  124. }
  125. func TestBalancerUnderBlackholeNoKeepAliveSerializableGet(t *testing.T) {
  126. testBalancerUnderBlackholeNoKeepAlive(t, func(cli *clientv3.Client, ctx context.Context) error {
  127. _, err := cli.Get(ctx, "a", clientv3.WithSerializable())
  128. if err == context.DeadlineExceeded || isServerCtxTimeout(err) {
  129. return errExpected
  130. }
  131. return err
  132. })
  133. }
  134. // testBalancerUnderBlackholeNoKeepAlive ensures that first request to blackholed endpoint
  135. // fails due to context timeout, but succeeds on next try, with endpoint switch.
  136. func testBalancerUnderBlackholeNoKeepAlive(t *testing.T, op func(*clientv3.Client, context.Context) error) {
  137. defer testutil.AfterTest(t)
  138. clus := integration.NewClusterV3(t, &integration.ClusterConfig{
  139. Size: 2,
  140. SkipCreatingClient: true,
  141. })
  142. defer clus.Terminate(t)
  143. eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}
  144. ccfg := clientv3.Config{
  145. Endpoints: []string{eps[0]},
  146. DialTimeout: 1 * time.Second,
  147. }
  148. cli, err := clientv3.New(ccfg)
  149. if err != nil {
  150. t.Fatal(err)
  151. }
  152. defer cli.Close()
  153. // wait for eps[0] to be pinned
  154. mustWaitPinReady(t, cli)
  155. // add all eps to list, so that when the original pined one fails
  156. // the client can switch to other available eps
  157. cli.SetEndpoints(eps...)
  158. // blackhole eps[0]
  159. clus.Members[0].Blackhole()
  160. // fail first due to blackhole, retry should succeed
  161. // TODO: first operation can succeed
  162. // when gRPC supports better retry on non-delivered request
  163. for i := 0; i < 2; i++ {
  164. ctx, cancel := context.WithTimeout(context.Background(), time.Second)
  165. err = op(cli, ctx)
  166. cancel()
  167. if err == nil {
  168. break
  169. }
  170. if i == 0 {
  171. if err != errExpected {
  172. t.Errorf("#%d: expected %v, got %v", i, errExpected, err)
  173. }
  174. } else if err != nil {
  175. t.Errorf("#%d: failed with error %v", i, err)
  176. }
  177. }
  178. }