grpc1.7-health_test.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. // Copyright 2018 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package balancer
  15. import (
  16. "context"
  17. "errors"
  18. "net"
  19. "sync"
  20. "testing"
  21. "time"
  22. pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
  23. "go.etcd.io/etcd/pkg/testutil"
  24. "google.golang.org/grpc"
  25. )
  26. var endpoints = []string{"localhost:2379", "localhost:22379", "localhost:32379"}
  27. func TestOldHealthBalancerGetUnblocking(t *testing.T) {
  28. hb := NewGRPC17Health(endpoints, minHealthRetryDuration, func(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) { return nil, nil })
  29. defer hb.Close()
  30. if addrs := <-hb.Notify(); len(addrs) != len(endpoints) {
  31. t.Errorf("Initialize NewGRPC17Health should have triggered Notify() chan, but it didn't")
  32. }
  33. unblockingOpts := grpc.BalancerGetOptions{BlockingWait: false}
  34. _, _, err := hb.Get(context.Background(), unblockingOpts)
  35. if err != ErrNoAddrAvailable {
  36. t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
  37. }
  38. down1 := hb.Up(grpc.Address{Addr: endpoints[1]})
  39. if addrs := <-hb.Notify(); len(addrs) != 1 {
  40. t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
  41. }
  42. down2 := hb.Up(grpc.Address{Addr: endpoints[2]})
  43. addrFirst, putFun, err := hb.Get(context.Background(), unblockingOpts)
  44. if err != nil {
  45. t.Errorf("Get() with up endpoints should success, got %v", err)
  46. }
  47. if addrFirst.Addr != endpoints[1] {
  48. t.Errorf("Get() didn't return expected address, got %v", addrFirst)
  49. }
  50. if putFun == nil {
  51. t.Errorf("Get() returned unexpected nil put function")
  52. }
  53. addrSecond, _, _ := hb.Get(context.Background(), unblockingOpts)
  54. if addrFirst.Addr != addrSecond.Addr {
  55. t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
  56. }
  57. down1(errors.New("error"))
  58. if addrs := <-hb.Notify(); len(addrs) != len(endpoints)-1 { // we call down on one endpoint
  59. t.Errorf("closing the only connection should triggered balancer to send the %d endpoints via Notify chan so that we can establish a connection", len(endpoints)-1)
  60. }
  61. down2(errors.New("error"))
  62. _, _, err = hb.Get(context.Background(), unblockingOpts)
  63. if err != ErrNoAddrAvailable {
  64. t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
  65. }
  66. }
  67. func TestOldHealthBalancerGetBlocking(t *testing.T) {
  68. hb := NewGRPC17Health(endpoints, minHealthRetryDuration, func(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) { return nil, nil })
  69. defer hb.Close()
  70. if addrs := <-hb.Notify(); len(addrs) != len(endpoints) {
  71. t.Errorf("Initialize NewGRPC17Health should have triggered Notify() chan, but it didn't")
  72. }
  73. blockingOpts := grpc.BalancerGetOptions{BlockingWait: true}
  74. ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
  75. _, _, err := hb.Get(ctx, blockingOpts)
  76. cancel()
  77. if err != context.DeadlineExceeded {
  78. t.Errorf("Get() with no up endpoints should timeout, got %v", err)
  79. }
  80. downC := make(chan func(error), 1)
  81. go func() {
  82. // ensure hb.Up() will be called after hb.Get() to see if Up() releases blocking Get()
  83. time.Sleep(time.Millisecond * 100)
  84. f := hb.Up(grpc.Address{Addr: endpoints[1]})
  85. if addrs := <-hb.Notify(); len(addrs) != 1 {
  86. t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
  87. }
  88. downC <- f
  89. }()
  90. addrFirst, putFun, err := hb.Get(context.Background(), blockingOpts)
  91. if err != nil {
  92. t.Errorf("Get() with up endpoints should success, got %v", err)
  93. }
  94. if addrFirst.Addr != endpoints[1] {
  95. t.Errorf("Get() didn't return expected address, got %v", addrFirst)
  96. }
  97. if putFun == nil {
  98. t.Errorf("Get() returned unexpected nil put function")
  99. }
  100. down1 := <-downC
  101. down2 := hb.Up(grpc.Address{Addr: endpoints[2]})
  102. addrSecond, _, _ := hb.Get(context.Background(), blockingOpts)
  103. if addrFirst.Addr != addrSecond.Addr {
  104. t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
  105. }
  106. down1(errors.New("error"))
  107. if addrs := <-hb.Notify(); len(addrs) != len(endpoints)-1 { // we call down on one endpoint
  108. t.Errorf("closing the only connection should triggered balancer to send the %d endpoints via Notify chan so that we can establish a connection", len(endpoints)-1)
  109. }
  110. down2(errors.New("error"))
  111. ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100)
  112. _, _, err = hb.Get(ctx, blockingOpts)
  113. cancel()
  114. if err != context.DeadlineExceeded {
  115. t.Errorf("Get() with no up endpoints should timeout, got %v", err)
  116. }
  117. }
  118. // TestOldHealthBalancerGraylist checks one endpoint is tried after the other
  119. // due to gray listing.
  120. func TestOldHealthBalancerGraylist(t *testing.T) {
  121. var wg sync.WaitGroup
  122. // Use 3 endpoints so gray list doesn't fallback to all connections
  123. // after failing on 2 endpoints.
  124. lns, eps := make([]net.Listener, 3), make([]string, 3)
  125. wg.Add(3)
  126. connc := make(chan string, 2)
  127. for i := range eps {
  128. ln, err := net.Listen("tcp", ":0")
  129. testutil.AssertNil(t, err)
  130. lns[i], eps[i] = ln, ln.Addr().String()
  131. go func() {
  132. defer wg.Done()
  133. for {
  134. conn, err := ln.Accept()
  135. if err != nil {
  136. return
  137. }
  138. _, err = conn.Read(make([]byte, 512))
  139. conn.Close()
  140. if err == nil {
  141. select {
  142. case connc <- ln.Addr().String():
  143. // sleep some so balancer catches up
  144. // before attempted next reconnect.
  145. time.Sleep(50 * time.Millisecond)
  146. default:
  147. }
  148. }
  149. }
  150. }()
  151. }
  152. hb := NewGRPC17Health(eps, 5*time.Second, func(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) { return nil, nil })
  153. conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(hb))
  154. testutil.AssertNil(t, err)
  155. defer conn.Close()
  156. kvc := pb.NewKVClient(conn)
  157. <-hb.Ready()
  158. kvc.Range(context.TODO(), &pb.RangeRequest{})
  159. ep1 := <-connc
  160. kvc.Range(context.TODO(), &pb.RangeRequest{})
  161. ep2 := <-connc
  162. for _, ln := range lns {
  163. ln.Close()
  164. }
  165. wg.Wait()
  166. if ep1 == ep2 {
  167. t.Fatalf("expected %q != %q", ep1, ep2)
  168. }
  169. }
  170. // TestBalancerDoNotBlockOnClose ensures that balancer and grpc don't deadlock each other
  171. // due to rapid open/close conn. The deadlock causes balancer.Close() to block forever.
  172. // See issue: https://github.com/etcd-io/etcd/issues/7283 for more detail.
  173. func TestOldHealthBalancerDoNotBlockOnClose(t *testing.T) {
  174. defer testutil.AfterTest(t)
  175. kcl := newKillConnListener(t, 3)
  176. defer kcl.close()
  177. for i := 0; i < 5; i++ {
  178. hb := NewGRPC17Health(kcl.endpoints(), minHealthRetryDuration, func(ep string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) { return nil, nil })
  179. conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(hb))
  180. if err != nil {
  181. t.Fatal(err)
  182. }
  183. kvc := pb.NewKVClient(conn)
  184. <-hb.readyc
  185. var wg sync.WaitGroup
  186. wg.Add(100)
  187. cctx, cancel := context.WithCancel(context.TODO())
  188. for j := 0; j < 100; j++ {
  189. go func() {
  190. defer wg.Done()
  191. kvc.Range(cctx, &pb.RangeRequest{}, grpc.FailFast(false))
  192. }()
  193. }
  194. // balancer.Close() might block
  195. // if balancer and grpc deadlock each other.
  196. bclosec, cclosec := make(chan struct{}), make(chan struct{})
  197. go func() {
  198. defer close(bclosec)
  199. hb.Close()
  200. }()
  201. go func() {
  202. defer close(cclosec)
  203. conn.Close()
  204. }()
  205. select {
  206. case <-bclosec:
  207. case <-time.After(3 * time.Second):
  208. testutil.FatalStack(t, "balancer close timeout")
  209. }
  210. select {
  211. case <-cclosec:
  212. case <-time.After(3 * time.Second):
  213. t.Fatal("grpc conn close timeout")
  214. }
  215. cancel()
  216. wg.Wait()
  217. }
  218. }
  219. // killConnListener listens incoming conn and kills it immediately.
  220. type killConnListener struct {
  221. wg sync.WaitGroup
  222. eps []string
  223. stopc chan struct{}
  224. t *testing.T
  225. }
  226. func newKillConnListener(t *testing.T, size int) *killConnListener {
  227. kcl := &killConnListener{stopc: make(chan struct{}), t: t}
  228. for i := 0; i < size; i++ {
  229. ln, err := net.Listen("tcp", ":0")
  230. if err != nil {
  231. t.Fatal(err)
  232. }
  233. kcl.eps = append(kcl.eps, ln.Addr().String())
  234. kcl.wg.Add(1)
  235. go kcl.listen(ln)
  236. }
  237. return kcl
  238. }
  239. func (kcl *killConnListener) endpoints() []string {
  240. return kcl.eps
  241. }
  242. func (kcl *killConnListener) listen(l net.Listener) {
  243. go func() {
  244. defer kcl.wg.Done()
  245. for {
  246. conn, err := l.Accept()
  247. select {
  248. case <-kcl.stopc:
  249. return
  250. default:
  251. }
  252. if err != nil {
  253. kcl.t.Error(err)
  254. }
  255. time.Sleep(1 * time.Millisecond)
  256. conn.Close()
  257. }
  258. }()
  259. <-kcl.stopc
  260. l.Close()
  261. }
  262. func (kcl *killConnListener) close() {
  263. close(kcl.stopc)
  264. kcl.wg.Wait()
  265. }