health_balancer_test.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. // Copyright 2017 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "errors"
  18. "net"
  19. "sync"
  20. "testing"
  21. "time"
  22. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  23. "github.com/coreos/etcd/pkg/testutil"
  24. "google.golang.org/grpc"
  25. )
  // endpoints is the fixed set of endpoint addresses handed to the balancer
  // under test by the Get() tests in this file.
  var endpoints = []string{
  	"localhost:2379",
  	"localhost:22379",
  	"localhost:32379",
  }
  27. func TestBalancerGetUnblocking(t *testing.T) {
  28. hb := newHealthBalancer(endpoints, minHealthRetryDuration, func(string) (bool, error) { return true, nil })
  29. defer hb.Close()
  30. if addrs := <-hb.Notify(); len(addrs) != len(endpoints) {
  31. t.Errorf("Initialize newHealthBalancer should have triggered Notify() chan, but it didn't")
  32. }
  33. unblockingOpts := grpc.BalancerGetOptions{BlockingWait: false}
  34. _, _, err := hb.Get(context.Background(), unblockingOpts)
  35. if err != ErrNoAddrAvilable {
  36. t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
  37. }
  38. down1 := hb.Up(grpc.Address{Addr: endpoints[1]})
  39. if addrs := <-hb.Notify(); len(addrs) != 1 {
  40. t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
  41. }
  42. down2 := hb.Up(grpc.Address{Addr: endpoints[2]})
  43. addrFirst, putFun, err := hb.Get(context.Background(), unblockingOpts)
  44. if err != nil {
  45. t.Errorf("Get() with up endpoints should success, got %v", err)
  46. }
  47. if addrFirst.Addr != endpoints[1] {
  48. t.Errorf("Get() didn't return expected address, got %v", addrFirst)
  49. }
  50. if putFun == nil {
  51. t.Errorf("Get() returned unexpected nil put function")
  52. }
  53. addrSecond, _, _ := hb.Get(context.Background(), unblockingOpts)
  54. if addrFirst.Addr != addrSecond.Addr {
  55. t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
  56. }
  57. down1(errors.New("error"))
  58. if addrs := <-hb.Notify(); len(addrs) != len(endpoints)-1 { // we call down on one endpoint
  59. t.Errorf("closing the only connection should triggered balancer to send the %d endpoints via Notify chan so that we can establish a connection", len(endpoints)-1)
  60. }
  61. down2(errors.New("error"))
  62. _, _, err = hb.Get(context.Background(), unblockingOpts)
  63. if err != ErrNoAddrAvilable {
  64. t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
  65. }
  66. }
  67. func TestBalancerGetBlocking(t *testing.T) {
  68. hb := newHealthBalancer(endpoints, minHealthRetryDuration, func(string) (bool, error) { return true, nil })
  69. defer hb.Close()
  70. if addrs := <-hb.Notify(); len(addrs) != len(endpoints) {
  71. t.Errorf("Initialize newHealthBalancer should have triggered Notify() chan, but it didn't")
  72. }
  73. blockingOpts := grpc.BalancerGetOptions{BlockingWait: true}
  74. ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
  75. _, _, err := hb.Get(ctx, blockingOpts)
  76. cancel()
  77. if err != context.DeadlineExceeded {
  78. t.Errorf("Get() with no up endpoints should timeout, got %v", err)
  79. }
  80. downC := make(chan func(error), 1)
  81. go func() {
  82. // ensure hb.Up() will be called after hb.Get() to see if Up() releases blocking Get()
  83. time.Sleep(time.Millisecond * 100)
  84. f := hb.Up(grpc.Address{Addr: endpoints[1]})
  85. if addrs := <-hb.Notify(); len(addrs) != 1 {
  86. t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
  87. }
  88. downC <- f
  89. }()
  90. addrFirst, putFun, err := hb.Get(context.Background(), blockingOpts)
  91. if err != nil {
  92. t.Errorf("Get() with up endpoints should success, got %v", err)
  93. }
  94. if addrFirst.Addr != endpoints[1] {
  95. t.Errorf("Get() didn't return expected address, got %v", addrFirst)
  96. }
  97. if putFun == nil {
  98. t.Errorf("Get() returned unexpected nil put function")
  99. }
  100. down1 := <-downC
  101. down2 := hb.Up(grpc.Address{Addr: endpoints[2]})
  102. addrSecond, _, _ := hb.Get(context.Background(), blockingOpts)
  103. if addrFirst.Addr != addrSecond.Addr {
  104. t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
  105. }
  106. down1(errors.New("error"))
  107. if addrs := <-hb.Notify(); len(addrs) != len(endpoints)-1 { // we call down on one endpoint
  108. t.Errorf("closing the only connection should triggered balancer to send the %d endpoints via Notify chan so that we can establish a connection", len(endpoints)-1)
  109. }
  110. down2(errors.New("error"))
  111. ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100)
  112. _, _, err = hb.Get(ctx, blockingOpts)
  113. cancel()
  114. if err != context.DeadlineExceeded {
  115. t.Errorf("Get() with no up endpoints should timeout, got %v", err)
  116. }
  117. }
  118. // TestHealthBalancerGraylist checks one endpoint is tried after the other
  119. // due to gray listing.
  120. func TestHealthBalancerGraylist(t *testing.T) {
  121. var wg sync.WaitGroup
  122. // Use 3 endpoints so gray list doesn't fallback to all connections
  123. // after failing on 2 endpoints.
  124. lns, eps := make([]net.Listener, 3), make([]string, 3)
  125. wg.Add(3)
  126. connc := make(chan string, 2)
  127. for i := range eps {
  128. ln, err := net.Listen("tcp", ":0")
  129. testutil.AssertNil(t, err)
  130. lns[i], eps[i] = ln, ln.Addr().String()
  131. go func() {
  132. defer wg.Done()
  133. for {
  134. conn, err := ln.Accept()
  135. if err != nil {
  136. return
  137. }
  138. _, err = conn.Read(make([]byte, 512))
  139. conn.Close()
  140. if err == nil {
  141. select {
  142. case connc <- ln.Addr().String():
  143. // sleep some so balancer catches up
  144. // before attempted next reconnect.
  145. time.Sleep(50 * time.Millisecond)
  146. default:
  147. }
  148. }
  149. }
  150. }()
  151. }
  152. tf := func(s string) (bool, error) { return false, nil }
  153. hb := newHealthBalancer(eps, 5*time.Second, tf)
  154. conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(hb))
  155. testutil.AssertNil(t, err)
  156. defer conn.Close()
  157. kvc := pb.NewKVClient(conn)
  158. <-hb.ready()
  159. kvc.Range(context.TODO(), &pb.RangeRequest{})
  160. ep1 := <-connc
  161. kvc.Range(context.TODO(), &pb.RangeRequest{})
  162. ep2 := <-connc
  163. for _, ln := range lns {
  164. ln.Close()
  165. }
  166. wg.Wait()
  167. if ep1 == ep2 {
  168. t.Fatalf("expected %q != %q", ep1, ep2)
  169. }
  170. }
  171. // TestBalancerDoNotBlockOnClose ensures that balancer and grpc don't deadlock each other
  172. // due to rapid open/close conn. The deadlock causes balancer.Close() to block forever.
  173. // See issue: https://github.com/coreos/etcd/issues/7283 for more detail.
  174. func TestBalancerDoNotBlockOnClose(t *testing.T) {
  175. defer testutil.AfterTest(t)
  176. kcl := newKillConnListener(t, 3)
  177. defer kcl.close()
  178. for i := 0; i < 5; i++ {
  179. hb := newHealthBalancer(kcl.endpoints(), minHealthRetryDuration, func(string) (bool, error) { return true, nil })
  180. conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(hb))
  181. if err != nil {
  182. t.Fatal(err)
  183. }
  184. kvc := pb.NewKVClient(conn)
  185. <-hb.readyc
  186. var wg sync.WaitGroup
  187. wg.Add(100)
  188. cctx, cancel := context.WithCancel(context.TODO())
  189. for j := 0; j < 100; j++ {
  190. go func() {
  191. defer wg.Done()
  192. kvc.Range(cctx, &pb.RangeRequest{}, grpc.FailFast(false))
  193. }()
  194. }
  195. // balancer.Close() might block
  196. // if balancer and grpc deadlock each other.
  197. bclosec, cclosec := make(chan struct{}), make(chan struct{})
  198. go func() {
  199. defer close(bclosec)
  200. hb.Close()
  201. }()
  202. go func() {
  203. defer close(cclosec)
  204. conn.Close()
  205. }()
  206. select {
  207. case <-bclosec:
  208. case <-time.After(3 * time.Second):
  209. testutil.FatalStack(t, "balancer close timeout")
  210. }
  211. select {
  212. case <-cclosec:
  213. case <-time.After(3 * time.Second):
  214. t.Fatal("grpc conn close timeout")
  215. }
  216. cancel()
  217. wg.Wait()
  218. }
  219. }
  // killConnListener listens for incoming connections on a set of TCP
  // listeners and kills (closes) each accepted connection almost immediately.
  type killConnListener struct {
  	wg    sync.WaitGroup // one count per accept-loop goroutine
  	eps   []string       // listener addresses, in creation order
  	stopc chan struct{}  // closed by close() to stop every listener
  	t     *testing.T     // used to report unexpected Accept failures
  }

  // newKillConnListener starts size listeners on system-chosen TCP ports and
  // returns a killConnListener that tracks them. It must be called from the
  // test goroutine because it calls t.Fatal when a listener cannot be created.
  // The caller is responsible for calling close() when done.
  func newKillConnListener(t *testing.T, size int) *killConnListener {
  	kcl := &killConnListener{stopc: make(chan struct{}), t: t}
  	for i := 0; i < size; i++ {
  		ln, err := net.Listen("tcp", ":0")
  		if err != nil {
  			// The caller never receives kcl on this path, so shut down the
  			// listeners already started instead of leaking them.
  			kcl.close()
  			t.Fatal(err)
  		}
  		kcl.eps = append(kcl.eps, ln.Addr().String())
  		kcl.wg.Add(1)
  		go kcl.listen(ln)
  	}
  	return kcl
  }

  // endpoints returns the addresses of all listeners, in creation order.
  func (kcl *killConnListener) endpoints() []string {
  	return kcl.eps
  }

  // listen runs the accept loop for l in a new goroutine and then blocks until
  // stopc is closed, at which point it closes l. Closing l makes the pending
  // Accept fail, which ends the accept loop. kcl.wg must already account for
  // the accept loop (one Add(1) per call) before listen is invoked.
  func (kcl *killConnListener) listen(l net.Listener) {
  	go func() {
  		defer kcl.wg.Done()
  		for {
  			conn, err := l.Accept()
  			select {
  			case <-kcl.stopc:
  				// Shutting down. Accept normally fails here because l was
  				// closed, but a connection may have been accepted in the
  				// same instant; do not leak it.
  				if err == nil {
  					conn.Close()
  				}
  				return
  			default:
  			}
  			if err != nil {
  				// This is not the test goroutine, so t.Fatal (FailNow) must
  				// not be called here; record the failure and stop accepting.
  				kcl.t.Error(err)
  				return
  			}
  			time.Sleep(1 * time.Millisecond)
  			conn.Close()
  		}
  	}()
  	<-kcl.stopc
  	l.Close()
  }

  // close stops every listener and waits for all accept loops to exit.
  // It must be called at most once; a second call would close stopc twice.
  func (kcl *killConnListener) close() {
  	close(kcl.stopc)
  	kcl.wg.Wait()
  }
  266. }