balancer_test.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "errors"
  18. "net"
  19. "sync"
  20. "testing"
  21. "time"
  22. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  23. "github.com/coreos/etcd/pkg/testutil"
  24. "google.golang.org/grpc"
  25. )
  26. var (
  27. endpoints = []string{"localhost:2379", "localhost:22379", "localhost:32379"}
  28. )
  29. func TestBalancerGetUnblocking(t *testing.T) {
  30. sb := newSimpleBalancer(endpoints)
  31. defer sb.Close()
  32. if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
  33. t.Errorf("Initialize newSimpleBalancer should have triggered Notify() chan, but it didn't")
  34. }
  35. unblockingOpts := grpc.BalancerGetOptions{BlockingWait: false}
  36. _, _, err := sb.Get(context.Background(), unblockingOpts)
  37. if err != ErrNoAddrAvilable {
  38. t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
  39. }
  40. down1 := sb.Up(grpc.Address{Addr: endpoints[1]})
  41. if addrs := <-sb.Notify(); len(addrs) != 1 {
  42. t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
  43. }
  44. down2 := sb.Up(grpc.Address{Addr: endpoints[2]})
  45. addrFirst, putFun, err := sb.Get(context.Background(), unblockingOpts)
  46. if err != nil {
  47. t.Errorf("Get() with up endpoints should success, got %v", err)
  48. }
  49. if addrFirst.Addr != endpoints[1] {
  50. t.Errorf("Get() didn't return expected address, got %v", addrFirst)
  51. }
  52. if putFun == nil {
  53. t.Errorf("Get() returned unexpected nil put function")
  54. }
  55. addrSecond, _, _ := sb.Get(context.Background(), unblockingOpts)
  56. if addrFirst.Addr != addrSecond.Addr {
  57. t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
  58. }
  59. down1(errors.New("error"))
  60. if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
  61. t.Errorf("closing the only connection should triggered balancer to send the all endpoints via Notify chan so that we can establish a connection")
  62. }
  63. down2(errors.New("error"))
  64. _, _, err = sb.Get(context.Background(), unblockingOpts)
  65. if err != ErrNoAddrAvilable {
  66. t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
  67. }
  68. }
  69. func TestBalancerGetBlocking(t *testing.T) {
  70. sb := newSimpleBalancer(endpoints)
  71. defer sb.Close()
  72. if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
  73. t.Errorf("Initialize newSimpleBalancer should have triggered Notify() chan, but it didn't")
  74. }
  75. blockingOpts := grpc.BalancerGetOptions{BlockingWait: true}
  76. ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
  77. _, _, err := sb.Get(ctx, blockingOpts)
  78. cancel()
  79. if err != context.DeadlineExceeded {
  80. t.Errorf("Get() with no up endpoints should timeout, got %v", err)
  81. }
  82. downC := make(chan func(error), 1)
  83. go func() {
  84. // ensure sb.Up() will be called after sb.Get() to see if Up() releases blocking Get()
  85. time.Sleep(time.Millisecond * 100)
  86. f := sb.Up(grpc.Address{Addr: endpoints[1]})
  87. if addrs := <-sb.Notify(); len(addrs) != 1 {
  88. t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
  89. }
  90. downC <- f
  91. }()
  92. addrFirst, putFun, err := sb.Get(context.Background(), blockingOpts)
  93. if err != nil {
  94. t.Errorf("Get() with up endpoints should success, got %v", err)
  95. }
  96. if addrFirst.Addr != endpoints[1] {
  97. t.Errorf("Get() didn't return expected address, got %v", addrFirst)
  98. }
  99. if putFun == nil {
  100. t.Errorf("Get() returned unexpected nil put function")
  101. }
  102. down1 := <-downC
  103. down2 := sb.Up(grpc.Address{Addr: endpoints[2]})
  104. addrSecond, _, _ := sb.Get(context.Background(), blockingOpts)
  105. if addrFirst.Addr != addrSecond.Addr {
  106. t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
  107. }
  108. down1(errors.New("error"))
  109. if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
  110. t.Errorf("closing the only connection should triggered balancer to send the all endpoints via Notify chan so that we can establish a connection")
  111. }
  112. down2(errors.New("error"))
  113. ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100)
  114. _, _, err = sb.Get(ctx, blockingOpts)
  115. cancel()
  116. if err != context.DeadlineExceeded {
  117. t.Errorf("Get() with no up endpoints should timeout, got %v", err)
  118. }
  119. }
  120. // TestHealthBalancerGraylist checks one endpoint is tried after the other
  121. // due to gray listing.
  122. func TestHealthBalancerGraylist(t *testing.T) {
  123. var wg sync.WaitGroup
  124. // Use 3 endpoints so gray list doesn't fallback to all connections
  125. // after failing on 2 endpoints.
  126. lns, eps := make([]net.Listener, 3), make([]string, 3)
  127. wg.Add(3)
  128. connc := make(chan string, 2)
  129. for i := range eps {
  130. ln, err := net.Listen("tcp", ":0")
  131. testutil.AssertNil(t, err)
  132. lns[i], eps[i] = ln, ln.Addr().String()
  133. go func() {
  134. defer wg.Done()
  135. for {
  136. conn, err := ln.Accept()
  137. if err != nil {
  138. return
  139. }
  140. _, err = conn.Read(make([]byte, 512))
  141. conn.Close()
  142. if err == nil {
  143. select {
  144. case connc <- ln.Addr().String():
  145. // sleep some so balancer catches up
  146. // before attempted next reconnect.
  147. time.Sleep(50 * time.Millisecond)
  148. default:
  149. }
  150. }
  151. }
  152. }()
  153. }
  154. sb := newSimpleBalancer(eps)
  155. tf := func(s string) (bool, error) { return false, nil }
  156. hb := newHealthBalancer(sb, 5*time.Second, tf)
  157. conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(hb))
  158. testutil.AssertNil(t, err)
  159. defer conn.Close()
  160. kvc := pb.NewKVClient(conn)
  161. <-hb.ready()
  162. kvc.Range(context.TODO(), &pb.RangeRequest{})
  163. ep1 := <-connc
  164. kvc.Range(context.TODO(), &pb.RangeRequest{})
  165. ep2 := <-connc
  166. for _, ln := range lns {
  167. ln.Close()
  168. }
  169. wg.Wait()
  170. if ep1 == ep2 {
  171. t.Fatalf("expected %q != %q", ep1, ep2)
  172. }
  173. }
  174. // TestBalancerDoNotBlockOnClose ensures that balancer and grpc don't deadlock each other
  175. // due to rapid open/close conn. The deadlock causes balancer.Close() to block forever.
  176. // See issue: https://github.com/coreos/etcd/issues/7283 for more detail.
  177. func TestBalancerDoNotBlockOnClose(t *testing.T) {
  178. defer testutil.AfterTest(t)
  179. kcl := newKillConnListener(t, 3)
  180. defer kcl.close()
  181. for i := 0; i < 5; i++ {
  182. sb := newSimpleBalancer(kcl.endpoints())
  183. conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(sb))
  184. if err != nil {
  185. t.Fatal(err)
  186. }
  187. kvc := pb.NewKVClient(conn)
  188. <-sb.readyc
  189. var wg sync.WaitGroup
  190. wg.Add(100)
  191. cctx, cancel := context.WithCancel(context.TODO())
  192. for j := 0; j < 100; j++ {
  193. go func() {
  194. defer wg.Done()
  195. kvc.Range(cctx, &pb.RangeRequest{}, grpc.FailFast(false))
  196. }()
  197. }
  198. // balancer.Close() might block
  199. // if balancer and grpc deadlock each other.
  200. bclosec, cclosec := make(chan struct{}), make(chan struct{})
  201. go func() {
  202. defer close(bclosec)
  203. sb.Close()
  204. }()
  205. go func() {
  206. defer close(cclosec)
  207. conn.Close()
  208. }()
  209. select {
  210. case <-bclosec:
  211. case <-time.After(3 * time.Second):
  212. testutil.FatalStack(t, "balancer close timeout")
  213. }
  214. select {
  215. case <-cclosec:
  216. case <-time.After(3 * time.Second):
  217. t.Fatal("grpc conn close timeout")
  218. }
  219. cancel()
  220. wg.Wait()
  221. }
  222. }
  223. // killConnListener listens incoming conn and kills it immediately.
  224. type killConnListener struct {
  225. wg sync.WaitGroup
  226. eps []string
  227. stopc chan struct{}
  228. t *testing.T
  229. }
  230. func newKillConnListener(t *testing.T, size int) *killConnListener {
  231. kcl := &killConnListener{stopc: make(chan struct{}), t: t}
  232. for i := 0; i < size; i++ {
  233. ln, err := net.Listen("tcp", ":0")
  234. if err != nil {
  235. t.Fatal(err)
  236. }
  237. kcl.eps = append(kcl.eps, ln.Addr().String())
  238. kcl.wg.Add(1)
  239. go kcl.listen(ln)
  240. }
  241. return kcl
  242. }
  243. func (kcl *killConnListener) endpoints() []string {
  244. return kcl.eps
  245. }
  246. func (kcl *killConnListener) listen(l net.Listener) {
  247. go func() {
  248. defer kcl.wg.Done()
  249. for {
  250. conn, err := l.Accept()
  251. select {
  252. case <-kcl.stopc:
  253. return
  254. default:
  255. }
  256. if err != nil {
  257. kcl.t.Fatal(err)
  258. }
  259. time.Sleep(1 * time.Millisecond)
  260. conn.Close()
  261. }
  262. }()
  263. <-kcl.stopc
  264. l.Close()
  265. }
  266. func (kcl *killConnListener) close() {
  267. close(kcl.stopc)
  268. kcl.wg.Wait()
  269. }