balancer.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. *
  3. * Copyright 2016, Google Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are
  8. * met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above
  13. * copyright notice, this list of conditions and the following disclaimer
  14. * in the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Google Inc. nor the names of its
  17. * contributors may be used to endorse or promote products derived from
  18. * this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. */
  33. package grpc
  34. import (
  35. "fmt"
  36. "net"
  37. "sync"
  38. "golang.org/x/net/context"
  39. "google.golang.org/grpc/codes"
  40. "google.golang.org/grpc/credentials"
  41. "google.golang.org/grpc/grpclog"
  42. "google.golang.org/grpc/naming"
  43. )
  44. // Address represents a server the client connects to.
  45. // This is the EXPERIMENTAL API and may be changed or extended in the future.
  46. type Address struct {
  47. // Addr is the server address on which a connection will be established.
  48. Addr string
  49. // Metadata is the information associated with Addr, which may be used
  50. // to make load balancing decision.
  51. Metadata interface{}
  52. }
  53. // BalancerConfig specifies the configurations for Balancer.
  54. type BalancerConfig struct {
  55. // DialCreds is the transport credential the Balancer implementation can
  56. // use to dial to a remote load balancer server. The Balancer implementations
  57. // can ignore this if it does not need to talk to another party securely.
  58. DialCreds credentials.TransportCredentials
  59. // Dialer is the custom dialer the Balancer implementation can use to dial
  60. // to a remote load balancer server. The Balancer implementations
  61. // can ignore this if it doesn't need to talk to remote balancer.
  62. Dialer func(context.Context, string) (net.Conn, error)
  63. }
  64. // BalancerGetOptions configures a Get call.
  65. // This is the EXPERIMENTAL API and may be changed or extended in the future.
  66. type BalancerGetOptions struct {
  67. // BlockingWait specifies whether Get should block when there is no
  68. // connected address.
  69. BlockingWait bool
  70. }
  71. // Balancer chooses network addresses for RPCs.
  72. // This is the EXPERIMENTAL API and may be changed or extended in the future.
  73. type Balancer interface {
  74. // Start does the initialization work to bootstrap a Balancer. For example,
  75. // this function may start the name resolution and watch the updates. It will
  76. // be called when dialing.
  77. Start(target string, config BalancerConfig) error
  78. // Up informs the Balancer that gRPC has a connection to the server at
  79. // addr. It returns down which is called once the connection to addr gets
  80. // lost or closed.
  81. // TODO: It is not clear how to construct and take advantage of the meaningful error
  82. // parameter for down. Need realistic demands to guide.
  83. Up(addr Address) (down func(error))
  84. // Get gets the address of a server for the RPC corresponding to ctx.
  85. // i) If it returns a connected address, gRPC internals issues the RPC on the
  86. // connection to this address;
  87. // ii) If it returns an address on which the connection is under construction
  88. // (initiated by Notify(...)) but not connected, gRPC internals
  89. // * fails RPC if the RPC is fail-fast and connection is in the TransientFailure or
  90. // Shutdown state;
  91. // or
  92. // * issues RPC on the connection otherwise.
  93. // iii) If it returns an address on which the connection does not exist, gRPC
  94. // internals treats it as an error and will fail the corresponding RPC.
  95. //
  96. // Therefore, the following is the recommended rule when writing a custom Balancer.
  97. // If opts.BlockingWait is true, it should return a connected address or
  98. // block if there is no connected address. It should respect the timeout or
  99. // cancellation of ctx when blocking. If opts.BlockingWait is false (for fail-fast
  100. // RPCs), it should return an address it has notified via Notify(...) immediately
  101. // instead of blocking.
  102. //
  103. // The function returns put which is called once the rpc has completed or failed.
  104. // put can collect and report RPC stats to a remote load balancer.
  105. //
  106. // This function should only return the errors Balancer cannot recover by itself.
  107. // gRPC internals will fail the RPC if an error is returned.
  108. Get(ctx context.Context, opts BalancerGetOptions) (addr Address, put func(), err error)
  109. // Notify returns a channel that is used by gRPC internals to watch the addresses
  110. // gRPC needs to connect. The addresses might be from a name resolver or remote
  111. // load balancer. gRPC internals will compare it with the existing connected
  112. // addresses. If the address Balancer notified is not in the existing connected
  113. // addresses, gRPC starts to connect the address. If an address in the existing
  114. // connected addresses is not in the notification list, the corresponding connection
  115. // is shutdown gracefully. Otherwise, there are no operations to take. Note that
  116. // the Address slice must be the full list of the Addresses which should be connected.
  117. // It is NOT delta.
  118. Notify() <-chan []Address
  119. // Close shuts down the balancer.
  120. Close() error
  121. }
  122. // downErr implements net.Error. It is constructed by gRPC internals and passed to the down
  123. // call of Balancer.
  124. type downErr struct {
  125. timeout bool
  126. temporary bool
  127. desc string
  128. }
  129. func (e downErr) Error() string { return e.desc }
  130. func (e downErr) Timeout() bool { return e.timeout }
  131. func (e downErr) Temporary() bool { return e.temporary }
  132. func downErrorf(timeout, temporary bool, format string, a ...interface{}) downErr {
  133. return downErr{
  134. timeout: timeout,
  135. temporary: temporary,
  136. desc: fmt.Sprintf(format, a...),
  137. }
  138. }
  139. // RoundRobin returns a Balancer that selects addresses round-robin. It uses r to watch
  140. // the name resolution updates and updates the addresses available correspondingly.
  141. func RoundRobin(r naming.Resolver) Balancer {
  142. return &roundRobin{r: r}
  143. }
  144. type addrInfo struct {
  145. addr Address
  146. connected bool
  147. }
  148. type roundRobin struct {
  149. r naming.Resolver
  150. w naming.Watcher
  151. addrs []*addrInfo // all the addresses the client should potentially connect
  152. mu sync.Mutex
  153. addrCh chan []Address // the channel to notify gRPC internals the list of addresses the client should connect to.
  154. next int // index of the next address to return for Get()
  155. waitCh chan struct{} // the channel to block when there is no connected address available
  156. done bool // The Balancer is closed.
  157. }
  158. func (rr *roundRobin) watchAddrUpdates() error {
  159. updates, err := rr.w.Next()
  160. if err != nil {
  161. grpclog.Printf("grpc: the naming watcher stops working due to %v.\n", err)
  162. return err
  163. }
  164. rr.mu.Lock()
  165. defer rr.mu.Unlock()
  166. for _, update := range updates {
  167. addr := Address{
  168. Addr: update.Addr,
  169. Metadata: update.Metadata,
  170. }
  171. switch update.Op {
  172. case naming.Add:
  173. var exist bool
  174. for _, v := range rr.addrs {
  175. if addr == v.addr {
  176. exist = true
  177. grpclog.Println("grpc: The name resolver wanted to add an existing address: ", addr)
  178. break
  179. }
  180. }
  181. if exist {
  182. continue
  183. }
  184. rr.addrs = append(rr.addrs, &addrInfo{addr: addr})
  185. case naming.Delete:
  186. for i, v := range rr.addrs {
  187. if addr == v.addr {
  188. copy(rr.addrs[i:], rr.addrs[i+1:])
  189. rr.addrs = rr.addrs[:len(rr.addrs)-1]
  190. break
  191. }
  192. }
  193. default:
  194. grpclog.Println("Unknown update.Op ", update.Op)
  195. }
  196. }
  197. // Make a copy of rr.addrs and write it onto rr.addrCh so that gRPC internals gets notified.
  198. open := make([]Address, len(rr.addrs))
  199. for i, v := range rr.addrs {
  200. open[i] = v.addr
  201. }
  202. if rr.done {
  203. return ErrClientConnClosing
  204. }
  205. rr.addrCh <- open
  206. return nil
  207. }
  208. func (rr *roundRobin) Start(target string, config BalancerConfig) error {
  209. rr.mu.Lock()
  210. defer rr.mu.Unlock()
  211. if rr.done {
  212. return ErrClientConnClosing
  213. }
  214. if rr.r == nil {
  215. // If there is no name resolver installed, it is not needed to
  216. // do name resolution. In this case, target is added into rr.addrs
  217. // as the only address available and rr.addrCh stays nil.
  218. rr.addrs = append(rr.addrs, &addrInfo{addr: Address{Addr: target}})
  219. return nil
  220. }
  221. w, err := rr.r.Resolve(target)
  222. if err != nil {
  223. return err
  224. }
  225. rr.w = w
  226. rr.addrCh = make(chan []Address)
  227. go func() {
  228. for {
  229. if err := rr.watchAddrUpdates(); err != nil {
  230. return
  231. }
  232. }
  233. }()
  234. return nil
  235. }
  236. // Up sets the connected state of addr and sends notification if there are pending
  237. // Get() calls.
  238. func (rr *roundRobin) Up(addr Address) func(error) {
  239. rr.mu.Lock()
  240. defer rr.mu.Unlock()
  241. var cnt int
  242. for _, a := range rr.addrs {
  243. if a.addr == addr {
  244. if a.connected {
  245. return nil
  246. }
  247. a.connected = true
  248. }
  249. if a.connected {
  250. cnt++
  251. }
  252. }
  253. // addr is only one which is connected. Notify the Get() callers who are blocking.
  254. if cnt == 1 && rr.waitCh != nil {
  255. close(rr.waitCh)
  256. rr.waitCh = nil
  257. }
  258. return func(err error) {
  259. rr.down(addr, err)
  260. }
  261. }
  262. // down unsets the connected state of addr.
  263. func (rr *roundRobin) down(addr Address, err error) {
  264. rr.mu.Lock()
  265. defer rr.mu.Unlock()
  266. for _, a := range rr.addrs {
  267. if addr == a.addr {
  268. a.connected = false
  269. break
  270. }
  271. }
  272. }
  273. // Get returns the next addr in the rotation.
  274. func (rr *roundRobin) Get(ctx context.Context, opts BalancerGetOptions) (addr Address, put func(), err error) {
  275. var ch chan struct{}
  276. rr.mu.Lock()
  277. if rr.done {
  278. rr.mu.Unlock()
  279. err = ErrClientConnClosing
  280. return
  281. }
  282. if len(rr.addrs) > 0 {
  283. if rr.next >= len(rr.addrs) {
  284. rr.next = 0
  285. }
  286. next := rr.next
  287. for {
  288. a := rr.addrs[next]
  289. next = (next + 1) % len(rr.addrs)
  290. if a.connected {
  291. addr = a.addr
  292. rr.next = next
  293. rr.mu.Unlock()
  294. return
  295. }
  296. if next == rr.next {
  297. // Has iterated all the possible address but none is connected.
  298. break
  299. }
  300. }
  301. }
  302. if !opts.BlockingWait {
  303. if len(rr.addrs) == 0 {
  304. rr.mu.Unlock()
  305. err = Errorf(codes.Unavailable, "there is no address available")
  306. return
  307. }
  308. // Returns the next addr on rr.addrs for failfast RPCs.
  309. addr = rr.addrs[rr.next].addr
  310. rr.next++
  311. rr.mu.Unlock()
  312. return
  313. }
  314. // Wait on rr.waitCh for non-failfast RPCs.
  315. if rr.waitCh == nil {
  316. ch = make(chan struct{})
  317. rr.waitCh = ch
  318. } else {
  319. ch = rr.waitCh
  320. }
  321. rr.mu.Unlock()
  322. for {
  323. select {
  324. case <-ctx.Done():
  325. err = ctx.Err()
  326. return
  327. case <-ch:
  328. rr.mu.Lock()
  329. if rr.done {
  330. rr.mu.Unlock()
  331. err = ErrClientConnClosing
  332. return
  333. }
  334. if len(rr.addrs) > 0 {
  335. if rr.next >= len(rr.addrs) {
  336. rr.next = 0
  337. }
  338. next := rr.next
  339. for {
  340. a := rr.addrs[next]
  341. next = (next + 1) % len(rr.addrs)
  342. if a.connected {
  343. addr = a.addr
  344. rr.next = next
  345. rr.mu.Unlock()
  346. return
  347. }
  348. if next == rr.next {
  349. // Has iterated all the possible address but none is connected.
  350. break
  351. }
  352. }
  353. }
  354. // The newly added addr got removed by Down() again.
  355. if rr.waitCh == nil {
  356. ch = make(chan struct{})
  357. rr.waitCh = ch
  358. } else {
  359. ch = rr.waitCh
  360. }
  361. rr.mu.Unlock()
  362. }
  363. }
  364. }
  365. func (rr *roundRobin) Notify() <-chan []Address {
  366. return rr.addrCh
  367. }
  368. func (rr *roundRobin) Close() error {
  369. rr.mu.Lock()
  370. defer rr.mu.Unlock()
  371. if rr.done {
  372. return errBalancerClosed
  373. }
  374. rr.done = true
  375. if rr.w != nil {
  376. rr.w.Close()
  377. }
  378. if rr.waitCh != nil {
  379. close(rr.waitCh)
  380. rr.waitCh = nil
  381. }
  382. if rr.addrCh != nil {
  383. close(rr.addrCh)
  384. }
  385. return nil
  386. }