clientconn.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /*
  2. *
  3. * Copyright 2014, Google Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are
  8. * met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above
  13. * copyright notice, this list of conditions and the following disclaimer
  14. * in the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Google Inc. nor the names of its
  17. * contributors may be used to endorse or promote products derived from
  18. * this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. */
  33. package grpc
  34. import (
  35. "errors"
  36. "fmt"
  37. "net"
  38. "strings"
  39. "sync"
  40. "time"
  41. "golang.org/x/net/context"
  42. "golang.org/x/net/trace"
  43. "google.golang.org/grpc/credentials"
  44. "google.golang.org/grpc/grpclog"
  45. "google.golang.org/grpc/transport"
  46. )
  var (
  	// ErrUnspecTarget indicates that the target address is unspecified.
  	ErrUnspecTarget = errors.New("grpc: target is unspecified")
  	// ErrNoTransportSecurity indicates that there is no transport security
  	// being set for ClientConn. Users should either set one or explicitly
  	// call WithInsecure DialOption to disable security.
  	ErrNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
  	// ErrCredentialsMisuse indicates that users want to transmit security
  	// information (e.g., oauth2 token), which requires a secure connection,
  	// over an insecure connection. The message names
  	// WithTransportCredentials, the DialOption in this package that
  	// installs connection level security.
  	ErrCredentialsMisuse = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
  	// ErrClientConnClosing indicates that the operation is illegal because
  	// the session is closing.
  	ErrClientConnClosing = errors.New("grpc: the client connection is closing")
  	// ErrClientConnTimeout indicates that the connection could not be
  	// established or re-established within the specified timeout.
  	ErrClientConnTimeout = errors.New("grpc: timed out trying to connect")
  	// minConnectTimeout is the minimum time to give a single connection
  	// attempt to complete.
  	minConnectTimeout = 20 * time.Second
  )
  // dialOptions configure a Dial call. dialOptions are set by the DialOption
  // values passed to Dial.
  type dialOptions struct {
  	// codec is set by WithCodec; Dial falls back to protoCodec{} when nil.
  	codec Codec
  	// cp is set by WithCompressor.
  	cp Compressor
  	// dc is set by WithDecompressor.
  	dc Decompressor
  	// bs is the backoff strategy consulted between connection attempts;
  	// Dial falls back to DefaultBackoffConfig when nil.
  	bs backoffStrategy
  	// picker is set by WithPicker; Dial falls back to a unicastPicker for
  	// the dial target when nil.
  	picker Picker
  	// block is set by WithBlock and makes NewConn connect synchronously.
  	block bool
  	// insecure is set by WithInsecure and lifts NewConn's requirement for a
  	// transport authenticator.
  	insecure bool
  	// copts carries the auth options, timeout, dialer and user agent that
  	// are handed to transport.NewClientTransport.
  	copts transport.ConnectOptions
  }

  // DialOption configures how we set up the connection. Each option mutates
  // the dialOptions it is applied to.
  type DialOption func(*dialOptions)
  81. // WithCodec returns a DialOption which sets a codec for message marshaling and unmarshaling.
  82. func WithCodec(c Codec) DialOption {
  83. return func(o *dialOptions) {
  84. o.codec = c
  85. }
  86. }
  87. // WithCompressor returns a DialOption which sets a CompressorGenerator for generating message
  88. // compressor.
  89. func WithCompressor(cp Compressor) DialOption {
  90. return func(o *dialOptions) {
  91. o.cp = cp
  92. }
  93. }
  94. // WithDecompressor returns a DialOption which sets a DecompressorGenerator for generating
  95. // message decompressor.
  96. func WithDecompressor(dc Decompressor) DialOption {
  97. return func(o *dialOptions) {
  98. o.dc = dc
  99. }
  100. }
  101. // WithPicker returns a DialOption which sets a picker for connection selection.
  102. func WithPicker(p Picker) DialOption {
  103. return func(o *dialOptions) {
  104. o.picker = p
  105. }
  106. }
  107. // WithBackoffConfig configures the dialer to use the provided backoff
  108. // parameters after connection failures.
  109. func WithBackoffConfig(b *BackoffConfig) DialOption {
  110. return withBackoff(b)
  111. }
  112. // withBackoff sets the backoff strategy used for retries after a
  113. // failed connection attempt.
  114. //
  115. // This can be exported if arbitrary backoff strategies are allowed by GRPC.
  116. func withBackoff(bs backoffStrategy) DialOption {
  117. return func(o *dialOptions) {
  118. o.bs = bs
  119. }
  120. }
  121. // WithBlock returns a DialOption which makes caller of Dial blocks until the underlying
  122. // connection is up. Without this, Dial returns immediately and connecting the server
  123. // happens in background.
  124. func WithBlock() DialOption {
  125. return func(o *dialOptions) {
  126. o.block = true
  127. }
  128. }
  129. // WithInsecure returns a DialOption which disables transport security for this ClientConn.
  130. // Note that transport security is required unless WithInsecure is set.
  131. func WithInsecure() DialOption {
  132. return func(o *dialOptions) {
  133. o.insecure = true
  134. }
  135. }
  136. // WithTransportCredentials returns a DialOption which configures a
  137. // connection level security credentials (e.g., TLS/SSL).
  138. func WithTransportCredentials(creds credentials.TransportAuthenticator) DialOption {
  139. return func(o *dialOptions) {
  140. o.copts.AuthOptions = append(o.copts.AuthOptions, creds)
  141. }
  142. }
  143. // WithPerRPCCredentials returns a DialOption which sets
  144. // credentials which will place auth state on each outbound RPC.
  145. func WithPerRPCCredentials(creds credentials.Credentials) DialOption {
  146. return func(o *dialOptions) {
  147. o.copts.AuthOptions = append(o.copts.AuthOptions, creds)
  148. }
  149. }
  150. // WithTimeout returns a DialOption that configures a timeout for dialing a client connection.
  151. func WithTimeout(d time.Duration) DialOption {
  152. return func(o *dialOptions) {
  153. o.copts.Timeout = d
  154. }
  155. }
  156. // WithDialer returns a DialOption that specifies a function to use for dialing network addresses.
  157. func WithDialer(f func(addr string, timeout time.Duration) (net.Conn, error)) DialOption {
  158. return func(o *dialOptions) {
  159. o.copts.Dialer = f
  160. }
  161. }
  162. // WithUserAgent returns a DialOption that specifies a user agent string for all the RPCs.
  163. func WithUserAgent(s string) DialOption {
  164. return func(o *dialOptions) {
  165. o.copts.UserAgent = s
  166. }
  167. }
  168. // Dial creates a client connection the given target.
  169. func Dial(target string, opts ...DialOption) (*ClientConn, error) {
  170. cc := &ClientConn{
  171. target: target,
  172. }
  173. for _, opt := range opts {
  174. opt(&cc.dopts)
  175. }
  176. if cc.dopts.codec == nil {
  177. // Set the default codec.
  178. cc.dopts.codec = protoCodec{}
  179. }
  180. if cc.dopts.bs == nil {
  181. cc.dopts.bs = DefaultBackoffConfig
  182. }
  183. if cc.dopts.picker == nil {
  184. cc.dopts.picker = &unicastPicker{
  185. target: target,
  186. }
  187. }
  188. if err := cc.dopts.picker.Init(cc); err != nil {
  189. return nil, err
  190. }
  191. colonPos := strings.LastIndex(target, ":")
  192. if colonPos == -1 {
  193. colonPos = len(target)
  194. }
  195. cc.authority = target[:colonPos]
  196. return cc, nil
  197. }
  // ConnectivityState indicates the state of a client connection.
  type ConnectivityState int

  const (
  	// Idle indicates the ClientConn is idle.
  	Idle ConnectivityState = iota
  	// Connecting indicates the ClientConn is connecting.
  	Connecting
  	// Ready indicates the ClientConn is ready for work.
  	Ready
  	// TransientFailure indicates the ClientConn has seen a failure but expects to recover.
  	TransientFailure
  	// Shutdown indicates the ClientConn has started shutting down.
  	Shutdown
  )

  // String returns the upper-case name of s ("IDLE", "CONNECTING", "READY",
  // "TRANSIENT_FAILURE" or "SHUTDOWN"). A value outside the declared constants
  // is rendered as "UNKNOWN(<n>)" rather than panicking, so that formatting or
  // logging an unexpected state can never take the process down.
  func (s ConnectivityState) String() string {
  	switch s {
  	case Idle:
  		return "IDLE"
  	case Connecting:
  		return "CONNECTING"
  	case Ready:
  		return "READY"
  	case TransientFailure:
  		return "TRANSIENT_FAILURE"
  	case Shutdown:
  		return "SHUTDOWN"
  	default:
  		// Convert to int so fmt cannot re-enter String.
  		return fmt.Sprintf("UNKNOWN(%d)", int(s))
  	}
  }
  // ClientConn represents a client connection to an RPC service.
  type ClientConn struct {
  	// target is the address passed to Dial.
  	target string
  	// authority is derived by Dial from target: the part before the last
  	// colon, or the whole target when it contains no colon.
  	authority string
  	// dopts holds the dial options after Dial has applied its defaults.
  	dopts dialOptions
  }
  234. // State returns the connectivity state of cc.
  235. // This is EXPERIMENTAL API.
  236. func (cc *ClientConn) State() (ConnectivityState, error) {
  237. return cc.dopts.picker.State()
  238. }
  239. // WaitForStateChange blocks until the state changes to something other than the sourceState.
  240. // It returns the new state or error.
  241. // This is EXPERIMENTAL API.
  242. func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState ConnectivityState) (ConnectivityState, error) {
  243. return cc.dopts.picker.WaitForStateChange(ctx, sourceState)
  244. }
  245. // Close starts to tear down the ClientConn.
  246. func (cc *ClientConn) Close() error {
  247. return cc.dopts.picker.Close()
  248. }
  // Conn is a client connection to a single destination.
  type Conn struct {
  	// target and dopts are copied from the ClientConn given to NewConn.
  	target string
  	dopts  dialOptions
  	// resetChan has capacity 1; NotifyReset sends on it without blocking
  	// and transportMonitor receives from it to trigger a reconnect.
  	resetChan chan int
  	// shutdownChan is closed by Close so that transportMonitor can exit.
  	shutdownChan chan struct{}
  	// events is the trace event log; it is only created when EnableTracing
  	// is set and is finished and reset to nil by Close.
  	events trace.EventLog

  	// mu is held by the methods of Conn while they read or update state
  	// and ready, and while they write to events.
  	mu    sync.Mutex
  	state ConnectivityState
  	// stateCV is built on mu and is broadcast whenever state changes.
  	stateCV *sync.Cond
  	// ready is closed and becomes nil when a new transport is up or failed
  	// due to timeout.
  	ready chan struct{}
  	// transport is the most recently established client transport.
  	transport transport.ClientTransport
  }
  264. // NewConn creates a Conn.
  265. func NewConn(cc *ClientConn) (*Conn, error) {
  266. if cc.target == "" {
  267. return nil, ErrUnspecTarget
  268. }
  269. c := &Conn{
  270. target: cc.target,
  271. dopts: cc.dopts,
  272. resetChan: make(chan int, 1),
  273. shutdownChan: make(chan struct{}),
  274. }
  275. if EnableTracing {
  276. c.events = trace.NewEventLog("grpc.ClientConn", c.target)
  277. }
  278. if !c.dopts.insecure {
  279. var ok bool
  280. for _, cd := range c.dopts.copts.AuthOptions {
  281. if _, ok = cd.(credentials.TransportAuthenticator); ok {
  282. break
  283. }
  284. }
  285. if !ok {
  286. return nil, ErrNoTransportSecurity
  287. }
  288. } else {
  289. for _, cd := range c.dopts.copts.AuthOptions {
  290. if cd.RequireTransportSecurity() {
  291. return nil, ErrCredentialsMisuse
  292. }
  293. }
  294. }
  295. c.stateCV = sync.NewCond(&c.mu)
  296. if c.dopts.block {
  297. if err := c.resetTransport(false); err != nil {
  298. c.Close()
  299. return nil, err
  300. }
  301. // Start to monitor the error status of transport.
  302. go c.transportMonitor()
  303. } else {
  304. // Start a goroutine connecting to the server asynchronously.
  305. go func() {
  306. if err := c.resetTransport(false); err != nil {
  307. grpclog.Printf("Failed to dial %s: %v; please retry.", c.target, err)
  308. c.Close()
  309. return
  310. }
  311. c.transportMonitor()
  312. }()
  313. }
  314. return c, nil
  315. }
  316. // printf records an event in cc's event log, unless cc has been closed.
  317. // REQUIRES cc.mu is held.
  318. func (cc *Conn) printf(format string, a ...interface{}) {
  319. if cc.events != nil {
  320. cc.events.Printf(format, a...)
  321. }
  322. }
  323. // errorf records an error in cc's event log, unless cc has been closed.
  324. // REQUIRES cc.mu is held.
  325. func (cc *Conn) errorf(format string, a ...interface{}) {
  326. if cc.events != nil {
  327. cc.events.Errorf(format, a...)
  328. }
  329. }
  330. // State returns the connectivity state of the Conn
  331. func (cc *Conn) State() ConnectivityState {
  332. cc.mu.Lock()
  333. defer cc.mu.Unlock()
  334. return cc.state
  335. }
  // WaitForStateChange blocks until the state changes to something other than the sourceState.
  // It returns immediately with the current state if that already differs from
  // sourceState. If ctx is done before the state changes, it returns the state
  // at that moment together with ctx.Err().
  func (cc *Conn) WaitForStateChange(ctx context.Context, sourceState ConnectivityState) (ConnectivityState, error) {
  	cc.mu.Lock()
  	defer cc.mu.Unlock()
  	if sourceState != cc.state {
  		return cc.state, nil
  	}
  	done := make(chan struct{})
  	// err is written by the helper goroutine and read by the wait loop; both
  	// accesses happen with cc.mu held.
  	var err error
  	// sync.Cond cannot be waited on together with a context, so a helper
  	// goroutine turns context cancellation into a Broadcast that wakes the
  	// loop below.
  	go func() {
  		select {
  		case <-ctx.Done():
  			cc.mu.Lock()
  			err = ctx.Err()
  			cc.stateCV.Broadcast()
  			cc.mu.Unlock()
  		case <-done:
  		}
  	}()
  	// Closing done on return releases the helper goroutine when the state
  	// changed before ctx was done.
  	defer close(done)
  	for sourceState == cc.state {
  		cc.stateCV.Wait()
  		// Checked right after every wake-up, before the state is re-examined.
  		if err != nil {
  			return cc.state, err
  		}
  	}
  	return cc.state, nil
  }
  364. // NotifyReset tries to signal the underlying transport needs to be reset due to
  365. // for example a name resolution change in flight.
  366. func (cc *Conn) NotifyReset() {
  367. select {
  368. case cc.resetChan <- 0:
  369. default:
  370. }
  371. }
  372. func (cc *Conn) resetTransport(closeTransport bool) error {
  373. var retries int
  374. start := time.Now()
  375. for {
  376. cc.mu.Lock()
  377. cc.printf("connecting")
  378. if cc.state == Shutdown {
  379. // cc.Close() has been invoked.
  380. cc.mu.Unlock()
  381. return ErrClientConnClosing
  382. }
  383. cc.state = Connecting
  384. cc.stateCV.Broadcast()
  385. cc.mu.Unlock()
  386. if closeTransport {
  387. cc.transport.Close()
  388. }
  389. // Adjust timeout for the current try.
  390. copts := cc.dopts.copts
  391. if copts.Timeout < 0 {
  392. cc.Close()
  393. return ErrClientConnTimeout
  394. }
  395. if copts.Timeout > 0 {
  396. copts.Timeout -= time.Since(start)
  397. if copts.Timeout <= 0 {
  398. cc.Close()
  399. return ErrClientConnTimeout
  400. }
  401. }
  402. sleepTime := cc.dopts.bs.backoff(retries)
  403. timeout := sleepTime
  404. if timeout < minConnectTimeout {
  405. timeout = minConnectTimeout
  406. }
  407. if copts.Timeout == 0 || copts.Timeout > timeout {
  408. copts.Timeout = timeout
  409. }
  410. connectTime := time.Now()
  411. addr, err := cc.dopts.picker.PickAddr()
  412. var newTransport transport.ClientTransport
  413. if err == nil {
  414. newTransport, err = transport.NewClientTransport(addr, &copts)
  415. }
  416. if err != nil {
  417. cc.mu.Lock()
  418. if cc.state == Shutdown {
  419. // cc.Close() has been invoked.
  420. cc.mu.Unlock()
  421. return ErrClientConnClosing
  422. }
  423. cc.errorf("transient failure: %v", err)
  424. cc.state = TransientFailure
  425. cc.stateCV.Broadcast()
  426. if cc.ready != nil {
  427. close(cc.ready)
  428. cc.ready = nil
  429. }
  430. cc.mu.Unlock()
  431. sleepTime -= time.Since(connectTime)
  432. if sleepTime < 0 {
  433. sleepTime = 0
  434. }
  435. // Fail early before falling into sleep.
  436. if cc.dopts.copts.Timeout > 0 && cc.dopts.copts.Timeout < sleepTime+time.Since(start) {
  437. cc.mu.Lock()
  438. cc.errorf("connection timeout")
  439. cc.mu.Unlock()
  440. cc.Close()
  441. return ErrClientConnTimeout
  442. }
  443. closeTransport = false
  444. time.Sleep(sleepTime)
  445. retries++
  446. grpclog.Printf("grpc: Conn.resetTransport failed to create client transport: %v; Reconnecting to %q", err, cc.target)
  447. continue
  448. }
  449. cc.mu.Lock()
  450. cc.printf("ready")
  451. if cc.state == Shutdown {
  452. // cc.Close() has been invoked.
  453. cc.mu.Unlock()
  454. newTransport.Close()
  455. return ErrClientConnClosing
  456. }
  457. cc.state = Ready
  458. cc.stateCV.Broadcast()
  459. cc.transport = newTransport
  460. if cc.ready != nil {
  461. close(cc.ready)
  462. cc.ready = nil
  463. }
  464. cc.mu.Unlock()
  465. return nil
  466. }
  467. }
  468. func (cc *Conn) reconnect() bool {
  469. cc.mu.Lock()
  470. if cc.state == Shutdown {
  471. // cc.Close() has been invoked.
  472. cc.mu.Unlock()
  473. return false
  474. }
  475. cc.state = TransientFailure
  476. cc.stateCV.Broadcast()
  477. cc.mu.Unlock()
  478. if err := cc.resetTransport(true); err != nil {
  479. // The ClientConn is closing.
  480. cc.mu.Lock()
  481. cc.printf("transport exiting: %v", err)
  482. cc.mu.Unlock()
  483. grpclog.Printf("grpc: Conn.transportMonitor exits due to: %v", err)
  484. return false
  485. }
  486. return true
  487. }
  // transportMonitor runs in a goroutine to track the error in transport and
  // create the new transport if an error happens. It returns when the channel
  // is closing, i.e. when shutdownChan is closed or reconnect reports failure.
  func (cc *Conn) transportMonitor() {
  	for {
  		select {
  		// shutdownChan is needed to detect the teardown when
  		// the ClientConn is idle (i.e., no RPC in flight).
  		case <-cc.shutdownChan:
  			return
  		// A reset was requested through NotifyReset.
  		case <-cc.resetChan:
  			if !cc.reconnect() {
  				return
  			}
  		// The current transport reported an error.
  		case <-cc.transport.Error():
  			if !cc.reconnect() {
  				return
  			}
  			// Tries to drain reset signal if there is any since it is out-dated.
  			select {
  			case <-cc.resetChan:
  			default:
  			}
  		}
  	}
  }
  513. // Wait blocks until i) the new transport is up or ii) ctx is done or iii) cc is closed.
  514. func (cc *Conn) Wait(ctx context.Context) (transport.ClientTransport, error) {
  515. for {
  516. cc.mu.Lock()
  517. switch {
  518. case cc.state == Shutdown:
  519. cc.mu.Unlock()
  520. return nil, ErrClientConnClosing
  521. case cc.state == Ready:
  522. ct := cc.transport
  523. cc.mu.Unlock()
  524. return ct, nil
  525. default:
  526. ready := cc.ready
  527. if ready == nil {
  528. ready = make(chan struct{})
  529. cc.ready = ready
  530. }
  531. cc.mu.Unlock()
  532. select {
  533. case <-ctx.Done():
  534. return nil, transport.ContextErr(ctx.Err())
  535. // Wait until the new transport is ready or failed.
  536. case <-ready:
  537. }
  538. }
  539. }
  540. }
  541. // Close starts to tear down the Conn. Returns ErrClientConnClosing if
  542. // it has been closed (mostly due to dial time-out).
  543. // TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
  544. // some edge cases (e.g., the caller opens and closes many ClientConn's in a
  545. // tight loop.
  546. func (cc *Conn) Close() error {
  547. cc.mu.Lock()
  548. defer cc.mu.Unlock()
  549. if cc.state == Shutdown {
  550. return ErrClientConnClosing
  551. }
  552. cc.state = Shutdown
  553. cc.stateCV.Broadcast()
  554. if cc.events != nil {
  555. cc.events.Finish()
  556. cc.events = nil
  557. }
  558. if cc.ready != nil {
  559. close(cc.ready)
  560. cc.ready = nil
  561. }
  562. if cc.transport != nil {
  563. cc.transport.Close()
  564. }
  565. if cc.shutdownChan != nil {
  566. close(cc.shutdownChan)
  567. }
  568. return nil
  569. }