clientconn.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. /*
  2. *
  3. * Copyright 2014, Google Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are
  8. * met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above
  13. * copyright notice, this list of conditions and the following disclaimer
  14. * in the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Google Inc. nor the names of its
  17. * contributors may be used to endorse or promote products derived from
  18. * this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. */
  33. package grpc
  34. import (
  35. "errors"
  36. "fmt"
  37. "net"
  38. "strings"
  39. "sync"
  40. "time"
  41. "golang.org/x/net/context"
  42. "golang.org/x/net/trace"
  43. "google.golang.org/grpc/credentials"
  44. "google.golang.org/grpc/grpclog"
  45. "google.golang.org/grpc/transport"
  46. )
  47. var (
  48. // ErrUnspecTarget indicates that the target address is unspecified.
  49. ErrUnspecTarget = errors.New("grpc: target is unspecified")
  50. // ErrNoTransportSecurity indicates that there is no transport security
  51. // being set for ClientConn. Users should either set one or explicitly
  52. // call WithInsecure DialOption to disable security.
  53. ErrNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
  54. // ErrCredentialsMisuse indicates that users want to transmit security information
  55. // (e.g., oauth2 token) which requires secure connection on an insecure
  56. // connection.
  57. ErrCredentialsMisuse = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportAuthenticator() to set)")
  58. // ErrClientConnClosing indicates that the operation is illegal because
  59. // the session is closing.
  60. ErrClientConnClosing = errors.New("grpc: the client connection is closing")
  61. // ErrClientConnTimeout indicates that the connection could not be
  62. // established or re-established within the specified timeout.
  63. ErrClientConnTimeout = errors.New("grpc: timed out trying to connect")
  64. // minimum time to give a connection to complete
  65. minConnectTimeout = 20 * time.Second
  66. )
  67. // dialOptions configure a Dial call. dialOptions are set by the DialOption
  68. // values passed to Dial.
  69. type dialOptions struct {
  70. codec Codec
  71. cp Compressor
  72. dc Decompressor
  73. picker Picker
  74. block bool
  75. insecure bool
  76. copts transport.ConnectOptions
  77. }
  78. // DialOption configures how we set up the connection.
  79. type DialOption func(*dialOptions)
  80. // WithCodec returns a DialOption which sets a codec for message marshaling and unmarshaling.
  81. func WithCodec(c Codec) DialOption {
  82. return func(o *dialOptions) {
  83. o.codec = c
  84. }
  85. }
  86. // WithCompressor returns a DialOption which sets a CompressorGenerator for generating message
  87. // compressor.
  88. func WithCompressor(cp Compressor) DialOption {
  89. return func(o *dialOptions) {
  90. o.cp = cp
  91. }
  92. }
  93. // WithDecompressor returns a DialOption which sets a DecompressorGenerator for generating
  94. // message decompressor.
  95. func WithDecompressor(dc Decompressor) DialOption {
  96. return func(o *dialOptions) {
  97. o.dc = dc
  98. }
  99. }
  100. // WithPicker returns a DialOption which sets a picker for connection selection.
  101. func WithPicker(p Picker) DialOption {
  102. return func(o *dialOptions) {
  103. o.picker = p
  104. }
  105. }
  106. // WithBlock returns a DialOption which makes caller of Dial blocks until the underlying
  107. // connection is up. Without this, Dial returns immediately and connecting the server
  108. // happens in background.
  109. func WithBlock() DialOption {
  110. return func(o *dialOptions) {
  111. o.block = true
  112. }
  113. }
  114. // WithInsecure returns a DialOption which disables transport security for this ClientConn.
  115. // Note that transport security is required unless WithInsecure is set.
  116. func WithInsecure() DialOption {
  117. return func(o *dialOptions) {
  118. o.insecure = true
  119. }
  120. }
  121. // WithTransportCredentials returns a DialOption which configures a
  122. // connection level security credentials (e.g., TLS/SSL).
  123. func WithTransportCredentials(creds credentials.TransportAuthenticator) DialOption {
  124. return func(o *dialOptions) {
  125. o.copts.AuthOptions = append(o.copts.AuthOptions, creds)
  126. }
  127. }
  128. // WithPerRPCCredentials returns a DialOption which sets
  129. // credentials which will place auth state on each outbound RPC.
  130. func WithPerRPCCredentials(creds credentials.Credentials) DialOption {
  131. return func(o *dialOptions) {
  132. o.copts.AuthOptions = append(o.copts.AuthOptions, creds)
  133. }
  134. }
  135. // WithTimeout returns a DialOption that configures a timeout for dialing a client connection.
  136. func WithTimeout(d time.Duration) DialOption {
  137. return func(o *dialOptions) {
  138. o.copts.Timeout = d
  139. }
  140. }
  141. // WithDialer returns a DialOption that specifies a function to use for dialing network addresses.
  142. func WithDialer(f func(addr string, timeout time.Duration) (net.Conn, error)) DialOption {
  143. return func(o *dialOptions) {
  144. o.copts.Dialer = f
  145. }
  146. }
  147. // WithUserAgent returns a DialOption that specifies a user agent string for all the RPCs.
  148. func WithUserAgent(s string) DialOption {
  149. return func(o *dialOptions) {
  150. o.copts.UserAgent = s
  151. }
  152. }
  153. // Dial creates a client connection the given target.
  154. func Dial(target string, opts ...DialOption) (*ClientConn, error) {
  155. cc := &ClientConn{
  156. target: target,
  157. }
  158. for _, opt := range opts {
  159. opt(&cc.dopts)
  160. }
  161. if cc.dopts.codec == nil {
  162. // Set the default codec.
  163. cc.dopts.codec = protoCodec{}
  164. }
  165. if cc.dopts.picker == nil {
  166. cc.dopts.picker = &unicastPicker{
  167. target: target,
  168. }
  169. }
  170. if err := cc.dopts.picker.Init(cc); err != nil {
  171. return nil, err
  172. }
  173. colonPos := strings.LastIndex(target, ":")
  174. if colonPos == -1 {
  175. colonPos = len(target)
  176. }
  177. cc.authority = target[:colonPos]
  178. return cc, nil
  179. }
  180. // ConnectivityState indicates the state of a client connection.
  181. type ConnectivityState int
  182. const (
  183. // Idle indicates the ClientConn is idle.
  184. Idle ConnectivityState = iota
  185. // Connecting indicates the ClienConn is connecting.
  186. Connecting
  187. // Ready indicates the ClientConn is ready for work.
  188. Ready
  189. // TransientFailure indicates the ClientConn has seen a failure but expects to recover.
  190. TransientFailure
  191. // Shutdown indicates the ClientConn has started shutting down.
  192. Shutdown
  193. )
  194. func (s ConnectivityState) String() string {
  195. switch s {
  196. case Idle:
  197. return "IDLE"
  198. case Connecting:
  199. return "CONNECTING"
  200. case Ready:
  201. return "READY"
  202. case TransientFailure:
  203. return "TRANSIENT_FAILURE"
  204. case Shutdown:
  205. return "SHUTDOWN"
  206. default:
  207. panic(fmt.Sprintf("unknown connectivity state: %d", s))
  208. }
  209. }
  210. // ClientConn represents a client connection to an RPC service.
  211. type ClientConn struct {
  212. target string
  213. authority string
  214. dopts dialOptions
  215. }
  216. // State returns the connectivity state of cc.
  217. // This is EXPERIMENTAL API.
  218. func (cc *ClientConn) State() (ConnectivityState, error) {
  219. return cc.dopts.picker.State()
  220. }
  221. // WaitForStateChange blocks until the state changes to something other than the sourceState.
  222. // It returns the new state or error.
  223. // This is EXPERIMENTAL API.
  224. func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState ConnectivityState) (ConnectivityState, error) {
  225. return cc.dopts.picker.WaitForStateChange(ctx, sourceState)
  226. }
  227. // Close starts to tear down the ClientConn.
  228. func (cc *ClientConn) Close() error {
  229. return cc.dopts.picker.Close()
  230. }
  231. // Conn is a client connection to a single destination.
  232. type Conn struct {
  233. target string
  234. dopts dialOptions
  235. resetChan chan int
  236. shutdownChan chan struct{}
  237. events trace.EventLog
  238. mu sync.Mutex
  239. state ConnectivityState
  240. stateCV *sync.Cond
  241. // ready is closed and becomes nil when a new transport is up or failed
  242. // due to timeout.
  243. ready chan struct{}
  244. transport transport.ClientTransport
  245. }
  246. // NewConn creates a Conn.
  247. func NewConn(cc *ClientConn) (*Conn, error) {
  248. if cc.target == "" {
  249. return nil, ErrUnspecTarget
  250. }
  251. c := &Conn{
  252. target: cc.target,
  253. dopts: cc.dopts,
  254. resetChan: make(chan int, 1),
  255. shutdownChan: make(chan struct{}),
  256. }
  257. if EnableTracing {
  258. c.events = trace.NewEventLog("grpc.ClientConn", c.target)
  259. }
  260. if !c.dopts.insecure {
  261. var ok bool
  262. for _, cd := range c.dopts.copts.AuthOptions {
  263. if _, ok = cd.(credentials.TransportAuthenticator); ok {
  264. break
  265. }
  266. }
  267. if !ok {
  268. return nil, ErrNoTransportSecurity
  269. }
  270. } else {
  271. for _, cd := range c.dopts.copts.AuthOptions {
  272. if cd.RequireTransportSecurity() {
  273. return nil, ErrCredentialsMisuse
  274. }
  275. }
  276. }
  277. c.stateCV = sync.NewCond(&c.mu)
  278. if c.dopts.block {
  279. if err := c.resetTransport(false); err != nil {
  280. c.Close()
  281. return nil, err
  282. }
  283. // Start to monitor the error status of transport.
  284. go c.transportMonitor()
  285. } else {
  286. // Start a goroutine connecting to the server asynchronously.
  287. go func() {
  288. if err := c.resetTransport(false); err != nil {
  289. grpclog.Printf("Failed to dial %s: %v; please retry.", c.target, err)
  290. c.Close()
  291. return
  292. }
  293. c.transportMonitor()
  294. }()
  295. }
  296. return c, nil
  297. }
  298. // printf records an event in cc's event log, unless cc has been closed.
  299. // REQUIRES cc.mu is held.
  300. func (cc *Conn) printf(format string, a ...interface{}) {
  301. if cc.events != nil {
  302. cc.events.Printf(format, a...)
  303. }
  304. }
  305. // errorf records an error in cc's event log, unless cc has been closed.
  306. // REQUIRES cc.mu is held.
  307. func (cc *Conn) errorf(format string, a ...interface{}) {
  308. if cc.events != nil {
  309. cc.events.Errorf(format, a...)
  310. }
  311. }
  312. // State returns the connectivity state of the Conn
  313. func (cc *Conn) State() ConnectivityState {
  314. cc.mu.Lock()
  315. defer cc.mu.Unlock()
  316. return cc.state
  317. }
  318. // WaitForStateChange blocks until the state changes to something other than the sourceState.
  319. func (cc *Conn) WaitForStateChange(ctx context.Context, sourceState ConnectivityState) (ConnectivityState, error) {
  320. cc.mu.Lock()
  321. defer cc.mu.Unlock()
  322. if sourceState != cc.state {
  323. return cc.state, nil
  324. }
  325. done := make(chan struct{})
  326. var err error
  327. go func() {
  328. select {
  329. case <-ctx.Done():
  330. cc.mu.Lock()
  331. err = ctx.Err()
  332. cc.stateCV.Broadcast()
  333. cc.mu.Unlock()
  334. case <-done:
  335. }
  336. }()
  337. defer close(done)
  338. for sourceState == cc.state {
  339. cc.stateCV.Wait()
  340. if err != nil {
  341. return cc.state, err
  342. }
  343. }
  344. return cc.state, nil
  345. }
  346. // NotifyReset tries to signal the underlying transport needs to be reset due to
  347. // for example a name resolution change in flight.
  348. func (cc *Conn) NotifyReset() {
  349. select {
  350. case cc.resetChan <- 0:
  351. default:
  352. }
  353. }
  354. func (cc *Conn) resetTransport(closeTransport bool) error {
  355. var retries int
  356. start := time.Now()
  357. for {
  358. cc.mu.Lock()
  359. cc.printf("connecting")
  360. if cc.state == Shutdown {
  361. // cc.Close() has been invoked.
  362. cc.mu.Unlock()
  363. return ErrClientConnClosing
  364. }
  365. cc.state = Connecting
  366. cc.stateCV.Broadcast()
  367. cc.mu.Unlock()
  368. if closeTransport {
  369. cc.transport.Close()
  370. }
  371. // Adjust timeout for the current try.
  372. copts := cc.dopts.copts
  373. if copts.Timeout < 0 {
  374. cc.Close()
  375. return ErrClientConnTimeout
  376. }
  377. if copts.Timeout > 0 {
  378. copts.Timeout -= time.Since(start)
  379. if copts.Timeout <= 0 {
  380. cc.Close()
  381. return ErrClientConnTimeout
  382. }
  383. }
  384. sleepTime := backoff(retries)
  385. timeout := sleepTime
  386. if timeout < minConnectTimeout {
  387. timeout = minConnectTimeout
  388. }
  389. if copts.Timeout == 0 || copts.Timeout > timeout {
  390. copts.Timeout = timeout
  391. }
  392. connectTime := time.Now()
  393. addr, err := cc.dopts.picker.PickAddr()
  394. var newTransport transport.ClientTransport
  395. if err == nil {
  396. newTransport, err = transport.NewClientTransport(addr, &copts)
  397. }
  398. if err != nil {
  399. cc.mu.Lock()
  400. if cc.state == Shutdown {
  401. // cc.Close() has been invoked.
  402. cc.mu.Unlock()
  403. return ErrClientConnClosing
  404. }
  405. cc.errorf("transient failure: %v", err)
  406. cc.state = TransientFailure
  407. cc.stateCV.Broadcast()
  408. if cc.ready != nil {
  409. close(cc.ready)
  410. cc.ready = nil
  411. }
  412. cc.mu.Unlock()
  413. sleepTime -= time.Since(connectTime)
  414. if sleepTime < 0 {
  415. sleepTime = 0
  416. }
  417. // Fail early before falling into sleep.
  418. if cc.dopts.copts.Timeout > 0 && cc.dopts.copts.Timeout < sleepTime+time.Since(start) {
  419. cc.mu.Lock()
  420. cc.errorf("connection timeout")
  421. cc.mu.Unlock()
  422. cc.Close()
  423. return ErrClientConnTimeout
  424. }
  425. closeTransport = false
  426. time.Sleep(sleepTime)
  427. retries++
  428. grpclog.Printf("grpc: Conn.resetTransport failed to create client transport: %v; Reconnecting to %q", err, cc.target)
  429. continue
  430. }
  431. cc.mu.Lock()
  432. cc.printf("ready")
  433. if cc.state == Shutdown {
  434. // cc.Close() has been invoked.
  435. cc.mu.Unlock()
  436. newTransport.Close()
  437. return ErrClientConnClosing
  438. }
  439. cc.state = Ready
  440. cc.stateCV.Broadcast()
  441. cc.transport = newTransport
  442. if cc.ready != nil {
  443. close(cc.ready)
  444. cc.ready = nil
  445. }
  446. cc.mu.Unlock()
  447. return nil
  448. }
  449. }
  450. func (cc *Conn) reconnect() bool {
  451. cc.mu.Lock()
  452. if cc.state == Shutdown {
  453. // cc.Close() has been invoked.
  454. cc.mu.Unlock()
  455. return false
  456. }
  457. cc.state = TransientFailure
  458. cc.stateCV.Broadcast()
  459. cc.mu.Unlock()
  460. if err := cc.resetTransport(true); err != nil {
  461. // The ClientConn is closing.
  462. cc.mu.Lock()
  463. cc.printf("transport exiting: %v", err)
  464. cc.mu.Unlock()
  465. grpclog.Printf("grpc: Conn.transportMonitor exits due to: %v", err)
  466. return false
  467. }
  468. return true
  469. }
  470. // Run in a goroutine to track the error in transport and create the
  471. // new transport if an error happens. It returns when the channel is closing.
  472. func (cc *Conn) transportMonitor() {
  473. for {
  474. select {
  475. // shutdownChan is needed to detect the teardown when
  476. // the ClientConn is idle (i.e., no RPC in flight).
  477. case <-cc.shutdownChan:
  478. return
  479. case <-cc.resetChan:
  480. if !cc.reconnect() {
  481. return
  482. }
  483. case <-cc.transport.Error():
  484. if !cc.reconnect() {
  485. return
  486. }
  487. // Tries to drain reset signal if there is any since it is out-dated.
  488. select {
  489. case <-cc.resetChan:
  490. default:
  491. }
  492. }
  493. }
  494. }
  495. // Wait blocks until i) the new transport is up or ii) ctx is done or iii) cc is closed.
  496. func (cc *Conn) Wait(ctx context.Context) (transport.ClientTransport, error) {
  497. for {
  498. cc.mu.Lock()
  499. switch {
  500. case cc.state == Shutdown:
  501. cc.mu.Unlock()
  502. return nil, ErrClientConnClosing
  503. case cc.state == Ready:
  504. ct := cc.transport
  505. cc.mu.Unlock()
  506. return ct, nil
  507. default:
  508. ready := cc.ready
  509. if ready == nil {
  510. ready = make(chan struct{})
  511. cc.ready = ready
  512. }
  513. cc.mu.Unlock()
  514. select {
  515. case <-ctx.Done():
  516. return nil, transport.ContextErr(ctx.Err())
  517. // Wait until the new transport is ready or failed.
  518. case <-ready:
  519. }
  520. }
  521. }
  522. }
  523. // Close starts to tear down the Conn. Returns ErrClientConnClosing if
  524. // it has been closed (mostly due to dial time-out).
  525. // TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
  526. // some edge cases (e.g., the caller opens and closes many ClientConn's in a
  527. // tight loop.
  528. func (cc *Conn) Close() error {
  529. cc.mu.Lock()
  530. defer cc.mu.Unlock()
  531. if cc.state == Shutdown {
  532. return ErrClientConnClosing
  533. }
  534. cc.state = Shutdown
  535. cc.stateCV.Broadcast()
  536. if cc.events != nil {
  537. cc.events.Finish()
  538. cc.events = nil
  539. }
  540. if cc.ready != nil {
  541. close(cc.ready)
  542. cc.ready = nil
  543. }
  544. if cc.transport != nil {
  545. cc.transport.Close()
  546. }
  547. if cc.shutdownChan != nil {
  548. close(cc.shutdownChan)
  549. }
  550. return nil
  551. }