clientconn.go 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447
  1. /*
  2. *
  3. * Copyright 2014 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package grpc
  19. import (
  20. "context"
  21. "errors"
  22. "fmt"
  23. "math"
  24. "net"
  25. "reflect"
  26. "strings"
  27. "sync"
  28. "sync/atomic"
  29. "time"
  30. "google.golang.org/grpc/balancer"
  31. _ "google.golang.org/grpc/balancer/roundrobin" // To register roundrobin.
  32. "google.golang.org/grpc/codes"
  33. "google.golang.org/grpc/connectivity"
  34. "google.golang.org/grpc/credentials"
  35. "google.golang.org/grpc/grpclog"
  36. "google.golang.org/grpc/internal/backoff"
  37. "google.golang.org/grpc/internal/channelz"
  38. "google.golang.org/grpc/internal/grpcsync"
  39. "google.golang.org/grpc/internal/transport"
  40. "google.golang.org/grpc/keepalive"
  41. "google.golang.org/grpc/resolver"
  42. _ "google.golang.org/grpc/resolver/dns" // To register dns resolver.
  43. _ "google.golang.org/grpc/resolver/passthrough" // To register passthrough resolver.
  44. "google.golang.org/grpc/serviceconfig"
  45. "google.golang.org/grpc/status"
  46. )
const (
	// minConnectTimeout is the minimum time to give a connection to complete.
	minConnectTimeout = 20 * time.Second
	// grpclbName is the name of the grpclb balancer. It must match
	// grpclbName in grpclb/grpclb.go.
	grpclbName = "grpclb"
)
// Errors and error-message prefixes shared by the ClientConn implementation.
var (
	// ErrClientConnClosing indicates that the operation is illegal because
	// the ClientConn is closing. It is a status error carrying
	// codes.Canceled.
	//
	// Deprecated: this error should not be relied upon by users; use the status
	// code of Canceled instead.
	ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
	// errConnDrain indicates that the connection has started to drain and
	// does not accept any new RPCs.
	errConnDrain = errors.New("grpc: the connection is drained")
	// errConnClosing indicates that the connection is closing.
	errConnClosing = errors.New("grpc: the connection is closing")
	// errBalancerClosed indicates that the balancer is closed.
	errBalancerClosed = errors.New("grpc: balancer is closed")
	// invalidDefaultServiceConfigErrPrefix is used to prefix the JSON parsing
	// error for the default service config. Note that it is a plain string,
	// not an error value.
	invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
)
// The following errors are returned from Dial and DialContext when the
// security-related dial options are inconsistent.
var (
	// errNoTransportSecurity indicates that there is no transport security
	// being set for ClientConn. Users should either set one or explicitly
	// call WithInsecure DialOption to disable security.
	errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
	// errTransportCredsAndBundle indicates that a creds bundle is used
	// together with other individual Transport Credentials.
	errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
	// errTransportCredentialsMissing indicates that users want to transmit
	// security information (e.g., OAuth2 token), which requires a secure
	// connection, on an insecure connection.
	errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
	// errCredentialsConflict indicates that grpc.WithTransportCredentials()
	// and grpc.WithInsecure() are both called for a connection.
	errCredentialsConflict = errors.New("grpc: transport credentials are set for an insecure connection (grpc.WithTransportCredentials() and grpc.WithInsecure() are both called)")
)
// Default client-side message size limits and I/O buffer sizes.
const (
	// defaultClientMaxReceiveMessageSize is 4 MiB (1024 * 1024 * 4 bytes).
	defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4
	// defaultClientMaxSendMessageSize is math.MaxInt32.
	defaultClientMaxSendMessageSize = math.MaxInt32
	// defaultWriteBufSize specifies the buffer size for sending frames
	// (32 KiB).
	defaultWriteBufSize = 32 * 1024
	// defaultReadBufSize is 32 KiB; presumably the matching buffer size for
	// reading frames — confirm against the transport code that consumes it.
	defaultReadBufSize = 32 * 1024
)
  94. // Dial creates a client connection to the given target.
  95. func Dial(target string, opts ...DialOption) (*ClientConn, error) {
  96. return DialContext(context.Background(), target, opts...)
  97. }
// DialContext creates a client connection to the given target. By default, it's
// a non-blocking dial (the function won't wait for connections to be
// established, and connecting happens in the background). To make it a blocking
// dial, use WithBlock() dial option.
//
// In the non-blocking case, the ctx does not act against the connection. It
// only controls the setup steps.
//
// In the blocking case, ctx can be used to cancel or expire the pending
// connection. Once this function returns, the cancellation and expiration of
// ctx will be noop. Users should call ClientConn.Close to terminate all the
// pending operations after this function returns.
//
// The target name syntax is defined in
// https://github.com/grpc/grpc/blob/master/doc/naming.md.
// e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
//
// On any error the partially constructed ClientConn is closed and a nil
// *ClientConn is returned. If ctx is already done when the function returns,
// the result is replaced with (nil, ctx.Err()) even if setup had succeeded.
func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
	cc := &ClientConn{
		target:            target,
		csMgr:             &connectivityStateManager{},
		conns:             make(map[*addrConn]struct{}),
		dopts:             defaultDialOptions(),
		blockingpicker:    newPickerWrapper(),
		czData:            new(channelzData),
		firstResolveEvent: grpcsync.NewEvent(),
	}
	// Seed the atomic.Value with a typed nil pointer.
	cc.retryThrottler.Store((*retryThrottler)(nil))
	// The ClientConn's own context is rooted in context.Background() rather
	// than ctx: ctx only governs the dial itself, not the connection lifetime.
	cc.ctx, cc.cancel = context.WithCancel(context.Background())

	// Apply user-supplied options on top of the defaults.
	for _, opt := range opts {
		opt.apply(&cc.dopts)
	}

	chainUnaryClientInterceptors(cc)
	chainStreamClientInterceptors(cc)

	// Close cc on every error return. Deferred calls run last-in-first-out, so
	// this runs after the ctx.Done() check deferred further down and therefore
	// also fires when that check turns the result into a context error.
	defer func() {
		if err != nil {
			cc.Close()
		}
	}()

	// Register the channel with channelz; a non-zero parent ID marks it as a
	// nested channel.
	if channelz.IsOn() {
		if cc.dopts.channelzParentID != 0 {
			cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
			channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
				Desc:     "Channel Created",
				Severity: channelz.CtINFO,
				Parent: &channelz.TraceEventDesc{
					Desc:     fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID),
					Severity: channelz.CtINFO,
				},
			})
		} else {
			cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target)
			channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
				Desc:     "Channel Created",
				Severity: channelz.CtINFO,
			})
		}
		cc.csMgr.channelzID = cc.channelzID
	}

	// Validate the combination of security-related options.
	if !cc.dopts.insecure {
		// Secure dial: exactly one of TransportCredentials / CredsBundle.
		if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
			return nil, errNoTransportSecurity
		}
		if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
			return nil, errTransportCredsAndBundle
		}
	} else {
		// Insecure dial: no transport credentials of any kind, and no per-RPC
		// credentials that demand transport security.
		if cc.dopts.copts.TransportCredentials != nil || cc.dopts.copts.CredsBundle != nil {
			return nil, errCredentialsConflict
		}
		for _, cd := range cc.dopts.copts.PerRPCCredentials {
			if cd.RequireTransportSecurity() {
				return nil, errTransportCredentialsMissing
			}
		}
	}

	// Parse the default service config up front so that invalid JSON fails
	// the dial.
	if cc.dopts.defaultServiceConfigRawJSON != nil {
		sc, err := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON)
		if err != nil {
			return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, err)
		}
		cc.dopts.defaultServiceConfig = sc
	}
	cc.mkp = cc.dopts.copts.KeepaliveParams

	// Without a custom dialer, fall back to net.Dialer wrapped by
	// newProxyDialer.
	if cc.dopts.copts.Dialer == nil {
		cc.dopts.copts.Dialer = newProxyDialer(
			func(ctx context.Context, addr string) (net.Conn, error) {
				network, addr := parseDialTarget(addr)
				return (&net.Dialer{}).DialContext(ctx, network, addr)
			},
		)
	}

	// Append grpcUA to a user-supplied user agent, or use grpcUA alone.
	if cc.dopts.copts.UserAgent != "" {
		cc.dopts.copts.UserAgent += " " + grpcUA
	} else {
		cc.dopts.copts.UserAgent = grpcUA
	}

	// A positive dial timeout narrows ctx for the rest of the setup.
	if cc.dopts.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
		defer cancel()
	}
	// If ctx is done by the time we return, report the context error instead
	// of whatever result was about to be returned.
	defer func() {
		select {
		case <-ctx.Done():
			conn, err = nil, ctx.Err()
		default:
		}
	}()

	scSet := false
	if cc.dopts.scChan != nil {
		// Try to get an initial service config.
		select {
		case sc, ok := <-cc.dopts.scChan:
			if ok {
				cc.sc = &sc
				scSet = true
			}
		default:
		}
	}
	if cc.dopts.bs == nil {
		cc.dopts.bs = backoff.Exponential{
			MaxDelay: DefaultBackoffConfig.MaxDelay,
		}
	}
	if cc.dopts.resolverBuilder == nil {
		// Only try to parse target when resolver builder is not already set.
		cc.parsedTarget = parseTarget(cc.target)
		grpclog.Infof("parsed scheme: %q", cc.parsedTarget.Scheme)
		cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
		if cc.dopts.resolverBuilder == nil {
			// If resolver builder is still nil, the parsed target's scheme is
			// not registered. Fallback to default resolver and set Endpoint to
			// the original target.
			grpclog.Infof("scheme %q not registered, fallback to default scheme", cc.parsedTarget.Scheme)
			cc.parsedTarget = resolver.Target{
				Scheme:   resolver.GetDefaultScheme(),
				Endpoint: target,
			}
			cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
		}
	} else {
		cc.parsedTarget = resolver.Target{Endpoint: target}
	}

	// Pick the authority: the credentials' ServerName if set, then the
	// explicit authority option (insecure dials only), then the endpoint.
	creds := cc.dopts.copts.TransportCredentials
	if creds != nil && creds.Info().ServerName != "" {
		cc.authority = creds.Info().ServerName
	} else if cc.dopts.insecure && cc.dopts.authority != "" {
		cc.authority = cc.dopts.authority
	} else {
		// Use endpoint from "scheme://authority/endpoint" as the default
		// authority for ClientConn.
		cc.authority = cc.parsedTarget.Endpoint
	}

	if cc.dopts.scChan != nil && !scSet {
		// Blocking wait for the initial service config.
		select {
		case sc, ok := <-cc.dopts.scChan:
			if ok {
				cc.sc = &sc
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	// Keep consuming later service config updates in the background; the
	// watcher exits when scChan is closed or cc.ctx is done.
	if cc.dopts.scChan != nil {
		go cc.scWatcher()
	}

	// Balancers get a clone of the transport credentials, not the original.
	var credsClone credentials.TransportCredentials
	if creds := cc.dopts.copts.TransportCredentials; creds != nil {
		credsClone = creds.Clone()
	}
	cc.balancerBuildOpts = balancer.BuildOptions{
		DialCreds:        credsClone,
		CredsBundle:      cc.dopts.copts.CredsBundle,
		Dialer:           cc.dopts.copts.Dialer,
		ChannelzParentID: cc.channelzID,
		Target:           cc.parsedTarget,
	}

	// Build the resolver.
	rWrapper, err := newCCResolverWrapper(cc)
	if err != nil {
		return nil, fmt.Errorf("failed to build resolver: %v", err)
	}

	cc.mu.Lock()
	cc.resolverWrapper = rWrapper
	cc.mu.Unlock()
	// A blocking dial blocks until the clientConn is ready.
	if cc.dopts.block {
		for {
			s := cc.GetState()
			if s == connectivity.Ready {
				break
			} else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
				// Give up early only when the last connection error reports
				// itself as non-temporary.
				if err = cc.blockingpicker.connectionError(); err != nil {
					terr, ok := err.(interface {
						Temporary() bool
					})
					if ok && !terr.Temporary() {
						return nil, err
					}
				}
			}
			if !cc.WaitForStateChange(ctx, s) {
				// ctx got timeout or canceled.
				return nil, ctx.Err()
			}
		}
	}

	// The explicit nil also resets err, which the loop above may have left
	// holding a temporary connection error.
	return cc, nil
}
  309. // chainUnaryClientInterceptors chains all unary client interceptors into one.
  310. func chainUnaryClientInterceptors(cc *ClientConn) {
  311. interceptors := cc.dopts.chainUnaryInts
  312. // Prepend dopts.unaryInt to the chaining interceptors if it exists, since unaryInt will
  313. // be executed before any other chained interceptors.
  314. if cc.dopts.unaryInt != nil {
  315. interceptors = append([]UnaryClientInterceptor{cc.dopts.unaryInt}, interceptors...)
  316. }
  317. var chainedInt UnaryClientInterceptor
  318. if len(interceptors) == 0 {
  319. chainedInt = nil
  320. } else if len(interceptors) == 1 {
  321. chainedInt = interceptors[0]
  322. } else {
  323. chainedInt = func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error {
  324. return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...)
  325. }
  326. }
  327. cc.dopts.unaryInt = chainedInt
  328. }
  329. // getChainUnaryInvoker recursively generate the chained unary invoker.
  330. func getChainUnaryInvoker(interceptors []UnaryClientInterceptor, curr int, finalInvoker UnaryInvoker) UnaryInvoker {
  331. if curr == len(interceptors)-1 {
  332. return finalInvoker
  333. }
  334. return func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error {
  335. return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...)
  336. }
  337. }
  338. // chainStreamClientInterceptors chains all stream client interceptors into one.
  339. func chainStreamClientInterceptors(cc *ClientConn) {
  340. interceptors := cc.dopts.chainStreamInts
  341. // Prepend dopts.streamInt to the chaining interceptors if it exists, since streamInt will
  342. // be executed before any other chained interceptors.
  343. if cc.dopts.streamInt != nil {
  344. interceptors = append([]StreamClientInterceptor{cc.dopts.streamInt}, interceptors...)
  345. }
  346. var chainedInt StreamClientInterceptor
  347. if len(interceptors) == 0 {
  348. chainedInt = nil
  349. } else if len(interceptors) == 1 {
  350. chainedInt = interceptors[0]
  351. } else {
  352. chainedInt = func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, streamer Streamer, opts ...CallOption) (ClientStream, error) {
  353. return interceptors[0](ctx, desc, cc, method, getChainStreamer(interceptors, 0, streamer), opts...)
  354. }
  355. }
  356. cc.dopts.streamInt = chainedInt
  357. }
  358. // getChainStreamer recursively generate the chained client stream constructor.
  359. func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStreamer Streamer) Streamer {
  360. if curr == len(interceptors)-1 {
  361. return finalStreamer
  362. }
  363. return func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (ClientStream, error) {
  364. return interceptors[curr+1](ctx, desc, cc, method, getChainStreamer(interceptors, curr+1, finalStreamer), opts...)
  365. }
  366. }
// connectivityStateManager keeps the connectivity.State of ClientConn.
// This struct will eventually be exported so the balancers can access it.
type connectivityStateManager struct {
	// mu is held by updateState, getState and getNotifyChan while they touch
	// state and notifyChan.
	mu sync.Mutex
	// state is the current connectivity state.
	state connectivity.State
	// notifyChan is created lazily by getNotifyChan and is closed and reset
	// to nil by updateState when the state changes.
	notifyChan chan struct{}
	// channelzID is the ID used when recording channelz trace events for
	// state changes.
	channelzID int64
}
  375. // updateState updates the connectivity.State of ClientConn.
  376. // If there's a change it notifies goroutines waiting on state change to
  377. // happen.
  378. func (csm *connectivityStateManager) updateState(state connectivity.State) {
  379. csm.mu.Lock()
  380. defer csm.mu.Unlock()
  381. if csm.state == connectivity.Shutdown {
  382. return
  383. }
  384. if csm.state == state {
  385. return
  386. }
  387. csm.state = state
  388. if channelz.IsOn() {
  389. channelz.AddTraceEvent(csm.channelzID, &channelz.TraceEventDesc{
  390. Desc: fmt.Sprintf("Channel Connectivity change to %v", state),
  391. Severity: channelz.CtINFO,
  392. })
  393. }
  394. if csm.notifyChan != nil {
  395. // There are other goroutines waiting on this channel.
  396. close(csm.notifyChan)
  397. csm.notifyChan = nil
  398. }
  399. }
  400. func (csm *connectivityStateManager) getState() connectivity.State {
  401. csm.mu.Lock()
  402. defer csm.mu.Unlock()
  403. return csm.state
  404. }
  405. func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} {
  406. csm.mu.Lock()
  407. defer csm.mu.Unlock()
  408. if csm.notifyChan == nil {
  409. csm.notifyChan = make(chan struct{})
  410. }
  411. return csm.notifyChan
  412. }
// ClientConn represents a client connection to an RPC server.
type ClientConn struct {
	// ctx and cancel are created in DialContext from context.Background().
	// ctx is the parent of every addrConn context and is watched by
	// scWatcher and waitForResolvedAddrs.
	ctx    context.Context
	cancel context.CancelFunc

	// target is the target string as passed to Dial; Target() returns it.
	target string
	// parsedTarget and authority are computed once in DialContext.
	parsedTarget resolver.Target
	authority    string
	// dopts holds the defaults with the caller's DialOptions applied.
	dopts dialOptions
	// csMgr tracks the connectivity state reported by GetState and
	// WaitForStateChange.
	csMgr *connectivityStateManager

	// balancerBuildOpts is passed to newCCBalancerWrapper whenever a
	// balancer is built.
	balancerBuildOpts balancer.BuildOptions
	blockingpicker    *pickerWrapper

	// mu is held by the methods in this file while they access
	// resolverWrapper, sc, conns, curBalancerName and balancerWrapper.
	// NOTE(review): the full set of fields it guards is not stated here —
	// confirm against Close and the remaining methods.
	mu              sync.RWMutex
	resolverWrapper *ccResolverWrapper
	sc              *ServiceConfig
	// conns is the set of live addrConns. Methods treat a nil map as "the
	// ClientConn is already closed".
	conns map[*addrConn]struct{}
	// Keepalive parameter can be updated if a GoAway is received.
	mkp             keepalive.ClientParameters
	curBalancerName string
	balancerWrapper *ccBalancerWrapper
	// retryThrottler is initialised in DialContext with a nil
	// *retryThrottler.
	retryThrottler atomic.Value

	// firstResolveEvent is what waitForResolvedAddrs blocks on.
	firstResolveEvent *grpcsync.Event

	channelzID int64 // channelz unique identification number
	// czData holds the call counters updated by incrCallsStarted,
	// incrCallsSucceeded and incrCallsFailed and read by channelzMetric.
	czData *channelzData
}
  437. // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
  438. // ctx expires. A true value is returned in former case and false in latter.
  439. // This is an EXPERIMENTAL API.
  440. func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
  441. ch := cc.csMgr.getNotifyChan()
  442. if cc.csMgr.getState() != sourceState {
  443. return true
  444. }
  445. select {
  446. case <-ctx.Done():
  447. return false
  448. case <-ch:
  449. return true
  450. }
  451. }
  452. // GetState returns the connectivity.State of ClientConn.
  453. // This is an EXPERIMENTAL API.
  454. func (cc *ClientConn) GetState() connectivity.State {
  455. return cc.csMgr.getState()
  456. }
  457. func (cc *ClientConn) scWatcher() {
  458. for {
  459. select {
  460. case sc, ok := <-cc.dopts.scChan:
  461. if !ok {
  462. return
  463. }
  464. cc.mu.Lock()
  465. // TODO: load balance policy runtime change is ignored.
  466. // We may revisit this decision in the future.
  467. cc.sc = &sc
  468. cc.mu.Unlock()
  469. case <-cc.ctx.Done():
  470. return
  471. }
  472. }
  473. }
  474. // waitForResolvedAddrs blocks until the resolver has provided addresses or the
  475. // context expires. Returns nil unless the context expires first; otherwise
  476. // returns a status error based on the context.
  477. func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error {
  478. // This is on the RPC path, so we use a fast path to avoid the
  479. // more-expensive "select" below after the resolver has returned once.
  480. if cc.firstResolveEvent.HasFired() {
  481. return nil
  482. }
  483. select {
  484. case <-cc.firstResolveEvent.Done():
  485. return nil
  486. case <-ctx.Done():
  487. return status.FromContextError(ctx.Err()).Err()
  488. case <-cc.ctx.Done():
  489. return ErrClientConnClosing
  490. }
  491. }
  492. func (cc *ClientConn) updateResolverState(s resolver.State) error {
  493. cc.mu.Lock()
  494. defer cc.mu.Unlock()
  495. // Check if the ClientConn is already closed. Some fields (e.g.
  496. // balancerWrapper) are set to nil when closing the ClientConn, and could
  497. // cause nil pointer panic if we don't have this check.
  498. if cc.conns == nil {
  499. return nil
  500. }
  501. if cc.dopts.disableServiceConfig || s.ServiceConfig == nil {
  502. if cc.dopts.defaultServiceConfig != nil && cc.sc == nil {
  503. cc.applyServiceConfig(cc.dopts.defaultServiceConfig)
  504. }
  505. } else if sc, ok := s.ServiceConfig.(*ServiceConfig); ok {
  506. cc.applyServiceConfig(sc)
  507. }
  508. var balCfg serviceconfig.LoadBalancingConfig
  509. if cc.dopts.balancerBuilder == nil {
  510. // Only look at balancer types and switch balancer if balancer dial
  511. // option is not set.
  512. var newBalancerName string
  513. if cc.sc != nil && cc.sc.lbConfig != nil {
  514. newBalancerName = cc.sc.lbConfig.name
  515. balCfg = cc.sc.lbConfig.cfg
  516. } else {
  517. var isGRPCLB bool
  518. for _, a := range s.Addresses {
  519. if a.Type == resolver.GRPCLB {
  520. isGRPCLB = true
  521. break
  522. }
  523. }
  524. if isGRPCLB {
  525. newBalancerName = grpclbName
  526. } else if cc.sc != nil && cc.sc.LB != nil {
  527. newBalancerName = *cc.sc.LB
  528. } else {
  529. newBalancerName = PickFirstBalancerName
  530. }
  531. }
  532. cc.switchBalancer(newBalancerName)
  533. } else if cc.balancerWrapper == nil {
  534. // Balancer dial option was set, and this is the first time handling
  535. // resolved addresses. Build a balancer with dopts.balancerBuilder.
  536. cc.curBalancerName = cc.dopts.balancerBuilder.Name()
  537. cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts)
  538. }
  539. cc.balancerWrapper.updateClientConnState(&balancer.ClientConnState{ResolverState: s, BalancerConfig: balCfg})
  540. return nil
  541. }
  542. // switchBalancer starts the switching from current balancer to the balancer
  543. // with the given name.
  544. //
  545. // It will NOT send the current address list to the new balancer. If needed,
  546. // caller of this function should send address list to the new balancer after
  547. // this function returns.
  548. //
  549. // Caller must hold cc.mu.
  550. func (cc *ClientConn) switchBalancer(name string) {
  551. if strings.EqualFold(cc.curBalancerName, name) {
  552. return
  553. }
  554. grpclog.Infof("ClientConn switching balancer to %q", name)
  555. if cc.dopts.balancerBuilder != nil {
  556. grpclog.Infoln("ignoring balancer switching: Balancer DialOption used instead")
  557. return
  558. }
  559. if cc.balancerWrapper != nil {
  560. cc.balancerWrapper.close()
  561. }
  562. builder := balancer.Get(name)
  563. if channelz.IsOn() {
  564. if builder == nil {
  565. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  566. Desc: fmt.Sprintf("Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName),
  567. Severity: channelz.CtWarning,
  568. })
  569. } else {
  570. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  571. Desc: fmt.Sprintf("Channel switches to new LB policy %q", name),
  572. Severity: channelz.CtINFO,
  573. })
  574. }
  575. }
  576. if builder == nil {
  577. grpclog.Infof("failed to get balancer builder for: %v, using pick_first instead", name)
  578. builder = newPickfirstBuilder()
  579. }
  580. cc.curBalancerName = builder.Name()
  581. cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts)
  582. }
  583. func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State) {
  584. cc.mu.Lock()
  585. if cc.conns == nil {
  586. cc.mu.Unlock()
  587. return
  588. }
  589. // TODO(bar switching) send updates to all balancer wrappers when balancer
  590. // gracefully switching is supported.
  591. cc.balancerWrapper.handleSubConnStateChange(sc, s)
  592. cc.mu.Unlock()
  593. }
  594. // newAddrConn creates an addrConn for addrs and adds it to cc.conns.
  595. //
  596. // Caller needs to make sure len(addrs) > 0.
  597. func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
  598. ac := &addrConn{
  599. cc: cc,
  600. addrs: addrs,
  601. scopts: opts,
  602. dopts: cc.dopts,
  603. czData: new(channelzData),
  604. resetBackoff: make(chan struct{}),
  605. }
  606. ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
  607. // Track ac in cc. This needs to be done before any getTransport(...) is called.
  608. cc.mu.Lock()
  609. if cc.conns == nil {
  610. cc.mu.Unlock()
  611. return nil, ErrClientConnClosing
  612. }
  613. if channelz.IsOn() {
  614. ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "")
  615. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  616. Desc: "Subchannel Created",
  617. Severity: channelz.CtINFO,
  618. Parent: &channelz.TraceEventDesc{
  619. Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID),
  620. Severity: channelz.CtINFO,
  621. },
  622. })
  623. }
  624. cc.conns[ac] = struct{}{}
  625. cc.mu.Unlock()
  626. return ac, nil
  627. }
  628. // removeAddrConn removes the addrConn in the subConn from clientConn.
  629. // It also tears down the ac with the given error.
  630. func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) {
  631. cc.mu.Lock()
  632. if cc.conns == nil {
  633. cc.mu.Unlock()
  634. return
  635. }
  636. delete(cc.conns, ac)
  637. cc.mu.Unlock()
  638. ac.tearDown(err)
  639. }
  640. func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric {
  641. return &channelz.ChannelInternalMetric{
  642. State: cc.GetState(),
  643. Target: cc.target,
  644. CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted),
  645. CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded),
  646. CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed),
  647. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)),
  648. }
  649. }
  650. // Target returns the target string of the ClientConn.
  651. // This is an EXPERIMENTAL API.
  652. func (cc *ClientConn) Target() string {
  653. return cc.target
  654. }
  655. func (cc *ClientConn) incrCallsStarted() {
  656. atomic.AddInt64(&cc.czData.callsStarted, 1)
  657. atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano())
  658. }
  659. func (cc *ClientConn) incrCallsSucceeded() {
  660. atomic.AddInt64(&cc.czData.callsSucceeded, 1)
  661. }
  662. func (cc *ClientConn) incrCallsFailed() {
  663. atomic.AddInt64(&cc.czData.callsFailed, 1)
  664. }
  665. // connect starts creating a transport.
  666. // It does nothing if the ac is not IDLE.
  667. // TODO(bar) Move this to the addrConn section.
  668. func (ac *addrConn) connect() error {
  669. ac.mu.Lock()
  670. if ac.state == connectivity.Shutdown {
  671. ac.mu.Unlock()
  672. return errConnClosing
  673. }
  674. if ac.state != connectivity.Idle {
  675. ac.mu.Unlock()
  676. return nil
  677. }
  678. // Update connectivity state within the lock to prevent subsequent or
  679. // concurrent calls from resetting the transport more than once.
  680. ac.updateConnectivityState(connectivity.Connecting)
  681. ac.mu.Unlock()
  682. // Start a goroutine connecting to the server asynchronously.
  683. go ac.resetTransport()
  684. return nil
  685. }
  686. // tryUpdateAddrs tries to update ac.addrs with the new addresses list.
  687. //
  688. // If ac is Connecting, it returns false. The caller should tear down the ac and
  689. // create a new one. Note that the backoff will be reset when this happens.
  690. //
  691. // If ac is TransientFailure, it updates ac.addrs and returns true. The updated
  692. // addresses will be picked up by retry in the next iteration after backoff.
  693. //
  694. // If ac is Shutdown or Idle, it updates ac.addrs and returns true.
  695. //
  696. // If ac is Ready, it checks whether current connected address of ac is in the
  697. // new addrs list.
  698. // - If true, it updates ac.addrs and returns true. The ac will keep using
  699. // the existing connection.
  700. // - If false, it does nothing and returns false.
  701. func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool {
  702. ac.mu.Lock()
  703. defer ac.mu.Unlock()
  704. grpclog.Infof("addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs)
  705. if ac.state == connectivity.Shutdown ||
  706. ac.state == connectivity.TransientFailure ||
  707. ac.state == connectivity.Idle {
  708. ac.addrs = addrs
  709. return true
  710. }
  711. if ac.state == connectivity.Connecting {
  712. return false
  713. }
  714. // ac.state is Ready, try to find the connected address.
  715. var curAddrFound bool
  716. for _, a := range addrs {
  717. if reflect.DeepEqual(ac.curAddr, a) {
  718. curAddrFound = true
  719. break
  720. }
  721. }
  722. grpclog.Infof("addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound)
  723. if curAddrFound {
  724. ac.addrs = addrs
  725. }
  726. return curAddrFound
  727. }
  728. // GetMethodConfig gets the method config of the input method.
  729. // If there's an exact match for input method (i.e. /service/method), we return
  730. // the corresponding MethodConfig.
  731. // If there isn't an exact match for the input method, we look for the default config
  732. // under the service (i.e /service/). If there is a default MethodConfig for
  733. // the service, we return it.
  734. // Otherwise, we return an empty MethodConfig.
  735. func (cc *ClientConn) GetMethodConfig(method string) MethodConfig {
  736. // TODO: Avoid the locking here.
  737. cc.mu.RLock()
  738. defer cc.mu.RUnlock()
  739. if cc.sc == nil {
  740. return MethodConfig{}
  741. }
  742. m, ok := cc.sc.Methods[method]
  743. if !ok {
  744. i := strings.LastIndex(method, "/")
  745. m = cc.sc.Methods[method[:i+1]]
  746. }
  747. return m
  748. }
  749. func (cc *ClientConn) healthCheckConfig() *healthCheckConfig {
  750. cc.mu.RLock()
  751. defer cc.mu.RUnlock()
  752. if cc.sc == nil {
  753. return nil
  754. }
  755. return cc.sc.healthCheckConfig
  756. }
  757. func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) {
  758. t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickOptions{
  759. FullMethodName: method,
  760. })
  761. if err != nil {
  762. return nil, nil, toRPCErr(err)
  763. }
  764. return t, done, nil
  765. }
  766. func (cc *ClientConn) applyServiceConfig(sc *ServiceConfig) error {
  767. if sc == nil {
  768. // should never reach here.
  769. return fmt.Errorf("got nil pointer for service config")
  770. }
  771. cc.sc = sc
  772. if cc.sc.retryThrottling != nil {
  773. newThrottler := &retryThrottler{
  774. tokens: cc.sc.retryThrottling.MaxTokens,
  775. max: cc.sc.retryThrottling.MaxTokens,
  776. thresh: cc.sc.retryThrottling.MaxTokens / 2,
  777. ratio: cc.sc.retryThrottling.TokenRatio,
  778. }
  779. cc.retryThrottler.Store(newThrottler)
  780. } else {
  781. cc.retryThrottler.Store((*retryThrottler)(nil))
  782. }
  783. return nil
  784. }
  785. func (cc *ClientConn) resolveNow(o resolver.ResolveNowOption) {
  786. cc.mu.RLock()
  787. r := cc.resolverWrapper
  788. cc.mu.RUnlock()
  789. if r == nil {
  790. return
  791. }
  792. go r.resolveNow(o)
  793. }
  794. // ResetConnectBackoff wakes up all subchannels in transient failure and causes
  795. // them to attempt another connection immediately. It also resets the backoff
  796. // times used for subsequent attempts regardless of the current state.
  797. //
  798. // In general, this function should not be used. Typical service or network
  799. // outages result in a reasonable client reconnection strategy by default.
  800. // However, if a previously unavailable network becomes available, this may be
  801. // used to trigger an immediate reconnect.
  802. //
  803. // This API is EXPERIMENTAL.
  804. func (cc *ClientConn) ResetConnectBackoff() {
  805. cc.mu.Lock()
  806. defer cc.mu.Unlock()
  807. for ac := range cc.conns {
  808. ac.resetConnectBackoff()
  809. }
  810. }
// Close tears down the ClientConn and all underlying connections.
//
// It returns ErrClientConnClosing if cc.conns is already nil (Close itself
// sets it to nil, so a second call takes this path), and nil otherwise.
func (cc *ClientConn) Close() error {
	// Cancel the ClientConn's context on every return path.
	defer cc.cancel()

	cc.mu.Lock()
	if cc.conns == nil {
		cc.mu.Unlock()
		return ErrClientConnClosing
	}
	// Detach the connection set and both wrappers while holding cc.mu so that
	// they can be shut down below without the lock held.
	conns := cc.conns
	cc.conns = nil
	cc.csMgr.updateState(connectivity.Shutdown)

	rWrapper := cc.resolverWrapper
	cc.resolverWrapper = nil
	bWrapper := cc.balancerWrapper
	cc.balancerWrapper = nil
	cc.mu.Unlock()

	cc.blockingpicker.close()

	if rWrapper != nil {
		rWrapper.close()
	}
	if bWrapper != nil {
		bWrapper.close()
	}

	// Tear down every addrConn that was registered at the time of the call.
	for ac := range conns {
		ac.tearDown(ErrClientConnClosing)
	}
	if channelz.IsOn() {
		ted := &channelz.TraceEventDesc{
			Desc:     "Channel Deleted",
			Severity: channelz.CtINFO,
		}
		// A non-zero parent ID means this is a nested channel; attach an
		// event describing the deletion for the parent as well.
		if cc.dopts.channelzParentID != 0 {
			ted.Parent = &channelz.TraceEventDesc{
				Desc:     fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID),
				Severity: channelz.CtINFO,
			}
		}
		channelz.AddTraceEvent(cc.channelzID, ted)
		// TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
		// the entity being deleted, and thus prevent it from being deleted right away.
		channelz.RemoveEntry(cc.channelzID)
	}
	return nil
}
// addrConn is a network connection to a given address.
//
// In the methods visible in this file, state, transport, curAddr, addrs,
// backoffIdx and resetBackoff are read and written with mu held.
type addrConn struct {
	// ctx is the parent of the connect and health-check contexts; cancel is
	// invoked by tearDown.
	ctx    context.Context
	cancel context.CancelFunc

	cc     *ClientConn                 // The ClientConn this addrConn reports state changes to.
	dopts  dialOptions                 // Dial options (backoff strategy, connect options, min connect timeout).
	acbw   balancer.SubConn            // Passed to cc.handleSubConnStateChange on every state change.
	scopts balancer.NewSubConnOptions  // Per-SubConn options (CredsBundle, HealthCheckEnabled).

	// transport is set when there's a viable transport (note: ac state may not be READY as LB channel
	// health checking may require server to report healthy to set ac to READY), and is reset
	// to nil when the current transport should no longer be used to create a stream (e.g. after GoAway
	// is received, transport is closed, ac has been torn down).
	transport transport.ClientTransport // The current transport.

	mu      sync.Mutex
	curAddr resolver.Address   // The current address.
	addrs   []resolver.Address // All addresses that the resolver resolved to.

	// Use updateConnectivityState for updating addrConn's connectivity state.
	state connectivity.State

	backoffIdx int // Needs to be stateful for resetConnectBackoff.
	// resetBackoff is closed and replaced by resetConnectBackoff; the backoff
	// wait in resetTransport selects on it to end the wait early.
	resetBackoff chan struct{}

	channelzID int64 // channelz unique identification number.
	czData     *channelzData // Call counters, updated atomically by the incrCalls* methods.
}
  878. // Note: this requires a lock on ac.mu.
  879. func (ac *addrConn) updateConnectivityState(s connectivity.State) {
  880. if ac.state == s {
  881. return
  882. }
  883. updateMsg := fmt.Sprintf("Subchannel Connectivity change to %v", s)
  884. ac.state = s
  885. if channelz.IsOn() {
  886. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  887. Desc: updateMsg,
  888. Severity: channelz.CtINFO,
  889. })
  890. }
  891. ac.cc.handleSubConnStateChange(ac.acbw, s)
  892. }
  893. // adjustParams updates parameters used to create transports upon
  894. // receiving a GoAway.
  895. func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
  896. switch r {
  897. case transport.GoAwayTooManyPings:
  898. v := 2 * ac.dopts.copts.KeepaliveParams.Time
  899. ac.cc.mu.Lock()
  900. if v > ac.cc.mkp.Time {
  901. ac.cc.mkp.Time = v
  902. }
  903. ac.cc.mu.Unlock()
  904. }
  905. }
// resetTransport is the connect loop of the addrConn; connect starts it in
// its own goroutine. It returns only once ac is observed in Shutdown or
// ac.ctx is done.
//
// Each iteration:
//  1. asks the resolver to re-resolve (every iteration but the first);
//  2. moves ac to CONNECTING and tries all addresses with a deadline of
//     max(minimum connect timeout, current backoff duration);
//  3. on failure, moves ac to TRANSIENT_FAILURE and waits for the backoff
//     timer, a backoff reset, or ac.ctx cancellation, then retries;
//  4. on success, records the transport, starts health checking, and blocks
//     until the transport's reconnect event fires before looping again.
func (ac *addrConn) resetTransport() {
	for i := 0; ; i++ {
		if i > 0 {
			ac.cc.resolveNow(resolver.ResolveNowOption{})
		}

		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}

		// Snapshot the address list for this attempt while holding the lock.
		addrs := ac.addrs
		backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
		// This will be the duration that dial gets to finish.
		dialDuration := minConnectTimeout
		if ac.dopts.minConnectTimeout != nil {
			dialDuration = ac.dopts.minConnectTimeout()
		}

		if dialDuration < backoffFor {
			// Give dial more time as we keep failing to connect.
			dialDuration = backoffFor
		}
		// We can potentially spend all the time trying the first address, and
		// if the server accepts the connection and then hangs, the following
		// addresses will never be tried.
		//
		// The spec doesn't mention what should be done for multiple addresses.
		// https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
		connectDeadline := time.Now().Add(dialDuration)

		ac.updateConnectivityState(connectivity.Connecting)
		ac.transport = nil
		ac.mu.Unlock()

		newTr, addr, reconnect, err := ac.tryAllAddrs(addrs, connectDeadline)
		if err != nil {
			// After exhausting all addresses, the addrConn enters
			// TRANSIENT_FAILURE.
			ac.mu.Lock()
			if ac.state == connectivity.Shutdown {
				ac.mu.Unlock()
				return
			}
			ac.updateConnectivityState(connectivity.TransientFailure)

			// Backoff.
			// Capture the reset channel under the lock; resetConnectBackoff
			// closes this channel and installs a new one.
			b := ac.resetBackoff
			ac.mu.Unlock()

			timer := time.NewTimer(backoffFor)
			select {
			case <-timer.C:
				// The full backoff elapsed: the next failure backs off longer.
				ac.mu.Lock()
				ac.backoffIdx++
				ac.mu.Unlock()
			case <-b:
				// Backoff was reset: retry immediately.
				timer.Stop()
			case <-ac.ctx.Done():
				timer.Stop()
				return
			}
			continue
		}

		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			// ac was torn down while connecting; the new transport is not
			// needed.
			ac.mu.Unlock()
			newTr.Close()
			return
		}
		ac.curAddr = addr
		ac.transport = newTr
		ac.backoffIdx = 0

		// The health check context is cancelled once this transport goes
		// down (see hcancel below).
		hctx, hcancel := context.WithCancel(ac.ctx)
		ac.startHealthCheck(hctx)
		ac.mu.Unlock()

		// Block until the created transport is down. And when this happens,
		// we restart from the top of the addr list.
		<-reconnect.Done()
		hcancel()
		// restart connecting - the top of the loop will set state to
		// CONNECTING. This is against the current connectivity semantics doc,
		// however it allows for graceful behavior for RPCs not yet dispatched
		// - unfortunate timing would otherwise lead to the RPC failing even
		// though the TRANSIENT_FAILURE state (called for by the doc) would be
		// instantaneous.
		//
		// Ideally we should transition to Idle here and block until there is
		// RPC activity that leads to the balancer requesting a reconnect of
		// the associated SubConn.
	}
}
// tryAllAddrs tries to create a connection to each of addrs in order and
// stops at the first successful one. It returns the transport, the address
// and an Event in the successful case. The Event fires when the returned
// transport disconnects.
//
// It returns errConnClosing if ac is found in Shutdown before an attempt, and
// a generic error once every address has failed (including when addrs is
// empty). Each individual connection error is reported to the blocking
// picker.
func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.Time) (transport.ClientTransport, resolver.Address, *grpcsync.Event, error) {
	for _, addr := range addrs {
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return nil, resolver.Address{}, nil, errConnClosing
		}

		// Refresh the keepalive parameters from the ClientConn before every
		// attempt; adjustParams may have raised cc.mkp.Time. Note that cc.mu
		// is taken here while ac.mu is held.
		ac.cc.mu.RLock()
		ac.dopts.copts.KeepaliveParams = ac.cc.mkp
		ac.cc.mu.RUnlock()

		// Work on a copy so the per-SubConn credentials bundle, when set,
		// overrides the dial option only for this attempt.
		copts := ac.dopts.copts
		if ac.scopts.CredsBundle != nil {
			copts.CredsBundle = ac.scopts.CredsBundle
		}
		ac.mu.Unlock()

		if channelz.IsOn() {
			channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
				Desc:     fmt.Sprintf("Subchannel picks a new address %q to connect", addr.Addr),
				Severity: channelz.CtINFO,
			})
		}

		newTr, reconnect, err := ac.createTransport(addr, copts, connectDeadline)
		if err == nil {
			return newTr, addr, reconnect, nil
		}
		ac.cc.blockingpicker.updateConnectionError(err)
	}

	// Couldn't connect to any address.
	return nil, resolver.Address{}, nil, fmt.Errorf("couldn't connect to any address")
}
  1025. // createTransport creates a connection to addr. It returns the transport and a
  1026. // Event in the successful case. The Event fires when the returned transport
  1027. // disconnects.
  1028. func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) (transport.ClientTransport, *grpcsync.Event, error) {
  1029. prefaceReceived := make(chan struct{})
  1030. onCloseCalled := make(chan struct{})
  1031. reconnect := grpcsync.NewEvent()
  1032. target := transport.TargetInfo{
  1033. Addr: addr.Addr,
  1034. Metadata: addr.Metadata,
  1035. Authority: ac.cc.authority,
  1036. }
  1037. once := sync.Once{}
  1038. onGoAway := func(r transport.GoAwayReason) {
  1039. ac.mu.Lock()
  1040. ac.adjustParams(r)
  1041. once.Do(func() {
  1042. if ac.state == connectivity.Ready {
  1043. // Prevent this SubConn from being used for new RPCs by setting its
  1044. // state to Connecting.
  1045. //
  1046. // TODO: this should be Idle when grpc-go properly supports it.
  1047. ac.updateConnectivityState(connectivity.Connecting)
  1048. }
  1049. })
  1050. ac.mu.Unlock()
  1051. reconnect.Fire()
  1052. }
  1053. onClose := func() {
  1054. ac.mu.Lock()
  1055. once.Do(func() {
  1056. if ac.state == connectivity.Ready {
  1057. // Prevent this SubConn from being used for new RPCs by setting its
  1058. // state to Connecting.
  1059. //
  1060. // TODO: this should be Idle when grpc-go properly supports it.
  1061. ac.updateConnectivityState(connectivity.Connecting)
  1062. }
  1063. })
  1064. ac.mu.Unlock()
  1065. close(onCloseCalled)
  1066. reconnect.Fire()
  1067. }
  1068. onPrefaceReceipt := func() {
  1069. close(prefaceReceived)
  1070. }
  1071. connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline)
  1072. defer cancel()
  1073. if channelz.IsOn() {
  1074. copts.ChannelzParentID = ac.channelzID
  1075. }
  1076. newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, target, copts, onPrefaceReceipt, onGoAway, onClose)
  1077. if err != nil {
  1078. // newTr is either nil, or closed.
  1079. grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v. Err :%v. Reconnecting...", addr, err)
  1080. return nil, nil, err
  1081. }
  1082. select {
  1083. case <-time.After(connectDeadline.Sub(time.Now())):
  1084. // We didn't get the preface in time.
  1085. newTr.Close()
  1086. grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v: didn't receive server preface in time. Reconnecting...", addr)
  1087. return nil, nil, errors.New("timed out waiting for server handshake")
  1088. case <-prefaceReceived:
  1089. // We got the preface - huzzah! things are good.
  1090. case <-onCloseCalled:
  1091. // The transport has already closed - noop.
  1092. return nil, nil, errors.New("connection closed")
  1093. // TODO(deklerk) this should bail on ac.ctx.Done(). Add a test and fix.
  1094. }
  1095. return newTr, reconnect, nil
  1096. }
// startHealthCheck starts the health checking stream (RPC) to watch the health
// stats of this connection if health checking is requested and configured.
//
// LB channel health checking is enabled when all requirements below are met:
// 1. it is not disabled by the user with the WithDisableHealthCheck DialOption
// 2. internal.HealthCheckFunc is set by importing the grpc/healthcheck package
// 3. a service config with non-empty healthCheckConfig field is provided
// 4. the load balancer requests it
//
// It sets addrConn to READY if the health checking stream is not started.
// When the stream is started, the health check function drives the state
// through the setConnectivityState callback instead.
//
// ctx is handed to the health check function and to the streams it opens.
//
// Caller must hold ac.mu.
func (ac *addrConn) startHealthCheck(ctx context.Context) {
	var healthcheckManagingState bool
	// On every early return below the flag is still false, so the deferred
	// function marks the connection READY right away.
	defer func() {
		if !healthcheckManagingState {
			ac.updateConnectivityState(connectivity.Ready)
		}
	}()

	if ac.cc.dopts.disableHealthCheck {
		return
	}
	healthCheckConfig := ac.cc.healthCheckConfig()
	if healthCheckConfig == nil {
		return
	}
	if !ac.scopts.HealthCheckEnabled {
		return
	}
	healthCheckFunc := ac.cc.dopts.healthCheckFunc
	if healthCheckFunc == nil {
		// The health package is not imported to set health check function.
		//
		// TODO: add a link to the health check doc in the error message.
		grpclog.Error("Health check is requested but health check function is not set.")
		return
	}

	healthcheckManagingState = true

	// Set up the health check helper functions.
	// Both helpers compare against the transport that was current when health
	// checking started, and refuse to act once ac.transport has changed.
	currentTr := ac.transport
	newStream := func(method string) (interface{}, error) {
		ac.mu.Lock()
		if ac.transport != currentTr {
			ac.mu.Unlock()
			return nil, status.Error(codes.Canceled, "the provided transport is no longer valid to use")
		}
		ac.mu.Unlock()
		return newNonRetryClientStream(ctx, &StreamDesc{ServerStreams: true}, method, currentTr, ac)
	}
	setConnectivityState := func(s connectivity.State) {
		ac.mu.Lock()
		defer ac.mu.Unlock()
		if ac.transport != currentTr {
			return
		}
		ac.updateConnectivityState(s)
	}
	// Start the health checking stream.
	// The goroutine ends when the health check function returns; a non-nil
	// error from it is only logged.
	go func() {
		err := ac.cc.dopts.healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName)
		if err != nil {
			if status.Code(err) == codes.Unimplemented {
				if channelz.IsOn() {
					channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
						Desc:     "Subchannel health check is unimplemented at server side, thus health check is disabled",
						Severity: channelz.CtError,
					})
				}
				grpclog.Error("Subchannel health check is unimplemented at server side, thus health check is disabled")
			} else {
				grpclog.Errorf("HealthCheckFunc exits with unexpected error %v", err)
			}
		}
	}()
}
  1172. func (ac *addrConn) resetConnectBackoff() {
  1173. ac.mu.Lock()
  1174. close(ac.resetBackoff)
  1175. ac.backoffIdx = 0
  1176. ac.resetBackoff = make(chan struct{})
  1177. ac.mu.Unlock()
  1178. }
  1179. // getReadyTransport returns the transport if ac's state is READY.
  1180. // Otherwise it returns nil, false.
  1181. // If ac's state is IDLE, it will trigger ac to connect.
  1182. func (ac *addrConn) getReadyTransport() (transport.ClientTransport, bool) {
  1183. ac.mu.Lock()
  1184. if ac.state == connectivity.Ready && ac.transport != nil {
  1185. t := ac.transport
  1186. ac.mu.Unlock()
  1187. return t, true
  1188. }
  1189. var idle bool
  1190. if ac.state == connectivity.Idle {
  1191. idle = true
  1192. }
  1193. ac.mu.Unlock()
  1194. // Trigger idle ac to connect.
  1195. if idle {
  1196. ac.connect()
  1197. }
  1198. return nil, false
  1199. }
// tearDown starts to tear down the addrConn.
//
// It is a no-op when ac is already in Shutdown. Otherwise it clears the
// current transport and address, moves ac to Shutdown, cancels ac's context
// and, when channelz is on, records the deletion and removes the channelz
// entry. The previous transport is gracefully closed here only when err is
// errConnDrain.
//
// TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
// some edge cases (e.g., the caller opens and closes many addrConn's in a
// tight loop).
// tearDown doesn't remove ac from ac.cc.conns.
func (ac *addrConn) tearDown(err error) {
	ac.mu.Lock()
	if ac.state == connectivity.Shutdown {
		ac.mu.Unlock()
		return
	}
	curTr := ac.transport
	ac.transport = nil
	// We have to set the state to Shutdown before anything else to prevent races
	// between setting the state and logic that waits on context cancelation / etc.
	ac.updateConnectivityState(connectivity.Shutdown)
	ac.cancel()
	ac.curAddr = resolver.Address{}
	if err == errConnDrain && curTr != nil {
		// GracefulClose(...) may be executed multiple times when
		// i) receiving multiple GoAway frames from the server; or
		// ii) there are concurrent name resolver/Balancer triggered
		// address removal and GoAway.
		// We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu.
		ac.mu.Unlock()
		curTr.GracefulClose()
		ac.mu.Lock()
	}
	if channelz.IsOn() {
		channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
			Desc:     "Subchannel Deleted",
			Severity: channelz.CtINFO,
			Parent: &channelz.TraceEventDesc{
				Desc:     fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID),
				Severity: channelz.CtINFO,
			},
		})
		// TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
		// the entity being deleted, and thus prevent it from being deleted right away.
		channelz.RemoveEntry(ac.channelzID)
	}
	ac.mu.Unlock()
}
  1243. func (ac *addrConn) getState() connectivity.State {
  1244. ac.mu.Lock()
  1245. defer ac.mu.Unlock()
  1246. return ac.state
  1247. }
  1248. func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric {
  1249. ac.mu.Lock()
  1250. addr := ac.curAddr.Addr
  1251. ac.mu.Unlock()
  1252. return &channelz.ChannelInternalMetric{
  1253. State: ac.getState(),
  1254. Target: addr,
  1255. CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted),
  1256. CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded),
  1257. CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed),
  1258. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)),
  1259. }
  1260. }
  1261. func (ac *addrConn) incrCallsStarted() {
  1262. atomic.AddInt64(&ac.czData.callsStarted, 1)
  1263. atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano())
  1264. }
  1265. func (ac *addrConn) incrCallsSucceeded() {
  1266. atomic.AddInt64(&ac.czData.callsSucceeded, 1)
  1267. }
  1268. func (ac *addrConn) incrCallsFailed() {
  1269. atomic.AddInt64(&ac.czData.callsFailed, 1)
  1270. }
  1271. type retryThrottler struct {
  1272. max float64
  1273. thresh float64
  1274. ratio float64
  1275. mu sync.Mutex
  1276. tokens float64 // TODO(dfawley): replace with atomic and remove lock.
  1277. }
  1278. // throttle subtracts a retry token from the pool and returns whether a retry
  1279. // should be throttled (disallowed) based upon the retry throttling policy in
  1280. // the service config.
  1281. func (rt *retryThrottler) throttle() bool {
  1282. if rt == nil {
  1283. return false
  1284. }
  1285. rt.mu.Lock()
  1286. defer rt.mu.Unlock()
  1287. rt.tokens--
  1288. if rt.tokens < 0 {
  1289. rt.tokens = 0
  1290. }
  1291. return rt.tokens <= rt.thresh
  1292. }
  1293. func (rt *retryThrottler) successfulRPC() {
  1294. if rt == nil {
  1295. return
  1296. }
  1297. rt.mu.Lock()
  1298. defer rt.mu.Unlock()
  1299. rt.tokens += rt.ratio
  1300. if rt.tokens > rt.max {
  1301. rt.tokens = rt.max
  1302. }
  1303. }
// channelzChannel wraps a ClientConn and exposes its channelz metrics through
// an exported ChannelzMetric method.
type channelzChannel struct {
	cc *ClientConn
}

// ChannelzMetric returns the channelz snapshot of the wrapped ClientConn by
// delegating to its channelzMetric method.
func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
	return c.cc.channelzMetric()
}
// ErrClientConnTimeout indicates that the ClientConn cannot establish the
// underlying connections within the specified timeout.
//
// Deprecated: This error is never returned by grpc and should not be
// referenced by users.
var ErrClientConnTimeout = errors.New("grpc: timed out when dialing")