retry.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  17. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  18. "golang.org/x/net/context"
  19. "google.golang.org/grpc"
  20. "google.golang.org/grpc/codes"
  21. "google.golang.org/grpc/status"
  22. )
  23. type rpcFunc func(ctx context.Context) error
  24. type retryRPCFunc func(context.Context, rpcFunc) error
  25. type retryStopErrFunc func(error) bool
  26. func isRepeatableStopError(err error) bool {
  27. eErr := rpctypes.Error(err)
  28. // always stop retry on etcd errors
  29. if serverErr, ok := eErr.(rpctypes.EtcdError); ok && serverErr.Code() != codes.Unavailable {
  30. return true
  31. }
  32. // only retry if unavailable
  33. ev, _ := status.FromError(err)
  34. return ev.Code() != codes.Unavailable
  35. }
  36. func isNonRepeatableStopError(err error) bool {
  37. ev, _ := status.FromError(err)
  38. if ev.Code() != codes.Unavailable {
  39. return true
  40. }
  41. desc := rpctypes.ErrorDesc(err)
  42. return desc != "there is no address available" && desc != "there is no connection available"
  43. }
  44. func (c *Client) newRetryWrapper(isStop retryStopErrFunc) retryRPCFunc {
  45. return func(rpcCtx context.Context, f rpcFunc) error {
  46. for {
  47. if err := readyWait(rpcCtx, c.ctx, c.balancer.ConnectNotify()); err != nil {
  48. return err
  49. }
  50. pinned := c.balancer.pinned()
  51. err := f(rpcCtx)
  52. if err == nil {
  53. return nil
  54. }
  55. if logger.V(4) {
  56. logger.Infof("clientv3/retry: error %q on pinned endpoint %q", err.Error(), pinned)
  57. }
  58. if s, ok := status.FromError(err); ok && (s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded || s.Code() == codes.Internal) {
  59. // mark this before endpoint switch is triggered
  60. c.balancer.hostPortError(pinned, err)
  61. c.balancer.next()
  62. if logger.V(4) {
  63. logger.Infof("clientv3/retry: switching from %q due to error %q", pinned, err.Error())
  64. }
  65. }
  66. if isStop(err) {
  67. return err
  68. }
  69. }
  70. }
  71. }
  72. func (c *Client) newAuthRetryWrapper() retryRPCFunc {
  73. return func(rpcCtx context.Context, f rpcFunc) error {
  74. for {
  75. pinned := c.balancer.pinned()
  76. err := f(rpcCtx)
  77. if err == nil {
  78. return nil
  79. }
  80. if logger.V(4) {
  81. logger.Infof("clientv3/auth-retry: error %q on pinned endpoint %q", err.Error(), pinned)
  82. }
  83. // always stop retry on etcd errors other than invalid auth token
  84. if rpctypes.Error(err) == rpctypes.ErrInvalidAuthToken {
  85. gterr := c.getToken(rpcCtx)
  86. if gterr != nil {
  87. if logger.V(4) {
  88. logger.Infof("clientv3/auth-retry: cannot retry due to error %q(%q) on pinned endpoint %q", err.Error(), gterr.Error(), pinned)
  89. }
  90. return err // return the original error for simplicity
  91. }
  92. continue
  93. }
  94. return err
  95. }
  96. }
  97. }
  98. // RetryKVClient implements a KVClient.
  99. func RetryKVClient(c *Client) pb.KVClient {
  100. repeatableRetry := c.newRetryWrapper(isRepeatableStopError)
  101. nonRepeatableRetry := c.newRetryWrapper(isNonRepeatableStopError)
  102. conn := pb.NewKVClient(c.conn)
  103. retryBasic := &retryKVClient{&nonRepeatableKVClient{conn, nonRepeatableRetry}, repeatableRetry}
  104. retryAuthWrapper := c.newAuthRetryWrapper()
  105. return &retryKVClient{
  106. &nonRepeatableKVClient{retryBasic, retryAuthWrapper},
  107. retryAuthWrapper}
  108. }
  109. type retryKVClient struct {
  110. *nonRepeatableKVClient
  111. repeatableRetry retryRPCFunc
  112. }
  113. func (rkv *retryKVClient) Range(ctx context.Context, in *pb.RangeRequest, opts ...grpc.CallOption) (resp *pb.RangeResponse, err error) {
  114. err = rkv.repeatableRetry(ctx, func(rctx context.Context) error {
  115. resp, err = rkv.kc.Range(rctx, in, opts...)
  116. return err
  117. })
  118. return resp, err
  119. }
  120. type nonRepeatableKVClient struct {
  121. kc pb.KVClient
  122. nonRepeatableRetry retryRPCFunc
  123. }
  124. func (rkv *nonRepeatableKVClient) Put(ctx context.Context, in *pb.PutRequest, opts ...grpc.CallOption) (resp *pb.PutResponse, err error) {
  125. err = rkv.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  126. resp, err = rkv.kc.Put(rctx, in, opts...)
  127. return err
  128. })
  129. return resp, err
  130. }
  131. func (rkv *nonRepeatableKVClient) DeleteRange(ctx context.Context, in *pb.DeleteRangeRequest, opts ...grpc.CallOption) (resp *pb.DeleteRangeResponse, err error) {
  132. err = rkv.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  133. resp, err = rkv.kc.DeleteRange(rctx, in, opts...)
  134. return err
  135. })
  136. return resp, err
  137. }
  138. func (rkv *nonRepeatableKVClient) Txn(ctx context.Context, in *pb.TxnRequest, opts ...grpc.CallOption) (resp *pb.TxnResponse, err error) {
  139. // TODO: repeatableRetry if read-only txn
  140. err = rkv.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  141. resp, err = rkv.kc.Txn(rctx, in, opts...)
  142. return err
  143. })
  144. return resp, err
  145. }
  146. func (rkv *nonRepeatableKVClient) Compact(ctx context.Context, in *pb.CompactionRequest, opts ...grpc.CallOption) (resp *pb.CompactionResponse, err error) {
  147. err = rkv.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  148. resp, err = rkv.kc.Compact(rctx, in, opts...)
  149. return err
  150. })
  151. return resp, err
  152. }
  153. type retryLeaseClient struct {
  154. lc pb.LeaseClient
  155. repeatableRetry retryRPCFunc
  156. }
  157. // RetryLeaseClient implements a LeaseClient.
  158. func RetryLeaseClient(c *Client) pb.LeaseClient {
  159. retry := &retryLeaseClient{
  160. pb.NewLeaseClient(c.conn),
  161. c.newRetryWrapper(isRepeatableStopError),
  162. }
  163. return &retryLeaseClient{retry, c.newAuthRetryWrapper()}
  164. }
  165. func (rlc *retryLeaseClient) LeaseTimeToLive(ctx context.Context, in *pb.LeaseTimeToLiveRequest, opts ...grpc.CallOption) (resp *pb.LeaseTimeToLiveResponse, err error) {
  166. err = rlc.repeatableRetry(ctx, func(rctx context.Context) error {
  167. resp, err = rlc.lc.LeaseTimeToLive(rctx, in, opts...)
  168. return err
  169. })
  170. return resp, err
  171. }
  172. func (rlc *retryLeaseClient) LeaseGrant(ctx context.Context, in *pb.LeaseGrantRequest, opts ...grpc.CallOption) (resp *pb.LeaseGrantResponse, err error) {
  173. err = rlc.repeatableRetry(ctx, func(rctx context.Context) error {
  174. resp, err = rlc.lc.LeaseGrant(rctx, in, opts...)
  175. return err
  176. })
  177. return resp, err
  178. }
  179. func (rlc *retryLeaseClient) LeaseRevoke(ctx context.Context, in *pb.LeaseRevokeRequest, opts ...grpc.CallOption) (resp *pb.LeaseRevokeResponse, err error) {
  180. err = rlc.repeatableRetry(ctx, func(rctx context.Context) error {
  181. resp, err = rlc.lc.LeaseRevoke(rctx, in, opts...)
  182. return err
  183. })
  184. return resp, err
  185. }
  186. func (rlc *retryLeaseClient) LeaseKeepAlive(ctx context.Context, opts ...grpc.CallOption) (stream pb.Lease_LeaseKeepAliveClient, err error) {
  187. err = rlc.repeatableRetry(ctx, func(rctx context.Context) error {
  188. stream, err = rlc.lc.LeaseKeepAlive(rctx, opts...)
  189. return err
  190. })
  191. return stream, err
  192. }
  193. type retryClusterClient struct {
  194. *nonRepeatableClusterClient
  195. repeatableRetry retryRPCFunc
  196. }
  197. // RetryClusterClient implements a ClusterClient.
  198. func RetryClusterClient(c *Client) pb.ClusterClient {
  199. repeatableRetry := c.newRetryWrapper(isRepeatableStopError)
  200. nonRepeatableRetry := c.newRetryWrapper(isNonRepeatableStopError)
  201. cc := pb.NewClusterClient(c.conn)
  202. return &retryClusterClient{&nonRepeatableClusterClient{cc, nonRepeatableRetry}, repeatableRetry}
  203. }
  204. func (rcc *retryClusterClient) MemberList(ctx context.Context, in *pb.MemberListRequest, opts ...grpc.CallOption) (resp *pb.MemberListResponse, err error) {
  205. err = rcc.repeatableRetry(ctx, func(rctx context.Context) error {
  206. resp, err = rcc.cc.MemberList(rctx, in, opts...)
  207. return err
  208. })
  209. return resp, err
  210. }
  211. type nonRepeatableClusterClient struct {
  212. cc pb.ClusterClient
  213. nonRepeatableRetry retryRPCFunc
  214. }
  215. func (rcc *nonRepeatableClusterClient) MemberAdd(ctx context.Context, in *pb.MemberAddRequest, opts ...grpc.CallOption) (resp *pb.MemberAddResponse, err error) {
  216. err = rcc.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  217. resp, err = rcc.cc.MemberAdd(rctx, in, opts...)
  218. return err
  219. })
  220. return resp, err
  221. }
  222. func (rcc *nonRepeatableClusterClient) MemberRemove(ctx context.Context, in *pb.MemberRemoveRequest, opts ...grpc.CallOption) (resp *pb.MemberRemoveResponse, err error) {
  223. err = rcc.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  224. resp, err = rcc.cc.MemberRemove(rctx, in, opts...)
  225. return err
  226. })
  227. return resp, err
  228. }
  229. func (rcc *nonRepeatableClusterClient) MemberUpdate(ctx context.Context, in *pb.MemberUpdateRequest, opts ...grpc.CallOption) (resp *pb.MemberUpdateResponse, err error) {
  230. err = rcc.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  231. resp, err = rcc.cc.MemberUpdate(rctx, in, opts...)
  232. return err
  233. })
  234. return resp, err
  235. }
  236. // RetryMaintenanceClient implements a Maintenance.
  237. func RetryMaintenanceClient(c *Client, conn *grpc.ClientConn) pb.MaintenanceClient {
  238. repeatableRetry := c.newRetryWrapper(isRepeatableStopError)
  239. nonRepeatableRetry := c.newRetryWrapper(isNonRepeatableStopError)
  240. mc := pb.NewMaintenanceClient(conn)
  241. return &retryMaintenanceClient{&nonRepeatableMaintenanceClient{mc, nonRepeatableRetry}, repeatableRetry}
  242. }
  243. type retryMaintenanceClient struct {
  244. *nonRepeatableMaintenanceClient
  245. repeatableRetry retryRPCFunc
  246. }
  247. func (rmc *retryMaintenanceClient) Alarm(ctx context.Context, in *pb.AlarmRequest, opts ...grpc.CallOption) (resp *pb.AlarmResponse, err error) {
  248. err = rmc.repeatableRetry(ctx, func(rctx context.Context) error {
  249. resp, err = rmc.mc.Alarm(rctx, in, opts...)
  250. return err
  251. })
  252. return resp, err
  253. }
  254. func (rmc *retryMaintenanceClient) Status(ctx context.Context, in *pb.StatusRequest, opts ...grpc.CallOption) (resp *pb.StatusResponse, err error) {
  255. err = rmc.repeatableRetry(ctx, func(rctx context.Context) error {
  256. resp, err = rmc.mc.Status(rctx, in, opts...)
  257. return err
  258. })
  259. return resp, err
  260. }
  261. func (rmc *retryMaintenanceClient) Hash(ctx context.Context, in *pb.HashRequest, opts ...grpc.CallOption) (resp *pb.HashResponse, err error) {
  262. err = rmc.repeatableRetry(ctx, func(rctx context.Context) error {
  263. resp, err = rmc.mc.Hash(rctx, in, opts...)
  264. return err
  265. })
  266. return resp, err
  267. }
  268. func (rmc *retryMaintenanceClient) Snapshot(ctx context.Context, in *pb.SnapshotRequest, opts ...grpc.CallOption) (stream pb.Maintenance_SnapshotClient, err error) {
  269. err = rmc.repeatableRetry(ctx, func(rctx context.Context) error {
  270. stream, err = rmc.mc.Snapshot(rctx, in, opts...)
  271. return err
  272. })
  273. return stream, err
  274. }
  275. type nonRepeatableMaintenanceClient struct {
  276. mc pb.MaintenanceClient
  277. nonRepeatableRetry retryRPCFunc
  278. }
  279. func (rmc *nonRepeatableMaintenanceClient) Defragment(ctx context.Context, in *pb.DefragmentRequest, opts ...grpc.CallOption) (resp *pb.DefragmentResponse, err error) {
  280. err = rmc.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  281. resp, err = rmc.mc.Defragment(rctx, in, opts...)
  282. return err
  283. })
  284. return resp, err
  285. }
  286. type retryAuthClient struct {
  287. *nonRepeatableAuthClient
  288. repeatableRetry retryRPCFunc
  289. }
  290. // RetryAuthClient implements a AuthClient.
  291. func RetryAuthClient(c *Client) pb.AuthClient {
  292. repeatableRetry := c.newRetryWrapper(isRepeatableStopError)
  293. nonRepeatableRetry := c.newRetryWrapper(isNonRepeatableStopError)
  294. ac := pb.NewAuthClient(c.conn)
  295. return &retryAuthClient{&nonRepeatableAuthClient{ac, nonRepeatableRetry}, repeatableRetry}
  296. }
  297. func (rac *retryAuthClient) UserList(ctx context.Context, in *pb.AuthUserListRequest, opts ...grpc.CallOption) (resp *pb.AuthUserListResponse, err error) {
  298. err = rac.repeatableRetry(ctx, func(rctx context.Context) error {
  299. resp, err = rac.ac.UserList(rctx, in, opts...)
  300. return err
  301. })
  302. return resp, err
  303. }
  304. func (rac *retryAuthClient) UserGet(ctx context.Context, in *pb.AuthUserGetRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGetResponse, err error) {
  305. err = rac.repeatableRetry(ctx, func(rctx context.Context) error {
  306. resp, err = rac.ac.UserGet(rctx, in, opts...)
  307. return err
  308. })
  309. return resp, err
  310. }
  311. func (rac *retryAuthClient) RoleGet(ctx context.Context, in *pb.AuthRoleGetRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGetResponse, err error) {
  312. err = rac.repeatableRetry(ctx, func(rctx context.Context) error {
  313. resp, err = rac.ac.RoleGet(rctx, in, opts...)
  314. return err
  315. })
  316. return resp, err
  317. }
  318. func (rac *retryAuthClient) RoleList(ctx context.Context, in *pb.AuthRoleListRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleListResponse, err error) {
  319. err = rac.repeatableRetry(ctx, func(rctx context.Context) error {
  320. resp, err = rac.ac.RoleList(rctx, in, opts...)
  321. return err
  322. })
  323. return resp, err
  324. }
  325. type nonRepeatableAuthClient struct {
  326. ac pb.AuthClient
  327. nonRepeatableRetry retryRPCFunc
  328. }
  329. func (rac *nonRepeatableAuthClient) AuthEnable(ctx context.Context, in *pb.AuthEnableRequest, opts ...grpc.CallOption) (resp *pb.AuthEnableResponse, err error) {
  330. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  331. resp, err = rac.ac.AuthEnable(rctx, in, opts...)
  332. return err
  333. })
  334. return resp, err
  335. }
  336. func (rac *nonRepeatableAuthClient) AuthDisable(ctx context.Context, in *pb.AuthDisableRequest, opts ...grpc.CallOption) (resp *pb.AuthDisableResponse, err error) {
  337. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  338. resp, err = rac.ac.AuthDisable(rctx, in, opts...)
  339. return err
  340. })
  341. return resp, err
  342. }
  343. func (rac *nonRepeatableAuthClient) UserAdd(ctx context.Context, in *pb.AuthUserAddRequest, opts ...grpc.CallOption) (resp *pb.AuthUserAddResponse, err error) {
  344. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  345. resp, err = rac.ac.UserAdd(rctx, in, opts...)
  346. return err
  347. })
  348. return resp, err
  349. }
  350. func (rac *nonRepeatableAuthClient) UserDelete(ctx context.Context, in *pb.AuthUserDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthUserDeleteResponse, err error) {
  351. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  352. resp, err = rac.ac.UserDelete(rctx, in, opts...)
  353. return err
  354. })
  355. return resp, err
  356. }
  357. func (rac *nonRepeatableAuthClient) UserChangePassword(ctx context.Context, in *pb.AuthUserChangePasswordRequest, opts ...grpc.CallOption) (resp *pb.AuthUserChangePasswordResponse, err error) {
  358. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  359. resp, err = rac.ac.UserChangePassword(rctx, in, opts...)
  360. return err
  361. })
  362. return resp, err
  363. }
  364. func (rac *nonRepeatableAuthClient) UserGrantRole(ctx context.Context, in *pb.AuthUserGrantRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGrantRoleResponse, err error) {
  365. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  366. resp, err = rac.ac.UserGrantRole(rctx, in, opts...)
  367. return err
  368. })
  369. return resp, err
  370. }
  371. func (rac *nonRepeatableAuthClient) UserRevokeRole(ctx context.Context, in *pb.AuthUserRevokeRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserRevokeRoleResponse, err error) {
  372. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  373. resp, err = rac.ac.UserRevokeRole(rctx, in, opts...)
  374. return err
  375. })
  376. return resp, err
  377. }
  378. func (rac *nonRepeatableAuthClient) RoleAdd(ctx context.Context, in *pb.AuthRoleAddRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleAddResponse, err error) {
  379. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  380. resp, err = rac.ac.RoleAdd(rctx, in, opts...)
  381. return err
  382. })
  383. return resp, err
  384. }
  385. func (rac *nonRepeatableAuthClient) RoleDelete(ctx context.Context, in *pb.AuthRoleDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleDeleteResponse, err error) {
  386. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  387. resp, err = rac.ac.RoleDelete(rctx, in, opts...)
  388. return err
  389. })
  390. return resp, err
  391. }
  392. func (rac *nonRepeatableAuthClient) RoleGrantPermission(ctx context.Context, in *pb.AuthRoleGrantPermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGrantPermissionResponse, err error) {
  393. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  394. resp, err = rac.ac.RoleGrantPermission(rctx, in, opts...)
  395. return err
  396. })
  397. return resp, err
  398. }
  399. func (rac *nonRepeatableAuthClient) RoleRevokePermission(ctx context.Context, in *pb.AuthRoleRevokePermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleRevokePermissionResponse, err error) {
  400. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  401. resp, err = rac.ac.RoleRevokePermission(rctx, in, opts...)
  402. return err
  403. })
  404. return resp, err
  405. }
  406. func (rac *nonRepeatableAuthClient) Authenticate(ctx context.Context, in *pb.AuthenticateRequest, opts ...grpc.CallOption) (resp *pb.AuthenticateResponse, err error) {
  407. err = rac.nonRepeatableRetry(ctx, func(rctx context.Context) error {
  408. resp, err = rac.ac.Authenticate(rctx, in, opts...)
  409. return err
  410. })
  411. return resp, err
  412. }