retry.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  18. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  19. "google.golang.org/grpc"
  20. "google.golang.org/grpc/codes"
  21. "google.golang.org/grpc/status"
  22. )
  23. type retryPolicy uint8
  24. const (
  25. repeatable retryPolicy = iota
  26. nonRepeatable
  27. )
  28. type rpcFunc func(ctx context.Context) error
  29. type retryRPCFunc func(context.Context, rpcFunc, retryPolicy) error
  30. type retryStopErrFunc func(error) bool
  31. // immutable requests (e.g. Get) should be retried unless it's
  32. // an obvious server-side error (e.g. rpctypes.ErrRequestTooLarge).
  33. //
  34. // "isRepeatableStopError" returns "true" when an immutable request
  35. // is interrupted by server-side or gRPC-side error and its status
  36. // code is not transient (!= codes.Unavailable).
  37. //
  38. // Returning "true" means retry should stop, since client cannot
  39. // handle itself even with retries.
  40. func isRepeatableStopError(err error) bool {
  41. eErr := rpctypes.Error(err)
  42. // always stop retry on etcd errors
  43. if serverErr, ok := eErr.(rpctypes.EtcdError); ok && serverErr.Code() != codes.Unavailable {
  44. return true
  45. }
  46. // only retry if unavailable
  47. ev, _ := status.FromError(err)
  48. return ev.Code() != codes.Unavailable
  49. }
  50. // mutable requests (e.g. Put, Delete, Txn) should only be retried
  51. // when the status code is codes.Unavailable when initial connection
  52. // has not been established (no pinned endpoint).
  53. //
  54. // "isNonRepeatableStopError" returns "true" when a mutable request
  55. // is interrupted by non-transient error that client cannot handle itself,
  56. // or transient error while the connection has already been established
  57. // (pinned endpoint exists).
  58. //
  59. // Returning "true" means retry should stop, otherwise it violates
  60. // write-at-most-once semantics.
  61. func isNonRepeatableStopError(err error) bool {
  62. ev, _ := status.FromError(err)
  63. if ev.Code() != codes.Unavailable {
  64. return true
  65. }
  66. desc := rpctypes.ErrorDesc(err)
  67. return desc != "there is no address available" && desc != "there is no connection available"
  68. }
  69. func (c *Client) newRetryWrapper() retryRPCFunc {
  70. return func(rpcCtx context.Context, f rpcFunc, rp retryPolicy) error {
  71. var isStop retryStopErrFunc
  72. switch rp {
  73. case repeatable:
  74. isStop = isRepeatableStopError
  75. case nonRepeatable:
  76. isStop = isNonRepeatableStopError
  77. }
  78. for {
  79. if err := readyWait(rpcCtx, c.ctx, c.balancer.ConnectNotify()); err != nil {
  80. return err
  81. }
  82. pinned := c.balancer.pinned()
  83. err := f(rpcCtx)
  84. if err == nil {
  85. return nil
  86. }
  87. logger.Lvl(4).Infof("clientv3/retry: error %q on pinned endpoint %q", err.Error(), pinned)
  88. if s, ok := status.FromError(err); ok && (s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded || s.Code() == codes.Internal) {
  89. // mark this before endpoint switch is triggered
  90. c.balancer.hostPortError(pinned, err)
  91. c.balancer.next()
  92. logger.Lvl(4).Infof("clientv3/retry: switching from %q due to error %q", pinned, err.Error())
  93. }
  94. if isStop(err) {
  95. return err
  96. }
  97. }
  98. }
  99. }
  100. func (c *Client) newAuthRetryWrapper(retryf retryRPCFunc) retryRPCFunc {
  101. return func(rpcCtx context.Context, f rpcFunc, rp retryPolicy) error {
  102. for {
  103. pinned := c.balancer.pinned()
  104. err := retryf(rpcCtx, f, rp)
  105. if err == nil {
  106. return nil
  107. }
  108. logger.Lvl(4).Infof("clientv3/auth-retry: error %q on pinned endpoint %q", err.Error(), pinned)
  109. // always stop retry on etcd errors other than invalid auth token
  110. if rpctypes.Error(err) == rpctypes.ErrInvalidAuthToken {
  111. gterr := c.getToken(rpcCtx)
  112. if gterr != nil {
  113. logger.Lvl(4).Infof("clientv3/auth-retry: cannot retry due to error %q(%q) on pinned endpoint %q", err.Error(), gterr.Error(), pinned)
  114. return err // return the original error for simplicity
  115. }
  116. continue
  117. }
  118. return err
  119. }
  120. }
  121. }
  122. type retryKVClient struct {
  123. kc pb.KVClient
  124. retryf retryRPCFunc
  125. }
  126. // RetryKVClient implements a KVClient.
  127. func RetryKVClient(c *Client) pb.KVClient {
  128. return &retryKVClient{
  129. kc: pb.NewKVClient(c.conn),
  130. retryf: c.newAuthRetryWrapper(c.newRetryWrapper()),
  131. }
  132. }
  133. func (rkv *retryKVClient) Range(ctx context.Context, in *pb.RangeRequest, opts ...grpc.CallOption) (resp *pb.RangeResponse, err error) {
  134. err = rkv.retryf(ctx, func(rctx context.Context) error {
  135. resp, err = rkv.kc.Range(rctx, in, opts...)
  136. return err
  137. }, repeatable)
  138. return resp, err
  139. }
  140. func (rkv *retryKVClient) Put(ctx context.Context, in *pb.PutRequest, opts ...grpc.CallOption) (resp *pb.PutResponse, err error) {
  141. err = rkv.retryf(ctx, func(rctx context.Context) error {
  142. resp, err = rkv.kc.Put(rctx, in, opts...)
  143. return err
  144. }, nonRepeatable)
  145. return resp, err
  146. }
  147. func (rkv *retryKVClient) DeleteRange(ctx context.Context, in *pb.DeleteRangeRequest, opts ...grpc.CallOption) (resp *pb.DeleteRangeResponse, err error) {
  148. err = rkv.retryf(ctx, func(rctx context.Context) error {
  149. resp, err = rkv.kc.DeleteRange(rctx, in, opts...)
  150. return err
  151. }, nonRepeatable)
  152. return resp, err
  153. }
  154. func (rkv *retryKVClient) Txn(ctx context.Context, in *pb.TxnRequest, opts ...grpc.CallOption) (resp *pb.TxnResponse, err error) {
  155. // TODO: "repeatable" for read-only txn
  156. err = rkv.retryf(ctx, func(rctx context.Context) error {
  157. resp, err = rkv.kc.Txn(rctx, in, opts...)
  158. return err
  159. }, nonRepeatable)
  160. return resp, err
  161. }
  162. func (rkv *retryKVClient) Compact(ctx context.Context, in *pb.CompactionRequest, opts ...grpc.CallOption) (resp *pb.CompactionResponse, err error) {
  163. err = rkv.retryf(ctx, func(rctx context.Context) error {
  164. resp, err = rkv.kc.Compact(rctx, in, opts...)
  165. return err
  166. }, nonRepeatable)
  167. return resp, err
  168. }
  169. type retryLeaseClient struct {
  170. lc pb.LeaseClient
  171. retryf retryRPCFunc
  172. }
  173. // RetryLeaseClient implements a LeaseClient.
  174. func RetryLeaseClient(c *Client) pb.LeaseClient {
  175. return &retryLeaseClient{
  176. lc: pb.NewLeaseClient(c.conn),
  177. retryf: c.newAuthRetryWrapper(c.newRetryWrapper()),
  178. }
  179. }
  180. func (rlc *retryLeaseClient) LeaseTimeToLive(ctx context.Context, in *pb.LeaseTimeToLiveRequest, opts ...grpc.CallOption) (resp *pb.LeaseTimeToLiveResponse, err error) {
  181. err = rlc.retryf(ctx, func(rctx context.Context) error {
  182. resp, err = rlc.lc.LeaseTimeToLive(rctx, in, opts...)
  183. return err
  184. }, repeatable)
  185. return resp, err
  186. }
  187. func (rlc *retryLeaseClient) LeaseLeases(ctx context.Context, in *pb.LeaseLeasesRequest, opts ...grpc.CallOption) (resp *pb.LeaseLeasesResponse, err error) {
  188. err = rlc.retryf(ctx, func(rctx context.Context) error {
  189. resp, err = rlc.lc.LeaseLeases(rctx, in, opts...)
  190. return err
  191. }, repeatable)
  192. return resp, err
  193. }
  194. func (rlc *retryLeaseClient) LeaseGrant(ctx context.Context, in *pb.LeaseGrantRequest, opts ...grpc.CallOption) (resp *pb.LeaseGrantResponse, err error) {
  195. err = rlc.retryf(ctx, func(rctx context.Context) error {
  196. resp, err = rlc.lc.LeaseGrant(rctx, in, opts...)
  197. return err
  198. }, repeatable)
  199. return resp, err
  200. }
  201. func (rlc *retryLeaseClient) LeaseRevoke(ctx context.Context, in *pb.LeaseRevokeRequest, opts ...grpc.CallOption) (resp *pb.LeaseRevokeResponse, err error) {
  202. err = rlc.retryf(ctx, func(rctx context.Context) error {
  203. resp, err = rlc.lc.LeaseRevoke(rctx, in, opts...)
  204. return err
  205. }, repeatable)
  206. return resp, err
  207. }
  208. func (rlc *retryLeaseClient) LeaseKeepAlive(ctx context.Context, opts ...grpc.CallOption) (stream pb.Lease_LeaseKeepAliveClient, err error) {
  209. err = rlc.retryf(ctx, func(rctx context.Context) error {
  210. stream, err = rlc.lc.LeaseKeepAlive(rctx, opts...)
  211. return err
  212. }, repeatable)
  213. return stream, err
  214. }
  215. type retryClusterClient struct {
  216. cc pb.ClusterClient
  217. retryf retryRPCFunc
  218. }
  219. // RetryClusterClient implements a ClusterClient.
  220. func RetryClusterClient(c *Client) pb.ClusterClient {
  221. return &retryClusterClient{
  222. cc: pb.NewClusterClient(c.conn),
  223. retryf: c.newRetryWrapper(),
  224. }
  225. }
  226. func (rcc *retryClusterClient) MemberList(ctx context.Context, in *pb.MemberListRequest, opts ...grpc.CallOption) (resp *pb.MemberListResponse, err error) {
  227. err = rcc.retryf(ctx, func(rctx context.Context) error {
  228. resp, err = rcc.cc.MemberList(rctx, in, opts...)
  229. return err
  230. }, repeatable)
  231. return resp, err
  232. }
  233. func (rcc *retryClusterClient) MemberAdd(ctx context.Context, in *pb.MemberAddRequest, opts ...grpc.CallOption) (resp *pb.MemberAddResponse, err error) {
  234. err = rcc.retryf(ctx, func(rctx context.Context) error {
  235. resp, err = rcc.cc.MemberAdd(rctx, in, opts...)
  236. return err
  237. }, nonRepeatable)
  238. return resp, err
  239. }
  240. func (rcc *retryClusterClient) MemberRemove(ctx context.Context, in *pb.MemberRemoveRequest, opts ...grpc.CallOption) (resp *pb.MemberRemoveResponse, err error) {
  241. err = rcc.retryf(ctx, func(rctx context.Context) error {
  242. resp, err = rcc.cc.MemberRemove(rctx, in, opts...)
  243. return err
  244. }, nonRepeatable)
  245. return resp, err
  246. }
  247. func (rcc *retryClusterClient) MemberUpdate(ctx context.Context, in *pb.MemberUpdateRequest, opts ...grpc.CallOption) (resp *pb.MemberUpdateResponse, err error) {
  248. err = rcc.retryf(ctx, func(rctx context.Context) error {
  249. resp, err = rcc.cc.MemberUpdate(rctx, in, opts...)
  250. return err
  251. }, nonRepeatable)
  252. return resp, err
  253. }
  254. type retryMaintenanceClient struct {
  255. mc pb.MaintenanceClient
  256. retryf retryRPCFunc
  257. }
  258. // RetryMaintenanceClient implements a Maintenance.
  259. func RetryMaintenanceClient(c *Client, conn *grpc.ClientConn) pb.MaintenanceClient {
  260. return &retryMaintenanceClient{
  261. mc: pb.NewMaintenanceClient(conn),
  262. retryf: c.newRetryWrapper(),
  263. }
  264. }
  265. func (rmc *retryMaintenanceClient) Alarm(ctx context.Context, in *pb.AlarmRequest, opts ...grpc.CallOption) (resp *pb.AlarmResponse, err error) {
  266. err = rmc.retryf(ctx, func(rctx context.Context) error {
  267. resp, err = rmc.mc.Alarm(rctx, in, opts...)
  268. return err
  269. }, repeatable)
  270. return resp, err
  271. }
  272. func (rmc *retryMaintenanceClient) Status(ctx context.Context, in *pb.StatusRequest, opts ...grpc.CallOption) (resp *pb.StatusResponse, err error) {
  273. err = rmc.retryf(ctx, func(rctx context.Context) error {
  274. resp, err = rmc.mc.Status(rctx, in, opts...)
  275. return err
  276. }, repeatable)
  277. return resp, err
  278. }
  279. func (rmc *retryMaintenanceClient) Hash(ctx context.Context, in *pb.HashRequest, opts ...grpc.CallOption) (resp *pb.HashResponse, err error) {
  280. err = rmc.retryf(ctx, func(rctx context.Context) error {
  281. resp, err = rmc.mc.Hash(rctx, in, opts...)
  282. return err
  283. }, repeatable)
  284. return resp, err
  285. }
  286. func (rmc *retryMaintenanceClient) HashKV(ctx context.Context, in *pb.HashKVRequest, opts ...grpc.CallOption) (resp *pb.HashKVResponse, err error) {
  287. err = rmc.retryf(ctx, func(rctx context.Context) error {
  288. resp, err = rmc.mc.HashKV(rctx, in, opts...)
  289. return err
  290. }, repeatable)
  291. return resp, err
  292. }
  293. func (rmc *retryMaintenanceClient) Snapshot(ctx context.Context, in *pb.SnapshotRequest, opts ...grpc.CallOption) (stream pb.Maintenance_SnapshotClient, err error) {
  294. err = rmc.retryf(ctx, func(rctx context.Context) error {
  295. stream, err = rmc.mc.Snapshot(rctx, in, opts...)
  296. return err
  297. }, repeatable)
  298. return stream, err
  299. }
  300. func (rmc *retryMaintenanceClient) MoveLeader(ctx context.Context, in *pb.MoveLeaderRequest, opts ...grpc.CallOption) (resp *pb.MoveLeaderResponse, err error) {
  301. err = rmc.retryf(ctx, func(rctx context.Context) error {
  302. resp, err = rmc.mc.MoveLeader(rctx, in, opts...)
  303. return err
  304. }, repeatable)
  305. return resp, err
  306. }
  307. func (rmc *retryMaintenanceClient) Defragment(ctx context.Context, in *pb.DefragmentRequest, opts ...grpc.CallOption) (resp *pb.DefragmentResponse, err error) {
  308. err = rmc.retryf(ctx, func(rctx context.Context) error {
  309. resp, err = rmc.mc.Defragment(rctx, in, opts...)
  310. return err
  311. }, nonRepeatable)
  312. return resp, err
  313. }
  314. type retryAuthClient struct {
  315. ac pb.AuthClient
  316. retryf retryRPCFunc
  317. }
  318. // RetryAuthClient implements a AuthClient.
  319. func RetryAuthClient(c *Client) pb.AuthClient {
  320. return &retryAuthClient{
  321. ac: pb.NewAuthClient(c.conn),
  322. retryf: c.newRetryWrapper(),
  323. }
  324. }
  325. func (rac *retryAuthClient) UserList(ctx context.Context, in *pb.AuthUserListRequest, opts ...grpc.CallOption) (resp *pb.AuthUserListResponse, err error) {
  326. err = rac.retryf(ctx, func(rctx context.Context) error {
  327. resp, err = rac.ac.UserList(rctx, in, opts...)
  328. return err
  329. }, repeatable)
  330. return resp, err
  331. }
  332. func (rac *retryAuthClient) UserGet(ctx context.Context, in *pb.AuthUserGetRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGetResponse, err error) {
  333. err = rac.retryf(ctx, func(rctx context.Context) error {
  334. resp, err = rac.ac.UserGet(rctx, in, opts...)
  335. return err
  336. }, repeatable)
  337. return resp, err
  338. }
  339. func (rac *retryAuthClient) RoleGet(ctx context.Context, in *pb.AuthRoleGetRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGetResponse, err error) {
  340. err = rac.retryf(ctx, func(rctx context.Context) error {
  341. resp, err = rac.ac.RoleGet(rctx, in, opts...)
  342. return err
  343. }, repeatable)
  344. return resp, err
  345. }
  346. func (rac *retryAuthClient) RoleList(ctx context.Context, in *pb.AuthRoleListRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleListResponse, err error) {
  347. err = rac.retryf(ctx, func(rctx context.Context) error {
  348. resp, err = rac.ac.RoleList(rctx, in, opts...)
  349. return err
  350. }, repeatable)
  351. return resp, err
  352. }
  353. func (rac *retryAuthClient) AuthEnable(ctx context.Context, in *pb.AuthEnableRequest, opts ...grpc.CallOption) (resp *pb.AuthEnableResponse, err error) {
  354. err = rac.retryf(ctx, func(rctx context.Context) error {
  355. resp, err = rac.ac.AuthEnable(rctx, in, opts...)
  356. return err
  357. }, nonRepeatable)
  358. return resp, err
  359. }
  360. func (rac *retryAuthClient) AuthDisable(ctx context.Context, in *pb.AuthDisableRequest, opts ...grpc.CallOption) (resp *pb.AuthDisableResponse, err error) {
  361. err = rac.retryf(ctx, func(rctx context.Context) error {
  362. resp, err = rac.ac.AuthDisable(rctx, in, opts...)
  363. return err
  364. }, nonRepeatable)
  365. return resp, err
  366. }
  367. func (rac *retryAuthClient) UserAdd(ctx context.Context, in *pb.AuthUserAddRequest, opts ...grpc.CallOption) (resp *pb.AuthUserAddResponse, err error) {
  368. err = rac.retryf(ctx, func(rctx context.Context) error {
  369. resp, err = rac.ac.UserAdd(rctx, in, opts...)
  370. return err
  371. }, nonRepeatable)
  372. return resp, err
  373. }
  374. func (rac *retryAuthClient) UserDelete(ctx context.Context, in *pb.AuthUserDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthUserDeleteResponse, err error) {
  375. err = rac.retryf(ctx, func(rctx context.Context) error {
  376. resp, err = rac.ac.UserDelete(rctx, in, opts...)
  377. return err
  378. }, nonRepeatable)
  379. return resp, err
  380. }
  381. func (rac *retryAuthClient) UserChangePassword(ctx context.Context, in *pb.AuthUserChangePasswordRequest, opts ...grpc.CallOption) (resp *pb.AuthUserChangePasswordResponse, err error) {
  382. err = rac.retryf(ctx, func(rctx context.Context) error {
  383. resp, err = rac.ac.UserChangePassword(rctx, in, opts...)
  384. return err
  385. }, nonRepeatable)
  386. return resp, err
  387. }
  388. func (rac *retryAuthClient) UserGrantRole(ctx context.Context, in *pb.AuthUserGrantRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGrantRoleResponse, err error) {
  389. err = rac.retryf(ctx, func(rctx context.Context) error {
  390. resp, err = rac.ac.UserGrantRole(rctx, in, opts...)
  391. return err
  392. }, nonRepeatable)
  393. return resp, err
  394. }
  395. func (rac *retryAuthClient) UserRevokeRole(ctx context.Context, in *pb.AuthUserRevokeRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserRevokeRoleResponse, err error) {
  396. err = rac.retryf(ctx, func(rctx context.Context) error {
  397. resp, err = rac.ac.UserRevokeRole(rctx, in, opts...)
  398. return err
  399. }, nonRepeatable)
  400. return resp, err
  401. }
  402. func (rac *retryAuthClient) RoleAdd(ctx context.Context, in *pb.AuthRoleAddRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleAddResponse, err error) {
  403. err = rac.retryf(ctx, func(rctx context.Context) error {
  404. resp, err = rac.ac.RoleAdd(rctx, in, opts...)
  405. return err
  406. }, nonRepeatable)
  407. return resp, err
  408. }
  409. func (rac *retryAuthClient) RoleDelete(ctx context.Context, in *pb.AuthRoleDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleDeleteResponse, err error) {
  410. err = rac.retryf(ctx, func(rctx context.Context) error {
  411. resp, err = rac.ac.RoleDelete(rctx, in, opts...)
  412. return err
  413. }, nonRepeatable)
  414. return resp, err
  415. }
  416. func (rac *retryAuthClient) RoleGrantPermission(ctx context.Context, in *pb.AuthRoleGrantPermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGrantPermissionResponse, err error) {
  417. err = rac.retryf(ctx, func(rctx context.Context) error {
  418. resp, err = rac.ac.RoleGrantPermission(rctx, in, opts...)
  419. return err
  420. }, nonRepeatable)
  421. return resp, err
  422. }
  423. func (rac *retryAuthClient) RoleRevokePermission(ctx context.Context, in *pb.AuthRoleRevokePermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleRevokePermissionResponse, err error) {
  424. err = rac.retryf(ctx, func(rctx context.Context) error {
  425. resp, err = rac.ac.RoleRevokePermission(rctx, in, opts...)
  426. return err
  427. }, nonRepeatable)
  428. return resp, err
  429. }
  430. func (rac *retryAuthClient) Authenticate(ctx context.Context, in *pb.AuthenticateRequest, opts ...grpc.CallOption) (resp *pb.AuthenticateResponse, err error) {
  431. err = rac.retryf(ctx, func(rctx context.Context) error {
  432. resp, err = rac.ac.Authenticate(rctx, in, opts...)
  433. return err
  434. }, nonRepeatable)
  435. return resp, err
  436. }