retry.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package clientv3
  15. import (
  16. "context"
  17. "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  18. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  19. "google.golang.org/grpc"
  20. "google.golang.org/grpc/codes"
  21. "google.golang.org/grpc/status"
  22. )
  23. type retryPolicy uint8
  24. const (
  25. repeatable retryPolicy = iota
  26. nonRepeatable
  27. )
  28. type rpcFunc func(ctx context.Context) error
  29. type retryRPCFunc func(context.Context, rpcFunc, retryPolicy) error
  30. type retryStopErrFunc func(error) bool
  31. // immutable requests (e.g. Get) should be retried unless it's
  32. // an obvious server-side error (e.g. rpctypes.ErrRequestTooLarge).
  33. //
  34. // "isRepeatableStopError" returns "true" when an immutable request
  35. // is interrupted by server-side or gRPC-side error and its status
  36. // code is not transient (!= codes.Unavailable).
  37. //
  38. // Returning "true" means retry should stop, since client cannot
  39. // handle itself even with retries.
  40. func isRepeatableStopError(err error) bool {
  41. eErr := rpctypes.Error(err)
  42. // always stop retry on etcd errors
  43. if serverErr, ok := eErr.(rpctypes.EtcdError); ok && serverErr.Code() != codes.Unavailable {
  44. return true
  45. }
  46. // only retry if unavailable
  47. ev, ok := status.FromError(err)
  48. if !ok {
  49. return false
  50. }
  51. return ev.Code() != codes.Unavailable
  52. }
  53. // mutable requests (e.g. Put, Delete, Txn) should only be retried
  54. // when the status code is codes.Unavailable when initial connection
  55. // has not been established (no pinned endpoint).
  56. //
  57. // "isNonRepeatableStopError" returns "true" when a mutable request
  58. // is interrupted by non-transient error that client cannot handle itself,
  59. // or transient error while the connection has already been established
  60. // (pinned endpoint exists).
  61. //
  62. // Returning "true" means retry should stop, otherwise it violates
  63. // write-at-most-once semantics.
  64. func isNonRepeatableStopError(err error) bool {
  65. if ev, ok := status.FromError(err); ok && ev.Code() != codes.Unavailable {
  66. return true
  67. }
  68. desc := rpctypes.ErrorDesc(err)
  69. return desc != "there is no address available" && desc != "there is no connection available"
  70. }
  71. func (c *Client) newRetryWrapper() retryRPCFunc {
  72. return func(rpcCtx context.Context, f rpcFunc, rp retryPolicy) error {
  73. var isStop retryStopErrFunc
  74. switch rp {
  75. case repeatable:
  76. isStop = isRepeatableStopError
  77. case nonRepeatable:
  78. isStop = isNonRepeatableStopError
  79. }
  80. for {
  81. if err := readyWait(rpcCtx, c.ctx, c.balancer.ConnectNotify()); err != nil {
  82. return err
  83. }
  84. pinned := c.balancer.pinned()
  85. err := f(rpcCtx)
  86. if err == nil {
  87. return nil
  88. }
  89. lg.Lvl(4).Infof("clientv3/retry: error %q on pinned endpoint %q", err.Error(), pinned)
  90. if s, ok := status.FromError(err); ok && (s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded || s.Code() == codes.Internal) {
  91. // mark this before endpoint switch is triggered
  92. c.balancer.hostPortError(pinned, err)
  93. c.balancer.next()
  94. lg.Lvl(4).Infof("clientv3/retry: switching from %q due to error %q", pinned, err.Error())
  95. }
  96. if isStop(err) {
  97. return err
  98. }
  99. }
  100. }
  101. }
  102. func (c *Client) newAuthRetryWrapper(retryf retryRPCFunc) retryRPCFunc {
  103. return func(rpcCtx context.Context, f rpcFunc, rp retryPolicy) error {
  104. for {
  105. pinned := c.balancer.pinned()
  106. err := retryf(rpcCtx, f, rp)
  107. if err == nil {
  108. return nil
  109. }
  110. lg.Lvl(4).Infof("clientv3/auth-retry: error %q on pinned endpoint %q", err.Error(), pinned)
  111. // always stop retry on etcd errors other than invalid auth token
  112. if rpctypes.Error(err) == rpctypes.ErrInvalidAuthToken {
  113. gterr := c.getToken(rpcCtx)
  114. if gterr != nil {
  115. lg.Lvl(4).Infof("clientv3/auth-retry: cannot retry due to error %q(%q) on pinned endpoint %q", err.Error(), gterr.Error(), pinned)
  116. return err // return the original error for simplicity
  117. }
  118. continue
  119. }
  120. return err
  121. }
  122. }
  123. }
  124. type retryKVClient struct {
  125. kc pb.KVClient
  126. retryf retryRPCFunc
  127. }
  128. // RetryKVClient implements a KVClient.
  129. func RetryKVClient(c *Client) pb.KVClient {
  130. return &retryKVClient{
  131. kc: pb.NewKVClient(c.conn),
  132. retryf: c.newAuthRetryWrapper(c.newRetryWrapper()),
  133. }
  134. }
  135. func (rkv *retryKVClient) Range(ctx context.Context, in *pb.RangeRequest, opts ...grpc.CallOption) (resp *pb.RangeResponse, err error) {
  136. err = rkv.retryf(ctx, func(rctx context.Context) error {
  137. resp, err = rkv.kc.Range(rctx, in, opts...)
  138. return err
  139. }, repeatable)
  140. return resp, err
  141. }
  142. func (rkv *retryKVClient) Put(ctx context.Context, in *pb.PutRequest, opts ...grpc.CallOption) (resp *pb.PutResponse, err error) {
  143. err = rkv.retryf(ctx, func(rctx context.Context) error {
  144. resp, err = rkv.kc.Put(rctx, in, opts...)
  145. return err
  146. }, nonRepeatable)
  147. return resp, err
  148. }
  149. func (rkv *retryKVClient) DeleteRange(ctx context.Context, in *pb.DeleteRangeRequest, opts ...grpc.CallOption) (resp *pb.DeleteRangeResponse, err error) {
  150. err = rkv.retryf(ctx, func(rctx context.Context) error {
  151. resp, err = rkv.kc.DeleteRange(rctx, in, opts...)
  152. return err
  153. }, nonRepeatable)
  154. return resp, err
  155. }
  156. func (rkv *retryKVClient) Txn(ctx context.Context, in *pb.TxnRequest, opts ...grpc.CallOption) (resp *pb.TxnResponse, err error) {
  157. // TODO: "repeatable" for read-only txn
  158. err = rkv.retryf(ctx, func(rctx context.Context) error {
  159. resp, err = rkv.kc.Txn(rctx, in, opts...)
  160. return err
  161. }, nonRepeatable)
  162. return resp, err
  163. }
  164. func (rkv *retryKVClient) Compact(ctx context.Context, in *pb.CompactionRequest, opts ...grpc.CallOption) (resp *pb.CompactionResponse, err error) {
  165. err = rkv.retryf(ctx, func(rctx context.Context) error {
  166. resp, err = rkv.kc.Compact(rctx, in, opts...)
  167. return err
  168. }, nonRepeatable)
  169. return resp, err
  170. }
  171. type retryLeaseClient struct {
  172. lc pb.LeaseClient
  173. retryf retryRPCFunc
  174. }
  175. // RetryLeaseClient implements a LeaseClient.
  176. func RetryLeaseClient(c *Client) pb.LeaseClient {
  177. return &retryLeaseClient{
  178. lc: pb.NewLeaseClient(c.conn),
  179. retryf: c.newAuthRetryWrapper(c.newRetryWrapper()),
  180. }
  181. }
  182. func (rlc *retryLeaseClient) LeaseTimeToLive(ctx context.Context, in *pb.LeaseTimeToLiveRequest, opts ...grpc.CallOption) (resp *pb.LeaseTimeToLiveResponse, err error) {
  183. err = rlc.retryf(ctx, func(rctx context.Context) error {
  184. resp, err = rlc.lc.LeaseTimeToLive(rctx, in, opts...)
  185. return err
  186. }, repeatable)
  187. return resp, err
  188. }
  189. func (rlc *retryLeaseClient) LeaseLeases(ctx context.Context, in *pb.LeaseLeasesRequest, opts ...grpc.CallOption) (resp *pb.LeaseLeasesResponse, err error) {
  190. err = rlc.retryf(ctx, func(rctx context.Context) error {
  191. resp, err = rlc.lc.LeaseLeases(rctx, in, opts...)
  192. return err
  193. }, repeatable)
  194. return resp, err
  195. }
  196. func (rlc *retryLeaseClient) LeaseGrant(ctx context.Context, in *pb.LeaseGrantRequest, opts ...grpc.CallOption) (resp *pb.LeaseGrantResponse, err error) {
  197. err = rlc.retryf(ctx, func(rctx context.Context) error {
  198. resp, err = rlc.lc.LeaseGrant(rctx, in, opts...)
  199. return err
  200. }, repeatable)
  201. return resp, err
  202. }
  203. func (rlc *retryLeaseClient) LeaseRevoke(ctx context.Context, in *pb.LeaseRevokeRequest, opts ...grpc.CallOption) (resp *pb.LeaseRevokeResponse, err error) {
  204. err = rlc.retryf(ctx, func(rctx context.Context) error {
  205. resp, err = rlc.lc.LeaseRevoke(rctx, in, opts...)
  206. return err
  207. }, repeatable)
  208. return resp, err
  209. }
  210. func (rlc *retryLeaseClient) LeaseKeepAlive(ctx context.Context, opts ...grpc.CallOption) (stream pb.Lease_LeaseKeepAliveClient, err error) {
  211. err = rlc.retryf(ctx, func(rctx context.Context) error {
  212. stream, err = rlc.lc.LeaseKeepAlive(rctx, opts...)
  213. return err
  214. }, repeatable)
  215. return stream, err
  216. }
  217. type retryClusterClient struct {
  218. cc pb.ClusterClient
  219. retryf retryRPCFunc
  220. }
  221. // RetryClusterClient implements a ClusterClient.
  222. func RetryClusterClient(c *Client) pb.ClusterClient {
  223. return &retryClusterClient{
  224. cc: pb.NewClusterClient(c.conn),
  225. retryf: c.newRetryWrapper(),
  226. }
  227. }
  228. func (rcc *retryClusterClient) MemberList(ctx context.Context, in *pb.MemberListRequest, opts ...grpc.CallOption) (resp *pb.MemberListResponse, err error) {
  229. err = rcc.retryf(ctx, func(rctx context.Context) error {
  230. resp, err = rcc.cc.MemberList(rctx, in, opts...)
  231. return err
  232. }, repeatable)
  233. return resp, err
  234. }
  235. func (rcc *retryClusterClient) MemberAdd(ctx context.Context, in *pb.MemberAddRequest, opts ...grpc.CallOption) (resp *pb.MemberAddResponse, err error) {
  236. err = rcc.retryf(ctx, func(rctx context.Context) error {
  237. resp, err = rcc.cc.MemberAdd(rctx, in, opts...)
  238. return err
  239. }, nonRepeatable)
  240. return resp, err
  241. }
  242. func (rcc *retryClusterClient) MemberRemove(ctx context.Context, in *pb.MemberRemoveRequest, opts ...grpc.CallOption) (resp *pb.MemberRemoveResponse, err error) {
  243. err = rcc.retryf(ctx, func(rctx context.Context) error {
  244. resp, err = rcc.cc.MemberRemove(rctx, in, opts...)
  245. return err
  246. }, nonRepeatable)
  247. return resp, err
  248. }
  249. func (rcc *retryClusterClient) MemberUpdate(ctx context.Context, in *pb.MemberUpdateRequest, opts ...grpc.CallOption) (resp *pb.MemberUpdateResponse, err error) {
  250. err = rcc.retryf(ctx, func(rctx context.Context) error {
  251. resp, err = rcc.cc.MemberUpdate(rctx, in, opts...)
  252. return err
  253. }, nonRepeatable)
  254. return resp, err
  255. }
  256. type retryMaintenanceClient struct {
  257. mc pb.MaintenanceClient
  258. retryf retryRPCFunc
  259. }
  260. // RetryMaintenanceClient implements a Maintenance.
  261. func RetryMaintenanceClient(c *Client, conn *grpc.ClientConn) pb.MaintenanceClient {
  262. return &retryMaintenanceClient{
  263. mc: pb.NewMaintenanceClient(conn),
  264. retryf: c.newRetryWrapper(),
  265. }
  266. }
  267. func (rmc *retryMaintenanceClient) Alarm(ctx context.Context, in *pb.AlarmRequest, opts ...grpc.CallOption) (resp *pb.AlarmResponse, err error) {
  268. err = rmc.retryf(ctx, func(rctx context.Context) error {
  269. resp, err = rmc.mc.Alarm(rctx, in, opts...)
  270. return err
  271. }, repeatable)
  272. return resp, err
  273. }
  274. func (rmc *retryMaintenanceClient) Status(ctx context.Context, in *pb.StatusRequest, opts ...grpc.CallOption) (resp *pb.StatusResponse, err error) {
  275. err = rmc.retryf(ctx, func(rctx context.Context) error {
  276. resp, err = rmc.mc.Status(rctx, in, opts...)
  277. return err
  278. }, repeatable)
  279. return resp, err
  280. }
  281. func (rmc *retryMaintenanceClient) Hash(ctx context.Context, in *pb.HashRequest, opts ...grpc.CallOption) (resp *pb.HashResponse, err error) {
  282. err = rmc.retryf(ctx, func(rctx context.Context) error {
  283. resp, err = rmc.mc.Hash(rctx, in, opts...)
  284. return err
  285. }, repeatable)
  286. return resp, err
  287. }
  288. func (rmc *retryMaintenanceClient) HashKV(ctx context.Context, in *pb.HashKVRequest, opts ...grpc.CallOption) (resp *pb.HashKVResponse, err error) {
  289. err = rmc.retryf(ctx, func(rctx context.Context) error {
  290. resp, err = rmc.mc.HashKV(rctx, in, opts...)
  291. return err
  292. }, repeatable)
  293. return resp, err
  294. }
  295. func (rmc *retryMaintenanceClient) Snapshot(ctx context.Context, in *pb.SnapshotRequest, opts ...grpc.CallOption) (stream pb.Maintenance_SnapshotClient, err error) {
  296. err = rmc.retryf(ctx, func(rctx context.Context) error {
  297. stream, err = rmc.mc.Snapshot(rctx, in, opts...)
  298. return err
  299. }, repeatable)
  300. return stream, err
  301. }
  302. func (rmc *retryMaintenanceClient) MoveLeader(ctx context.Context, in *pb.MoveLeaderRequest, opts ...grpc.CallOption) (resp *pb.MoveLeaderResponse, err error) {
  303. err = rmc.retryf(ctx, func(rctx context.Context) error {
  304. resp, err = rmc.mc.MoveLeader(rctx, in, opts...)
  305. return err
  306. }, repeatable)
  307. return resp, err
  308. }
  309. func (rmc *retryMaintenanceClient) Defragment(ctx context.Context, in *pb.DefragmentRequest, opts ...grpc.CallOption) (resp *pb.DefragmentResponse, err error) {
  310. err = rmc.retryf(ctx, func(rctx context.Context) error {
  311. resp, err = rmc.mc.Defragment(rctx, in, opts...)
  312. return err
  313. }, nonRepeatable)
  314. return resp, err
  315. }
  316. type retryAuthClient struct {
  317. ac pb.AuthClient
  318. retryf retryRPCFunc
  319. }
  320. // RetryAuthClient implements a AuthClient.
  321. func RetryAuthClient(c *Client) pb.AuthClient {
  322. return &retryAuthClient{
  323. ac: pb.NewAuthClient(c.conn),
  324. retryf: c.newRetryWrapper(),
  325. }
  326. }
  327. func (rac *retryAuthClient) UserList(ctx context.Context, in *pb.AuthUserListRequest, opts ...grpc.CallOption) (resp *pb.AuthUserListResponse, err error) {
  328. err = rac.retryf(ctx, func(rctx context.Context) error {
  329. resp, err = rac.ac.UserList(rctx, in, opts...)
  330. return err
  331. }, repeatable)
  332. return resp, err
  333. }
  334. func (rac *retryAuthClient) UserGet(ctx context.Context, in *pb.AuthUserGetRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGetResponse, err error) {
  335. err = rac.retryf(ctx, func(rctx context.Context) error {
  336. resp, err = rac.ac.UserGet(rctx, in, opts...)
  337. return err
  338. }, repeatable)
  339. return resp, err
  340. }
  341. func (rac *retryAuthClient) RoleGet(ctx context.Context, in *pb.AuthRoleGetRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGetResponse, err error) {
  342. err = rac.retryf(ctx, func(rctx context.Context) error {
  343. resp, err = rac.ac.RoleGet(rctx, in, opts...)
  344. return err
  345. }, repeatable)
  346. return resp, err
  347. }
  348. func (rac *retryAuthClient) RoleList(ctx context.Context, in *pb.AuthRoleListRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleListResponse, err error) {
  349. err = rac.retryf(ctx, func(rctx context.Context) error {
  350. resp, err = rac.ac.RoleList(rctx, in, opts...)
  351. return err
  352. }, repeatable)
  353. return resp, err
  354. }
  355. func (rac *retryAuthClient) AuthEnable(ctx context.Context, in *pb.AuthEnableRequest, opts ...grpc.CallOption) (resp *pb.AuthEnableResponse, err error) {
  356. err = rac.retryf(ctx, func(rctx context.Context) error {
  357. resp, err = rac.ac.AuthEnable(rctx, in, opts...)
  358. return err
  359. }, nonRepeatable)
  360. return resp, err
  361. }
  362. func (rac *retryAuthClient) AuthDisable(ctx context.Context, in *pb.AuthDisableRequest, opts ...grpc.CallOption) (resp *pb.AuthDisableResponse, err error) {
  363. err = rac.retryf(ctx, func(rctx context.Context) error {
  364. resp, err = rac.ac.AuthDisable(rctx, in, opts...)
  365. return err
  366. }, nonRepeatable)
  367. return resp, err
  368. }
  369. func (rac *retryAuthClient) UserAdd(ctx context.Context, in *pb.AuthUserAddRequest, opts ...grpc.CallOption) (resp *pb.AuthUserAddResponse, err error) {
  370. err = rac.retryf(ctx, func(rctx context.Context) error {
  371. resp, err = rac.ac.UserAdd(rctx, in, opts...)
  372. return err
  373. }, nonRepeatable)
  374. return resp, err
  375. }
  376. func (rac *retryAuthClient) UserDelete(ctx context.Context, in *pb.AuthUserDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthUserDeleteResponse, err error) {
  377. err = rac.retryf(ctx, func(rctx context.Context) error {
  378. resp, err = rac.ac.UserDelete(rctx, in, opts...)
  379. return err
  380. }, nonRepeatable)
  381. return resp, err
  382. }
  383. func (rac *retryAuthClient) UserChangePassword(ctx context.Context, in *pb.AuthUserChangePasswordRequest, opts ...grpc.CallOption) (resp *pb.AuthUserChangePasswordResponse, err error) {
  384. err = rac.retryf(ctx, func(rctx context.Context) error {
  385. resp, err = rac.ac.UserChangePassword(rctx, in, opts...)
  386. return err
  387. }, nonRepeatable)
  388. return resp, err
  389. }
  390. func (rac *retryAuthClient) UserGrantRole(ctx context.Context, in *pb.AuthUserGrantRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserGrantRoleResponse, err error) {
  391. err = rac.retryf(ctx, func(rctx context.Context) error {
  392. resp, err = rac.ac.UserGrantRole(rctx, in, opts...)
  393. return err
  394. }, nonRepeatable)
  395. return resp, err
  396. }
  397. func (rac *retryAuthClient) UserRevokeRole(ctx context.Context, in *pb.AuthUserRevokeRoleRequest, opts ...grpc.CallOption) (resp *pb.AuthUserRevokeRoleResponse, err error) {
  398. err = rac.retryf(ctx, func(rctx context.Context) error {
  399. resp, err = rac.ac.UserRevokeRole(rctx, in, opts...)
  400. return err
  401. }, nonRepeatable)
  402. return resp, err
  403. }
  404. func (rac *retryAuthClient) RoleAdd(ctx context.Context, in *pb.AuthRoleAddRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleAddResponse, err error) {
  405. err = rac.retryf(ctx, func(rctx context.Context) error {
  406. resp, err = rac.ac.RoleAdd(rctx, in, opts...)
  407. return err
  408. }, nonRepeatable)
  409. return resp, err
  410. }
  411. func (rac *retryAuthClient) RoleDelete(ctx context.Context, in *pb.AuthRoleDeleteRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleDeleteResponse, err error) {
  412. err = rac.retryf(ctx, func(rctx context.Context) error {
  413. resp, err = rac.ac.RoleDelete(rctx, in, opts...)
  414. return err
  415. }, nonRepeatable)
  416. return resp, err
  417. }
  418. func (rac *retryAuthClient) RoleGrantPermission(ctx context.Context, in *pb.AuthRoleGrantPermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleGrantPermissionResponse, err error) {
  419. err = rac.retryf(ctx, func(rctx context.Context) error {
  420. resp, err = rac.ac.RoleGrantPermission(rctx, in, opts...)
  421. return err
  422. }, nonRepeatable)
  423. return resp, err
  424. }
  425. func (rac *retryAuthClient) RoleRevokePermission(ctx context.Context, in *pb.AuthRoleRevokePermissionRequest, opts ...grpc.CallOption) (resp *pb.AuthRoleRevokePermissionResponse, err error) {
  426. err = rac.retryf(ctx, func(rctx context.Context) error {
  427. resp, err = rac.ac.RoleRevokePermission(rctx, in, opts...)
  428. return err
  429. }, nonRepeatable)
  430. return resp, err
  431. }
  432. func (rac *retryAuthClient) Authenticate(ctx context.Context, in *pb.AuthenticateRequest, opts ...grpc.CallOption) (resp *pb.AuthenticateResponse, err error) {
  433. err = rac.retryf(ctx, func(rctx context.Context) error {
  434. resp, err = rac.ac.Authenticate(rctx, in, opts...)
  435. return err
  436. }, nonRepeatable)
  437. return resp, err
  438. }