lessor.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package lease
  15. import (
  16. "container/heap"
  17. "context"
  18. "encoding/binary"
  19. "errors"
  20. "math"
  21. "sort"
  22. "sync"
  23. "time"
  24. pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
  25. "go.etcd.io/etcd/lease/leasepb"
  26. "go.etcd.io/etcd/mvcc/backend"
  27. "go.uber.org/zap"
  28. )
  29. // NoLease is a special LeaseID representing the absence of a lease.
  30. const NoLease = LeaseID(0)
  31. // MaxLeaseTTL is the maximum lease TTL value
  32. const MaxLeaseTTL = 9000000000
  var (
  	// forever is the zero time; a lease whose expiry equals it never expires.
  	forever time.Time

  	// leaseBucketName is the backend bucket in which leases are persisted.
  	leaseBucketName = []byte("lease")

  	// maximum number of leases to revoke per second; configurable for tests
  	leaseRevokeRate = 1000

  	// maximum number of lease checkpoints recorded to the consensus log per
  	// second; configurable for tests
  	leaseCheckpointRate = 1000

  	// maximum number of lease checkpoints to batch into a single consensus
  	// log entry
  	maxLeaseCheckpointBatchSize = 1000
  )

  // Sentinel errors returned by the lessor.
  var (
  	// ErrNotPrimary is returned when an operation needs the primary lessor.
  	ErrNotPrimary = errors.New("not a primary lessor")
  	// ErrLeaseNotFound is returned when the referenced lease does not exist.
  	ErrLeaseNotFound = errors.New("lease not found")
  	// ErrLeaseExists is returned when granting a lease whose ID is taken.
  	ErrLeaseExists = errors.New("lease already exists")
  	// ErrLeaseTTLTooLarge is returned when the TTL exceeds MaxLeaseTTL.
  	ErrLeaseTTLTooLarge = errors.New("too large lease TTL")
  )
  // TxnDelete is the delete-only subset of a TxnWrite. It is declared in this
  // package so that lease does not have to import mvcc (which would create a
  // circular dependency).
  type TxnDelete interface {
  	// DeleteRange deletes the given key range within the transaction.
  	DeleteRange(key, end []byte) (n, rev int64)
  	// End finishes the transaction.
  	End()
  }
  53. // RangeDeleter is a TxnDelete constructor.
  54. type RangeDeleter func() TxnDelete
  55. // Checkpointer permits checkpointing of lease remaining TTLs to the consensus log. Defined here to
  56. // avoid circular dependency with mvcc.
  57. type Checkpointer func(ctx context.Context, lc *pb.LeaseCheckpointRequest)
  // LeaseID is the identifier of a lease.
  type LeaseID int64
  59. // Lessor owns leases. It can grant, revoke, renew and modify leases for lessee.
  60. type Lessor interface {
  61. // SetRangeDeleter lets the lessor create TxnDeletes to the store.
  62. // Lessor deletes the items in the revoked or expired lease by creating
  63. // new TxnDeletes.
  64. SetRangeDeleter(rd RangeDeleter)
  65. SetCheckpointer(cp Checkpointer)
  66. // Grant grants a lease that expires at least after TTL seconds.
  67. Grant(id LeaseID, ttl int64) (*Lease, error)
  68. // Revoke revokes a lease with given ID. The item attached to the
  69. // given lease will be removed. If the ID does not exist, an error
  70. // will be returned.
  71. Revoke(id LeaseID) error
  72. // Checkpoint applies the remainingTTL of a lease. The remainingTTL is used in Promote to set
  73. // the expiry of leases to less than the full TTL when possible.
  74. Checkpoint(id LeaseID, remainingTTL int64) error
  75. // Attach attaches given leaseItem to the lease with given LeaseID.
  76. // If the lease does not exist, an error will be returned.
  77. Attach(id LeaseID, items []LeaseItem) error
  78. // GetLease returns LeaseID for given item.
  79. // If no lease found, NoLease value will be returned.
  80. GetLease(item LeaseItem) LeaseID
  81. // Detach detaches given leaseItem from the lease with given LeaseID.
  82. // If the lease does not exist, an error will be returned.
  83. Detach(id LeaseID, items []LeaseItem) error
  84. // Promote promotes the lessor to be the primary lessor. Primary lessor manages
  85. // the expiration and renew of leases.
  86. // Newly promoted lessor renew the TTL of all lease to extend + previous TTL.
  87. Promote(extend time.Duration)
  88. // Demote demotes the lessor from being the primary lessor.
  89. Demote()
  90. // Renew renews a lease with given ID. It returns the renewed TTL. If the ID does not exist,
  91. // an error will be returned.
  92. Renew(id LeaseID) (int64, error)
  93. // Lookup gives the lease at a given lease id, if any
  94. Lookup(id LeaseID) *Lease
  95. // Leases lists all leases.
  96. Leases() []*Lease
  97. // ExpiredLeasesC returns a chan that is used to receive expired leases.
  98. ExpiredLeasesC() <-chan []*Lease
  99. // Recover recovers the lessor state from the given backend and RangeDeleter.
  100. Recover(b backend.Backend, rd RangeDeleter)
  101. // Stop stops the lessor for managing leases. The behavior of calling Stop multiple
  102. // times is undefined.
  103. Stop()
  104. }
  105. // lessor implements Lessor interface.
  106. // TODO: use clockwork for testability.
  107. type lessor struct {
  108. mu sync.RWMutex
  109. // demotec is set when the lessor is the primary.
  110. // demotec will be closed if the lessor is demoted.
  111. demotec chan struct{}
  112. leaseMap map[LeaseID]*Lease
  113. leaseHeap LeaseQueue
  114. leaseCheckpointHeap LeaseQueue
  115. itemMap map[LeaseItem]LeaseID
  116. // When a lease expires, the lessor will delete the
  117. // leased range (or key) by the RangeDeleter.
  118. rd RangeDeleter
  119. // When a lease's deadline should be persisted to preserve the remaining TTL across leader
  120. // elections and restarts, the lessor will checkpoint the lease by the Checkpointer.
  121. cp Checkpointer
  122. // backend to persist leases. We only persist lease ID and expiry for now.
  123. // The leased items can be recovered by iterating all the keys in kv.
  124. b backend.Backend
  125. // minLeaseTTL is the minimum lease TTL that can be granted for a lease. Any
  126. // requests for shorter TTLs are extended to the minimum TTL.
  127. minLeaseTTL int64
  128. expiredC chan []*Lease
  129. // stopC is a channel whose closure indicates that the lessor should be stopped.
  130. stopC chan struct{}
  131. // doneC is a channel whose closure indicates that the lessor is stopped.
  132. doneC chan struct{}
  133. lg *zap.Logger
  134. // Wait duration between lease checkpoints.
  135. checkpointInterval time.Duration
  136. }
  // LessorConfig carries the tunables accepted by NewLessor.
  type LessorConfig struct {
  	// MinLeaseTTL is the minimum TTL a lease can be granted with.
  	MinLeaseTTL int64
  	// CheckpointInterval is the wait between lease checkpoints; a zero
  	// value selects the lessor's default of five minutes.
  	CheckpointInterval time.Duration
  }
  141. func NewLessor(lg *zap.Logger, b backend.Backend, cfg LessorConfig) Lessor {
  142. return newLessor(lg, b, cfg)
  143. }
  144. func newLessor(lg *zap.Logger, b backend.Backend, cfg LessorConfig) *lessor {
  145. checkpointInterval := cfg.CheckpointInterval
  146. if checkpointInterval == 0 {
  147. checkpointInterval = 5 * time.Minute
  148. }
  149. l := &lessor{
  150. leaseMap: make(map[LeaseID]*Lease),
  151. itemMap: make(map[LeaseItem]LeaseID),
  152. leaseHeap: make(LeaseQueue, 0),
  153. leaseCheckpointHeap: make(LeaseQueue, 0),
  154. b: b,
  155. minLeaseTTL: cfg.MinLeaseTTL,
  156. checkpointInterval: checkpointInterval,
  157. // expiredC is a small buffered chan to avoid unnecessary blocking.
  158. expiredC: make(chan []*Lease, 16),
  159. stopC: make(chan struct{}),
  160. doneC: make(chan struct{}),
  161. lg: lg,
  162. }
  163. l.initAndRecover()
  164. go l.runLoop()
  165. return l
  166. }
  167. // isPrimary indicates if this lessor is the primary lessor. The primary
  168. // lessor manages lease expiration and renew.
  169. //
  170. // in etcd, raft leader is the primary. Thus there might be two primary
  171. // leaders at the same time (raft allows concurrent leader but with different term)
  172. // for at most a leader election timeout.
  173. // The old primary leader cannot affect the correctness since its proposal has a
  174. // smaller term and will not be committed.
  175. //
  176. // TODO: raft follower do not forward lease management proposals. There might be a
  177. // very small window (within second normally which depends on go scheduling) that
  178. // a raft follow is the primary between the raft leader demotion and lessor demotion.
  179. // Usually this should not be a problem. Lease should not be that sensitive to timing.
  180. func (le *lessor) isPrimary() bool {
  181. return le.demotec != nil
  182. }
  183. func (le *lessor) SetRangeDeleter(rd RangeDeleter) {
  184. le.mu.Lock()
  185. defer le.mu.Unlock()
  186. le.rd = rd
  187. }
  188. func (le *lessor) SetCheckpointer(cp Checkpointer) {
  189. le.mu.Lock()
  190. defer le.mu.Unlock()
  191. le.cp = cp
  192. }
  193. func (le *lessor) Grant(id LeaseID, ttl int64) (*Lease, error) {
  194. if id == NoLease {
  195. return nil, ErrLeaseNotFound
  196. }
  197. if ttl > MaxLeaseTTL {
  198. return nil, ErrLeaseTTLTooLarge
  199. }
  200. // TODO: when lessor is under high load, it should give out lease
  201. // with longer TTL to reduce renew load.
  202. l := &Lease{
  203. ID: id,
  204. ttl: ttl,
  205. itemSet: make(map[LeaseItem]struct{}),
  206. revokec: make(chan struct{}),
  207. }
  208. le.mu.Lock()
  209. defer le.mu.Unlock()
  210. if _, ok := le.leaseMap[id]; ok {
  211. return nil, ErrLeaseExists
  212. }
  213. if l.ttl < le.minLeaseTTL {
  214. l.ttl = le.minLeaseTTL
  215. }
  216. if le.isPrimary() {
  217. l.refresh(0)
  218. } else {
  219. l.forever()
  220. }
  221. le.leaseMap[id] = l
  222. item := &LeaseWithTime{id: l.ID, time: l.expiry.UnixNano()}
  223. heap.Push(&le.leaseHeap, item)
  224. l.persistTo(le.b)
  225. leaseTotalTTLs.Observe(float64(l.ttl))
  226. leaseGranted.Inc()
  227. if le.isPrimary() {
  228. le.scheduleCheckpointIfNeeded(l)
  229. }
  230. return l, nil
  231. }
  232. func (le *lessor) Revoke(id LeaseID) error {
  233. le.mu.Lock()
  234. l := le.leaseMap[id]
  235. if l == nil {
  236. le.mu.Unlock()
  237. return ErrLeaseNotFound
  238. }
  239. defer close(l.revokec)
  240. // unlock before doing external work
  241. le.mu.Unlock()
  242. if le.rd == nil {
  243. return nil
  244. }
  245. txn := le.rd()
  246. // sort keys so deletes are in same order among all members,
  247. // otherwise the backened hashes will be different
  248. keys := l.Keys()
  249. sort.StringSlice(keys).Sort()
  250. for _, key := range keys {
  251. txn.DeleteRange([]byte(key), nil)
  252. }
  253. le.mu.Lock()
  254. defer le.mu.Unlock()
  255. delete(le.leaseMap, l.ID)
  256. // lease deletion needs to be in the same backend transaction with the
  257. // kv deletion. Or we might end up with not executing the revoke or not
  258. // deleting the keys if etcdserver fails in between.
  259. le.b.BatchTx().UnsafeDelete(leaseBucketName, int64ToBytes(int64(l.ID)))
  260. txn.End()
  261. leaseRevoked.Inc()
  262. return nil
  263. }
  264. func (le *lessor) Checkpoint(id LeaseID, remainingTTL int64) error {
  265. le.mu.Lock()
  266. defer le.mu.Unlock()
  267. if l, ok := le.leaseMap[id]; ok {
  268. // when checkpointing, we only update the remainingTTL, Promote is responsible for applying this to lease expiry
  269. l.remainingTTL = remainingTTL
  270. if le.isPrimary() {
  271. // schedule the next checkpoint as needed
  272. le.scheduleCheckpointIfNeeded(l)
  273. }
  274. }
  275. return nil
  276. }
  277. // Renew renews an existing lease. If the given lease does not exist or
  278. // has expired, an error will be returned.
  279. func (le *lessor) Renew(id LeaseID) (int64, error) {
  280. le.mu.RLock()
  281. if !le.isPrimary() {
  282. // forward renew request to primary instead of returning error.
  283. le.mu.RUnlock()
  284. return -1, ErrNotPrimary
  285. }
  286. demotec := le.demotec
  287. l := le.leaseMap[id]
  288. if l == nil {
  289. le.mu.RUnlock()
  290. return -1, ErrLeaseNotFound
  291. }
  292. // Clear remaining TTL when we renew if it is set
  293. clearRemainingTTL := le.cp != nil && l.remainingTTL > 0
  294. le.mu.RUnlock()
  295. if l.expired() {
  296. select {
  297. // A expired lease might be pending for revoking or going through
  298. // quorum to be revoked. To be accurate, renew request must wait for the
  299. // deletion to complete.
  300. case <-l.revokec:
  301. return -1, ErrLeaseNotFound
  302. // The expired lease might fail to be revoked if the primary changes.
  303. // The caller will retry on ErrNotPrimary.
  304. case <-demotec:
  305. return -1, ErrNotPrimary
  306. case <-le.stopC:
  307. return -1, ErrNotPrimary
  308. }
  309. }
  310. // Clear remaining TTL when we renew if it is set
  311. // By applying a RAFT entry only when the remainingTTL is already set, we limit the number
  312. // of RAFT entries written per lease to a max of 2 per checkpoint interval.
  313. if clearRemainingTTL {
  314. le.cp(context.Background(), &pb.LeaseCheckpointRequest{Checkpoints: []*pb.LeaseCheckpoint{{ID: int64(l.ID), Remaining_TTL: 0}}})
  315. }
  316. le.mu.Lock()
  317. l.refresh(0)
  318. item := &LeaseWithTime{id: l.ID, time: l.expiry.UnixNano()}
  319. heap.Push(&le.leaseHeap, item)
  320. le.mu.Unlock()
  321. leaseRenewed.Inc()
  322. return l.ttl, nil
  323. }
  324. func (le *lessor) Lookup(id LeaseID) *Lease {
  325. le.mu.RLock()
  326. defer le.mu.RUnlock()
  327. return le.leaseMap[id]
  328. }
  329. func (le *lessor) unsafeLeases() []*Lease {
  330. leases := make([]*Lease, 0, len(le.leaseMap))
  331. for _, l := range le.leaseMap {
  332. leases = append(leases, l)
  333. }
  334. return leases
  335. }
  336. func (le *lessor) Leases() []*Lease {
  337. le.mu.RLock()
  338. ls := le.unsafeLeases()
  339. le.mu.RUnlock()
  340. sort.Sort(leasesByExpiry(ls))
  341. return ls
  342. }
  343. func (le *lessor) Promote(extend time.Duration) {
  344. le.mu.Lock()
  345. defer le.mu.Unlock()
  346. le.demotec = make(chan struct{})
  347. // refresh the expiries of all leases.
  348. for _, l := range le.leaseMap {
  349. l.refresh(extend)
  350. item := &LeaseWithTime{id: l.ID, time: l.expiry.UnixNano()}
  351. heap.Push(&le.leaseHeap, item)
  352. }
  353. if len(le.leaseMap) < leaseRevokeRate {
  354. // no possibility of lease pile-up
  355. return
  356. }
  357. // adjust expiries in case of overlap
  358. leases := le.unsafeLeases()
  359. sort.Sort(leasesByExpiry(leases))
  360. baseWindow := leases[0].Remaining()
  361. nextWindow := baseWindow + time.Second
  362. expires := 0
  363. // have fewer expires than the total revoke rate so piled up leases
  364. // don't consume the entire revoke limit
  365. targetExpiresPerSecond := (3 * leaseRevokeRate) / 4
  366. for _, l := range leases {
  367. remaining := l.Remaining()
  368. if remaining > nextWindow {
  369. baseWindow = remaining
  370. nextWindow = baseWindow + time.Second
  371. expires = 1
  372. continue
  373. }
  374. expires++
  375. if expires <= targetExpiresPerSecond {
  376. continue
  377. }
  378. rateDelay := float64(time.Second) * (float64(expires) / float64(targetExpiresPerSecond))
  379. // If leases are extended by n seconds, leases n seconds ahead of the
  380. // base window should be extended by only one second.
  381. rateDelay -= float64(remaining - baseWindow)
  382. delay := time.Duration(rateDelay)
  383. nextWindow = baseWindow + delay
  384. l.refresh(delay + extend)
  385. item := &LeaseWithTime{id: l.ID, time: l.expiry.UnixNano()}
  386. heap.Push(&le.leaseHeap, item)
  387. le.scheduleCheckpointIfNeeded(l)
  388. }
  389. }
  390. type leasesByExpiry []*Lease
  391. func (le leasesByExpiry) Len() int { return len(le) }
  392. func (le leasesByExpiry) Less(i, j int) bool { return le[i].Remaining() < le[j].Remaining() }
  393. func (le leasesByExpiry) Swap(i, j int) { le[i], le[j] = le[j], le[i] }
  394. func (le *lessor) Demote() {
  395. le.mu.Lock()
  396. defer le.mu.Unlock()
  397. // set the expiries of all leases to forever
  398. for _, l := range le.leaseMap {
  399. l.forever()
  400. }
  401. le.clearScheduledLeasesCheckpoints()
  402. if le.demotec != nil {
  403. close(le.demotec)
  404. le.demotec = nil
  405. }
  406. }
  407. // Attach attaches items to the lease with given ID. When the lease
  408. // expires, the attached items will be automatically removed.
  409. // If the given lease does not exist, an error will be returned.
  410. func (le *lessor) Attach(id LeaseID, items []LeaseItem) error {
  411. le.mu.Lock()
  412. defer le.mu.Unlock()
  413. l := le.leaseMap[id]
  414. if l == nil {
  415. return ErrLeaseNotFound
  416. }
  417. l.mu.Lock()
  418. for _, it := range items {
  419. l.itemSet[it] = struct{}{}
  420. le.itemMap[it] = id
  421. }
  422. l.mu.Unlock()
  423. return nil
  424. }
  425. func (le *lessor) GetLease(item LeaseItem) LeaseID {
  426. le.mu.RLock()
  427. id := le.itemMap[item]
  428. le.mu.RUnlock()
  429. return id
  430. }
  431. // Detach detaches items from the lease with given ID.
  432. // If the given lease does not exist, an error will be returned.
  433. func (le *lessor) Detach(id LeaseID, items []LeaseItem) error {
  434. le.mu.Lock()
  435. defer le.mu.Unlock()
  436. l := le.leaseMap[id]
  437. if l == nil {
  438. return ErrLeaseNotFound
  439. }
  440. l.mu.Lock()
  441. for _, it := range items {
  442. delete(l.itemSet, it)
  443. delete(le.itemMap, it)
  444. }
  445. l.mu.Unlock()
  446. return nil
  447. }
  448. func (le *lessor) Recover(b backend.Backend, rd RangeDeleter) {
  449. le.mu.Lock()
  450. defer le.mu.Unlock()
  451. le.b = b
  452. le.rd = rd
  453. le.leaseMap = make(map[LeaseID]*Lease)
  454. le.itemMap = make(map[LeaseItem]LeaseID)
  455. le.initAndRecover()
  456. }
  457. func (le *lessor) ExpiredLeasesC() <-chan []*Lease {
  458. return le.expiredC
  459. }
  460. func (le *lessor) Stop() {
  461. close(le.stopC)
  462. <-le.doneC
  463. }
  464. func (le *lessor) runLoop() {
  465. defer close(le.doneC)
  466. for {
  467. le.revokeExpiredLeases()
  468. le.checkpointScheduledLeases()
  469. select {
  470. case <-time.After(500 * time.Millisecond):
  471. case <-le.stopC:
  472. return
  473. }
  474. }
  475. }
  476. // revokeExpiredLeases finds all leases past their expiry and sends them to epxired channel for
  477. // to be revoked.
  478. func (le *lessor) revokeExpiredLeases() {
  479. var ls []*Lease
  480. // rate limit
  481. revokeLimit := leaseRevokeRate / 2
  482. le.mu.RLock()
  483. if le.isPrimary() {
  484. ls = le.findExpiredLeases(revokeLimit)
  485. }
  486. le.mu.RUnlock()
  487. if len(ls) != 0 {
  488. select {
  489. case <-le.stopC:
  490. return
  491. case le.expiredC <- ls:
  492. default:
  493. // the receiver of expiredC is probably busy handling
  494. // other stuff
  495. // let's try this next time after 500ms
  496. }
  497. }
  498. }
  499. // checkpointScheduledLeases finds all scheduled lease checkpoints that are due and
  500. // submits them to the checkpointer to persist them to the consensus log.
  501. func (le *lessor) checkpointScheduledLeases() {
  502. var cps []*pb.LeaseCheckpoint
  503. // rate limit
  504. for i := 0; i < leaseCheckpointRate/2; i++ {
  505. le.mu.Lock()
  506. if le.isPrimary() {
  507. cps = le.findDueScheduledCheckpoints(maxLeaseCheckpointBatchSize)
  508. }
  509. le.mu.Unlock()
  510. if len(cps) != 0 {
  511. le.cp(context.Background(), &pb.LeaseCheckpointRequest{Checkpoints: cps})
  512. }
  513. if len(cps) < maxLeaseCheckpointBatchSize {
  514. return
  515. }
  516. }
  517. }
  518. func (le *lessor) clearScheduledLeasesCheckpoints() {
  519. le.leaseCheckpointHeap = make(LeaseQueue, 0)
  520. }
  521. // expireExists returns true if expiry items exist.
  522. // It pops only when expiry item exists.
  523. // "next" is true, to indicate that it may exist in next attempt.
  524. func (le *lessor) expireExists() (l *Lease, ok bool, next bool) {
  525. if le.leaseHeap.Len() == 0 {
  526. return nil, false, false
  527. }
  528. item := le.leaseHeap[0]
  529. l = le.leaseMap[item.id]
  530. if l == nil {
  531. // lease has expired or been revoked
  532. // no need to revoke (nothing is expiry)
  533. heap.Pop(&le.leaseHeap) // O(log N)
  534. return nil, false, true
  535. }
  536. if time.Now().UnixNano() < item.time /* expiration time */ {
  537. // Candidate expirations are caught up, reinsert this item
  538. // and no need to revoke (nothing is expiry)
  539. return l, false, false
  540. }
  541. // if the lease is actually expired, add to the removal list. If it is not expired, we can ignore it because another entry will have been inserted into the heap
  542. heap.Pop(&le.leaseHeap) // O(log N)
  543. return l, true, false
  544. }
  545. // findExpiredLeases loops leases in the leaseMap until reaching expired limit
  546. // and returns the expired leases that needed to be revoked.
  547. func (le *lessor) findExpiredLeases(limit int) []*Lease {
  548. leases := make([]*Lease, 0, 16)
  549. for {
  550. l, ok, next := le.expireExists()
  551. if !ok && !next {
  552. break
  553. }
  554. if !ok {
  555. continue
  556. }
  557. if next {
  558. continue
  559. }
  560. if l.expired() {
  561. leases = append(leases, l)
  562. // reach expired limit
  563. if len(leases) == limit {
  564. break
  565. }
  566. }
  567. }
  568. return leases
  569. }
  570. func (le *lessor) scheduleCheckpointIfNeeded(lease *Lease) {
  571. if le.cp == nil {
  572. return
  573. }
  574. if lease.RemainingTTL() > int64(le.checkpointInterval.Seconds()) {
  575. if le.lg != nil {
  576. le.lg.Debug("Scheduling lease checkpoint",
  577. zap.Int64("leaseID", int64(lease.ID)),
  578. zap.Duration("intervalSeconds", le.checkpointInterval),
  579. )
  580. }
  581. heap.Push(&le.leaseCheckpointHeap, &LeaseWithTime{
  582. id: lease.ID,
  583. time: time.Now().Add(le.checkpointInterval).UnixNano(),
  584. })
  585. }
  586. }
  587. func (le *lessor) findDueScheduledCheckpoints(checkpointLimit int) []*pb.LeaseCheckpoint {
  588. if le.cp == nil {
  589. return nil
  590. }
  591. now := time.Now()
  592. cps := []*pb.LeaseCheckpoint{}
  593. for le.leaseCheckpointHeap.Len() > 0 && len(cps) < checkpointLimit {
  594. lt := le.leaseCheckpointHeap[0]
  595. if lt.time /* next checkpoint time */ > now.UnixNano() {
  596. return cps
  597. }
  598. heap.Pop(&le.leaseCheckpointHeap)
  599. var l *Lease
  600. var ok bool
  601. if l, ok = le.leaseMap[lt.id]; !ok {
  602. continue
  603. }
  604. if !now.Before(l.expiry) {
  605. continue
  606. }
  607. remainingTTL := int64(math.Ceil(l.expiry.Sub(now).Seconds()))
  608. if remainingTTL >= l.ttl {
  609. continue
  610. }
  611. if le.lg != nil {
  612. le.lg.Debug("Checkpointing lease",
  613. zap.Int64("leaseID", int64(lt.id)),
  614. zap.Int64("remainingTTL", remainingTTL),
  615. )
  616. }
  617. cps = append(cps, &pb.LeaseCheckpoint{ID: int64(lt.id), Remaining_TTL: remainingTTL})
  618. }
  619. return cps
  620. }
  621. func (le *lessor) initAndRecover() {
  622. tx := le.b.BatchTx()
  623. tx.Lock()
  624. tx.UnsafeCreateBucket(leaseBucketName)
  625. _, vs := tx.UnsafeRange(leaseBucketName, int64ToBytes(0), int64ToBytes(math.MaxInt64), 0)
  626. // TODO: copy vs and do decoding outside tx lock if lock contention becomes an issue.
  627. for i := range vs {
  628. var lpb leasepb.Lease
  629. err := lpb.Unmarshal(vs[i])
  630. if err != nil {
  631. tx.Unlock()
  632. panic("failed to unmarshal lease proto item")
  633. }
  634. ID := LeaseID(lpb.ID)
  635. if lpb.TTL < le.minLeaseTTL {
  636. lpb.TTL = le.minLeaseTTL
  637. }
  638. le.leaseMap[ID] = &Lease{
  639. ID: ID,
  640. ttl: lpb.TTL,
  641. // itemSet will be filled in when recover key-value pairs
  642. // set expiry to forever, refresh when promoted
  643. itemSet: make(map[LeaseItem]struct{}),
  644. expiry: forever,
  645. revokec: make(chan struct{}),
  646. }
  647. }
  648. heap.Init(&le.leaseHeap)
  649. heap.Init(&le.leaseCheckpointHeap)
  650. tx.Unlock()
  651. le.b.ForceCommit()
  652. }
  653. type Lease struct {
  654. ID LeaseID
  655. ttl int64 // time to live of the lease in seconds
  656. remainingTTL int64 // remaining time to live in seconds, if zero valued it is considered unset and the full ttl should be used
  657. // expiryMu protects concurrent accesses to expiry
  658. expiryMu sync.RWMutex
  659. // expiry is time when lease should expire. no expiration when expiry.IsZero() is true
  660. expiry time.Time
  661. // mu protects concurrent accesses to itemSet
  662. mu sync.RWMutex
  663. itemSet map[LeaseItem]struct{}
  664. revokec chan struct{}
  665. }
  666. func (l *Lease) expired() bool {
  667. return l.Remaining() <= 0
  668. }
  669. func (l *Lease) persistTo(b backend.Backend) {
  670. key := int64ToBytes(int64(l.ID))
  671. lpb := leasepb.Lease{ID: int64(l.ID), TTL: l.ttl, RemainingTTL: l.remainingTTL}
  672. val, err := lpb.Marshal()
  673. if err != nil {
  674. panic("failed to marshal lease proto item")
  675. }
  676. b.BatchTx().Lock()
  677. b.BatchTx().UnsafePut(leaseBucketName, key, val)
  678. b.BatchTx().Unlock()
  679. }
  680. // TTL returns the TTL of the Lease.
  681. func (l *Lease) TTL() int64 {
  682. return l.ttl
  683. }
  684. // RemainingTTL returns the last checkpointed remaining TTL of the lease.
  685. // TODO(jpbetz): do not expose this utility method
  686. func (l *Lease) RemainingTTL() int64 {
  687. if l.remainingTTL > 0 {
  688. return l.remainingTTL
  689. }
  690. return l.ttl
  691. }
  692. // refresh refreshes the expiry of the lease.
  693. func (l *Lease) refresh(extend time.Duration) {
  694. newExpiry := time.Now().Add(extend + time.Duration(l.RemainingTTL())*time.Second)
  695. l.expiryMu.Lock()
  696. defer l.expiryMu.Unlock()
  697. l.expiry = newExpiry
  698. }
  699. // forever sets the expiry of lease to be forever.
  700. func (l *Lease) forever() {
  701. l.expiryMu.Lock()
  702. defer l.expiryMu.Unlock()
  703. l.expiry = forever
  704. }
  705. // Keys returns all the keys attached to the lease.
  706. func (l *Lease) Keys() []string {
  707. l.mu.RLock()
  708. keys := make([]string, 0, len(l.itemSet))
  709. for k := range l.itemSet {
  710. keys = append(keys, k.Key)
  711. }
  712. l.mu.RUnlock()
  713. return keys
  714. }
  715. // Remaining returns the remaining time of the lease.
  716. func (l *Lease) Remaining() time.Duration {
  717. l.expiryMu.RLock()
  718. defer l.expiryMu.RUnlock()
  719. if l.expiry.IsZero() {
  720. return time.Duration(math.MaxInt64)
  721. }
  722. return time.Until(l.expiry)
  723. }
  // LeaseItem identifies something that can be attached to a lease.
  type LeaseItem struct {
  	// Key is the key of the attached item.
  	Key string
  }
  // int64ToBytes encodes n as an 8-byte big-endian slice.
  func int64ToBytes(n int64) []byte {
  	var buf [8]byte
  	binary.BigEndian.PutUint64(buf[:], uint64(n))
  	return buf[:]
  }
  732. // FakeLessor is a fake implementation of Lessor interface.
  733. // Used for testing only.
  734. type FakeLessor struct{}
  735. func (fl *FakeLessor) SetRangeDeleter(dr RangeDeleter) {}
  736. func (fl *FakeLessor) SetCheckpointer(cp Checkpointer) {}
  737. func (fl *FakeLessor) Grant(id LeaseID, ttl int64) (*Lease, error) { return nil, nil }
  738. func (fl *FakeLessor) Revoke(id LeaseID) error { return nil }
  739. func (fl *FakeLessor) Checkpoint(id LeaseID, remainingTTL int64) error { return nil }
  740. func (fl *FakeLessor) Attach(id LeaseID, items []LeaseItem) error { return nil }
  741. func (fl *FakeLessor) GetLease(item LeaseItem) LeaseID { return 0 }
  742. func (fl *FakeLessor) Detach(id LeaseID, items []LeaseItem) error { return nil }
  743. func (fl *FakeLessor) Promote(extend time.Duration) {}
  744. func (fl *FakeLessor) Demote() {}
  745. func (fl *FakeLessor) Renew(id LeaseID) (int64, error) { return 10, nil }
  746. func (fl *FakeLessor) Lookup(id LeaseID) *Lease { return nil }
  747. func (fl *FakeLessor) Leases() []*Lease { return nil }
  748. func (fl *FakeLessor) ExpiredLeasesC() <-chan []*Lease { return nil }
  749. func (fl *FakeLessor) Recover(b backend.Backend, rd RangeDeleter) {}
  750. func (fl *FakeLessor) Stop() {}