apply.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. // Copyright 2016 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package etcdserver
  15. import (
  16. "bytes"
  17. "fmt"
  18. "sort"
  19. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  20. "github.com/coreos/etcd/lease"
  21. "github.com/coreos/etcd/pkg/types"
  22. dstorage "github.com/coreos/etcd/storage"
  23. "github.com/coreos/etcd/storage/storagepb"
  24. "github.com/gogo/protobuf/proto"
  25. )
  26. const (
  27. // noTxn is an invalid txn ID.
  28. // To apply with independent Range, Put, Delete, you can pass noTxn
  29. // to apply functions instead of a valid txn ID.
  30. noTxn = -1
  31. )
  32. type applyResult struct {
  33. resp proto.Message
  34. err error
  35. // physc signals the physical effect of the request has completed in addition
  36. // to being logically reflected by the node. Currently only used for
  37. // Compaction requests.
  38. physc <-chan struct{}
  39. }
  40. // applierV3 is the interface for processing V3 raft messages
  41. type applierV3 interface {
  42. Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse, error)
  43. Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResponse, error)
  44. DeleteRange(txnID int64, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
  45. Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error)
  46. Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error)
  47. LeaseCreate(lc *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error)
  48. LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
  49. Alarm(*pb.AlarmRequest) (*pb.AlarmResponse, error)
  50. AuthEnable() (*pb.AuthEnableResponse, error)
  51. UserAdd(ua *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
  52. }
  53. type applierV3backend struct {
  54. s *EtcdServer
  55. }
  56. func (s *EtcdServer) applyV3Request(r *pb.InternalRaftRequest) *applyResult {
  57. ar := &applyResult{}
  58. switch {
  59. case r.Range != nil:
  60. ar.resp, ar.err = s.applyV3.Range(noTxn, r.Range)
  61. case r.Put != nil:
  62. ar.resp, ar.err = s.applyV3.Put(noTxn, r.Put)
  63. case r.DeleteRange != nil:
  64. ar.resp, ar.err = s.applyV3.DeleteRange(noTxn, r.DeleteRange)
  65. case r.Txn != nil:
  66. ar.resp, ar.err = s.applyV3.Txn(r.Txn)
  67. case r.Compaction != nil:
  68. ar.resp, ar.physc, ar.err = s.applyV3.Compaction(r.Compaction)
  69. case r.LeaseCreate != nil:
  70. ar.resp, ar.err = s.applyV3.LeaseCreate(r.LeaseCreate)
  71. case r.LeaseRevoke != nil:
  72. ar.resp, ar.err = s.applyV3.LeaseRevoke(r.LeaseRevoke)
  73. case r.Alarm != nil:
  74. ar.resp, ar.err = s.applyV3.Alarm(r.Alarm)
  75. case r.AuthEnable != nil:
  76. ar.resp, ar.err = s.applyV3.AuthEnable()
  77. case r.AuthUserAdd != nil:
  78. ar.resp, ar.err = s.applyV3.UserAdd(r.AuthUserAdd)
  79. default:
  80. panic("not implemented")
  81. }
  82. return ar
  83. }
  84. func (a *applierV3backend) Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse, error) {
  85. resp := &pb.PutResponse{}
  86. resp.Header = &pb.ResponseHeader{}
  87. var (
  88. rev int64
  89. err error
  90. )
  91. if txnID != noTxn {
  92. rev, err = a.s.KV().TxnPut(txnID, p.Key, p.Value, lease.LeaseID(p.Lease))
  93. if err != nil {
  94. return nil, err
  95. }
  96. } else {
  97. leaseID := lease.LeaseID(p.Lease)
  98. if leaseID != lease.NoLease {
  99. if l := a.s.lessor.Lookup(leaseID); l == nil {
  100. return nil, lease.ErrLeaseNotFound
  101. }
  102. }
  103. rev = a.s.KV().Put(p.Key, p.Value, leaseID)
  104. }
  105. resp.Header.Revision = rev
  106. return resp, nil
  107. }
  108. func (a *applierV3backend) DeleteRange(txnID int64, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
  109. resp := &pb.DeleteRangeResponse{}
  110. resp.Header = &pb.ResponseHeader{}
  111. var (
  112. n int64
  113. rev int64
  114. err error
  115. )
  116. if isGteRange(dr.RangeEnd) {
  117. dr.RangeEnd = []byte{}
  118. }
  119. if txnID != noTxn {
  120. n, rev, err = a.s.KV().TxnDeleteRange(txnID, dr.Key, dr.RangeEnd)
  121. if err != nil {
  122. return nil, err
  123. }
  124. } else {
  125. n, rev = a.s.KV().DeleteRange(dr.Key, dr.RangeEnd)
  126. }
  127. resp.Deleted = n
  128. resp.Header.Revision = rev
  129. return resp, nil
  130. }
  131. func (a *applierV3backend) Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResponse, error) {
  132. resp := &pb.RangeResponse{}
  133. resp.Header = &pb.ResponseHeader{}
  134. var (
  135. kvs []storagepb.KeyValue
  136. rev int64
  137. err error
  138. )
  139. if isGteRange(r.RangeEnd) {
  140. r.RangeEnd = []byte{}
  141. }
  142. limit := r.Limit
  143. if r.SortOrder != pb.RangeRequest_NONE {
  144. // fetch everything; sort and truncate afterwards
  145. limit = 0
  146. }
  147. if limit > 0 {
  148. // fetch one extra for 'more' flag
  149. limit = limit + 1
  150. }
  151. if txnID != noTxn {
  152. kvs, rev, err = a.s.KV().TxnRange(txnID, r.Key, r.RangeEnd, limit, r.Revision)
  153. if err != nil {
  154. return nil, err
  155. }
  156. } else {
  157. kvs, rev, err = a.s.KV().Range(r.Key, r.RangeEnd, limit, r.Revision)
  158. if err != nil {
  159. return nil, err
  160. }
  161. }
  162. if r.SortOrder != pb.RangeRequest_NONE {
  163. var sorter sort.Interface
  164. switch {
  165. case r.SortTarget == pb.RangeRequest_KEY:
  166. sorter = &kvSortByKey{&kvSort{kvs}}
  167. case r.SortTarget == pb.RangeRequest_VERSION:
  168. sorter = &kvSortByVersion{&kvSort{kvs}}
  169. case r.SortTarget == pb.RangeRequest_CREATE:
  170. sorter = &kvSortByCreate{&kvSort{kvs}}
  171. case r.SortTarget == pb.RangeRequest_MOD:
  172. sorter = &kvSortByMod{&kvSort{kvs}}
  173. case r.SortTarget == pb.RangeRequest_VALUE:
  174. sorter = &kvSortByValue{&kvSort{kvs}}
  175. }
  176. switch {
  177. case r.SortOrder == pb.RangeRequest_ASCEND:
  178. sort.Sort(sorter)
  179. case r.SortOrder == pb.RangeRequest_DESCEND:
  180. sort.Sort(sort.Reverse(sorter))
  181. }
  182. }
  183. if r.Limit > 0 && len(kvs) > int(r.Limit) {
  184. kvs = kvs[:r.Limit]
  185. resp.More = true
  186. }
  187. resp.Header.Revision = rev
  188. for i := range kvs {
  189. resp.Kvs = append(resp.Kvs, &kvs[i])
  190. }
  191. return resp, nil
  192. }
  193. func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
  194. var revision int64
  195. ok := true
  196. for _, c := range rt.Compare {
  197. if revision, ok = a.applyCompare(c); !ok {
  198. break
  199. }
  200. }
  201. var reqs []*pb.RequestUnion
  202. if ok {
  203. reqs = rt.Success
  204. } else {
  205. reqs = rt.Failure
  206. }
  207. if err := a.checkRequestLeases(reqs); err != nil {
  208. return nil, err
  209. }
  210. if err := a.checkRequestRange(reqs); err != nil {
  211. return nil, err
  212. }
  213. // When executing the operations of txn, we need to hold the txn lock.
  214. // So the reader will not see any intermediate results.
  215. txnID := a.s.KV().TxnBegin()
  216. defer func() {
  217. err := a.s.KV().TxnEnd(txnID)
  218. if err != nil {
  219. panic(fmt.Sprint("unexpected error when closing txn", txnID))
  220. }
  221. }()
  222. resps := make([]*pb.ResponseUnion, len(reqs))
  223. for i := range reqs {
  224. resps[i] = a.applyUnion(txnID, reqs[i])
  225. }
  226. if len(resps) != 0 {
  227. revision += 1
  228. }
  229. txnResp := &pb.TxnResponse{}
  230. txnResp.Header = &pb.ResponseHeader{}
  231. txnResp.Header.Revision = revision
  232. txnResp.Responses = resps
  233. txnResp.Succeeded = ok
  234. return txnResp, nil
  235. }
  236. // applyCompare applies the compare request.
  237. // It returns the revision at which the comparison happens. If the comparison
  238. // succeeds, the it returns true. Otherwise it returns false.
  239. func (a *applierV3backend) applyCompare(c *pb.Compare) (int64, bool) {
  240. ckvs, rev, err := a.s.KV().Range(c.Key, nil, 1, 0)
  241. if err != nil {
  242. if err == dstorage.ErrTxnIDMismatch {
  243. panic("unexpected txn ID mismatch error")
  244. }
  245. return rev, false
  246. }
  247. var ckv storagepb.KeyValue
  248. if len(ckvs) != 0 {
  249. ckv = ckvs[0]
  250. } else {
  251. // Use the zero value of ckv normally. However...
  252. if c.Target == pb.Compare_VALUE {
  253. // Always fail if we're comparing a value on a key that doesn't exist.
  254. // We can treat non-existence as the empty set explicitly, such that
  255. // even a key with a value of length 0 bytes is still a real key
  256. // that was written that way
  257. return rev, false
  258. }
  259. }
  260. // -1 is less, 0 is equal, 1 is greater
  261. var result int
  262. switch c.Target {
  263. case pb.Compare_VALUE:
  264. tv, _ := c.TargetUnion.(*pb.Compare_Value)
  265. if tv != nil {
  266. result = bytes.Compare(ckv.Value, tv.Value)
  267. }
  268. case pb.Compare_CREATE:
  269. tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision)
  270. if tv != nil {
  271. result = compareInt64(ckv.CreateRevision, tv.CreateRevision)
  272. }
  273. case pb.Compare_MOD:
  274. tv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
  275. if tv != nil {
  276. result = compareInt64(ckv.ModRevision, tv.ModRevision)
  277. }
  278. case pb.Compare_VERSION:
  279. tv, _ := c.TargetUnion.(*pb.Compare_Version)
  280. if tv != nil {
  281. result = compareInt64(ckv.Version, tv.Version)
  282. }
  283. }
  284. switch c.Result {
  285. case pb.Compare_EQUAL:
  286. if result != 0 {
  287. return rev, false
  288. }
  289. case pb.Compare_GREATER:
  290. if result != 1 {
  291. return rev, false
  292. }
  293. case pb.Compare_LESS:
  294. if result != -1 {
  295. return rev, false
  296. }
  297. }
  298. return rev, true
  299. }
  300. func (a *applierV3backend) applyUnion(txnID int64, union *pb.RequestUnion) *pb.ResponseUnion {
  301. switch tv := union.Request.(type) {
  302. case *pb.RequestUnion_RequestRange:
  303. if tv.RequestRange != nil {
  304. resp, err := a.Range(txnID, tv.RequestRange)
  305. if err != nil {
  306. panic("unexpected error during txn")
  307. }
  308. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseRange{ResponseRange: resp}}
  309. }
  310. case *pb.RequestUnion_RequestPut:
  311. if tv.RequestPut != nil {
  312. resp, err := a.Put(txnID, tv.RequestPut)
  313. if err != nil {
  314. panic("unexpected error during txn")
  315. }
  316. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponsePut{ResponsePut: resp}}
  317. }
  318. case *pb.RequestUnion_RequestDeleteRange:
  319. if tv.RequestDeleteRange != nil {
  320. resp, err := a.DeleteRange(txnID, tv.RequestDeleteRange)
  321. if err != nil {
  322. panic("unexpected error during txn")
  323. }
  324. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseDeleteRange{ResponseDeleteRange: resp}}
  325. }
  326. default:
  327. // empty union
  328. return nil
  329. }
  330. return nil
  331. }
  332. func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error) {
  333. resp := &pb.CompactionResponse{}
  334. resp.Header = &pb.ResponseHeader{}
  335. ch, err := a.s.KV().Compact(compaction.Revision)
  336. if err != nil {
  337. return nil, nil, err
  338. }
  339. // get the current revision. which key to get is not important.
  340. _, resp.Header.Revision, _ = a.s.KV().Range([]byte("compaction"), nil, 1, 0)
  341. return resp, ch, err
  342. }
  343. func (a *applierV3backend) LeaseCreate(lc *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
  344. l, err := a.s.lessor.Grant(lease.LeaseID(lc.ID), lc.TTL)
  345. resp := &pb.LeaseCreateResponse{}
  346. if err == nil {
  347. resp.ID = int64(l.ID)
  348. resp.TTL = l.TTL
  349. }
  350. return resp, err
  351. }
  352. func (a *applierV3backend) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
  353. err := a.s.lessor.Revoke(lease.LeaseID(lc.ID))
  354. return &pb.LeaseRevokeResponse{}, err
  355. }
  356. func (a *applierV3backend) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
  357. resp := &pb.AlarmResponse{}
  358. switch ar.Action {
  359. case pb.AlarmRequest_GET:
  360. resp.Alarms = a.s.alarmStore.Get(ar.Alarm)
  361. case pb.AlarmRequest_ACTIVATE:
  362. m := a.s.alarmStore.Activate(types.ID(ar.MemberID), ar.Alarm)
  363. if m == nil {
  364. break
  365. }
  366. resp.Alarms = append(resp.Alarms, m)
  367. switch m.Alarm {
  368. case pb.AlarmType_NOSPACE:
  369. if len(a.s.alarmStore.Get(m.Alarm)) == 1 {
  370. a.s.applyV3 = newApplierV3Capped(a)
  371. }
  372. default:
  373. plog.Warningf("unimplemented alarm activation (%+v)", m)
  374. }
  375. case pb.AlarmRequest_DEACTIVATE:
  376. m := a.s.alarmStore.Deactivate(types.ID(ar.MemberID), ar.Alarm)
  377. if m == nil {
  378. break
  379. }
  380. resp.Alarms = append(resp.Alarms, m)
  381. if m.Alarm == pb.AlarmType_NOSPACE && len(a.s.alarmStore.Get(ar.Alarm)) == 0 {
  382. a.s.applyV3 = newQuotaApplierV3(a.s, &applierV3backend{a.s})
  383. }
  384. default:
  385. return nil, nil
  386. }
  387. return resp, nil
  388. }
  389. type applierV3Capped struct {
  390. applierV3
  391. q backendQuota
  392. }
  393. // newApplierV3Capped creates an applyV3 that will reject Puts and transactions
  394. // with Puts so that the number of keys in the store is capped.
  395. func newApplierV3Capped(base applierV3) applierV3 { return &applierV3Capped{applierV3: base} }
  396. func (a *applierV3Capped) Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse, error) {
  397. return nil, ErrNoSpace
  398. }
  399. func (a *applierV3Capped) Txn(r *pb.TxnRequest) (*pb.TxnResponse, error) {
  400. if a.q.Cost(r) > 0 {
  401. return nil, ErrNoSpace
  402. }
  403. return a.applierV3.Txn(r)
  404. }
  405. func (a *applierV3Capped) LeaseCreate(lc *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
  406. return nil, ErrNoSpace
  407. }
  408. func (a *applierV3backend) AuthEnable() (*pb.AuthEnableResponse, error) {
  409. a.s.AuthStore().AuthEnable()
  410. return &pb.AuthEnableResponse{}, nil
  411. }
  412. func (a *applierV3backend) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
  413. return a.s.AuthStore().UserAdd(r)
  414. }
  415. type quotaApplierV3 struct {
  416. applierV3
  417. q Quota
  418. }
  419. func newQuotaApplierV3(s *EtcdServer, app applierV3) applierV3 {
  420. return &quotaApplierV3{app, NewBackendQuota(s)}
  421. }
  422. func (a *quotaApplierV3) Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse, error) {
  423. ok := a.q.Available(p)
  424. resp, err := a.applierV3.Put(txnID, p)
  425. if err == nil && !ok {
  426. err = ErrNoSpace
  427. }
  428. return resp, err
  429. }
  430. func (a *quotaApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
  431. ok := a.q.Available(rt)
  432. resp, err := a.applierV3.Txn(rt)
  433. if err == nil && !ok {
  434. err = ErrNoSpace
  435. }
  436. return resp, err
  437. }
  438. func (a *quotaApplierV3) LeaseCreate(lc *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
  439. ok := a.q.Available(lc)
  440. resp, err := a.applierV3.LeaseCreate(lc)
  441. if err == nil && !ok {
  442. err = ErrNoSpace
  443. }
  444. return resp, err
  445. }
  446. type kvSort struct{ kvs []storagepb.KeyValue }
  447. func (s *kvSort) Swap(i, j int) {
  448. t := s.kvs[i]
  449. s.kvs[i] = s.kvs[j]
  450. s.kvs[j] = t
  451. }
  452. func (s *kvSort) Len() int { return len(s.kvs) }
  453. type kvSortByKey struct{ *kvSort }
  454. func (s *kvSortByKey) Less(i, j int) bool {
  455. return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
  456. }
  457. type kvSortByVersion struct{ *kvSort }
  458. func (s *kvSortByVersion) Less(i, j int) bool {
  459. return (s.kvs[i].Version - s.kvs[j].Version) < 0
  460. }
  461. type kvSortByCreate struct{ *kvSort }
  462. func (s *kvSortByCreate) Less(i, j int) bool {
  463. return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
  464. }
  465. type kvSortByMod struct{ *kvSort }
  466. func (s *kvSortByMod) Less(i, j int) bool {
  467. return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
  468. }
  469. type kvSortByValue struct{ *kvSort }
  470. func (s *kvSortByValue) Less(i, j int) bool {
  471. return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
  472. }
  473. func (a *applierV3backend) checkRequestLeases(reqs []*pb.RequestUnion) error {
  474. for _, requ := range reqs {
  475. tv, ok := requ.Request.(*pb.RequestUnion_RequestPut)
  476. if !ok {
  477. continue
  478. }
  479. preq := tv.RequestPut
  480. if preq == nil || lease.LeaseID(preq.Lease) == lease.NoLease {
  481. continue
  482. }
  483. if l := a.s.lessor.Lookup(lease.LeaseID(preq.Lease)); l == nil {
  484. return lease.ErrLeaseNotFound
  485. }
  486. }
  487. return nil
  488. }
  489. func (a *applierV3backend) checkRequestRange(reqs []*pb.RequestUnion) error {
  490. for _, requ := range reqs {
  491. tv, ok := requ.Request.(*pb.RequestUnion_RequestRange)
  492. if !ok {
  493. continue
  494. }
  495. greq := tv.RequestRange
  496. if greq == nil || greq.Revision == 0 {
  497. continue
  498. }
  499. if greq.Revision > a.s.KV().Rev() {
  500. return dstorage.ErrFutureRev
  501. }
  502. if greq.Revision < a.s.KV().FirstRev() {
  503. return dstorage.ErrCompacted
  504. }
  505. }
  506. return nil
  507. }
  508. func compareInt64(a, b int64) int {
  509. switch {
  510. case a < b:
  511. return -1
  512. case a > b:
  513. return 1
  514. default:
  515. return 0
  516. }
  517. }
  518. // isGteRange determines if the range end is a >= range. This works around grpc
  519. // sending empty byte strings as nil; >= is encoded in the range end as '\0'.
  520. func isGteRange(rangeEnd []byte) bool {
  521. return len(rangeEnd) == 1 && rangeEnd[0] == 0
  522. }