apply.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package etcdserver
  15. import (
  16. "bytes"
  17. "context"
  18. "fmt"
  19. "sort"
  20. "time"
  21. "go.etcd.io/etcd/auth"
  22. pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
  23. "go.etcd.io/etcd/lease"
  24. "go.etcd.io/etcd/mvcc"
  25. "go.etcd.io/etcd/mvcc/mvccpb"
  26. "go.etcd.io/etcd/pkg/traceutil"
  27. "go.etcd.io/etcd/pkg/types"
  28. "github.com/gogo/protobuf/proto"
  29. "go.uber.org/zap"
  30. )
  31. const (
  32. warnApplyDuration = 100 * time.Millisecond
  33. )
  34. type applyResult struct {
  35. resp proto.Message
  36. err error
  37. // physc signals the physical effect of the request has completed in addition
  38. // to being logically reflected by the node. Currently only used for
  39. // Compaction requests.
  40. physc <-chan struct{}
  41. trace *traceutil.Trace
  42. }
  43. // applierV3 is the interface for processing V3 raft messages
  44. type applierV3 interface {
  45. Apply(r *pb.InternalRaftRequest) *applyResult
  46. Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error)
  47. Range(ctx context.Context, txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error)
  48. DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
  49. Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error)
  50. Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, *traceutil.Trace, error)
  51. LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
  52. LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
  53. LeaseCheckpoint(lc *pb.LeaseCheckpointRequest) (*pb.LeaseCheckpointResponse, error)
  54. Alarm(*pb.AlarmRequest) (*pb.AlarmResponse, error)
  55. Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error)
  56. AuthEnable() (*pb.AuthEnableResponse, error)
  57. AuthDisable() (*pb.AuthDisableResponse, error)
  58. UserAdd(ua *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
  59. UserDelete(ua *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
  60. UserChangePassword(ua *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
  61. UserGrantRole(ua *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
  62. UserGet(ua *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
  63. UserRevokeRole(ua *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
  64. RoleAdd(ua *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
  65. RoleGrantPermission(ua *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
  66. RoleGet(ua *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
  67. RoleRevokePermission(ua *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
  68. RoleDelete(ua *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
  69. UserList(ua *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
  70. RoleList(ua *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
  71. }
  72. type checkReqFunc func(mvcc.ReadView, *pb.RequestOp) error
  73. type applierV3backend struct {
  74. s *EtcdServer
  75. checkPut checkReqFunc
  76. checkRange checkReqFunc
  77. }
  78. func (s *EtcdServer) newApplierV3Backend() applierV3 {
  79. base := &applierV3backend{s: s}
  80. base.checkPut = func(rv mvcc.ReadView, req *pb.RequestOp) error {
  81. return base.checkRequestPut(rv, req)
  82. }
  83. base.checkRange = func(rv mvcc.ReadView, req *pb.RequestOp) error {
  84. return base.checkRequestRange(rv, req)
  85. }
  86. return base
  87. }
  88. func (s *EtcdServer) newApplierV3() applierV3 {
  89. return newAuthApplierV3(
  90. s.AuthStore(),
  91. newQuotaApplierV3(s, s.newApplierV3Backend()),
  92. s.lessor,
  93. )
  94. }
  95. func (a *applierV3backend) Apply(r *pb.InternalRaftRequest) *applyResult {
  96. ar := &applyResult{}
  97. defer func(start time.Time) {
  98. warnOfExpensiveRequest(a.s.getLogger(), start, &pb.InternalRaftStringer{Request: r}, ar.resp, ar.err)
  99. }(time.Now())
  100. // call into a.s.applyV3.F instead of a.F so upper appliers can check individual calls
  101. switch {
  102. case r.Range != nil:
  103. ar.resp, ar.err = a.s.applyV3.Range(context.TODO(), nil, r.Range)
  104. case r.Put != nil:
  105. ar.resp, ar.trace, ar.err = a.s.applyV3.Put(nil, r.Put)
  106. case r.DeleteRange != nil:
  107. ar.resp, ar.err = a.s.applyV3.DeleteRange(nil, r.DeleteRange)
  108. case r.Txn != nil:
  109. ar.resp, ar.err = a.s.applyV3.Txn(r.Txn)
  110. case r.Compaction != nil:
  111. ar.resp, ar.physc, ar.trace, ar.err = a.s.applyV3.Compaction(r.Compaction)
  112. case r.LeaseGrant != nil:
  113. ar.resp, ar.err = a.s.applyV3.LeaseGrant(r.LeaseGrant)
  114. case r.LeaseRevoke != nil:
  115. ar.resp, ar.err = a.s.applyV3.LeaseRevoke(r.LeaseRevoke)
  116. case r.LeaseCheckpoint != nil:
  117. ar.resp, ar.err = a.s.applyV3.LeaseCheckpoint(r.LeaseCheckpoint)
  118. case r.Alarm != nil:
  119. ar.resp, ar.err = a.s.applyV3.Alarm(r.Alarm)
  120. case r.Authenticate != nil:
  121. ar.resp, ar.err = a.s.applyV3.Authenticate(r.Authenticate)
  122. case r.AuthEnable != nil:
  123. ar.resp, ar.err = a.s.applyV3.AuthEnable()
  124. case r.AuthDisable != nil:
  125. ar.resp, ar.err = a.s.applyV3.AuthDisable()
  126. case r.AuthUserAdd != nil:
  127. ar.resp, ar.err = a.s.applyV3.UserAdd(r.AuthUserAdd)
  128. case r.AuthUserDelete != nil:
  129. ar.resp, ar.err = a.s.applyV3.UserDelete(r.AuthUserDelete)
  130. case r.AuthUserChangePassword != nil:
  131. ar.resp, ar.err = a.s.applyV3.UserChangePassword(r.AuthUserChangePassword)
  132. case r.AuthUserGrantRole != nil:
  133. ar.resp, ar.err = a.s.applyV3.UserGrantRole(r.AuthUserGrantRole)
  134. case r.AuthUserGet != nil:
  135. ar.resp, ar.err = a.s.applyV3.UserGet(r.AuthUserGet)
  136. case r.AuthUserRevokeRole != nil:
  137. ar.resp, ar.err = a.s.applyV3.UserRevokeRole(r.AuthUserRevokeRole)
  138. case r.AuthRoleAdd != nil:
  139. ar.resp, ar.err = a.s.applyV3.RoleAdd(r.AuthRoleAdd)
  140. case r.AuthRoleGrantPermission != nil:
  141. ar.resp, ar.err = a.s.applyV3.RoleGrantPermission(r.AuthRoleGrantPermission)
  142. case r.AuthRoleGet != nil:
  143. ar.resp, ar.err = a.s.applyV3.RoleGet(r.AuthRoleGet)
  144. case r.AuthRoleRevokePermission != nil:
  145. ar.resp, ar.err = a.s.applyV3.RoleRevokePermission(r.AuthRoleRevokePermission)
  146. case r.AuthRoleDelete != nil:
  147. ar.resp, ar.err = a.s.applyV3.RoleDelete(r.AuthRoleDelete)
  148. case r.AuthUserList != nil:
  149. ar.resp, ar.err = a.s.applyV3.UserList(r.AuthUserList)
  150. case r.AuthRoleList != nil:
  151. ar.resp, ar.err = a.s.applyV3.RoleList(r.AuthRoleList)
  152. default:
  153. panic("not implemented")
  154. }
  155. return ar
  156. }
  157. func (a *applierV3backend) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (resp *pb.PutResponse, trace *traceutil.Trace, err error) {
  158. resp = &pb.PutResponse{}
  159. resp.Header = &pb.ResponseHeader{}
  160. trace = traceutil.New("put",
  161. a.s.getLogger(),
  162. traceutil.Field{Key: "key", Value: string(p.Key)},
  163. traceutil.Field{Key: "req_size", Value: proto.Size(p)},
  164. )
  165. val, leaseID := p.Value, lease.LeaseID(p.Lease)
  166. if txn == nil {
  167. if leaseID != lease.NoLease {
  168. if l := a.s.lessor.Lookup(leaseID); l == nil {
  169. return nil, nil, lease.ErrLeaseNotFound
  170. }
  171. }
  172. txn = a.s.KV().Write(trace)
  173. defer txn.End()
  174. }
  175. var rr *mvcc.RangeResult
  176. if p.IgnoreValue || p.IgnoreLease || p.PrevKv {
  177. trace.DisableStep()
  178. rr, err = txn.Range(p.Key, nil, mvcc.RangeOptions{})
  179. if err != nil {
  180. return nil, nil, err
  181. }
  182. trace.EnableStep()
  183. trace.Step("get previous kv pair")
  184. }
  185. if p.IgnoreValue || p.IgnoreLease {
  186. if rr == nil || len(rr.KVs) == 0 {
  187. // ignore_{lease,value} flag expects previous key-value pair
  188. return nil, nil, ErrKeyNotFound
  189. }
  190. }
  191. if p.IgnoreValue {
  192. val = rr.KVs[0].Value
  193. }
  194. if p.IgnoreLease {
  195. leaseID = lease.LeaseID(rr.KVs[0].Lease)
  196. }
  197. if p.PrevKv {
  198. if rr != nil && len(rr.KVs) != 0 {
  199. resp.PrevKv = &rr.KVs[0]
  200. }
  201. }
  202. resp.Header.Revision = txn.Put(p.Key, val, leaseID)
  203. trace.AddField(traceutil.Field{Key: "response_revision", Value: resp.Header.Revision})
  204. return resp, trace, nil
  205. }
  206. func (a *applierV3backend) DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
  207. resp := &pb.DeleteRangeResponse{}
  208. resp.Header = &pb.ResponseHeader{}
  209. end := mkGteRange(dr.RangeEnd)
  210. if txn == nil {
  211. txn = a.s.kv.Write(traceutil.TODO())
  212. defer txn.End()
  213. }
  214. if dr.PrevKv {
  215. rr, err := txn.Range(dr.Key, end, mvcc.RangeOptions{})
  216. if err != nil {
  217. return nil, err
  218. }
  219. if rr != nil {
  220. resp.PrevKvs = make([]*mvccpb.KeyValue, len(rr.KVs))
  221. for i := range rr.KVs {
  222. resp.PrevKvs[i] = &rr.KVs[i]
  223. }
  224. }
  225. }
  226. resp.Deleted, resp.Header.Revision = txn.DeleteRange(dr.Key, end)
  227. return resp, nil
  228. }
  229. func (a *applierV3backend) Range(ctx context.Context, txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
  230. trace := traceutil.Get(ctx)
  231. resp := &pb.RangeResponse{}
  232. resp.Header = &pb.ResponseHeader{}
  233. if txn == nil {
  234. txn = a.s.kv.Read(trace)
  235. defer txn.End()
  236. }
  237. limit := r.Limit
  238. if r.SortOrder != pb.RangeRequest_NONE ||
  239. r.MinModRevision != 0 || r.MaxModRevision != 0 ||
  240. r.MinCreateRevision != 0 || r.MaxCreateRevision != 0 {
  241. // fetch everything; sort and truncate afterwards
  242. limit = 0
  243. }
  244. if limit > 0 {
  245. // fetch one extra for 'more' flag
  246. limit = limit + 1
  247. }
  248. ro := mvcc.RangeOptions{
  249. Limit: limit,
  250. Rev: r.Revision,
  251. Count: r.CountOnly,
  252. }
  253. rr, err := txn.Range(r.Key, mkGteRange(r.RangeEnd), ro)
  254. if err != nil {
  255. return nil, err
  256. }
  257. if r.MaxModRevision != 0 {
  258. f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision > r.MaxModRevision }
  259. pruneKVs(rr, f)
  260. }
  261. if r.MinModRevision != 0 {
  262. f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision < r.MinModRevision }
  263. pruneKVs(rr, f)
  264. }
  265. if r.MaxCreateRevision != 0 {
  266. f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision > r.MaxCreateRevision }
  267. pruneKVs(rr, f)
  268. }
  269. if r.MinCreateRevision != 0 {
  270. f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision < r.MinCreateRevision }
  271. pruneKVs(rr, f)
  272. }
  273. sortOrder := r.SortOrder
  274. if r.SortTarget != pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_NONE {
  275. // Since current mvcc.Range implementation returns results
  276. // sorted by keys in lexiographically ascending order,
  277. // sort ASCEND by default only when target is not 'KEY'
  278. sortOrder = pb.RangeRequest_ASCEND
  279. }
  280. if sortOrder != pb.RangeRequest_NONE {
  281. var sorter sort.Interface
  282. switch {
  283. case r.SortTarget == pb.RangeRequest_KEY:
  284. sorter = &kvSortByKey{&kvSort{rr.KVs}}
  285. case r.SortTarget == pb.RangeRequest_VERSION:
  286. sorter = &kvSortByVersion{&kvSort{rr.KVs}}
  287. case r.SortTarget == pb.RangeRequest_CREATE:
  288. sorter = &kvSortByCreate{&kvSort{rr.KVs}}
  289. case r.SortTarget == pb.RangeRequest_MOD:
  290. sorter = &kvSortByMod{&kvSort{rr.KVs}}
  291. case r.SortTarget == pb.RangeRequest_VALUE:
  292. sorter = &kvSortByValue{&kvSort{rr.KVs}}
  293. }
  294. switch {
  295. case sortOrder == pb.RangeRequest_ASCEND:
  296. sort.Sort(sorter)
  297. case sortOrder == pb.RangeRequest_DESCEND:
  298. sort.Sort(sort.Reverse(sorter))
  299. }
  300. }
  301. if r.Limit > 0 && len(rr.KVs) > int(r.Limit) {
  302. rr.KVs = rr.KVs[:r.Limit]
  303. resp.More = true
  304. }
  305. trace.Step("filter and sort the key-value pairs")
  306. resp.Header.Revision = rr.Rev
  307. resp.Count = int64(rr.Count)
  308. resp.Kvs = make([]*mvccpb.KeyValue, len(rr.KVs))
  309. for i := range rr.KVs {
  310. if r.KeysOnly {
  311. rr.KVs[i].Value = nil
  312. }
  313. resp.Kvs[i] = &rr.KVs[i]
  314. }
  315. trace.Step("assemble the response")
  316. return resp, nil
  317. }
  318. func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
  319. isWrite := !isTxnReadonly(rt)
  320. txn := mvcc.NewReadOnlyTxnWrite(a.s.KV().Read(traceutil.TODO()))
  321. txnPath := compareToPath(txn, rt)
  322. if isWrite {
  323. if _, err := checkRequests(txn, rt, txnPath, a.checkPut); err != nil {
  324. txn.End()
  325. return nil, err
  326. }
  327. }
  328. if _, err := checkRequests(txn, rt, txnPath, a.checkRange); err != nil {
  329. txn.End()
  330. return nil, err
  331. }
  332. txnResp, _ := newTxnResp(rt, txnPath)
  333. // When executing mutable txn ops, etcd must hold the txn lock so
  334. // readers do not see any intermediate results. Since writes are
  335. // serialized on the raft loop, the revision in the read view will
  336. // be the revision of the write txn.
  337. if isWrite {
  338. txn.End()
  339. txn = a.s.KV().Write(traceutil.TODO())
  340. }
  341. a.applyTxn(txn, rt, txnPath, txnResp)
  342. rev := txn.Rev()
  343. if len(txn.Changes()) != 0 {
  344. rev++
  345. }
  346. txn.End()
  347. txnResp.Header.Revision = rev
  348. return txnResp, nil
  349. }
  350. // newTxnResp allocates a txn response for a txn request given a path.
  351. func newTxnResp(rt *pb.TxnRequest, txnPath []bool) (txnResp *pb.TxnResponse, txnCount int) {
  352. reqs := rt.Success
  353. if !txnPath[0] {
  354. reqs = rt.Failure
  355. }
  356. resps := make([]*pb.ResponseOp, len(reqs))
  357. txnResp = &pb.TxnResponse{
  358. Responses: resps,
  359. Succeeded: txnPath[0],
  360. Header: &pb.ResponseHeader{},
  361. }
  362. for i, req := range reqs {
  363. switch tv := req.Request.(type) {
  364. case *pb.RequestOp_RequestRange:
  365. resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseRange{}}
  366. case *pb.RequestOp_RequestPut:
  367. resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponsePut{}}
  368. case *pb.RequestOp_RequestDeleteRange:
  369. resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseDeleteRange{}}
  370. case *pb.RequestOp_RequestTxn:
  371. resp, txns := newTxnResp(tv.RequestTxn, txnPath[1:])
  372. resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseTxn{ResponseTxn: resp}}
  373. txnPath = txnPath[1+txns:]
  374. txnCount += txns + 1
  375. default:
  376. }
  377. }
  378. return txnResp, txnCount
  379. }
  380. func compareToPath(rv mvcc.ReadView, rt *pb.TxnRequest) []bool {
  381. txnPath := make([]bool, 1)
  382. ops := rt.Success
  383. if txnPath[0] = applyCompares(rv, rt.Compare); !txnPath[0] {
  384. ops = rt.Failure
  385. }
  386. for _, op := range ops {
  387. tv, ok := op.Request.(*pb.RequestOp_RequestTxn)
  388. if !ok || tv.RequestTxn == nil {
  389. continue
  390. }
  391. txnPath = append(txnPath, compareToPath(rv, tv.RequestTxn)...)
  392. }
  393. return txnPath
  394. }
  395. func applyCompares(rv mvcc.ReadView, cmps []*pb.Compare) bool {
  396. for _, c := range cmps {
  397. if !applyCompare(rv, c) {
  398. return false
  399. }
  400. }
  401. return true
  402. }
  403. // applyCompare applies the compare request.
  404. // If the comparison succeeds, it returns true. Otherwise, returns false.
  405. func applyCompare(rv mvcc.ReadView, c *pb.Compare) bool {
  406. // TODO: possible optimizations
  407. // * chunk reads for large ranges to conserve memory
  408. // * rewrite rules for common patterns:
  409. // ex. "[a, b) createrev > 0" => "limit 1 /\ kvs > 0"
  410. // * caching
  411. rr, err := rv.Range(c.Key, mkGteRange(c.RangeEnd), mvcc.RangeOptions{})
  412. if err != nil {
  413. return false
  414. }
  415. if len(rr.KVs) == 0 {
  416. if c.Target == pb.Compare_VALUE {
  417. // Always fail if comparing a value on a key/keys that doesn't exist;
  418. // nil == empty string in grpc; no way to represent missing value
  419. return false
  420. }
  421. return compareKV(c, mvccpb.KeyValue{})
  422. }
  423. for _, kv := range rr.KVs {
  424. if !compareKV(c, kv) {
  425. return false
  426. }
  427. }
  428. return true
  429. }
  430. func compareKV(c *pb.Compare, ckv mvccpb.KeyValue) bool {
  431. var result int
  432. rev := int64(0)
  433. switch c.Target {
  434. case pb.Compare_VALUE:
  435. v := []byte{}
  436. if tv, _ := c.TargetUnion.(*pb.Compare_Value); tv != nil {
  437. v = tv.Value
  438. }
  439. result = bytes.Compare(ckv.Value, v)
  440. case pb.Compare_CREATE:
  441. if tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision); tv != nil {
  442. rev = tv.CreateRevision
  443. }
  444. result = compareInt64(ckv.CreateRevision, rev)
  445. case pb.Compare_MOD:
  446. if tv, _ := c.TargetUnion.(*pb.Compare_ModRevision); tv != nil {
  447. rev = tv.ModRevision
  448. }
  449. result = compareInt64(ckv.ModRevision, rev)
  450. case pb.Compare_VERSION:
  451. if tv, _ := c.TargetUnion.(*pb.Compare_Version); tv != nil {
  452. rev = tv.Version
  453. }
  454. result = compareInt64(ckv.Version, rev)
  455. case pb.Compare_LEASE:
  456. if tv, _ := c.TargetUnion.(*pb.Compare_Lease); tv != nil {
  457. rev = tv.Lease
  458. }
  459. result = compareInt64(ckv.Lease, rev)
  460. }
  461. switch c.Result {
  462. case pb.Compare_EQUAL:
  463. return result == 0
  464. case pb.Compare_NOT_EQUAL:
  465. return result != 0
  466. case pb.Compare_GREATER:
  467. return result > 0
  468. case pb.Compare_LESS:
  469. return result < 0
  470. }
  471. return true
  472. }
  473. func (a *applierV3backend) applyTxn(txn mvcc.TxnWrite, rt *pb.TxnRequest, txnPath []bool, tresp *pb.TxnResponse) (txns int) {
  474. reqs := rt.Success
  475. if !txnPath[0] {
  476. reqs = rt.Failure
  477. }
  478. lg := a.s.getLogger()
  479. for i, req := range reqs {
  480. respi := tresp.Responses[i].Response
  481. switch tv := req.Request.(type) {
  482. case *pb.RequestOp_RequestRange:
  483. resp, err := a.Range(context.TODO(), txn, tv.RequestRange)
  484. if err != nil {
  485. if lg != nil {
  486. lg.Panic("unexpected error during txn", zap.Error(err))
  487. } else {
  488. plog.Panicf("unexpected error during txn: %v", err)
  489. }
  490. }
  491. respi.(*pb.ResponseOp_ResponseRange).ResponseRange = resp
  492. case *pb.RequestOp_RequestPut:
  493. resp, _, err := a.Put(txn, tv.RequestPut)
  494. if err != nil {
  495. if lg != nil {
  496. lg.Panic("unexpected error during txn", zap.Error(err))
  497. } else {
  498. plog.Panicf("unexpected error during txn: %v", err)
  499. }
  500. }
  501. respi.(*pb.ResponseOp_ResponsePut).ResponsePut = resp
  502. case *pb.RequestOp_RequestDeleteRange:
  503. resp, err := a.DeleteRange(txn, tv.RequestDeleteRange)
  504. if err != nil {
  505. if lg != nil {
  506. lg.Panic("unexpected error during txn", zap.Error(err))
  507. } else {
  508. plog.Panicf("unexpected error during txn: %v", err)
  509. }
  510. }
  511. respi.(*pb.ResponseOp_ResponseDeleteRange).ResponseDeleteRange = resp
  512. case *pb.RequestOp_RequestTxn:
  513. resp := respi.(*pb.ResponseOp_ResponseTxn).ResponseTxn
  514. applyTxns := a.applyTxn(txn, tv.RequestTxn, txnPath[1:], resp)
  515. txns += applyTxns + 1
  516. txnPath = txnPath[applyTxns+1:]
  517. default:
  518. // empty union
  519. }
  520. }
  521. return txns
  522. }
  523. func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, *traceutil.Trace, error) {
  524. resp := &pb.CompactionResponse{}
  525. resp.Header = &pb.ResponseHeader{}
  526. trace := traceutil.New("compact",
  527. a.s.getLogger(),
  528. traceutil.Field{Key: "revision", Value: compaction.Revision},
  529. )
  530. ch, err := a.s.KV().Compact(trace, compaction.Revision)
  531. if err != nil {
  532. return nil, ch, nil, err
  533. }
  534. // get the current revision. which key to get is not important.
  535. rr, _ := a.s.KV().Range([]byte("compaction"), nil, mvcc.RangeOptions{})
  536. resp.Header.Revision = rr.Rev
  537. return resp, ch, trace, err
  538. }
  539. func (a *applierV3backend) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
  540. l, err := a.s.lessor.Grant(lease.LeaseID(lc.ID), lc.TTL)
  541. resp := &pb.LeaseGrantResponse{}
  542. if err == nil {
  543. resp.ID = int64(l.ID)
  544. resp.TTL = l.TTL()
  545. resp.Header = newHeader(a.s)
  546. }
  547. return resp, err
  548. }
  549. func (a *applierV3backend) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
  550. err := a.s.lessor.Revoke(lease.LeaseID(lc.ID))
  551. return &pb.LeaseRevokeResponse{Header: newHeader(a.s)}, err
  552. }
  553. func (a *applierV3backend) LeaseCheckpoint(lc *pb.LeaseCheckpointRequest) (*pb.LeaseCheckpointResponse, error) {
  554. for _, c := range lc.Checkpoints {
  555. err := a.s.lessor.Checkpoint(lease.LeaseID(c.ID), c.Remaining_TTL)
  556. if err != nil {
  557. return &pb.LeaseCheckpointResponse{Header: newHeader(a.s)}, err
  558. }
  559. }
  560. return &pb.LeaseCheckpointResponse{Header: newHeader(a.s)}, nil
  561. }
  562. func (a *applierV3backend) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
  563. resp := &pb.AlarmResponse{}
  564. oldCount := len(a.s.alarmStore.Get(ar.Alarm))
  565. lg := a.s.getLogger()
  566. switch ar.Action {
  567. case pb.AlarmRequest_GET:
  568. resp.Alarms = a.s.alarmStore.Get(ar.Alarm)
  569. case pb.AlarmRequest_ACTIVATE:
  570. m := a.s.alarmStore.Activate(types.ID(ar.MemberID), ar.Alarm)
  571. if m == nil {
  572. break
  573. }
  574. resp.Alarms = append(resp.Alarms, m)
  575. activated := oldCount == 0 && len(a.s.alarmStore.Get(m.Alarm)) == 1
  576. if !activated {
  577. break
  578. }
  579. if lg != nil {
  580. lg.Warn("alarm raised", zap.String("alarm", m.Alarm.String()), zap.String("from", types.ID(m.MemberID).String()))
  581. } else {
  582. plog.Warningf("alarm %v raised by peer %s", m.Alarm, types.ID(m.MemberID))
  583. }
  584. switch m.Alarm {
  585. case pb.AlarmType_CORRUPT:
  586. a.s.applyV3 = newApplierV3Corrupt(a)
  587. case pb.AlarmType_NOSPACE:
  588. a.s.applyV3 = newApplierV3Capped(a)
  589. default:
  590. if lg != nil {
  591. lg.Warn("unimplemented alarm activation", zap.String("alarm", fmt.Sprintf("%+v", m)))
  592. } else {
  593. plog.Errorf("unimplemented alarm activation (%+v)", m)
  594. }
  595. }
  596. case pb.AlarmRequest_DEACTIVATE:
  597. m := a.s.alarmStore.Deactivate(types.ID(ar.MemberID), ar.Alarm)
  598. if m == nil {
  599. break
  600. }
  601. resp.Alarms = append(resp.Alarms, m)
  602. deactivated := oldCount > 0 && len(a.s.alarmStore.Get(ar.Alarm)) == 0
  603. if !deactivated {
  604. break
  605. }
  606. switch m.Alarm {
  607. case pb.AlarmType_NOSPACE, pb.AlarmType_CORRUPT:
  608. // TODO: check kv hash before deactivating CORRUPT?
  609. if lg != nil {
  610. lg.Warn("alarm disarmed", zap.String("alarm", m.Alarm.String()), zap.String("from", types.ID(m.MemberID).String()))
  611. } else {
  612. plog.Infof("alarm disarmed %+v", ar)
  613. }
  614. a.s.applyV3 = a.s.newApplierV3()
  615. default:
  616. if lg != nil {
  617. lg.Warn("unimplemented alarm deactivation", zap.String("alarm", fmt.Sprintf("%+v", m)))
  618. } else {
  619. plog.Errorf("unimplemented alarm deactivation (%+v)", m)
  620. }
  621. }
  622. default:
  623. return nil, nil
  624. }
  625. return resp, nil
  626. }
  627. type applierV3Capped struct {
  628. applierV3
  629. q backendQuota
  630. }
  631. // newApplierV3Capped creates an applyV3 that will reject Puts and transactions
  632. // with Puts so that the number of keys in the store is capped.
  633. func newApplierV3Capped(base applierV3) applierV3 { return &applierV3Capped{applierV3: base} }
  634. func (a *applierV3Capped) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
  635. return nil, nil, ErrNoSpace
  636. }
  637. func (a *applierV3Capped) Txn(r *pb.TxnRequest) (*pb.TxnResponse, error) {
  638. if a.q.Cost(r) > 0 {
  639. return nil, ErrNoSpace
  640. }
  641. return a.applierV3.Txn(r)
  642. }
  643. func (a *applierV3Capped) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
  644. return nil, ErrNoSpace
  645. }
  646. func (a *applierV3backend) AuthEnable() (*pb.AuthEnableResponse, error) {
  647. err := a.s.AuthStore().AuthEnable()
  648. if err != nil {
  649. return nil, err
  650. }
  651. return &pb.AuthEnableResponse{Header: newHeader(a.s)}, nil
  652. }
  653. func (a *applierV3backend) AuthDisable() (*pb.AuthDisableResponse, error) {
  654. a.s.AuthStore().AuthDisable()
  655. return &pb.AuthDisableResponse{Header: newHeader(a.s)}, nil
  656. }
  657. func (a *applierV3backend) Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error) {
  658. ctx := context.WithValue(context.WithValue(a.s.ctx, auth.AuthenticateParamIndex{}, a.s.consistIndex.ConsistentIndex()), auth.AuthenticateParamSimpleTokenPrefix{}, r.SimpleToken)
  659. resp, err := a.s.AuthStore().Authenticate(ctx, r.Name, r.Password)
  660. if resp != nil {
  661. resp.Header = newHeader(a.s)
  662. }
  663. return resp, err
  664. }
  665. func (a *applierV3backend) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
  666. resp, err := a.s.AuthStore().UserAdd(r)
  667. if resp != nil {
  668. resp.Header = newHeader(a.s)
  669. }
  670. return resp, err
  671. }
  672. func (a *applierV3backend) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
  673. resp, err := a.s.AuthStore().UserDelete(r)
  674. if resp != nil {
  675. resp.Header = newHeader(a.s)
  676. }
  677. return resp, err
  678. }
  679. func (a *applierV3backend) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
  680. resp, err := a.s.AuthStore().UserChangePassword(r)
  681. if resp != nil {
  682. resp.Header = newHeader(a.s)
  683. }
  684. return resp, err
  685. }
  686. func (a *applierV3backend) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
  687. resp, err := a.s.AuthStore().UserGrantRole(r)
  688. if resp != nil {
  689. resp.Header = newHeader(a.s)
  690. }
  691. return resp, err
  692. }
  693. func (a *applierV3backend) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
  694. resp, err := a.s.AuthStore().UserGet(r)
  695. if resp != nil {
  696. resp.Header = newHeader(a.s)
  697. }
  698. return resp, err
  699. }
  700. func (a *applierV3backend) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
  701. resp, err := a.s.AuthStore().UserRevokeRole(r)
  702. if resp != nil {
  703. resp.Header = newHeader(a.s)
  704. }
  705. return resp, err
  706. }
  707. func (a *applierV3backend) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
  708. resp, err := a.s.AuthStore().RoleAdd(r)
  709. if resp != nil {
  710. resp.Header = newHeader(a.s)
  711. }
  712. return resp, err
  713. }
  714. func (a *applierV3backend) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
  715. resp, err := a.s.AuthStore().RoleGrantPermission(r)
  716. if resp != nil {
  717. resp.Header = newHeader(a.s)
  718. }
  719. return resp, err
  720. }
  721. func (a *applierV3backend) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
  722. resp, err := a.s.AuthStore().RoleGet(r)
  723. if resp != nil {
  724. resp.Header = newHeader(a.s)
  725. }
  726. return resp, err
  727. }
  728. func (a *applierV3backend) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
  729. resp, err := a.s.AuthStore().RoleRevokePermission(r)
  730. if resp != nil {
  731. resp.Header = newHeader(a.s)
  732. }
  733. return resp, err
  734. }
  735. func (a *applierV3backend) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
  736. resp, err := a.s.AuthStore().RoleDelete(r)
  737. if resp != nil {
  738. resp.Header = newHeader(a.s)
  739. }
  740. return resp, err
  741. }
  742. func (a *applierV3backend) UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
  743. resp, err := a.s.AuthStore().UserList(r)
  744. if resp != nil {
  745. resp.Header = newHeader(a.s)
  746. }
  747. return resp, err
  748. }
  749. func (a *applierV3backend) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
  750. resp, err := a.s.AuthStore().RoleList(r)
  751. if resp != nil {
  752. resp.Header = newHeader(a.s)
  753. }
  754. return resp, err
  755. }
  756. type quotaApplierV3 struct {
  757. applierV3
  758. q Quota
  759. }
  760. func newQuotaApplierV3(s *EtcdServer, app applierV3) applierV3 {
  761. return &quotaApplierV3{app, NewBackendQuota(s, "v3-applier")}
  762. }
  763. func (a *quotaApplierV3) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, *traceutil.Trace, error) {
  764. ok := a.q.Available(p)
  765. resp, trace, err := a.applierV3.Put(txn, p)
  766. if err == nil && !ok {
  767. err = ErrNoSpace
  768. }
  769. return resp, trace, err
  770. }
  771. func (a *quotaApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
  772. ok := a.q.Available(rt)
  773. resp, err := a.applierV3.Txn(rt)
  774. if err == nil && !ok {
  775. err = ErrNoSpace
  776. }
  777. return resp, err
  778. }
  779. func (a *quotaApplierV3) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
  780. ok := a.q.Available(lc)
  781. resp, err := a.applierV3.LeaseGrant(lc)
  782. if err == nil && !ok {
  783. err = ErrNoSpace
  784. }
  785. return resp, err
  786. }
  787. type kvSort struct{ kvs []mvccpb.KeyValue }
  788. func (s *kvSort) Swap(i, j int) {
  789. t := s.kvs[i]
  790. s.kvs[i] = s.kvs[j]
  791. s.kvs[j] = t
  792. }
  793. func (s *kvSort) Len() int { return len(s.kvs) }
  794. type kvSortByKey struct{ *kvSort }
  795. func (s *kvSortByKey) Less(i, j int) bool {
  796. return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
  797. }
  798. type kvSortByVersion struct{ *kvSort }
  799. func (s *kvSortByVersion) Less(i, j int) bool {
  800. return (s.kvs[i].Version - s.kvs[j].Version) < 0
  801. }
  802. type kvSortByCreate struct{ *kvSort }
  803. func (s *kvSortByCreate) Less(i, j int) bool {
  804. return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
  805. }
  806. type kvSortByMod struct{ *kvSort }
  807. func (s *kvSortByMod) Less(i, j int) bool {
  808. return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
  809. }
  810. type kvSortByValue struct{ *kvSort }
  811. func (s *kvSortByValue) Less(i, j int) bool {
  812. return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
  813. }
  814. func checkRequests(rv mvcc.ReadView, rt *pb.TxnRequest, txnPath []bool, f checkReqFunc) (int, error) {
  815. txnCount := 0
  816. reqs := rt.Success
  817. if !txnPath[0] {
  818. reqs = rt.Failure
  819. }
  820. for _, req := range reqs {
  821. if tv, ok := req.Request.(*pb.RequestOp_RequestTxn); ok && tv.RequestTxn != nil {
  822. txns, err := checkRequests(rv, tv.RequestTxn, txnPath[1:], f)
  823. if err != nil {
  824. return 0, err
  825. }
  826. txnCount += txns + 1
  827. txnPath = txnPath[txns+1:]
  828. continue
  829. }
  830. if err := f(rv, req); err != nil {
  831. return 0, err
  832. }
  833. }
  834. return txnCount, nil
  835. }
  836. func (a *applierV3backend) checkRequestPut(rv mvcc.ReadView, reqOp *pb.RequestOp) error {
  837. tv, ok := reqOp.Request.(*pb.RequestOp_RequestPut)
  838. if !ok || tv.RequestPut == nil {
  839. return nil
  840. }
  841. req := tv.RequestPut
  842. if req.IgnoreValue || req.IgnoreLease {
  843. // expects previous key-value, error if not exist
  844. rr, err := rv.Range(req.Key, nil, mvcc.RangeOptions{})
  845. if err != nil {
  846. return err
  847. }
  848. if rr == nil || len(rr.KVs) == 0 {
  849. return ErrKeyNotFound
  850. }
  851. }
  852. if lease.LeaseID(req.Lease) != lease.NoLease {
  853. if l := a.s.lessor.Lookup(lease.LeaseID(req.Lease)); l == nil {
  854. return lease.ErrLeaseNotFound
  855. }
  856. }
  857. return nil
  858. }
  859. func (a *applierV3backend) checkRequestRange(rv mvcc.ReadView, reqOp *pb.RequestOp) error {
  860. tv, ok := reqOp.Request.(*pb.RequestOp_RequestRange)
  861. if !ok || tv.RequestRange == nil {
  862. return nil
  863. }
  864. req := tv.RequestRange
  865. switch {
  866. case req.Revision == 0:
  867. return nil
  868. case req.Revision > rv.Rev():
  869. return mvcc.ErrFutureRev
  870. case req.Revision < rv.FirstRev():
  871. return mvcc.ErrCompacted
  872. }
  873. return nil
  874. }
  875. func compareInt64(a, b int64) int {
  876. switch {
  877. case a < b:
  878. return -1
  879. case a > b:
  880. return 1
  881. default:
  882. return 0
  883. }
  884. }
  885. // mkGteRange determines if the range end is a >= range. This works around grpc
  886. // sending empty byte strings as nil; >= is encoded in the range end as '\0'.
  887. // If it is a GTE range, then []byte{} is returned to indicate the empty byte
  888. // string (vs nil being no byte string).
  889. func mkGteRange(rangeEnd []byte) []byte {
  890. if len(rangeEnd) == 1 && rangeEnd[0] == 0 {
  891. return []byte{}
  892. }
  893. return rangeEnd
  894. }
  895. func noSideEffect(r *pb.InternalRaftRequest) bool {
  896. return r.Range != nil || r.AuthUserGet != nil || r.AuthRoleGet != nil
  897. }
  898. func removeNeedlessRangeReqs(txn *pb.TxnRequest) {
  899. f := func(ops []*pb.RequestOp) []*pb.RequestOp {
  900. j := 0
  901. for i := 0; i < len(ops); i++ {
  902. if _, ok := ops[i].Request.(*pb.RequestOp_RequestRange); ok {
  903. continue
  904. }
  905. ops[j] = ops[i]
  906. j++
  907. }
  908. return ops[:j]
  909. }
  910. txn.Success = f(txn.Success)
  911. txn.Failure = f(txn.Failure)
  912. }
  913. func pruneKVs(rr *mvcc.RangeResult, isPrunable func(*mvccpb.KeyValue) bool) {
  914. j := 0
  915. for i := range rr.KVs {
  916. rr.KVs[j] = rr.KVs[i]
  917. if !isPrunable(&rr.KVs[i]) {
  918. j++
  919. }
  920. }
  921. rr.KVs = rr.KVs[:j]
  922. }
  923. func newHeader(s *EtcdServer) *pb.ResponseHeader {
  924. return &pb.ResponseHeader{
  925. ClusterId: uint64(s.Cluster().ID()),
  926. MemberId: uint64(s.ID()),
  927. Revision: s.KV().Rev(),
  928. RaftTerm: s.Term(),
  929. }
  930. }