v3demo_server.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package etcdserver
  15. import (
  16. "bytes"
  17. "fmt"
  18. "sort"
  19. "time"
  20. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/gogo/protobuf/proto"
  21. "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
  22. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  23. "github.com/coreos/etcd/lease"
  24. "github.com/coreos/etcd/lease/leasehttp"
  25. dstorage "github.com/coreos/etcd/storage"
  26. "github.com/coreos/etcd/storage/storagepb"
  27. )
  28. type RaftKV interface {
  29. Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
  30. Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
  31. DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
  32. Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
  33. Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
  34. Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error)
  35. }
  36. type Lessor interface {
  37. // LeaseCreate sends LeaseCreate request to raft and apply it after committed.
  38. LeaseCreate(ctx context.Context, r *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error)
  39. // LeaseRevoke sends LeaseRevoke request to raft and apply it after committed.
  40. LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
  41. // LeaseRenew renews the lease with given ID. The renewed TTL is returned. Or an error
  42. // is returned.
  43. LeaseRenew(id lease.LeaseID) (int64, error)
  44. }
  45. func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
  46. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Range: r})
  47. if err != nil {
  48. return nil, err
  49. }
  50. return result.resp.(*pb.RangeResponse), result.err
  51. }
  52. func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
  53. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Put: r})
  54. if err != nil {
  55. return nil, err
  56. }
  57. return result.resp.(*pb.PutResponse), result.err
  58. }
  59. func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
  60. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
  61. if err != nil {
  62. return nil, err
  63. }
  64. return result.resp.(*pb.DeleteRangeResponse), result.err
  65. }
  66. func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
  67. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Txn: r})
  68. if err != nil {
  69. return nil, err
  70. }
  71. return result.resp.(*pb.TxnResponse), result.err
  72. }
  73. func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
  74. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Compaction: r})
  75. if err != nil {
  76. return nil, err
  77. }
  78. return result.resp.(*pb.CompactionResponse), result.err
  79. }
  80. func (s *EtcdServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
  81. h, err := s.be.Hash()
  82. if err != nil {
  83. return nil, err
  84. }
  85. return &pb.HashResponse{Hash: h}, nil
  86. }
  87. func (s *EtcdServer) LeaseCreate(ctx context.Context, r *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
  88. // no id given? choose one
  89. for r.ID == int64(lease.NoLease) {
  90. // only use positive int64 id's
  91. r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
  92. }
  93. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseCreate: r})
  94. if err != nil {
  95. return nil, err
  96. }
  97. return result.resp.(*pb.LeaseCreateResponse), result.err
  98. }
  99. func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
  100. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
  101. if err != nil {
  102. return nil, err
  103. }
  104. return result.resp.(*pb.LeaseRevokeResponse), result.err
  105. }
  106. func (s *EtcdServer) LeaseRenew(id lease.LeaseID) (int64, error) {
  107. ttl, err := s.lessor.Renew(id)
  108. if err == nil {
  109. return ttl, nil
  110. }
  111. if err != lease.ErrNotPrimary {
  112. return -1, err
  113. }
  114. // renewals don't go through raft; forward to leader manually
  115. leader := s.cluster.Member(s.Leader())
  116. for i := 0; i < 5 && leader == nil; i++ {
  117. // wait an election
  118. dur := time.Duration(s.cfg.ElectionTicks) * time.Duration(s.cfg.TickMs) * time.Millisecond
  119. select {
  120. case <-time.After(dur):
  121. leader = s.cluster.Member(s.Leader())
  122. case <-s.done:
  123. return -1, ErrStopped
  124. }
  125. }
  126. if leader == nil || len(leader.PeerURLs) == 0 {
  127. return -1, ErrNoLeader
  128. }
  129. for _, url := range leader.PeerURLs {
  130. lurl := url + "/leases"
  131. ttl, err = leasehttp.RenewHTTP(id, lurl, s.peerRt, s.cfg.peerDialTimeout())
  132. if err == nil {
  133. break
  134. }
  135. }
  136. return ttl, err
  137. }
  138. type applyResult struct {
  139. resp proto.Message
  140. err error
  141. }
  142. func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
  143. r.ID = s.reqIDGen.Next()
  144. data, err := r.Marshal()
  145. if err != nil {
  146. return nil, err
  147. }
  148. ch := s.w.Register(r.ID)
  149. s.r.Propose(ctx, data)
  150. select {
  151. case x := <-ch:
  152. return x.(*applyResult), nil
  153. case <-ctx.Done():
  154. s.w.Trigger(r.ID, nil) // GC wait
  155. return nil, ctx.Err()
  156. case <-s.done:
  157. return nil, ErrStopped
  158. }
  159. }
  160. // Watchable returns a watchable interface attached to the etcdserver.
  161. func (s *EtcdServer) Watchable() dstorage.Watchable {
  162. return s.getKV()
  163. }
  // noTxn is an invalid txn ID. Pass it to the apply functions in place of a
  // valid txn ID to run an independent Range, Put or Delete outside of any
  // transaction.
  const noTxn = -1
  170. func (s *EtcdServer) applyV3Request(r *pb.InternalRaftRequest) interface{} {
  171. kv := s.getKV()
  172. le := s.lessor
  173. ar := &applyResult{}
  174. switch {
  175. case r.Range != nil:
  176. ar.resp, ar.err = applyRange(noTxn, kv, r.Range)
  177. case r.Put != nil:
  178. ar.resp, ar.err = applyPut(noTxn, kv, le, r.Put)
  179. case r.DeleteRange != nil:
  180. ar.resp, ar.err = applyDeleteRange(noTxn, kv, r.DeleteRange)
  181. case r.Txn != nil:
  182. ar.resp, ar.err = applyTxn(kv, le, r.Txn)
  183. case r.Compaction != nil:
  184. ar.resp, ar.err = applyCompaction(kv, r.Compaction)
  185. case r.LeaseCreate != nil:
  186. ar.resp, ar.err = applyLeaseCreate(le, r.LeaseCreate)
  187. case r.LeaseRevoke != nil:
  188. ar.resp, ar.err = applyLeaseRevoke(le, r.LeaseRevoke)
  189. default:
  190. panic("not implemented")
  191. }
  192. return ar
  193. }
  194. func applyPut(txnID int64, kv dstorage.KV, le lease.Lessor, p *pb.PutRequest) (*pb.PutResponse, error) {
  195. resp := &pb.PutResponse{}
  196. resp.Header = &pb.ResponseHeader{}
  197. var (
  198. rev int64
  199. err error
  200. )
  201. if txnID != noTxn {
  202. rev, err = kv.TxnPut(txnID, p.Key, p.Value, lease.LeaseID(p.Lease))
  203. if err != nil {
  204. return nil, err
  205. }
  206. } else {
  207. leaseID := lease.LeaseID(p.Lease)
  208. if leaseID != lease.NoLease {
  209. if l := le.Lookup(leaseID); l == nil {
  210. return nil, lease.ErrLeaseNotFound
  211. }
  212. }
  213. rev = kv.Put(p.Key, p.Value, leaseID)
  214. }
  215. resp.Header.Revision = rev
  216. return resp, nil
  217. }
  218. type kvSort struct{ kvs []storagepb.KeyValue }
  219. func (s *kvSort) Swap(i, j int) {
  220. t := s.kvs[i]
  221. s.kvs[i] = s.kvs[j]
  222. s.kvs[j] = t
  223. }
  224. func (s *kvSort) Len() int { return len(s.kvs) }
  225. type kvSortByKey struct{ *kvSort }
  226. func (s *kvSortByKey) Less(i, j int) bool {
  227. return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
  228. }
  229. type kvSortByVersion struct{ *kvSort }
  230. func (s *kvSortByVersion) Less(i, j int) bool {
  231. return (s.kvs[i].Version - s.kvs[j].Version) < 0
  232. }
  233. type kvSortByCreate struct{ *kvSort }
  234. func (s *kvSortByCreate) Less(i, j int) bool {
  235. return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
  236. }
  237. type kvSortByMod struct{ *kvSort }
  238. func (s *kvSortByMod) Less(i, j int) bool {
  239. return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
  240. }
  241. type kvSortByValue struct{ *kvSort }
  242. func (s *kvSortByValue) Less(i, j int) bool {
  243. return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
  244. }
  245. func applyRange(txnID int64, kv dstorage.KV, r *pb.RangeRequest) (*pb.RangeResponse, error) {
  246. resp := &pb.RangeResponse{}
  247. resp.Header = &pb.ResponseHeader{}
  248. var (
  249. kvs []storagepb.KeyValue
  250. rev int64
  251. err error
  252. )
  253. limit := r.Limit
  254. if r.SortOrder != pb.RangeRequest_NONE {
  255. // fetch everything; sort and truncate afterwards
  256. limit = 0
  257. }
  258. if limit > 0 {
  259. // fetch one extra for 'more' flag
  260. limit = limit + 1
  261. }
  262. if txnID != noTxn {
  263. kvs, rev, err = kv.TxnRange(txnID, r.Key, r.RangeEnd, limit, r.Revision)
  264. if err != nil {
  265. return nil, err
  266. }
  267. } else {
  268. kvs, rev, err = kv.Range(r.Key, r.RangeEnd, limit, r.Revision)
  269. if err != nil {
  270. return nil, err
  271. }
  272. }
  273. if r.SortOrder != pb.RangeRequest_NONE {
  274. var sorter sort.Interface
  275. switch {
  276. case r.SortTarget == pb.RangeRequest_KEY:
  277. sorter = &kvSortByKey{&kvSort{kvs}}
  278. case r.SortTarget == pb.RangeRequest_VERSION:
  279. sorter = &kvSortByVersion{&kvSort{kvs}}
  280. case r.SortTarget == pb.RangeRequest_CREATE:
  281. sorter = &kvSortByCreate{&kvSort{kvs}}
  282. case r.SortTarget == pb.RangeRequest_MOD:
  283. sorter = &kvSortByMod{&kvSort{kvs}}
  284. case r.SortTarget == pb.RangeRequest_VALUE:
  285. sorter = &kvSortByValue{&kvSort{kvs}}
  286. }
  287. switch {
  288. case r.SortOrder == pb.RangeRequest_ASCEND:
  289. sort.Sort(sorter)
  290. case r.SortOrder == pb.RangeRequest_DESCEND:
  291. sort.Sort(sort.Reverse(sorter))
  292. }
  293. }
  294. if r.Limit > 0 && len(kvs) > int(r.Limit) {
  295. kvs = kvs[:r.Limit]
  296. resp.More = true
  297. }
  298. resp.Header.Revision = rev
  299. for i := range kvs {
  300. resp.Kvs = append(resp.Kvs, &kvs[i])
  301. }
  302. return resp, nil
  303. }
  304. func applyDeleteRange(txnID int64, kv dstorage.KV, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
  305. resp := &pb.DeleteRangeResponse{}
  306. resp.Header = &pb.ResponseHeader{}
  307. var (
  308. rev int64
  309. err error
  310. )
  311. if txnID != noTxn {
  312. _, rev, err = kv.TxnDeleteRange(txnID, dr.Key, dr.RangeEnd)
  313. if err != nil {
  314. return nil, err
  315. }
  316. } else {
  317. _, rev = kv.DeleteRange(dr.Key, dr.RangeEnd)
  318. }
  319. resp.Header.Revision = rev
  320. return resp, nil
  321. }
  322. func checkRequestLeases(le lease.Lessor, reqs []*pb.RequestUnion) error {
  323. for _, requ := range reqs {
  324. tv, ok := requ.Request.(*pb.RequestUnion_RequestPut)
  325. if !ok {
  326. continue
  327. }
  328. preq := tv.RequestPut
  329. if preq == nil || lease.LeaseID(preq.Lease) == lease.NoLease {
  330. continue
  331. }
  332. if l := le.Lookup(lease.LeaseID(preq.Lease)); l == nil {
  333. return lease.ErrLeaseNotFound
  334. }
  335. }
  336. return nil
  337. }
  338. func checkRequestRange(kv dstorage.KV, reqs []*pb.RequestUnion) error {
  339. for _, requ := range reqs {
  340. tv, ok := requ.Request.(*pb.RequestUnion_RequestRange)
  341. if !ok {
  342. continue
  343. }
  344. greq := tv.RequestRange
  345. if greq == nil || greq.Revision == 0 {
  346. continue
  347. }
  348. if greq.Revision > kv.Rev() {
  349. return dstorage.ErrFutureRev
  350. }
  351. if greq.Revision < kv.FirstRev() {
  352. return dstorage.ErrCompacted
  353. }
  354. }
  355. return nil
  356. }
  357. func applyTxn(kv dstorage.KV, le lease.Lessor, rt *pb.TxnRequest) (*pb.TxnResponse, error) {
  358. var revision int64
  359. ok := true
  360. for _, c := range rt.Compare {
  361. if revision, ok = applyCompare(kv, c); !ok {
  362. break
  363. }
  364. }
  365. var reqs []*pb.RequestUnion
  366. if ok {
  367. reqs = rt.Success
  368. } else {
  369. reqs = rt.Failure
  370. }
  371. if err := checkRequestLeases(le, reqs); err != nil {
  372. return nil, err
  373. }
  374. if err := checkRequestRange(kv, reqs); err != nil {
  375. return nil, err
  376. }
  377. // When executing the operations of txn, we need to hold the txn lock.
  378. // So the reader will not see any intermediate results.
  379. txnID := kv.TxnBegin()
  380. defer func() {
  381. err := kv.TxnEnd(txnID)
  382. if err != nil {
  383. panic(fmt.Sprint("unexpected error when closing txn", txnID))
  384. }
  385. }()
  386. resps := make([]*pb.ResponseUnion, len(reqs))
  387. for i := range reqs {
  388. resps[i] = applyUnion(txnID, kv, reqs[i])
  389. }
  390. if len(resps) != 0 {
  391. revision += 1
  392. }
  393. txnResp := &pb.TxnResponse{}
  394. txnResp.Header = &pb.ResponseHeader{}
  395. txnResp.Header.Revision = revision
  396. txnResp.Responses = resps
  397. txnResp.Succeeded = ok
  398. return txnResp, nil
  399. }
  400. func applyCompaction(kv dstorage.KV, compaction *pb.CompactionRequest) (*pb.CompactionResponse, error) {
  401. resp := &pb.CompactionResponse{}
  402. resp.Header = &pb.ResponseHeader{}
  403. err := kv.Compact(compaction.Revision)
  404. if err != nil {
  405. return nil, err
  406. }
  407. // get the current revision. which key to get is not important.
  408. _, resp.Header.Revision, _ = kv.Range([]byte("compaction"), nil, 1, 0)
  409. return resp, err
  410. }
  411. func applyUnion(txnID int64, kv dstorage.KV, union *pb.RequestUnion) *pb.ResponseUnion {
  412. switch tv := union.Request.(type) {
  413. case *pb.RequestUnion_RequestRange:
  414. if tv.RequestRange != nil {
  415. resp, err := applyRange(txnID, kv, tv.RequestRange)
  416. if err != nil {
  417. panic("unexpected error during txn")
  418. }
  419. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseRange{ResponseRange: resp}}
  420. }
  421. case *pb.RequestUnion_RequestPut:
  422. if tv.RequestPut != nil {
  423. resp, err := applyPut(txnID, kv, nil, tv.RequestPut)
  424. if err != nil {
  425. panic("unexpected error during txn")
  426. }
  427. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponsePut{ResponsePut: resp}}
  428. }
  429. case *pb.RequestUnion_RequestDeleteRange:
  430. if tv.RequestDeleteRange != nil {
  431. resp, err := applyDeleteRange(txnID, kv, tv.RequestDeleteRange)
  432. if err != nil {
  433. panic("unexpected error during txn")
  434. }
  435. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseDeleteRange{ResponseDeleteRange: resp}}
  436. }
  437. default:
  438. // empty union
  439. return nil
  440. }
  441. return nil
  442. }
  443. // applyCompare applies the compare request.
  444. // It returns the revision at which the comparison happens. If the comparison
  445. // succeeds, the it returns true. Otherwise it returns false.
  446. func applyCompare(kv dstorage.KV, c *pb.Compare) (int64, bool) {
  447. ckvs, rev, err := kv.Range(c.Key, nil, 1, 0)
  448. if err != nil {
  449. if err == dstorage.ErrTxnIDMismatch {
  450. panic("unexpected txn ID mismatch error")
  451. }
  452. return rev, false
  453. }
  454. var ckv storagepb.KeyValue
  455. if len(ckvs) != 0 {
  456. ckv = ckvs[0]
  457. } else {
  458. // Use the zero value of ckv normally. However...
  459. if c.Target == pb.Compare_VALUE {
  460. // Always fail if we're comparing a value on a key that doesn't exist.
  461. // We can treat non-existence as the empty set explicitly, such that
  462. // even a key with a value of length 0 bytes is still a real key
  463. // that was written that way
  464. return rev, false
  465. }
  466. }
  467. // -1 is less, 0 is equal, 1 is greater
  468. var result int
  469. switch c.Target {
  470. case pb.Compare_VALUE:
  471. tv, _ := c.TargetUnion.(*pb.Compare_Value)
  472. if tv != nil {
  473. result = bytes.Compare(ckv.Value, tv.Value)
  474. }
  475. case pb.Compare_CREATE:
  476. tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision)
  477. if tv != nil {
  478. result = compareInt64(ckv.CreateRevision, tv.CreateRevision)
  479. }
  480. case pb.Compare_MOD:
  481. tv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
  482. if tv != nil {
  483. result = compareInt64(ckv.ModRevision, tv.ModRevision)
  484. }
  485. case pb.Compare_VERSION:
  486. tv, _ := c.TargetUnion.(*pb.Compare_Version)
  487. if tv != nil {
  488. result = compareInt64(ckv.Version, tv.Version)
  489. }
  490. }
  491. switch c.Result {
  492. case pb.Compare_EQUAL:
  493. if result != 0 {
  494. return rev, false
  495. }
  496. case pb.Compare_GREATER:
  497. if result != 1 {
  498. return rev, false
  499. }
  500. case pb.Compare_LESS:
  501. if result != -1 {
  502. return rev, false
  503. }
  504. }
  505. return rev, true
  506. }
  507. func applyLeaseCreate(le lease.Lessor, lc *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
  508. l, err := le.Grant(lease.LeaseID(lc.ID), lc.TTL)
  509. resp := &pb.LeaseCreateResponse{}
  510. if err == nil {
  511. resp.ID = int64(l.ID)
  512. resp.TTL = l.TTL
  513. }
  514. return resp, err
  515. }
  516. func applyLeaseRevoke(le lease.Lessor, lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
  517. err := le.Revoke(lease.LeaseID(lc.ID))
  518. return &pb.LeaseRevokeResponse{}, err
  519. }
  // compareInt64 performs a three-way comparison of a and b. It returns -1
  // when a is less than b, 1 when a is greater than b, and 0 when they are
  // equal.
  func compareInt64(a, b int64) int {
  	if a < b {
  		return -1
  	}
  	if a > b {
  		return 1
  	}
  	return 0
  }