v3demo_server.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package etcdserver
  15. import (
  16. "bytes"
  17. "fmt"
  18. "sort"
  19. "time"
  20. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/gogo/protobuf/proto"
  21. "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
  22. pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
  23. "github.com/coreos/etcd/lease"
  24. "github.com/coreos/etcd/lease/leasehttp"
  25. dstorage "github.com/coreos/etcd/storage"
  26. "github.com/coreos/etcd/storage/storagepb"
  27. )
// RaftKV is the key-value API of the v3 demo server. The EtcdServer
// implementations in this file submit every call through
// processInternalRaftRequest and return the response produced when the
// request is applied.
type RaftKV interface {
	// Range reads the keys selected by r.
	Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
	// Put stores the key/value pair carried by r.
	Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
	// DeleteRange deletes the keys selected by r.
	DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
	// Txn evaluates the compares in r and runs its success or failure requests.
	Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
	// Compact compacts the store at the revision given in r.
	Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
}
// Lessor is the lease management API of the v3 demo server.
type Lessor interface {
	// LeaseCreate sends a LeaseCreate request to raft and applies it after it
	// is committed.
	LeaseCreate(ctx context.Context, r *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error)
	// LeaseRevoke sends a LeaseRevoke request to raft and applies it after it
	// is committed.
	LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
	// LeaseRenew renews the lease with the given ID. It returns the renewed
	// TTL, or an error if the lease could not be renewed.
	LeaseRenew(id lease.LeaseID) (int64, error)
}
  44. func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
  45. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Range: r})
  46. if err != nil {
  47. return nil, err
  48. }
  49. return result.resp.(*pb.RangeResponse), result.err
  50. }
  51. func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
  52. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Put: r})
  53. if err != nil {
  54. return nil, err
  55. }
  56. return result.resp.(*pb.PutResponse), result.err
  57. }
  58. func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
  59. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
  60. if err != nil {
  61. return nil, err
  62. }
  63. return result.resp.(*pb.DeleteRangeResponse), result.err
  64. }
  65. func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
  66. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Txn: r})
  67. if err != nil {
  68. return nil, err
  69. }
  70. return result.resp.(*pb.TxnResponse), result.err
  71. }
  72. func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
  73. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Compaction: r})
  74. if err != nil {
  75. return nil, err
  76. }
  77. return result.resp.(*pb.CompactionResponse), result.err
  78. }
  79. func (s *EtcdServer) LeaseCreate(ctx context.Context, r *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
  80. // no id given? choose one
  81. for r.ID == int64(lease.NoLease) {
  82. // only use positive int64 id's
  83. r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
  84. }
  85. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseCreate: r})
  86. if err != nil {
  87. return nil, err
  88. }
  89. resp := result.resp.(*pb.LeaseCreateResponse)
  90. if result.err != nil {
  91. resp.Error = result.err.Error()
  92. }
  93. return resp, nil
  94. }
  95. func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
  96. result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
  97. if err != nil {
  98. return nil, err
  99. }
  100. return result.resp.(*pb.LeaseRevokeResponse), result.err
  101. }
  102. func (s *EtcdServer) LeaseRenew(id lease.LeaseID) (int64, error) {
  103. ttl, err := s.lessor.Renew(id)
  104. if err == nil {
  105. return ttl, nil
  106. }
  107. if err != lease.ErrNotPrimary {
  108. return -1, err
  109. }
  110. // renewals don't go through raft; forward to leader manually
  111. leader := s.cluster.Member(s.Leader())
  112. for i := 0; i < 5 && leader == nil; i++ {
  113. // wait an election
  114. dur := time.Duration(s.cfg.ElectionTicks) * time.Duration(s.cfg.TickMs) * time.Millisecond
  115. select {
  116. case <-time.After(dur):
  117. leader = s.cluster.Member(s.Leader())
  118. case <-s.done:
  119. return -1, ErrStopped
  120. }
  121. }
  122. if leader == nil || len(leader.PeerURLs) == 0 {
  123. return -1, ErrNoLeader
  124. }
  125. for _, url := range leader.PeerURLs {
  126. lurl := url + "/leases"
  127. ttl, err = leasehttp.RenewHTTP(id, lurl, s.peerRt, s.cfg.peerDialTimeout())
  128. if err == nil {
  129. break
  130. }
  131. }
  132. return ttl, err
  133. }
// applyResult carries the outcome of applying an internal raft request:
// the response message and the error returned by the apply function.
type applyResult struct {
	// resp is the response message returned by the apply function.
	resp proto.Message
	// err is the error returned by the apply function, if any.
	err error
}
  138. func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
  139. r.ID = s.reqIDGen.Next()
  140. data, err := r.Marshal()
  141. if err != nil {
  142. return nil, err
  143. }
  144. ch := s.w.Register(r.ID)
  145. s.r.Propose(ctx, data)
  146. select {
  147. case x := <-ch:
  148. return x.(*applyResult), nil
  149. case <-ctx.Done():
  150. s.w.Trigger(r.ID, nil) // GC wait
  151. return nil, ctx.Err()
  152. case <-s.done:
  153. return nil, ErrStopped
  154. }
  155. }
  156. // Watchable returns a watchable interface attached to the etcdserver.
  157. func (s *EtcdServer) Watchable() dstorage.Watchable {
  158. return s.getKV()
  159. }
  160. const (
  161. // noTxn is an invalid txn ID.
  162. // To apply with independent Range, Put, Delete, you can pass noTxn
  163. // to apply functions instead of a valid txn ID.
  164. noTxn = -1
  165. )
  166. func (s *EtcdServer) applyV3Request(r *pb.InternalRaftRequest) interface{} {
  167. kv := s.getKV()
  168. le := s.lessor
  169. ar := &applyResult{}
  170. switch {
  171. case r.Range != nil:
  172. ar.resp, ar.err = applyRange(noTxn, kv, r.Range)
  173. case r.Put != nil:
  174. ar.resp, ar.err = applyPut(noTxn, kv, le, r.Put)
  175. case r.DeleteRange != nil:
  176. ar.resp, ar.err = applyDeleteRange(noTxn, kv, r.DeleteRange)
  177. case r.Txn != nil:
  178. ar.resp, ar.err = applyTxn(kv, le, r.Txn)
  179. case r.Compaction != nil:
  180. ar.resp, ar.err = applyCompaction(kv, r.Compaction)
  181. case r.LeaseCreate != nil:
  182. ar.resp, ar.err = applyLeaseCreate(le, r.LeaseCreate)
  183. case r.LeaseRevoke != nil:
  184. ar.resp, ar.err = applyLeaseRevoke(le, r.LeaseRevoke)
  185. default:
  186. panic("not implemented")
  187. }
  188. return ar
  189. }
  190. func applyPut(txnID int64, kv dstorage.KV, le lease.Lessor, p *pb.PutRequest) (*pb.PutResponse, error) {
  191. resp := &pb.PutResponse{}
  192. resp.Header = &pb.ResponseHeader{}
  193. var (
  194. rev int64
  195. err error
  196. )
  197. if txnID != noTxn {
  198. rev, err = kv.TxnPut(txnID, p.Key, p.Value, lease.LeaseID(p.Lease))
  199. if err != nil {
  200. return nil, err
  201. }
  202. } else {
  203. leaseID := lease.LeaseID(p.Lease)
  204. if leaseID != lease.NoLease {
  205. if l := le.Lookup(leaseID); l == nil {
  206. return nil, lease.ErrLeaseNotFound
  207. }
  208. }
  209. rev = kv.Put(p.Key, p.Value, leaseID)
  210. }
  211. resp.Header.Revision = rev
  212. return resp, nil
  213. }
  214. type kvSort struct{ kvs []storagepb.KeyValue }
  215. func (s *kvSort) Swap(i, j int) {
  216. t := s.kvs[i]
  217. s.kvs[i] = s.kvs[j]
  218. s.kvs[j] = t
  219. }
  220. func (s *kvSort) Len() int { return len(s.kvs) }
  221. type kvSortByKey struct{ *kvSort }
  222. func (s *kvSortByKey) Less(i, j int) bool {
  223. return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
  224. }
  225. type kvSortByVersion struct{ *kvSort }
  226. func (s *kvSortByVersion) Less(i, j int) bool {
  227. return (s.kvs[i].Version - s.kvs[j].Version) < 0
  228. }
  229. type kvSortByCreate struct{ *kvSort }
  230. func (s *kvSortByCreate) Less(i, j int) bool {
  231. return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
  232. }
  233. type kvSortByMod struct{ *kvSort }
  234. func (s *kvSortByMod) Less(i, j int) bool {
  235. return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
  236. }
  237. type kvSortByValue struct{ *kvSort }
  238. func (s *kvSortByValue) Less(i, j int) bool {
  239. return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
  240. }
  241. func applyRange(txnID int64, kv dstorage.KV, r *pb.RangeRequest) (*pb.RangeResponse, error) {
  242. resp := &pb.RangeResponse{}
  243. resp.Header = &pb.ResponseHeader{}
  244. var (
  245. kvs []storagepb.KeyValue
  246. rev int64
  247. err error
  248. )
  249. limit := r.Limit
  250. if r.SortOrder != pb.RangeRequest_NONE {
  251. // fetch everything; sort and truncate afterwards
  252. limit = 0
  253. }
  254. if limit > 0 {
  255. // fetch one extra for 'more' flag
  256. limit = limit + 1
  257. }
  258. if txnID != noTxn {
  259. kvs, rev, err = kv.TxnRange(txnID, r.Key, r.RangeEnd, limit, r.Revision)
  260. if err != nil {
  261. return nil, err
  262. }
  263. } else {
  264. kvs, rev, err = kv.Range(r.Key, r.RangeEnd, limit, r.Revision)
  265. if err != nil {
  266. return nil, err
  267. }
  268. }
  269. if r.SortOrder != pb.RangeRequest_NONE {
  270. var sorter sort.Interface
  271. switch {
  272. case r.SortTarget == pb.RangeRequest_KEY:
  273. sorter = &kvSortByKey{&kvSort{kvs}}
  274. case r.SortTarget == pb.RangeRequest_VERSION:
  275. sorter = &kvSortByVersion{&kvSort{kvs}}
  276. case r.SortTarget == pb.RangeRequest_CREATE:
  277. sorter = &kvSortByCreate{&kvSort{kvs}}
  278. case r.SortTarget == pb.RangeRequest_MOD:
  279. sorter = &kvSortByMod{&kvSort{kvs}}
  280. case r.SortTarget == pb.RangeRequest_VALUE:
  281. sorter = &kvSortByValue{&kvSort{kvs}}
  282. }
  283. switch {
  284. case r.SortOrder == pb.RangeRequest_ASCEND:
  285. sort.Sort(sorter)
  286. case r.SortOrder == pb.RangeRequest_DESCEND:
  287. sort.Sort(sort.Reverse(sorter))
  288. }
  289. }
  290. if r.Limit > 0 && len(kvs) > int(r.Limit) {
  291. kvs = kvs[:r.Limit]
  292. resp.More = true
  293. }
  294. resp.Header.Revision = rev
  295. for i := range kvs {
  296. resp.Kvs = append(resp.Kvs, &kvs[i])
  297. }
  298. return resp, nil
  299. }
  300. func applyDeleteRange(txnID int64, kv dstorage.KV, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
  301. resp := &pb.DeleteRangeResponse{}
  302. resp.Header = &pb.ResponseHeader{}
  303. var (
  304. rev int64
  305. err error
  306. )
  307. if txnID != noTxn {
  308. _, rev, err = kv.TxnDeleteRange(txnID, dr.Key, dr.RangeEnd)
  309. if err != nil {
  310. return nil, err
  311. }
  312. } else {
  313. _, rev = kv.DeleteRange(dr.Key, dr.RangeEnd)
  314. }
  315. resp.Header.Revision = rev
  316. return resp, nil
  317. }
  318. func checkRequestLeases(le lease.Lessor, reqs []*pb.RequestUnion) error {
  319. for _, requ := range reqs {
  320. tv, ok := requ.Request.(*pb.RequestUnion_RequestPut)
  321. if !ok {
  322. continue
  323. }
  324. preq := tv.RequestPut
  325. if preq == nil || lease.LeaseID(preq.Lease) == lease.NoLease {
  326. continue
  327. }
  328. if l := le.Lookup(lease.LeaseID(preq.Lease)); l == nil {
  329. return lease.ErrLeaseNotFound
  330. }
  331. }
  332. return nil
  333. }
  334. func checkRequestRange(kv dstorage.KV, reqs []*pb.RequestUnion) error {
  335. for _, requ := range reqs {
  336. tv, ok := requ.Request.(*pb.RequestUnion_RequestRange)
  337. if !ok {
  338. continue
  339. }
  340. greq := tv.RequestRange
  341. if greq == nil || greq.Revision == 0 {
  342. continue
  343. }
  344. if greq.Revision > kv.Rev() {
  345. return dstorage.ErrFutureRev
  346. }
  347. if greq.Revision < kv.FirstRev() {
  348. return dstorage.ErrCompacted
  349. }
  350. }
  351. return nil
  352. }
  353. func applyTxn(kv dstorage.KV, le lease.Lessor, rt *pb.TxnRequest) (*pb.TxnResponse, error) {
  354. var revision int64
  355. ok := true
  356. for _, c := range rt.Compare {
  357. if revision, ok = applyCompare(kv, c); !ok {
  358. break
  359. }
  360. }
  361. var reqs []*pb.RequestUnion
  362. if ok {
  363. reqs = rt.Success
  364. } else {
  365. reqs = rt.Failure
  366. }
  367. if err := checkRequestLeases(le, reqs); err != nil {
  368. return nil, err
  369. }
  370. if err := checkRequestRange(kv, reqs); err != nil {
  371. return nil, err
  372. }
  373. // When executing the operations of txn, we need to hold the txn lock.
  374. // So the reader will not see any intermediate results.
  375. txnID := kv.TxnBegin()
  376. defer func() {
  377. err := kv.TxnEnd(txnID)
  378. if err != nil {
  379. panic(fmt.Sprint("unexpected error when closing txn", txnID))
  380. }
  381. }()
  382. resps := make([]*pb.ResponseUnion, len(reqs))
  383. for i := range reqs {
  384. resps[i] = applyUnion(txnID, kv, reqs[i])
  385. }
  386. if len(resps) != 0 {
  387. revision += 1
  388. }
  389. txnResp := &pb.TxnResponse{}
  390. txnResp.Header = &pb.ResponseHeader{}
  391. txnResp.Header.Revision = revision
  392. txnResp.Responses = resps
  393. txnResp.Succeeded = ok
  394. return txnResp, nil
  395. }
  396. func applyCompaction(kv dstorage.KV, compaction *pb.CompactionRequest) (*pb.CompactionResponse, error) {
  397. resp := &pb.CompactionResponse{}
  398. resp.Header = &pb.ResponseHeader{}
  399. err := kv.Compact(compaction.Revision)
  400. if err != nil {
  401. return nil, err
  402. }
  403. // get the current revision. which key to get is not important.
  404. _, resp.Header.Revision, _ = kv.Range([]byte("compaction"), nil, 1, 0)
  405. return resp, err
  406. }
  407. func applyUnion(txnID int64, kv dstorage.KV, union *pb.RequestUnion) *pb.ResponseUnion {
  408. switch tv := union.Request.(type) {
  409. case *pb.RequestUnion_RequestRange:
  410. if tv.RequestRange != nil {
  411. resp, err := applyRange(txnID, kv, tv.RequestRange)
  412. if err != nil {
  413. panic("unexpected error during txn")
  414. }
  415. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseRange{ResponseRange: resp}}
  416. }
  417. case *pb.RequestUnion_RequestPut:
  418. if tv.RequestPut != nil {
  419. resp, err := applyPut(txnID, kv, nil, tv.RequestPut)
  420. if err != nil {
  421. panic("unexpected error during txn")
  422. }
  423. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponsePut{ResponsePut: resp}}
  424. }
  425. case *pb.RequestUnion_RequestDeleteRange:
  426. if tv.RequestDeleteRange != nil {
  427. resp, err := applyDeleteRange(txnID, kv, tv.RequestDeleteRange)
  428. if err != nil {
  429. panic("unexpected error during txn")
  430. }
  431. return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseDeleteRange{ResponseDeleteRange: resp}}
  432. }
  433. default:
  434. // empty union
  435. return nil
  436. }
  437. return nil
  438. }
  439. // applyCompare applies the compare request.
  440. // It returns the revision at which the comparison happens. If the comparison
  441. // succeeds, the it returns true. Otherwise it returns false.
  442. func applyCompare(kv dstorage.KV, c *pb.Compare) (int64, bool) {
  443. ckvs, rev, err := kv.Range(c.Key, nil, 1, 0)
  444. if err != nil {
  445. if err == dstorage.ErrTxnIDMismatch {
  446. panic("unexpected txn ID mismatch error")
  447. }
  448. return rev, false
  449. }
  450. var ckv storagepb.KeyValue
  451. if len(ckvs) != 0 {
  452. ckv = ckvs[0]
  453. } else {
  454. // Use the zero value of ckv normally. However...
  455. if c.Target == pb.Compare_VALUE {
  456. // Always fail if we're comparing a value on a key that doesn't exist.
  457. // We can treat non-existence as the empty set explicitly, such that
  458. // even a key with a value of length 0 bytes is still a real key
  459. // that was written that way
  460. return rev, false
  461. }
  462. }
  463. // -1 is less, 0 is equal, 1 is greater
  464. var result int
  465. switch c.Target {
  466. case pb.Compare_VALUE:
  467. tv, _ := c.TargetUnion.(*pb.Compare_Value)
  468. if tv != nil {
  469. result = bytes.Compare(ckv.Value, tv.Value)
  470. }
  471. case pb.Compare_CREATE:
  472. tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision)
  473. if tv != nil {
  474. result = compareInt64(ckv.CreateRevision, tv.CreateRevision)
  475. }
  476. case pb.Compare_MOD:
  477. tv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
  478. if tv != nil {
  479. result = compareInt64(ckv.ModRevision, tv.ModRevision)
  480. }
  481. case pb.Compare_VERSION:
  482. tv, _ := c.TargetUnion.(*pb.Compare_Version)
  483. if tv != nil {
  484. result = compareInt64(ckv.Version, tv.Version)
  485. }
  486. }
  487. switch c.Result {
  488. case pb.Compare_EQUAL:
  489. if result != 0 {
  490. return rev, false
  491. }
  492. case pb.Compare_GREATER:
  493. if result != 1 {
  494. return rev, false
  495. }
  496. case pb.Compare_LESS:
  497. if result != -1 {
  498. return rev, false
  499. }
  500. }
  501. return rev, true
  502. }
  503. func applyLeaseCreate(le lease.Lessor, lc *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
  504. l, err := le.Grant(lease.LeaseID(lc.ID), lc.TTL)
  505. resp := &pb.LeaseCreateResponse{}
  506. if err == nil {
  507. resp.ID = int64(l.ID)
  508. resp.TTL = l.TTL
  509. }
  510. return resp, err
  511. }
  512. func applyLeaseRevoke(le lease.Lessor, lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
  513. err := le.Revoke(lease.LeaseID(lc.ID))
  514. return &pb.LeaseRevokeResponse{}, err
  515. }
  516. func compareInt64(a, b int64) int {
  517. switch {
  518. case a < b:
  519. return -1
  520. case a > b:
  521. return 1
  522. default:
  523. return 0
  524. }
  525. }