| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593 |
- // Copyright 2015 CoreOS, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package etcdserver
- import (
- "bytes"
- "fmt"
- "sort"
- "time"
- "github.com/coreos/etcd/Godeps/_workspace/src/github.com/gogo/protobuf/proto"
- "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
- pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
- "github.com/coreos/etcd/lease"
- "github.com/coreos/etcd/lease/leasehttp"
- dstorage "github.com/coreos/etcd/storage"
- "github.com/coreos/etcd/storage/storagepb"
- )
- type RaftKV interface {
- Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
- Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
- DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
- Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
- Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
- }
- type Lessor interface {
- // LeaseCreate sends LeaseCreate request to raft and apply it after committed.
- LeaseCreate(ctx context.Context, r *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error)
- // LeaseRevoke sends LeaseRevoke request to raft and apply it after committed.
- LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
- // LeaseRenew renews the lease with given ID. The renewed TTL is returned. Or an error
- // is returned.
- LeaseRenew(id lease.LeaseID) (int64, error)
- }
- func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
- result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Range: r})
- if err != nil {
- return nil, err
- }
- return result.resp.(*pb.RangeResponse), result.err
- }
- func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
- result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Put: r})
- if err != nil {
- return nil, err
- }
- return result.resp.(*pb.PutResponse), result.err
- }
- func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
- result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
- if err != nil {
- return nil, err
- }
- return result.resp.(*pb.DeleteRangeResponse), result.err
- }
- func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
- result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Txn: r})
- if err != nil {
- return nil, err
- }
- return result.resp.(*pb.TxnResponse), result.err
- }
- func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
- result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Compaction: r})
- if err != nil {
- return nil, err
- }
- return result.resp.(*pb.CompactionResponse), result.err
- }
- func (s *EtcdServer) LeaseCreate(ctx context.Context, r *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
- // no id given? choose one
- for r.ID == int64(lease.NoLease) {
- // only use positive int64 id's
- r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
- }
- result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseCreate: r})
- if err != nil {
- return nil, err
- }
- return result.resp.(*pb.LeaseCreateResponse), result.err
- }
- func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
- result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
- if err != nil {
- return nil, err
- }
- return result.resp.(*pb.LeaseRevokeResponse), result.err
- }
- func (s *EtcdServer) LeaseRenew(id lease.LeaseID) (int64, error) {
- ttl, err := s.lessor.Renew(id)
- if err == nil {
- return ttl, nil
- }
- if err != lease.ErrNotPrimary {
- return -1, err
- }
- // renewals don't go through raft; forward to leader manually
- leader := s.cluster.Member(s.Leader())
- for i := 0; i < 5 && leader == nil; i++ {
- // wait an election
- dur := time.Duration(s.cfg.ElectionTicks) * time.Duration(s.cfg.TickMs) * time.Millisecond
- select {
- case <-time.After(dur):
- leader = s.cluster.Member(s.Leader())
- case <-s.done:
- return -1, ErrStopped
- }
- }
- if leader == nil || len(leader.PeerURLs) == 0 {
- return -1, ErrNoLeader
- }
- for _, url := range leader.PeerURLs {
- lurl := url + "/leases"
- ttl, err = leasehttp.RenewHTTP(id, lurl, s.peerRt, s.cfg.peerDialTimeout())
- if err == nil {
- break
- }
- }
- return ttl, err
- }
- type applyResult struct {
- resp proto.Message
- err error
- }
- func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
- r.ID = s.reqIDGen.Next()
- data, err := r.Marshal()
- if err != nil {
- return nil, err
- }
- ch := s.w.Register(r.ID)
- s.r.Propose(ctx, data)
- select {
- case x := <-ch:
- return x.(*applyResult), nil
- case <-ctx.Done():
- s.w.Trigger(r.ID, nil) // GC wait
- return nil, ctx.Err()
- case <-s.done:
- return nil, ErrStopped
- }
- }
- // Watchable returns a watchable interface attached to the etcdserver.
- func (s *EtcdServer) Watchable() dstorage.Watchable {
- return s.getKV()
- }
// noTxn is a txn ID that never refers to a real transaction. Pass it to the
// apply functions to run a Range, Put or DeleteRange on its own, outside of
// any transaction.
const noTxn = -1
- func (s *EtcdServer) applyV3Request(r *pb.InternalRaftRequest) interface{} {
- kv := s.getKV()
- le := s.lessor
- ar := &applyResult{}
- switch {
- case r.Range != nil:
- ar.resp, ar.err = applyRange(noTxn, kv, r.Range)
- case r.Put != nil:
- ar.resp, ar.err = applyPut(noTxn, kv, le, r.Put)
- case r.DeleteRange != nil:
- ar.resp, ar.err = applyDeleteRange(noTxn, kv, r.DeleteRange)
- case r.Txn != nil:
- ar.resp, ar.err = applyTxn(kv, le, r.Txn)
- case r.Compaction != nil:
- ar.resp, ar.err = applyCompaction(kv, r.Compaction)
- case r.LeaseCreate != nil:
- ar.resp, ar.err = applyLeaseCreate(le, r.LeaseCreate)
- case r.LeaseRevoke != nil:
- ar.resp, ar.err = applyLeaseRevoke(le, r.LeaseRevoke)
- default:
- panic("not implemented")
- }
- return ar
- }
- func applyPut(txnID int64, kv dstorage.KV, le lease.Lessor, p *pb.PutRequest) (*pb.PutResponse, error) {
- resp := &pb.PutResponse{}
- resp.Header = &pb.ResponseHeader{}
- var (
- rev int64
- err error
- )
- if txnID != noTxn {
- rev, err = kv.TxnPut(txnID, p.Key, p.Value, lease.LeaseID(p.Lease))
- if err != nil {
- return nil, err
- }
- } else {
- leaseID := lease.LeaseID(p.Lease)
- if leaseID != lease.NoLease {
- if l := le.Lookup(leaseID); l == nil {
- return nil, lease.ErrLeaseNotFound
- }
- }
- rev = kv.Put(p.Key, p.Value, leaseID)
- }
- resp.Header.Revision = rev
- return resp, nil
- }
- type kvSort struct{ kvs []storagepb.KeyValue }
- func (s *kvSort) Swap(i, j int) {
- t := s.kvs[i]
- s.kvs[i] = s.kvs[j]
- s.kvs[j] = t
- }
- func (s *kvSort) Len() int { return len(s.kvs) }
- type kvSortByKey struct{ *kvSort }
- func (s *kvSortByKey) Less(i, j int) bool {
- return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
- }
- type kvSortByVersion struct{ *kvSort }
- func (s *kvSortByVersion) Less(i, j int) bool {
- return (s.kvs[i].Version - s.kvs[j].Version) < 0
- }
- type kvSortByCreate struct{ *kvSort }
- func (s *kvSortByCreate) Less(i, j int) bool {
- return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
- }
- type kvSortByMod struct{ *kvSort }
- func (s *kvSortByMod) Less(i, j int) bool {
- return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
- }
- type kvSortByValue struct{ *kvSort }
- func (s *kvSortByValue) Less(i, j int) bool {
- return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
- }
- func applyRange(txnID int64, kv dstorage.KV, r *pb.RangeRequest) (*pb.RangeResponse, error) {
- resp := &pb.RangeResponse{}
- resp.Header = &pb.ResponseHeader{}
- var (
- kvs []storagepb.KeyValue
- rev int64
- err error
- )
- limit := r.Limit
- if r.SortOrder != pb.RangeRequest_NONE {
- // fetch everything; sort and truncate afterwards
- limit = 0
- }
- if limit > 0 {
- // fetch one extra for 'more' flag
- limit = limit + 1
- }
- if txnID != noTxn {
- kvs, rev, err = kv.TxnRange(txnID, r.Key, r.RangeEnd, limit, r.Revision)
- if err != nil {
- return nil, err
- }
- } else {
- kvs, rev, err = kv.Range(r.Key, r.RangeEnd, limit, r.Revision)
- if err != nil {
- return nil, err
- }
- }
- if r.SortOrder != pb.RangeRequest_NONE {
- var sorter sort.Interface
- switch {
- case r.SortTarget == pb.RangeRequest_KEY:
- sorter = &kvSortByKey{&kvSort{kvs}}
- case r.SortTarget == pb.RangeRequest_VERSION:
- sorter = &kvSortByVersion{&kvSort{kvs}}
- case r.SortTarget == pb.RangeRequest_CREATE:
- sorter = &kvSortByCreate{&kvSort{kvs}}
- case r.SortTarget == pb.RangeRequest_MOD:
- sorter = &kvSortByMod{&kvSort{kvs}}
- case r.SortTarget == pb.RangeRequest_VALUE:
- sorter = &kvSortByValue{&kvSort{kvs}}
- }
- switch {
- case r.SortOrder == pb.RangeRequest_ASCEND:
- sort.Sort(sorter)
- case r.SortOrder == pb.RangeRequest_DESCEND:
- sort.Sort(sort.Reverse(sorter))
- }
- }
- if r.Limit > 0 && len(kvs) > int(r.Limit) {
- kvs = kvs[:r.Limit]
- resp.More = true
- }
- resp.Header.Revision = rev
- for i := range kvs {
- resp.Kvs = append(resp.Kvs, &kvs[i])
- }
- return resp, nil
- }
- func applyDeleteRange(txnID int64, kv dstorage.KV, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
- resp := &pb.DeleteRangeResponse{}
- resp.Header = &pb.ResponseHeader{}
- var (
- rev int64
- err error
- )
- if txnID != noTxn {
- _, rev, err = kv.TxnDeleteRange(txnID, dr.Key, dr.RangeEnd)
- if err != nil {
- return nil, err
- }
- } else {
- _, rev = kv.DeleteRange(dr.Key, dr.RangeEnd)
- }
- resp.Header.Revision = rev
- return resp, nil
- }
- func checkRequestLeases(le lease.Lessor, reqs []*pb.RequestUnion) error {
- for _, requ := range reqs {
- tv, ok := requ.Request.(*pb.RequestUnion_RequestPut)
- if !ok {
- continue
- }
- preq := tv.RequestPut
- if preq == nil || lease.LeaseID(preq.Lease) == lease.NoLease {
- continue
- }
- if l := le.Lookup(lease.LeaseID(preq.Lease)); l == nil {
- return lease.ErrLeaseNotFound
- }
- }
- return nil
- }
- func checkRequestRange(kv dstorage.KV, reqs []*pb.RequestUnion) error {
- for _, requ := range reqs {
- tv, ok := requ.Request.(*pb.RequestUnion_RequestRange)
- if !ok {
- continue
- }
- greq := tv.RequestRange
- if greq == nil || greq.Revision == 0 {
- continue
- }
- if greq.Revision > kv.Rev() {
- return dstorage.ErrFutureRev
- }
- if greq.Revision < kv.FirstRev() {
- return dstorage.ErrCompacted
- }
- }
- return nil
- }
- func applyTxn(kv dstorage.KV, le lease.Lessor, rt *pb.TxnRequest) (*pb.TxnResponse, error) {
- var revision int64
- ok := true
- for _, c := range rt.Compare {
- if revision, ok = applyCompare(kv, c); !ok {
- break
- }
- }
- var reqs []*pb.RequestUnion
- if ok {
- reqs = rt.Success
- } else {
- reqs = rt.Failure
- }
- if err := checkRequestLeases(le, reqs); err != nil {
- return nil, err
- }
- if err := checkRequestRange(kv, reqs); err != nil {
- return nil, err
- }
- // When executing the operations of txn, we need to hold the txn lock.
- // So the reader will not see any intermediate results.
- txnID := kv.TxnBegin()
- defer func() {
- err := kv.TxnEnd(txnID)
- if err != nil {
- panic(fmt.Sprint("unexpected error when closing txn", txnID))
- }
- }()
- resps := make([]*pb.ResponseUnion, len(reqs))
- for i := range reqs {
- resps[i] = applyUnion(txnID, kv, reqs[i])
- }
- if len(resps) != 0 {
- revision += 1
- }
- txnResp := &pb.TxnResponse{}
- txnResp.Header = &pb.ResponseHeader{}
- txnResp.Header.Revision = revision
- txnResp.Responses = resps
- txnResp.Succeeded = ok
- return txnResp, nil
- }
- func applyCompaction(kv dstorage.KV, compaction *pb.CompactionRequest) (*pb.CompactionResponse, error) {
- resp := &pb.CompactionResponse{}
- resp.Header = &pb.ResponseHeader{}
- err := kv.Compact(compaction.Revision)
- if err != nil {
- return nil, err
- }
- // get the current revision. which key to get is not important.
- _, resp.Header.Revision, _ = kv.Range([]byte("compaction"), nil, 1, 0)
- return resp, err
- }
- func applyUnion(txnID int64, kv dstorage.KV, union *pb.RequestUnion) *pb.ResponseUnion {
- switch tv := union.Request.(type) {
- case *pb.RequestUnion_RequestRange:
- if tv.RequestRange != nil {
- resp, err := applyRange(txnID, kv, tv.RequestRange)
- if err != nil {
- panic("unexpected error during txn")
- }
- return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseRange{ResponseRange: resp}}
- }
- case *pb.RequestUnion_RequestPut:
- if tv.RequestPut != nil {
- resp, err := applyPut(txnID, kv, nil, tv.RequestPut)
- if err != nil {
- panic("unexpected error during txn")
- }
- return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponsePut{ResponsePut: resp}}
- }
- case *pb.RequestUnion_RequestDeleteRange:
- if tv.RequestDeleteRange != nil {
- resp, err := applyDeleteRange(txnID, kv, tv.RequestDeleteRange)
- if err != nil {
- panic("unexpected error during txn")
- }
- return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseDeleteRange{ResponseDeleteRange: resp}}
- }
- default:
- // empty union
- return nil
- }
- return nil
- }
- // applyCompare applies the compare request.
- // It returns the revision at which the comparison happens. If the comparison
- // succeeds, the it returns true. Otherwise it returns false.
- func applyCompare(kv dstorage.KV, c *pb.Compare) (int64, bool) {
- ckvs, rev, err := kv.Range(c.Key, nil, 1, 0)
- if err != nil {
- if err == dstorage.ErrTxnIDMismatch {
- panic("unexpected txn ID mismatch error")
- }
- return rev, false
- }
- var ckv storagepb.KeyValue
- if len(ckvs) != 0 {
- ckv = ckvs[0]
- } else {
- // Use the zero value of ckv normally. However...
- if c.Target == pb.Compare_VALUE {
- // Always fail if we're comparing a value on a key that doesn't exist.
- // We can treat non-existence as the empty set explicitly, such that
- // even a key with a value of length 0 bytes is still a real key
- // that was written that way
- return rev, false
- }
- }
- // -1 is less, 0 is equal, 1 is greater
- var result int
- switch c.Target {
- case pb.Compare_VALUE:
- tv, _ := c.TargetUnion.(*pb.Compare_Value)
- if tv != nil {
- result = bytes.Compare(ckv.Value, tv.Value)
- }
- case pb.Compare_CREATE:
- tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision)
- if tv != nil {
- result = compareInt64(ckv.CreateRevision, tv.CreateRevision)
- }
- case pb.Compare_MOD:
- tv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
- if tv != nil {
- result = compareInt64(ckv.ModRevision, tv.ModRevision)
- }
- case pb.Compare_VERSION:
- tv, _ := c.TargetUnion.(*pb.Compare_Version)
- if tv != nil {
- result = compareInt64(ckv.Version, tv.Version)
- }
- }
- switch c.Result {
- case pb.Compare_EQUAL:
- if result != 0 {
- return rev, false
- }
- case pb.Compare_GREATER:
- if result != 1 {
- return rev, false
- }
- case pb.Compare_LESS:
- if result != -1 {
- return rev, false
- }
- }
- return rev, true
- }
- func applyLeaseCreate(le lease.Lessor, lc *pb.LeaseCreateRequest) (*pb.LeaseCreateResponse, error) {
- l, err := le.Grant(lease.LeaseID(lc.ID), lc.TTL)
- resp := &pb.LeaseCreateResponse{}
- if err == nil {
- resp.ID = int64(l.ID)
- resp.TTL = l.TTL
- }
- return resp, err
- }
- func applyLeaseRevoke(le lease.Lessor, lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
- err := le.Revoke(lease.LeaseID(lc.ID))
- return &pb.LeaseRevokeResponse{}, err
- }
// compareInt64 returns -1 if a is less than b, 1 if a is greater than b and
// 0 if they are equal.
func compareInt64(a, b int64) int {
	if a < b {
		return -1
	}
	if a > b {
		return 1
	}
	return 0
}
|