123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788 |
- // Copyright 2015 The etcd Authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package store
- import (
- "encoding/json"
- "fmt"
- "path"
- "strconv"
- "strings"
- "sync"
- "time"
- etcdErr "github.com/coreos/etcd/error"
- "github.com/coreos/etcd/pkg/types"
- "github.com/jonboulle/clockwork"
- )
- // The default version to set when the store is first initialized.
- const defaultVersion = 2
- var minExpireTime time.Time
- func init() {
- minExpireTime, _ = time.Parse(time.RFC3339, "2000-01-01T00:00:00Z")
- }
- type Store interface {
- Version() int
- Index() uint64
- Get(nodePath string, recursive, sorted bool) (*Event, error)
- Set(nodePath string, dir bool, value string, expireOpts TTLOptionSet) (*Event, error)
- Update(nodePath string, newValue string, expireOpts TTLOptionSet) (*Event, error)
- Create(nodePath string, dir bool, value string, unique bool,
- expireOpts TTLOptionSet) (*Event, error)
- CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
- value string, expireOpts TTLOptionSet) (*Event, error)
- Delete(nodePath string, dir, recursive bool) (*Event, error)
- CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error)
- Watch(prefix string, recursive, stream bool, sinceIndex uint64) (Watcher, error)
- Save() ([]byte, error)
- Recovery(state []byte) error
- Clone() Store
- SaveNoCopy() ([]byte, error)
- JsonStats() []byte
- DeleteExpiredKeys(cutoff time.Time)
- HasTTLKeys() bool
- }
- type TTLOptionSet struct {
- ExpireTime time.Time
- Refresh bool
- }
- type store struct {
- Root *node
- WatcherHub *watcherHub
- CurrentIndex uint64
- Stats *Stats
- CurrentVersion int
- ttlKeyHeap *ttlKeyHeap // need to recovery manually
- worldLock sync.RWMutex // stop the world lock
- clock clockwork.Clock
- readonlySet types.Set
- }
- // New creates a store where the given namespaces will be created as initial directories.
- func New(namespaces ...string) Store {
- s := newStore(namespaces...)
- s.clock = clockwork.NewRealClock()
- return s
- }
- func newStore(namespaces ...string) *store {
- s := new(store)
- s.CurrentVersion = defaultVersion
- s.Root = newDir(s, "/", s.CurrentIndex, nil, Permanent)
- for _, namespace := range namespaces {
- s.Root.Add(newDir(s, namespace, s.CurrentIndex, s.Root, Permanent))
- }
- s.Stats = newStats()
- s.WatcherHub = newWatchHub(1000)
- s.ttlKeyHeap = newTtlKeyHeap()
- s.readonlySet = types.NewUnsafeSet(append(namespaces, "/")...)
- return s
- }
- // Version retrieves current version of the store.
- func (s *store) Version() int {
- return s.CurrentVersion
- }
- // Index retrieves the current index of the store.
- func (s *store) Index() uint64 {
- s.worldLock.RLock()
- defer s.worldLock.RUnlock()
- return s.CurrentIndex
- }
- // Get returns a get event.
- // If recursive is true, it will return all the content under the node path.
- // If sorted is true, it will sort the content by keys.
- func (s *store) Get(nodePath string, recursive, sorted bool) (*Event, error) {
- var err *etcdErr.Error
- s.worldLock.RLock()
- defer s.worldLock.RUnlock()
- defer func() {
- if err == nil {
- s.Stats.Inc(GetSuccess)
- if recursive {
- reportReadSuccess(GetRecursive)
- } else {
- reportReadSuccess(Get)
- }
- return
- }
- s.Stats.Inc(GetFail)
- if recursive {
- reportReadFailure(GetRecursive)
- } else {
- reportReadFailure(Get)
- }
- }()
- n, err := s.internalGet(nodePath)
- if err != nil {
- return nil, err
- }
- e := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex)
- e.EtcdIndex = s.CurrentIndex
- e.Node.loadInternalNode(n, recursive, sorted, s.clock)
- return e, nil
- }
- // Create creates the node at nodePath. Create will help to create intermediate directories with no ttl.
- // If the node has already existed, create will fail.
- // If any node on the path is a file, create will fail.
- func (s *store) Create(nodePath string, dir bool, value string, unique bool, expireOpts TTLOptionSet) (*Event, error) {
- var err *etcdErr.Error
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- defer func() {
- if err == nil {
- s.Stats.Inc(CreateSuccess)
- reportWriteSuccess(Create)
- return
- }
- s.Stats.Inc(CreateFail)
- reportWriteFailure(Create)
- }()
- e, err := s.internalCreate(nodePath, dir, value, unique, false, expireOpts.ExpireTime, Create)
- if err != nil {
- return nil, err
- }
- e.EtcdIndex = s.CurrentIndex
- s.WatcherHub.notify(e)
- return e, nil
- }
- // Set creates or replace the node at nodePath.
- func (s *store) Set(nodePath string, dir bool, value string, expireOpts TTLOptionSet) (*Event, error) {
- var err *etcdErr.Error
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- defer func() {
- if err == nil {
- s.Stats.Inc(SetSuccess)
- reportWriteSuccess(Set)
- return
- }
- s.Stats.Inc(SetFail)
- reportWriteFailure(Set)
- }()
- // Get prevNode value
- n, getErr := s.internalGet(nodePath)
- if getErr != nil && getErr.ErrorCode != etcdErr.EcodeKeyNotFound {
- err = getErr
- return nil, err
- }
- if expireOpts.Refresh {
- if getErr != nil {
- err = getErr
- return nil, err
- } else {
- value = n.Value
- }
- }
- // Set new value
- e, err := s.internalCreate(nodePath, dir, value, false, true, expireOpts.ExpireTime, Set)
- if err != nil {
- return nil, err
- }
- e.EtcdIndex = s.CurrentIndex
- // Put prevNode into event
- if getErr == nil {
- prev := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex)
- prev.Node.loadInternalNode(n, false, false, s.clock)
- e.PrevNode = prev.Node
- }
- if !expireOpts.Refresh {
- s.WatcherHub.notify(e)
- } else {
- e.SetRefresh()
- s.WatcherHub.add(e)
- }
- return e, nil
- }
- // returns user-readable cause of failed comparison
- func getCompareFailCause(n *node, which int, prevValue string, prevIndex uint64) string {
- switch which {
- case CompareIndexNotMatch:
- return fmt.Sprintf("[%v != %v]", prevIndex, n.ModifiedIndex)
- case CompareValueNotMatch:
- return fmt.Sprintf("[%v != %v]", prevValue, n.Value)
- default:
- return fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex)
- }
- }
- func (s *store) CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
- value string, expireOpts TTLOptionSet) (*Event, error) {
- var err *etcdErr.Error
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- defer func() {
- if err == nil {
- s.Stats.Inc(CompareAndSwapSuccess)
- reportWriteSuccess(CompareAndSwap)
- return
- }
- s.Stats.Inc(CompareAndSwapFail)
- reportWriteFailure(CompareAndSwap)
- }()
- nodePath = path.Clean(path.Join("/", nodePath))
- // we do not allow the user to change "/"
- if s.readonlySet.Contains(nodePath) {
- return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
- }
- n, err := s.internalGet(nodePath)
- if err != nil {
- return nil, err
- }
- if n.IsDir() { // can only compare and swap file
- err = etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex)
- return nil, err
- }
- // If both of the prevValue and prevIndex are given, we will test both of them.
- // Command will be executed, only if both of the tests are successful.
- if ok, which := n.Compare(prevValue, prevIndex); !ok {
- cause := getCompareFailCause(n, which, prevValue, prevIndex)
- err = etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex)
- return nil, err
- }
- if expireOpts.Refresh {
- value = n.Value
- }
- // update etcd index
- s.CurrentIndex++
- e := newEvent(CompareAndSwap, nodePath, s.CurrentIndex, n.CreatedIndex)
- e.EtcdIndex = s.CurrentIndex
- e.PrevNode = n.Repr(false, false, s.clock)
- eNode := e.Node
- // if test succeed, write the value
- n.Write(value, s.CurrentIndex)
- n.UpdateTTL(expireOpts.ExpireTime)
- // copy the value for safety
- valueCopy := value
- eNode.Value = &valueCopy
- eNode.Expiration, eNode.TTL = n.expirationAndTTL(s.clock)
- if !expireOpts.Refresh {
- s.WatcherHub.notify(e)
- } else {
- e.SetRefresh()
- s.WatcherHub.add(e)
- }
- return e, nil
- }
- // Delete deletes the node at the given path.
- // If the node is a directory, recursive must be true to delete it.
- func (s *store) Delete(nodePath string, dir, recursive bool) (*Event, error) {
- var err *etcdErr.Error
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- defer func() {
- if err == nil {
- s.Stats.Inc(DeleteSuccess)
- reportWriteSuccess(Delete)
- return
- }
- s.Stats.Inc(DeleteFail)
- reportWriteFailure(Delete)
- }()
- nodePath = path.Clean(path.Join("/", nodePath))
- // we do not allow the user to change "/"
- if s.readonlySet.Contains(nodePath) {
- return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
- }
- // recursive implies dir
- if recursive {
- dir = true
- }
- n, err := s.internalGet(nodePath)
- if err != nil { // if the node does not exist, return error
- return nil, err
- }
- nextIndex := s.CurrentIndex + 1
- e := newEvent(Delete, nodePath, nextIndex, n.CreatedIndex)
- e.EtcdIndex = nextIndex
- e.PrevNode = n.Repr(false, false, s.clock)
- eNode := e.Node
- if n.IsDir() {
- eNode.Dir = true
- }
- callback := func(path string) { // notify function
- // notify the watchers with deleted set true
- s.WatcherHub.notifyWatchers(e, path, true)
- }
- err = n.Remove(dir, recursive, callback)
- if err != nil {
- return nil, err
- }
- // update etcd index
- s.CurrentIndex++
- s.WatcherHub.notify(e)
- return e, nil
- }
- func (s *store) CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) {
- var err *etcdErr.Error
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- defer func() {
- if err == nil {
- s.Stats.Inc(CompareAndDeleteSuccess)
- reportWriteSuccess(CompareAndDelete)
- return
- }
- s.Stats.Inc(CompareAndDeleteFail)
- reportWriteFailure(CompareAndDelete)
- }()
- nodePath = path.Clean(path.Join("/", nodePath))
- n, err := s.internalGet(nodePath)
- if err != nil { // if the node does not exist, return error
- return nil, err
- }
- if n.IsDir() { // can only compare and delete file
- return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex)
- }
- // If both of the prevValue and prevIndex are given, we will test both of them.
- // Command will be executed, only if both of the tests are successful.
- if ok, which := n.Compare(prevValue, prevIndex); !ok {
- cause := getCompareFailCause(n, which, prevValue, prevIndex)
- return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex)
- }
- // update etcd index
- s.CurrentIndex++
- e := newEvent(CompareAndDelete, nodePath, s.CurrentIndex, n.CreatedIndex)
- e.EtcdIndex = s.CurrentIndex
- e.PrevNode = n.Repr(false, false, s.clock)
- callback := func(path string) { // notify function
- // notify the watchers with deleted set true
- s.WatcherHub.notifyWatchers(e, path, true)
- }
- err = n.Remove(false, false, callback)
- if err != nil {
- return nil, err
- }
- s.WatcherHub.notify(e)
- return e, nil
- }
- func (s *store) Watch(key string, recursive, stream bool, sinceIndex uint64) (Watcher, error) {
- s.worldLock.RLock()
- defer s.worldLock.RUnlock()
- key = path.Clean(path.Join("/", key))
- if sinceIndex == 0 {
- sinceIndex = s.CurrentIndex + 1
- }
- // WatcherHub does not know about the current index, so we need to pass it in
- w, err := s.WatcherHub.watch(key, recursive, stream, sinceIndex, s.CurrentIndex)
- if err != nil {
- return nil, err
- }
- return w, nil
- }
- // walk walks all the nodePath and apply the walkFunc on each directory
- func (s *store) walk(nodePath string, walkFunc func(prev *node, component string) (*node, *etcdErr.Error)) (*node, *etcdErr.Error) {
- components := strings.Split(nodePath, "/")
- curr := s.Root
- var err *etcdErr.Error
- for i := 1; i < len(components); i++ {
- if len(components[i]) == 0 { // ignore empty string
- return curr, nil
- }
- curr, err = walkFunc(curr, components[i])
- if err != nil {
- return nil, err
- }
- }
- return curr, nil
- }
- // Update updates the value/ttl of the node.
- // If the node is a file, the value and the ttl can be updated.
- // If the node is a directory, only the ttl can be updated.
- func (s *store) Update(nodePath string, newValue string, expireOpts TTLOptionSet) (*Event, error) {
- var err *etcdErr.Error
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- defer func() {
- if err == nil {
- s.Stats.Inc(UpdateSuccess)
- reportWriteSuccess(Update)
- return
- }
- s.Stats.Inc(UpdateFail)
- reportWriteFailure(Update)
- }()
- nodePath = path.Clean(path.Join("/", nodePath))
- // we do not allow the user to change "/"
- if s.readonlySet.Contains(nodePath) {
- return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
- }
- currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
- n, err := s.internalGet(nodePath)
- if err != nil { // if the node does not exist, return error
- return nil, err
- }
- if n.IsDir() && len(newValue) != 0 {
- // if the node is a directory, we cannot update value to non-empty
- return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex)
- }
- if expireOpts.Refresh {
- newValue = n.Value
- }
- e := newEvent(Update, nodePath, nextIndex, n.CreatedIndex)
- e.EtcdIndex = nextIndex
- e.PrevNode = n.Repr(false, false, s.clock)
- eNode := e.Node
- n.Write(newValue, nextIndex)
- if n.IsDir() {
- eNode.Dir = true
- } else {
- // copy the value for safety
- newValueCopy := newValue
- eNode.Value = &newValueCopy
- }
- // update ttl
- n.UpdateTTL(expireOpts.ExpireTime)
- eNode.Expiration, eNode.TTL = n.expirationAndTTL(s.clock)
- if !expireOpts.Refresh {
- s.WatcherHub.notify(e)
- } else {
- e.SetRefresh()
- s.WatcherHub.add(e)
- }
- s.CurrentIndex = nextIndex
- return e, nil
- }
- func (s *store) internalCreate(nodePath string, dir bool, value string, unique, replace bool,
- expireTime time.Time, action string) (*Event, *etcdErr.Error) {
- currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
- if unique { // append unique item under the node path
- nodePath += "/" + fmt.Sprintf("%020s", strconv.FormatUint(nextIndex, 10))
- }
- nodePath = path.Clean(path.Join("/", nodePath))
- // we do not allow the user to change "/"
- if s.readonlySet.Contains(nodePath) {
- return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", currIndex)
- }
- // Assume expire times that are way in the past are
- // This can occur when the time is serialized to JS
- if expireTime.Before(minExpireTime) {
- expireTime = Permanent
- }
- dirName, nodeName := path.Split(nodePath)
- // walk through the nodePath, create dirs and get the last directory node
- d, err := s.walk(dirName, s.checkDir)
- if err != nil {
- s.Stats.Inc(SetFail)
- reportWriteFailure(action)
- err.Index = currIndex
- return nil, err
- }
- e := newEvent(action, nodePath, nextIndex, nextIndex)
- eNode := e.Node
- n, _ := d.GetChild(nodeName)
- // force will try to replace an existing file
- if n != nil {
- if replace {
- if n.IsDir() {
- return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex)
- }
- e.PrevNode = n.Repr(false, false, s.clock)
- n.Remove(false, false, nil)
- } else {
- return nil, etcdErr.NewError(etcdErr.EcodeNodeExist, nodePath, currIndex)
- }
- }
- if !dir { // create file
- // copy the value for safety
- valueCopy := value
- eNode.Value = &valueCopy
- n = newKV(s, nodePath, value, nextIndex, d, expireTime)
- } else { // create directory
- eNode.Dir = true
- n = newDir(s, nodePath, nextIndex, d, expireTime)
- }
- // we are sure d is a directory and does not have the children with name n.Name
- d.Add(n)
- // node with TTL
- if !n.IsPermanent() {
- s.ttlKeyHeap.push(n)
- eNode.Expiration, eNode.TTL = n.expirationAndTTL(s.clock)
- }
- s.CurrentIndex = nextIndex
- return e, nil
- }
- // InternalGet gets the node of the given nodePath.
- func (s *store) internalGet(nodePath string) (*node, *etcdErr.Error) {
- nodePath = path.Clean(path.Join("/", nodePath))
- walkFunc := func(parent *node, name string) (*node, *etcdErr.Error) {
- if !parent.IsDir() {
- err := etcdErr.NewError(etcdErr.EcodeNotDir, parent.Path, s.CurrentIndex)
- return nil, err
- }
- child, ok := parent.Children[name]
- if ok {
- return child, nil
- }
- return nil, etcdErr.NewError(etcdErr.EcodeKeyNotFound, path.Join(parent.Path, name), s.CurrentIndex)
- }
- f, err := s.walk(nodePath, walkFunc)
- if err != nil {
- return nil, err
- }
- return f, nil
- }
- // DeleteExpiredKeys will delete all expired keys
- func (s *store) DeleteExpiredKeys(cutoff time.Time) {
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- for {
- node := s.ttlKeyHeap.top()
- if node == nil || node.ExpireTime.After(cutoff) {
- break
- }
- s.CurrentIndex++
- e := newEvent(Expire, node.Path, s.CurrentIndex, node.CreatedIndex)
- e.EtcdIndex = s.CurrentIndex
- e.PrevNode = node.Repr(false, false, s.clock)
- callback := func(path string) { // notify function
- // notify the watchers with deleted set true
- s.WatcherHub.notifyWatchers(e, path, true)
- }
- s.ttlKeyHeap.pop()
- node.Remove(true, true, callback)
- reportExpiredKey()
- s.Stats.Inc(ExpireCount)
- s.WatcherHub.notify(e)
- }
- }
- // checkDir will check whether the component is a directory under parent node.
- // If it is a directory, this function will return the pointer to that node.
- // If it does not exist, this function will create a new directory and return the pointer to that node.
- // If it is a file, this function will return error.
- func (s *store) checkDir(parent *node, dirName string) (*node, *etcdErr.Error) {
- node, ok := parent.Children[dirName]
- if ok {
- if node.IsDir() {
- return node, nil
- }
- return nil, etcdErr.NewError(etcdErr.EcodeNotDir, node.Path, s.CurrentIndex)
- }
- n := newDir(s, path.Join(parent.Path, dirName), s.CurrentIndex+1, parent, Permanent)
- parent.Children[dirName] = n
- return n, nil
- }
- // Save saves the static state of the store system.
- // It will not be able to save the state of watchers.
- // It will not save the parent field of the node. Or there will
- // be cyclic dependencies issue for the json package.
- func (s *store) Save() ([]byte, error) {
- b, err := json.Marshal(s.Clone())
- if err != nil {
- return nil, err
- }
- return b, nil
- }
- func (s *store) SaveNoCopy() ([]byte, error) {
- b, err := json.Marshal(s)
- if err != nil {
- return nil, err
- }
- return b, nil
- }
- func (s *store) Clone() Store {
- s.worldLock.Lock()
- clonedStore := newStore()
- clonedStore.CurrentIndex = s.CurrentIndex
- clonedStore.Root = s.Root.Clone()
- clonedStore.WatcherHub = s.WatcherHub.clone()
- clonedStore.Stats = s.Stats.clone()
- clonedStore.CurrentVersion = s.CurrentVersion
- s.worldLock.Unlock()
- return clonedStore
- }
- // Recovery recovers the store system from a static state
- // It needs to recover the parent field of the nodes.
- // It needs to delete the expired nodes since the saved time and also
- // needs to create monitoring go routines.
- func (s *store) Recovery(state []byte) error {
- s.worldLock.Lock()
- defer s.worldLock.Unlock()
- err := json.Unmarshal(state, s)
- if err != nil {
- return err
- }
- s.ttlKeyHeap = newTtlKeyHeap()
- s.Root.recoverAndclean()
- return nil
- }
- func (s *store) JsonStats() []byte {
- s.Stats.Watchers = uint64(s.WatcherHub.count)
- return s.Stats.toJson()
- }
- func (s *store) HasTTLKeys() bool {
- s.worldLock.RLock()
- defer s.worldLock.RUnlock()
- return s.ttlKeyHeap.Len() != 0
- }
|