wal.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. /*
  2. Copyright 2014 CoreOS Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package wal
  14. import (
  15. "errors"
  16. "fmt"
  17. "hash/crc32"
  18. "io"
  19. "log"
  20. "os"
  21. "path"
  22. "sort"
  23. "github.com/coreos/etcd/raft"
  24. "github.com/coreos/etcd/raft/raftpb"
  25. "github.com/coreos/etcd/wal/walpb"
  26. )
  27. const (
  28. infoType int64 = iota + 1
  29. entryType
  30. stateType
  31. crcType
  32. // the owner can make/remove files inside the directory
  33. privateDirMode = 0700
  34. )
  35. var (
  36. ErrIDMismatch = errors.New("wal: unmatch id")
  37. ErrFileNotFound = errors.New("wal: file not found")
  38. ErrIndexNotFound = errors.New("wal: index not found in file")
  39. ErrCRCMismatch = errors.New("wal: crc mismatch")
  40. crcTable = crc32.MakeTable(crc32.Castagnoli)
  41. )
  42. // WAL is a logical repersentation of the stable storage.
  43. // WAL is either in read mode or append mode but not both.
  44. // A newly created WAL is in append mode, and ready for appending records.
  45. // A just opened WAL is in read mode, and ready for reading records.
  46. // The WAL will be ready for appending after reading out all the previous records.
  47. type WAL struct {
  48. dir string // the living directory of the underlay files
  49. ri int64 // index of entry to start reading
  50. decoder *decoder // decoder to decode records
  51. f *os.File // underlay file opened for appending, sync
  52. seq int64 // sequence of the wal file currently used for writes
  53. enti int64 // index of the last entry saved to the wal
  54. encoder *encoder // encoder to encode records
  55. }
  56. // Create creates a WAL ready for appending records.
  57. func Create(dirpath string) (*WAL, error) {
  58. log.Printf("path=%s wal.create", dirpath)
  59. if Exist(dirpath) {
  60. return nil, os.ErrExist
  61. }
  62. if err := os.MkdirAll(dirpath, privateDirMode); err != nil {
  63. return nil, err
  64. }
  65. p := path.Join(dirpath, walName(0, 0))
  66. f, err := os.OpenFile(p, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0600)
  67. if err != nil {
  68. return nil, err
  69. }
  70. w := &WAL{
  71. dir: dirpath,
  72. seq: 0,
  73. f: f,
  74. encoder: newEncoder(f, 0),
  75. }
  76. if err := w.saveCrc(0); err != nil {
  77. return nil, err
  78. }
  79. return w, nil
  80. }
  81. // OpenAtIndex opens the WAL at the given index.
  82. // The index SHOULD have been previously committed to the WAL, or the following
  83. // ReadAll will fail.
  84. // The returned WAL is ready to read and the first record will be the given
  85. // index. The WAL cannot be appended to before reading out all of its
  86. // previous records.
  87. func OpenAtIndex(dirpath string, index int64) (*WAL, error) {
  88. log.Printf("path=%s wal.load index=%d", dirpath, index)
  89. names, err := readDir(dirpath)
  90. if err != nil {
  91. return nil, err
  92. }
  93. names = checkWalNames(names)
  94. if len(names) == 0 {
  95. return nil, ErrFileNotFound
  96. }
  97. sort.Sort(sort.StringSlice(names))
  98. nameIndex, ok := searchIndex(names, index)
  99. if !ok || !isValidSeq(names[nameIndex:]) {
  100. return nil, ErrFileNotFound
  101. }
  102. // open the wal files for reading
  103. rcs := make([]io.ReadCloser, 0)
  104. for _, name := range names[nameIndex:] {
  105. f, err := os.Open(path.Join(dirpath, name))
  106. if err != nil {
  107. return nil, err
  108. }
  109. rcs = append(rcs, f)
  110. }
  111. rc := MultiReadCloser(rcs...)
  112. // open the lastest wal file for appending
  113. seq, _, err := parseWalName(names[len(names)-1])
  114. if err != nil {
  115. rc.Close()
  116. return nil, err
  117. }
  118. last := path.Join(dirpath, names[len(names)-1])
  119. f, err := os.OpenFile(last, os.O_WRONLY|os.O_APPEND, 0)
  120. if err != nil {
  121. rc.Close()
  122. return nil, err
  123. }
  124. // create a WAL ready for reading
  125. w := &WAL{
  126. dir: dirpath,
  127. ri: index,
  128. decoder: newDecoder(rc),
  129. f: f,
  130. seq: seq,
  131. }
  132. return w, nil
  133. }
  134. // ReadAll reads out all records of the current WAL.
  135. // If it cannot read out the expected entry, it will return ErrIndexNotFound.
  136. // After ReadAll, the WAL will be ready for appending new records.
  137. func (w *WAL) ReadAll() (id int64, state raftpb.HardState, ents []raftpb.Entry, err error) {
  138. rec := &walpb.Record{}
  139. decoder := w.decoder
  140. for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
  141. switch rec.Type {
  142. case entryType:
  143. e := mustUnmarshalEntry(rec.Data)
  144. if e.Index >= w.ri {
  145. ents = append(ents[:e.Index-w.ri], e)
  146. }
  147. w.enti = e.Index
  148. case stateType:
  149. state = mustUnmarshalState(rec.Data)
  150. case infoType:
  151. i := mustUnmarshalInfo(rec.Data)
  152. if id != 0 && id != i.Id {
  153. state.Reset()
  154. return 0, state, nil, ErrIDMismatch
  155. }
  156. id = i.Id
  157. case crcType:
  158. crc := decoder.crc.Sum32()
  159. // current crc of decoder must match the crc of the record.
  160. // do no need to match 0 crc, since the decoder is a new one at this case.
  161. if crc != 0 && rec.Validate(crc) != nil {
  162. state.Reset()
  163. return 0, state, nil, ErrCRCMismatch
  164. }
  165. decoder.updateCRC(rec.Crc)
  166. default:
  167. state.Reset()
  168. return 0, state, nil, fmt.Errorf("unexpected block type %d", rec.Type)
  169. }
  170. }
  171. if err != io.EOF {
  172. state.Reset()
  173. return 0, state, nil, err
  174. }
  175. if w.enti < w.ri {
  176. state.Reset()
  177. return 0, state, nil, ErrIndexNotFound
  178. }
  179. // close decoder, disable reading
  180. w.decoder.close()
  181. w.ri = 0
  182. // create encoder (chain crc with the decoder), enable appending
  183. w.encoder = newEncoder(w.f, w.decoder.lastCRC())
  184. w.decoder = nil
  185. return id, state, ents, nil
  186. }
  187. // Cut closes current file written and creates a new one ready to append.
  188. func (w *WAL) Cut() error {
  189. // create a new wal file with name sequence + 1
  190. fpath := path.Join(w.dir, walName(w.seq+1, w.enti+1))
  191. f, err := os.OpenFile(fpath, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0600)
  192. if err != nil {
  193. return err
  194. }
  195. w.Sync()
  196. w.f.Close()
  197. log.Printf("wal.cut index=%d prevfile=%s curfile=%s", w.enti, w.f.Name(), f.Name())
  198. // update writer and save the previous crc
  199. w.f = f
  200. w.seq++
  201. prevCrc := w.encoder.crc.Sum32()
  202. w.encoder = newEncoder(w.f, prevCrc)
  203. return w.saveCrc(prevCrc)
  204. }
  205. func (w *WAL) Sync() error {
  206. if w.encoder != nil {
  207. if err := w.encoder.flush(); err != nil {
  208. return err
  209. }
  210. }
  211. return w.f.Sync()
  212. }
  213. func (w *WAL) Close() {
  214. log.Printf("path=%s wal.close", w.f.Name())
  215. if w.f != nil {
  216. w.Sync()
  217. w.f.Close()
  218. }
  219. }
  220. func (w *WAL) SaveInfo(i *raftpb.Info) error {
  221. log.Printf("path=%s wal.saveInfo id=%d", w.f.Name(), i.Id)
  222. b, err := i.Marshal()
  223. if err != nil {
  224. panic(err)
  225. }
  226. rec := &walpb.Record{Type: infoType, Data: b}
  227. return w.encoder.encode(rec)
  228. }
  229. func (w *WAL) SaveEntry(e *raftpb.Entry) error {
  230. b, err := e.Marshal()
  231. if err != nil {
  232. panic(err)
  233. }
  234. rec := &walpb.Record{Type: entryType, Data: b}
  235. if err := w.encoder.encode(rec); err != nil {
  236. return err
  237. }
  238. w.enti = e.Index
  239. return nil
  240. }
  241. func (w *WAL) SaveState(s *raftpb.HardState) error {
  242. if raft.IsEmptyHardState(*s) {
  243. return nil
  244. }
  245. log.Printf("path=%s wal.saveState state=\"%+v\"", w.f.Name(), s)
  246. b, err := s.Marshal()
  247. if err != nil {
  248. panic(err)
  249. }
  250. rec := &walpb.Record{Type: stateType, Data: b}
  251. return w.encoder.encode(rec)
  252. }
  253. func (w *WAL) Save(st raftpb.HardState, ents []raftpb.Entry) {
  254. // TODO(xiangli): no more reference operator
  255. w.SaveState(&st)
  256. for i := range ents {
  257. w.SaveEntry(&ents[i])
  258. }
  259. w.Sync()
  260. }
  261. func (w *WAL) saveCrc(prevCrc uint32) error {
  262. return w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc})
  263. }