wal.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. /*
  2. Copyright 2014 CoreOS Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package wal
  14. import (
  15. "errors"
  16. "fmt"
  17. "hash/crc32"
  18. "io"
  19. "log"
  20. "os"
  21. "path"
  22. "sort"
  23. "github.com/coreos/etcd/raft/raftpb"
  24. "github.com/coreos/etcd/wal/walpb"
  25. )
  26. const (
  27. infoType int64 = iota + 1
  28. entryType
  29. stateType
  30. crcType
  31. )
  32. var (
  33. ErrIdMismatch = errors.New("wal: unmatch id")
  34. ErrNotFound = errors.New("wal: file is not found")
  35. ErrCRCMismatch = errors.New("wal: crc mismatch")
  36. crcTable = crc32.MakeTable(crc32.Castagnoli)
  37. )
  38. // WAL is a logical repersentation of the stable storage.
  39. // WAL is either in read mode or append mode but not both.
  40. // A newly created WAL is in append mode, and ready for appending records.
  41. // A just opened WAL is in read mode, and ready for reading records.
  42. // The WAL will be ready for appending after reading out all the previous records.
  43. type WAL struct {
  44. dir string // the living directory of the underlay files
  45. ri int64 // index of entry to start reading
  46. decoder *decoder // decoder to decode records
  47. f *os.File // underlay file opened for appending, sync
  48. seq int64 // current sequence of the wal file
  49. encoder *encoder // encoder to encode records
  50. }
  51. // Create creates a WAL ready for appending records.
  52. func Create(dirpath string) (*WAL, error) {
  53. log.Printf("path=%s wal.create", dirpath)
  54. if Exist(dirpath) {
  55. return nil, os.ErrExist
  56. }
  57. if err := os.MkdirAll(dirpath, 0700); err != nil {
  58. return nil, err
  59. }
  60. p := path.Join(dirpath, fmt.Sprintf("%016x-%016x.wal", 0, 0))
  61. f, err := os.OpenFile(p, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0600)
  62. if err != nil {
  63. return nil, err
  64. }
  65. w := &WAL{
  66. dir: dirpath,
  67. seq: 0,
  68. f: f,
  69. encoder: newEncoder(f, 0),
  70. }
  71. if err := w.saveCrc(0); err != nil {
  72. return nil, err
  73. }
  74. return w, nil
  75. }
  76. // OpenFromIndex opens the WAL files containing all the entries after
  77. // the given index.
  78. // The returned WAL is ready to read. The WAL cannot be appended to before
  79. // reading out all of its previous records.
  80. func OpenFromIndex(dirpath string, index int64) (*WAL, error) {
  81. log.Printf("path=%s wal.load index=%d", dirpath, index)
  82. names, err := readDir(dirpath)
  83. if err != nil {
  84. return nil, err
  85. }
  86. names = checkWalNames(names)
  87. if len(names) == 0 {
  88. return nil, ErrNotFound
  89. }
  90. sort.Sort(sort.StringSlice(names))
  91. nameIndex, ok := searchIndex(names, index)
  92. if !ok || !isValidSeq(names[nameIndex:]) {
  93. return nil, ErrNotFound
  94. }
  95. // open the wal files for reading
  96. rcs := make([]io.ReadCloser, 0)
  97. for _, name := range names[nameIndex:] {
  98. f, err := os.Open(path.Join(dirpath, name))
  99. if err != nil {
  100. return nil, err
  101. }
  102. rcs = append(rcs, f)
  103. }
  104. rc := MultiReadCloser(rcs...)
  105. // open the lastest wal file for appending
  106. last := path.Join(dirpath, names[len(names)-1])
  107. f, err := os.OpenFile(last, os.O_WRONLY|os.O_APPEND, 0)
  108. if err != nil {
  109. rc.Close()
  110. return nil, err
  111. }
  112. // create a WAL ready for reading
  113. w := &WAL{
  114. ri: index,
  115. decoder: newDecoder(rc),
  116. f: f,
  117. }
  118. return w, nil
  119. }
  120. // ReadAll reads out all records of the current WAL.
  121. // After ReadAll, the WAL will be ready for appending new records.
  122. func (w *WAL) ReadAll() (int64, raftpb.State, []raftpb.Entry, error) {
  123. var id int64
  124. var state raftpb.State
  125. var entries []raftpb.Entry
  126. rec := &walpb.Record{}
  127. decoder := w.decoder
  128. var err error
  129. for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
  130. switch rec.Type {
  131. case entryType:
  132. e := mustUnmarshalEntry(rec.Data)
  133. if e.Index > w.ri {
  134. entries = append(entries[:e.Index-w.ri-1], e)
  135. }
  136. case stateType:
  137. state = mustUnmarshalState(rec.Data)
  138. case infoType:
  139. i := mustUnmarshalInfo(rec.Data)
  140. if id != 0 && id != i.Id {
  141. state.Reset()
  142. return 0, state, nil, ErrIdMismatch
  143. }
  144. id = i.Id
  145. case crcType:
  146. crc := decoder.crc.Sum32()
  147. // current crc of decoder must match the crc of the record.
  148. // do no need to match 0 crc, since the decoder is a new one at this case.
  149. if crc != 0 && rec.Validate(crc) != nil {
  150. state.Reset()
  151. return 0, state, nil, ErrCRCMismatch
  152. }
  153. decoder.updateCRC(rec.Crc)
  154. default:
  155. state.Reset()
  156. return 0, state, nil, fmt.Errorf("unexpected block type %d", rec.Type)
  157. }
  158. }
  159. if err != io.EOF {
  160. state.Reset()
  161. return 0, state, nil, err
  162. }
  163. // close decoder, disable reading
  164. w.decoder.close()
  165. w.ri = 0
  166. // create encoder (chain crc with the decoder), enable appending
  167. w.encoder = newEncoder(w.f, w.decoder.lastCRC())
  168. w.decoder = nil
  169. return id, state, entries, nil
  170. }
  171. // index should be the index of last log entry.
  172. // Cut closes current file written and creates a new one ready to append.
  173. func (w *WAL) Cut(index int64) error {
  174. log.Printf("wal.cut index=%d", index)
  175. // create a new wal file with name sequence + 1
  176. fpath := path.Join(w.dir, fmt.Sprintf("%016x-%016x.wal", w.seq+1, index))
  177. f, err := os.OpenFile(fpath, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0600)
  178. if err != nil {
  179. return err
  180. }
  181. w.Sync()
  182. w.f.Close()
  183. // update writer and save the previous crc
  184. w.f = f
  185. w.seq++
  186. prevCrc := w.encoder.crc.Sum32()
  187. w.encoder = newEncoder(w.f, prevCrc)
  188. return w.saveCrc(prevCrc)
  189. }
  190. func (w *WAL) Sync() error {
  191. if w.encoder != nil {
  192. if err := w.encoder.flush(); err != nil {
  193. return err
  194. }
  195. }
  196. return w.f.Sync()
  197. }
  198. func (w *WAL) Close() {
  199. log.Printf("path=%s wal.close", w.f.Name())
  200. if w.f != nil {
  201. w.Sync()
  202. w.f.Close()
  203. }
  204. }
  205. func (w *WAL) SaveInfo(i *raftpb.Info) error {
  206. log.Printf("path=%s wal.saveInfo id=%d", w.f.Name(), i.Id)
  207. b, err := i.Marshal()
  208. if err != nil {
  209. panic(err)
  210. }
  211. rec := &walpb.Record{Type: infoType, Data: b}
  212. return w.encoder.encode(rec)
  213. }
  214. func (w *WAL) SaveEntry(e *raftpb.Entry) error {
  215. b, err := e.Marshal()
  216. if err != nil {
  217. panic(err)
  218. }
  219. rec := &walpb.Record{Type: entryType, Data: b}
  220. return w.encoder.encode(rec)
  221. }
  222. func (w *WAL) SaveState(s *raftpb.State) error {
  223. log.Printf("path=%s wal.saveState state=\"%+v\"", w.f.Name(), s)
  224. b, err := s.Marshal()
  225. if err != nil {
  226. panic(err)
  227. }
  228. rec := &walpb.Record{Type: stateType, Data: b}
  229. return w.encoder.encode(rec)
  230. }
  231. func (w *WAL) Save(st raftpb.State, ents []raftpb.Entry) {
  232. // TODO(xiangli): no addresses fly around
  233. w.SaveState(&st)
  234. for i := range ents {
  235. w.SaveEntry(&ents[i])
  236. }
  237. w.Sync()
  238. }
  239. func (w *WAL) saveCrc(prevCrc uint32) error {
  240. return w.encoder.encode(&walpb.Record{Type: crcType, Crc: prevCrc})
  241. }