123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
- // Copyright 2015 The etcd Authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package wal
- import (
- "bufio"
- "encoding/binary"
- "hash"
- "io"
- "sync"
- "go.etcd.io/etcd/pkg/crc"
- "go.etcd.io/etcd/pkg/pbutil"
- "go.etcd.io/etcd/raft/raftpb"
- "go.etcd.io/etcd/wal/walpb"
- )
const (
	// minSectorSize is the sector granularity, in bytes, on which
	// isTornEntry splits record data when looking for torn writes.
	minSectorSize = 512

	// frameSizeBytes is the size in bytes of the length field that precedes
	// every record; that field encodes both the record size and the padding
	// size (see decodeFrameSize).
	frameSizeBytes = 8
)
// decoder reads length-prefixed WAL records from an ordered list of
// buffered readers, keeping a running CRC over the record payloads.
type decoder struct {
	// mu is held by decode for the duration of a single record decode.
	mu sync.Mutex

	// brs are the remaining inputs, consumed front to back; decodeRecord
	// drops the head reader once it hits EOF or a zero length field.
	brs []*bufio.Reader

	// lastValidOff is the offset following the last valid decoded record.
	// It is reset to 0 whenever decodeRecord advances to the next reader.
	lastValidOff int64

	// crc is the running checksum that decodeRecord updates with each
	// record's data and validates records against.
	crc hash.Hash32
}
- func newDecoder(r ...io.Reader) *decoder {
- readers := make([]*bufio.Reader, len(r))
- for i := range r {
- readers[i] = bufio.NewReader(r[i])
- }
- return &decoder{
- brs: readers,
- crc: crc.New(0, crcTable),
- }
- }
- func (d *decoder) decode(rec *walpb.Record) error {
- rec.Reset()
- d.mu.Lock()
- defer d.mu.Unlock()
- return d.decodeRecord(rec)
- }
- func (d *decoder) decodeRecord(rec *walpb.Record) error {
- if len(d.brs) == 0 {
- return io.EOF
- }
- l, err := readInt64(d.brs[0])
- if err == io.EOF || (err == nil && l == 0) {
- // hit end of file or preallocated space
- d.brs = d.brs[1:]
- if len(d.brs) == 0 {
- return io.EOF
- }
- d.lastValidOff = 0
- return d.decodeRecord(rec)
- }
- if err != nil {
- return err
- }
- recBytes, padBytes := decodeFrameSize(l)
- data := make([]byte, recBytes+padBytes)
- if _, err = io.ReadFull(d.brs[0], data); err != nil {
- // ReadFull returns io.EOF only if no bytes were read
- // the decoder should treat this as an ErrUnexpectedEOF instead.
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- return err
- }
- if err := rec.Unmarshal(data[:recBytes]); err != nil {
- if d.isTornEntry(data) {
- return io.ErrUnexpectedEOF
- }
- return err
- }
- // skip crc checking if the record type is crcType
- if rec.Type != crcType {
- d.crc.Write(rec.Data)
- if err := rec.Validate(d.crc.Sum32()); err != nil {
- if d.isTornEntry(data) {
- return io.ErrUnexpectedEOF
- }
- return err
- }
- }
- // record decoded as valid; point last valid offset to end of record
- d.lastValidOff += frameSizeBytes + recBytes + padBytes
- return nil
- }
// decodeFrameSize splits a frame length field into the record size and the
// padding size, both in bytes.
//
// The record size is the lower 56 bits of lenField. Padding is present only
// when the most significant bit is set (lenField is negative); it is then
// taken from the lower 3 bits of the most significant byte. Otherwise
// padBytes is 0.
func decodeFrameSize(lenField int64) (recBytes int64, padBytes int64) {
	const recMask = uint64(1)<<56 - 1

	bits := uint64(lenField)
	recBytes = int64(bits & recMask)
	if bits>>63 == 0 {
		// sign bit clear: the frame carries no padding
		return recBytes, 0
	}
	return recBytes, int64((bits >> 56) & 0x7)
}
- // isTornEntry determines whether the last entry of the WAL was partially written
- // and corrupted because of a torn write.
- func (d *decoder) isTornEntry(data []byte) bool {
- if len(d.brs) != 1 {
- return false
- }
- fileOff := d.lastValidOff + frameSizeBytes
- curOff := 0
- chunks := [][]byte{}
- // split data on sector boundaries
- for curOff < len(data) {
- chunkLen := int(minSectorSize - (fileOff % minSectorSize))
- if chunkLen > len(data)-curOff {
- chunkLen = len(data) - curOff
- }
- chunks = append(chunks, data[curOff:curOff+chunkLen])
- fileOff += int64(chunkLen)
- curOff += chunkLen
- }
- // if any data for a sector chunk is all 0, it's a torn write
- for _, sect := range chunks {
- isZero := true
- for _, v := range sect {
- if v != 0 {
- isZero = false
- break
- }
- }
- if isZero {
- return true
- }
- }
- return false
- }
- func (d *decoder) updateCRC(prevCrc uint32) {
- d.crc = crc.New(prevCrc, crcTable)
- }
- func (d *decoder) lastCRC() uint32 {
- return d.crc.Sum32()
- }
- func (d *decoder) lastOffset() int64 { return d.lastValidOff }
- func mustUnmarshalEntry(d []byte) raftpb.Entry {
- var e raftpb.Entry
- pbutil.MustUnmarshal(&e, d)
- return e
- }
- func mustUnmarshalState(d []byte) raftpb.HardState {
- var s raftpb.HardState
- pbutil.MustUnmarshal(&s, d)
- return s
- }
// readInt64 reads 8 bytes from r and decodes them as a little-endian int64.
//
// On failure it returns 0 together with the read error: io.EOF when no bytes
// were available, io.ErrUnexpectedEOF when fewer than 8 were.
func readInt64(r io.Reader) (int64, error) {
	var raw [8]byte
	if _, err := io.ReadFull(r, raw[:]); err != nil {
		return 0, err
	}
	return int64(binary.LittleEndian.Uint64(raw[:])), nil
}
|