repair.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package wal
  15. import (
  16. "io"
  17. "os"
  18. "path/filepath"
  19. "go.etcd.io/etcd/pkg/fileutil"
  20. "go.etcd.io/etcd/wal/walpb"
  21. "go.uber.org/zap"
  22. )
  23. // Repair tries to repair ErrUnexpectedEOF in the
  24. // last wal file by truncating.
  25. func Repair(lg *zap.Logger, dirpath string) bool {
  26. f, err := openLast(lg, dirpath)
  27. if err != nil {
  28. return false
  29. }
  30. defer f.Close()
  31. if lg != nil {
  32. lg.Info("repairing", zap.String("path", f.Name()))
  33. } else {
  34. plog.Noticef("repairing %v", f.Name())
  35. }
  36. rec := &walpb.Record{}
  37. decoder := newDecoder(f)
  38. for {
  39. lastOffset := decoder.lastOffset()
  40. err := decoder.decode(rec)
  41. switch err {
  42. case nil:
  43. // update crc of the decoder when necessary
  44. switch rec.Type {
  45. case crcType:
  46. crc := decoder.crc.Sum32()
  47. // current crc of decoder must match the crc of the record.
  48. // do no need to match 0 crc, since the decoder is a new one at this case.
  49. if crc != 0 && rec.Validate(crc) != nil {
  50. return false
  51. }
  52. decoder.updateCRC(rec.Crc)
  53. }
  54. continue
  55. case io.EOF:
  56. if lg != nil {
  57. lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.EOF))
  58. }
  59. return true
  60. case io.ErrUnexpectedEOF:
  61. bf, bferr := os.Create(f.Name() + ".broken")
  62. if bferr != nil {
  63. if lg != nil {
  64. lg.Warn("failed to create backup file", zap.String("path", f.Name()+".broken"), zap.Error(bferr))
  65. } else {
  66. plog.Errorf("could not repair %v, failed to create backup file", f.Name())
  67. }
  68. return false
  69. }
  70. defer bf.Close()
  71. if _, err = f.Seek(0, io.SeekStart); err != nil {
  72. if lg != nil {
  73. lg.Warn("failed to read file", zap.String("path", f.Name()), zap.Error(err))
  74. } else {
  75. plog.Errorf("could not repair %v, failed to read file", f.Name())
  76. }
  77. return false
  78. }
  79. if _, err = io.Copy(bf, f); err != nil {
  80. if lg != nil {
  81. lg.Warn("failed to copy", zap.String("from", f.Name()+".broken"), zap.String("to", f.Name()), zap.Error(err))
  82. } else {
  83. plog.Errorf("could not repair %v, failed to copy file", f.Name())
  84. }
  85. return false
  86. }
  87. if err = f.Truncate(lastOffset); err != nil {
  88. if lg != nil {
  89. lg.Warn("failed to truncate", zap.String("path", f.Name()), zap.Error(err))
  90. } else {
  91. plog.Errorf("could not repair %v, failed to truncate file", f.Name())
  92. }
  93. return false
  94. }
  95. if err = fileutil.Fsync(f.File); err != nil {
  96. if lg != nil {
  97. lg.Warn("failed to fsync", zap.String("path", f.Name()), zap.Error(err))
  98. } else {
  99. plog.Errorf("could not repair %v, failed to sync file", f.Name())
  100. }
  101. return false
  102. }
  103. if lg != nil {
  104. lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.ErrUnexpectedEOF))
  105. }
  106. return true
  107. default:
  108. if lg != nil {
  109. lg.Warn("failed to repair", zap.String("path", f.Name()), zap.Error(err))
  110. } else {
  111. plog.Errorf("could not repair error (%v)", err)
  112. }
  113. return false
  114. }
  115. }
  116. }
  117. // openLast opens the last wal file for read and write.
  118. func openLast(lg *zap.Logger, dirpath string) (*fileutil.LockedFile, error) {
  119. names, err := readWALNames(lg, dirpath)
  120. if err != nil {
  121. return nil, err
  122. }
  123. last := filepath.Join(dirpath, names[len(names)-1])
  124. return fileutil.LockFile(last, os.O_RDWR, fileutil.PrivateFileMode)
  125. }