lookup.go 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. package filex
  2. import (
  3. "io"
  4. "os"
  5. )
  // OffsetRange describes a contiguous block of bytes inside a file as the
  // half-open interval [Start, Stop).
  type OffsetRange struct {
  	// File is the path of the file the range belongs to.
  	File string
  	// Start is the offset of the first byte of the block; Stop is the
  	// offset just past its last byte (the file size for a block that runs
  	// to the end of the file).
  	Start, Stop int64
  }
  12. // SplitLineChunks splits file into chunks.
  13. // The whole line are guaranteed to be split in the same chunk.
  14. func SplitLineChunks(filename string, chunks int) ([]OffsetRange, error) {
  15. info, err := os.Stat(filename)
  16. if err != nil {
  17. return nil, err
  18. }
  19. if chunks <= 1 {
  20. return []OffsetRange{
  21. {
  22. File: filename,
  23. Start: 0,
  24. Stop: info.Size(),
  25. },
  26. }, nil
  27. }
  28. file, err := os.Open(filename)
  29. if err != nil {
  30. return nil, err
  31. }
  32. defer file.Close()
  33. var ranges []OffsetRange
  34. var offset int64
  35. // avoid the last chunk too few bytes
  36. preferSize := info.Size()/int64(chunks) + 1
  37. for {
  38. if offset+preferSize >= info.Size() {
  39. ranges = append(ranges, OffsetRange{
  40. File: filename,
  41. Start: offset,
  42. Stop: info.Size(),
  43. })
  44. break
  45. }
  46. offsetRange, err := nextRange(file, offset, offset+preferSize)
  47. if err != nil {
  48. return nil, err
  49. }
  50. ranges = append(ranges, offsetRange)
  51. if offsetRange.Stop < info.Size() {
  52. offset = offsetRange.Stop
  53. } else {
  54. break
  55. }
  56. }
  57. return ranges, nil
  58. }
  59. func nextRange(file *os.File, start, stop int64) (OffsetRange, error) {
  60. offset, err := skipPartialLine(file, stop)
  61. if err != nil {
  62. return OffsetRange{}, err
  63. }
  64. return OffsetRange{
  65. File: file.Name(),
  66. Start: start,
  67. Stop: offset,
  68. }, nil
  69. }
  70. func skipPartialLine(file *os.File, offset int64) (int64, error) {
  71. for {
  72. skipBuf := make([]byte, bufSize)
  73. n, err := file.ReadAt(skipBuf, offset)
  74. if err != nil && err != io.EOF {
  75. return 0, err
  76. }
  77. if n == 0 {
  78. return 0, io.EOF
  79. }
  80. for i := 0; i < n; i++ {
  81. if skipBuf[i] != '\r' && skipBuf[i] != '\n' {
  82. offset++
  83. } else {
  84. for ; i < n; i++ {
  85. if skipBuf[i] == '\r' || skipBuf[i] == '\n' {
  86. offset++
  87. } else {
  88. return offset, nil
  89. }
  90. }
  91. return offset, nil
  92. }
  93. }
  94. }
  95. }