// lookup.go (1.8 KB)

package filex

import (
	"errors"
	"io"
	"os"
)
  6. type OffsetRange struct {
  7. File string
  8. Start int64
  9. Stop int64
  10. }
  11. func SplitLineChunks(filename string, chunks int) ([]OffsetRange, error) {
  12. info, err := os.Stat(filename)
  13. if err != nil {
  14. return nil, err
  15. }
  16. if chunks <= 1 {
  17. return []OffsetRange{
  18. {
  19. File: filename,
  20. Start: 0,
  21. Stop: info.Size(),
  22. },
  23. }, nil
  24. }
  25. file, err := os.Open(filename)
  26. if err != nil {
  27. return nil, err
  28. }
  29. defer file.Close()
  30. var ranges []OffsetRange
  31. var offset int64
  32. // avoid the last chunk too few bytes
  33. preferSize := info.Size()/int64(chunks) + 1
  34. for {
  35. if offset+preferSize >= info.Size() {
  36. ranges = append(ranges, OffsetRange{
  37. File: filename,
  38. Start: offset,
  39. Stop: info.Size(),
  40. })
  41. break
  42. }
  43. offsetRange, err := nextRange(file, offset, offset+preferSize)
  44. if err != nil {
  45. return nil, err
  46. }
  47. ranges = append(ranges, offsetRange)
  48. if offsetRange.Stop < info.Size() {
  49. offset = offsetRange.Stop
  50. } else {
  51. break
  52. }
  53. }
  54. return ranges, nil
  55. }
  56. func nextRange(file *os.File, start, stop int64) (OffsetRange, error) {
  57. offset, err := skipPartialLine(file, stop)
  58. if err != nil {
  59. return OffsetRange{}, err
  60. }
  61. return OffsetRange{
  62. File: file.Name(),
  63. Start: start,
  64. Stop: offset,
  65. }, nil
  66. }
  67. func skipPartialLine(file *os.File, offset int64) (int64, error) {
  68. for {
  69. skipBuf := make([]byte, bufSize)
  70. n, err := file.ReadAt(skipBuf, offset)
  71. if err != nil && err != io.EOF {
  72. return 0, err
  73. }
  74. if n == 0 {
  75. return 0, io.EOF
  76. }
  77. for i := 0; i < n; i++ {
  78. if skipBuf[i] != '\r' && skipBuf[i] != '\n' {
  79. offset++
  80. } else {
  81. for ; i < n; i++ {
  82. if skipBuf[i] == '\r' || skipBuf[i] == '\n' {
  83. offset++
  84. } else {
  85. return offset, nil
  86. }
  87. }
  88. return offset, nil
  89. }
  90. }
  91. }
  92. }