123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105 |
- package filex
- import (
- "io"
- "os"
- )
// OffsetRange describes a contiguous byte range inside a file.
type OffsetRange struct {
	// File is the name of the file the range refers to.
	File string
	// Start is the byte offset at which the range begins.
	Start int64
	// Stop is the byte offset at which the range ends. SplitLineChunks uses it
	// as the Start of the following range, so it is an exclusive bound.
	Stop int64
}
- func SplitLineChunks(filename string, chunks int) ([]OffsetRange, error) {
- info, err := os.Stat(filename)
- if err != nil {
- return nil, err
- }
- if chunks <= 1 {
- return []OffsetRange{
- {
- File: filename,
- Start: 0,
- Stop: info.Size(),
- },
- }, nil
- }
- file, err := os.Open(filename)
- if err != nil {
- return nil, err
- }
- defer file.Close()
- var ranges []OffsetRange
- var offset int64
- // avoid the last chunk too few bytes
- preferSize := info.Size()/int64(chunks) + 1
- for {
- if offset+preferSize >= info.Size() {
- ranges = append(ranges, OffsetRange{
- File: filename,
- Start: offset,
- Stop: info.Size(),
- })
- break
- }
- offsetRange, err := nextRange(file, offset, offset+preferSize)
- if err != nil {
- return nil, err
- }
- ranges = append(ranges, offsetRange)
- if offsetRange.Stop < info.Size() {
- offset = offsetRange.Stop
- } else {
- break
- }
- }
- return ranges, nil
- }
- func nextRange(file *os.File, start, stop int64) (OffsetRange, error) {
- offset, err := skipPartialLine(file, stop)
- if err != nil {
- return OffsetRange{}, err
- }
- return OffsetRange{
- File: file.Name(),
- Start: start,
- Stop: offset,
- }, nil
- }
- func skipPartialLine(file *os.File, offset int64) (int64, error) {
- for {
- skipBuf := make([]byte, bufSize)
- n, err := file.ReadAt(skipBuf, offset)
- if err != nil && err != io.EOF {
- return 0, err
- }
- if n == 0 {
- return 0, io.EOF
- }
- for i := 0; i < n; i++ {
- if skipBuf[i] != '\r' && skipBuf[i] != '\n' {
- offset++
- } else {
- for ; i < n; i++ {
- if skipBuf[i] == '\r' || skipBuf[i] == '\n' {
- offset++
- } else {
- return offset, nil
- }
- }
- return offset, nil
- }
- }
- }
- }
|