Browse Source

Refactoring the line parsing

Julian Kornberger 9 years ago
parent
commit
8082b59766
4 changed files with 299 additions and 216 deletions
  1. 0 216
      ftp.go
  2. 213 0
      parse.go
  3. 58 0
      scanner.go
  4. 28 0
      scanner_test.go

+ 0 - 216
ftp.go

@@ -4,7 +4,6 @@ package ftp
 import (
 import (
 	"bufio"
 	"bufio"
 	"errors"
 	"errors"
-	"fmt"
 	"io"
 	"io"
 	"net"
 	"net"
 	"net/textproto"
 	"net/textproto"
@@ -292,221 +291,6 @@ func (c *ServerConn) cmdDataConnFrom(offset uint64, format string, args ...inter
 	return conn, nil
 	return conn, nil
 }
 }
 
 
-var errUnsupportedListLine = errors.New("Unsupported LIST line")
-
-// parseRFC3659ListLine parses the style of directory line defined in RFC 3659.
-func parseRFC3659ListLine(line string) (*Entry, error) {
-	iSemicolon := strings.Index(line, ";")
-	iWhitespace := strings.Index(line, " ")
-
-	if iSemicolon < 0 || iSemicolon > iWhitespace {
-		return nil, errUnsupportedListLine
-	}
-
-	e := &Entry{
-		Name: line[iWhitespace+1:],
-	}
-
-	for _, field := range strings.Split(line[:iWhitespace-1], ";") {
-		i := strings.Index(field, "=")
-		if i < 1 {
-			return nil, errUnsupportedListLine
-		}
-
-		key := field[:i]
-		value := field[i+1:]
-
-		switch key {
-		case "modify":
-			var err error
-			e.Time, err = time.Parse("20060102150405", value)
-			if err != nil {
-				return nil, err
-			}
-		case "type":
-			switch value {
-			case "dir", "cdir", "pdir":
-				e.Type = EntryTypeFolder
-			case "file":
-				e.Type = EntryTypeFile
-			}
-		case "size":
-			e.setSize(value)
-		}
-	}
-	return e, nil
-}
-
-// parse file or folder name with multiple spaces
-func parseLsListLineName(line string, fields []string, offset int) string {
-	if offset < 1 {
-		return ""
-	}
-
-	match := fmt.Sprintf(" %s ", fields[offset-1])
-	index := strings.Index(line, match)
-	if index == -1 {
-		return ""
-	}
-
-	index += len(match)
-	return strings.TrimSpace(line[index:])
-}
-
-// parseLsListLine parses a directory line in a format based on the output of
-// the UNIX ls command.
-func parseLsListLine(line string) (*Entry, error) {
-	fields := strings.Fields(line)
-	if len(fields) >= 7 && fields[1] == "folder" && fields[2] == "0" {
-		e := &Entry{
-			Type: EntryTypeFolder,
-			Name: strings.Join(fields[6:], " "),
-		}
-		if err := e.setTime(fields[3:6]); err != nil {
-			return nil, err
-		}
-
-		return e, nil
-	}
-
-	if len(fields) < 8 {
-		return nil, errUnsupportedListLine
-	}
-
-	if fields[1] == "0" {
-		e := &Entry{
-			Type: EntryTypeFile,
-			Name: strings.Join(fields[7:], " "),
-		}
-
-		if err := e.setSize(fields[2]); err != nil {
-			return nil, err
-		}
-		if err := e.setTime(fields[4:7]); err != nil {
-			return nil, err
-		}
-
-		return e, nil
-	}
-
-	if len(fields) < 9 {
-		return nil, errUnsupportedListLine
-	}
-
-	e := &Entry{}
-	switch fields[0][0] {
-	case '-':
-		e.Type = EntryTypeFile
-		if err := e.setSize(fields[4]); err != nil {
-			return nil, err
-		}
-	case 'd':
-		e.Type = EntryTypeFolder
-	case 'l':
-		e.Type = EntryTypeLink
-	default:
-		return nil, errors.New("Unknown entry type")
-	}
-
-	if err := e.setTime(fields[5:8]); err != nil {
-		return nil, err
-	}
-
-	e.Name = parseLsListLineName(line, fields, 8)
-	if len(e.Name) == 0 {
-		e.Name = strings.Join(fields[8:], " ")
-	}
-
-	return e, nil
-}
-
-var dirTimeFormats = []string{
-	"01-02-06  03:04PM",
-	"2006-01-02  15:04",
-}
-
-// parseDirListLine parses a directory line in a format based on the output of
-// the MS-DOS DIR command.
-func parseDirListLine(line string) (*Entry, error) {
-	e := &Entry{}
-	var err error
-
-	// Try various time formats that DIR might use, and stop when one works.
-	for _, format := range dirTimeFormats {
-		if len(line) > len(format) {
-			e.Time, err = time.Parse(format, line[:len(format)])
-			if err == nil {
-				line = line[len(format):]
-				break
-			}
-		}
-	}
-	if err != nil {
-		// None of the time formats worked.
-		return nil, errUnsupportedListLine
-	}
-
-	line = strings.TrimLeft(line, " ")
-	if strings.HasPrefix(line, "<DIR>") {
-		e.Type = EntryTypeFolder
-		line = strings.TrimPrefix(line, "<DIR>")
-	} else {
-		space := strings.Index(line, " ")
-		if space == -1 {
-			return nil, errUnsupportedListLine
-		}
-		e.Size, err = strconv.ParseUint(line[:space], 10, 64)
-		if err != nil {
-			return nil, errUnsupportedListLine
-		}
-		e.Type = EntryTypeFile
-		line = line[space:]
-	}
-
-	e.Name = strings.TrimLeft(line, " ")
-	return e, nil
-}
-
-var listLineParsers = []func(line string) (*Entry, error){
-	parseRFC3659ListLine,
-	parseLsListLine,
-	parseDirListLine,
-}
-
-// parseListLine parses the various non-standard format returned by the LIST
-// FTP command.
-func parseListLine(line string) (*Entry, error) {
-	for _, f := range listLineParsers {
-		e, err := f(line)
-		if err == errUnsupportedListLine {
-			// Try another format.
-			continue
-		}
-		return e, err
-	}
-	return nil, errUnsupportedListLine
-}
-
-func (e *Entry) setSize(str string) (err error) {
-	e.Size, err = strconv.ParseUint(str, 0, 64)
-	return
-}
-
-func (e *Entry) setTime(fields []string) (err error) {
-	var timeStr string
-	if strings.Contains(fields[2], ":") { // this year
-		thisYear, _, _ := time.Now().Date()
-		timeStr = fields[1] + " " + fields[0] + " " + strconv.Itoa(thisYear)[2:4] + " " + fields[2] + " GMT"
-	} else { // not this year
-		if len(fields[2]) != 4 {
-			return errors.New("Invalid year format in time string")
-		}
-		timeStr = fields[1] + " " + fields[0] + " " + fields[2][2:4] + " 00:00 GMT"
-	}
-	e.Time, err = time.Parse("_2 Jan 06 15:04 MST", timeStr)
-	return
-}
-
 // NameList issues an NLST FTP command.
 // NameList issues an NLST FTP command.
 func (c *ServerConn) NameList(path string) (entries []string, err error) {
 func (c *ServerConn) NameList(path string) (entries []string, err error) {
 	conn, err := c.cmdDataConnFrom(0, "NLST %s", path)
 	conn, err := c.cmdDataConnFrom(0, "NLST %s", path)

+ 213 - 0
parse.go

@@ -0,0 +1,213 @@
+package ftp
+
+import (
+	"errors"
+	"strconv"
+	"strings"
+	"time"
+)
+
+var errUnsupportedListLine = errors.New("Unsupported LIST line")
+
+var listLineParsers = []func(line string) (*Entry, error){
+	parseRFC3659ListLine,
+	parseLsListLine,
+	parseDirListLine,
+}
+
+var dirTimeFormats = []string{
+	"01-02-06  03:04PM",
+	"2006-01-02  15:04",
+}
+
+// parseRFC3659ListLine parses the style of directory line defined in RFC 3659.
+func parseRFC3659ListLine(line string) (*Entry, error) {
+	iSemicolon := strings.Index(line, ";")
+	iWhitespace := strings.Index(line, " ")
+
+	if iSemicolon < 0 || iSemicolon > iWhitespace {
+		return nil, errUnsupportedListLine
+	}
+
+	e := &Entry{
+		Name: line[iWhitespace+1:],
+	}
+
+	for _, field := range strings.Split(line[:iWhitespace-1], ";") {
+		i := strings.Index(field, "=")
+		if i < 1 {
+			return nil, errUnsupportedListLine
+		}
+
+		key := field[:i]
+		value := field[i+1:]
+
+		switch key {
+		case "modify":
+			var err error
+			e.Time, err = time.Parse("20060102150405", value)
+			if err != nil {
+				return nil, err
+			}
+		case "type":
+			switch value {
+			case "dir", "cdir", "pdir":
+				e.Type = EntryTypeFolder
+			case "file":
+				e.Type = EntryTypeFile
+			}
+		case "size":
+			e.setSize(value)
+		}
+	}
+	return e, nil
+}
+
+// parseLsListLine parses a directory line in a format based on the output of
+// the UNIX ls command.
+func parseLsListLine(line string) (*Entry, error) {
+
+	// Has the first field a length of 10 bytes?
+	if strings.IndexByte(line, ' ') != 10 {
+		return nil, errUnsupportedListLine
+	}
+
+	scanner := NewScanner(line)
+	fields := scanner.NextFields(6)
+
+	if len(fields) < 6 {
+		return nil, errUnsupportedListLine
+	}
+
+	if fields[1] == "folder" && fields[2] == "0" {
+		e := &Entry{
+			Type: EntryTypeFolder,
+			Name: scanner.Remaining(),
+		}
+		if err := e.setTime(fields[3:6]); err != nil {
+			return nil, err
+		}
+
+		return e, nil
+	}
+
+	if fields[1] == "0" {
+		fields = append(fields, scanner.Next())
+		e := &Entry{
+			Type: EntryTypeFile,
+			Name: scanner.Remaining(),
+		}
+
+		if err := e.setSize(fields[2]); err != nil {
+			return nil, err
+		}
+		if err := e.setTime(fields[4:7]); err != nil {
+			return nil, err
+		}
+
+		return e, nil
+	}
+
+	// Read two more fields
+	fields = append(fields, scanner.NextFields(2)...)
+	if len(fields) < 8 {
+		return nil, errUnsupportedListLine
+	}
+
+	e := &Entry{
+		Name: scanner.Remaining(),
+	}
+	switch fields[0][0] {
+	case '-':
+		e.Type = EntryTypeFile
+		if err := e.setSize(fields[4]); err != nil {
+			return nil, err
+		}
+	case 'd':
+		e.Type = EntryTypeFolder
+	case 'l':
+		e.Type = EntryTypeLink
+	default:
+		return nil, errors.New("Unknown entry type")
+	}
+
+	if err := e.setTime(fields[5:8]); err != nil {
+		return nil, err
+	}
+
+	return e, nil
+}
+
+// parseDirListLine parses a directory line in a format based on the output of
+// the MS-DOS DIR command.
+func parseDirListLine(line string) (*Entry, error) {
+	e := &Entry{}
+	var err error
+
+	// Try various time formats that DIR might use, and stop when one works.
+	for _, format := range dirTimeFormats {
+		if len(line) > len(format) {
+			e.Time, err = time.Parse(format, line[:len(format)])
+			if err == nil {
+				line = line[len(format):]
+				break
+			}
+		}
+	}
+	if err != nil {
+		// None of the time formats worked.
+		return nil, errUnsupportedListLine
+	}
+
+	line = strings.TrimLeft(line, " ")
+	if strings.HasPrefix(line, "<DIR>") {
+		e.Type = EntryTypeFolder
+		line = strings.TrimPrefix(line, "<DIR>")
+	} else {
+		space := strings.Index(line, " ")
+		if space == -1 {
+			return nil, errUnsupportedListLine
+		}
+		e.Size, err = strconv.ParseUint(line[:space], 10, 64)
+		if err != nil {
+			return nil, errUnsupportedListLine
+		}
+		e.Type = EntryTypeFile
+		line = line[space:]
+	}
+
+	e.Name = strings.TrimLeft(line, " ")
+	return e, nil
+}
+
+// parseListLine parses the various non-standard format returned by the LIST
+// FTP command.
+func parseListLine(line string) (*Entry, error) {
+	for _, f := range listLineParsers {
+		e, err := f(line)
+		if err != errUnsupportedListLine {
+			return e, err
+		}
+	}
+	return nil, errUnsupportedListLine
+}
+
+func (e *Entry) setSize(str string) (err error) {
+	e.Size, err = strconv.ParseUint(str, 0, 64)
+	return
+}
+
+func (e *Entry) setTime(fields []string) (err error) {
+	var timeStr string
+	if strings.Contains(fields[2], ":") { // this year
+		thisYear, _, _ := time.Now().Date()
+		timeStr = fields[1] + " " + fields[0] + " " + strconv.Itoa(thisYear)[2:4] + " " + fields[2] + " GMT"
+	} else { // not this year
+		if len(fields[2]) != 4 {
+			return errors.New("Invalid year format in time string")
+		}
+		timeStr = fields[1] + " " + fields[0] + " " + fields[2][2:4] + " 00:00 GMT"
+	}
+	e.Time, err = time.Parse("_2 Jan 06 15:04 MST", timeStr)
+	return
+}

+ 58 - 0
scanner.go

@@ -0,0 +1,58 @@
+package ftp
+
// Scanner splits a line into fields delimited by one or more space
// characters (' '). No other byte is treated as a delimiter.
type Scanner struct {
	line     string // input being scanned
	position int    // byte offset of the next unread byte in line
}

// NewScanner creates a new Scanner positioned at the start of str.
func NewScanner(str string) *Scanner {
	return &Scanner{line: str}
}

// NextFields returns up to count fields, stopping early when the input is
// exhausted. The result is never nil; a count below zero is treated as zero.
func (s *Scanner) NextFields(count int) []string {
	if count < 0 {
		count = 0
	}
	fields := make([]string, 0, count)
	for i := 0; i < count; i++ {
		field := s.Next()
		if field == "" {
			break
		}
		fields = append(fields, field)
	}
	return fields
}

// Next returns the next field, or "" when only spaces (or nothing) remain.
// Exactly one space following the field is consumed, so any further spaces
// are still visible through Remaining.
func (s *Scanner) Next() string {
	end := len(s.line)

	// Skip the spaces in front of the field.
	for s.position < end && s.line[s.position] == ' ' {
		s.position++
	}

	start := s.position

	// Advance to the end of the field.
	for s.position < end && s.line[s.position] != ' ' {
		s.position++
	}
	field := s.line[start:s.position]

	// Consume the single space that terminated the field, if any.
	if s.position < end {
		s.position++
	}

	return field
}

// Remaining returns the part of the input that has not been consumed yet,
// without advancing the scanner.
func (s *Scanner) Remaining() string {
	return s.line[s.position:]
}

+ 28 - 0
scanner_test.go

@@ -0,0 +1,28 @@
+package ftp
+
+import "testing"
+import "github.com/stretchr/testify/assert"
+
+func TestScanner(t *testing.T) {
+	assert := assert.New(t)
+
+	s := NewScanner("foo  bar x  y")
+	assert.Equal("foo", s.Next())
+	assert.Equal(" bar x  y", s.Remaining())
+	assert.Equal("bar", s.Next())
+	assert.Equal("x  y", s.Remaining())
+	assert.Equal("x", s.Next())
+	assert.Equal(" y", s.Remaining())
+	assert.Equal("y", s.Next())
+	assert.Equal("", s.Next())
+	assert.Equal("", s.Remaining())
+}
+
+func TestScannerEmpty(t *testing.T) {
+	assert := assert.New(t)
+
+	s := NewScanner("")
+	assert.Equal("", s.Next())
+	assert.Equal("", s.Next())
+	assert.Equal("", s.Remaining())
+}