parse.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. package ftp
  2. import (
  3. "errors"
  4. "strconv"
  5. "strings"
  6. "time"
  7. )
// errUnsupportedListLine is returned by a line parser when the line is not in
// the format that parser handles. parseListLine treats it as "try the next
// parser" rather than as a final failure.
var errUnsupportedListLine = errors.New("Unsupported LIST line")

// parseFunc is the signature shared by all line parsers: it receives one
// listing line and a reference time, and returns the parsed Entry or an error.
type parseFunc func(string, time.Time) (*Entry, error)

// listLineParsers holds the parsers that parseListLine tries, in this order.
var listLineParsers = []parseFunc{
	parseRFC3659ListLine,
	parseLsListLine,
	parseDirListLine,
	parseHostedFTPLine,
}

// dirTimeFormats lists the timestamp layouts that parseDirListLine tries, in
// order, against the start of a line.
var dirTimeFormats = []string{
	"01-02-06 03:04PM",
	"2006-01-02 15:04",
}
  20. // parseRFC3659ListLine parses the style of directory line defined in RFC 3659.
  21. func parseRFC3659ListLine(line string, now time.Time) (*Entry, error) {
  22. iSemicolon := strings.Index(line, ";")
  23. iWhitespace := strings.Index(line, " ")
  24. if iSemicolon < 0 || iSemicolon > iWhitespace {
  25. return nil, errUnsupportedListLine
  26. }
  27. e := &Entry{
  28. Name: line[iWhitespace+1:],
  29. }
  30. for _, field := range strings.Split(line[:iWhitespace-1], ";") {
  31. i := strings.Index(field, "=")
  32. if i < 1 {
  33. return nil, errUnsupportedListLine
  34. }
  35. key := strings.ToLower(field[:i])
  36. value := field[i+1:]
  37. switch key {
  38. case "modify":
  39. var err error
  40. e.Time, err = time.Parse("20060102150405", value)
  41. if err != nil {
  42. return nil, err
  43. }
  44. case "type":
  45. switch value {
  46. case "dir", "cdir", "pdir":
  47. e.Type = EntryTypeFolder
  48. case "file":
  49. e.Type = EntryTypeFile
  50. }
  51. case "size":
  52. e.setSize(value)
  53. }
  54. }
  55. return e, nil
  56. }
  57. // parseLsListLine parses a directory line in a format based on the output of
  58. // the UNIX ls command.
  59. func parseLsListLine(line string, now time.Time) (*Entry, error) {
  60. // Has the first field a length of 10 bytes?
  61. if strings.IndexByte(line, ' ') != 10 {
  62. return nil, errUnsupportedListLine
  63. }
  64. scanner := newScanner(line)
  65. fields := scanner.NextFields(6)
  66. if len(fields) < 6 {
  67. return nil, errUnsupportedListLine
  68. }
  69. if fields[1] == "folder" && fields[2] == "0" {
  70. e := &Entry{
  71. Type: EntryTypeFolder,
  72. Name: scanner.Remaining(),
  73. }
  74. if err := e.setTime(fields[3:6], now); err != nil {
  75. return nil, err
  76. }
  77. return e, nil
  78. }
  79. if fields[1] == "0" {
  80. fields = append(fields, scanner.Next())
  81. e := &Entry{
  82. Type: EntryTypeFile,
  83. Name: scanner.Remaining(),
  84. }
  85. if err := e.setSize(fields[2]); err != nil {
  86. return nil, errUnsupportedListLine
  87. }
  88. if err := e.setTime(fields[4:7], now); err != nil {
  89. return nil, err
  90. }
  91. return e, nil
  92. }
  93. // Read two more fields
  94. fields = append(fields, scanner.NextFields(2)...)
  95. if len(fields) < 8 {
  96. return nil, errUnsupportedListLine
  97. }
  98. e := &Entry{
  99. Name: scanner.Remaining(),
  100. }
  101. switch fields[0][0] {
  102. case '-':
  103. e.Type = EntryTypeFile
  104. if err := e.setSize(fields[4]); err != nil {
  105. return nil, err
  106. }
  107. case 'd':
  108. e.Type = EntryTypeFolder
  109. case 'l':
  110. e.Type = EntryTypeLink
  111. default:
  112. return nil, errors.New("Unknown entry type")
  113. }
  114. if err := e.setTime(fields[5:8], now); err != nil {
  115. return nil, err
  116. }
  117. return e, nil
  118. }
  119. // parseDirListLine parses a directory line in a format based on the output of
  120. // the MS-DOS DIR command.
  121. func parseDirListLine(line string, now time.Time) (*Entry, error) {
  122. e := &Entry{}
  123. var err error
  124. // Try various time formats that DIR might use, and stop when one works.
  125. for _, format := range dirTimeFormats {
  126. if len(line) > len(format) {
  127. e.Time, err = time.Parse(format, line[:len(format)])
  128. if err == nil {
  129. line = line[len(format):]
  130. break
  131. }
  132. }
  133. }
  134. if err != nil {
  135. // None of the time formats worked.
  136. return nil, errUnsupportedListLine
  137. }
  138. line = strings.TrimLeft(line, " ")
  139. if strings.HasPrefix(line, "<DIR>") {
  140. e.Type = EntryTypeFolder
  141. line = strings.TrimPrefix(line, "<DIR>")
  142. } else {
  143. space := strings.Index(line, " ")
  144. if space == -1 {
  145. return nil, errUnsupportedListLine
  146. }
  147. e.Size, err = strconv.ParseUint(line[:space], 10, 64)
  148. if err != nil {
  149. return nil, errUnsupportedListLine
  150. }
  151. e.Type = EntryTypeFile
  152. line = line[space:]
  153. }
  154. e.Name = strings.TrimLeft(line, " ")
  155. return e, nil
  156. }
  157. // parseHostedFTPLine parses a directory line in the non-standard format used
  158. // by hostedftp.com
  159. // -r-------- 0 user group 65222236 Feb 24 00:39 UABlacklistingWeek8.csv
  160. // (The link count is inexplicably 0)
  161. func parseHostedFTPLine(line string, now time.Time) (*Entry, error) {
  162. // Has the first field a length of 10 bytes?
  163. if strings.IndexByte(line, ' ') != 10 {
  164. return nil, errUnsupportedListLine
  165. }
  166. scanner := newScanner(line)
  167. fields := scanner.NextFields(2)
  168. if len(fields) < 2 || fields[1] != "0" {
  169. return nil, errUnsupportedListLine
  170. }
  171. // Set link count to 1 and attempt to parse as Unix.
  172. return parseLsListLine(fields[0]+" 1 "+scanner.Remaining(), now)
  173. }
  174. // parseListLine parses the various non-standard format returned by the LIST
  175. // FTP command.
  176. func parseListLine(line string, now time.Time) (*Entry, error) {
  177. for _, f := range listLineParsers {
  178. e, err := f(line, now)
  179. if err != errUnsupportedListLine {
  180. return e, err
  181. }
  182. }
  183. return nil, errUnsupportedListLine
  184. }
  185. func (e *Entry) setSize(str string) (err error) {
  186. e.Size, err = strconv.ParseUint(str, 0, 64)
  187. return
  188. }
  189. func (e *Entry) setTime(fields []string, now time.Time) (err error) {
  190. if strings.Contains(fields[2], ":") { // contains time
  191. thisYear, _, _ := now.Date()
  192. timeStr := fields[1] + " " + fields[0] + " " + strconv.Itoa(thisYear)[2:4] + " " + fields[2] + " GMT"
  193. e.Time, err = time.Parse("_2 Jan 06 15:04 MST", timeStr)
  194. /*
  195. On unix, `info ls` shows:
  196. 10.1.6 Formatting file timestamps
  197. ---------------------------------
  198. A timestamp is considered to be “recent” if it is less than six
  199. months old, and is not dated in the future. If a timestamp dated today
  200. is not listed in recent form, the timestamp is in the future, which
  201. means you probably have clock skew problems which may break programs
  202. like ‘make’ that rely on file timestamps.
  203. */
  204. if !e.Time.Before(now.AddDate(0, 6, 0)) {
  205. e.Time = e.Time.AddDate(-1, 0, 0)
  206. }
  207. } else { // only the date
  208. if len(fields[2]) != 4 {
  209. return errors.New("Invalid year format in time string")
  210. }
  211. timeStr := fields[1] + " " + fields[0] + " " + fields[2][2:4] + " 00:00 GMT"
  212. e.Time, err = time.Parse("_2 Jan 06 15:04 MST", timeStr)
  213. }
  214. return
  215. }