fields.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package yaml
  5. import (
  6. "bytes"
  7. "encoding"
  8. "encoding/json"
  9. "reflect"
  10. "sort"
  11. "strings"
  12. "sync"
  13. "unicode"
  14. "unicode/utf8"
  15. )
  16. // indirect walks down v allocating pointers as needed,
  17. // until it gets to a non-pointer.
  18. // if it encounters an Unmarshaler, indirect stops and returns that.
  19. // if decodingNull is true, indirect stops at the last pointer so it can be set to nil.
  20. func indirect(v reflect.Value, decodingNull bool) (json.Unmarshaler, encoding.TextUnmarshaler, reflect.Value) {
  21. // If v is a named type and is addressable,
  22. // start with its address, so that if the type has pointer methods,
  23. // we find them.
  24. if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() {
  25. v = v.Addr()
  26. }
  27. for {
  28. // Load value from interface, but only if the result will be
  29. // usefully addressable.
  30. if v.Kind() == reflect.Interface && !v.IsNil() {
  31. e := v.Elem()
  32. if e.Kind() == reflect.Ptr && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Ptr) {
  33. v = e
  34. continue
  35. }
  36. }
  37. if v.Kind() != reflect.Ptr {
  38. break
  39. }
  40. if v.Elem().Kind() != reflect.Ptr && decodingNull && v.CanSet() {
  41. break
  42. }
  43. if v.IsNil() {
  44. v.Set(reflect.New(v.Type().Elem()))
  45. }
  46. if v.Type().NumMethod() > 0 {
  47. if u, ok := v.Interface().(json.Unmarshaler); ok {
  48. return u, nil, reflect.Value{}
  49. }
  50. if u, ok := v.Interface().(encoding.TextUnmarshaler); ok {
  51. return nil, u, reflect.Value{}
  52. }
  53. }
  54. v = v.Elem()
  55. }
  56. return nil, nil, v
  57. }
  58. // A field represents a single field found in a struct.
  59. type field struct {
  60. name string
  61. nameBytes []byte // []byte(name)
  62. equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
  63. tag bool
  64. index []int
  65. typ reflect.Type
  66. omitEmpty bool
  67. quoted bool
  68. }
  69. func fillField(f field) field {
  70. f.nameBytes = []byte(f.name)
  71. f.equalFold = foldFunc(f.nameBytes)
  72. return f
  73. }
  74. // byName sorts field by name, breaking ties with depth,
  75. // then breaking ties with "name came from json tag", then
  76. // breaking ties with index sequence.
  77. type byName []field
  78. func (x byName) Len() int { return len(x) }
  79. func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  80. func (x byName) Less(i, j int) bool {
  81. if x[i].name != x[j].name {
  82. return x[i].name < x[j].name
  83. }
  84. if len(x[i].index) != len(x[j].index) {
  85. return len(x[i].index) < len(x[j].index)
  86. }
  87. if x[i].tag != x[j].tag {
  88. return x[i].tag
  89. }
  90. return byIndex(x).Less(i, j)
  91. }
  92. // byIndex sorts field by index sequence.
  93. type byIndex []field
  94. func (x byIndex) Len() int { return len(x) }
  95. func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  96. func (x byIndex) Less(i, j int) bool {
  97. for k, xik := range x[i].index {
  98. if k >= len(x[j].index) {
  99. return false
  100. }
  101. if xik != x[j].index[k] {
  102. return xik < x[j].index[k]
  103. }
  104. }
  105. return len(x[i].index) < len(x[j].index)
  106. }
  107. // typeFields returns a list of fields that JSON should recognize for the given type.
  108. // The algorithm is breadth-first search over the set of structs to include - the top struct
  109. // and then any reachable anonymous structs.
  110. func typeFields(t reflect.Type) []field {
  111. // Anonymous fields to explore at the current level and the next.
  112. current := []field{}
  113. next := []field{{typ: t}}
  114. // Count of queued names for current level and the next.
  115. count := map[reflect.Type]int{}
  116. nextCount := map[reflect.Type]int{}
  117. // Types already visited at an earlier level.
  118. visited := map[reflect.Type]bool{}
  119. // Fields found.
  120. var fields []field
  121. for len(next) > 0 {
  122. current, next = next, current[:0]
  123. count, nextCount = nextCount, map[reflect.Type]int{}
  124. for _, f := range current {
  125. if visited[f.typ] {
  126. continue
  127. }
  128. visited[f.typ] = true
  129. // Scan f.typ for fields to include.
  130. for i := 0; i < f.typ.NumField(); i++ {
  131. sf := f.typ.Field(i)
  132. if sf.PkgPath != "" { // unexported
  133. continue
  134. }
  135. tag := sf.Tag.Get("json")
  136. if tag == "-" {
  137. continue
  138. }
  139. name, opts := parseTag(tag)
  140. if !isValidTag(name) {
  141. name = ""
  142. }
  143. index := make([]int, len(f.index)+1)
  144. copy(index, f.index)
  145. index[len(f.index)] = i
  146. ft := sf.Type
  147. if ft.Name() == "" && ft.Kind() == reflect.Ptr {
  148. // Follow pointer.
  149. ft = ft.Elem()
  150. }
  151. // Record found field and index sequence.
  152. if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
  153. tagged := name != ""
  154. if name == "" {
  155. name = sf.Name
  156. }
  157. fields = append(fields, fillField(field{
  158. name: name,
  159. tag: tagged,
  160. index: index,
  161. typ: ft,
  162. omitEmpty: opts.Contains("omitempty"),
  163. quoted: opts.Contains("string"),
  164. }))
  165. if count[f.typ] > 1 {
  166. // If there were multiple instances, add a second,
  167. // so that the annihilation code will see a duplicate.
  168. // It only cares about the distinction between 1 or 2,
  169. // so don't bother generating any more copies.
  170. fields = append(fields, fields[len(fields)-1])
  171. }
  172. continue
  173. }
  174. // Record new anonymous struct to explore in next round.
  175. nextCount[ft]++
  176. if nextCount[ft] == 1 {
  177. next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
  178. }
  179. }
  180. }
  181. }
  182. sort.Sort(byName(fields))
  183. // Delete all fields that are hidden by the Go rules for embedded fields,
  184. // except that fields with JSON tags are promoted.
  185. // The fields are sorted in primary order of name, secondary order
  186. // of field index length. Loop over names; for each name, delete
  187. // hidden fields by choosing the one dominant field that survives.
  188. out := fields[:0]
  189. for advance, i := 0, 0; i < len(fields); i += advance {
  190. // One iteration per name.
  191. // Find the sequence of fields with the name of this first field.
  192. fi := fields[i]
  193. name := fi.name
  194. for advance = 1; i+advance < len(fields); advance++ {
  195. fj := fields[i+advance]
  196. if fj.name != name {
  197. break
  198. }
  199. }
  200. if advance == 1 { // Only one field with this name
  201. out = append(out, fi)
  202. continue
  203. }
  204. dominant, ok := dominantField(fields[i : i+advance])
  205. if ok {
  206. out = append(out, dominant)
  207. }
  208. }
  209. fields = out
  210. sort.Sort(byIndex(fields))
  211. return fields
  212. }
  213. // dominantField looks through the fields, all of which are known to
  214. // have the same name, to find the single field that dominates the
  215. // others using Go's embedding rules, modified by the presence of
  216. // JSON tags. If there are multiple top-level fields, the boolean
  217. // will be false: This condition is an error in Go and we skip all
  218. // the fields.
  219. func dominantField(fields []field) (field, bool) {
  220. // The fields are sorted in increasing index-length order. The winner
  221. // must therefore be one with the shortest index length. Drop all
  222. // longer entries, which is easy: just truncate the slice.
  223. length := len(fields[0].index)
  224. tagged := -1 // Index of first tagged field.
  225. for i, f := range fields {
  226. if len(f.index) > length {
  227. fields = fields[:i]
  228. break
  229. }
  230. if f.tag {
  231. if tagged >= 0 {
  232. // Multiple tagged fields at the same level: conflict.
  233. // Return no field.
  234. return field{}, false
  235. }
  236. tagged = i
  237. }
  238. }
  239. if tagged >= 0 {
  240. return fields[tagged], true
  241. }
  242. // All remaining fields have the same length. If there's more than one,
  243. // we have a conflict (two fields named "X" at the same level) and we
  244. // return no field.
  245. if len(fields) > 1 {
  246. return field{}, false
  247. }
  248. return fields[0], true
  249. }
  250. var fieldCache struct {
  251. sync.RWMutex
  252. m map[reflect.Type][]field
  253. }
  254. // cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
  255. func cachedTypeFields(t reflect.Type) []field {
  256. fieldCache.RLock()
  257. f := fieldCache.m[t]
  258. fieldCache.RUnlock()
  259. if f != nil {
  260. return f
  261. }
  262. // Compute fields without lock.
  263. // Might duplicate effort but won't hold other computations back.
  264. f = typeFields(t)
  265. if f == nil {
  266. f = []field{}
  267. }
  268. fieldCache.Lock()
  269. if fieldCache.m == nil {
  270. fieldCache.m = map[reflect.Type][]field{}
  271. }
  272. fieldCache.m[t] = f
  273. fieldCache.Unlock()
  274. return f
  275. }
  276. func isValidTag(s string) bool {
  277. if s == "" {
  278. return false
  279. }
  280. for _, c := range s {
  281. switch {
  282. case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c):
  283. // Backslash and quote chars are reserved, but
  284. // otherwise any punctuation chars are allowed
  285. // in a tag name.
  286. default:
  287. if !unicode.IsLetter(c) && !unicode.IsDigit(c) {
  288. return false
  289. }
  290. }
  291. }
  292. return true
  293. }
  294. const (
  295. caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
  296. kelvin = '\u212a'
  297. smallLongEss = '\u017f'
  298. )
  299. // foldFunc returns one of four different case folding equivalence
  300. // functions, from most general (and slow) to fastest:
  301. //
  302. // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
  303. // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
  304. // 3) asciiEqualFold, no special, but includes non-letters (including _)
  305. // 4) simpleLetterEqualFold, no specials, no non-letters.
  306. //
  307. // The letters S and K are special because they map to 3 runes, not just 2:
  308. // * S maps to s and to U+017F 'ſ' Latin small letter long s
  309. // * k maps to K and to U+212A 'K' Kelvin sign
  310. // See http://play.golang.org/p/tTxjOc0OGo
  311. //
  312. // The returned function is specialized for matching against s and
  313. // should only be given s. It's not curried for performance reasons.
  314. func foldFunc(s []byte) func(s, t []byte) bool {
  315. nonLetter := false
  316. special := false // special letter
  317. for _, b := range s {
  318. if b >= utf8.RuneSelf {
  319. return bytes.EqualFold
  320. }
  321. upper := b & caseMask
  322. if upper < 'A' || upper > 'Z' {
  323. nonLetter = true
  324. } else if upper == 'K' || upper == 'S' {
  325. // See above for why these letters are special.
  326. special = true
  327. }
  328. }
  329. if special {
  330. return equalFoldRight
  331. }
  332. if nonLetter {
  333. return asciiEqualFold
  334. }
  335. return simpleLetterEqualFold
  336. }
  337. // equalFoldRight is a specialization of bytes.EqualFold when s is
  338. // known to be all ASCII (including punctuation), but contains an 's',
  339. // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
  340. // See comments on foldFunc.
  341. func equalFoldRight(s, t []byte) bool {
  342. for _, sb := range s {
  343. if len(t) == 0 {
  344. return false
  345. }
  346. tb := t[0]
  347. if tb < utf8.RuneSelf {
  348. if sb != tb {
  349. sbUpper := sb & caseMask
  350. if 'A' <= sbUpper && sbUpper <= 'Z' {
  351. if sbUpper != tb&caseMask {
  352. return false
  353. }
  354. } else {
  355. return false
  356. }
  357. }
  358. t = t[1:]
  359. continue
  360. }
  361. // sb is ASCII and t is not. t must be either kelvin
  362. // sign or long s; sb must be s, S, k, or K.
  363. tr, size := utf8.DecodeRune(t)
  364. switch sb {
  365. case 's', 'S':
  366. if tr != smallLongEss {
  367. return false
  368. }
  369. case 'k', 'K':
  370. if tr != kelvin {
  371. return false
  372. }
  373. default:
  374. return false
  375. }
  376. t = t[size:]
  377. }
  378. if len(t) > 0 {
  379. return false
  380. }
  381. return true
  382. }
  383. // asciiEqualFold is a specialization of bytes.EqualFold for use when
  384. // s is all ASCII (but may contain non-letters) and contains no
  385. // special-folding letters.
  386. // See comments on foldFunc.
  387. func asciiEqualFold(s, t []byte) bool {
  388. if len(s) != len(t) {
  389. return false
  390. }
  391. for i, sb := range s {
  392. tb := t[i]
  393. if sb == tb {
  394. continue
  395. }
  396. if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
  397. if sb&caseMask != tb&caseMask {
  398. return false
  399. }
  400. } else {
  401. return false
  402. }
  403. }
  404. return true
  405. }
  406. // simpleLetterEqualFold is a specialization of bytes.EqualFold for
  407. // use when s is all ASCII letters (no underscores, etc) and also
  408. // doesn't contain 'k', 'K', 's', or 'S'.
  409. // See comments on foldFunc.
  410. func simpleLetterEqualFold(s, t []byte) bool {
  411. if len(s) != len(t) {
  412. return false
  413. }
  414. for i, b := range s {
  415. if b&caseMask != t[i]&caseMask {
  416. return false
  417. }
  418. }
  419. return true
  420. }
  421. // tagOptions is the string following a comma in a struct field's "json"
  422. // tag, or the empty string. It does not include the leading comma.
  423. type tagOptions string
  424. // parseTag splits a struct field's json tag into its name and
  425. // comma-separated options.
  426. func parseTag(tag string) (string, tagOptions) {
  427. if idx := strings.Index(tag, ","); idx != -1 {
  428. return tag[:idx], tagOptions(tag[idx+1:])
  429. }
  430. return tag, tagOptions("")
  431. }
  432. // Contains reports whether a comma-separated list of options
  433. // contains a particular substr flag. substr must be surrounded by a
  434. // string boundary or commas.
  435. func (o tagOptions) Contains(optionName string) bool {
  436. if len(o) == 0 {
  437. return false
  438. }
  439. s := string(o)
  440. for s != "" {
  441. var next string
  442. i := strings.Index(s, ",")
  443. if i >= 0 {
  444. s, next = s[:i], s[i+1:]
  445. }
  446. if s == optionName {
  447. return true
  448. }
  449. s = next
  450. }
  451. return false
  452. }