fields.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package yaml
  5. import (
  6. "bytes"
  7. "encoding"
  8. "encoding/json"
  9. "reflect"
  10. "sort"
  11. "strings"
  12. "sync"
  13. "unicode"
  14. "unicode/utf8"
  15. )
  16. // indirect walks down v allocating pointers as needed,
  17. // until it gets to a non-pointer.
  18. // if it encounters an Unmarshaler, indirect stops and returns that.
  19. // if decodingNull is true, indirect stops at the last pointer so it can be set to nil.
  20. func indirect(v reflect.Value, decodingNull bool) (json.Unmarshaler, encoding.TextUnmarshaler, reflect.Value) {
  21. // If v is a named type and is addressable,
  22. // start with its address, so that if the type has pointer methods,
  23. // we find them.
  24. if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() {
  25. v = v.Addr()
  26. }
  27. for {
  28. // Load value from interface, but only if the result will be
  29. // usefully addressable.
  30. if v.Kind() == reflect.Interface && !v.IsNil() {
  31. e := v.Elem()
  32. if e.Kind() == reflect.Ptr && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Ptr) {
  33. v = e
  34. continue
  35. }
  36. }
  37. if v.Kind() != reflect.Ptr {
  38. break
  39. }
  40. if v.Elem().Kind() != reflect.Ptr && decodingNull && v.CanSet() {
  41. break
  42. }
  43. if v.IsNil() {
  44. if v.CanSet() {
  45. v.Set(reflect.New(v.Type().Elem()))
  46. } else {
  47. v = reflect.New(v.Type().Elem())
  48. }
  49. }
  50. if v.Type().NumMethod() > 0 {
  51. if u, ok := v.Interface().(json.Unmarshaler); ok {
  52. return u, nil, reflect.Value{}
  53. }
  54. if u, ok := v.Interface().(encoding.TextUnmarshaler); ok {
  55. return nil, u, reflect.Value{}
  56. }
  57. }
  58. v = v.Elem()
  59. }
  60. return nil, nil, v
  61. }
  62. // A field represents a single field found in a struct.
  63. type field struct {
  64. name string
  65. nameBytes []byte // []byte(name)
  66. equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
  67. tag bool
  68. index []int
  69. typ reflect.Type
  70. omitEmpty bool
  71. quoted bool
  72. }
  73. func fillField(f field) field {
  74. f.nameBytes = []byte(f.name)
  75. f.equalFold = foldFunc(f.nameBytes)
  76. return f
  77. }
  78. // byName sorts field by name, breaking ties with depth,
  79. // then breaking ties with "name came from json tag", then
  80. // breaking ties with index sequence.
  81. type byName []field
  82. func (x byName) Len() int { return len(x) }
  83. func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  84. func (x byName) Less(i, j int) bool {
  85. if x[i].name != x[j].name {
  86. return x[i].name < x[j].name
  87. }
  88. if len(x[i].index) != len(x[j].index) {
  89. return len(x[i].index) < len(x[j].index)
  90. }
  91. if x[i].tag != x[j].tag {
  92. return x[i].tag
  93. }
  94. return byIndex(x).Less(i, j)
  95. }
  96. // byIndex sorts field by index sequence.
  97. type byIndex []field
  98. func (x byIndex) Len() int { return len(x) }
  99. func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
  100. func (x byIndex) Less(i, j int) bool {
  101. for k, xik := range x[i].index {
  102. if k >= len(x[j].index) {
  103. return false
  104. }
  105. if xik != x[j].index[k] {
  106. return xik < x[j].index[k]
  107. }
  108. }
  109. return len(x[i].index) < len(x[j].index)
  110. }
  111. // typeFields returns a list of fields that JSON should recognize for the given type.
  112. // The algorithm is breadth-first search over the set of structs to include - the top struct
  113. // and then any reachable anonymous structs.
  114. func typeFields(t reflect.Type) []field {
  115. // Anonymous fields to explore at the current level and the next.
  116. current := []field{}
  117. next := []field{{typ: t}}
  118. // Count of queued names for current level and the next.
  119. count := map[reflect.Type]int{}
  120. nextCount := map[reflect.Type]int{}
  121. // Types already visited at an earlier level.
  122. visited := map[reflect.Type]bool{}
  123. // Fields found.
  124. var fields []field
  125. for len(next) > 0 {
  126. current, next = next, current[:0]
  127. count, nextCount = nextCount, map[reflect.Type]int{}
  128. for _, f := range current {
  129. if visited[f.typ] {
  130. continue
  131. }
  132. visited[f.typ] = true
  133. // Scan f.typ for fields to include.
  134. for i := 0; i < f.typ.NumField(); i++ {
  135. sf := f.typ.Field(i)
  136. if sf.PkgPath != "" { // unexported
  137. continue
  138. }
  139. tag := sf.Tag.Get("json")
  140. if tag == "-" {
  141. continue
  142. }
  143. name, opts := parseTag(tag)
  144. if !isValidTag(name) {
  145. name = ""
  146. }
  147. index := make([]int, len(f.index)+1)
  148. copy(index, f.index)
  149. index[len(f.index)] = i
  150. ft := sf.Type
  151. if ft.Name() == "" && ft.Kind() == reflect.Ptr {
  152. // Follow pointer.
  153. ft = ft.Elem()
  154. }
  155. // Record found field and index sequence.
  156. if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
  157. tagged := name != ""
  158. if name == "" {
  159. name = sf.Name
  160. }
  161. fields = append(fields, fillField(field{
  162. name: name,
  163. tag: tagged,
  164. index: index,
  165. typ: ft,
  166. omitEmpty: opts.Contains("omitempty"),
  167. quoted: opts.Contains("string"),
  168. }))
  169. if count[f.typ] > 1 {
  170. // If there were multiple instances, add a second,
  171. // so that the annihilation code will see a duplicate.
  172. // It only cares about the distinction between 1 or 2,
  173. // so don't bother generating any more copies.
  174. fields = append(fields, fields[len(fields)-1])
  175. }
  176. continue
  177. }
  178. // Record new anonymous struct to explore in next round.
  179. nextCount[ft]++
  180. if nextCount[ft] == 1 {
  181. next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
  182. }
  183. }
  184. }
  185. }
  186. sort.Sort(byName(fields))
  187. // Delete all fields that are hidden by the Go rules for embedded fields,
  188. // except that fields with JSON tags are promoted.
  189. // The fields are sorted in primary order of name, secondary order
  190. // of field index length. Loop over names; for each name, delete
  191. // hidden fields by choosing the one dominant field that survives.
  192. out := fields[:0]
  193. for advance, i := 0, 0; i < len(fields); i += advance {
  194. // One iteration per name.
  195. // Find the sequence of fields with the name of this first field.
  196. fi := fields[i]
  197. name := fi.name
  198. for advance = 1; i+advance < len(fields); advance++ {
  199. fj := fields[i+advance]
  200. if fj.name != name {
  201. break
  202. }
  203. }
  204. if advance == 1 { // Only one field with this name
  205. out = append(out, fi)
  206. continue
  207. }
  208. dominant, ok := dominantField(fields[i : i+advance])
  209. if ok {
  210. out = append(out, dominant)
  211. }
  212. }
  213. fields = out
  214. sort.Sort(byIndex(fields))
  215. return fields
  216. }
  217. // dominantField looks through the fields, all of which are known to
  218. // have the same name, to find the single field that dominates the
  219. // others using Go's embedding rules, modified by the presence of
  220. // JSON tags. If there are multiple top-level fields, the boolean
  221. // will be false: This condition is an error in Go and we skip all
  222. // the fields.
  223. func dominantField(fields []field) (field, bool) {
  224. // The fields are sorted in increasing index-length order. The winner
  225. // must therefore be one with the shortest index length. Drop all
  226. // longer entries, which is easy: just truncate the slice.
  227. length := len(fields[0].index)
  228. tagged := -1 // Index of first tagged field.
  229. for i, f := range fields {
  230. if len(f.index) > length {
  231. fields = fields[:i]
  232. break
  233. }
  234. if f.tag {
  235. if tagged >= 0 {
  236. // Multiple tagged fields at the same level: conflict.
  237. // Return no field.
  238. return field{}, false
  239. }
  240. tagged = i
  241. }
  242. }
  243. if tagged >= 0 {
  244. return fields[tagged], true
  245. }
  246. // All remaining fields have the same length. If there's more than one,
  247. // we have a conflict (two fields named "X" at the same level) and we
  248. // return no field.
  249. if len(fields) > 1 {
  250. return field{}, false
  251. }
  252. return fields[0], true
  253. }
  254. var fieldCache struct {
  255. sync.RWMutex
  256. m map[reflect.Type][]field
  257. }
  258. // cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
  259. func cachedTypeFields(t reflect.Type) []field {
  260. fieldCache.RLock()
  261. f := fieldCache.m[t]
  262. fieldCache.RUnlock()
  263. if f != nil {
  264. return f
  265. }
  266. // Compute fields without lock.
  267. // Might duplicate effort but won't hold other computations back.
  268. f = typeFields(t)
  269. if f == nil {
  270. f = []field{}
  271. }
  272. fieldCache.Lock()
  273. if fieldCache.m == nil {
  274. fieldCache.m = map[reflect.Type][]field{}
  275. }
  276. fieldCache.m[t] = f
  277. fieldCache.Unlock()
  278. return f
  279. }
  280. func isValidTag(s string) bool {
  281. if s == "" {
  282. return false
  283. }
  284. for _, c := range s {
  285. switch {
  286. case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c):
  287. // Backslash and quote chars are reserved, but
  288. // otherwise any punctuation chars are allowed
  289. // in a tag name.
  290. default:
  291. if !unicode.IsLetter(c) && !unicode.IsDigit(c) {
  292. return false
  293. }
  294. }
  295. }
  296. return true
  297. }
  298. const (
  299. caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
  300. kelvin = '\u212a'
  301. smallLongEss = '\u017f'
  302. )
  303. // foldFunc returns one of four different case folding equivalence
  304. // functions, from most general (and slow) to fastest:
  305. //
  306. // 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
  307. // 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
  308. // 3) asciiEqualFold, no special, but includes non-letters (including _)
  309. // 4) simpleLetterEqualFold, no specials, no non-letters.
  310. //
  311. // The letters S and K are special because they map to 3 runes, not just 2:
  312. // * S maps to s and to U+017F 'ſ' Latin small letter long s
  313. // * k maps to K and to U+212A 'K' Kelvin sign
  314. // See http://play.golang.org/p/tTxjOc0OGo
  315. //
  316. // The returned function is specialized for matching against s and
  317. // should only be given s. It's not curried for performance reasons.
  318. func foldFunc(s []byte) func(s, t []byte) bool {
  319. nonLetter := false
  320. special := false // special letter
  321. for _, b := range s {
  322. if b >= utf8.RuneSelf {
  323. return bytes.EqualFold
  324. }
  325. upper := b & caseMask
  326. if upper < 'A' || upper > 'Z' {
  327. nonLetter = true
  328. } else if upper == 'K' || upper == 'S' {
  329. // See above for why these letters are special.
  330. special = true
  331. }
  332. }
  333. if special {
  334. return equalFoldRight
  335. }
  336. if nonLetter {
  337. return asciiEqualFold
  338. }
  339. return simpleLetterEqualFold
  340. }
  341. // equalFoldRight is a specialization of bytes.EqualFold when s is
  342. // known to be all ASCII (including punctuation), but contains an 's',
  343. // 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
  344. // See comments on foldFunc.
  345. func equalFoldRight(s, t []byte) bool {
  346. for _, sb := range s {
  347. if len(t) == 0 {
  348. return false
  349. }
  350. tb := t[0]
  351. if tb < utf8.RuneSelf {
  352. if sb != tb {
  353. sbUpper := sb & caseMask
  354. if 'A' <= sbUpper && sbUpper <= 'Z' {
  355. if sbUpper != tb&caseMask {
  356. return false
  357. }
  358. } else {
  359. return false
  360. }
  361. }
  362. t = t[1:]
  363. continue
  364. }
  365. // sb is ASCII and t is not. t must be either kelvin
  366. // sign or long s; sb must be s, S, k, or K.
  367. tr, size := utf8.DecodeRune(t)
  368. switch sb {
  369. case 's', 'S':
  370. if tr != smallLongEss {
  371. return false
  372. }
  373. case 'k', 'K':
  374. if tr != kelvin {
  375. return false
  376. }
  377. default:
  378. return false
  379. }
  380. t = t[size:]
  381. }
  382. if len(t) > 0 {
  383. return false
  384. }
  385. return true
  386. }
  387. // asciiEqualFold is a specialization of bytes.EqualFold for use when
  388. // s is all ASCII (but may contain non-letters) and contains no
  389. // special-folding letters.
  390. // See comments on foldFunc.
  391. func asciiEqualFold(s, t []byte) bool {
  392. if len(s) != len(t) {
  393. return false
  394. }
  395. for i, sb := range s {
  396. tb := t[i]
  397. if sb == tb {
  398. continue
  399. }
  400. if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
  401. if sb&caseMask != tb&caseMask {
  402. return false
  403. }
  404. } else {
  405. return false
  406. }
  407. }
  408. return true
  409. }
  410. // simpleLetterEqualFold is a specialization of bytes.EqualFold for
  411. // use when s is all ASCII letters (no underscores, etc) and also
  412. // doesn't contain 'k', 'K', 's', or 'S'.
  413. // See comments on foldFunc.
  414. func simpleLetterEqualFold(s, t []byte) bool {
  415. if len(s) != len(t) {
  416. return false
  417. }
  418. for i, b := range s {
  419. if b&caseMask != t[i]&caseMask {
  420. return false
  421. }
  422. }
  423. return true
  424. }
  425. // tagOptions is the string following a comma in a struct field's "json"
  426. // tag, or the empty string. It does not include the leading comma.
  427. type tagOptions string
  428. // parseTag splits a struct field's json tag into its name and
  429. // comma-separated options.
  430. func parseTag(tag string) (string, tagOptions) {
  431. if idx := strings.Index(tag, ","); idx != -1 {
  432. return tag[:idx], tagOptions(tag[idx+1:])
  433. }
  434. return tag, tagOptions("")
  435. }
  436. // Contains reports whether a comma-separated list of options
  437. // contains a particular substr flag. substr must be surrounded by a
  438. // string boundary or commas.
  439. func (o tagOptions) Contains(optionName string) bool {
  440. if len(o) == 0 {
  441. return false
  442. }
  443. s := string(o)
  444. for s != "" {
  445. var next string
  446. i := strings.Index(s, ",")
  447. if i >= 0 {
  448. s, next = s[:i], s[i+1:]
  449. }
  450. if s == optionName {
  451. return true
  452. }
  453. s = next
  454. }
  455. return false
  456. }