keyvalues.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
// Copyright 2012-2014 Charles Banning. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// keyvalues.go: Extract values from an arbitrary XML doc. Tag path can include wildcard characters.
  5. package mxj
  6. import (
  7. "errors"
  8. "fmt"
  9. "strconv"
  10. "strings"
  11. )
  12. // ----------------------------- get everything FOR a single key -------------------------
  13. const (
  14. minArraySize = 32
  15. )
  16. var defaultArraySize int = minArraySize
  17. // Adjust the buffers for expected number of values to return from ValuesForKey() and ValuesForPath().
  18. // This can have the effect of significantly reducing memory allocation-copy functions for large data sets.
  19. // Returns the initial buffer size.
  20. func SetArraySize(size int) int {
  21. if size > minArraySize {
  22. defaultArraySize = size
  23. } else {
  24. defaultArraySize = minArraySize
  25. }
  26. return defaultArraySize
  27. }
  28. // Return all values in Map, 'mv', associated with a 'key'. If len(returned_values) == 0, then no match.
  29. // On error, the returned array is 'nil'. NOTE: 'key' can be wildcard, "*".
  30. // 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
  31. // - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
  32. // - For attributes prefix the label with a hyphen, '-', e.g., "-seq:3".
  33. // - If the 'key' refers to a list, then "key:value" could select a list member of the list.
  34. // - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
  35. // - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
  36. // exclusion critera - e.g., "!author:William T. Gaddis".
  37. func (mv Map) ValuesForKey(key string, subkeys ...string) ([]interface{}, error) {
  38. m := map[string]interface{}(mv)
  39. var subKeyMap map[string]interface{}
  40. if len(subkeys) > 0 {
  41. var err error
  42. subKeyMap, err = getSubKeyMap(subkeys...)
  43. if err != nil {
  44. return nil, err
  45. }
  46. }
  47. ret := make([]interface{}, 0, defaultArraySize)
  48. var cnt int
  49. hasKey(m, key, &ret, &cnt, subKeyMap)
  50. return ret[:cnt], nil
  51. // ret := make([]interface{}, 0)
  52. // hasKey(m, key, &ret, subKeyMap)
  53. // return ret, nil
  54. }
  55. // hasKey - if the map 'key' exists append it to array
  56. // if it doesn't do nothing except scan array and map values
  57. func hasKey(iv interface{}, key string, ret *[]interface{}, cnt *int, subkeys map[string]interface{}) {
  58. // func hasKey(iv interface{}, key string, ret *[]interface{}, subkeys map[string]interface{}) {
  59. switch iv.(type) {
  60. case map[string]interface{}:
  61. vv := iv.(map[string]interface{})
  62. // see if the current value is of interest
  63. if v, ok := vv[key]; ok {
  64. switch v.(type) {
  65. case map[string]interface{}:
  66. if hasSubKeys(v, subkeys) {
  67. *ret = append(*ret, v)
  68. *cnt++
  69. }
  70. case []interface{}:
  71. for _, av := range v.([]interface{}) {
  72. if hasSubKeys(av, subkeys) {
  73. *ret = append(*ret, av)
  74. *cnt++
  75. }
  76. }
  77. default:
  78. if len(subkeys) == 0 {
  79. *ret = append(*ret, v)
  80. *cnt++
  81. }
  82. }
  83. }
  84. // wildcard case
  85. if key == "*" {
  86. for _, v := range vv {
  87. switch v.(type) {
  88. case map[string]interface{}:
  89. if hasSubKeys(v, subkeys) {
  90. *ret = append(*ret, v)
  91. *cnt++
  92. }
  93. case []interface{}:
  94. for _, av := range v.([]interface{}) {
  95. if hasSubKeys(av, subkeys) {
  96. *ret = append(*ret, av)
  97. *cnt++
  98. }
  99. }
  100. default:
  101. if len(subkeys) == 0 {
  102. *ret = append(*ret, v)
  103. *cnt++
  104. }
  105. }
  106. }
  107. }
  108. // scan the rest
  109. for _, v := range vv {
  110. hasKey(v, key, ret, cnt, subkeys)
  111. // hasKey(v, key, ret, subkeys)
  112. }
  113. case []interface{}:
  114. for _, v := range iv.([]interface{}) {
  115. hasKey(v, key, ret, cnt, subkeys)
  116. // hasKey(v, key, ret, subkeys)
  117. }
  118. }
  119. }
  120. // ----------------------- get everything for a node in the Map ---------------------------
  121. // Allow indexed arrays in "path" specification. (Request from Abhijit Kadam - abhijitk100@gmail.com.)
  122. // 2014.04.28 - implementation note.
  123. // Implemented as a wrapper of (old)ValuesForPath() because we need look-ahead logic to handle expansion
  124. // of wildcards and unindexed arrays. Embedding such logic into valuesForKeyPath() would have made the
  125. // code much more complicated; this wrapper is straightforward, easy to debug, and doesn't add significant overhead.
  126. // Retrieve all values for a path from the Map. If len(returned_values) == 0, then no match.
  127. // On error, the returned array is 'nil'.
  128. // 'path' is a dot-separated path of key values.
  129. // - If a node in the path is '*', then everything beyond is walked.
  130. // - 'path' can contain indexed array references, such as, "*.data[1]" and "msgs[2].data[0].field" -
  131. // even "*[2].*[0].field".
  132. // 'subkeys' (optional) are "key:val[:type]" strings representing attributes or elements in a list.
  133. // - By default 'val' is of type string. "key:val:bool" and "key:val:float" to coerce them.
  134. // - For attributes prefix the label with a hyphen, '-', e.g., "-seq:3".
  135. // - If the 'path' refers to a list, then "tag:value" would return member of the list.
  136. // - The subkey can be wildcarded - "key:*" - to require that it's there with some value.
  137. // - If a subkey is preceeded with the '!' character, the key:value[:type] entry is treated as an
  138. // exclusion critera - e.g., "!author:William T. Gaddis".
  139. func (mv Map) ValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
  140. // If there are no array indexes in path, use legacy ValuesForPath() logic.
  141. if strings.Index(path, "[") < 0 {
  142. return mv.oldValuesForPath(path, subkeys...)
  143. }
  144. var subKeyMap map[string]interface{}
  145. if len(subkeys) > 0 {
  146. var err error
  147. subKeyMap, err = getSubKeyMap(subkeys...)
  148. if err != nil {
  149. return nil, err
  150. }
  151. }
  152. keys, kerr := parsePath(path)
  153. if kerr != nil {
  154. return nil, kerr
  155. }
  156. vals, verr := valuesForArray(keys, mv)
  157. if verr != nil {
  158. return nil, verr // Vals may be nil, but return empty array.
  159. }
  160. // Need to handle subkeys ... only return members of vals that satisfy conditions.
  161. retvals := make([]interface{}, 0)
  162. for _, v := range vals {
  163. if hasSubKeys(v, subKeyMap) {
  164. retvals = append(retvals, v)
  165. }
  166. }
  167. return retvals, nil
  168. }
  169. func valuesForArray(keys []*key, m Map) ([]interface{}, error) {
  170. var tmppath string
  171. var haveFirst bool
  172. var vals []interface{}
  173. var verr error
  174. lastkey := len(keys) - 1
  175. for i := 0; i <= lastkey; i++ {
  176. if !haveFirst {
  177. tmppath = keys[i].name
  178. haveFirst = true
  179. } else {
  180. tmppath += "." + keys[i].name
  181. }
  182. // Look-ahead: explode wildcards and unindexed arrays.
  183. // Need to handle un-indexed list recursively:
  184. // e.g., path is "stuff.data[0]" rather than "stuff[0].data[0]".
  185. // Need to treat it as "stuff[0].data[0]", "stuff[1].data[0]", ...
  186. if !keys[i].isArray && i < lastkey && keys[i+1].isArray {
  187. // Can't pass subkeys because we may not be at literal end of path.
  188. vv, vverr := m.oldValuesForPath(tmppath)
  189. if vverr != nil {
  190. return nil, vverr
  191. }
  192. for _, v := range vv {
  193. // See if we can walk the value.
  194. am, ok := v.(map[string]interface{})
  195. if !ok {
  196. continue
  197. }
  198. // Work the backend.
  199. nvals, nvalserr := valuesForArray(keys[i+1:], Map(am))
  200. if nvalserr != nil {
  201. return nil, nvalserr
  202. }
  203. vals = append(vals, nvals...)
  204. }
  205. break // have recursed the whole path - return
  206. }
  207. if keys[i].isArray || i == lastkey {
  208. // Don't pass subkeys because may not be at literal end of path.
  209. vals, verr = m.oldValuesForPath(tmppath)
  210. } else {
  211. continue
  212. }
  213. if verr != nil {
  214. return nil, verr
  215. }
  216. if i == lastkey && !keys[i].isArray {
  217. break
  218. }
  219. // Now we're looking at an array - supposedly.
  220. // Is index in range of vals?
  221. if len(vals) <= keys[i].position {
  222. vals = nil
  223. break
  224. }
  225. // Return the array member of interest, if at end of path.
  226. if i == lastkey {
  227. vals = vals[keys[i].position:(keys[i].position + 1)]
  228. break
  229. }
  230. // Extract the array member of interest.
  231. am := vals[keys[i].position:(keys[i].position + 1)]
  232. // must be a map[string]interface{} value so we can keep walking the path
  233. amm, ok := am[0].(map[string]interface{})
  234. if !ok {
  235. vals = nil
  236. break
  237. }
  238. m = Map(amm)
  239. haveFirst = false
  240. }
  241. return vals, nil
  242. }
  243. type key struct {
  244. name string
  245. isArray bool
  246. position int
  247. }
  248. func parsePath(s string) ([]*key, error) {
  249. keys := strings.Split(s, ".")
  250. ret := make([]*key, 0)
  251. for i := 0; i < len(keys); i++ {
  252. if keys[i] == "" {
  253. continue
  254. }
  255. newkey := new(key)
  256. if strings.Index(keys[i], "[") < 0 {
  257. newkey.name = keys[i]
  258. ret = append(ret, newkey)
  259. continue
  260. }
  261. p := strings.Split(keys[i], "[")
  262. newkey.name = p[0]
  263. p = strings.Split(p[1], "]")
  264. if p[0] == "" { // no right bracket
  265. return nil, fmt.Errorf("no right bracket on key index: %s", keys[i])
  266. }
  267. // convert p[0] to a int value
  268. pos, nerr := strconv.ParseInt(p[0], 10, 32)
  269. if nerr != nil {
  270. return nil, fmt.Errorf("cannot convert index to int value: %s", p[0])
  271. }
  272. newkey.position = int(pos)
  273. newkey.isArray = true
  274. ret = append(ret, newkey)
  275. }
  276. return ret, nil
  277. }
  278. // legacy ValuesForPath() - now wrapped to handle special case of indexed arrays in 'path'.
  279. func (mv Map) oldValuesForPath(path string, subkeys ...string) ([]interface{}, error) {
  280. m := map[string]interface{}(mv)
  281. var subKeyMap map[string]interface{}
  282. if len(subkeys) > 0 {
  283. var err error
  284. subKeyMap, err = getSubKeyMap(subkeys...)
  285. if err != nil {
  286. return nil, err
  287. }
  288. }
  289. keys := strings.Split(path, ".")
  290. if keys[len(keys)-1] == "" {
  291. keys = keys[:len(keys)-1]
  292. }
  293. // ivals := make([]interface{}, 0)
  294. // valuesForKeyPath(&ivals, m, keys, subKeyMap)
  295. // return ivals, nil
  296. ivals := make([]interface{}, 0, defaultArraySize)
  297. var cnt int
  298. valuesForKeyPath(&ivals, &cnt, m, keys, subKeyMap)
  299. return ivals[:cnt], nil
  300. }
  301. func valuesForKeyPath(ret *[]interface{}, cnt *int, m interface{}, keys []string, subkeys map[string]interface{}) {
  302. lenKeys := len(keys)
  303. // load 'm' values into 'ret'
  304. // expand any lists
  305. if lenKeys == 0 {
  306. switch m.(type) {
  307. case map[string]interface{}:
  308. if subkeys != nil {
  309. if ok := hasSubKeys(m, subkeys); !ok {
  310. return
  311. }
  312. }
  313. *ret = append(*ret, m)
  314. *cnt++
  315. case []interface{}:
  316. for i, v := range m.([]interface{}) {
  317. if subkeys != nil {
  318. if ok := hasSubKeys(v, subkeys); !ok {
  319. continue // only load list members with subkeys
  320. }
  321. }
  322. *ret = append(*ret, (m.([]interface{}))[i])
  323. *cnt++
  324. }
  325. default:
  326. if subkeys != nil {
  327. return // must be map[string]interface{} if there are subkeys
  328. }
  329. *ret = append(*ret, m)
  330. *cnt++
  331. }
  332. return
  333. }
  334. // key of interest
  335. key := keys[0]
  336. switch key {
  337. case "*": // wildcard - scan all values
  338. switch m.(type) {
  339. case map[string]interface{}:
  340. for _, v := range m.(map[string]interface{}) {
  341. // valuesForKeyPath(ret, v, keys[1:], subkeys)
  342. valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
  343. }
  344. case []interface{}:
  345. for _, v := range m.([]interface{}) {
  346. switch v.(type) {
  347. // flatten out a list of maps - keys are processed
  348. case map[string]interface{}:
  349. for _, vv := range v.(map[string]interface{}) {
  350. // valuesForKeyPath(ret, vv, keys[1:], subkeys)
  351. valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
  352. }
  353. default:
  354. // valuesForKeyPath(ret, v, keys[1:], subkeys)
  355. valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
  356. }
  357. }
  358. }
  359. default: // key - must be map[string]interface{}
  360. switch m.(type) {
  361. case map[string]interface{}:
  362. if v, ok := m.(map[string]interface{})[key]; ok {
  363. // valuesForKeyPath(ret, v, keys[1:], subkeys)
  364. valuesForKeyPath(ret, cnt, v, keys[1:], subkeys)
  365. }
  366. case []interface{}: // may be buried in list
  367. for _, v := range m.([]interface{}) {
  368. switch v.(type) {
  369. case map[string]interface{}:
  370. if vv, ok := v.(map[string]interface{})[key]; ok {
  371. // valuesForKeyPath(ret, vv, keys[1:], subkeys)
  372. valuesForKeyPath(ret, cnt, vv, keys[1:], subkeys)
  373. }
  374. }
  375. }
  376. }
  377. }
  378. }
  379. // hasSubKeys() - interface{} equality works for string, float64, bool
  380. // 'v' must be a map[string]interface{} value to have subkeys
  381. // 'a' can have k:v pairs with v.(string) == "*", which is treated like a wildcard.
  382. func hasSubKeys(v interface{}, subkeys map[string]interface{}) bool {
  383. if len(subkeys) == 0 {
  384. return true
  385. }
  386. switch v.(type) {
  387. case map[string]interface{}:
  388. // do all subKey name:value pairs match?
  389. mv := v.(map[string]interface{})
  390. for skey, sval := range subkeys {
  391. isNotKey := false
  392. if skey[:1] == "!" { // a NOT-key
  393. skey = skey[1:]
  394. isNotKey = true
  395. }
  396. vv, ok := mv[skey]
  397. if !ok { // key doesn't exist
  398. if isNotKey { // key not there, but that's what we want
  399. if kv, ok := sval.(string); ok && kv == "*" {
  400. continue
  401. }
  402. }
  403. return false
  404. }
  405. // wildcard check
  406. if kv, ok := sval.(string); ok && kv == "*" {
  407. if isNotKey { // key is there, and we don't want it
  408. return false
  409. }
  410. continue
  411. }
  412. switch sval.(type) {
  413. case string:
  414. if s, ok := vv.(string); ok && s == sval.(string) {
  415. if isNotKey {
  416. return false
  417. }
  418. continue
  419. }
  420. case bool:
  421. if b, ok := vv.(bool); ok && b == sval.(bool) {
  422. if isNotKey {
  423. return false
  424. }
  425. continue
  426. }
  427. case float64:
  428. if f, ok := vv.(float64); ok && f == sval.(float64) {
  429. if isNotKey {
  430. return false
  431. }
  432. continue
  433. }
  434. }
  435. // key there but didn't match subkey value
  436. if isNotKey { // that's what we want
  437. continue
  438. }
  439. return false
  440. }
  441. // all subkeys matched
  442. return true
  443. }
  444. // not a map[string]interface{} value, can't have subkeys
  445. return false
  446. }
  447. // Generate map of key:value entries as map[string]string.
  448. // 'kv' arguments are "name:value" pairs: attribute keys are designated with prepended hyphen, '-'.
  449. // If len(kv) == 0, the return is (nil, nil).
  450. func getSubKeyMap(kv ...string) (map[string]interface{}, error) {
  451. if len(kv) == 0 {
  452. return nil, nil
  453. }
  454. m := make(map[string]interface{}, 0)
  455. for _, v := range kv {
  456. vv := strings.Split(v, ":")
  457. switch len(vv) {
  458. case 2:
  459. m[vv[0]] = interface{}(vv[1])
  460. case 3:
  461. switch vv[3] {
  462. case "string", "char", "text":
  463. m[vv[0]] = interface{}(vv[1])
  464. case "bool", "boolean":
  465. // ParseBool treats "1"==true & "0"==false
  466. b, err := strconv.ParseBool(vv[1])
  467. if err != nil {
  468. return nil, fmt.Errorf("can't convert subkey value to bool: %s", vv[1])
  469. }
  470. m[vv[0]] = interface{}(b)
  471. case "float", "float64", "num", "number", "numeric":
  472. f, err := strconv.ParseFloat(vv[1], 64)
  473. if err != nil {
  474. return nil, fmt.Errorf("can't convert subkey value to float: %s", vv[1])
  475. }
  476. m[vv[0]] = interface{}(f)
  477. default:
  478. return nil, fmt.Errorf("unknown subkey conversion spec: %s", v)
  479. }
  480. default:
  481. return nil, fmt.Errorf("unknown subkey spec: %s", v)
  482. }
  483. }
  484. return m, nil
  485. }
  486. // ------------------------------- END of valuesFor ... ----------------------------
  487. // ----------------------- locate where a key value is in the tree -------------------
  488. //----------------------------- find all paths to a key --------------------------------
  489. // Get all paths through Map, 'mv', (in dot-notation) that terminate with the specified key.
  490. // Results can be used with ValuesForPath.
  491. func (mv Map) PathsForKey(key string) []string {
  492. m := map[string]interface{}(mv)
  493. breadbasket := make(map[string]bool, 0)
  494. breadcrumbs := ""
  495. hasKeyPath(breadcrumbs, m, key, breadbasket)
  496. if len(breadbasket) == 0 {
  497. return nil
  498. }
  499. // unpack map keys to return
  500. res := make([]string, len(breadbasket))
  501. var i int
  502. for k := range breadbasket {
  503. res[i] = k
  504. i++
  505. }
  506. return res
  507. }
  508. // Extract the shortest path from all possible paths - from PathsForKey() - in Map, 'mv'..
  509. // Paths are strings using dot-notation.
  510. func (mv Map) PathForKeyShortest(key string) string {
  511. paths := mv.PathsForKey(key)
  512. lp := len(paths)
  513. if lp == 0 {
  514. return ""
  515. }
  516. if lp == 1 {
  517. return paths[0]
  518. }
  519. shortest := paths[0]
  520. shortestLen := len(strings.Split(shortest, "."))
  521. for i := 1; i < len(paths); i++ {
  522. vlen := len(strings.Split(paths[i], "."))
  523. if vlen < shortestLen {
  524. shortest = paths[i]
  525. shortestLen = vlen
  526. }
  527. }
  528. return shortest
  529. }
  530. // hasKeyPath - if the map 'key' exists append it to KeyPath.path and increment KeyPath.depth
  531. // This is really just a breadcrumber that saves all trails that hit the prescribed 'key'.
  532. func hasKeyPath(crumbs string, iv interface{}, key string, basket map[string]bool) {
  533. switch iv.(type) {
  534. case map[string]interface{}:
  535. vv := iv.(map[string]interface{})
  536. if _, ok := vv[key]; ok {
  537. if crumbs == "" {
  538. crumbs = key
  539. } else {
  540. crumbs += "." + key
  541. }
  542. // *basket = append(*basket, crumb)
  543. basket[crumbs] = true
  544. }
  545. // walk on down the path, key could occur again at deeper node
  546. for k, v := range vv {
  547. // create a new breadcrumb, intialized with the one we have
  548. var nbc string
  549. if crumbs == "" {
  550. nbc = k
  551. } else {
  552. nbc = crumbs + "." + k
  553. }
  554. hasKeyPath(nbc, v, key, basket)
  555. }
  556. case []interface{}:
  557. // crumb-trail doesn't change, pass it on
  558. for _, v := range iv.([]interface{}) {
  559. hasKeyPath(crumbs, v, key, basket)
  560. }
  561. }
  562. }
  563. // Returns the first found value for the path.
  564. func (mv Map) ValueForPath(path string) (interface{}, error) {
  565. vals, err := mv.ValuesForPath(path)
  566. if err != nil {
  567. return nil, err
  568. }
  569. if len(vals) == 0 {
  570. return nil, errors.New("ValueForPath: path not found")
  571. }
  572. return vals[0], nil
  573. }
  574. // Returns the first found value for the path as a string.
  575. func (mv Map) ValueForPathString(path string) (string, error) {
  576. vals, err := mv.ValuesForPath(path)
  577. if err != nil {
  578. return "", err
  579. }
  580. if len(vals) == 0 {
  581. return "", errors.New("ValueForPath: path not found")
  582. }
  583. val := vals[0]
  584. switch str := val.(type) {
  585. case string:
  586. return str, nil
  587. default:
  588. return "", fmt.Errorf("ValueForPath: unsupported type: %T", str)
  589. }
  590. }
  591. // Returns the first found value for the path as a string.
  592. // If the path is not found then it returns an empty string.
  593. func (mv Map) ValueOrEmptyForPathString(path string) string {
  594. str, _ := mv.ValueForPathString(path)
  595. return str
  596. }