xml.go 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919
  1. // Copyright 2012-2016 Charles Banning. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file
  4. // xml.go - basically the core of X2j for map[string]interface{} values.
  5. // NewMapXml, NewMapXmlReader, mv.Xml, mv.XmlWriter
  6. // see x2j and j2x for wrappers to provide end-to-end transformation of XML and JSON messages.
  7. package mxj
  8. import (
  9. "bytes"
  10. "encoding/xml"
  11. "errors"
  12. "fmt"
  13. "io"
  14. "sort"
  15. "strconv"
  16. "strings"
  17. "time"
  18. )
  19. // ------------------- NewMapXml & NewMapXmlReader ... -------------------------
  20. // If XmlCharsetReader != nil, it will be used to decode the XML, if required.
  21. // import (
  22. // charset "code.google.com/p/go-charset/charset"
  23. // github.com/clbanning/mxj
  24. // )
  25. // ...
  26. // mxj.XmlCharsetReader = charset.NewReader
  27. // m, merr := mxj.NewMapXml(xmlValue)
  28. var XmlCharsetReader func(charset string, input io.Reader) (io.Reader, error)
  29. // NewMapXml - convert a XML doc into a Map
  30. // (This is analogous to unmarshalling a JSON string to map[string]interface{} using json.Unmarshal().)
  31. // If the optional argument 'cast' is 'true', then values will be converted to boolean or float64 if possible.
  32. //
  33. // Converting XML to JSON is a simple as:
  34. // ...
  35. // mapVal, merr := mxj.NewMapXml(xmlVal)
  36. // if merr != nil {
  37. // // handle error
  38. // }
  39. // jsonVal, jerr := mapVal.Json()
  40. // if jerr != nil {
  41. // // handle error
  42. // }
  43. //
  44. // NOTES:
  45. // 1. The 'xmlVal' will be parsed looking for an xml.StartElement, so BOM and other
  46. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  47. // 2. If CoerceKeysToLower() has been called, then all key values will be lower case.
  48. func NewMapXml(xmlVal []byte, cast ...bool) (Map, error) {
  49. var r bool
  50. if len(cast) == 1 {
  51. r = cast[0]
  52. }
  53. return xmlToMap(xmlVal, r)
  54. }
  55. // Get next XML doc from an io.Reader as a Map value. Returns Map value.
  56. // NOTES:
  57. // 1. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other
  58. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  59. // 2. If CoerceKeysToLower() has been called, then all key values will be lower case.
  60. func NewMapXmlReader(xmlReader io.Reader, cast ...bool) (Map, error) {
  61. var r bool
  62. if len(cast) == 1 {
  63. r = cast[0]
  64. }
  65. // build the node tree
  66. return xmlReaderToMap(xmlReader, r)
  67. }
  68. // XmlWriterBufSize - set the size of io.Writer for the TeeReader used by NewMapXmlReaderRaw()
  69. // and HandleXmlReaderRaw(). This reduces repeated memory allocations and copy() calls in most cases.
  70. // NOTE: the 'xmlVal' will be parsed looking for an xml.StartElement, so BOM and other
  71. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  72. var XmlWriterBufSize int = 256
  73. // Get next XML doc from an io.Reader as a Map value. Returns Map value and slice with the raw XML.
  74. // NOTES:
  75. // 1. Due to the implementation of xml.Decoder, the raw XML off the reader is buffered to []byte
  76. // using a ByteReader. If the io.Reader is an os.File, there may be significant performance impact.
  77. // See the examples - getmetrics1.go through getmetrics4.go - for comparative use cases on a large
  78. // data set. If the io.Reader is wrapping a []byte value in-memory, however, such as http.Request.Body
  79. // you CAN use it to efficiently unmarshal a XML doc and retrieve the raw XML in a single call.
  80. // 2. The 'raw' return value may be larger than the XML text value.
  81. // 3. The 'xmlReader' will be parsed looking for an xml.StartElement, so BOM and other
  82. // extraneous xml.CharData will be ignored unless io.EOF is reached first.
  83. // 4. If CoerceKeysToLower() has been called, then all key values will be lower case.
  84. func NewMapXmlReaderRaw(xmlReader io.Reader, cast ...bool) (Map, []byte, error) {
  85. var r bool
  86. if len(cast) == 1 {
  87. r = cast[0]
  88. }
  89. // create TeeReader so we can retrieve raw XML
  90. buf := make([]byte, XmlWriterBufSize)
  91. wb := bytes.NewBuffer(buf)
  92. trdr := myTeeReader(xmlReader, wb) // see code at EOF
  93. // build the node tree
  94. m, err := xmlReaderToMap(trdr, r)
  95. // retrieve the raw XML that was decoded
  96. b := make([]byte, wb.Len())
  97. _, _ = wb.Read(b)
  98. if err != nil {
  99. return nil, b, err
  100. }
  101. return m, b, nil
  102. }
  103. // xmlReaderToMap() - parse a XML io.Reader to a map[string]interface{} value
  104. func xmlReaderToMap(rdr io.Reader, r bool) (map[string]interface{}, error) {
  105. // parse the Reader
  106. p := xml.NewDecoder(rdr)
  107. p.CharsetReader = XmlCharsetReader
  108. return xmlToMapParser("", nil, p, r)
  109. }
  110. // xmlToMap - convert a XML doc into map[string]interface{} value
  111. func xmlToMap(doc []byte, r bool) (map[string]interface{}, error) {
  112. b := bytes.NewReader(doc)
  113. p := xml.NewDecoder(b)
  114. p.CharsetReader = XmlCharsetReader
  115. return xmlToMapParser("", nil, p, r)
  116. }
  117. // ===================================== where the work happens =============================
  118. // Allow people to drop hyphen when unmarshaling the XML doc.
  119. var useHyphen bool = true
  120. // PrependAttrWithHyphen. Prepend attribute tags with a hyphen.
  121. // Default is 'true'.
  122. // Note:
  123. // If 'false', unmarshaling and marshaling is not symmetric. Attributes will be
  124. // marshal'd as <attr_tag>attr</attr_tag> and may be part of a list.
  125. func PrependAttrWithHyphen(v bool) {
  126. useHyphen = v
  127. }
  128. // Include sequence id with inner tags. - per Sean Murphy, murphysean84@gmail.com.
  129. var includeTagSeqNum bool
  130. // IncludeTagSeqNum - include a "_seq":N key:value pair with each inner tag, denoting
  131. // its position when parsed. This is of limited usefulness, since list values cannot
  132. // be tagged with "_seq" without changing their depth in the Map.
  133. // So THIS SHOULD BE USED WITH CAUTION - see the test cases. Here's a sample of what
  134. // you get.
  135. /*
  136. <Obj c="la" x="dee" h="da">
  137. <IntObj id="3"/>
  138. <IntObj1 id="1"/>
  139. <IntObj id="2"/>
  140. <StrObj>hello</StrObj>
  141. </Obj>
  142. parses as:
  143. {
  144. Obj:{
  145. "-c":"la",
  146. "-h":"da",
  147. "-x":"dee",
  148. "intObj":[
  149. {
  150. "-id"="3",
  151. "_seq":"0" // if mxj.Cast is passed, then: "_seq":0
  152. },
  153. {
  154. "-id"="2",
  155. "_seq":"2"
  156. }],
  157. "intObj1":{
  158. "-id":"1",
  159. "_seq":"1"
  160. },
  161. "StrObj":{
  162. "#text":"hello", // simple element value gets "#text" tag
  163. "_seq":"3"
  164. }
  165. }
  166. }
  167. */
  168. func IncludeTagSeqNum(b bool) {
  169. includeTagSeqNum = b
  170. }
  171. // all keys will be "lower case"
  172. var lowerCase bool
  173. // Coerce all tag values to keys in lower case. This is useful if you've got sources with variable
  174. // tag capitalization, and you want to use m.ValuesForKeys(), etc., with the key or path spec
  175. // in lower case.
  176. // CoerceKeysToLower() will toggle the coercion flag true|false - on|off
  177. // CoerceKeysToLower(true|false) will set the coercion flag on|off
  178. //
  179. // NOTE: only recognized by NewMapXml, NewMapXmlReader, and NewMapXmlReaderRaw functions as well as
  180. // the associated HandleXmlReader and HandleXmlReaderRaw.
  181. func CoerceKeysToLower(b ...bool) {
  182. if len(b) == 1 {
  183. lowerCase = b[0]
  184. return
  185. }
  186. if !lowerCase {
  187. lowerCase = true
  188. } else {
  189. lowerCase = false
  190. }
  191. }
  192. // xmlToMapParser (2015.11.12) - load a 'clean' XML doc into a map[string]interface{} directly.
  193. // A refactoring of xmlToTreeParser(), markDuplicate() and treeToMap() - here, all-in-one.
  194. // We've removed the intermediate *node tree with the allocation and subsequent rescanning.
  195. func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[string]interface{}, error) {
  196. if lowerCase {
  197. skey = strings.ToLower(skey)
  198. }
  199. // NOTE: all attributes and sub-elements parsed into 'na', 'na' is returned as value for 'skey'
  200. // Unless 'skey' is a simple element w/o attributes, in which case the xml.CharData value is the value.
  201. var n, na map[string]interface{}
  202. var seq int // for includeTagSeqNum
  203. // Allocate maps and load attributes, if any.
  204. if skey != "" {
  205. n = make(map[string]interface{}) // old n
  206. na = make(map[string]interface{}) // old n.nodes
  207. if len(a) > 0 {
  208. for _, v := range a {
  209. var key string
  210. if useHyphen {
  211. key = `-` + v.Name.Local
  212. } else {
  213. key = v.Name.Local
  214. }
  215. if lowerCase {
  216. key = strings.ToLower(key)
  217. }
  218. na[key] = cast(v.Value, r)
  219. }
  220. }
  221. }
  222. for {
  223. t, err := p.Token()
  224. if err != nil {
  225. if err != io.EOF {
  226. return nil, errors.New("xml.Decoder.Token() - " + err.Error())
  227. }
  228. return nil, err
  229. }
  230. switch t.(type) {
  231. case xml.StartElement:
  232. tt := t.(xml.StartElement)
  233. // First call to xmlToMapParser() doesn't pass xml.StartElement - the map key.
  234. // So when the loop is first entered, the first token is the root tag along
  235. // with any attributes, which we process here.
  236. //
  237. // Subsequent calls to xmlToMapParser() will pass in tag+attributes for
  238. // processing before getting the next token which is the element value,
  239. // which is done above.
  240. if skey == "" {
  241. return xmlToMapParser(tt.Name.Local, tt.Attr, p, r)
  242. }
  243. // If not initializing the map, parse the element.
  244. // len(nn) == 1, necessarily - it is just an 'n'.
  245. nn, err := xmlToMapParser(tt.Name.Local, tt.Attr, p, r)
  246. if err != nil {
  247. return nil, err
  248. }
  249. // The nn map[string]interface{} value is a na[nn_key] value.
  250. // We need to see if nn_key already exists - means we're parsing a list.
  251. // This may require converting na[nn_key] value into []interface{} type.
  252. // First, extract the key:val for the map - it's a singleton.
  253. // Note: if CoerceKeysToLower() called, then key will be lower case.
  254. var key string
  255. var val interface{}
  256. for key, val = range nn {
  257. break
  258. }
  259. // IncludeTagSeqNum requests that the element be augmented with a "_seq" sub-element.
  260. // In theory, we don't need this if len(na) == 1. But, we don't know what might
  261. // come next - we're only parsing forward. So if you ask for 'includeTagSeqNum' you
  262. // get it on every element. (Personally, I never liked this, but I added it on request
  263. // and did get a $50 Amazon gift card in return - now we support it for backwards compatibility!)
  264. if includeTagSeqNum {
  265. switch val.(type) {
  266. case []interface{}:
  267. // noop - There's no clean way to handle this w/o changing message structure.
  268. case map[string]interface{}:
  269. val.(map[string]interface{})["_seq"] = seq // will overwrite an "_seq" XML tag
  270. seq++
  271. case interface{}: // a non-nil simple element: string, float64, bool
  272. v := map[string]interface{}{"#text": val}
  273. v["_seq"] = seq
  274. seq++
  275. val = v
  276. }
  277. }
  278. // 'na' holding sub-elements of n.
  279. // See if 'key' already exists.
  280. // If 'key' exists, then this is a list, if not just add key:val to na.
  281. if v, ok := na[key]; ok {
  282. var a []interface{}
  283. switch v.(type) {
  284. case []interface{}:
  285. a = v.([]interface{})
  286. default: // anything else - note: v.(type) != nil
  287. a = []interface{}{v}
  288. }
  289. a = append(a, val)
  290. na[key] = a
  291. } else {
  292. na[key] = val // save it as a singleton
  293. }
  294. case xml.EndElement:
  295. // len(n) > 0 if this is a simple element w/o xml.Attrs - see xml.CharData case.
  296. if len(n) == 0 {
  297. // If len(na)==0 we have an empty element == "";
  298. // it has no xml.Attr nor xml.CharData.
  299. // Note: in original node-tree parser, val defaulted to "";
  300. // so we always had the default if len(node.nodes) == 0.
  301. if len(na) > 0 {
  302. n[skey] = na
  303. } else {
  304. n[skey] = "" // empty element
  305. }
  306. }
  307. return n, nil
  308. case xml.CharData:
  309. // clean up possible noise
  310. tt := strings.Trim(string(t.(xml.CharData)), "\t\r\b\n ")
  311. if len(tt) > 0 {
  312. if len(na) > 0 {
  313. na["#text"] = cast(tt, r)
  314. } else if skey != "" {
  315. n[skey] = cast(tt, r)
  316. } else {
  317. // per Adrian (http://www.adrianlungu.com/) catch stray text
  318. // in decoder stream -
  319. // https://github.com/clbanning/mxj/pull/14#issuecomment-182816374
  320. // NOTE: CharSetReader must be set to non-UTF-8 CharSet or you'll get
  321. // a p.Token() decoding error when the BOM is UTF-16 or UTF-32.
  322. continue
  323. }
  324. }
  325. default:
  326. // noop
  327. }
  328. }
  329. }
  330. var castNanInf bool
  331. // Cast "Nan", "Inf", "-Inf" XML values to 'float64'.
  332. // By default, these values will be decoded as 'string'.
  333. func CastNanInf(b bool) {
  334. castNanInf = b
  335. }
  336. // cast - try to cast string values to bool or float64
  337. func cast(s string, r bool) interface{} {
  338. if r {
  339. // handle nan and inf
  340. if !castNanInf {
  341. switch strings.ToLower(s) {
  342. case "nan", "inf", "-inf":
  343. return interface{}(s)
  344. }
  345. }
  346. // handle numeric strings ahead of boolean
  347. if f, err := strconv.ParseFloat(s, 64); err == nil {
  348. return interface{}(f)
  349. }
  350. // ParseBool treats "1"==true & "0"==false
  351. // but be more strick - only allow TRUE, True, true, FALSE, False, false
  352. if s != "t" && s != "T" && s != "f" && s != "F" {
  353. if b, err := strconv.ParseBool(s); err == nil {
  354. return interface{}(b)
  355. }
  356. }
  357. }
  358. return interface{}(s)
  359. }
  360. // ------------------ END: NewMapXml & NewMapXmlReader -------------------------
  361. // ------------------ mv.Xml & mv.XmlWriter - from j2x ------------------------
  362. const (
  363. DefaultRootTag = "doc"
  364. )
  365. var useGoXmlEmptyElemSyntax bool
  366. // XmlGoEmptyElemSyntax() - <tag ...></tag> rather than <tag .../>.
  367. // Go's encoding/xml package marshals empty XML elements as <tag ...></tag>. By default this package
  368. // encodes empty elements as <tag .../>. If you're marshaling Map values that include structures
  369. // (which are passed to xml.Marshal for encoding), this will let you conform to the standard package.
  370. func XmlGoEmptyElemSyntax() {
  371. useGoXmlEmptyElemSyntax = true
  372. }
  373. // XmlDefaultEmptyElemSyntax() - <tag .../> rather than <tag ...></tag>.
  374. // Return XML encoding for empty elements to the default package setting.
  375. // Reverses effect of XmlGoEmptyElemSyntax().
  376. func XmlDefaultEmptyElemSyntax() {
  377. useGoXmlEmptyElemSyntax = false
  378. }
  379. // Encode a Map as XML. The companion of NewMapXml().
  380. // The following rules apply.
  381. // - The key label "#text" is treated as the value for a simple element with attributes.
  382. // - Map keys that begin with a hyphen, '-', are interpreted as attributes.
  383. // It is an error if the attribute doesn't have a []byte, string, number, or boolean value.
  384. // - Map value type encoding:
  385. // > string, bool, float64, int, int32, int64, float32: per "%v" formating
  386. // > []bool, []uint8: by casting to string
  387. // > structures, etc.: handed to xml.Marshal() - if there is an error, the element
  388. // value is "UNKNOWN"
  389. // - Elements with only attribute values or are null are terminated using "/>".
  390. // - If len(mv) == 1 and no rootTag is provided, then the map key is used as the root tag, possible.
  391. // Thus, `{ "key":"value" }` encodes as "<key>value</key>".
  392. // - To encode empty elements in a syntax consistent with encoding/xml call UseGoXmlEmptyElementSyntax().
  393. // The attributes tag=value pairs are alphabetized by "tag". Also, when encoding map[string]interface{} values -
  394. // complex elements, etc. - the key:value pairs are alphabetized by key so the resulting tags will appear sorted.
  395. func (mv Map) Xml(rootTag ...string) ([]byte, error) {
  396. m := map[string]interface{}(mv)
  397. var err error
  398. s := new(string)
  399. p := new(pretty) // just a stub
  400. if len(m) == 1 && len(rootTag) == 0 {
  401. for key, value := range m {
  402. // if it an array, see if all values are map[string]interface{}
  403. // we force a new root tag if we'll end up with no key:value in the list
  404. // so: key:[string_val, bool:true] --> <doc><key>string_val</key><bool>true</bool></doc>
  405. switch value.(type) {
  406. case []interface{}:
  407. for _, v := range value.([]interface{}) {
  408. switch v.(type) {
  409. case map[string]interface{}: // noop
  410. default: // anything else
  411. err = mapToXmlIndent(false, s, DefaultRootTag, m, p)
  412. goto done
  413. }
  414. }
  415. }
  416. err = mapToXmlIndent(false, s, key, value, p)
  417. }
  418. } else if len(rootTag) == 1 {
  419. err = mapToXmlIndent(false, s, rootTag[0], m, p)
  420. } else {
  421. err = mapToXmlIndent(false, s, DefaultRootTag, m, p)
  422. }
  423. done:
  424. return []byte(*s), err
  425. }
  426. // The following implementation is provided only for symmetry with NewMapXmlReader[Raw]
  427. // The names will also provide a key for the number of return arguments.
  428. // Writes the Map as XML on the Writer.
  429. // See Xml() for encoding rules.
  430. func (mv Map) XmlWriter(xmlWriter io.Writer, rootTag ...string) error {
  431. x, err := mv.Xml(rootTag...)
  432. if err != nil {
  433. return err
  434. }
  435. _, err = xmlWriter.Write(x)
  436. return err
  437. }
  438. // Writes the Map as XML on the Writer. []byte is the raw XML that was written.
  439. // See Xml() for encoding rules.
  440. func (mv Map) XmlWriterRaw(xmlWriter io.Writer, rootTag ...string) ([]byte, error) {
  441. x, err := mv.Xml(rootTag...)
  442. if err != nil {
  443. return x, err
  444. }
  445. _, err = xmlWriter.Write(x)
  446. return x, err
  447. }
  448. // Writes the Map as pretty XML on the Writer.
  449. // See Xml() for encoding rules.
  450. func (mv Map) XmlIndentWriter(xmlWriter io.Writer, prefix, indent string, rootTag ...string) error {
  451. x, err := mv.XmlIndent(prefix, indent, rootTag...)
  452. if err != nil {
  453. return err
  454. }
  455. _, err = xmlWriter.Write(x)
  456. return err
  457. }
  458. // Writes the Map as pretty XML on the Writer. []byte is the raw XML that was written.
  459. // See Xml() for encoding rules.
  460. func (mv Map) XmlIndentWriterRaw(xmlWriter io.Writer, prefix, indent string, rootTag ...string) ([]byte, error) {
  461. x, err := mv.XmlIndent(prefix, indent, rootTag...)
  462. if err != nil {
  463. return x, err
  464. }
  465. _, err = xmlWriter.Write(x)
  466. return x, err
  467. }
  468. // -------------------- END: mv.Xml & mv.XmlWriter -------------------------------
  469. // -------------- Handle XML stream by processing Map value --------------------
  470. // Default poll delay to keep Handler from spinning on an open stream
  471. // like sitting on os.Stdin waiting for imput.
  472. var xhandlerPollInterval = time.Millisecond
  473. // Bulk process XML using handlers that process a Map value.
  474. // 'rdr' is an io.Reader for XML (stream)
  475. // 'mapHandler' is the Map processor. Return of 'false' stops io.Reader processing.
  476. // 'errHandler' is the error processor. Return of 'false' stops io.Reader processing and returns the error.
  477. // Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized.
  478. // This means that you can stop reading the file on error or after processing a particular message.
  479. // To have reading and handling run concurrently, pass argument to a go routine in handler and return 'true'.
  480. func HandleXmlReader(xmlReader io.Reader, mapHandler func(Map) bool, errHandler func(error) bool) error {
  481. var n int
  482. for {
  483. m, merr := NewMapXmlReader(xmlReader)
  484. n++
  485. // handle error condition with errhandler
  486. if merr != nil && merr != io.EOF {
  487. merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error())
  488. if ok := errHandler(merr); !ok {
  489. // caused reader termination
  490. return merr
  491. }
  492. continue
  493. }
  494. // pass to maphandler
  495. if len(m) != 0 {
  496. if ok := mapHandler(m); !ok {
  497. break
  498. }
  499. } else if merr != io.EOF {
  500. time.Sleep(xhandlerPollInterval)
  501. }
  502. if merr == io.EOF {
  503. break
  504. }
  505. }
  506. return nil
  507. }
  508. // Bulk process XML using handlers that process a Map value and the raw XML.
  509. // 'rdr' is an io.Reader for XML (stream)
  510. // 'mapHandler' is the Map and raw XML - []byte - processor. Return of 'false' stops io.Reader processing.
  511. // 'errHandler' is the error and raw XML processor. Return of 'false' stops io.Reader processing and returns the error.
  512. // Note: mapHandler() and errHandler() calls are blocking, so reading and processing of messages is serialized.
  513. // This means that you can stop reading the file on error or after processing a particular message.
  514. // To have reading and handling run concurrently, pass argument(s) to a go routine in handler and return 'true'.
  515. // See NewMapXmlReaderRaw for comment on performance associated with retrieving raw XML from a Reader.
  516. func HandleXmlReaderRaw(xmlReader io.Reader, mapHandler func(Map, []byte) bool, errHandler func(error, []byte) bool) error {
  517. var n int
  518. for {
  519. m, raw, merr := NewMapXmlReaderRaw(xmlReader)
  520. n++
  521. // handle error condition with errhandler
  522. if merr != nil && merr != io.EOF {
  523. merr = fmt.Errorf("[xmlReader: %d] %s", n, merr.Error())
  524. if ok := errHandler(merr, raw); !ok {
  525. // caused reader termination
  526. return merr
  527. }
  528. continue
  529. }
  530. // pass to maphandler
  531. if len(m) != 0 {
  532. if ok := mapHandler(m, raw); !ok {
  533. break
  534. }
  535. } else if merr != io.EOF {
  536. time.Sleep(xhandlerPollInterval)
  537. }
  538. if merr == io.EOF {
  539. break
  540. }
  541. }
  542. return nil
  543. }
  544. // ----------------- END: Handle XML stream by processing Map value --------------
  545. // -------- a hack of io.TeeReader ... need one that's an io.ByteReader for xml.NewDecoder() ----------
  546. // This is a clone of io.TeeReader with the additional method t.ReadByte().
  547. // Thus, this TeeReader is also an io.ByteReader.
  548. // This is necessary because xml.NewDecoder uses a ByteReader not a Reader. It appears to have been written
  549. // with bufio.Reader or bytes.Reader in mind ... not a generic io.Reader, which doesn't have to have ReadByte()..
  550. // If NewDecoder is passed a Reader that does not satisfy ByteReader() it wraps the Reader with
  551. // bufio.NewReader and uses ReadByte rather than Read that runs the TeeReader pipe logic.
  552. type teeReader struct {
  553. r io.Reader
  554. w io.Writer
  555. b []byte
  556. }
  557. func myTeeReader(r io.Reader, w io.Writer) io.Reader {
  558. b := make([]byte, 1)
  559. return &teeReader{r, w, b}
  560. }
  561. // need for io.Reader - but we don't use it ...
  562. func (t *teeReader) Read(p []byte) (n int, err error) {
  563. return 0, nil
  564. }
  565. func (t *teeReader) ReadByte() (c byte, err error) {
  566. n, err := t.r.Read(t.b)
  567. if n > 0 {
  568. if _, err := t.w.Write(t.b[:1]); err != nil {
  569. return t.b[0], err
  570. }
  571. }
  572. return t.b[0], err
  573. }
  574. // ----------------------- END: io.TeeReader hack -----------------------------------
  575. // ---------------------- XmlIndent - from j2x package ----------------------------
  576. // Encode a map[string]interface{} as a pretty XML string.
  577. // See Xml for encoding rules.
  578. func (mv Map) XmlIndent(prefix, indent string, rootTag ...string) ([]byte, error) {
  579. m := map[string]interface{}(mv)
  580. var err error
  581. s := new(string)
  582. p := new(pretty)
  583. p.indent = indent
  584. p.padding = prefix
  585. if len(m) == 1 && len(rootTag) == 0 {
  586. // this can extract the key for the single map element
  587. // use it if it isn't a key for a list
  588. for key, value := range m {
  589. if _, ok := value.([]interface{}); ok {
  590. err = mapToXmlIndent(true, s, DefaultRootTag, m, p)
  591. } else {
  592. err = mapToXmlIndent(true, s, key, value, p)
  593. }
  594. }
  595. } else if len(rootTag) == 1 {
  596. err = mapToXmlIndent(true, s, rootTag[0], m, p)
  597. } else {
  598. err = mapToXmlIndent(true, s, DefaultRootTag, m, p)
  599. }
  600. return []byte(*s), err
  601. }
  602. type pretty struct {
  603. indent string
  604. cnt int
  605. padding string
  606. mapDepth int
  607. start int
  608. }
  609. func (p *pretty) Indent() {
  610. p.padding += p.indent
  611. p.cnt++
  612. }
  613. func (p *pretty) Outdent() {
  614. if p.cnt > 0 {
  615. p.padding = p.padding[:len(p.padding)-len(p.indent)]
  616. p.cnt--
  617. }
  618. }
  619. // where the work actually happens
  620. // returns an error if an attribute is not atomic
  621. func mapToXmlIndent(doIndent bool, s *string, key string, value interface{}, pp *pretty) error {
  622. var endTag bool
  623. var isSimple bool
  624. var elen int
  625. p := &pretty{pp.indent, pp.cnt, pp.padding, pp.mapDepth, pp.start}
  626. switch value.(type) {
  627. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  628. if doIndent {
  629. *s += p.padding
  630. }
  631. *s += `<` + key
  632. }
  633. switch value.(type) {
  634. case map[string]interface{}:
  635. vv := value.(map[string]interface{})
  636. lenvv := len(vv)
  637. // scan out attributes - keys have prepended hyphen, '-'
  638. var cntAttr int
  639. attrlist := make([][2]string, len(vv))
  640. var n int
  641. for k, v := range vv {
  642. if k[:1] == "-" {
  643. cntAttr++
  644. switch v.(type) {
  645. case string, float64, bool, int, int32, int64, float32:
  646. attrlist[n][0] = k[1:]
  647. attrlist[n][1] = fmt.Sprintf("%v", v)
  648. n++
  649. case []byte:
  650. attrlist[n][0] = k[1:]
  651. attrlist[n][1] = fmt.Sprintf("%v", string(v.([]byte)))
  652. default:
  653. return fmt.Errorf("invalid attribute value for: %s", k)
  654. }
  655. }
  656. }
  657. if cntAttr > 0 {
  658. attrlist = attrlist[:n]
  659. sort.Sort(attrList(attrlist))
  660. for _, v := range attrlist {
  661. *s += ` ` + v[0] + `="` + v[1] + `"`
  662. }
  663. }
  664. // only attributes?
  665. if cntAttr == lenvv {
  666. break
  667. }
  668. // simple element? Note: '#text" is an invalid XML tag.
  669. if v, ok := vv["#text"]; ok && cntAttr+1 == lenvv {
  670. *s += ">" + fmt.Sprintf("%v", v)
  671. endTag = true
  672. elen = 1
  673. isSimple = true
  674. break
  675. }
  676. // close tag with possible attributes
  677. *s += ">"
  678. if doIndent {
  679. *s += "\n"
  680. }
  681. // something more complex
  682. p.mapDepth++
  683. // extract the map k:v pairs and sort on key
  684. elemlist := make([][2]interface{}, len(vv))
  685. n = 0
  686. for k, v := range vv {
  687. if k[:1] == "-" {
  688. continue
  689. }
  690. elemlist[n][0] = k
  691. elemlist[n][1] = v
  692. n++
  693. }
  694. elemlist = elemlist[:n]
  695. sort.Sort(elemList(elemlist))
  696. var i int
  697. for _, v := range elemlist {
  698. switch v[1].(type) {
  699. case []interface{}:
  700. default:
  701. if i == 0 && doIndent {
  702. p.Indent()
  703. }
  704. }
  705. i++
  706. mapToXmlIndent(doIndent, s, v[0].(string), v[1], p)
  707. switch v[1].(type) {
  708. case []interface{}: // handled in []interface{} case
  709. default:
  710. if doIndent {
  711. p.Outdent()
  712. }
  713. }
  714. i--
  715. }
  716. p.mapDepth--
  717. endTag = true
  718. elen = 1 // we do have some content ...
  719. case []interface{}:
  720. for _, v := range value.([]interface{}) {
  721. if doIndent {
  722. p.Indent()
  723. }
  724. mapToXmlIndent(doIndent, s, key, v, p)
  725. if doIndent {
  726. p.Outdent()
  727. }
  728. }
  729. return nil
  730. case nil:
  731. // terminate the tag
  732. *s += "<" + key
  733. break
  734. default: // handle anything - even goofy stuff
  735. elen = 0
  736. switch value.(type) {
  737. case string, float64, bool, int, int32, int64, float32:
  738. v := fmt.Sprintf("%v", value)
  739. elen = len(v)
  740. if elen > 0 {
  741. *s += ">" + v
  742. }
  743. case []byte: // NOTE: byte is just an alias for uint8
  744. // similar to how xml.Marshal handles []byte structure members
  745. v := string(value.([]byte))
  746. elen = len(v)
  747. if elen > 0 {
  748. *s += ">" + v
  749. }
  750. default:
  751. var v []byte
  752. var err error
  753. if doIndent {
  754. v, err = xml.MarshalIndent(value, p.padding, p.indent)
  755. } else {
  756. v, err = xml.Marshal(value)
  757. }
  758. if err != nil {
  759. *s += ">UNKNOWN"
  760. } else {
  761. elen = len(v)
  762. if elen > 0 {
  763. *s += string(v)
  764. }
  765. }
  766. }
  767. isSimple = true
  768. endTag = true
  769. }
  770. if endTag {
  771. if doIndent {
  772. if !isSimple {
  773. *s += p.padding
  774. }
  775. }
  776. switch value.(type) {
  777. case map[string]interface{}, []byte, string, float64, bool, int, int32, int64, float32:
  778. if elen > 0 || useGoXmlEmptyElemSyntax {
  779. if elen == 0 {
  780. *s += ">"
  781. }
  782. *s += `</` + key + ">"
  783. } else {
  784. *s += `/>`
  785. }
  786. }
  787. } else if useGoXmlEmptyElemSyntax {
  788. *s += "></" + key + ">"
  789. } else {
  790. *s += "/>"
  791. }
  792. if doIndent {
  793. if p.cnt > p.start {
  794. *s += "\n"
  795. }
  796. p.Outdent()
  797. }
  798. return nil
  799. }
  800. // ============================ sort interface implementation =================
  801. type attrList [][2]string
  802. func (a attrList) Len() int {
  803. return len(a)
  804. }
  805. func (a attrList) Swap(i, j int) {
  806. a[i], a[j] = a[j], a[i]
  807. }
  808. func (a attrList) Less(i, j int) bool {
  809. if a[i][0] > a[j][0] {
  810. return false
  811. }
  812. return true
  813. }
  814. type elemList [][2]interface{}
  815. func (e elemList) Len() int {
  816. return len(e)
  817. }
  818. func (e elemList) Swap(i, j int) {
  819. e[i], e[j] = e[j], e[i]
  820. }
  821. func (e elemList) Less(i, j int) bool {
  822. if e[i][0].(string) > e[j][0].(string) {
  823. return false
  824. }
  825. return true
  826. }