text_test.go 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package text
  5. import (
  6. "fmt"
  7. "math"
  8. "strings"
  9. "testing"
  10. "unicode/utf8"
  11. "github.com/golang/protobuf/v2/internal/detrand"
  12. "github.com/golang/protobuf/v2/internal/flags"
  13. "github.com/golang/protobuf/v2/reflect/protoreflect"
  14. "github.com/google/go-cmp/cmp"
  15. "github.com/google/go-cmp/cmp/cmpopts"
  16. )
  17. // Disable detrand to enable direct comparisons on outputs.
  18. func init() { detrand.Disable() }
  19. var S = fmt.Sprintf
  20. var V = ValueOf
  21. var ID = func(n protoreflect.Name) Value { return V(n) }
  22. type Lst = []Value
  23. type Msg = [][2]Value
  24. func Test(t *testing.T) {
  25. const space = " \n\r\t"
  26. tests := []struct {
  27. in string
  28. wantVal Value
  29. wantOut string
  30. wantOutBracket string
  31. wantOutASCII string
  32. wantOutIndent string
  33. wantErr string
  34. }{{
  35. in: "",
  36. wantVal: V(Msg{}),
  37. wantOutIndent: "\n",
  38. }, {
  39. in: S("%s# hello%s", space, space),
  40. wantVal: V(Msg{}),
  41. }, {
  42. in: S("%s# hello\rfoo:bar", space),
  43. wantVal: V(Msg{}),
  44. }, {
  45. // Comments only extend until the newline.
  46. in: S("%s# hello\nfoo:bar", space),
  47. wantVal: V(Msg{{ID("foo"), ID("bar")}}),
  48. wantOut: "foo:bar",
  49. wantOutIndent: "foo: bar\n",
  50. }, {
  51. // NUL is an invalid whitespace since C++ uses C-strings.
  52. in: "\x00",
  53. wantErr: `invalid "\x00" as identifier`,
  54. }, {
  55. in: "foo:0",
  56. wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
  57. wantOut: "foo:0",
  58. }, {
  59. in: S("%sfoo%s:0", space, space),
  60. wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
  61. }, {
  62. in: "foo bar:0",
  63. wantErr: `expected ':' after message key`,
  64. }, {
  65. in: "[foo]:0",
  66. wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
  67. wantOut: "[foo]:0",
  68. wantOutIndent: "[foo]: 0\n",
  69. }, {
  70. in: S("%s[%sfoo%s]%s:0", space, space, space, space),
  71. wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
  72. }, {
  73. in: "[proto.package.name]:0",
  74. wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
  75. wantOut: "[proto.package.name]:0",
  76. wantOutIndent: "[proto.package.name]: 0\n",
  77. }, {
  78. in: S("%s[%sproto.package.name%s]%s:0", space, space, space, space),
  79. wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
  80. }, {
  81. in: "['sub.domain.com\x2fpath\x2fto\x2fproto.package.name']:0",
  82. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  83. wantOut: "[sub.domain.com/path/to/proto.package.name]:0",
  84. wantOutIndent: "[sub.domain.com/path/to/proto.package.name]: 0\n",
  85. }, {
  86. in: "[\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"]:0",
  87. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  88. }, {
  89. in: S("%s[%s'sub.domain.com\x2fpath\x2fto\x2fproto.package.name'%s]%s:0", space, space, space, space),
  90. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  91. }, {
  92. in: S("%s[%s\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"%s]%s:0", space, space, space, space),
  93. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  94. }, {
  95. in: `['http://example.com/path/to/proto.package.name']:0`,
  96. wantVal: V(Msg{{V("http://example.com/path/to/proto.package.name"), V(uint32(0))}}),
  97. wantOut: `["http://example.com/path/to/proto.package.name"]:0`,
  98. wantOutIndent: `["http://example.com/path/to/proto.package.name"]: 0` + "\n",
  99. }, {
  100. in: "[proto.package.name:0",
  101. wantErr: `invalid character ':', expected ']' at end of extension name`,
  102. }, {
  103. in: "[proto.package name]:0",
  104. wantErr: `invalid character 'n', expected ']' at end of extension name`,
  105. }, {
  106. in: `["proto.package" "name"]:0`,
  107. wantErr: `invalid character '"', expected ']' at end of extension name`,
  108. }, {
  109. in: `["\z"]`,
  110. wantErr: `invalid escape code "\\z" in string`,
  111. }, {
  112. in: "[$]",
  113. wantErr: `invalid "$" as identifier`,
  114. }, {
  115. // This parses fine, but should result in a error later since no
  116. // type name in proto will ever be just a number.
  117. in: "[20]:0",
  118. wantVal: V(Msg{{V("20"), V(uint32(0))}}),
  119. wantOut: "[20]:0",
  120. }, {
  121. in: "20:0",
  122. wantVal: V(Msg{{V(uint32(20)), V(uint32(0))}}),
  123. wantOut: "20:0",
  124. }, {
  125. in: "0x20:0",
  126. wantVal: V(Msg{{V(uint32(0x20)), V(uint32(0))}}),
  127. wantOut: "32:0",
  128. }, {
  129. in: "020:0",
  130. wantVal: V(Msg{{V(uint32(020)), V(uint32(0))}}),
  131. wantOut: "16:0",
  132. }, {
  133. in: "-20:0",
  134. wantErr: `invalid "-20" as identifier`,
  135. }, {
  136. in: `foo:true bar:"s" baz:{} qux:[] wib:id`,
  137. wantVal: V(Msg{
  138. {ID("foo"), V(true)},
  139. {ID("bar"), V("s")},
  140. {ID("baz"), V(Msg{})},
  141. {ID("qux"), V(Lst{})},
  142. {ID("wib"), ID("id")},
  143. }),
  144. wantOut: `foo:true bar:"s" baz:{} qux:[] wib:id`,
  145. wantOutIndent: "foo: true\nbar: \"s\"\nbaz: {}\nqux: []\nwib: id\n",
  146. }, {
  147. in: S(`%sfoo%s:%strue%s %sbar%s:%s"s"%s %sbaz%s:%s<>%s %squx%s:%s[]%s %swib%s:%sid%s`,
  148. space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space),
  149. wantVal: V(Msg{
  150. {ID("foo"), V(true)},
  151. {ID("bar"), V("s")},
  152. {ID("baz"), V(Msg{})},
  153. {ID("qux"), V(Lst{})},
  154. {ID("wib"), ID("id")},
  155. }),
  156. }, {
  157. in: `foo:true;`,
  158. wantVal: V(Msg{{ID("foo"), V(true)}}),
  159. wantOut: "foo:true",
  160. wantOutIndent: "foo: true\n",
  161. }, {
  162. in: `foo:true,`,
  163. wantVal: V(Msg{{ID("foo"), V(true)}}),
  164. }, {
  165. in: `foo:bar;,`,
  166. wantErr: `invalid "," as identifier`,
  167. }, {
  168. in: `foo:bar,;`,
  169. wantErr: `invalid ";" as identifier`,
  170. }, {
  171. in: `footrue`,
  172. wantErr: `unexpected EOF`,
  173. }, {
  174. in: `foo true`,
  175. wantErr: `expected ':' after message key`,
  176. }, {
  177. in: `foo"s"`,
  178. wantErr: `expected ':' after message key`,
  179. }, {
  180. in: `foo "s"`,
  181. wantErr: `expected ':' after message key`,
  182. }, {
  183. in: `foo{}`,
  184. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  185. wantOut: "foo:{}",
  186. wantOutBracket: "foo:<>",
  187. wantOutIndent: "foo: {}\n",
  188. }, {
  189. in: `foo {}`,
  190. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  191. }, {
  192. in: `foo<>`,
  193. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  194. }, {
  195. in: `foo <>`,
  196. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  197. }, {
  198. in: `foo[]`,
  199. wantErr: `expected ':' after message key`,
  200. }, {
  201. in: `foo []`,
  202. wantErr: `expected ':' after message key`,
  203. }, {
  204. in: `foo:truebar:true`,
  205. wantErr: `invalid ":" as identifier`,
  206. }, {
  207. in: `foo:"s"bar:true`,
  208. wantVal: V(Msg{{ID("foo"), V("s")}, {ID("bar"), V(true)}}),
  209. wantOut: `foo:"s" bar:true`,
  210. wantOutIndent: "foo: \"s\"\nbar: true\n",
  211. }, {
  212. in: `foo:0bar:true`,
  213. wantErr: `invalid "0bar" as number or bool`,
  214. }, {
  215. in: `foo:{}bar:true`,
  216. wantVal: V(Msg{{ID("foo"), V(Msg{})}, {ID("bar"), V(true)}}),
  217. wantOut: "foo:{} bar:true",
  218. wantOutBracket: "foo:<> bar:true",
  219. wantOutIndent: "foo: {}\nbar: true\n",
  220. }, {
  221. in: `foo:[]bar:true`,
  222. wantVal: V(Msg{{ID("foo"), V(Lst{})}, {ID("bar"), V(true)}}),
  223. }, {
  224. in: `foo{bar:true}`,
  225. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  226. wantOut: "foo:{bar:true}",
  227. wantOutBracket: "foo:<bar:true>",
  228. wantOutIndent: "foo: {\n\tbar: true\n}\n",
  229. }, {
  230. in: `foo<bar:true>`,
  231. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  232. }, {
  233. in: `foo{bar:true,}`,
  234. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  235. }, {
  236. in: `foo{bar:true;}`,
  237. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  238. }, {
  239. in: `foo{`,
  240. wantErr: `unexpected EOF`,
  241. }, {
  242. in: `foo{ `,
  243. wantErr: `unexpected EOF`,
  244. }, {
  245. in: `foo{[`,
  246. wantErr: `unexpected EOF`,
  247. }, {
  248. in: `foo{[ `,
  249. wantErr: `unexpected EOF`,
  250. }, {
  251. in: `foo{bar:true,;}`,
  252. wantErr: `invalid ";" as identifier`,
  253. }, {
  254. in: `foo{bar:true;,}`,
  255. wantErr: `invalid "," as identifier`,
  256. }, {
  257. in: `foo<bar:{}>`,
  258. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(Msg{})}})}}),
  259. wantOut: "foo:{bar:{}}",
  260. wantOutBracket: "foo:<bar:<>>",
  261. wantOutIndent: "foo: {\n\tbar: {}\n}\n",
  262. }, {
  263. in: `foo<bar:{>`,
  264. wantErr: `invalid character '>', expected '}' at end of message`,
  265. }, {
  266. in: `foo<bar:{}`,
  267. wantErr: `unexpected EOF`,
  268. }, {
  269. in: `arr:[]`,
  270. wantVal: V(Msg{{ID("arr"), V(Lst{})}}),
  271. wantOut: "arr:[]",
  272. wantOutBracket: "arr:[]",
  273. wantOutIndent: "arr: []\n",
  274. }, {
  275. in: `arr:[,]`,
  276. wantErr: `invalid "," as number or bool`,
  277. }, {
  278. in: `arr:[0 0]`,
  279. wantErr: `invalid character '0', expected ']' at end of list`,
  280. }, {
  281. in: `arr:["foo" "bar"]`,
  282. wantVal: V(Msg{{ID("arr"), V(Lst{V("foobar")})}}),
  283. wantOut: `arr:["foobar"]`,
  284. wantOutBracket: `arr:["foobar"]`,
  285. wantOutIndent: "arr: [\n\t\"foobar\"\n]\n",
  286. }, {
  287. in: `arr:[0,]`,
  288. wantErr: `invalid "]" as number or bool`,
  289. }, {
  290. in: `arr:[true,0,"",id,[],{}]`,
  291. wantVal: V(Msg{{ID("arr"), V(Lst{
  292. V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
  293. })}}),
  294. wantOut: `arr:[true,0,"",id,[],{}]`,
  295. wantOutBracket: `arr:[true,0,"",id,[],<>]`,
  296. wantOutIndent: "arr: [\n\ttrue,\n\t0,\n\t\"\",\n\tid,\n\t[],\n\t{}\n]\n",
  297. }, {
  298. in: S(`arr:[%strue%s,%s0%s,%s""%s,%sid%s,%s[]%s,%s{}%s]`,
  299. space, space, space, space, space, space, space, space, space, space, space, space),
  300. wantVal: V(Msg{{ID("arr"), V(Lst{
  301. V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
  302. })}}),
  303. }, {
  304. in: `arr:[`,
  305. wantErr: `unexpected EOF`,
  306. }, {
  307. in: `{`,
  308. wantErr: `invalid "{" as identifier`,
  309. }, {
  310. in: `<`,
  311. wantErr: `invalid "<" as identifier`,
  312. }, {
  313. in: `[`,
  314. wantErr: "unexpected EOF",
  315. }, {
  316. in: `}`,
  317. wantErr: "1 bytes of unconsumed input",
  318. }, {
  319. in: `>`,
  320. wantErr: "1 bytes of unconsumed input",
  321. }, {
  322. in: `]`,
  323. wantErr: `invalid "]" as identifier`,
  324. }, {
  325. in: `str: "'"`,
  326. wantVal: V(Msg{{ID("str"), V(`'`)}}),
  327. wantOut: `str:"'"`,
  328. }, {
  329. in: `str: '"'`,
  330. wantVal: V(Msg{{ID("str"), V(`"`)}}),
  331. wantOut: `str:"\""`,
  332. }, {
  333. // String that has as few escaped characters as possible.
  334. in: `str: ` + func() string {
  335. var b []byte
  336. for i := 0; i < utf8.RuneSelf; i++ {
  337. switch i {
  338. case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
  339. default:
  340. b = append(b, byte(i))
  341. }
  342. }
  343. return "'" + string(b) + "'"
  344. }(),
  345. wantVal: V(Msg{{ID("str"), V("\x01\x02\x03\x04\x05\x06\a\b\t\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f")}}),
  346. wantOut: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
  347. wantOutASCII: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
  348. }, {
  349. in: "str: '\xde\xad\xbe\xef'",
  350. wantVal: V(Msg{{ID("str"), V("\xde\xad\xbe\xef")}}),
  351. wantOut: "str:\"\u07ad\\xbe\\xef\"",
  352. wantOutASCII: `str:"\u07ad\xbe\xef"`,
  353. wantErr: "invalid UTF-8 detected",
  354. }, {
  355. // Valid UTF-8 wire encoding, but sub-optimal encoding.
  356. in: "str: '\xc0\x80'",
  357. wantVal: V(Msg{{ID("str"), V("\xc0\x80")}}),
  358. wantOut: `str:"\xc0\x80"`,
  359. wantOutASCII: `str:"\xc0\x80"`,
  360. wantErr: "invalid UTF-8 detected",
  361. }, {
  362. // Valid UTF-8 wire encoding, but invalid rune (surrogate pair).
  363. in: "str: '\xed\xa0\x80'",
  364. wantVal: V(Msg{{ID("str"), V("\xed\xa0\x80")}}),
  365. wantOut: `str:"\xed\xa0\x80"`,
  366. wantOutASCII: `str:"\xed\xa0\x80"`,
  367. wantErr: "invalid UTF-8 detected",
  368. }, {
  369. // Valid UTF-8 wire encoding, but invalid rune (above max rune).
  370. in: "str: '\xf7\xbf\xbf\xbf'",
  371. wantVal: V(Msg{{ID("str"), V("\xf7\xbf\xbf\xbf")}}),
  372. wantOut: `str:"\xf7\xbf\xbf\xbf"`,
  373. wantOutASCII: `str:"\xf7\xbf\xbf\xbf"`,
  374. wantErr: "invalid UTF-8 detected",
  375. }, {
  376. // Valid UTF-8 wire encoding of the RuneError rune.
  377. in: "str: '\xef\xbf\xbd'",
  378. wantVal: V(Msg{{ID("str"), V(string(utf8.RuneError))}}),
  379. wantOut: `str:"` + string(utf8.RuneError) + `"`,
  380. wantOutASCII: `str:"\ufffd"`,
  381. }, {
  382. in: "str: 'hello\u1234world'",
  383. wantVal: V(Msg{{ID("str"), V("hello\u1234world")}}),
  384. wantOut: "str:\"hello\u1234world\"",
  385. wantOutASCII: `str:"hello\u1234world"`,
  386. }, {
  387. in: `str: '\"\'\\\?\a\b\n\r\t\v\f\1\12\123\xA\xaB\x12\uAb8f\U0010FFFF'`,
  388. wantVal: V(Msg{{ID("str"), V("\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff")}}),
  389. wantOut: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`,
  390. wantOutASCII: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`,
  391. }, {
  392. in: `str: '`,
  393. wantErr: `unexpected EOF`,
  394. }, {
  395. in: `str: '\`,
  396. wantErr: `unexpected EOF`,
  397. }, {
  398. in: `str: '\'`,
  399. wantErr: `unexpected EOF`,
  400. }, {
  401. in: `str: '\8'`,
  402. wantErr: `invalid escape code "\\8" in string`,
  403. }, {
  404. in: `str: '\1x'`,
  405. wantVal: V(Msg{{ID("str"), V("\001x")}}),
  406. wantOut: `str:"\x01x"`,
  407. wantOutASCII: `str:"\x01x"`,
  408. }, {
  409. in: `str: '\12x'`,
  410. wantVal: V(Msg{{ID("str"), V("\012x")}}),
  411. wantOut: `str:"\nx"`,
  412. wantOutASCII: `str:"\nx"`,
  413. }, {
  414. in: `str: '\123x'`,
  415. wantVal: V(Msg{{ID("str"), V("\123x")}}),
  416. wantOut: `str:"Sx"`,
  417. wantOutASCII: `str:"Sx"`,
  418. }, {
  419. in: `str: '\1234x'`,
  420. wantVal: V(Msg{{ID("str"), V("\1234x")}}),
  421. wantOut: `str:"S4x"`,
  422. wantOutASCII: `str:"S4x"`,
  423. }, {
  424. in: `str: '\1'`,
  425. wantVal: V(Msg{{ID("str"), V("\001")}}),
  426. wantOut: `str:"\x01"`,
  427. wantOutASCII: `str:"\x01"`,
  428. }, {
  429. in: `str: '\12'`,
  430. wantVal: V(Msg{{ID("str"), V("\012")}}),
  431. wantOut: `str:"\n"`,
  432. wantOutASCII: `str:"\n"`,
  433. }, {
  434. in: `str: '\123'`,
  435. wantVal: V(Msg{{ID("str"), V("\123")}}),
  436. wantOut: `str:"S"`,
  437. wantOutASCII: `str:"S"`,
  438. }, {
  439. in: `str: '\1234'`,
  440. wantVal: V(Msg{{ID("str"), V("\1234")}}),
  441. wantOut: `str:"S4"`,
  442. wantOutASCII: `str:"S4"`,
  443. }, {
  444. in: `str: '\377'`,
  445. wantVal: V(Msg{{ID("str"), V("\377")}}),
  446. wantOut: `str:"\xff"`,
  447. wantOutASCII: `str:"\xff"`,
  448. }, {
  449. // Overflow octal escape.
  450. in: `str: '\400'`,
  451. wantErr: `invalid octal escape code "\\400" in string`,
  452. }, {
  453. in: `str: '\xfx'`,
  454. wantVal: V(Msg{{ID("str"), V("\x0fx")}}),
  455. wantOut: `str:"\x0fx"`,
  456. wantOutASCII: `str:"\x0fx"`,
  457. }, {
  458. in: `str: '\xffx'`,
  459. wantVal: V(Msg{{ID("str"), V("\xffx")}}),
  460. wantOut: `str:"\xffx"`,
  461. wantOutASCII: `str:"\xffx"`,
  462. }, {
  463. in: `str: '\xfffx'`,
  464. wantVal: V(Msg{{ID("str"), V("\xfffx")}}),
  465. wantOut: `str:"\xfffx"`,
  466. wantOutASCII: `str:"\xfffx"`,
  467. }, {
  468. in: `str: '\xf'`,
  469. wantVal: V(Msg{{ID("str"), V("\x0f")}}),
  470. wantOut: `str:"\x0f"`,
  471. wantOutASCII: `str:"\x0f"`,
  472. }, {
  473. in: `str: '\xff'`,
  474. wantVal: V(Msg{{ID("str"), V("\xff")}}),
  475. wantOut: `str:"\xff"`,
  476. wantOutASCII: `str:"\xff"`,
  477. }, {
  478. in: `str: '\xfff'`,
  479. wantVal: V(Msg{{ID("str"), V("\xfff")}}),
  480. wantOut: `str:"\xfff"`,
  481. wantOutASCII: `str:"\xfff"`,
  482. }, {
  483. in: `str: '\xz'`,
  484. wantErr: `invalid hex escape code "\\x" in string`,
  485. }, {
  486. in: `str: '\uPo'`,
  487. wantErr: `unexpected EOF`,
  488. }, {
  489. in: `str: '\uPoo'`,
  490. wantErr: `invalid Unicode escape code "\\uPoo'" in string`,
  491. }, {
  492. in: `str: '\uPoop'`,
  493. wantErr: `invalid Unicode escape code "\\uPoop" in string`,
  494. }, {
  495. // Unmatched surrogate pair.
  496. in: `str: '\uDEAD'`,
  497. wantErr: `unexpected EOF`, // trying to reader other half
  498. }, {
  499. // Surrogate pair with invalid other half.
  500. in: `str: '\uDEAD\u0000'`,
  501. wantErr: `invalid Unicode escape code "\\u0000" in string`,
  502. }, {
  503. // Properly matched surrogate pair.
  504. in: `str: '\uD800\uDEAD'`,
  505. wantVal: V(Msg{{ID("str"), V("𐊭")}}),
  506. wantOut: `str:"𐊭"`,
  507. wantOutASCII: `str:"\U000102ad"`,
  508. }, {
  509. // Overflow on Unicode rune.
  510. in: `str: '\U00110000'`,
  511. wantErr: `invalid Unicode escape code "\\U00110000" in string`,
  512. }, {
  513. in: `str: '\z'`,
  514. wantErr: `invalid escape code "\\z" in string`,
  515. }, {
  516. // Strings cannot have NUL literal since C-style strings forbid them.
  517. in: "str: '\x00'",
  518. wantErr: `invalid character '\x00' in string`,
  519. }, {
  520. // Strings cannot have newline literal. The C++ permits them if an
  521. // option is specified to allow them. In Go, we always forbid them.
  522. in: "str: '\n'",
  523. wantErr: `invalid character '\n' in string`,
  524. }, {
  525. in: "name: \"My name is \"\n\"elsewhere\"",
  526. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  527. wantOut: `name:"My name is elsewhere"`,
  528. wantOutASCII: `name:"My name is elsewhere"`,
  529. }, {
  530. in: "name: 'My name is '\n'elsewhere'",
  531. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  532. }, {
  533. in: "name: 'My name is '\n\"elsewhere\"",
  534. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  535. }, {
  536. in: "name: \"My name is \"\n'elsewhere'",
  537. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  538. }, {
  539. in: "name: \"My \"'name '\"is \"\n'elsewhere'",
  540. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  541. }, {
  542. in: `crazy:"x'"'\""\''"'z"`,
  543. wantVal: V(Msg{{ID("crazy"), V(`x'""''z`)}}),
  544. }, {
  545. in: `nums: [t,T,true,True,TRUE,f,F,false,False,FALSE]`,
  546. wantVal: V(Msg{{ID("nums"), V(Lst{
  547. V(true),
  548. ID("T"),
  549. V(true),
  550. V(true),
  551. ID("TRUE"),
  552. V(false),
  553. ID("F"),
  554. V(false),
  555. V(false),
  556. ID("FALSE"),
  557. })}}),
  558. wantOut: "nums:[true,T,true,true,TRUE,false,F,false,false,FALSE]",
  559. wantOutIndent: "nums: [\n\ttrue,\n\tT,\n\ttrue,\n\ttrue,\n\tTRUE,\n\tfalse,\n\tF,\n\tfalse,\n\tfalse,\n\tFALSE\n]\n",
  560. }, {
  561. in: `nums: [nan,inf,-inf,NaN,NAN,Inf,INF]`,
  562. wantVal: V(Msg{{ID("nums"), V(Lst{
  563. V(math.NaN()),
  564. V(math.Inf(+1)),
  565. V(math.Inf(-1)),
  566. ID("NaN"),
  567. ID("NAN"),
  568. ID("Inf"),
  569. ID("INF"),
  570. })}}),
  571. wantOut: "nums:[nan,inf,-inf,NaN,NAN,Inf,INF]",
  572. wantOutIndent: "nums: [\n\tnan,\n\tinf,\n\t-inf,\n\tNaN,\n\tNAN,\n\tInf,\n\tINF\n]\n",
  573. }, {
  574. // C++ permits this, but we currently reject this.
  575. in: `num: -nan`,
  576. wantErr: `invalid "-nan" as number or bool`,
  577. }, {
  578. in: `nums: [0,-0,-9876543210,9876543210,0x0,0x0123456789abcdef,-0x0123456789abcdef,01234567,-01234567]`,
  579. wantVal: V(Msg{{ID("nums"), V(Lst{
  580. V(uint32(0)),
  581. V(int32(-0)),
  582. V(int64(-9876543210)),
  583. V(uint64(9876543210)),
  584. V(uint32(0x0)),
  585. V(uint64(0x0123456789abcdef)),
  586. V(int64(-0x0123456789abcdef)),
  587. V(uint64(01234567)),
  588. V(int64(-01234567)),
  589. })}}),
  590. wantOut: "nums:[0,0,-9876543210,9876543210,0,81985529216486895,-81985529216486895,342391,-342391]",
  591. wantOutIndent: "nums: [\n\t0,\n\t0,\n\t-9876543210,\n\t9876543210,\n\t0,\n\t81985529216486895,\n\t-81985529216486895,\n\t342391,\n\t-342391\n]\n",
  592. }, {
  593. in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`,
  594. wantVal: V(Msg{{ID("nums"), V(Lst{
  595. V(0.0),
  596. V(0.0),
  597. V(1.0),
  598. V(10.0),
  599. V(-0.0),
  600. V(-1.0),
  601. V(-10.0),
  602. V(1.0),
  603. V(0.1e-3),
  604. V(1.5e+5),
  605. V(1.0e+10),
  606. V(0.0),
  607. })}}),
  608. wantOut: "nums:[0,0,1,10,0,-1,-10,1,0.0001,150000,1e+10,0]",
  609. wantOutIndent: "nums: [\n\t0,\n\t0,\n\t1,\n\t10,\n\t0,\n\t-1,\n\t-10,\n\t1,\n\t0.0001,\n\t150000,\n\t1e+10,\n\t0\n]\n",
  610. }, {
  611. in: `nums: [0xbeefbeef,0xbeefbeefbeefbeef]`,
  612. wantVal: V(Msg{{ID("nums"), func() Value {
  613. if flags.Proto1Legacy {
  614. return V(Lst{V(int32(-1091584273)), V(int64(-4688318750159552785))})
  615. } else {
  616. return V(Lst{V(uint32(0xbeefbeef)), V(uint64(0xbeefbeefbeefbeef))})
  617. }
  618. }()}}),
  619. }, {
  620. in: `num: +0`,
  621. wantErr: `invalid "+0" as number or bool`,
  622. }, {
  623. in: `num: 01.1234`,
  624. wantErr: `invalid "01.1234" as number or bool`,
  625. }, {
  626. in: `num: 0x`,
  627. wantErr: `invalid "0x" as number or bool`,
  628. }, {
  629. in: `num: 0xX`,
  630. wantErr: `invalid "0xX" as number or bool`,
  631. }, {
  632. in: `num: 0800`,
  633. wantErr: `invalid "0800" as number or bool`,
  634. }, {
  635. in: `num: true.`,
  636. wantErr: `invalid "true." as number or bool`,
  637. }, {
  638. in: `num: .`,
  639. wantErr: `parsing ".": invalid syntax`,
  640. }, {
  641. in: `num: -.`,
  642. wantErr: `parsing "-.": invalid syntax`,
  643. }, {
  644. in: `num: 1e10000`,
  645. wantErr: `parsing "1e10000": value out of range`,
  646. }, {
  647. in: `num: 99999999999999999999`,
  648. wantErr: `parsing "99999999999999999999": value out of range`,
  649. }, {
  650. in: `num: -99999999999999999999`,
  651. wantErr: `parsing "-99999999999999999999": value out of range`,
  652. }, {
  653. in: "x: -",
  654. wantErr: `syntax error (line 1:5)`,
  655. }, {
  656. in: "x:[\"💩\"x",
  657. wantErr: `syntax error (line 1:7)`,
  658. }, {
  659. in: "x:\n\n[\"🔥🔥🔥\"x",
  660. wantErr: `syntax error (line 3:7)`,
  661. }, {
  662. in: "x:[\"👍🏻👍🏿\"x",
  663. wantErr: `syntax error (line 1:10)`, // multi-rune emojis; could be column:8
  664. }, {
  665. in: `
  666. firstName : "John",
  667. lastName : "Smith" ,
  668. isAlive : true,
  669. age : 27,
  670. address { # missing colon is okay for messages
  671. streetAddress : "21 2nd Street" ,
  672. city : "New York" ,
  673. state : "NY" ,
  674. postalCode : "10021-3100" ; # trailing semicolon is okay
  675. },
  676. phoneNumbers : [ {
  677. type : "home" ,
  678. number : "212 555-1234"
  679. } , {
  680. type : "office" ,
  681. number : "646 555-4567"
  682. } , {
  683. type : "mobile" ,
  684. number : "123 456-7890" , # trailing comma is okay
  685. } ],
  686. children : [] ,
  687. spouse : null`,
  688. wantVal: V(Msg{
  689. {ID("firstName"), V("John")},
  690. {ID("lastName"), V("Smith")},
  691. {ID("isAlive"), V(true)},
  692. {ID("age"), V(27.0)},
  693. {ID("address"), V(Msg{
  694. {ID("streetAddress"), V("21 2nd Street")},
  695. {ID("city"), V("New York")},
  696. {ID("state"), V("NY")},
  697. {ID("postalCode"), V("10021-3100")},
  698. })},
  699. {ID("phoneNumbers"), V([]Value{
  700. V(Msg{
  701. {ID("type"), V("home")},
  702. {ID("number"), V("212 555-1234")},
  703. }),
  704. V(Msg{
  705. {ID("type"), V("office")},
  706. {ID("number"), V("646 555-4567")},
  707. }),
  708. V(Msg{
  709. {ID("type"), V("mobile")},
  710. {ID("number"), V("123 456-7890")},
  711. }),
  712. })},
  713. {ID("children"), V([]Value{})},
  714. {ID("spouse"), V(protoreflect.Name("null"))},
  715. }),
  716. wantOut: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:{streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"} phoneNumbers:[{type:"home" number:"212 555-1234"},{type:"office" number:"646 555-4567"},{type:"mobile" number:"123 456-7890"}] children:[] spouse:null`,
  717. wantOutBracket: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:<streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"> phoneNumbers:[<type:"home" number:"212 555-1234">,<type:"office" number:"646 555-4567">,<type:"mobile" number:"123 456-7890">] children:[] spouse:null`,
  718. wantOutIndent: `firstName: "John"
  719. lastName: "Smith"
  720. isAlive: true
  721. age: 27
  722. address: {
  723. streetAddress: "21 2nd Street"
  724. city: "New York"
  725. state: "NY"
  726. postalCode: "10021-3100"
  727. }
  728. phoneNumbers: [
  729. {
  730. type: "home"
  731. number: "212 555-1234"
  732. },
  733. {
  734. type: "office"
  735. number: "646 555-4567"
  736. },
  737. {
  738. type: "mobile"
  739. number: "123 456-7890"
  740. }
  741. ]
  742. children: []
  743. spouse: null
  744. `,
  745. }}
  746. opts := cmp.Options{
  747. cmpopts.EquateEmpty(),
  748. // Transform composites (List and Message).
  749. cmp.FilterValues(func(x, y Value) bool {
  750. return (x.Type() == List && y.Type() == List) || (x.Type() == Message && y.Type() == Message)
  751. }, cmp.Transformer("", func(v Value) interface{} {
  752. if v.Type() == List {
  753. return v.List()
  754. } else {
  755. return v.Message()
  756. }
  757. })),
  758. // Compare scalars (Bool, Int, Uint, Float, String, Name).
  759. cmp.FilterValues(func(x, y Value) bool {
  760. return !(x.Type() == List && y.Type() == List) && !(x.Type() == Message && y.Type() == Message)
  761. }, cmp.Comparer(func(x, y Value) bool {
  762. if x.Type() == List || x.Type() == Message || y.Type() == List || y.Type() == Message {
  763. return false
  764. }
  765. // Ensure golden value is always in x variable.
  766. if len(x.raw) > 0 {
  767. x, y = y, x
  768. }
  769. switch x.Type() {
  770. case Bool:
  771. want, _ := x.Bool()
  772. got, ok := y.Bool()
  773. return got == want && ok
  774. case Int:
  775. want, _ := x.Int(true)
  776. got, ok := y.Int(want < math.MinInt32 || math.MaxInt32 < want)
  777. return got == want && ok
  778. case Uint:
  779. want, _ := x.Uint(true)
  780. got, ok := y.Uint(math.MaxUint32 < want)
  781. return got == want && ok
  782. case Float:
  783. want, _ := x.Float(true)
  784. got, ok := y.Float(math.MaxFloat32 < math.Abs(want))
  785. if math.IsNaN(got) || math.IsNaN(want) {
  786. return math.IsNaN(got) == math.IsNaN(want)
  787. }
  788. return got == want && ok
  789. case Name:
  790. want, _ := x.Name()
  791. got, ok := y.Name()
  792. return got == want && ok
  793. default:
  794. return x.String() == y.String()
  795. }
  796. })),
  797. }
  798. for _, tt := range tests {
  799. t.Run("", func(t *testing.T) {
  800. if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
  801. gotVal, err := Unmarshal([]byte(tt.in))
  802. if err == nil {
  803. if tt.wantErr != "" {
  804. t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
  805. }
  806. } else {
  807. if tt.wantErr == "" {
  808. t.Errorf("Unmarshal(): got %v, want nil error", err)
  809. } else if !strings.Contains(err.Error(), tt.wantErr) {
  810. t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
  811. }
  812. }
  813. if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
  814. t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
  815. }
  816. }
  817. if tt.wantOut != "" {
  818. gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, false)
  819. if err != nil {
  820. t.Errorf("Marshal(): got %v, want nil error", err)
  821. }
  822. if string(gotOut) != tt.wantOut {
  823. t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut)
  824. }
  825. }
  826. if tt.wantOutBracket != "" {
  827. gotOut, err := Marshal(tt.wantVal, "", [2]byte{'<', '>'}, false)
  828. if err != nil {
  829. t.Errorf("Marshal(Bracket): got %v, want nil error", err)
  830. }
  831. if string(gotOut) != tt.wantOutBracket {
  832. t.Errorf("Marshal(Bracket):\ngot: %s\nwant: %s", gotOut, tt.wantOutBracket)
  833. }
  834. }
  835. if tt.wantOutASCII != "" {
  836. gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, true)
  837. if err != nil {
  838. t.Errorf("Marshal(ASCII): got %v, want nil error", err)
  839. }
  840. if string(gotOut) != tt.wantOutASCII {
  841. t.Errorf("Marshal(ASCII):\ngot: %s\nwant: %s", gotOut, tt.wantOutASCII)
  842. }
  843. }
  844. if tt.wantOutIndent != "" {
  845. gotOut, err := Marshal(tt.wantVal, "\t", [2]byte{0, 0}, false)
  846. if err != nil {
  847. t.Errorf("Marshal(Indent): got %v, want nil error", err)
  848. }
  849. if string(gotOut) != tt.wantOutIndent {
  850. t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent)
  851. }
  852. }
  853. })
  854. }
  855. }