text_test.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package text
  5. import (
  6. "fmt"
  7. "math"
  8. "regexp"
  9. "strings"
  10. "testing"
  11. "unicode/utf8"
  12. "github.com/golang/protobuf/v2/internal/flags"
  13. "github.com/golang/protobuf/v2/reflect/protoreflect"
  14. "github.com/google/go-cmp/cmp"
  15. "github.com/google/go-cmp/cmp/cmpopts"
  16. )
  17. var S = fmt.Sprintf
  18. var V = ValueOf
  19. var ID = func(n protoreflect.Name) Value { return V(n) }
  20. type Lst = []Value
  21. type Msg = [][2]Value
  // Test is a table-driven test over Unmarshal and Marshal.
  //
  // For each entry it passes in to Unmarshal and checks the returned error
  // against wantErr (substring match) and the returned Value against wantVal
  // using the cmp options built below. It then marshals wantVal with up to
  // four different argument combinations and compares each result, after
  // removeRandomSpace, with the matching wantOut* field. A wantOut* field
  // that is empty skips the corresponding Marshal check.
  22. func Test(t *testing.T) {
  23. const space = " \n\r\t"
  24. tests := []struct {
  25. in string // input passed to Unmarshal
  26. wantVal Value // expected Unmarshal result; also the input to every Marshal call
  27. wantOut string // expected output of Marshal(wantVal, "", [2]byte{0, 0}, false)
  28. wantOutBracket string // expected output of Marshal(wantVal, "", [2]byte{'<', '>'}, false)
  29. wantOutASCII string // expected output of Marshal(wantVal, "", [2]byte{0, 0}, true)
  30. wantOutIndent string // expected output of Marshal(wantVal, "\t", [2]byte{0, 0}, false)
  31. wantErr string // substring expected in the Unmarshal error; empty means no error
  32. }{{
  33. in: "",
  34. wantVal: V(Msg{}),
  35. wantOutIndent: "\n",
  36. }, {
  37. in: S("%s# hello%s", space, space),
  38. wantVal: V(Msg{}),
  39. }, {
  40. in: S("%s# hello\rfoo:bar", space),
  41. wantVal: V(Msg{}),
  42. }, {
  43. // Comments only extend until the newline.
  44. in: S("%s# hello\nfoo:bar", space),
  45. wantVal: V(Msg{{ID("foo"), ID("bar")}}),
  46. wantOut: "foo:bar",
  47. wantOutIndent: "foo: bar\n",
  48. }, {
  49. // NUL is an invalid whitespace since C++ uses C-strings.
  50. in: "\x00",
  51. wantErr: `invalid "\x00" as identifier`,
  52. }, {
  53. in: "foo:0",
  54. wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
  55. wantOut: "foo:0",
  56. }, {
  57. in: S("%sfoo%s:0", space, space),
  58. wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
  59. }, {
  60. in: "foo bar:0",
  61. wantErr: `expected ':' after message key`,
  62. }, {
  63. in: "[foo]:0",
  64. wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
  65. wantOut: "[foo]:0",
  66. wantOutIndent: "[foo]: 0\n",
  67. }, {
  68. in: S("%s[%sfoo%s]%s:0", space, space, space, space),
  69. wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
  70. }, {
  71. in: "[proto.package.name]:0",
  72. wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
  73. wantOut: "[proto.package.name]:0",
  74. wantOutIndent: "[proto.package.name]: 0\n",
  75. }, {
  76. in: S("%s[%sproto.package.name%s]%s:0", space, space, space, space),
  77. wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
  78. }, {
  79. in: "['sub.domain.com\x2fpath\x2fto\x2fproto.package.name']:0",
  80. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  81. wantOut: "[sub.domain.com/path/to/proto.package.name]:0",
  82. wantOutIndent: "[sub.domain.com/path/to/proto.package.name]: 0\n",
  83. }, {
  84. in: "[\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"]:0",
  85. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  86. }, {
  87. in: S("%s[%s'sub.domain.com\x2fpath\x2fto\x2fproto.package.name'%s]%s:0", space, space, space, space),
  88. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  89. }, {
  90. in: S("%s[%s\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"%s]%s:0", space, space, space, space),
  91. wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
  92. }, {
  93. in: `['http://example.com/path/to/proto.package.name']:0`,
  94. wantVal: V(Msg{{V("http://example.com/path/to/proto.package.name"), V(uint32(0))}}),
  95. wantOut: `["http://example.com/path/to/proto.package.name"]:0`,
  96. wantOutIndent: `["http://example.com/path/to/proto.package.name"]: 0` + "\n",
  97. }, {
  98. in: "[proto.package.name:0",
  99. wantErr: `invalid character ':', expected ']' at end of extension name`,
  100. }, {
  101. in: "[proto.package name]:0",
  102. wantErr: `invalid character 'n', expected ']' at end of extension name`,
  103. }, {
  104. in: `["proto.package" "name"]:0`,
  105. wantErr: `invalid character '"', expected ']' at end of extension name`,
  106. }, {
  107. in: `["\z"]`,
  108. wantErr: `invalid escape code "\\z" in string`,
  109. }, {
  110. in: "[$]",
  111. wantErr: `invalid "$" as identifier`,
  112. }, {
  113. // This parses fine, but should result in an error later since no
  114. // type name in proto will ever be just a number.
  115. in: "[20]:0",
  116. wantVal: V(Msg{{V("20"), V(uint32(0))}}),
  117. wantOut: "[20]:0",
  118. }, {
  119. in: "20:0",
  120. wantVal: V(Msg{{V(uint32(20)), V(uint32(0))}}),
  121. wantOut: "20:0",
  122. }, {
  123. in: "0x20:0",
  124. wantVal: V(Msg{{V(uint32(0x20)), V(uint32(0))}}),
  125. wantOut: "32:0",
  126. }, {
  127. in: "020:0",
  128. wantVal: V(Msg{{V(uint32(020)), V(uint32(0))}}),
  129. wantOut: "16:0",
  130. }, {
  131. in: "-20:0",
  132. wantErr: `invalid "-20" as identifier`,
  133. }, {
  134. in: `foo:true bar:"s" baz:{} qux:[] wib:id`,
  135. wantVal: V(Msg{
  136. {ID("foo"), V(true)},
  137. {ID("bar"), V("s")},
  138. {ID("baz"), V(Msg{})},
  139. {ID("qux"), V(Lst{})},
  140. {ID("wib"), ID("id")},
  141. }),
  142. wantOut: `foo:true bar:"s" baz:{} qux:[] wib:id`,
  143. wantOutIndent: "foo: true\nbar: \"s\"\nbaz: {}\nqux: []\nwib: id\n",
  144. }, {
  145. in: S(`%sfoo%s:%strue%s %sbar%s:%s"s"%s %sbaz%s:%s<>%s %squx%s:%s[]%s %swib%s:%sid%s`,
  146. space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space),
  147. wantVal: V(Msg{
  148. {ID("foo"), V(true)},
  149. {ID("bar"), V("s")},
  150. {ID("baz"), V(Msg{})},
  151. {ID("qux"), V(Lst{})},
  152. {ID("wib"), ID("id")},
  153. }),
  154. }, {
  155. in: `foo:true;`,
  156. wantVal: V(Msg{{ID("foo"), V(true)}}),
  157. wantOut: "foo:true",
  158. wantOutIndent: "foo: true\n",
  159. }, {
  160. in: `foo:true,`,
  161. wantVal: V(Msg{{ID("foo"), V(true)}}),
  162. }, {
  163. in: `foo:bar;,`,
  164. wantErr: `invalid "," as identifier`,
  165. }, {
  166. in: `foo:bar,;`,
  167. wantErr: `invalid ";" as identifier`,
  168. }, {
  169. in: `footrue`,
  170. wantErr: `unexpected EOF`,
  171. }, {
  172. in: `foo true`,
  173. wantErr: `expected ':' after message key`,
  174. }, {
  175. in: `foo"s"`,
  176. wantErr: `expected ':' after message key`,
  177. }, {
  178. in: `foo "s"`,
  179. wantErr: `expected ':' after message key`,
  180. }, {
  181. in: `foo{}`,
  182. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  183. wantOut: "foo:{}",
  184. wantOutBracket: "foo:<>",
  185. wantOutIndent: "foo: {}\n",
  186. }, {
  187. in: `foo {}`,
  188. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  189. }, {
  190. in: `foo<>`,
  191. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  192. }, {
  193. in: `foo <>`,
  194. wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
  195. }, {
  196. in: `foo[]`,
  197. wantErr: `expected ':' after message key`,
  198. }, {
  199. in: `foo []`,
  200. wantErr: `expected ':' after message key`,
  201. }, {
  202. in: `foo:truebar:true`,
  203. wantErr: `invalid ":" as identifier`,
  204. }, {
  205. in: `foo:"s"bar:true`,
  206. wantVal: V(Msg{{ID("foo"), V("s")}, {ID("bar"), V(true)}}),
  207. wantOut: `foo:"s" bar:true`,
  208. wantOutIndent: "foo: \"s\"\nbar: true\n",
  209. }, {
  210. in: `foo:0bar:true`,
  211. wantErr: `invalid "0bar" as number or bool`,
  212. }, {
  213. in: `foo:{}bar:true`,
  214. wantVal: V(Msg{{ID("foo"), V(Msg{})}, {ID("bar"), V(true)}}),
  215. wantOut: "foo:{} bar:true",
  216. wantOutBracket: "foo:<> bar:true",
  217. wantOutIndent: "foo: {}\nbar: true\n",
  218. }, {
  219. in: `foo:[]bar:true`,
  220. wantVal: V(Msg{{ID("foo"), V(Lst{})}, {ID("bar"), V(true)}}),
  221. }, {
  222. in: `foo{bar:true}`,
  223. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  224. wantOut: "foo:{bar:true}",
  225. wantOutBracket: "foo:<bar:true>",
  226. wantOutIndent: "foo: {\n\tbar: true\n}\n",
  227. }, {
  228. in: `foo<bar:true>`,
  229. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  230. }, {
  231. in: `foo{bar:true,}`,
  232. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  233. }, {
  234. in: `foo{bar:true;}`,
  235. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
  236. }, {
  237. in: `foo{`,
  238. wantErr: `unexpected EOF`,
  239. }, {
  240. in: `foo{ `,
  241. wantErr: `unexpected EOF`,
  242. }, {
  243. in: `foo{[`,
  244. wantErr: `unexpected EOF`,
  245. }, {
  246. in: `foo{[ `,
  247. wantErr: `unexpected EOF`,
  248. }, {
  249. in: `foo{bar:true,;}`,
  250. wantErr: `invalid ";" as identifier`,
  251. }, {
  252. in: `foo{bar:true;,}`,
  253. wantErr: `invalid "," as identifier`,
  254. }, {
  255. in: `foo<bar:{}>`,
  256. wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(Msg{})}})}}),
  257. wantOut: "foo:{bar:{}}",
  258. wantOutBracket: "foo:<bar:<>>",
  259. wantOutIndent: "foo: {\n\tbar: {}\n}\n",
  260. }, {
  261. in: `foo<bar:{>`,
  262. wantErr: `invalid character '>', expected '}' at end of message`,
  263. }, {
  264. in: `foo<bar:{}`,
  265. wantErr: `unexpected EOF`,
  266. }, {
  267. in: `arr:[]`,
  268. wantVal: V(Msg{{ID("arr"), V(Lst{})}}),
  269. wantOut: "arr:[]",
  270. wantOutBracket: "arr:[]",
  271. wantOutIndent: "arr: []\n",
  272. }, {
  273. in: `arr:[,]`,
  274. wantErr: `invalid "," as number or bool`,
  275. }, {
  276. in: `arr:[0 0]`,
  277. wantErr: `invalid character '0', expected ']' at end of list`,
  278. }, {
  279. in: `arr:["foo" "bar"]`,
  280. wantVal: V(Msg{{ID("arr"), V(Lst{V("foobar")})}}),
  281. wantOut: `arr:["foobar"]`,
  282. wantOutBracket: `arr:["foobar"]`,
  283. wantOutIndent: "arr: [\n\t\"foobar\"\n]\n",
  284. }, {
  285. in: `arr:[0,]`,
  286. wantErr: `invalid "]" as number or bool`,
  287. }, {
  288. in: `arr:[true,0,"",id,[],{}]`,
  289. wantVal: V(Msg{{ID("arr"), V(Lst{
  290. V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
  291. })}}),
  292. wantOut: `arr:[true,0,"",id,[],{}]`,
  293. wantOutBracket: `arr:[true,0,"",id,[],<>]`,
  294. wantOutIndent: "arr: [\n\ttrue,\n\t0,\n\t\"\",\n\tid,\n\t[],\n\t{}\n]\n",
  295. }, {
  296. in: S(`arr:[%strue%s,%s0%s,%s""%s,%sid%s,%s[]%s,%s{}%s]`,
  297. space, space, space, space, space, space, space, space, space, space, space, space),
  298. wantVal: V(Msg{{ID("arr"), V(Lst{
  299. V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
  300. })}}),
  301. }, {
  302. in: `arr:[`,
  303. wantErr: `unexpected EOF`,
  304. }, {
  305. in: `{`,
  306. wantErr: `invalid "{" as identifier`,
  307. }, {
  308. in: `<`,
  309. wantErr: `invalid "<" as identifier`,
  310. }, {
  311. in: `[`,
  312. wantErr: "unexpected EOF",
  313. }, {
  314. in: `}`,
  315. wantErr: "1 bytes of unconsumed input",
  316. }, {
  317. in: `>`,
  318. wantErr: "1 bytes of unconsumed input",
  319. }, {
  320. in: `]`,
  321. wantErr: `invalid "]" as identifier`,
  322. }, {
  323. in: `str: "'"`,
  324. wantVal: V(Msg{{ID("str"), V(`'`)}}),
  325. wantOut: `str:"'"`,
  326. }, {
  327. in: `str: '"'`,
  328. wantVal: V(Msg{{ID("str"), V(`"`)}}),
  329. wantOut: `str:"\""`,
  330. }, {
  331. // String that has as few escaped characters as possible.
  332. in: `str: ` + func() string {
  333. var b []byte
  334. for i := 0; i < utf8.RuneSelf; i++ {
  335. switch i {
  336. case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
  337. default:
  338. b = append(b, byte(i))
  339. }
  340. }
  341. return "'" + string(b) + "'"
  342. }(),
  343. wantVal: V(Msg{{ID("str"), V("\x01\x02\x03\x04\x05\x06\a\b\t\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f")}}),
  344. wantOut: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
  345. wantOutASCII: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
  346. }, {
  347. in: "str: '\xde\xad\xbe\xef'",
  348. wantVal: V(Msg{{ID("str"), V("\xde\xad\xbe\xef")}}),
  349. wantOut: "str:\"\u07ad\\xbe\\xef\"",
  350. wantOutASCII: `str:"\u07ad\xbe\xef"`,
  351. wantErr: "invalid UTF-8 detected",
  352. }, {
  353. // Valid UTF-8 wire encoding, but sub-optimal encoding.
  354. in: "str: '\xc0\x80'",
  355. wantVal: V(Msg{{ID("str"), V("\xc0\x80")}}),
  356. wantOut: `str:"\xc0\x80"`,
  357. wantOutASCII: `str:"\xc0\x80"`,
  358. wantErr: "invalid UTF-8 detected",
  359. }, {
  360. // Valid UTF-8 wire encoding, but invalid rune (surrogate pair).
  361. in: "str: '\xed\xa0\x80'",
  362. wantVal: V(Msg{{ID("str"), V("\xed\xa0\x80")}}),
  363. wantOut: `str:"\xed\xa0\x80"`,
  364. wantOutASCII: `str:"\xed\xa0\x80"`,
  365. wantErr: "invalid UTF-8 detected",
  366. }, {
  367. // Valid UTF-8 wire encoding, but invalid rune (above max rune).
  368. in: "str: '\xf7\xbf\xbf\xbf'",
  369. wantVal: V(Msg{{ID("str"), V("\xf7\xbf\xbf\xbf")}}),
  370. wantOut: `str:"\xf7\xbf\xbf\xbf"`,
  371. wantOutASCII: `str:"\xf7\xbf\xbf\xbf"`,
  372. wantErr: "invalid UTF-8 detected",
  373. }, {
  374. // Valid UTF-8 wire encoding of the RuneError rune.
  375. in: "str: '\xef\xbf\xbd'",
  376. wantVal: V(Msg{{ID("str"), V(string(utf8.RuneError))}}),
  377. wantOut: `str:"` + string(utf8.RuneError) + `"`,
  378. wantOutASCII: `str:"\ufffd"`,
  379. }, {
  380. in: "str: 'hello\u1234world'",
  381. wantVal: V(Msg{{ID("str"), V("hello\u1234world")}}),
  382. wantOut: "str:\"hello\u1234world\"",
  383. wantOutASCII: `str:"hello\u1234world"`,
  384. }, {
  385. in: `str: '\"\'\\\?\a\b\n\r\t\v\f\1\12\123\xA\xaB\x12\uAb8f\U0010FFFF'`,
  386. wantVal: V(Msg{{ID("str"), V("\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff")}}),
  387. wantOut: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`,
  388. wantOutASCII: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`,
  389. }, {
  390. in: `str: '`,
  391. wantErr: `unexpected EOF`,
  392. }, {
  393. in: `str: '\`,
  394. wantErr: `unexpected EOF`,
  395. }, {
  396. in: `str: '\'`,
  397. wantErr: `unexpected EOF`,
  398. }, {
  399. in: `str: '\8'`,
  400. wantErr: `invalid escape code "\\8" in string`,
  401. }, {
  402. in: `str: '\1x'`,
  403. wantVal: V(Msg{{ID("str"), V("\001x")}}),
  404. wantOut: `str:"\x01x"`,
  405. wantOutASCII: `str:"\x01x"`,
  406. }, {
  407. in: `str: '\12x'`,
  408. wantVal: V(Msg{{ID("str"), V("\012x")}}),
  409. wantOut: `str:"\nx"`,
  410. wantOutASCII: `str:"\nx"`,
  411. }, {
  412. in: `str: '\123x'`,
  413. wantVal: V(Msg{{ID("str"), V("\123x")}}),
  414. wantOut: `str:"Sx"`,
  415. wantOutASCII: `str:"Sx"`,
  416. }, {
  417. in: `str: '\1234x'`,
  418. wantVal: V(Msg{{ID("str"), V("\1234x")}}),
  419. wantOut: `str:"S4x"`,
  420. wantOutASCII: `str:"S4x"`,
  421. }, {
  422. in: `str: '\1'`,
  423. wantVal: V(Msg{{ID("str"), V("\001")}}),
  424. wantOut: `str:"\x01"`,
  425. wantOutASCII: `str:"\x01"`,
  426. }, {
  427. in: `str: '\12'`,
  428. wantVal: V(Msg{{ID("str"), V("\012")}}),
  429. wantOut: `str:"\n"`,
  430. wantOutASCII: `str:"\n"`,
  431. }, {
  432. in: `str: '\123'`,
  433. wantVal: V(Msg{{ID("str"), V("\123")}}),
  434. wantOut: `str:"S"`,
  435. wantOutASCII: `str:"S"`,
  436. }, {
  437. in: `str: '\1234'`,
  438. wantVal: V(Msg{{ID("str"), V("\1234")}}),
  439. wantOut: `str:"S4"`,
  440. wantOutASCII: `str:"S4"`,
  441. }, {
  442. in: `str: '\377'`,
  443. wantVal: V(Msg{{ID("str"), V("\377")}}),
  444. wantOut: `str:"\xff"`,
  445. wantOutASCII: `str:"\xff"`,
  446. }, {
  447. // Overflow octal escape.
  448. in: `str: '\400'`,
  449. wantErr: `invalid octal escape code "\\400" in string`,
  450. }, {
  451. in: `str: '\xfx'`,
  452. wantVal: V(Msg{{ID("str"), V("\x0fx")}}),
  453. wantOut: `str:"\x0fx"`,
  454. wantOutASCII: `str:"\x0fx"`,
  455. }, {
  456. in: `str: '\xffx'`,
  457. wantVal: V(Msg{{ID("str"), V("\xffx")}}),
  458. wantOut: `str:"\xffx"`,
  459. wantOutASCII: `str:"\xffx"`,
  460. }, {
  461. in: `str: '\xfffx'`,
  462. wantVal: V(Msg{{ID("str"), V("\xfffx")}}),
  463. wantOut: `str:"\xfffx"`,
  464. wantOutASCII: `str:"\xfffx"`,
  465. }, {
  466. in: `str: '\xf'`,
  467. wantVal: V(Msg{{ID("str"), V("\x0f")}}),
  468. wantOut: `str:"\x0f"`,
  469. wantOutASCII: `str:"\x0f"`,
  470. }, {
  471. in: `str: '\xff'`,
  472. wantVal: V(Msg{{ID("str"), V("\xff")}}),
  473. wantOut: `str:"\xff"`,
  474. wantOutASCII: `str:"\xff"`,
  475. }, {
  476. in: `str: '\xfff'`,
  477. wantVal: V(Msg{{ID("str"), V("\xfff")}}),
  478. wantOut: `str:"\xfff"`,
  479. wantOutASCII: `str:"\xfff"`,
  480. }, {
  481. in: `str: '\xz'`,
  482. wantErr: `invalid hex escape code "\\x" in string`,
  483. }, {
  484. in: `str: '\uPo'`,
  485. wantErr: `unexpected EOF`,
  486. }, {
  487. in: `str: '\uPoo'`,
  488. wantErr: `invalid Unicode escape code "\\uPoo'" in string`,
  489. }, {
  490. in: `str: '\uPoop'`,
  491. wantErr: `invalid Unicode escape code "\\uPoop" in string`,
  492. }, {
  493. // Unmatched surrogate pair.
  494. in: `str: '\uDEAD'`,
  495. wantErr: `unexpected EOF`, // trying to read other half
  496. }, {
  497. // Surrogate pair with invalid other half.
  498. in: `str: '\uDEAD\u0000'`,
  499. wantErr: `invalid Unicode escape code "\\u0000" in string`,
  500. }, {
  501. // Properly matched surrogate pair.
  502. in: `str: '\uD800\uDEAD'`,
  503. wantVal: V(Msg{{ID("str"), V("𐊭")}}),
  504. wantOut: `str:"𐊭"`,
  505. wantOutASCII: `str:"\U000102ad"`,
  506. }, {
  507. // Overflow on Unicode rune.
  508. in: `str: '\U00110000'`,
  509. wantErr: `invalid Unicode escape code "\\U00110000" in string`,
  510. }, {
  511. in: `str: '\z'`,
  512. wantErr: `invalid escape code "\\z" in string`,
  513. }, {
  514. // Strings cannot have NUL literal since C-style strings forbid them.
  515. in: "str: '\x00'",
  516. wantErr: `invalid character '\x00' in string`,
  517. }, {
  518. // Strings cannot have newline literal. C++ permits them if an
  519. // option is specified to allow them. In Go, we always forbid them.
  520. in: "str: '\n'",
  521. wantErr: `invalid character '\n' in string`,
  522. }, {
  523. in: "name: \"My name is \"\n\"elsewhere\"",
  524. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  525. wantOut: `name:"My name is elsewhere"`,
  526. wantOutASCII: `name:"My name is elsewhere"`,
  527. }, {
  528. in: "name: 'My name is '\n'elsewhere'",
  529. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  530. }, {
  531. in: "name: 'My name is '\n\"elsewhere\"",
  532. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  533. }, {
  534. in: "name: \"My name is \"\n'elsewhere'",
  535. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  536. }, {
  537. in: "name: \"My \"'name '\"is \"\n'elsewhere'",
  538. wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
  539. }, {
  540. in: `crazy:"x'"'\""\''"'z"`,
  541. wantVal: V(Msg{{ID("crazy"), V(`x'""''z`)}}),
  542. }, {
  543. in: `nums: [t,T,true,True,TRUE,f,F,false,False,FALSE]`,
  544. wantVal: V(Msg{{ID("nums"), V(Lst{
  545. V(true),
  546. ID("T"),
  547. V(true),
  548. V(true),
  549. ID("TRUE"),
  550. V(false),
  551. ID("F"),
  552. V(false),
  553. V(false),
  554. ID("FALSE"),
  555. })}}),
  556. wantOut: "nums:[true,T,true,true,TRUE,false,F,false,false,FALSE]",
  557. wantOutIndent: "nums: [\n\ttrue,\n\tT,\n\ttrue,\n\ttrue,\n\tTRUE,\n\tfalse,\n\tF,\n\tfalse,\n\tfalse,\n\tFALSE\n]\n",
  558. }, {
  559. in: `nums: [nan,inf,-inf,NaN,NAN,Inf,INF]`,
  560. wantVal: V(Msg{{ID("nums"), V(Lst{
  561. V(math.NaN()),
  562. V(math.Inf(+1)),
  563. V(math.Inf(-1)),
  564. ID("NaN"),
  565. ID("NAN"),
  566. ID("Inf"),
  567. ID("INF"),
  568. })}}),
  569. wantOut: "nums:[nan,inf,-inf,NaN,NAN,Inf,INF]",
  570. wantOutIndent: "nums: [\n\tnan,\n\tinf,\n\t-inf,\n\tNaN,\n\tNAN,\n\tInf,\n\tINF\n]\n",
  571. }, {
  572. // C++ permits this, but we currently reject this.
  573. in: `num: -nan`,
  574. wantErr: `invalid "-nan" as number or bool`,
  575. }, {
  576. in: `nums: [0,-0,-9876543210,9876543210,0x0,0x0123456789abcdef,-0x0123456789abcdef,01234567,-01234567]`,
  577. wantVal: V(Msg{{ID("nums"), V(Lst{
  578. V(uint32(0)),
  579. V(int32(-0)),
  580. V(int64(-9876543210)),
  581. V(uint64(9876543210)),
  582. V(uint32(0x0)),
  583. V(uint64(0x0123456789abcdef)),
  584. V(int64(-0x0123456789abcdef)),
  585. V(uint64(01234567)),
  586. V(int64(-01234567)),
  587. })}}),
  588. wantOut: "nums:[0,0,-9876543210,9876543210,0,81985529216486895,-81985529216486895,342391,-342391]",
  589. wantOutIndent: "nums: [\n\t0,\n\t0,\n\t-9876543210,\n\t9876543210,\n\t0,\n\t81985529216486895,\n\t-81985529216486895,\n\t342391,\n\t-342391\n]\n",
  590. }, {
  591. in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`,
  592. wantVal: V(Msg{{ID("nums"), V(Lst{
  593. V(0.0),
  594. V(0.0),
  595. V(1.0),
  596. V(10.0),
  597. V(-0.0),
  598. V(-1.0),
  599. V(-10.0),
  600. V(1.0),
  601. V(0.1e-3),
  602. V(1.5e+5),
  603. V(1.0e+10),
  604. V(0.0),
  605. })}}),
  606. wantOut: "nums:[0,0,1,10,0,-1,-10,1,0.0001,150000,1e+10,0]",
  607. wantOutIndent: "nums: [\n\t0,\n\t0,\n\t1,\n\t10,\n\t0,\n\t-1,\n\t-10,\n\t1,\n\t0.0001,\n\t150000,\n\t1e+10,\n\t0\n]\n",
  608. }, {
  609. in: `nums: [0xbeefbeef,0xbeefbeefbeefbeef]`,
  610. wantVal: V(Msg{{ID("nums"), func() Value {
  611. if flags.Proto1Legacy {
  612. return V(Lst{V(int32(-1091584273)), V(int64(-4688318750159552785))})
  613. } else {
  614. return V(Lst{V(uint32(0xbeefbeef)), V(uint64(0xbeefbeefbeefbeef))})
  615. }
  616. }()}}),
  617. }, {
  618. in: `num: +0`,
  619. wantErr: `invalid "+0" as number or bool`,
  620. }, {
  621. in: `num: 01.1234`,
  622. wantErr: `invalid "01.1234" as number or bool`,
  623. }, {
  624. in: `num: 0x`,
  625. wantErr: `invalid "0x" as number or bool`,
  626. }, {
  627. in: `num: 0xX`,
  628. wantErr: `invalid "0xX" as number or bool`,
  629. }, {
  630. in: `num: 0800`,
  631. wantErr: `invalid "0800" as number or bool`,
  632. }, {
  633. in: `num: true.`,
  634. wantErr: `invalid "true." as number or bool`,
  635. }, {
  636. in: `num: .`,
  637. wantErr: `parsing ".": invalid syntax`,
  638. }, {
  639. in: `num: -.`,
  640. wantErr: `parsing "-.": invalid syntax`,
  641. }, {
  642. in: `num: 1e10000`,
  643. wantErr: `parsing "1e10000": value out of range`,
  644. }, {
  645. in: `num: 99999999999999999999`,
  646. wantErr: `parsing "99999999999999999999": value out of range`,
  647. }, {
  648. in: `num: -99999999999999999999`,
  649. wantErr: `parsing "-99999999999999999999": value out of range`,
  650. }, {
  651. in: "x: -",
  652. wantErr: `syntax error (line 1:5)`,
  653. }, {
  654. in: "x:[\"💩\"x",
  655. wantErr: `syntax error (line 1:7)`,
  656. }, {
  657. in: "x:\n\n[\"🔥🔥🔥\"x",
  658. wantErr: `syntax error (line 3:7)`,
  659. }, {
  660. in: "x:[\"👍🏻👍🏿\"x",
  661. wantErr: `syntax error (line 1:10)`, // multi-rune emojis; could be column:8
  662. }, {
  663. in: `
  664. firstName : "John",
  665. lastName : "Smith" ,
  666. isAlive : true,
  667. age : 27,
  668. address { # missing colon is okay for messages
  669. streetAddress : "21 2nd Street" ,
  670. city : "New York" ,
  671. state : "NY" ,
  672. postalCode : "10021-3100" ; # trailing semicolon is okay
  673. },
  674. phoneNumbers : [ {
  675. type : "home" ,
  676. number : "212 555-1234"
  677. } , {
  678. type : "office" ,
  679. number : "646 555-4567"
  680. } , {
  681. type : "mobile" ,
  682. number : "123 456-7890" , # trailing comma is okay
  683. } ],
  684. children : [] ,
  685. spouse : null`,
  686. wantVal: V(Msg{
  687. {ID("firstName"), V("John")},
  688. {ID("lastName"), V("Smith")},
  689. {ID("isAlive"), V(true)},
  690. {ID("age"), V(27.0)},
  691. {ID("address"), V(Msg{
  692. {ID("streetAddress"), V("21 2nd Street")},
  693. {ID("city"), V("New York")},
  694. {ID("state"), V("NY")},
  695. {ID("postalCode"), V("10021-3100")},
  696. })},
  697. {ID("phoneNumbers"), V([]Value{
  698. V(Msg{
  699. {ID("type"), V("home")},
  700. {ID("number"), V("212 555-1234")},
  701. }),
  702. V(Msg{
  703. {ID("type"), V("office")},
  704. {ID("number"), V("646 555-4567")},
  705. }),
  706. V(Msg{
  707. {ID("type"), V("mobile")},
  708. {ID("number"), V("123 456-7890")},
  709. }),
  710. })},
  711. {ID("children"), V([]Value{})},
  712. {ID("spouse"), V(protoreflect.Name("null"))},
  713. }),
  714. wantOut: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:{streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"} phoneNumbers:[{type:"home" number:"212 555-1234"},{type:"office" number:"646 555-4567"},{type:"mobile" number:"123 456-7890"}] children:[] spouse:null`,
  715. wantOutBracket: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:<streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"> phoneNumbers:[<type:"home" number:"212 555-1234">,<type:"office" number:"646 555-4567">,<type:"mobile" number:"123 456-7890">] children:[] spouse:null`,
  716. wantOutIndent: `firstName: "John"
  717. lastName: "Smith"
  718. isAlive: true
  719. age: 27
  720. address: {
  721. streetAddress: "21 2nd Street"
  722. city: "New York"
  723. state: "NY"
  724. postalCode: "10021-3100"
  725. }
  726. phoneNumbers: [
  727. {
  728. type: "home"
  729. number: "212 555-1234"
  730. },
  731. {
  732. type: "office"
  733. number: "646 555-4567"
  734. },
  735. {
  736. type: "mobile"
  737. number: "123 456-7890"
  738. }
  739. ]
  740. children: []
  741. spouse: null
  742. `,
  743. }}
  // opts tells cmp.Diff how to compare two Values: composite values are
  // unwrapped and compared element-wise, everything else goes through the
  // custom scalar comparer.
  744. opts := cmp.Options{
  745. cmpopts.EquateEmpty(),
  746. // Transform composites (List and Message).
  747. cmp.FilterValues(func(x, y Value) bool {
  748. return (x.Type() == List && y.Type() == List) || (x.Type() == Message && y.Type() == Message)
  749. }, cmp.Transformer("", func(v Value) interface{} {
  750. if v.Type() == List {
  751. return v.List()
  752. } else {
  753. return v.Message()
  754. }
  755. })),
  756. // Compare scalars (Bool, Int, Uint, Float, String, Name).
  757. cmp.FilterValues(func(x, y Value) bool {
  758. return !(x.Type() == List && y.Type() == List) && !(x.Type() == Message && y.Type() == Message)
  759. }, cmp.Comparer(func(x, y Value) bool {
  // A composite paired with a non-composite (or with the other kind of
  // composite) can never be equal.
  760. if x.Type() == List || x.Type() == Message || y.Type() == List || y.Type() == Message {
  761. return false
  762. }
  763. // Ensure golden value is always in x variable.
  // NOTE(review): this treats a non-empty raw field as the mark of a parsed
  // (non-golden) value; confirm against the Value type's definition.
  764. if len(x.raw) > 0 {
  765. x, y = y, x
  766. }
  // The golden value's type selects which accessor is used on the other
  // value; the accessor's ok result must be true for the values to match.
  767. switch x.Type() {
  768. case Bool:
  769. want, _ := x.Bool()
  770. got, ok := y.Bool()
  771. return got == want && ok
  772. case Int:
  773. want, _ := x.Int(true)
  774. got, ok := y.Int(want < math.MinInt32 || math.MaxInt32 < want)
  775. return got == want && ok
  776. case Uint:
  777. want, _ := x.Uint(true)
  778. got, ok := y.Uint(math.MaxUint32 < want)
  779. return got == want && ok
  780. case Float:
  781. want, _ := x.Float(true)
  782. got, ok := y.Float(math.MaxFloat32 < math.Abs(want))
  // NaN never compares equal with ==, so treat two NaNs as a match.
  783. if math.IsNaN(got) || math.IsNaN(want) {
  784. return math.IsNaN(got) == math.IsNaN(want)
  785. }
  786. return got == want && ok
  787. case Name:
  788. want, _ := x.Name()
  789. got, ok := y.Name()
  790. return got == want && ok
  791. default:
  792. return x.String() == y.String()
  793. }
  794. })),
  795. }
  796. for _, tt := range tests {
  797. t.Run("", func(t *testing.T) {
  // Unmarshal is skipped only for an entry with an empty input, a zero
  // wantVal type and no expected error.
  798. if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
  799. gotVal, err := Unmarshal([]byte(tt.in))
  800. if err == nil {
  801. if tt.wantErr != "" {
  802. t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
  803. }
  804. } else {
  805. if tt.wantErr == "" {
  806. t.Errorf("Unmarshal(): got %v, want nil error", err)
  807. } else if !strings.Contains(err.Error(), tt.wantErr) {
  808. t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
  809. }
  810. }
  // The value is compared whether or not an error was returned.
  811. if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
  812. t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
  813. }
  814. }
  // Each Marshal check below runs only when its golden string is set.
  815. if tt.wantOut != "" {
  816. gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, false)
  817. if err != nil {
  818. t.Errorf("Marshal(): got %v, want nil error", err)
  819. }
  820. if removeRandomSpace(gotOut, false) != tt.wantOut {
  821. t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut)
  822. }
  823. }
  824. if tt.wantOutBracket != "" {
  825. gotOut, err := Marshal(tt.wantVal, "", [2]byte{'<', '>'}, false)
  826. if err != nil {
  827. t.Errorf("Marshal(Bracket): got %v, want nil error", err)
  828. }
  829. if removeRandomSpace(gotOut, false) != tt.wantOutBracket {
  830. t.Errorf("Marshal(Bracket):\ngot: %s\nwant: %s", gotOut, tt.wantOutBracket)
  831. }
  832. }
  833. if tt.wantOutASCII != "" {
  834. gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, true)
  835. if err != nil {
  836. t.Errorf("Marshal(ASCII): got %v, want nil error", err)
  837. }
  838. if removeRandomSpace(gotOut, false) != tt.wantOutASCII {
  839. t.Errorf("Marshal(ASCII):\ngot: %s\nwant: %s", gotOut, tt.wantOutASCII)
  840. }
  841. }
  842. if tt.wantOutIndent != "" {
  843. gotOut, err := Marshal(tt.wantVal, "\t", [2]byte{0, 0}, false)
  844. if err != nil {
  845. t.Errorf("Marshal(Indent): got %v, want nil error", err)
  846. }
  847. if removeRandomSpace(gotOut, true) != tt.wantOutIndent {
  848. t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent)
  849. }
  850. }
  851. })
  852. }
  853. }
  // expandedRE matches a colon followed by one or more spaces.
  var expandedRE = regexp.MustCompile(": +")

  // removeRandomSpace normalizes spacing in marshaled output so it can be
  // compared against a golden string. It works only for the test cases in
  // this file; presumably Marshal may insert extra spaces, so confirm
  // against the encoder before relying on it elsewhere.
  //
  // When useIndent is true, only the run of spaces after each colon is
  // reduced to a single space. Otherwise each pair of adjacent spaces is
  // collapsed to one, a space directly before '}' or '>' is dropped, and
  // trailing spaces are trimmed.
  func removeRandomSpace(b []byte, useIndent bool) string {
  	s := string(b)
  	if useIndent {
  		return expandedRE.ReplaceAllString(s, ": ")
  	}
  	// Collapse doubled spaces. The old and new arguments must differ: the
  	// previous call replaced a single space with a single space, a no-op.
  	s = strings.Replace(s, "  ", " ", -1)
  	s = strings.Replace(s, " }", "}", -1)
  	s = strings.Replace(s, " >", ">", -1)
  	return strings.TrimRight(s, " ")
  }
  862. s = strings.Replace(s, " }", "}", -1)
  863. s = strings.Replace(s, " >", ">", -1)
  864. return strings.TrimRight(s, " ")
  865. }