123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package rangetable
- import (
- "testing"
- "unicode"
- )
// maxRuneTable contains exactly one code point: unicode.MaxRune, the largest
// value a range entry can hold.
var maxRuneTable = &unicode.RangeTable{
	R32: []unicode.Range32{
		{Lo: unicode.MaxRune, Hi: unicode.MaxRune, Stride: 1},
	},
}

// overlap1 and overlap2 are hypothetical tables whose long strided ranges
// cover the same spans but start one code point apart, so their members
// interleave when the two tables are merged.
var overlap1 = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x100, Hi: 0xfffc, Stride: 4},
	},
	R32: []unicode.Range32{
		{Lo: 0x100000, Hi: 0x10fffc, Stride: 4},
	},
}

var overlap2 = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x101, Hi: 0xfffd, Stride: 4},
	},
	R32: []unicode.Range32{
		{Lo: 0x100001, Hi: 0x10fffd, Stride: 3},
	},
}

// optimize lists adjacent and evenly spaced code points as separate ranges.
// The following table should be compacted into two entries for R16 and R32.
var optimize = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x1, Hi: 0x1, Stride: 1},
		{Lo: 0x2, Hi: 0x2, Stride: 1},
		{Lo: 0x3, Hi: 0x3, Stride: 1},
		{Lo: 0x5, Hi: 0x5, Stride: 1},
		{Lo: 0x7, Hi: 0x7, Stride: 1},
		{Lo: 0x9, Hi: 0x9, Stride: 1},
		{Lo: 0xb, Hi: 0xf, Stride: 2},
	},
	R32: []unicode.Range32{
		{Lo: 0x10001, Hi: 0x10001, Stride: 1},
		{Lo: 0x10002, Hi: 0x10002, Stride: 1},
		{Lo: 0x10003, Hi: 0x10003, Stride: 1},
		{Lo: 0x10005, Hi: 0x10005, Stride: 1},
		{Lo: 0x10007, Hi: 0x10007, Stride: 1},
		{Lo: 0x10009, Hi: 0x10009, Stride: 1},
		{Lo: 0x1000b, Hi: 0x1000f, Stride: 2},
	},
}
- func TestMerge(t *testing.T) {
- for i, tt := range [][]*unicode.RangeTable{
- {unicode.Cc, unicode.Cf},
- {unicode.L, unicode.Ll},
- {unicode.L, unicode.Ll, unicode.Lu},
- {unicode.Ll, unicode.Lu},
- {unicode.M},
- unicode.GraphicRanges,
- cased,
- // Merge R16 only and R32 only and vice versa.
- {unicode.Khmer, unicode.Khudawadi},
- {unicode.Imperial_Aramaic, unicode.Radical},
- // Merge with empty.
- {&unicode.RangeTable{}},
- {&unicode.RangeTable{}, &unicode.RangeTable{}},
- {&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
- {&unicode.RangeTable{}, unicode.Hiragana},
- {unicode.Inherited, &unicode.RangeTable{}},
- {&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},
- // Hypothetical tables.
- {maxRuneTable},
- {overlap1, overlap2},
- // Optimization
- {optimize},
- } {
- rt := Merge(tt...)
- for r := rune(0); r <= unicode.MaxRune; r++ {
- if got, want := unicode.Is(rt, r), unicode.In(r, tt...); got != want {
- t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
- }
- }
- // Test optimization and correctness for R16.
- for k := 0; k < len(rt.R16)-1; k++ {
- if lo, hi := rt.R16[k].Lo, rt.R16[k].Hi; lo > hi {
- t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
- }
- if hi, lo := rt.R16[k].Hi, rt.R16[k+1].Lo; hi >= lo {
- t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
- }
- if rt.R16[k].Hi+rt.R16[k].Stride == rt.R16[k+1].Lo {
- t.Errorf("%d: missed optimization for R16 at %d between %X and %x",
- i, k, rt.R16[k], rt.R16[k+1])
- }
- }
- // Test optimization and correctness for R32.
- for k := 0; k < len(rt.R32)-1; k++ {
- if lo, hi := rt.R32[k].Lo, rt.R32[k].Hi; lo > hi {
- t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
- }
- if hi, lo := rt.R32[k].Hi, rt.R32[k+1].Lo; hi >= lo {
- t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
- }
- if rt.R32[k].Hi+rt.R32[k].Stride == rt.R32[k+1].Lo {
- t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
- i, k, rt.R32[k], rt.R32[k+1])
- }
- }
- }
- }
// runes is the sample text the lookup benchmarks iterate over: ASCII letters,
// spaces, digits and punctuation followed by the single code point U+10FFFD.
const runes = "Hello World in 2015!,\U0010fffd"
- func BenchmarkNotMerged(t *testing.B) {
- for i := 0; i < t.N; i++ {
- for _, r := range runes {
- unicode.In(r, unicode.GraphicRanges...)
- }
- }
- }
- func BenchmarkMerged(t *testing.B) {
- rt := Merge(unicode.GraphicRanges...)
- for i := 0; i < t.N; i++ {
- for _, r := range runes {
- unicode.Is(rt, r)
- }
- }
- }
// cased groups the letter-case related tables from package unicode. It is
// used both as a TestMerge input and as the table set for the cased
// benchmarks.
var cased = []*unicode.RangeTable{
	unicode.Lower, unicode.Upper, unicode.Title,
	unicode.Other_Lowercase, unicode.Other_Uppercase,
}
- func BenchmarkNotMergedCased(t *testing.B) {
- for i := 0; i < t.N; i++ {
- for _, r := range runes {
- unicode.In(r, cased...)
- }
- }
- }
- func BenchmarkMergedCased(t *testing.B) {
- // This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
- // Unicode 7.0.0.
- rt := Merge(cased...)
- for i := 0; i < t.N; i++ {
- for _, r := range runes {
- unicode.Is(rt, r)
- }
- }
- }
- func BenchmarkInit(t *testing.B) {
- for i := 0; i < t.N; i++ {
- Merge(cased...)
- Merge(unicode.GraphicRanges...)
- }
- }
- func BenchmarkInit2(t *testing.B) {
- // Hypothetical near-worst-case performance.
- for i := 0; i < t.N; i++ {
- Merge(overlap1, overlap2)
- }
- }
|