12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package bidi
- import "log"
- // This implementation is a port based on the reference implementation found at:
- // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
- //
- // described in Unicode Bidirectional Algorithm (UAX #9).
- //
- // Input:
- // There are two levels of input to the algorithm, since clients may prefer to
- // supply some information from out-of-band sources rather than relying on the
- // default behavior.
- //
- // - Bidi class array
- // - Bidi class array, with externally supplied base line direction
- //
- // Output:
- // Output is separated into several stages:
- //
- // - levels array over entire paragraph
- // - reordering array over entire paragraph
- // - levels array over line
- // - reordering array over line
- //
- // Note that for conformance to the Unicode Bidirectional Algorithm,
- // implementations are only required to generate correct reordering and
- // character directionality (odd or even levels) over a line. Generating
- // identical level arrays over a line is not required. Bidi explicit format
- // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
- // positions as long as the rest of the input is properly reordered.
- //
- // As the algorithm is defined to operate on a single paragraph at a time, this
- // implementation is written to handle single paragraphs. Thus rule P1 is
- // presumed by this implementation-- the data provided to the implementation is
- // assumed to be a single paragraph, and either contains no 'B' codes, or a
- // single 'B' code at the end of the input. 'B' is allowed as input to
- // illustrate how the algorithm assigns it a level.
- //
- // Also note that rules L3 and L4 depend on the rendering engine that uses the
- // result of the bidi algorithm. This implementation assumes that the rendering
- // engine expects combining marks in visual order (e.g. to the left of their
- // base character in RTL runs) and that it adjusts the glyphs used to render
- // mirrored characters that are in RTL runs so that they render appropriately.
- // level is the embedding level of a character. Even embedding levels indicate
- // left-to-right order and odd levels indicate right-to-left order. The special
- // level of -1 is reserved for undefined order.
- type level int8
- const implicitLevel level = -1
- // in returns if x is equal to any of the values in set.
- func (c Class) in(set ...Class) bool {
- for _, s := range set {
- if c == s {
- return true
- }
- }
- return false
- }
- // A paragraph contains the state of a paragraph.
- type paragraph struct {
- initialTypes []Class
- // Arrays of properties needed for paired bracket evaluation in N0
- pairTypes []bracketType // paired Bracket types for paragraph
- pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
- embeddingLevel level // default: = implicitLevel;
- // at the paragraph levels
- resultTypes []Class
- resultLevels []level
- // Index of matching PDI for isolate initiator characters. For other
- // characters, the value of matchingPDI will be set to -1. For isolate
- // initiators with no matching PDI, matchingPDI will be set to the length of
- // the input string.
- matchingPDI []int
- // Index of matching isolate initiator for PDI characters. For other
- // characters, and for PDIs with no matching isolate initiator, the value of
- // matchingIsolateInitiator will be set to -1.
- matchingIsolateInitiator []int
- }
- // newParagraph initializes a paragraph. The user needs to supply a few arrays
- // corresponding to the preprocessed text input. The types correspond to the
- // Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
- // each rune. pairValues provides a unique bracket class identifier for each
- // rune (suggested is the rune of the open bracket for opening and matching
- // close brackets, after normalization). The embedding levels are optional, but
- // may be supplied to encode embedding levels of styled text.
- //
- // TODO: return an error.
- func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
- validateTypes(types)
- validatePbTypes(pairTypes)
- validatePbValues(pairValues, pairTypes)
- validateParagraphEmbeddingLevel(levels)
- p := ¶graph{
- initialTypes: append([]Class(nil), types...),
- embeddingLevel: levels,
- pairTypes: pairTypes,
- pairValues: pairValues,
- resultTypes: append([]Class(nil), types...),
- }
- p.run()
- return p
- }
- func (p *paragraph) Len() int { return len(p.initialTypes) }
- // The algorithm. Does not include line-based processing (Rules L1, L2).
- // These are applied later in the line-based phase of the algorithm.
- func (p *paragraph) run() {
- p.determineMatchingIsolates()
- // 1) determining the paragraph level
- // Rule P1 is the requirement for entering this algorithm.
- // Rules P2, P3.
- // If no externally supplied paragraph embedding level, use default.
- if p.embeddingLevel == implicitLevel {
- p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
- }
- // Initialize result levels to paragraph embedding level.
- p.resultLevels = make([]level, p.Len())
- setLevels(p.resultLevels, p.embeddingLevel)
- // 2) Explicit levels and directions
- // Rules X1-X8.
- p.determineExplicitEmbeddingLevels()
- // Rule X9.
- // We do not remove the embeddings, the overrides, the PDFs, and the BNs
- // from the string explicitly. But they are not copied into isolating run
- // sequences when they are created, so they are removed for all
- // practical purposes.
- // Rule X10.
- // Run remainder of algorithm one isolating run sequence at a time
- for _, seq := range p.determineIsolatingRunSequences() {
- // 3) resolving weak types
- // Rules W1-W7.
- seq.resolveWeakTypes()
- // 4a) resolving paired brackets
- // Rule N0
- resolvePairedBrackets(seq)
- // 4b) resolving neutral types
- // Rules N1-N3.
- seq.resolveNeutralTypes()
- // 5) resolving implicit embedding levels
- // Rules I1, I2.
- seq.resolveImplicitLevels()
- // Apply the computed levels and types
- seq.applyLevelsAndTypes()
- }
- // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
- // BNs. This is for convenience, so the resulting level array will have
- // a value for every character.
- p.assignLevelsToCharactersRemovedByX9()
- }
- // determineMatchingIsolates determines the matching PDI for each isolate
- // initiator and vice versa.
- //
- // Definition BD9.
- //
- // At the end of this function:
- //
- // - The member variable matchingPDI is set to point to the index of the
- // matching PDI character for each isolate initiator character. If there is
- // no matching PDI, it is set to the length of the input text. For other
- // characters, it is set to -1.
- // - The member variable matchingIsolateInitiator is set to point to the
- // index of the matching isolate initiator character for each PDI character.
- // If there is no matching isolate initiator, or the character is not a PDI,
- // it is set to -1.
- func (p *paragraph) determineMatchingIsolates() {
- p.matchingPDI = make([]int, p.Len())
- p.matchingIsolateInitiator = make([]int, p.Len())
- for i := range p.matchingIsolateInitiator {
- p.matchingIsolateInitiator[i] = -1
- }
- for i := range p.matchingPDI {
- p.matchingPDI[i] = -1
- if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
- depthCounter := 1
- for j := i + 1; j < p.Len(); j++ {
- if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
- depthCounter++
- } else if u == PDI {
- if depthCounter--; depthCounter == 0 {
- p.matchingPDI[i] = j
- p.matchingIsolateInitiator[j] = i
- break
- }
- }
- }
- if p.matchingPDI[i] == -1 {
- p.matchingPDI[i] = p.Len()
- }
- }
- }
- }
- // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
- // the substring limited by the given range [start, end).
- //
- // Determines the paragraph level based on rules P2, P3. This is also used
- // in rule X5c to find if an FSI should resolve to LRI or RLI.
- func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
- var strongType Class = unknownClass
- // Rule P2.
- for i := start; i < end; i++ {
- if t := p.resultTypes[i]; t.in(L, AL, R) {
- strongType = t
- break
- } else if t.in(FSI, LRI, RLI) {
- i = p.matchingPDI[i] // skip over to the matching PDI
- if i > end {
- log.Panic("assert (i <= end)")
- }
- }
- }
- // Rule P3.
- switch strongType {
- case unknownClass: // none found
- // default embedding level when no strong types found is 0.
- return 0
- case L:
- return 0
- default: // AL, R
- return 1
- }
- }
- const maxDepth = 125
- // This stack will store the embedding levels and override and isolated
- // statuses
- type directionalStatusStack struct {
- stackCounter int
- embeddingLevelStack [maxDepth + 1]level
- overrideStatusStack [maxDepth + 1]Class
- isolateStatusStack [maxDepth + 1]bool
- }
- func (s *directionalStatusStack) empty() { s.stackCounter = 0 }
- func (s *directionalStatusStack) pop() { s.stackCounter-- }
- func (s *directionalStatusStack) depth() int { return s.stackCounter }
- func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
- s.embeddingLevelStack[s.stackCounter] = level
- s.overrideStatusStack[s.stackCounter] = overrideStatus
- s.isolateStatusStack[s.stackCounter] = isolateStatus
- s.stackCounter++
- }
- func (s *directionalStatusStack) lastEmbeddingLevel() level {
- return s.embeddingLevelStack[s.stackCounter-1]
- }
- func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
- return s.overrideStatusStack[s.stackCounter-1]
- }
- func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
- return s.isolateStatusStack[s.stackCounter-1]
- }
- // Determine explicit levels using rules X1 - X8
- func (p *paragraph) determineExplicitEmbeddingLevels() {
- var stack directionalStatusStack
- var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
- // Rule X1.
- stack.push(p.embeddingLevel, ON, false)
- for i, t := range p.resultTypes {
- // Rules X2, X3, X4, X5, X5a, X5b, X5c
- switch t {
- case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
- isIsolate := t.in(RLI, LRI, FSI)
- isRTL := t.in(RLE, RLO, RLI)
- // override if this is an FSI that resolves to RLI
- if t == FSI {
- isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
- }
- if isIsolate {
- p.resultLevels[i] = stack.lastEmbeddingLevel()
- if stack.lastDirectionalOverrideStatus() != ON {
- p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
- }
- }
- var newLevel level
- if isRTL {
- // least greater odd
- newLevel = (stack.lastEmbeddingLevel() + 1) | 1
- } else {
- // least greater even
- newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
- }
- if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
- if isIsolate {
- validIsolateCount++
- }
- // Push new embedding level, override status, and isolated
- // status.
- // No check for valid stack counter, since the level check
- // suffices.
- switch t {
- case LRO:
- stack.push(newLevel, L, isIsolate)
- case RLO:
- stack.push(newLevel, R, isIsolate)
- default:
- stack.push(newLevel, ON, isIsolate)
- }
- // Not really part of the spec
- if !isIsolate {
- p.resultLevels[i] = newLevel
- }
- } else {
- // This is an invalid explicit formatting character,
- // so apply the "Otherwise" part of rules X2-X5b.
- if isIsolate {
- overflowIsolateCount++
- } else { // !isIsolate
- if overflowIsolateCount == 0 {
- overflowEmbeddingCount++
- }
- }
- }
- // Rule X6a
- case PDI:
- if overflowIsolateCount > 0 {
- overflowIsolateCount--
- } else if validIsolateCount == 0 {
- // do nothing
- } else {
- overflowEmbeddingCount = 0
- for !stack.lastDirectionalIsolateStatus() {
- stack.pop()
- }
- stack.pop()
- validIsolateCount--
- }
- p.resultLevels[i] = stack.lastEmbeddingLevel()
- // Rule X7
- case PDF:
- // Not really part of the spec
- p.resultLevels[i] = stack.lastEmbeddingLevel()
- if overflowIsolateCount > 0 {
- // do nothing
- } else if overflowEmbeddingCount > 0 {
- overflowEmbeddingCount--
- } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
- stack.pop()
- }
- case B: // paragraph separator.
- // Rule X8.
- // These values are reset for clarity, in this implementation B
- // can only occur as the last code in the array.
- stack.empty()
- overflowIsolateCount = 0
- overflowEmbeddingCount = 0
- validIsolateCount = 0
- p.resultLevels[i] = p.embeddingLevel
- default:
- p.resultLevels[i] = stack.lastEmbeddingLevel()
- if stack.lastDirectionalOverrideStatus() != ON {
- p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
- }
- }
- }
- }
- type isolatingRunSequence struct {
- p *paragraph
- indexes []int // indexes to the original string
- types []Class // type of each character using the index
- resolvedLevels []level // resolved levels after application of rules
- level level
- sos, eos Class
- }
- func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
- func maxLevel(a, b level) level {
- if a > b {
- return a
- }
- return b
- }
- // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
- // either L or R, for each isolating run sequence.
- func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
- length := len(indexes)
- types := make([]Class, length)
- for i, x := range indexes {
- types[i] = p.resultTypes[x]
- }
- // assign level, sos and eos
- prevChar := indexes[0] - 1
- for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
- prevChar--
- }
- prevLevel := p.embeddingLevel
- if prevChar >= 0 {
- prevLevel = p.resultLevels[prevChar]
- }
- var succLevel level
- lastType := types[length-1]
- if lastType.in(LRI, RLI, FSI) {
- succLevel = p.embeddingLevel
- } else {
- // the first character after the end of run sequence
- limit := indexes[length-1] + 1
- for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
- }
- succLevel = p.embeddingLevel
- if limit < p.Len() {
- succLevel = p.resultLevels[limit]
- }
- }
- level := p.resultLevels[indexes[0]]
- return &isolatingRunSequence{
- p: p,
- indexes: indexes,
- types: types,
- level: level,
- sos: typeForLevel(maxLevel(prevLevel, level)),
- eos: typeForLevel(maxLevel(succLevel, level)),
- }
- }
- // Resolving weak types Rules W1-W7.
- //
- // Note that some weak types (EN, AN) remain after this processing is
- // complete.
- func (s *isolatingRunSequence) resolveWeakTypes() {
- // on entry, only these types remain
- s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
- // Rule W1.
- // Changes all NSMs.
- precedingCharacterType := s.sos
- for i, t := range s.types {
- if t == NSM {
- s.types[i] = precedingCharacterType
- } else {
- if t.in(LRI, RLI, FSI, PDI) {
- precedingCharacterType = ON
- }
- precedingCharacterType = t
- }
- }
- // Rule W2.
- // EN does not change at the start of the run, because sos != AL.
- for i, t := range s.types {
- if t == EN {
- for j := i - 1; j >= 0; j-- {
- if t := s.types[j]; t.in(L, R, AL) {
- if t == AL {
- s.types[i] = AN
- }
- break
- }
- }
- }
- }
- // Rule W3.
- for i, t := range s.types {
- if t == AL {
- s.types[i] = R
- }
- }
- // Rule W4.
- // Since there must be values on both sides for this rule to have an
- // effect, the scan skips the first and last value.
- //
- // Although the scan proceeds left to right, and changes the type
- // values in a way that would appear to affect the computations
- // later in the scan, there is actually no problem. A change in the
- // current value can only affect the value to its immediate right,
- // and only affect it if it is ES or CS. But the current value can
- // only change if the value to its right is not ES or CS. Thus
- // either the current value will not change, or its change will have
- // no effect on the remainder of the analysis.
- for i := 1; i < s.Len()-1; i++ {
- t := s.types[i]
- if t == ES || t == CS {
- prevSepType := s.types[i-1]
- succSepType := s.types[i+1]
- if prevSepType == EN && succSepType == EN {
- s.types[i] = EN
- } else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
- s.types[i] = AN
- }
- }
- }
- // Rule W5.
- for i, t := range s.types {
- if t == ET {
- // locate end of sequence
- runStart := i
- runEnd := s.findRunLimit(runStart, ET)
- // check values at ends of sequence
- t := s.sos
- if runStart > 0 {
- t = s.types[runStart-1]
- }
- if t != EN {
- t = s.eos
- if runEnd < len(s.types) {
- t = s.types[runEnd]
- }
- }
- if t == EN {
- setTypes(s.types[runStart:runEnd], EN)
- }
- // continue at end of sequence
- i = runEnd
- }
- }
- // Rule W6.
- for i, t := range s.types {
- if t.in(ES, ET, CS) {
- s.types[i] = ON
- }
- }
- // Rule W7.
- for i, t := range s.types {
- if t == EN {
- // set default if we reach start of run
- prevStrongType := s.sos
- for j := i - 1; j >= 0; j-- {
- t = s.types[j]
- if t == L || t == R { // AL's have been changed to R
- prevStrongType = t
- break
- }
- }
- if prevStrongType == L {
- s.types[i] = L
- }
- }
- }
- }
- // 6) resolving neutral types Rules N1-N2.
- func (s *isolatingRunSequence) resolveNeutralTypes() {
- // on entry, only these types can be in resultTypes
- s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
- for i, t := range s.types {
- switch t {
- case WS, ON, B, S, RLI, LRI, FSI, PDI:
- // find bounds of run of neutrals
- runStart := i
- runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
- // determine effective types at ends of run
- var leadType, trailType Class
- // Note that the character found can only be L, R, AN, or
- // EN.
- if runStart == 0 {
- leadType = s.sos
- } else {
- leadType = s.types[runStart-1]
- if leadType.in(AN, EN) {
- leadType = R
- }
- }
- if runEnd == len(s.types) {
- trailType = s.eos
- } else {
- trailType = s.types[runEnd]
- if trailType.in(AN, EN) {
- trailType = R
- }
- }
- var resolvedType Class
- if leadType == trailType {
- // Rule N1.
- resolvedType = leadType
- } else {
- // Rule N2.
- // Notice the embedding level of the run is used, not
- // the paragraph embedding level.
- resolvedType = typeForLevel(s.level)
- }
- setTypes(s.types[runStart:runEnd], resolvedType)
- // skip over run of (former) neutrals
- i = runEnd
- }
- }
- }
- func setLevels(levels []level, newLevel level) {
- for i := range levels {
- levels[i] = newLevel
- }
- }
- func setTypes(types []Class, newType Class) {
- for i := range types {
- types[i] = newType
- }
- }
- // 7) resolving implicit embedding levels Rules I1, I2.
- func (s *isolatingRunSequence) resolveImplicitLevels() {
- // on entry, only these types can be in resultTypes
- s.assertOnly(L, R, EN, AN)
- s.resolvedLevels = make([]level, len(s.types))
- setLevels(s.resolvedLevels, s.level)
- if (s.level & 1) == 0 { // even level
- for i, t := range s.types {
- // Rule I1.
- if t == L {
- // no change
- } else if t == R {
- s.resolvedLevels[i] += 1
- } else { // t == AN || t == EN
- s.resolvedLevels[i] += 2
- }
- }
- } else { // odd level
- for i, t := range s.types {
- // Rule I2.
- if t == R {
- // no change
- } else { // t == L || t == AN || t == EN
- s.resolvedLevels[i] += 1
- }
- }
- }
- }
- // Applies the levels and types resolved in rules W1-I2 to the
- // resultLevels array.
- func (s *isolatingRunSequence) applyLevelsAndTypes() {
- for i, x := range s.indexes {
- s.p.resultTypes[x] = s.types[i]
- s.p.resultLevels[x] = s.resolvedLevels[i]
- }
- }
- // Return the limit of the run consisting only of the types in validSet
- // starting at index. This checks the value at index, and will return
- // index if that value is not in validSet.
- func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
- loop:
- for ; index < len(s.types); index++ {
- t := s.types[index]
- for _, valid := range validSet {
- if t == valid {
- continue loop
- }
- }
- return index // didn't find a match in validSet
- }
- return len(s.types)
- }
- // Algorithm validation. Assert that all values in types are in the
- // provided set.
- func (s *isolatingRunSequence) assertOnly(codes ...Class) {
- loop:
- for i, t := range s.types {
- for _, c := range codes {
- if t == c {
- continue loop
- }
- }
- log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
- }
- }
- // determineLevelRuns returns an array of level runs. Each level run is
- // described as an array of indexes into the input string.
- //
- // Determines the level runs. Rule X9 will be applied in determining the
- // runs, in the way that makes sure the characters that are supposed to be
- // removed are not included in the runs.
- func (p *paragraph) determineLevelRuns() [][]int {
- run := []int{}
- allRuns := [][]int{}
- currentLevel := implicitLevel
- for i := range p.initialTypes {
- if !isRemovedByX9(p.initialTypes[i]) {
- if p.resultLevels[i] != currentLevel {
- // we just encountered a new run; wrap up last run
- if currentLevel >= 0 { // only wrap it up if there was a run
- allRuns = append(allRuns, run)
- run = nil
- }
- // Start new run
- currentLevel = p.resultLevels[i]
- }
- run = append(run, i)
- }
- }
- // Wrap up the final run, if any
- if len(run) > 0 {
- allRuns = append(allRuns, run)
- }
- return allRuns
- }
- // Definition BD13. Determine isolating run sequences.
- func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
- levelRuns := p.determineLevelRuns()
- // Compute the run that each character belongs to
- runForCharacter := make([]int, p.Len())
- for i, run := range levelRuns {
- for _, index := range run {
- runForCharacter[index] = i
- }
- }
- sequences := []*isolatingRunSequence{}
- var currentRunSequence []int
- for _, run := range levelRuns {
- first := run[0]
- if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
- currentRunSequence = nil
- // int run = i;
- for {
- // Copy this level run into currentRunSequence
- currentRunSequence = append(currentRunSequence, run...)
- last := currentRunSequence[len(currentRunSequence)-1]
- lastT := p.initialTypes[last]
- if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
- run = levelRuns[runForCharacter[p.matchingPDI[last]]]
- } else {
- break
- }
- }
- sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
- }
- }
- return sequences
- }
- // Assign level information to characters removed by rule X9. This is for
- // ease of relating the level information to the original input data. Note
- // that the levels assigned to these codes are arbitrary, they're chosen so
- // as to avoid breaking level runs.
- func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
- for i, t := range p.initialTypes {
- if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
- p.resultTypes[i] = t
- p.resultLevels[i] = -1
- }
- }
- // now propagate forward the levels information (could have
- // propagated backward, the main thing is not to introduce a level
- // break where one doesn't already exist).
- if p.resultLevels[0] == -1 {
- p.resultLevels[0] = p.embeddingLevel
- }
- for i := 1; i < len(p.initialTypes); i++ {
- if p.resultLevels[i] == -1 {
- p.resultLevels[i] = p.resultLevels[i-1]
- }
- }
- // Embedding information is for informational purposes only so need not be
- // adjusted.
- }
- //
- // Output
- //
- // getLevels computes levels array breaking lines at offsets in linebreaks.
- // Rule L1.
- //
- // The linebreaks array must include at least one value. The values must be
- // in strictly increasing order (no duplicates) between 1 and the length of
- // the text, inclusive. The last value must be the length of the text.
- func (p *paragraph) getLevels(linebreaks []int) []level {
- // Note that since the previous processing has removed all
- // P, S, and WS values from resultTypes, the values referred to
- // in these rules are the initial types, before any processing
- // has been applied (including processing of overrides).
- //
- // This example implementation has reinserted explicit format codes
- // and BN, in order that the levels array correspond to the
- // initial text. Their final placement is not normative.
- // These codes are treated like WS in this implementation,
- // so they don't interrupt sequences of WS.
- validateLineBreaks(linebreaks, p.Len())
- result := append([]level(nil), p.resultLevels...)
- // don't worry about linebreaks since if there is a break within
- // a series of WS values preceding S, the linebreak itself
- // causes the reset.
- for i, t := range p.initialTypes {
- if t.in(B, S) {
- // Rule L1, clauses one and two.
- result[i] = p.embeddingLevel
- // Rule L1, clause three.
- for j := i - 1; j >= 0; j-- {
- if isWhitespace(p.initialTypes[j]) { // including format codes
- result[j] = p.embeddingLevel
- } else {
- break
- }
- }
- }
- }
- // Rule L1, clause four.
- start := 0
- for _, limit := range linebreaks {
- for j := limit - 1; j >= start; j-- {
- if isWhitespace(p.initialTypes[j]) { // including format codes
- result[j] = p.embeddingLevel
- } else {
- break
- }
- }
- start = limit
- }
- return result
- }
- // getReordering returns the reordering of lines from a visual index to a
- // logical index for line breaks at the given offsets.
- //
- // Lines are concatenated from left to right. So for example, the fifth
- // character from the left on the third line is
- //
- // getReordering(linebreaks)[linebreaks[1] + 4]
- //
- // (linebreaks[1] is the position after the last character of the second
- // line, which is also the index of the first character on the third line,
- // and adding four gets the fifth character from the left).
- //
- // The linebreaks array must include at least one value. The values must be
- // in strictly increasing order (no duplicates) between 1 and the length of
- // the text, inclusive. The last value must be the length of the text.
- func (p *paragraph) getReordering(linebreaks []int) []int {
- validateLineBreaks(linebreaks, p.Len())
- return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
- }
- // Return multiline reordering array for a given level array. Reordering
- // does not occur across a line break.
- func computeMultilineReordering(levels []level, linebreaks []int) []int {
- result := make([]int, len(levels))
- start := 0
- for _, limit := range linebreaks {
- tempLevels := make([]level, limit-start)
- copy(tempLevels, levels[start:])
- for j, order := range computeReordering(tempLevels) {
- result[start+j] = order + start
- }
- start = limit
- }
- return result
- }
- // Return reordering array for a given level array. This reorders a single
- // line. The reordering is a visual to logical map. For example, the
- // leftmost char is string.charAt(order[0]). Rule L2.
- func computeReordering(levels []level) []int {
- result := make([]int, len(levels))
- // initialize order
- for i := range result {
- result[i] = i
- }
- // locate highest level found on line.
- // Note the rules say text, but no reordering across line bounds is
- // performed, so this is sufficient.
- highestLevel := level(0)
- lowestOddLevel := level(maxDepth + 2)
- for _, level := range levels {
- if level > highestLevel {
- highestLevel = level
- }
- if level&1 != 0 && level < lowestOddLevel {
- lowestOddLevel = level
- }
- }
- for level := highestLevel; level >= lowestOddLevel; level-- {
- for i := 0; i < len(levels); i++ {
- if levels[i] >= level {
- // find range of text at or above this level
- start := i
- limit := i + 1
- for limit < len(levels) && levels[limit] >= level {
- limit++
- }
- for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
- result[j], result[k] = result[k], result[j]
- }
- // skip to end of level run
- i = limit
- }
- }
- }
- return result
- }
- // isWhitespace reports whether the type is considered a whitespace type for the
- // line break rules.
- func isWhitespace(c Class) bool {
- switch c {
- case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
- return true
- }
- return false
- }
- // isRemovedByX9 reports whether the type is one of the types removed in X9.
- func isRemovedByX9(c Class) bool {
- switch c {
- case LRE, RLE, LRO, RLO, PDF, BN:
- return true
- }
- return false
- }
- // typeForLevel reports the strong type (L or R) corresponding to the level.
- func typeForLevel(level level) Class {
- if (level & 0x1) == 0 {
- return L
- }
- return R
- }
- // TODO: change validation to not panic
- func validateTypes(types []Class) {
- if len(types) == 0 {
- log.Panic("types is null")
- }
- for i, t := range types[:len(types)-1] {
- if t == B {
- log.Panicf("B type before end of paragraph at index: %d", i)
- }
- }
- }
- func validateParagraphEmbeddingLevel(embeddingLevel level) {
- if embeddingLevel != implicitLevel &&
- embeddingLevel != 0 &&
- embeddingLevel != 1 {
- log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
- }
- }
- func validateLineBreaks(linebreaks []int, textLength int) {
- prev := 0
- for i, next := range linebreaks {
- if next <= prev {
- log.Panicf("bad linebreak: %d at index: %d", next, i)
- }
- prev = next
- }
- if prev != textLength {
- log.Panicf("last linebreak was %d, want %d", prev, textLength)
- }
- }
- func validatePbTypes(pairTypes []bracketType) {
- if len(pairTypes) == 0 {
- log.Panic("pairTypes is null")
- }
- for i, pt := range pairTypes {
- switch pt {
- case bpNone, bpOpen, bpClose:
- default:
- log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
- }
- }
- }
- func validatePbValues(pairValues []rune, pairTypes []bracketType) {
- if pairValues == nil {
- log.Panic("pairValues is null")
- }
- if len(pairTypes) != len(pairValues) {
- log.Panic("pairTypes is different length from pairValues")
- }
- }
|