aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/unicode/bidi/core.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/unicode/bidi/core.go')
-rw-r--r--vendor/golang.org/x/text/unicode/bidi/core.go1058
1 files changed, 0 insertions, 1058 deletions
diff --git a/vendor/golang.org/x/text/unicode/bidi/core.go b/vendor/golang.org/x/text/unicode/bidi/core.go
deleted file mode 100644
index 48d1440..0000000
--- a/vendor/golang.org/x/text/unicode/bidi/core.go
+++ /dev/null
@@ -1,1058 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bidi
-
-import "log"
-
-// This implementation is a port based on the reference implementation found at:
-// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
-//
-// described in Unicode Bidirectional Algorithm (UAX #9).
-//
-// Input:
-// There are two levels of input to the algorithm, since clients may prefer to
-// supply some information from out-of-band sources rather than relying on the
-// default behavior.
-//
-// - Bidi class array
-// - Bidi class array, with externally supplied base line direction
-//
-// Output:
-// Output is separated into several stages:
-//
-// - levels array over entire paragraph
-// - reordering array over entire paragraph
-// - levels array over line
-// - reordering array over line
-//
-// Note that for conformance to the Unicode Bidirectional Algorithm,
-// implementations are only required to generate correct reordering and
-// character directionality (odd or even levels) over a line. Generating
-// identical level arrays over a line is not required. Bidi explicit format
-// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
-// positions as long as the rest of the input is properly reordered.
-//
-// As the algorithm is defined to operate on a single paragraph at a time, this
-// implementation is written to handle single paragraphs. Thus rule P1 is
-// presumed by this implementation-- the data provided to the implementation is
-// assumed to be a single paragraph, and either contains no 'B' codes, or a
-// single 'B' code at the end of the input. 'B' is allowed as input to
-// illustrate how the algorithm assigns it a level.
-//
-// Also note that rules L3 and L4 depend on the rendering engine that uses the
-// result of the bidi algorithm. This implementation assumes that the rendering
-// engine expects combining marks in visual order (e.g. to the left of their
-// base character in RTL runs) and that it adjusts the glyphs used to render
-// mirrored characters that are in RTL runs so that they render appropriately.
-
-// level is the embedding level of a character. Even embedding levels indicate
-// left-to-right order and odd levels indicate right-to-left order. The special
-// level of -1 is reserved for undefined order.
-type level int8
-
-const implicitLevel level = -1
-
-// in returns if x is equal to any of the values in set.
-func (c Class) in(set ...Class) bool {
- for _, s := range set {
- if c == s {
- return true
- }
- }
- return false
-}
-
-// A paragraph contains the state of a paragraph.
-type paragraph struct {
- initialTypes []Class
-
- // Arrays of properties needed for paired bracket evaluation in N0
- pairTypes []bracketType // paired Bracket types for paragraph
- pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
-
- embeddingLevel level // default: = implicitLevel;
-
- // at the paragraph levels
- resultTypes []Class
- resultLevels []level
-
- // Index of matching PDI for isolate initiator characters. For other
- // characters, the value of matchingPDI will be set to -1. For isolate
- // initiators with no matching PDI, matchingPDI will be set to the length of
- // the input string.
- matchingPDI []int
-
- // Index of matching isolate initiator for PDI characters. For other
- // characters, and for PDIs with no matching isolate initiator, the value of
- // matchingIsolateInitiator will be set to -1.
- matchingIsolateInitiator []int
-}
-
-// newParagraph initializes a paragraph. The user needs to supply a few arrays
-// corresponding to the preprocessed text input. The types correspond to the
-// Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
-// each rune. pairValues provides a unique bracket class identifier for each
-// rune (suggested is the rune of the open bracket for opening and matching
-// close brackets, after normalization). The embedding levels are optional, but
-// may be supplied to encode embedding levels of styled text.
-//
-// TODO: return an error.
-func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
- validateTypes(types)
- validatePbTypes(pairTypes)
- validatePbValues(pairValues, pairTypes)
- validateParagraphEmbeddingLevel(levels)
-
- p := &paragraph{
- initialTypes: append([]Class(nil), types...),
- embeddingLevel: levels,
-
- pairTypes: pairTypes,
- pairValues: pairValues,
-
- resultTypes: append([]Class(nil), types...),
- }
- p.run()
- return p
-}
-
-func (p *paragraph) Len() int { return len(p.initialTypes) }
-
-// The algorithm. Does not include line-based processing (Rules L1, L2).
-// These are applied later in the line-based phase of the algorithm.
-func (p *paragraph) run() {
- p.determineMatchingIsolates()
-
- // 1) determining the paragraph level
- // Rule P1 is the requirement for entering this algorithm.
- // Rules P2, P3.
- // If no externally supplied paragraph embedding level, use default.
- if p.embeddingLevel == implicitLevel {
- p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
- }
-
- // Initialize result levels to paragraph embedding level.
- p.resultLevels = make([]level, p.Len())
- setLevels(p.resultLevels, p.embeddingLevel)
-
- // 2) Explicit levels and directions
- // Rules X1-X8.
- p.determineExplicitEmbeddingLevels()
-
- // Rule X9.
- // We do not remove the embeddings, the overrides, the PDFs, and the BNs
- // from the string explicitly. But they are not copied into isolating run
- // sequences when they are created, so they are removed for all
- // practical purposes.
-
- // Rule X10.
- // Run remainder of algorithm one isolating run sequence at a time
- for _, seq := range p.determineIsolatingRunSequences() {
- // 3) resolving weak types
- // Rules W1-W7.
- seq.resolveWeakTypes()
-
- // 4a) resolving paired brackets
- // Rule N0
- resolvePairedBrackets(seq)
-
- // 4b) resolving neutral types
- // Rules N1-N3.
- seq.resolveNeutralTypes()
-
- // 5) resolving implicit embedding levels
- // Rules I1, I2.
- seq.resolveImplicitLevels()
-
- // Apply the computed levels and types
- seq.applyLevelsAndTypes()
- }
-
- // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
- // BNs. This is for convenience, so the resulting level array will have
- // a value for every character.
- p.assignLevelsToCharactersRemovedByX9()
-}
-
-// determineMatchingIsolates determines the matching PDI for each isolate
-// initiator and vice versa.
-//
-// Definition BD9.
-//
-// At the end of this function:
-//
-// - The member variable matchingPDI is set to point to the index of the
-// matching PDI character for each isolate initiator character. If there is
-// no matching PDI, it is set to the length of the input text. For other
-// characters, it is set to -1.
-// - The member variable matchingIsolateInitiator is set to point to the
-// index of the matching isolate initiator character for each PDI character.
-// If there is no matching isolate initiator, or the character is not a PDI,
-// it is set to -1.
-func (p *paragraph) determineMatchingIsolates() {
- p.matchingPDI = make([]int, p.Len())
- p.matchingIsolateInitiator = make([]int, p.Len())
-
- for i := range p.matchingIsolateInitiator {
- p.matchingIsolateInitiator[i] = -1
- }
-
- for i := range p.matchingPDI {
- p.matchingPDI[i] = -1
-
- if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
- depthCounter := 1
- for j := i + 1; j < p.Len(); j++ {
- if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
- depthCounter++
- } else if u == PDI {
- if depthCounter--; depthCounter == 0 {
- p.matchingPDI[i] = j
- p.matchingIsolateInitiator[j] = i
- break
- }
- }
- }
- if p.matchingPDI[i] == -1 {
- p.matchingPDI[i] = p.Len()
- }
- }
- }
-}
-
-// determineParagraphEmbeddingLevel reports the resolved paragraph direction of
-// the substring limited by the given range [start, end).
-//
-// Determines the paragraph level based on rules P2, P3. This is also used
-// in rule X5c to find if an FSI should resolve to LRI or RLI.
-func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
- var strongType Class = unknownClass
-
- // Rule P2.
- for i := start; i < end; i++ {
- if t := p.resultTypes[i]; t.in(L, AL, R) {
- strongType = t
- break
- } else if t.in(FSI, LRI, RLI) {
- i = p.matchingPDI[i] // skip over to the matching PDI
- if i > end {
- log.Panic("assert (i <= end)")
- }
- }
- }
- // Rule P3.
- switch strongType {
- case unknownClass: // none found
- // default embedding level when no strong types found is 0.
- return 0
- case L:
- return 0
- default: // AL, R
- return 1
- }
-}
-
-const maxDepth = 125
-
-// This stack will store the embedding levels and override and isolated
-// statuses
-type directionalStatusStack struct {
- stackCounter int
- embeddingLevelStack [maxDepth + 1]level
- overrideStatusStack [maxDepth + 1]Class
- isolateStatusStack [maxDepth + 1]bool
-}
-
-func (s *directionalStatusStack) empty() { s.stackCounter = 0 }
-func (s *directionalStatusStack) pop() { s.stackCounter-- }
-func (s *directionalStatusStack) depth() int { return s.stackCounter }
-
-func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
- s.embeddingLevelStack[s.stackCounter] = level
- s.overrideStatusStack[s.stackCounter] = overrideStatus
- s.isolateStatusStack[s.stackCounter] = isolateStatus
- s.stackCounter++
-}
-
-func (s *directionalStatusStack) lastEmbeddingLevel() level {
- return s.embeddingLevelStack[s.stackCounter-1]
-}
-
-func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
- return s.overrideStatusStack[s.stackCounter-1]
-}
-
-func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
- return s.isolateStatusStack[s.stackCounter-1]
-}
-
-// Determine explicit levels using rules X1 - X8
-func (p *paragraph) determineExplicitEmbeddingLevels() {
- var stack directionalStatusStack
- var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
-
- // Rule X1.
- stack.push(p.embeddingLevel, ON, false)
-
- for i, t := range p.resultTypes {
- // Rules X2, X3, X4, X5, X5a, X5b, X5c
- switch t {
- case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
- isIsolate := t.in(RLI, LRI, FSI)
- isRTL := t.in(RLE, RLO, RLI)
-
- // override if this is an FSI that resolves to RLI
- if t == FSI {
- isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
- }
- if isIsolate {
- p.resultLevels[i] = stack.lastEmbeddingLevel()
- if stack.lastDirectionalOverrideStatus() != ON {
- p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
- }
- }
-
- var newLevel level
- if isRTL {
- // least greater odd
- newLevel = (stack.lastEmbeddingLevel() + 1) | 1
- } else {
- // least greater even
- newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
- }
-
- if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
- if isIsolate {
- validIsolateCount++
- }
- // Push new embedding level, override status, and isolated
- // status.
- // No check for valid stack counter, since the level check
- // suffices.
- switch t {
- case LRO:
- stack.push(newLevel, L, isIsolate)
- case RLO:
- stack.push(newLevel, R, isIsolate)
- default:
- stack.push(newLevel, ON, isIsolate)
- }
- // Not really part of the spec
- if !isIsolate {
- p.resultLevels[i] = newLevel
- }
- } else {
- // This is an invalid explicit formatting character,
- // so apply the "Otherwise" part of rules X2-X5b.
- if isIsolate {
- overflowIsolateCount++
- } else { // !isIsolate
- if overflowIsolateCount == 0 {
- overflowEmbeddingCount++
- }
- }
- }
-
- // Rule X6a
- case PDI:
- if overflowIsolateCount > 0 {
- overflowIsolateCount--
- } else if validIsolateCount == 0 {
- // do nothing
- } else {
- overflowEmbeddingCount = 0
- for !stack.lastDirectionalIsolateStatus() {
- stack.pop()
- }
- stack.pop()
- validIsolateCount--
- }
- p.resultLevels[i] = stack.lastEmbeddingLevel()
-
- // Rule X7
- case PDF:
- // Not really part of the spec
- p.resultLevels[i] = stack.lastEmbeddingLevel()
-
- if overflowIsolateCount > 0 {
- // do nothing
- } else if overflowEmbeddingCount > 0 {
- overflowEmbeddingCount--
- } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
- stack.pop()
- }
-
- case B: // paragraph separator.
- // Rule X8.
-
- // These values are reset for clarity, in this implementation B
- // can only occur as the last code in the array.
- stack.empty()
- overflowIsolateCount = 0
- overflowEmbeddingCount = 0
- validIsolateCount = 0
- p.resultLevels[i] = p.embeddingLevel
-
- default:
- p.resultLevels[i] = stack.lastEmbeddingLevel()
- if stack.lastDirectionalOverrideStatus() != ON {
- p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
- }
- }
- }
-}
-
-type isolatingRunSequence struct {
- p *paragraph
-
- indexes []int // indexes to the original string
-
- types []Class // type of each character using the index
- resolvedLevels []level // resolved levels after application of rules
- level level
- sos, eos Class
-}
-
-func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
-
-func maxLevel(a, b level) level {
- if a > b {
- return a
- }
- return b
-}
-
-// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
-// either L or R, for each isolating run sequence.
-func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
- length := len(indexes)
- types := make([]Class, length)
- for i, x := range indexes {
- types[i] = p.resultTypes[x]
- }
-
- // assign level, sos and eos
- prevChar := indexes[0] - 1
- for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
- prevChar--
- }
- prevLevel := p.embeddingLevel
- if prevChar >= 0 {
- prevLevel = p.resultLevels[prevChar]
- }
-
- var succLevel level
- lastType := types[length-1]
- if lastType.in(LRI, RLI, FSI) {
- succLevel = p.embeddingLevel
- } else {
- // the first character after the end of run sequence
- limit := indexes[length-1] + 1
- for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
-
- }
- succLevel = p.embeddingLevel
- if limit < p.Len() {
- succLevel = p.resultLevels[limit]
- }
- }
- level := p.resultLevels[indexes[0]]
- return &isolatingRunSequence{
- p: p,
- indexes: indexes,
- types: types,
- level: level,
- sos: typeForLevel(maxLevel(prevLevel, level)),
- eos: typeForLevel(maxLevel(succLevel, level)),
- }
-}
-
-// Resolving weak types Rules W1-W7.
-//
-// Note that some weak types (EN, AN) remain after this processing is
-// complete.
-func (s *isolatingRunSequence) resolveWeakTypes() {
-
- // on entry, only these types remain
- s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
-
- // Rule W1.
- // Changes all NSMs.
- preceedingCharacterType := s.sos
- for i, t := range s.types {
- if t == NSM {
- s.types[i] = preceedingCharacterType
- } else {
- if t.in(LRI, RLI, FSI, PDI) {
- preceedingCharacterType = ON
- }
- preceedingCharacterType = t
- }
- }
-
- // Rule W2.
- // EN does not change at the start of the run, because sos != AL.
- for i, t := range s.types {
- if t == EN {
- for j := i - 1; j >= 0; j-- {
- if t := s.types[j]; t.in(L, R, AL) {
- if t == AL {
- s.types[i] = AN
- }
- break
- }
- }
- }
- }
-
- // Rule W3.
- for i, t := range s.types {
- if t == AL {
- s.types[i] = R
- }
- }
-
- // Rule W4.
- // Since there must be values on both sides for this rule to have an
- // effect, the scan skips the first and last value.
- //
- // Although the scan proceeds left to right, and changes the type
- // values in a way that would appear to affect the computations
- // later in the scan, there is actually no problem. A change in the
- // current value can only affect the value to its immediate right,
- // and only affect it if it is ES or CS. But the current value can
- // only change if the value to its right is not ES or CS. Thus
- // either the current value will not change, or its change will have
- // no effect on the remainder of the analysis.
-
- for i := 1; i < s.Len()-1; i++ {
- t := s.types[i]
- if t == ES || t == CS {
- prevSepType := s.types[i-1]
- succSepType := s.types[i+1]
- if prevSepType == EN && succSepType == EN {
- s.types[i] = EN
- } else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
- s.types[i] = AN
- }
- }
- }
-
- // Rule W5.
- for i, t := range s.types {
- if t == ET {
- // locate end of sequence
- runStart := i
- runEnd := s.findRunLimit(runStart, ET)
-
- // check values at ends of sequence
- t := s.sos
- if runStart > 0 {
- t = s.types[runStart-1]
- }
- if t != EN {
- t = s.eos
- if runEnd < len(s.types) {
- t = s.types[runEnd]
- }
- }
- if t == EN {
- setTypes(s.types[runStart:runEnd], EN)
- }
- // continue at end of sequence
- i = runEnd
- }
- }
-
- // Rule W6.
- for i, t := range s.types {
- if t.in(ES, ET, CS) {
- s.types[i] = ON
- }
- }
-
- // Rule W7.
- for i, t := range s.types {
- if t == EN {
- // set default if we reach start of run
- prevStrongType := s.sos
- for j := i - 1; j >= 0; j-- {
- t = s.types[j]
- if t == L || t == R { // AL's have been changed to R
- prevStrongType = t
- break
- }
- }
- if prevStrongType == L {
- s.types[i] = L
- }
- }
- }
-}
-
-// 6) resolving neutral types Rules N1-N2.
-func (s *isolatingRunSequence) resolveNeutralTypes() {
-
- // on entry, only these types can be in resultTypes
- s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
-
- for i, t := range s.types {
- switch t {
- case WS, ON, B, S, RLI, LRI, FSI, PDI:
- // find bounds of run of neutrals
- runStart := i
- runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
-
- // determine effective types at ends of run
- var leadType, trailType Class
-
- // Note that the character found can only be L, R, AN, or
- // EN.
- if runStart == 0 {
- leadType = s.sos
- } else {
- leadType = s.types[runStart-1]
- if leadType.in(AN, EN) {
- leadType = R
- }
- }
- if runEnd == len(s.types) {
- trailType = s.eos
- } else {
- trailType = s.types[runEnd]
- if trailType.in(AN, EN) {
- trailType = R
- }
- }
-
- var resolvedType Class
- if leadType == trailType {
- // Rule N1.
- resolvedType = leadType
- } else {
- // Rule N2.
- // Notice the embedding level of the run is used, not
- // the paragraph embedding level.
- resolvedType = typeForLevel(s.level)
- }
-
- setTypes(s.types[runStart:runEnd], resolvedType)
-
- // skip over run of (former) neutrals
- i = runEnd
- }
- }
-}
-
-func setLevels(levels []level, newLevel level) {
- for i := range levels {
- levels[i] = newLevel
- }
-}
-
-func setTypes(types []Class, newType Class) {
- for i := range types {
- types[i] = newType
- }
-}
-
-// 7) resolving implicit embedding levels Rules I1, I2.
-func (s *isolatingRunSequence) resolveImplicitLevels() {
-
- // on entry, only these types can be in resultTypes
- s.assertOnly(L, R, EN, AN)
-
- s.resolvedLevels = make([]level, len(s.types))
- setLevels(s.resolvedLevels, s.level)
-
- if (s.level & 1) == 0 { // even level
- for i, t := range s.types {
- // Rule I1.
- if t == L {
- // no change
- } else if t == R {
- s.resolvedLevels[i] += 1
- } else { // t == AN || t == EN
- s.resolvedLevels[i] += 2
- }
- }
- } else { // odd level
- for i, t := range s.types {
- // Rule I2.
- if t == R {
- // no change
- } else { // t == L || t == AN || t == EN
- s.resolvedLevels[i] += 1
- }
- }
- }
-}
-
-// Applies the levels and types resolved in rules W1-I2 to the
-// resultLevels array.
-func (s *isolatingRunSequence) applyLevelsAndTypes() {
- for i, x := range s.indexes {
- s.p.resultTypes[x] = s.types[i]
- s.p.resultLevels[x] = s.resolvedLevels[i]
- }
-}
-
-// Return the limit of the run consisting only of the types in validSet
-// starting at index. This checks the value at index, and will return
-// index if that value is not in validSet.
-func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
-loop:
- for ; index < len(s.types); index++ {
- t := s.types[index]
- for _, valid := range validSet {
- if t == valid {
- continue loop
- }
- }
- return index // didn't find a match in validSet
- }
- return len(s.types)
-}
-
-// Algorithm validation. Assert that all values in types are in the
-// provided set.
-func (s *isolatingRunSequence) assertOnly(codes ...Class) {
-loop:
- for i, t := range s.types {
- for _, c := range codes {
- if t == c {
- continue loop
- }
- }
- log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
- }
-}
-
-// determineLevelRuns returns an array of level runs. Each level run is
-// described as an array of indexes into the input string.
-//
-// Determines the level runs. Rule X9 will be applied in determining the
-// runs, in the way that makes sure the characters that are supposed to be
-// removed are not included in the runs.
-func (p *paragraph) determineLevelRuns() [][]int {
- run := []int{}
- allRuns := [][]int{}
- currentLevel := implicitLevel
-
- for i := range p.initialTypes {
- if !isRemovedByX9(p.initialTypes[i]) {
- if p.resultLevels[i] != currentLevel {
- // we just encountered a new run; wrap up last run
- if currentLevel >= 0 { // only wrap it up if there was a run
- allRuns = append(allRuns, run)
- run = nil
- }
- // Start new run
- currentLevel = p.resultLevels[i]
- }
- run = append(run, i)
- }
- }
- // Wrap up the final run, if any
- if len(run) > 0 {
- allRuns = append(allRuns, run)
- }
- return allRuns
-}
-
-// Definition BD13. Determine isolating run sequences.
-func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
- levelRuns := p.determineLevelRuns()
-
- // Compute the run that each character belongs to
- runForCharacter := make([]int, p.Len())
- for i, run := range levelRuns {
- for _, index := range run {
- runForCharacter[index] = i
- }
- }
-
- sequences := []*isolatingRunSequence{}
-
- var currentRunSequence []int
-
- for _, run := range levelRuns {
- first := run[0]
- if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 {
- currentRunSequence = nil
- // int run = i;
- for {
- // Copy this level run into currentRunSequence
- currentRunSequence = append(currentRunSequence, run...)
-
- last := currentRunSequence[len(currentRunSequence)-1]
- lastT := p.initialTypes[last]
- if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
- run = levelRuns[runForCharacter[p.matchingPDI[last]]]
- } else {
- break
- }
- }
- sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
- }
- }
- return sequences
-}
-
-// Assign level information to characters removed by rule X9. This is for
-// ease of relating the level information to the original input data. Note
-// that the levels assigned to these codes are arbitrary, they're chosen so
-// as to avoid breaking level runs.
-func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
- for i, t := range p.initialTypes {
- if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
- p.resultTypes[i] = t
- p.resultLevels[i] = -1
- }
- }
- // now propagate forward the levels information (could have
- // propagated backward, the main thing is not to introduce a level
- // break where one doesn't already exist).
-
- if p.resultLevels[0] == -1 {
- p.resultLevels[0] = p.embeddingLevel
- }
- for i := 1; i < len(p.initialTypes); i++ {
- if p.resultLevels[i] == -1 {
- p.resultLevels[i] = p.resultLevels[i-1]
- }
- }
- // Embedding information is for informational purposes only so need not be
- // adjusted.
-}
-
-//
-// Output
-//
-
-// getLevels computes levels array breaking lines at offsets in linebreaks.
-// Rule L1.
-//
-// The linebreaks array must include at least one value. The values must be
-// in strictly increasing order (no duplicates) between 1 and the length of
-// the text, inclusive. The last value must be the length of the text.
-func (p *paragraph) getLevels(linebreaks []int) []level {
- // Note that since the previous processing has removed all
- // P, S, and WS values from resultTypes, the values referred to
- // in these rules are the initial types, before any processing
- // has been applied (including processing of overrides).
- //
- // This example implementation has reinserted explicit format codes
- // and BN, in order that the levels array correspond to the
- // initial text. Their final placement is not normative.
- // These codes are treated like WS in this implementation,
- // so they don't interrupt sequences of WS.
-
- validateLineBreaks(linebreaks, p.Len())
-
- result := append([]level(nil), p.resultLevels...)
-
- // don't worry about linebreaks since if there is a break within
- // a series of WS values preceding S, the linebreak itself
- // causes the reset.
- for i, t := range p.initialTypes {
- if t.in(B, S) {
- // Rule L1, clauses one and two.
- result[i] = p.embeddingLevel
-
- // Rule L1, clause three.
- for j := i - 1; j >= 0; j-- {
- if isWhitespace(p.initialTypes[j]) { // including format codes
- result[j] = p.embeddingLevel
- } else {
- break
- }
- }
- }
- }
-
- // Rule L1, clause four.
- start := 0
- for _, limit := range linebreaks {
- for j := limit - 1; j >= start; j-- {
- if isWhitespace(p.initialTypes[j]) { // including format codes
- result[j] = p.embeddingLevel
- } else {
- break
- }
- }
- start = limit
- }
-
- return result
-}
-
-// getReordering returns the reordering of lines from a visual index to a
-// logical index for line breaks at the given offsets.
-//
-// Lines are concatenated from left to right. So for example, the fifth
-// character from the left on the third line is
-//
-// getReordering(linebreaks)[linebreaks[1] + 4]
-//
-// (linebreaks[1] is the position after the last character of the second
-// line, which is also the index of the first character on the third line,
-// and adding four gets the fifth character from the left).
-//
-// The linebreaks array must include at least one value. The values must be
-// in strictly increasing order (no duplicates) between 1 and the length of
-// the text, inclusive. The last value must be the length of the text.
-func (p *paragraph) getReordering(linebreaks []int) []int {
- validateLineBreaks(linebreaks, p.Len())
-
- return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
-}
-
-// Return multiline reordering array for a given level array. Reordering
-// does not occur across a line break.
-func computeMultilineReordering(levels []level, linebreaks []int) []int {
- result := make([]int, len(levels))
-
- start := 0
- for _, limit := range linebreaks {
- tempLevels := make([]level, limit-start)
- copy(tempLevels, levels[start:])
-
- for j, order := range computeReordering(tempLevels) {
- result[start+j] = order + start
- }
- start = limit
- }
- return result
-}
-
-// Return reordering array for a given level array. This reorders a single
-// line. The reordering is a visual to logical map. For example, the
-// leftmost char is string.charAt(order[0]). Rule L2.
-func computeReordering(levels []level) []int {
- result := make([]int, len(levels))
- // initialize order
- for i := range result {
- result[i] = i
- }
-
- // locate highest level found on line.
- // Note the rules say text, but no reordering across line bounds is
- // performed, so this is sufficient.
- highestLevel := level(0)
- lowestOddLevel := level(maxDepth + 2)
- for _, level := range levels {
- if level > highestLevel {
- highestLevel = level
- }
- if level&1 != 0 && level < lowestOddLevel {
- lowestOddLevel = level
- }
- }
-
- for level := highestLevel; level >= lowestOddLevel; level-- {
- for i := 0; i < len(levels); i++ {
- if levels[i] >= level {
- // find range of text at or above this level
- start := i
- limit := i + 1
- for limit < len(levels) && levels[limit] >= level {
- limit++
- }
-
- for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
- result[j], result[k] = result[k], result[j]
- }
- // skip to end of level run
- i = limit
- }
- }
- }
-
- return result
-}
-
-// isWhitespace reports whether the type is considered a whitespace type for the
-// line break rules.
-func isWhitespace(c Class) bool {
- switch c {
- case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
- return true
- }
- return false
-}
-
-// isRemovedByX9 reports whether the type is one of the types removed in X9.
-func isRemovedByX9(c Class) bool {
- switch c {
- case LRE, RLE, LRO, RLO, PDF, BN:
- return true
- }
- return false
-}
-
-// typeForLevel reports the strong type (L or R) corresponding to the level.
-func typeForLevel(level level) Class {
- if (level & 0x1) == 0 {
- return L
- }
- return R
-}
-
-// TODO: change validation to not panic
-
-func validateTypes(types []Class) {
- if len(types) == 0 {
- log.Panic("types is null")
- }
- for i, t := range types[:len(types)-1] {
- if t == B {
- log.Panicf("B type before end of paragraph at index: %d", i)
- }
- }
-}
-
-func validateParagraphEmbeddingLevel(embeddingLevel level) {
- if embeddingLevel != implicitLevel &&
- embeddingLevel != 0 &&
- embeddingLevel != 1 {
- log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
- }
-}
-
-func validateLineBreaks(linebreaks []int, textLength int) {
- prev := 0
- for i, next := range linebreaks {
- if next <= prev {
- log.Panicf("bad linebreak: %d at index: %d", next, i)
- }
- prev = next
- }
- if prev != textLength {
- log.Panicf("last linebreak was %d, want %d", prev, textLength)
- }
-}
-
-func validatePbTypes(pairTypes []bracketType) {
- if len(pairTypes) == 0 {
- log.Panic("pairTypes is null")
- }
- for i, pt := range pairTypes {
- switch pt {
- case bpNone, bpOpen, bpClose:
- default:
- log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
- }
- }
-}
-
-func validatePbValues(pairValues []rune, pairTypes []bracketType) {
- if pairValues == nil {
- log.Panic("pairValues is null")
- }
- if len(pairTypes) != len(pairValues) {
- log.Panic("pairTypes is different length from pairValues")
- }
-}