From 8c12c6939aab9106db14ec2d11d983bc5b29fb2c Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Sun, 7 Jul 2019 21:33:44 +0100 Subject: Switch to modules --- vendor/golang.org/x/text/unicode/bidi/core.go | 1058 ------------------------- 1 file changed, 1058 deletions(-) delete mode 100644 vendor/golang.org/x/text/unicode/bidi/core.go (limited to 'vendor/golang.org/x/text/unicode/bidi/core.go') diff --git a/vendor/golang.org/x/text/unicode/bidi/core.go b/vendor/golang.org/x/text/unicode/bidi/core.go deleted file mode 100644 index 48d1440..0000000 --- a/vendor/golang.org/x/text/unicode/bidi/core.go +++ /dev/null @@ -1,1058 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bidi - -import "log" - -// This implementation is a port based on the reference implementation found at: -// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/ -// -// described in Unicode Bidirectional Algorithm (UAX #9). -// -// Input: -// There are two levels of input to the algorithm, since clients may prefer to -// supply some information from out-of-band sources rather than relying on the -// default behavior. -// -// - Bidi class array -// - Bidi class array, with externally supplied base line direction -// -// Output: -// Output is separated into several stages: -// -// - levels array over entire paragraph -// - reordering array over entire paragraph -// - levels array over line -// - reordering array over line -// -// Note that for conformance to the Unicode Bidirectional Algorithm, -// implementations are only required to generate correct reordering and -// character directionality (odd or even levels) over a line. Generating -// identical level arrays over a line is not required. Bidi explicit format -// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and -// positions as long as the rest of the input is properly reordered. -// -// As the algorithm is defined to operate on a single paragraph at a time, this -// implementation is written to handle single paragraphs. Thus rule P1 is -// presumed by this implementation-- the data provided to the implementation is -// assumed to be a single paragraph, and either contains no 'B' codes, or a -// single 'B' code at the end of the input. 'B' is allowed as input to -// illustrate how the algorithm assigns it a level. -// -// Also note that rules L3 and L4 depend on the rendering engine that uses the -// result of the bidi algorithm. This implementation assumes that the rendering -// engine expects combining marks in visual order (e.g. to the left of their -// base character in RTL runs) and that it adjusts the glyphs used to render -// mirrored characters that are in RTL runs so that they render appropriately. - -// level is the embedding level of a character. Even embedding levels indicate -// left-to-right order and odd levels indicate right-to-left order. The special -// level of -1 is reserved for undefined order. -type level int8 - -const implicitLevel level = -1 - -// in returns if x is equal to any of the values in set. -func (c Class) in(set ...Class) bool { - for _, s := range set { - if c == s { - return true - } - } - return false -} - -// A paragraph contains the state of a paragraph. -type paragraph struct { - initialTypes []Class - - // Arrays of properties needed for paired bracket evaluation in N0 - pairTypes []bracketType // paired Bracket types for paragraph - pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone - - embeddingLevel level // default: = implicitLevel; - - // at the paragraph levels - resultTypes []Class - resultLevels []level - - // Index of matching PDI for isolate initiator characters. For other - // characters, the value of matchingPDI will be set to -1. For isolate - // initiators with no matching PDI, matchingPDI will be set to the length of - // the input string. - matchingPDI []int - - // Index of matching isolate initiator for PDI characters. For other - // characters, and for PDIs with no matching isolate initiator, the value of - // matchingIsolateInitiator will be set to -1. - matchingIsolateInitiator []int -} - -// newParagraph initializes a paragraph. The user needs to supply a few arrays -// corresponding to the preprocessed text input. The types correspond to the -// Unicode BiDi classes for each rune. pairTypes indicates the bracket type for -// each rune. pairValues provides a unique bracket class identifier for each -// rune (suggested is the rune of the open bracket for opening and matching -// close brackets, after normalization). The embedding levels are optional, but -// may be supplied to encode embedding levels of styled text. -// -// TODO: return an error. -func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph { - validateTypes(types) - validatePbTypes(pairTypes) - validatePbValues(pairValues, pairTypes) - validateParagraphEmbeddingLevel(levels) - - p := ¶graph{ - initialTypes: append([]Class(nil), types...), - embeddingLevel: levels, - - pairTypes: pairTypes, - pairValues: pairValues, - - resultTypes: append([]Class(nil), types...), - } - p.run() - return p -} - -func (p *paragraph) Len() int { return len(p.initialTypes) } - -// The algorithm. Does not include line-based processing (Rules L1, L2). -// These are applied later in the line-based phase of the algorithm. -func (p *paragraph) run() { - p.determineMatchingIsolates() - - // 1) determining the paragraph level - // Rule P1 is the requirement for entering this algorithm. - // Rules P2, P3. - // If no externally supplied paragraph embedding level, use default. - if p.embeddingLevel == implicitLevel { - p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len()) - } - - // Initialize result levels to paragraph embedding level. - p.resultLevels = make([]level, p.Len()) - setLevels(p.resultLevels, p.embeddingLevel) - - // 2) Explicit levels and directions - // Rules X1-X8. - p.determineExplicitEmbeddingLevels() - - // Rule X9. - // We do not remove the embeddings, the overrides, the PDFs, and the BNs - // from the string explicitly. But they are not copied into isolating run - // sequences when they are created, so they are removed for all - // practical purposes. - - // Rule X10. - // Run remainder of algorithm one isolating run sequence at a time - for _, seq := range p.determineIsolatingRunSequences() { - // 3) resolving weak types - // Rules W1-W7. - seq.resolveWeakTypes() - - // 4a) resolving paired brackets - // Rule N0 - resolvePairedBrackets(seq) - - // 4b) resolving neutral types - // Rules N1-N3. - seq.resolveNeutralTypes() - - // 5) resolving implicit embedding levels - // Rules I1, I2. - seq.resolveImplicitLevels() - - // Apply the computed levels and types - seq.applyLevelsAndTypes() - } - - // Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and - // BNs. This is for convenience, so the resulting level array will have - // a value for every character. - p.assignLevelsToCharactersRemovedByX9() -} - -// determineMatchingIsolates determines the matching PDI for each isolate -// initiator and vice versa. -// -// Definition BD9. -// -// At the end of this function: -// -// - The member variable matchingPDI is set to point to the index of the -// matching PDI character for each isolate initiator character. If there is -// no matching PDI, it is set to the length of the input text. For other -// characters, it is set to -1. -// - The member variable matchingIsolateInitiator is set to point to the -// index of the matching isolate initiator character for each PDI character. -// If there is no matching isolate initiator, or the character is not a PDI, -// it is set to -1. -func (p *paragraph) determineMatchingIsolates() { - p.matchingPDI = make([]int, p.Len()) - p.matchingIsolateInitiator = make([]int, p.Len()) - - for i := range p.matchingIsolateInitiator { - p.matchingIsolateInitiator[i] = -1 - } - - for i := range p.matchingPDI { - p.matchingPDI[i] = -1 - - if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) { - depthCounter := 1 - for j := i + 1; j < p.Len(); j++ { - if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) { - depthCounter++ - } else if u == PDI { - if depthCounter--; depthCounter == 0 { - p.matchingPDI[i] = j - p.matchingIsolateInitiator[j] = i - break - } - } - } - if p.matchingPDI[i] == -1 { - p.matchingPDI[i] = p.Len() - } - } - } -} - -// determineParagraphEmbeddingLevel reports the resolved paragraph direction of -// the substring limited by the given range [start, end). -// -// Determines the paragraph level based on rules P2, P3. This is also used -// in rule X5c to find if an FSI should resolve to LRI or RLI. -func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level { - var strongType Class = unknownClass - - // Rule P2. - for i := start; i < end; i++ { - if t := p.resultTypes[i]; t.in(L, AL, R) { - strongType = t - break - } else if t.in(FSI, LRI, RLI) { - i = p.matchingPDI[i] // skip over to the matching PDI - if i > end { - log.Panic("assert (i <= end)") - } - } - } - // Rule P3. - switch strongType { - case unknownClass: // none found - // default embedding level when no strong types found is 0. - return 0 - case L: - return 0 - default: // AL, R - return 1 - } -} - -const maxDepth = 125 - -// This stack will store the embedding levels and override and isolated -// statuses -type directionalStatusStack struct { - stackCounter int - embeddingLevelStack [maxDepth + 1]level - overrideStatusStack [maxDepth + 1]Class - isolateStatusStack [maxDepth + 1]bool -} - -func (s *directionalStatusStack) empty() { s.stackCounter = 0 } -func (s *directionalStatusStack) pop() { s.stackCounter-- } -func (s *directionalStatusStack) depth() int { return s.stackCounter } - -func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) { - s.embeddingLevelStack[s.stackCounter] = level - s.overrideStatusStack[s.stackCounter] = overrideStatus - s.isolateStatusStack[s.stackCounter] = isolateStatus - s.stackCounter++ -} - -func (s *directionalStatusStack) lastEmbeddingLevel() level { - return s.embeddingLevelStack[s.stackCounter-1] -} - -func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class { - return s.overrideStatusStack[s.stackCounter-1] -} - -func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool { - return s.isolateStatusStack[s.stackCounter-1] -} - -// Determine explicit levels using rules X1 - X8 -func (p *paragraph) determineExplicitEmbeddingLevels() { - var stack directionalStatusStack - var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int - - // Rule X1. - stack.push(p.embeddingLevel, ON, false) - - for i, t := range p.resultTypes { - // Rules X2, X3, X4, X5, X5a, X5b, X5c - switch t { - case RLE, LRE, RLO, LRO, RLI, LRI, FSI: - isIsolate := t.in(RLI, LRI, FSI) - isRTL := t.in(RLE, RLO, RLI) - - // override if this is an FSI that resolves to RLI - if t == FSI { - isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1) - } - if isIsolate { - p.resultLevels[i] = stack.lastEmbeddingLevel() - if stack.lastDirectionalOverrideStatus() != ON { - p.resultTypes[i] = stack.lastDirectionalOverrideStatus() - } - } - - var newLevel level - if isRTL { - // least greater odd - newLevel = (stack.lastEmbeddingLevel() + 1) | 1 - } else { - // least greater even - newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1 - } - - if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 { - if isIsolate { - validIsolateCount++ - } - // Push new embedding level, override status, and isolated - // status. - // No check for valid stack counter, since the level check - // suffices. - switch t { - case LRO: - stack.push(newLevel, L, isIsolate) - case RLO: - stack.push(newLevel, R, isIsolate) - default: - stack.push(newLevel, ON, isIsolate) - } - // Not really part of the spec - if !isIsolate { - p.resultLevels[i] = newLevel - } - } else { - // This is an invalid explicit formatting character, - // so apply the "Otherwise" part of rules X2-X5b. - if isIsolate { - overflowIsolateCount++ - } else { // !isIsolate - if overflowIsolateCount == 0 { - overflowEmbeddingCount++ - } - } - } - - // Rule X6a - case PDI: - if overflowIsolateCount > 0 { - overflowIsolateCount-- - } else if validIsolateCount == 0 { - // do nothing - } else { - overflowEmbeddingCount = 0 - for !stack.lastDirectionalIsolateStatus() { - stack.pop() - } - stack.pop() - validIsolateCount-- - } - p.resultLevels[i] = stack.lastEmbeddingLevel() - - // Rule X7 - case PDF: - // Not really part of the spec - p.resultLevels[i] = stack.lastEmbeddingLevel() - - if overflowIsolateCount > 0 { - // do nothing - } else if overflowEmbeddingCount > 0 { - overflowEmbeddingCount-- - } else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 { - stack.pop() - } - - case B: // paragraph separator. - // Rule X8. - - // These values are reset for clarity, in this implementation B - // can only occur as the last code in the array. - stack.empty() - overflowIsolateCount = 0 - overflowEmbeddingCount = 0 - validIsolateCount = 0 - p.resultLevels[i] = p.embeddingLevel - - default: - p.resultLevels[i] = stack.lastEmbeddingLevel() - if stack.lastDirectionalOverrideStatus() != ON { - p.resultTypes[i] = stack.lastDirectionalOverrideStatus() - } - } - } -} - -type isolatingRunSequence struct { - p *paragraph - - indexes []int // indexes to the original string - - types []Class // type of each character using the index - resolvedLevels []level // resolved levels after application of rules - level level - sos, eos Class -} - -func (i *isolatingRunSequence) Len() int { return len(i.indexes) } - -func maxLevel(a, b level) level { - if a > b { - return a - } - return b -} - -// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, -// either L or R, for each isolating run sequence. -func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { - length := len(indexes) - types := make([]Class, length) - for i, x := range indexes { - types[i] = p.resultTypes[x] - } - - // assign level, sos and eos - prevChar := indexes[0] - 1 - for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) { - prevChar-- - } - prevLevel := p.embeddingLevel - if prevChar >= 0 { - prevLevel = p.resultLevels[prevChar] - } - - var succLevel level - lastType := types[length-1] - if lastType.in(LRI, RLI, FSI) { - succLevel = p.embeddingLevel - } else { - // the first character after the end of run sequence - limit := indexes[length-1] + 1 - for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ { - - } - succLevel = p.embeddingLevel - if limit < p.Len() { - succLevel = p.resultLevels[limit] - } - } - level := p.resultLevels[indexes[0]] - return &isolatingRunSequence{ - p: p, - indexes: indexes, - types: types, - level: level, - sos: typeForLevel(maxLevel(prevLevel, level)), - eos: typeForLevel(maxLevel(succLevel, level)), - } -} - -// Resolving weak types Rules W1-W7. -// -// Note that some weak types (EN, AN) remain after this processing is -// complete. -func (s *isolatingRunSequence) resolveWeakTypes() { - - // on entry, only these types remain - s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI) - - // Rule W1. - // Changes all NSMs. - preceedingCharacterType := s.sos - for i, t := range s.types { - if t == NSM { - s.types[i] = preceedingCharacterType - } else { - if t.in(LRI, RLI, FSI, PDI) { - preceedingCharacterType = ON - } - preceedingCharacterType = t - } - } - - // Rule W2. - // EN does not change at the start of the run, because sos != AL. - for i, t := range s.types { - if t == EN { - for j := i - 1; j >= 0; j-- { - if t := s.types[j]; t.in(L, R, AL) { - if t == AL { - s.types[i] = AN - } - break - } - } - } - } - - // Rule W3. - for i, t := range s.types { - if t == AL { - s.types[i] = R - } - } - - // Rule W4. - // Since there must be values on both sides for this rule to have an - // effect, the scan skips the first and last value. - // - // Although the scan proceeds left to right, and changes the type - // values in a way that would appear to affect the computations - // later in the scan, there is actually no problem. A change in the - // current value can only affect the value to its immediate right, - // and only affect it if it is ES or CS. But the current value can - // only change if the value to its right is not ES or CS. Thus - // either the current value will not change, or its change will have - // no effect on the remainder of the analysis. - - for i := 1; i < s.Len()-1; i++ { - t := s.types[i] - if t == ES || t == CS { - prevSepType := s.types[i-1] - succSepType := s.types[i+1] - if prevSepType == EN && succSepType == EN { - s.types[i] = EN - } else if s.types[i] == CS && prevSepType == AN && succSepType == AN { - s.types[i] = AN - } - } - } - - // Rule W5. - for i, t := range s.types { - if t == ET { - // locate end of sequence - runStart := i - runEnd := s.findRunLimit(runStart, ET) - - // check values at ends of sequence - t := s.sos - if runStart > 0 { - t = s.types[runStart-1] - } - if t != EN { - t = s.eos - if runEnd < len(s.types) { - t = s.types[runEnd] - } - } - if t == EN { - setTypes(s.types[runStart:runEnd], EN) - } - // continue at end of sequence - i = runEnd - } - } - - // Rule W6. - for i, t := range s.types { - if t.in(ES, ET, CS) { - s.types[i] = ON - } - } - - // Rule W7. - for i, t := range s.types { - if t == EN { - // set default if we reach start of run - prevStrongType := s.sos - for j := i - 1; j >= 0; j-- { - t = s.types[j] - if t == L || t == R { // AL's have been changed to R - prevStrongType = t - break - } - } - if prevStrongType == L { - s.types[i] = L - } - } - } -} - -// 6) resolving neutral types Rules N1-N2. -func (s *isolatingRunSequence) resolveNeutralTypes() { - - // on entry, only these types can be in resultTypes - s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI) - - for i, t := range s.types { - switch t { - case WS, ON, B, S, RLI, LRI, FSI, PDI: - // find bounds of run of neutrals - runStart := i - runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI) - - // determine effective types at ends of run - var leadType, trailType Class - - // Note that the character found can only be L, R, AN, or - // EN. - if runStart == 0 { - leadType = s.sos - } else { - leadType = s.types[runStart-1] - if leadType.in(AN, EN) { - leadType = R - } - } - if runEnd == len(s.types) { - trailType = s.eos - } else { - trailType = s.types[runEnd] - if trailType.in(AN, EN) { - trailType = R - } - } - - var resolvedType Class - if leadType == trailType { - // Rule N1. - resolvedType = leadType - } else { - // Rule N2. - // Notice the embedding level of the run is used, not - // the paragraph embedding level. - resolvedType = typeForLevel(s.level) - } - - setTypes(s.types[runStart:runEnd], resolvedType) - - // skip over run of (former) neutrals - i = runEnd - } - } -} - -func setLevels(levels []level, newLevel level) { - for i := range levels { - levels[i] = newLevel - } -} - -func setTypes(types []Class, newType Class) { - for i := range types { - types[i] = newType - } -} - -// 7) resolving implicit embedding levels Rules I1, I2. -func (s *isolatingRunSequence) resolveImplicitLevels() { - - // on entry, only these types can be in resultTypes - s.assertOnly(L, R, EN, AN) - - s.resolvedLevels = make([]level, len(s.types)) - setLevels(s.resolvedLevels, s.level) - - if (s.level & 1) == 0 { // even level - for i, t := range s.types { - // Rule I1. - if t == L { - // no change - } else if t == R { - s.resolvedLevels[i] += 1 - } else { // t == AN || t == EN - s.resolvedLevels[i] += 2 - } - } - } else { // odd level - for i, t := range s.types { - // Rule I2. - if t == R { - // no change - } else { // t == L || t == AN || t == EN - s.resolvedLevels[i] += 1 - } - } - } -} - -// Applies the levels and types resolved in rules W1-I2 to the -// resultLevels array. -func (s *isolatingRunSequence) applyLevelsAndTypes() { - for i, x := range s.indexes { - s.p.resultTypes[x] = s.types[i] - s.p.resultLevels[x] = s.resolvedLevels[i] - } -} - -// Return the limit of the run consisting only of the types in validSet -// starting at index. This checks the value at index, and will return -// index if that value is not in validSet. -func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int { -loop: - for ; index < len(s.types); index++ { - t := s.types[index] - for _, valid := range validSet { - if t == valid { - continue loop - } - } - return index // didn't find a match in validSet - } - return len(s.types) -} - -// Algorithm validation. Assert that all values in types are in the -// provided set. -func (s *isolatingRunSequence) assertOnly(codes ...Class) { -loop: - for i, t := range s.types { - for _, c := range codes { - if t == c { - continue loop - } - } - log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i]) - } -} - -// determineLevelRuns returns an array of level runs. Each level run is -// described as an array of indexes into the input string. -// -// Determines the level runs. Rule X9 will be applied in determining the -// runs, in the way that makes sure the characters that are supposed to be -// removed are not included in the runs. -func (p *paragraph) determineLevelRuns() [][]int { - run := []int{} - allRuns := [][]int{} - currentLevel := implicitLevel - - for i := range p.initialTypes { - if !isRemovedByX9(p.initialTypes[i]) { - if p.resultLevels[i] != currentLevel { - // we just encountered a new run; wrap up last run - if currentLevel >= 0 { // only wrap it up if there was a run - allRuns = append(allRuns, run) - run = nil - } - // Start new run - currentLevel = p.resultLevels[i] - } - run = append(run, i) - } - } - // Wrap up the final run, if any - if len(run) > 0 { - allRuns = append(allRuns, run) - } - return allRuns -} - -// Definition BD13. Determine isolating run sequences. -func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence { - levelRuns := p.determineLevelRuns() - - // Compute the run that each character belongs to - runForCharacter := make([]int, p.Len()) - for i, run := range levelRuns { - for _, index := range run { - runForCharacter[index] = i - } - } - - sequences := []*isolatingRunSequence{} - - var currentRunSequence []int - - for _, run := range levelRuns { - first := run[0] - if p.initialTypes[first] != PDI || p.matchingIsolateInitiator[first] == -1 { - currentRunSequence = nil - // int run = i; - for { - // Copy this level run into currentRunSequence - currentRunSequence = append(currentRunSequence, run...) - - last := currentRunSequence[len(currentRunSequence)-1] - lastT := p.initialTypes[last] - if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() { - run = levelRuns[runForCharacter[p.matchingPDI[last]]] - } else { - break - } - } - sequences = append(sequences, p.isolatingRunSequence(currentRunSequence)) - } - } - return sequences -} - -// Assign level information to characters removed by rule X9. This is for -// ease of relating the level information to the original input data. Note -// that the levels assigned to these codes are arbitrary, they're chosen so -// as to avoid breaking level runs. -func (p *paragraph) assignLevelsToCharactersRemovedByX9() { - for i, t := range p.initialTypes { - if t.in(LRE, RLE, LRO, RLO, PDF, BN) { - p.resultTypes[i] = t - p.resultLevels[i] = -1 - } - } - // now propagate forward the levels information (could have - // propagated backward, the main thing is not to introduce a level - // break where one doesn't already exist). - - if p.resultLevels[0] == -1 { - p.resultLevels[0] = p.embeddingLevel - } - for i := 1; i < len(p.initialTypes); i++ { - if p.resultLevels[i] == -1 { - p.resultLevels[i] = p.resultLevels[i-1] - } - } - // Embedding information is for informational purposes only so need not be - // adjusted. -} - -// -// Output -// - -// getLevels computes levels array breaking lines at offsets in linebreaks. -// Rule L1. -// -// The linebreaks array must include at least one value. The values must be -// in strictly increasing order (no duplicates) between 1 and the length of -// the text, inclusive. The last value must be the length of the text. -func (p *paragraph) getLevels(linebreaks []int) []level { - // Note that since the previous processing has removed all - // P, S, and WS values from resultTypes, the values referred to - // in these rules are the initial types, before any processing - // has been applied (including processing of overrides). - // - // This example implementation has reinserted explicit format codes - // and BN, in order that the levels array correspond to the - // initial text. Their final placement is not normative. - // These codes are treated like WS in this implementation, - // so they don't interrupt sequences of WS. - - validateLineBreaks(linebreaks, p.Len()) - - result := append([]level(nil), p.resultLevels...) - - // don't worry about linebreaks since if there is a break within - // a series of WS values preceding S, the linebreak itself - // causes the reset. - for i, t := range p.initialTypes { - if t.in(B, S) { - // Rule L1, clauses one and two. - result[i] = p.embeddingLevel - - // Rule L1, clause three. - for j := i - 1; j >= 0; j-- { - if isWhitespace(p.initialTypes[j]) { // including format codes - result[j] = p.embeddingLevel - } else { - break - } - } - } - } - - // Rule L1, clause four. - start := 0 - for _, limit := range linebreaks { - for j := limit - 1; j >= start; j-- { - if isWhitespace(p.initialTypes[j]) { // including format codes - result[j] = p.embeddingLevel - } else { - break - } - } - start = limit - } - - return result -} - -// getReordering returns the reordering of lines from a visual index to a -// logical index for line breaks at the given offsets. -// -// Lines are concatenated from left to right. So for example, the fifth -// character from the left on the third line is -// -// getReordering(linebreaks)[linebreaks[1] + 4] -// -// (linebreaks[1] is the position after the last character of the second -// line, which is also the index of the first character on the third line, -// and adding four gets the fifth character from the left). -// -// The linebreaks array must include at least one value. The values must be -// in strictly increasing order (no duplicates) between 1 and the length of -// the text, inclusive. The last value must be the length of the text. -func (p *paragraph) getReordering(linebreaks []int) []int { - validateLineBreaks(linebreaks, p.Len()) - - return computeMultilineReordering(p.getLevels(linebreaks), linebreaks) -} - -// Return multiline reordering array for a given level array. Reordering -// does not occur across a line break. -func computeMultilineReordering(levels []level, linebreaks []int) []int { - result := make([]int, len(levels)) - - start := 0 - for _, limit := range linebreaks { - tempLevels := make([]level, limit-start) - copy(tempLevels, levels[start:]) - - for j, order := range computeReordering(tempLevels) { - result[start+j] = order + start - } - start = limit - } - return result -} - -// Return reordering array for a given level array. This reorders a single -// line. The reordering is a visual to logical map. For example, the -// leftmost char is string.charAt(order[0]). Rule L2. -func computeReordering(levels []level) []int { - result := make([]int, len(levels)) - // initialize order - for i := range result { - result[i] = i - } - - // locate highest level found on line. - // Note the rules say text, but no reordering across line bounds is - // performed, so this is sufficient. - highestLevel := level(0) - lowestOddLevel := level(maxDepth + 2) - for _, level := range levels { - if level > highestLevel { - highestLevel = level - } - if level&1 != 0 && level < lowestOddLevel { - lowestOddLevel = level - } - } - - for level := highestLevel; level >= lowestOddLevel; level-- { - for i := 0; i < len(levels); i++ { - if levels[i] >= level { - // find range of text at or above this level - start := i - limit := i + 1 - for limit < len(levels) && levels[limit] >= level { - limit++ - } - - for j, k := start, limit-1; j < k; j, k = j+1, k-1 { - result[j], result[k] = result[k], result[j] - } - // skip to end of level run - i = limit - } - } - } - - return result -} - -// isWhitespace reports whether the type is considered a whitespace type for the -// line break rules. -func isWhitespace(c Class) bool { - switch c { - case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS: - return true - } - return false -} - -// isRemovedByX9 reports whether the type is one of the types removed in X9. -func isRemovedByX9(c Class) bool { - switch c { - case LRE, RLE, LRO, RLO, PDF, BN: - return true - } - return false -} - -// typeForLevel reports the strong type (L or R) corresponding to the level. -func typeForLevel(level level) Class { - if (level & 0x1) == 0 { - return L - } - return R -} - -// TODO: change validation to not panic - -func validateTypes(types []Class) { - if len(types) == 0 { - log.Panic("types is null") - } - for i, t := range types[:len(types)-1] { - if t == B { - log.Panicf("B type before end of paragraph at index: %d", i) - } - } -} - -func validateParagraphEmbeddingLevel(embeddingLevel level) { - if embeddingLevel != implicitLevel && - embeddingLevel != 0 && - embeddingLevel != 1 { - log.Panicf("illegal paragraph embedding level: %d", embeddingLevel) - } -} - -func validateLineBreaks(linebreaks []int, textLength int) { - prev := 0 - for i, next := range linebreaks { - if next <= prev { - log.Panicf("bad linebreak: %d at index: %d", next, i) - } - prev = next - } - if prev != textLength { - log.Panicf("last linebreak was %d, want %d", prev, textLength) - } -} - -func validatePbTypes(pairTypes []bracketType) { - if len(pairTypes) == 0 { - log.Panic("pairTypes is null") - } - for i, pt := range pairTypes { - switch pt { - case bpNone, bpOpen, bpClose: - default: - log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i]) - } - } -} - -func validatePbValues(pairValues []rune, pairTypes []bracketType) { - if pairValues == nil { - log.Panic("pairValues is null") - } - if len(pairTypes) != len(pairValues) { - log.Panic("pairTypes is different length from pairValues") - } -} -- cgit v1.2.3