aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/unicode/bidi/prop.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/unicode/bidi/prop.go')
-rw-r--r--vendor/golang.org/x/text/unicode/bidi/prop.go206
1 files changed, 206 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/unicode/bidi/prop.go b/vendor/golang.org/x/text/unicode/bidi/prop.go
new file mode 100644
index 0000000..7c9484e
--- /dev/null
+++ b/vendor/golang.org/x/text/unicode/bidi/prop.go
@@ -0,0 +1,206 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bidi
+
+import "unicode/utf8"
+
+// Properties provides access to BiDi properties of runes.
+type Properties struct {
+ entry uint8
+ last uint8
+}
+
+var trie = newBidiTrie(0)
+
+// TODO: using this for bidirule reduces the running time by about 5%. Consider
+// if this is worth exposing or if we can find a way to speed up the Class
+// method.
+//
+// // CompactClass is like Class, but maps all of the BiDi control classes
+// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
+// func (p Properties) CompactClass() Class {
+// return Class(p.entry & 0x0F)
+// }
+
+// Class returns the Bidi class for p.
+func (p Properties) Class() Class {
+ c := Class(p.entry & 0x0F)
+ if c == Control {
+ c = controlByteToClass[p.last&0xF]
+ }
+ return c
+}
+
+// IsBracket reports whether the rune is a bracket.
+func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
+
+// IsOpeningBracket reports whether the rune is an opening bracket.
+// IsBracket must return true.
+func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
+
+// TODO: find a better API and expose.
+func (p Properties) reverseBracket(r rune) rune {
+ return xorMasks[p.entry>>xorMaskShift] ^ r
+}
+
+var controlByteToClass = [16]Class{
+ 0xD: LRO, // U+202D LeftToRightOverride,
+ 0xE: RLO, // U+202E RightToLeftOverride,
+ 0xA: LRE, // U+202A LeftToRightEmbedding,
+ 0xB: RLE, // U+202B RightToLeftEmbedding,
+ 0xC: PDF, // U+202C PopDirectionalFormat,
+ 0x6: LRI, // U+2066 LeftToRightIsolate,
+ 0x7: RLI, // U+2067 RightToLeftIsolate,
+ 0x8: FSI, // U+2068 FirstStrongIsolate,
+ 0x9: PDI, // U+2069 PopDirectionalIsolate,
+}
+
+// LookupRune returns properties for r.
+func LookupRune(r rune) (p Properties, size int) {
+ var buf [4]byte
+ n := utf8.EncodeRune(buf[:], r)
+ return Lookup(buf[:n])
+}
+
+// TODO: these lookup methods are based on the generated trie code. The returned
+// sizes have slightly different semantics from the generated code, in that it
+// always returns size==1 for an illegal UTF-8 byte (instead of the length
+// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
+// leave invalid UTF-8 untouched, in which case it has performance benefits to
+// do so (without changing the semantics). Bidi requires the semantics used here
+// for the bidirule implementation to be compatible with the Go semantics.
+// They ultimately should perhaps be adopted by all trie implementations, for
+// convenience sake.
+// This unrolled code also boosts performance of the secure/bidirule package by
+// about 30%.
+// So, to remove this code:
+// - add option to trie generator to define return type.
+// - always return 1 byte size for ill-formed UTF-8 runes.
+
+// Lookup returns properties for the first rune in s and the width in bytes of
+// its encoding. The size will be 0 if s does not hold enough bytes to complete
+// the encoding.
+func Lookup(s []byte) (p Properties, sz int) {
+ c0 := s[0]
+ switch {
+ case c0 < 0x80: // is ASCII
+ return Properties{entry: bidiValues[c0]}, 1
+ case c0 < 0xC2:
+ return Properties{}, 1
+ case c0 < 0xE0: // 2-byte UTF-8
+ if len(s) < 2 {
+ return Properties{}, 0
+ }
+ i := bidiIndex[c0]
+ c1 := s[1]
+ if c1 < 0x80 || 0xC0 <= c1 {
+ return Properties{}, 1
+ }
+ return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
+ case c0 < 0xF0: // 3-byte UTF-8
+ if len(s) < 3 {
+ return Properties{}, 0
+ }
+ i := bidiIndex[c0]
+ c1 := s[1]
+ if c1 < 0x80 || 0xC0 <= c1 {
+ return Properties{}, 1
+ }
+ o := uint32(i)<<6 + uint32(c1)
+ i = bidiIndex[o]
+ c2 := s[2]
+ if c2 < 0x80 || 0xC0 <= c2 {
+ return Properties{}, 1
+ }
+ return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
+ case c0 < 0xF8: // 4-byte UTF-8
+ if len(s) < 4 {
+ return Properties{}, 0
+ }
+ i := bidiIndex[c0]
+ c1 := s[1]
+ if c1 < 0x80 || 0xC0 <= c1 {
+ return Properties{}, 1
+ }
+ o := uint32(i)<<6 + uint32(c1)
+ i = bidiIndex[o]
+ c2 := s[2]
+ if c2 < 0x80 || 0xC0 <= c2 {
+ return Properties{}, 1
+ }
+ o = uint32(i)<<6 + uint32(c2)
+ i = bidiIndex[o]
+ c3 := s[3]
+ if c3 < 0x80 || 0xC0 <= c3 {
+ return Properties{}, 1
+ }
+ return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
+ }
+ // Illegal rune
+ return Properties{}, 1
+}
+
+// LookupString returns properties for the first rune in s and the width in
+// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
+// complete the encoding.
+func LookupString(s string) (p Properties, sz int) {
+ c0 := s[0]
+ switch {
+ case c0 < 0x80: // is ASCII
+ return Properties{entry: bidiValues[c0]}, 1
+ case c0 < 0xC2:
+ return Properties{}, 1
+ case c0 < 0xE0: // 2-byte UTF-8
+ if len(s) < 2 {
+ return Properties{}, 0
+ }
+ i := bidiIndex[c0]
+ c1 := s[1]
+ if c1 < 0x80 || 0xC0 <= c1 {
+ return Properties{}, 1
+ }
+ return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
+ case c0 < 0xF0: // 3-byte UTF-8
+ if len(s) < 3 {
+ return Properties{}, 0
+ }
+ i := bidiIndex[c0]
+ c1 := s[1]
+ if c1 < 0x80 || 0xC0 <= c1 {
+ return Properties{}, 1
+ }
+ o := uint32(i)<<6 + uint32(c1)
+ i = bidiIndex[o]
+ c2 := s[2]
+ if c2 < 0x80 || 0xC0 <= c2 {
+ return Properties{}, 1
+ }
+ return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
+ case c0 < 0xF8: // 4-byte UTF-8
+ if len(s) < 4 {
+ return Properties{}, 0
+ }
+ i := bidiIndex[c0]
+ c1 := s[1]
+ if c1 < 0x80 || 0xC0 <= c1 {
+ return Properties{}, 1
+ }
+ o := uint32(i)<<6 + uint32(c1)
+ i = bidiIndex[o]
+ c2 := s[2]
+ if c2 < 0x80 || 0xC0 <= c2 {
+ return Properties{}, 1
+ }
+ o = uint32(i)<<6 + uint32(c2)
+ i = bidiIndex[o]
+ c3 := s[3]
+ if c3 < 0x80 || 0xC0 <= c3 {
+ return Properties{}, 1
+ }
+ return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
+ }
+ // Illegal rune
+ return Properties{}, 1
+}