From 8c12c6939aab9106db14ec2d11d983bc5b29fb2c Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Sun, 7 Jul 2019 21:33:44 +0100 Subject: Switch to modules --- vendor/golang.org/x/text/unicode/norm/forminfo.go | 278 ---------------------- 1 file changed, 278 deletions(-) delete mode 100644 vendor/golang.org/x/text/unicode/norm/forminfo.go (limited to 'vendor/golang.org/x/text/unicode/norm/forminfo.go') diff --git a/vendor/golang.org/x/text/unicode/norm/forminfo.go b/vendor/golang.org/x/text/unicode/norm/forminfo.go deleted file mode 100644 index 526c703..0000000 --- a/vendor/golang.org/x/text/unicode/norm/forminfo.go +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package norm - -import "encoding/binary" - -// This file contains Form-specific logic and wrappers for data in tables.go. - -// Rune info is stored in a separate trie per composing form. A composing form -// and its corresponding decomposing form share the same trie. Each trie maps -// a rune to a uint16. The values take two forms. For v >= 0x8000: -// bits -// 15: 1 (inverse of NFD_QC bit of qcInfo) -// 13..7: qcInfo (see below). isYesD is always true (no decomposition). -// 6..0: ccc (compressed CCC value). -// For v < 0x8000, the respective rune has a decomposition and v is an index -// into a byte array of UTF-8 decomposition sequences and additional info and -// has the form: -//
//    <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero. The value of v determines which ccc are appended
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC
// there is an additional leading ccc. The value of tccc itself is the
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
// are the number of trailing non-starters.

// Masks applied to qcInfo values and to the header byte that precedes each
// decomposition sequence.
const (
	qcInfoMask      = 0x3F // to clear all but the relevant bits in a qcInfo
	headerLenMask   = 0x3F // extract the length value from the header byte
	headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)

// Properties provides access to normalization properties of a rune.
type Properties struct {
	pos   uint8  // start position in reorderBuffer; used in composition.go
	size  uint8  // length of UTF-8 encoding of this rune
	ccc   uint8  // leading canonical combining class (ccc if not decomposition)
	tccc  uint8  // trailing canonical combining class (ccc if not decomposition)
	nLead uint8  // number of leading non-starters.
	flags qcInfo // quick check flags
	index uint16 // trie value of a rune that has a decomposition (offset of its header byte); 0 means no decomposition
}

// functions dispatchable per form

// lookupFunc is the signature shared by the per-form property lookups; it
// returns the Properties derived from input b at index i.
type lookupFunc func(b input, i int) Properties

// formInfo holds Form-specific functions and tables.
-type formInfo struct { - form Form - composing, compatibility bool // form type - info lookupFunc - nextMain iterFunc -} - -var formTable = []*formInfo{{ - form: NFC, - composing: true, - compatibility: false, - info: lookupInfoNFC, - nextMain: nextComposed, -}, { - form: NFD, - composing: false, - compatibility: false, - info: lookupInfoNFC, - nextMain: nextDecomposed, -}, { - form: NFKC, - composing: true, - compatibility: true, - info: lookupInfoNFKC, - nextMain: nextComposed, -}, { - form: NFKD, - composing: false, - compatibility: true, - info: lookupInfoNFKC, - nextMain: nextDecomposed, -}} - -// We do not distinguish between boundaries for NFC, NFD, etc. to avoid -// unexpected behavior for the user. For example, in NFD, there is a boundary -// after 'a'. However, 'a' might combine with modifiers, so from the application's -// perspective it is not a good boundary. We will therefore always use the -// boundaries for the combining variants. - -// BoundaryBefore returns true if this rune starts a new segment and -// cannot combine with any rune on the left. -func (p Properties) BoundaryBefore() bool { - if p.ccc == 0 && !p.combinesBackward() { - return true - } - // We assume that the CCC of the first character in a decomposition - // is always non-zero if different from info.ccc and that we can return - // false at this point. This is verified by maketables. - return false -} - -// BoundaryAfter returns true if runes cannot combine with or otherwise -// interact with this or previous runes. -func (p Properties) BoundaryAfter() bool { - // TODO: loosen these conditions. - return p.isInert() -} - -// We pack quick check data in 4 bits: -// 5: Combines forward (0 == false, 1 == true) -// 4..3: NFC_QC Yes(00), No (10), or Maybe (11) -// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. -// 1..0: Number of trailing non-starters. -// -// When all 4 bits are zero, the character is inert, meaning it is never -// influenced by normalization. 
-type qcInfo uint8 - -func (p Properties) isYesC() bool { return p.flags&0x10 == 0 } -func (p Properties) isYesD() bool { return p.flags&0x4 == 0 } - -func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 } -func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe -func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD - -func (p Properties) isInert() bool { - return p.flags&qcInfoMask == 0 && p.ccc == 0 -} - -func (p Properties) multiSegment() bool { - return p.index >= firstMulti && p.index < endMulti -} - -func (p Properties) nLeadingNonStarters() uint8 { - return p.nLead -} - -func (p Properties) nTrailingNonStarters() uint8 { - return uint8(p.flags & 0x03) -} - -// Decomposition returns the decomposition for the underlying rune -// or nil if there is none. -func (p Properties) Decomposition() []byte { - // TODO: create the decomposition for Hangul? - if p.index == 0 { - return nil - } - i := p.index - n := decomps[i] & headerLenMask - i++ - return decomps[i : i+uint16(n)] -} - -// Size returns the length of UTF-8 encoding of the rune. -func (p Properties) Size() int { - return int(p.size) -} - -// CCC returns the canonical combining class of the underlying rune. -func (p Properties) CCC() uint8 { - if p.index >= firstCCCZeroExcept { - return 0 - } - return ccc[p.ccc] -} - -// LeadCCC returns the CCC of the first rune in the decomposition. -// If there is no decomposition, LeadCCC equals CCC. -func (p Properties) LeadCCC() uint8 { - return ccc[p.ccc] -} - -// TrailCCC returns the CCC of the last rune in the decomposition. -// If there is no decomposition, TrailCCC equals CCC. 
-func (p Properties) TrailCCC() uint8 { - return ccc[p.tccc] -} - -func buildRecompMap() { - recompMap = make(map[uint32]rune, len(recompMapPacked)/8) - var buf [8]byte - for i := 0; i < len(recompMapPacked); i += 8 { - copy(buf[:], recompMapPacked[i:i+8]) - key := binary.BigEndian.Uint32(buf[:4]) - val := binary.BigEndian.Uint32(buf[4:]) - recompMap[key] = rune(val) - } -} - -// Recomposition -// We use 32-bit keys instead of 64-bit for the two codepoint keys. -// This clips off the bits of three entries, but we know this will not -// result in a collision. In the unlikely event that changes to -// UnicodeData.txt introduce collisions, the compiler will catch it. -// Note that the recomposition map for NFC and NFKC are identical. - -// combine returns the combined rune or 0 if it doesn't exist. -// -// The caller is responsible for calling -// recompMapOnce.Do(buildRecompMap) sometime before this is called. -func combine(a, b rune) rune { - key := uint32(uint16(a))<<16 + uint32(uint16(b)) - if recompMap == nil { - panic("caller error") // see func comment - } - return recompMap[key] -} - -func lookupInfoNFC(b input, i int) Properties { - v, sz := b.charinfoNFC(i) - return compInfo(v, sz) -} - -func lookupInfoNFKC(b input, i int) Properties { - v, sz := b.charinfoNFKC(i) - return compInfo(v, sz) -} - -// Properties returns properties for the first rune in s. -func (f Form) Properties(s []byte) Properties { - if f == NFC || f == NFD { - return compInfo(nfcData.lookup(s)) - } - return compInfo(nfkcData.lookup(s)) -} - -// PropertiesString returns properties for the first rune in s. -func (f Form) PropertiesString(s string) Properties { - if f == NFC || f == NFD { - return compInfo(nfcData.lookupString(s)) - } - return compInfo(nfkcData.lookupString(s)) -} - -// compInfo converts the information contained in v and sz -// to a Properties. See the comment at the top of the file -// for more information on the format. 
-func compInfo(v uint16, sz int) Properties { - if v == 0 { - return Properties{size: uint8(sz)} - } else if v >= 0x8000 { - p := Properties{ - size: uint8(sz), - ccc: uint8(v), - tccc: uint8(v), - flags: qcInfo(v >> 8), - } - if p.ccc > 0 || p.combinesBackward() { - p.nLead = uint8(p.flags & 0x3) - } - return p - } - // has decomposition - h := decomps[v] - f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4 - p := Properties{size: uint8(sz), flags: f, index: v} - if v >= firstCCC { - v += uint16(h&headerLenMask) + 1 - c := decomps[v] - p.tccc = c >> 2 - p.flags |= qcInfo(c & 0x3) - if v >= firstLeadingCCC { - p.nLead = c & 0x3 - if v >= firstStarterWithNLead { - // We were tricked. Remove the decomposition. - p.flags &= 0x03 - p.index = 0 - return p - } - p.ccc = decomps[v+1] - } - } - return p -} -- cgit v1.2.3