// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package rangetable import ( "unicode" ) // atEnd is used to mark a completed iteration. const atEnd = unicode.MaxRune + 1 // Merge returns a new RangeTable that is the union of the given tables. // It can also be used to compact user-created RangeTables. The entries in // R16 and R32 for any given RangeTable should be sorted and non-overlapping. // // A lookup in the resulting table can be several times faster than using In // directly on the ranges. Merge is an expensive operation, however, and only // makes sense if one intends to use the result for more than a couple of // hundred lookups. func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable { rt := &unicode.RangeTable{} if len(ranges) == 0 { return rt } iter := tablesIter(make([]tableIndex, len(ranges))) for i, t := range ranges { iter[i] = tableIndex{t, 0, atEnd} if len(t.R16) > 0 { iter[i].next = rune(t.R16[0].Lo) } } if r0 := iter.next16(); r0.Stride != 0 { for { r1 := iter.next16() if r1.Stride == 0 { rt.R16 = append(rt.R16, r0) break } stride := r1.Lo - r0.Hi if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { // Fully merge the next range into the previous one. r0.Hi, r0.Stride = r1.Hi, stride continue } else if stride == r0.Stride { // Move the first element of r1 to r0. This may eliminate an // entry. r0.Hi = r1.Lo r0.Stride = stride r1.Lo = r1.Lo + r1.Stride if r1.Lo > r1.Hi { continue } } rt.R16 = append(rt.R16, r0) r0 = r1 } } for i, t := range ranges { iter[i] = tableIndex{t, 0, atEnd} if len(t.R32) > 0 { iter[i].next = rune(t.R32[0].Lo) } } if r0 := iter.next32(); r0.Stride != 0 { for { r1 := iter.next32() if r1.Stride == 0 { rt.R32 = append(rt.R32, r0) break } stride := r1.Lo - r0.Hi if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { // Fully merge the next range into the previous one. r0.Hi, r0.Stride = r1.Hi, stride continue } else if stride == r0.Stride { // Move the first element of r1 to r0. This may eliminate an // entry. r0.Hi = r1.Lo r1.Lo = r1.Lo + r1.Stride if r1.Lo > r1.Hi { continue } } rt.R32 = append(rt.R32, r0) r0 = r1 } } for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ { rt.LatinOffset = i + 1 } return rt } type tableIndex struct { t *unicode.RangeTable p uint32 next rune } type tablesIter []tableIndex // sortIter does an insertion sort using the next field of tableIndex. Insertion // sort is a good sorting algorithm for this case. func sortIter(t []tableIndex) { for i := range t { for j := i; j > 0 && t[j-1].next > t[j].next; j-- { t[j], t[j-1] = t[j-1], t[j] } } } // next16 finds the ranged to be added to the table. If ranges overlap between // multiple tables it clips the result to a non-overlapping range if the // elements are not fully subsumed. It returns a zero range if there are no more // ranges. func (ti tablesIter) next16() unicode.Range16 { sortIter(ti) t0 := ti[0] if t0.next == atEnd { return unicode.Range16{} } r0 := t0.t.R16[t0.p] r0.Lo = uint16(t0.next) // We restrict the Hi of the current range if it overlaps with another range. for i := range ti { tn := ti[i] // Since our tableIndices are sorted by next, we can break if the there // is no overlap. The first value of a next range can always be merged // into the current one, so we can break in case of equality as well. if rune(r0.Hi) <= tn.next { break } rn := tn.t.R16[tn.p] rn.Lo = uint16(tn.next) // Limit r0.Hi based on next ranges in list, but allow it to overlap // with ranges as long as it subsumes it. m := (rn.Lo - r0.Lo) % r0.Stride if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { // Overlap, take the min of the two Hi values: for simplicity's sake // we only process one range at a time. if r0.Hi > rn.Hi { r0.Hi = rn.Hi } } else { // Not a compatible stride. Set to the last possible value before // rn.Lo, but ensure there is at least one value. if x := rn.Lo - m; r0.Lo <= x { r0.Hi = x } break } } // Update the next values for each table. for i := range ti { tn := &ti[i] if rune(r0.Hi) < tn.next { break } rn := tn.t.R16[tn.p] stride := rune(rn.Stride) tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) if rune(rn.Hi) < tn.next { if tn.p++; int(tn.p) == len(tn.t.R16) { tn.next = atEnd } else { tn.next = rune(tn.t.R16[tn.p].Lo) } } } if r0.Lo == r0.Hi { r0.Stride = 1 } return r0 } // next32 finds the ranged to be added to the table. If ranges overlap between // multiple tables it clips the result to a non-overlapping range if the // elements are not fully subsumed. It returns a zero range if there are no more // ranges. func (ti tablesIter) next32() unicode.Range32 { sortIter(ti) t0 := ti[0] if t0.next == atEnd { return unicode.Range32{} } r0 := t0.t.R32[t0.p] r0.Lo = uint32(t0.next) // We restrict the Hi of the current range if it overlaps with another range. for i := range ti { tn := ti[i] // Since our tableIndices are sorted by next, we can break if the there // is no overlap. The first value of a next range can always be merged // into the current one, so we can break in case of equality as well. if rune(r0.Hi) <= tn.next { break } rn := tn.t.R32[tn.p] rn.Lo = uint32(tn.next) // Limit r0.Hi based on next ranges in list, but allow it to overlap // with ranges as long as it subsumes it. m := (rn.Lo - r0.Lo) % r0.Stride if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { // Overlap, take the min of the two Hi values: for simplicity's sake // we only process one range at a time. if r0.Hi > rn.Hi { r0.Hi = rn.Hi } } else { // Not a compatible stride. Set to the last possible value before // rn.Lo, but ensure there is at least one value. if x := rn.Lo - m; r0.Lo <= x { r0.Hi = x } break } } // Update the next values for each table. for i := range ti { tn := &ti[i] if rune(r0.Hi) < tn.next { break } rn := tn.t.R32[tn.p] stride := rune(rn.Stride) tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) if rune(rn.Hi) < tn.next { if tn.p++; int(tn.p) == len(tn.t.R32) { tn.next = atEnd } else { tn.next = rune(tn.t.R32[tn.p].Lo) } } } if r0.Lo == r0.Hi { r0.Stride = 1 } return r0 }