1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
11 // atEnd is used to mark a completed iteration.
12 const atEnd = unicode.MaxRune + 1
14 // Merge returns a new RangeTable that is the union of the given tables.
15 // It can also be used to compact user-created RangeTables. The entries in
16 // R16 and R32 for any given RangeTable should be sorted and non-overlapping.
18 // A lookup in the resulting table can be several times faster than using In
19 // directly on the ranges. Merge is an expensive operation, however, and only
20 // makes sense if one intends to use the result for more than a couple of
22 func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
23 rt := &unicode.RangeTable{}
28 iter := tablesIter(make([]tableIndex, len(ranges)))
30 for i, t := range ranges {
31 iter[i] = tableIndex{t, 0, atEnd}
33 iter[i].next = rune(t.R16[0].Lo)
37 if r0 := iter.next16(); r0.Stride != 0 {
41 rt.R16 = append(rt.R16, r0)
44 stride := r1.Lo - r0.Hi
45 if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
46 // Fully merge the next range into the previous one.
47 r0.Hi, r0.Stride = r1.Hi, stride
49 } else if stride == r0.Stride {
50 // Move the first element of r1 to r0. This may eliminate an
54 r1.Lo = r1.Lo + r1.Stride
59 rt.R16 = append(rt.R16, r0)
64 for i, t := range ranges {
65 iter[i] = tableIndex{t, 0, atEnd}
67 iter[i].next = rune(t.R32[0].Lo)
71 if r0 := iter.next32(); r0.Stride != 0 {
75 rt.R32 = append(rt.R32, r0)
78 stride := r1.Lo - r0.Hi
79 if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
80 // Fully merge the next range into the previous one.
81 r0.Hi, r0.Stride = r1.Hi, stride
83 } else if stride == r0.Stride {
84 // Move the first element of r1 to r0. This may eliminate an
87 r1.Lo = r1.Lo + r1.Stride
92 rt.R32 = append(rt.R32, r0)
97 for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
98 rt.LatinOffset = i + 1
104 type tableIndex struct {
105 t *unicode.RangeTable
110 type tablesIter []tableIndex
112 // sortIter does an insertion sort using the next field of tableIndex. Insertion
113 // sort is a good sorting algorithm for this case.
114 func sortIter(t []tableIndex) {
116 for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
117 t[j], t[j-1] = t[j-1], t[j]
122 // next16 finds the ranged to be added to the table. If ranges overlap between
123 // multiple tables it clips the result to a non-overlapping range if the
124 // elements are not fully subsumed. It returns a zero range if there are no more
126 func (ti tablesIter) next16() unicode.Range16 {
130 if t0.next == atEnd {
131 return unicode.Range16{}
134 r0.Lo = uint16(t0.next)
136 // We restrict the Hi of the current range if it overlaps with another range.
139 // Since our tableIndices are sorted by next, we can break if the there
140 // is no overlap. The first value of a next range can always be merged
141 // into the current one, so we can break in case of equality as well.
142 if rune(r0.Hi) <= tn.next {
146 rn.Lo = uint16(tn.next)
148 // Limit r0.Hi based on next ranges in list, but allow it to overlap
149 // with ranges as long as it subsumes it.
150 m := (rn.Lo - r0.Lo) % r0.Stride
151 if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
152 // Overlap, take the min of the two Hi values: for simplicity's sake
153 // we only process one range at a time.
158 // Not a compatible stride. Set to the last possible value before
159 // rn.Lo, but ensure there is at least one value.
160 if x := rn.Lo - m; r0.Lo <= x {
167 // Update the next values for each table.
170 if rune(r0.Hi) < tn.next {
174 stride := rune(rn.Stride)
175 tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
176 if rune(rn.Hi) < tn.next {
177 if tn.p++; int(tn.p) == len(tn.t.R16) {
180 tn.next = rune(tn.t.R16[tn.p].Lo)
192 // next32 finds the ranged to be added to the table. If ranges overlap between
193 // multiple tables it clips the result to a non-overlapping range if the
194 // elements are not fully subsumed. It returns a zero range if there are no more
196 func (ti tablesIter) next32() unicode.Range32 {
200 if t0.next == atEnd {
201 return unicode.Range32{}
204 r0.Lo = uint32(t0.next)
206 // We restrict the Hi of the current range if it overlaps with another range.
209 // Since our tableIndices are sorted by next, we can break if the there
210 // is no overlap. The first value of a next range can always be merged
211 // into the current one, so we can break in case of equality as well.
212 if rune(r0.Hi) <= tn.next {
216 rn.Lo = uint32(tn.next)
218 // Limit r0.Hi based on next ranges in list, but allow it to overlap
219 // with ranges as long as it subsumes it.
220 m := (rn.Lo - r0.Lo) % r0.Stride
221 if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
222 // Overlap, take the min of the two Hi values: for simplicity's sake
223 // we only process one range at a time.
228 // Not a compatible stride. Set to the last possible value before
229 // rn.Lo, but ensure there is at least one value.
230 if x := rn.Lo - m; r0.Lo <= x {
237 // Update the next values for each table.
240 if rune(r0.Hi) < tn.next {
244 stride := rune(rn.Stride)
245 tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
246 if rune(rn.Hi) < tn.next {
247 if tn.p++; int(tn.p) == len(tn.t.R32) {
250 tn.next = rune(tn.t.R32[tn.p].Lo)