src/dma/vendor/golang.org/x/text/collate/collate.go

   1 // Copyright 2012 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // TODO: remove hard-coded versions when we have implemented fractional weights.
   6 // The current implementation is incompatible with later CLDR versions.
   7 //go:generate go run maketables.go -cldr=23 -unicode=6.2.0
   8
   9 // Package collate contains types for comparing and sorting Unicode strings
  10 // according to a given collation order.
  11 package collate // import "golang.org/x/text/collate"
  12
  13 import (
  14         "bytes"
  15         "strings"
  16
  17         "golang.org/x/text/internal/colltab"
  18         "golang.org/x/text/language"
  19 )
  20
  21 // Collator provides functionality for comparing strings for a given
  22 // collation order.
  23 type Collator struct {
  24         options
  25
  26         sorter sorter
  27
  28         _iter [2]iter
  29 }
  30
  31 func (c *Collator) iter(i int) *iter {
  32         // TODO: evaluate performance for making the second iterator optional.
  33         return &c._iter[i]
  34 }
  35
  36 // Supported returns the list of languages for which collating differs from its parent.
  37 func Supported() []language.Tag {
  38         // TODO: use language.Coverage instead.
  39
  40         t := make([]language.Tag, len(tags))
  41         copy(t, tags)
  42         return t
  43 }
  44
  45 func init() {
  46         ids := strings.Split(availableLocales, ",")
  47         tags = make([]language.Tag, len(ids))
  48         for i, s := range ids {
  49                 tags[i] = language.Raw.MustParse(s)
  50         }
  51 }
  52
  53 var tags []language.Tag
  54
  55 // New returns a new Collator initialized for the given locale.
  56 func New(t language.Tag, o ...Option) *Collator {
  57         index := colltab.MatchLang(t, tags)
  58         c := newCollator(getTable(locales[index]))
  59
  60         // Set options from the user-supplied tag.
  61         c.setFromTag(t)
  62
  63         // Set the user-supplied options.
  64         c.setOptions(o)
  65
  66         c.init()
  67         return c
  68 }
  69
  70 // NewFromTable returns a new Collator for the given Weighter.
  71 func NewFromTable(w colltab.Weighter, o ...Option) *Collator {
  72         c := newCollator(w)
  73         c.setOptions(o)
  74         c.init()
  75         return c
  76 }
  77
  78 func (c *Collator) init() {
  79         if c.numeric {
  80                 c.t = colltab.NewNumericWeighter(c.t)
  81         }
  82         c._iter[0].init(c)
  83         c._iter[1].init(c)
  84 }
  85
  86 // Buffer holds keys generated by Key and KeyString.
  87 type Buffer struct {
  88         buf [4096]byte
  89         key []byte
  90 }
  91
  92 func (b *Buffer) init() {
  93         if b.key == nil {
  94                 b.key = b.buf[:0]
  95         }
  96 }
  97
  98 // Reset clears the buffer from previous results generated by Key and KeyString.
  99 func (b *Buffer) Reset() {
 100         b.key = b.key[:0]
 101 }
 102
 103 // Compare returns an integer comparing the two byte slices.
 104 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
 105 func (c *Collator) Compare(a, b []byte) int {
 106         // TODO: skip identical prefixes once we have a fast way to detect if a rune is
 107         // part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
 108         c.iter(0).SetInput(a)
 109         c.iter(1).SetInput(b)
 110         if res := c.compare(); res != 0 {
 111                 return res
 112         }
 113         if !c.ignore[colltab.Identity] {
 114                 return bytes.Compare(a, b)
 115         }
 116         return 0
 117 }
 118
 119 // CompareString returns an integer comparing the two strings.
 120 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
 121 func (c *Collator) CompareString(a, b string) int {
 122         // TODO: skip identical prefixes once we have a fast way to detect if a rune is
 123         // part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
 124         c.iter(0).SetInputString(a)
 125         c.iter(1).SetInputString(b)
 126         if res := c.compare(); res != 0 {
 127                 return res
 128         }
 129         if !c.ignore[colltab.Identity] {
 130                 if a < b {
 131                         return -1
 132                 } else if a > b {
 133                         return 1
 134                 }
 135         }
 136         return 0
 137 }
 138
 139 func compareLevel(f func(i *iter) int, a, b *iter) int {
 140         a.pce = 0
 141         b.pce = 0
 142         for {
 143                 va := f(a)
 144                 vb := f(b)
 145                 if va != vb {
 146                         if va < vb {
 147                                 return -1
 148                         }
 149                         return 1
 150                 } else if va == 0 {
 151                         break
 152                 }
 153         }
 154         return 0
 155 }
 156
 157 func (c *Collator) compare() int {
 158         ia, ib := c.iter(0), c.iter(1)
 159         // Process primary level
 160         if c.alternate != altShifted {
 161                 // TODO: implement script reordering
 162                 if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
 163                         return res
 164                 }
 165         } else {
 166                 // TODO: handle shifted
 167         }
 168         if !c.ignore[colltab.Secondary] {
 169                 f := (*iter).nextSecondary
 170                 if c.backwards {
 171                         f = (*iter).prevSecondary
 172                 }
 173                 if res := compareLevel(f, ia, ib); res != 0 {
 174                         return res
 175                 }
 176         }
 177         // TODO: special case handling (Danish?)
 178         if !c.ignore[colltab.Tertiary] || c.caseLevel {
 179                 if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
 180                         return res
 181                 }
 182                 if !c.ignore[colltab.Quaternary] {
 183                         if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
 184                                 return res
 185                         }
 186                 }
 187         }
 188         return 0
 189 }
 190
 191 // Key returns the collation key for str.
 192 // Passing the buffer buf may avoid memory allocations.
 193 // The returned slice will point to an allocation in Buffer and will remain
 194 // valid until the next call to buf.Reset().
 195 func (c *Collator) Key(buf *Buffer, str []byte) []byte {
 196         // See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
 197         buf.init()
 198         return c.key(buf, c.getColElems(str))
 199 }
 200
 201 // KeyFromString returns the collation key for str.
 202 // Passing the buffer buf may avoid memory allocations.
 203 // The returned slice will point to an allocation in Buffer and will retain
 204 // valid until the next call to buf.ResetKeys().
 205 func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
 206         // See https://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
 207         buf.init()
 208         return c.key(buf, c.getColElemsString(str))
 209 }
 210
 211 func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
 212         processWeights(c.alternate, c.t.Top(), w)
 213         kn := len(buf.key)
 214         c.keyFromElems(buf, w)
 215         return buf.key[kn:]
 216 }
 217
 218 func (c *Collator) getColElems(str []byte) []colltab.Elem {
 219         i := c.iter(0)
 220         i.SetInput(str)
 221         for i.Next() {
 222         }
 223         return i.Elems
 224 }
 225
 226 func (c *Collator) getColElemsString(str string) []colltab.Elem {
 227         i := c.iter(0)
 228         i.SetInputString(str)
 229         for i.Next() {
 230         }
 231         return i.Elems
 232 }
 233
 234 type iter struct {
 235         wa [512]colltab.Elem
 236
 237         colltab.Iter
 238         pce int
 239 }
 240
 241 func (i *iter) init(c *Collator) {
 242         i.Weighter = c.t
 243         i.Elems = i.wa[:0]
 244 }
 245
 246 func (i *iter) nextPrimary() int {
 247         for {
 248                 for ; i.pce < i.N; i.pce++ {
 249                         if v := i.Elems[i.pce].Primary(); v != 0 {
 250                                 i.pce++
 251                                 return v
 252                         }
 253                 }
 254                 if !i.Next() {
 255                         return 0
 256                 }
 257         }
 258         panic("should not reach here")
 259 }
 260
 261 func (i *iter) nextSecondary() int {
 262         for ; i.pce < len(i.Elems); i.pce++ {
 263                 if v := i.Elems[i.pce].Secondary(); v != 0 {
 264                         i.pce++
 265                         return v
 266                 }
 267         }
 268         return 0
 269 }
 270
 271 func (i *iter) prevSecondary() int {
 272         for ; i.pce < len(i.Elems); i.pce++ {
 273                 if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 {
 274                         i.pce++
 275                         return v
 276                 }
 277         }
 278         return 0
 279 }
 280
 281 func (i *iter) nextTertiary() int {
 282         for ; i.pce < len(i.Elems); i.pce++ {
 283                 if v := i.Elems[i.pce].Tertiary(); v != 0 {
 284                         i.pce++
 285                         return int(v)
 286                 }
 287         }
 288         return 0
 289 }
 290
 291 func (i *iter) nextQuaternary() int {
 292         for ; i.pce < len(i.Elems); i.pce++ {
 293                 if v := i.Elems[i.pce].Quaternary(); v != 0 {
 294                         i.pce++
 295                         return v
 296                 }
 297         }
 298         return 0
 299 }
 300
 301 func appendPrimary(key []byte, p int) []byte {
 302         // Convert to variable length encoding; supports up to 23 bits.
 303         if p <= 0x7FFF {
 304                 key = append(key, uint8(p>>8), uint8(p))
 305         } else {
 306                 key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
 307         }
 308         return key
 309 }
 310
 311 // keyFromElems converts the weights ws to a compact sequence of bytes.
 312 // The result will be appended to the byte buffer in buf.
 313 func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
 314         for _, v := range ws {
 315                 if w := v.Primary(); w > 0 {
 316                         buf.key = appendPrimary(buf.key, w)
 317                 }
 318         }
 319         if !c.ignore[colltab.Secondary] {
 320                 buf.key = append(buf.key, 0, 0)
 321                 // TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
 322                 if !c.backwards {
 323                         for _, v := range ws {
 324                                 if w := v.Secondary(); w > 0 {
 325                                         buf.key = append(buf.key, uint8(w>>8), uint8(w))
 326                                 }
 327                         }
 328                 } else {
 329                         for i := len(ws) - 1; i >= 0; i-- {
 330                                 if w := ws[i].Secondary(); w > 0 {
 331                                         buf.key = append(buf.key, uint8(w>>8), uint8(w))
 332                                 }
 333                         }
 334                 }
 335         } else if c.caseLevel {
 336                 buf.key = append(buf.key, 0, 0)
 337         }
 338         if !c.ignore[colltab.Tertiary] || c.caseLevel {
 339                 buf.key = append(buf.key, 0, 0)
 340                 for _, v := range ws {
 341                         if w := v.Tertiary(); w > 0 {
 342                                 buf.key = append(buf.key, uint8(w))
 343                         }
 344                 }
 345                 // Derive the quaternary weights from the options and other levels.
 346                 // Note that we represent MaxQuaternary as 0xFF. The first byte of the
 347                 // representation of a primary weight is always smaller than 0xFF,
 348                 // so using this single byte value will compare correctly.
 349                 if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted {
 350                         if c.alternate == altShiftTrimmed {
 351                                 lastNonFFFF := len(buf.key)
 352                                 buf.key = append(buf.key, 0)
 353                                 for _, v := range ws {
 354                                         if w := v.Quaternary(); w == colltab.MaxQuaternary {
 355                                                 buf.key = append(buf.key, 0xFF)
 356                                         } else if w > 0 {
 357                                                 buf.key = appendPrimary(buf.key, w)
 358                                                 lastNonFFFF = len(buf.key)
 359                                         }
 360                                 }
 361                                 buf.key = buf.key[:lastNonFFFF]
 362                         } else {
 363                                 buf.key = append(buf.key, 0)
 364                                 for _, v := range ws {
 365                                         if w := v.Quaternary(); w == colltab.MaxQuaternary {
 366                                                 buf.key = append(buf.key, 0xFF)
 367                                         } else if w > 0 {
 368                                                 buf.key = appendPrimary(buf.key, w)
 369                                         }
 370                                 }
 371                         }
 372                 }
 373         }
 374 }
 375
 376 func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) {
 377         ignore := false
 378         vtop := int(top)
 379         switch vw {
 380         case altShifted, altShiftTrimmed:
 381                 for i := range wa {
 382                         if p := wa[i].Primary(); p <= vtop && p != 0 {
 383                                 wa[i] = colltab.MakeQuaternary(p)
 384                                 ignore = true
 385                         } else if p == 0 {
 386                                 if ignore {
 387                                         wa[i] = colltab.Ignore
 388                                 }
 389                         } else {
 390                                 ignore = false
 391                         }
 392                 }
 393         case altBlanked:
 394                 for i := range wa {
 395                         if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
 396                                 wa[i] = colltab.Ignore
 397                                 ignore = true
 398                         } else {
 399                                 ignore = false
 400                         }
 401                 }
 402         }
 403 }