src/dma/vendor/golang.org/x/text/internal/ucd/ucd.go

   1 // Copyright 2014 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Package ucd provides a parser for Unicode Character Database files, the
   6 // format of which is defined in https://www.unicode.org/reports/tr44/. See
   7 // https://www.unicode.org/Public/UCD/latest/ucd/ for example files.
   8 //
   9 // It currently does not support substitutions of missing fields.
  10 package ucd // import "golang.org/x/text/internal/ucd"
  11
  12 import (
  13         "bufio"
  14         "errors"
  15         "fmt"
  16         "io"
  17         "log"
  18         "regexp"
  19         "strconv"
  20         "strings"
  21 )
  22
  23 // UnicodeData.txt fields.
  24 const (
  25         CodePoint = iota
  26         Name
  27         GeneralCategory
  28         CanonicalCombiningClass
  29         BidiClass
  30         DecompMapping
  31         DecimalValue
  32         DigitValue
  33         NumericValue
  34         BidiMirrored
  35         Unicode1Name
  36         ISOComment
  37         SimpleUppercaseMapping
  38         SimpleLowercaseMapping
  39         SimpleTitlecaseMapping
  40 )
  41
  42 // Parse calls f for each entry in the given reader of a UCD file. It will close
  43 // the reader upon return. It will call log.Fatal if any error occurred.
  44 //
  45 // This implements the most common usage pattern of using Parser.
  46 func Parse(r io.ReadCloser, f func(p *Parser)) {
  47         defer r.Close()
  48
  49         p := New(r)
  50         for p.Next() {
  51                 f(p)
  52         }
  53         if err := p.Err(); err != nil {
  54                 r.Close() // os.Exit will cause defers not to be called.
  55                 log.Fatal(err)
  56         }
  57 }
  58
  59 // An Option is used to configure a Parser.
  60 type Option func(p *Parser)
  61
  62 func keepRanges(p *Parser) {
  63         p.keepRanges = true
  64 }
  65
  66 var (
  67         // KeepRanges prevents the expansion of ranges. The raw ranges can be
  68         // obtained by calling Range(0) on the parser.
  69         KeepRanges Option = keepRanges
  70 )
  71
  72 // The Part option register a handler for lines starting with a '@'. The text
  73 // after a '@' is available as the first field. Comments are handled as usual.
  74 func Part(f func(p *Parser)) Option {
  75         return func(p *Parser) {
  76                 p.partHandler = f
  77         }
  78 }
  79
  80 // The CommentHandler option passes comments that are on a line by itself to
  81 // a given handler.
  82 func CommentHandler(f func(s string)) Option {
  83         return func(p *Parser) {
  84                 p.commentHandler = f
  85         }
  86 }
  87
  88 // A Parser parses Unicode Character Database (UCD) files.
  89 type Parser struct {
  90         scanner *bufio.Scanner
  91
  92         keepRanges bool // Don't expand rune ranges in field 0.
  93
  94         err     error
  95         comment string
  96         field   []string
  97         // parsedRange is needed in case Range(0) is called more than once for one
  98         // field. In some cases this requires scanning ahead.
  99         line                 int
 100         parsedRange          bool
 101         rangeStart, rangeEnd rune
 102
 103         partHandler    func(p *Parser)
 104         commentHandler func(s string)
 105 }
 106
 107 func (p *Parser) setError(err error, msg string) {
 108         if p.err == nil && err != nil {
 109                 if msg == "" {
 110                         p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
 111                 } else {
 112                         p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
 113                 }
 114         }
 115 }
 116
 117 func (p *Parser) getField(i int) string {
 118         if i >= len(p.field) {
 119                 return ""
 120         }
 121         return p.field[i]
 122 }
 123
 124 // Err returns a non-nil error if any error occurred during parsing.
 125 func (p *Parser) Err() error {
 126         return p.err
 127 }
 128
 129 // New returns a Parser for the given Reader.
 130 func New(r io.Reader, o ...Option) *Parser {
 131         p := &Parser{
 132                 scanner: bufio.NewScanner(r),
 133         }
 134         for _, f := range o {
 135                 f(p)
 136         }
 137         return p
 138 }
 139
 140 // Next parses the next line in the file. It returns true if a line was parsed
 141 // and false if it reached the end of the file.
 142 func (p *Parser) Next() bool {
 143         if !p.keepRanges && p.rangeStart < p.rangeEnd {
 144                 p.rangeStart++
 145                 return true
 146         }
 147         p.comment = ""
 148         p.field = p.field[:0]
 149         p.parsedRange = false
 150
 151         for p.scanner.Scan() && p.err == nil {
 152                 p.line++
 153                 s := p.scanner.Text()
 154                 if s == "" {
 155                         continue
 156                 }
 157                 if s[0] == '#' {
 158                         if p.commentHandler != nil {
 159                                 p.commentHandler(strings.TrimSpace(s[1:]))
 160                         }
 161                         continue
 162                 }
 163
 164                 // Parse line
 165                 if i := strings.IndexByte(s, '#'); i != -1 {
 166                         p.comment = strings.TrimSpace(s[i+1:])
 167                         s = s[:i]
 168                 }
 169                 if s[0] == '@' {
 170                         if p.partHandler != nil {
 171                                 p.field = append(p.field, strings.TrimSpace(s[1:]))
 172                                 p.partHandler(p)
 173                                 p.field = p.field[:0]
 174                         }
 175                         p.comment = ""
 176                         continue
 177                 }
 178                 for {
 179                         i := strings.IndexByte(s, ';')
 180                         if i == -1 {
 181                                 p.field = append(p.field, strings.TrimSpace(s))
 182                                 break
 183                         }
 184                         p.field = append(p.field, strings.TrimSpace(s[:i]))
 185                         s = s[i+1:]
 186                 }
 187                 if !p.keepRanges {
 188                         p.rangeStart, p.rangeEnd = p.getRange(0)
 189                 }
 190                 return true
 191         }
 192         p.setError(p.scanner.Err(), "scanner failed")
 193         return false
 194 }
 195
 196 func parseRune(b string) (rune, error) {
 197         if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
 198                 b = b[2:]
 199         }
 200         x, err := strconv.ParseUint(b, 16, 32)
 201         return rune(x), err
 202 }
 203
 204 func (p *Parser) parseRune(s string) rune {
 205         x, err := parseRune(s)
 206         p.setError(err, "failed to parse rune")
 207         return x
 208 }
 209
 210 // Rune parses and returns field i as a rune.
 211 func (p *Parser) Rune(i int) rune {
 212         if i > 0 || p.keepRanges {
 213                 return p.parseRune(p.getField(i))
 214         }
 215         return p.rangeStart
 216 }
 217
 218 // Runes interprets and returns field i as a sequence of runes.
 219 func (p *Parser) Runes(i int) (runes []rune) {
 220         add := func(s string) {
 221                 if s = strings.TrimSpace(s); len(s) > 0 {
 222                         runes = append(runes, p.parseRune(s))
 223                 }
 224         }
 225         for b := p.getField(i); ; {
 226                 i := strings.IndexByte(b, ' ')
 227                 if i == -1 {
 228                         add(b)
 229                         break
 230                 }
 231                 add(b[:i])
 232                 b = b[i+1:]
 233         }
 234         return
 235 }
 236
 237 var (
 238         errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
 239
 240         // reRange matches one line of a legacy rune range.
 241         reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
 242 )
 243
 244 // Range parses and returns field i as a rune range. A range is inclusive at
 245 // both ends. If the field only has one rune, first and last will be identical.
 246 // It supports the legacy format for ranges used in UnicodeData.txt.
 247 func (p *Parser) Range(i int) (first, last rune) {
 248         if !p.keepRanges {
 249                 return p.rangeStart, p.rangeStart
 250         }
 251         return p.getRange(i)
 252 }
 253
 254 func (p *Parser) getRange(i int) (first, last rune) {
 255         b := p.getField(i)
 256         if k := strings.Index(b, ".."); k != -1 {
 257                 return p.parseRune(b[:k]), p.parseRune(b[k+2:])
 258         }
 259         // The first field may not be a rune, in which case we may ignore any error
 260         // and set the range as 0..0.
 261         x, err := parseRune(b)
 262         if err != nil {
 263                 // Disable range parsing henceforth. This ensures that an error will be
 264                 // returned if the user subsequently will try to parse this field as
 265                 // a Rune.
 266                 p.keepRanges = true
 267         }
 268         // Special case for UnicodeData that was retained for backwards compatibility.
 269         if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
 270                 if p.parsedRange {
 271                         return p.rangeStart, p.rangeEnd
 272                 }
 273                 mf := reRange.FindStringSubmatch(p.scanner.Text())
 274                 p.line++
 275                 if mf == nil || !p.scanner.Scan() {
 276                         p.setError(errIncorrectLegacyRange, "")
 277                         return x, x
 278                 }
 279                 // Using Bytes would be more efficient here, but Text is a lot easier
 280                 // and this is not a frequent case.
 281                 ml := reRange.FindStringSubmatch(p.scanner.Text())
 282                 if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
 283                         p.setError(errIncorrectLegacyRange, "")
 284                         return x, x
 285                 }
 286                 p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
 287                 p.parsedRange = true
 288                 return p.rangeStart, p.rangeEnd
 289         }
 290         return x, x
 291 }
 292
 293 // bools recognizes all valid UCD boolean values.
 294 var bools = map[string]bool{
 295         "":      false,
 296         "N":     false,
 297         "No":    false,
 298         "F":     false,
 299         "False": false,
 300         "Y":     true,
 301         "Yes":   true,
 302         "T":     true,
 303         "True":  true,
 304 }
 305
 306 // Bool parses and returns field i as a boolean value.
 307 func (p *Parser) Bool(i int) bool {
 308         f := p.getField(i)
 309         for s, v := range bools {
 310                 if f == s {
 311                         return v
 312                 }
 313         }
 314         p.setError(strconv.ErrSyntax, "error parsing bool")
 315         return false
 316 }
 317
 318 // Int parses and returns field i as an integer value.
 319 func (p *Parser) Int(i int) int {
 320         x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
 321         p.setError(err, "error parsing int")
 322         return int(x)
 323 }
 324
 325 // Uint parses and returns field i as an unsigned integer value.
 326 func (p *Parser) Uint(i int) uint {
 327         x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
 328         p.setError(err, "error parsing uint")
 329         return uint(x)
 330 }
 331
 332 // Float parses and returns field i as a decimal value.
 333 func (p *Parser) Float(i int) float64 {
 334         x, err := strconv.ParseFloat(string(p.getField(i)), 64)
 335         p.setError(err, "error parsing float")
 336         return x
 337 }
 338
 339 // String parses and returns field i as a string value.
 340 func (p *Parser) String(i int) string {
 341         return string(p.getField(i))
 342 }
 343
 344 // Strings parses and returns field i as a space-separated list of strings.
 345 func (p *Parser) Strings(i int) []string {
 346         ss := strings.Split(string(p.getField(i)), " ")
 347         for i, s := range ss {
 348                 ss[i] = strings.TrimSpace(s)
 349         }
 350         return ss
 351 }
 352
 353 // Comment returns the comments for the current line.
 354 func (p *Parser) Comment() string {
 355         return string(p.comment)
 356 }
 357
 358 var errUndefinedEnum = errors.New("ucd: undefined enum value")
 359
 360 // Enum interprets and returns field i as a value that must be one of the values
 361 // in enum.
 362 func (p *Parser) Enum(i int, enum ...string) string {
 363         f := p.getField(i)
 364         for _, s := range enum {
 365                 if f == s {
 366                         return s
 367                 }
 368         }
 369         p.setError(errUndefinedEnum, "error parsing enum")
 370         return ""
 371 }