1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // This tool generates types for the various XML formats of CLDR.
23 "golang.org/x/text/internal/gen"
// outputFile is the -output command-line flag: the name of the file the
// generated code is written to. It defaults to "xml.go".
26 var outputFile = flag.String("output", "xml.go", "output file name")
// Read the whole CLDR core zip into memory: zip.NewReader needs random
// access to the data (an io.ReaderAt) together with its total size.
31 r := gen.OpenCLDRCoreZip()
32 buffer, err := ioutil.ReadAll(r)
// Include the underlying error so the failure is diagnosable, as the
// zip-archive failure below already does.
34 log.Fatalf("Could not read zip file: %v", err)
37 z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
39 log.Fatalf("Could not read zip archive: %v", err)
44 version := gen.CLDRVersion()
// For every configured DTD, look for the archive entry whose name ends in
// "<file>.dtd".
46 for _, dtd := range files {
47 for _, f := range z.File {
48 if strings.HasSuffix(f.Name, dtd.file+".dtd") {
52 b := makeBuilder(&buf, dtd)
// Resolution starts from the element named by the first entry of dtd.top.
54 b.resolve(b.index[dtd.top[0]])
// A builder that recorded a version must agree with gen.CLDRVersion.
56 if b.version != "" && version != b.version {
58 log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
// Append the exported Version constant to the generated code.
64 fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
65 fmt.Fprintf(&buf, "const Version = %q\n", version)
// Hand the buffer to gen.WriteGoFile with the chosen output name and "cldr"
// (presumably the package name of the generated file).
67 gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
// failOnError reports err on standard error.
// NOTE(review): the nil check and whatever follows the log call are outside
// this excerpt; presumably the function is a no-op for a nil err.
70 func failOnError(err error) {
// Calldepth 2 makes log.Lshortfile report the file:line of failOnError's
// caller rather than of this helper.
72 log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
77 // dtd holds the configuration data for one DTD type.
79 file string // base name of the DTD file, without the ".dtd" suffix
80 root string // Go name of the root XML element
81 top []string // XML elements that are written as separate top-level Go types
83 skipElem []string // hard-coded or deprecated elements; ignored when resolving sub-elements
84 skipAttr []string // attributes to exclude
85 predefined []string // elements with a hard-coded type, emitted as <name>Elem
86 forceRepeat []string // elements to make slices despite DTD
// ldmlBCP47 is the only element of this DTD written as its own top-level type.
93 top: []string{"ldmlBCP47"},
95 "cldrVersion", // deprecated, not used
99 file: "ldmlSupplemental",
100 root: "SupplementalData",
101 top: []string{"supplementalData"},
103 "cldrVersion", // deprecated, not used
// forceRepeat entries are treated as repeating regardless of the DTD's
// occurrence suffix.
105 forceRepeat: []string{
106 "plurals", // data defined in plurals.xml and ordinals.xml
113 "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
116 "cp", // not used anywhere
117 "special", // not used anywhere
118 "fallback", // deprecated, not used
119 "alias", // in Common
120 "default", // in Common
123 "hiraganaQuarternary", // typo in DTD, correct version included as well
125 predefined: []string{"rules"}, // written by writeElem as rulesElem
// comments maps the XML name of a top-level element to the Go doc comment
// that write emits before that element's generated type declaration. The
// "//" lines below are string contents that end up in the generated file,
// not comments of this program.
129 var comments = map[string]string{
131 // LDMLBCP47 holds information on allowable values for various variables in LDML.
133 "supplementalData": `
134 // SupplementalData holds information relevant for internationalization
135 // and proper use of CLDR, but that is not contained in the locale hierarchy.
138 // LDML is the top-level type for locale-specific data.
141 // Collation contains rules that specify a certain sort-order,
142 // as a tailoring of the root order.
143 // The parsed rules are obtained by passing a RuleProcessor to Collation's
147 // Calendar specifies the fields used for formatting and parsing dates and times.
148 // The month and quarter names are identified numerically, starting at 1.
149 // The day (of the week) names are identified with short strings, since there is
150 // no universally-accepted numeric designation.
153 // Dates contains information regarding the format and parsing of dates and times.
155 "localeDisplayNames": `
156 // LocaleDisplayNames specifies localized display names for scripts, languages,
157 // countries, currencies, and variants.
160 // Numbers supplies information for formatting and parsing numbers and currencies.
// element describes one XML element declared in a DTD. Elements are stored
// in builder.index by name; parseDTD extends attr and signature, and resolve
// fills in sub.
164 type element struct {
165 name string // XML element name
166 category string // elements contained by this element
167 signature string // category + attrKey*
169 attr []*attribute // attributes supported by this element.
170 sub []struct { // parsed and evaluated sub elements of this element.
172 repeat bool // true if the element needs to be a slice
175 resolved bool // prevent multiple resolutions of this element.
// attribute describes one attribute declared for an element; parseDTD
// appends these to element.attr.
178 type attribute struct {
// reHead captures the leading keyword of a directive (group 1) and the name
// that follows it (group 2).
187 reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
// reAttr matches one attribute definition: the attribute name (group 1),
// then either a type word (group 2) or a parenthesized enumeration whose
// contents are group 3, followed by optional '#KEYWORD' and quoted parts.
// NOTE(review): `[\.\d+]` is a single-character class ('.', a digit or '+');
// `[\.\d]+` may have been intended — confirm before changing.
188 reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
// reElem matches a complete content declaration: EMPTY, ANY, or a
// parenthesized group with an optional '*', '+' or '?' suffix.
189 reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
// NOTE(review): `\w\-` matches exactly one word character followed by '-',
// not a whole token; `[\w\-]+` may have been intended — confirm against how
// attribute.list is consumed.
190 reToken = regexp.MustCompile(`\w\-`)
193 // builder is used to read in the DTD files from CLDR and generate Go code
194 // to be used with the encoding/xml package.
195 type builder struct {
197 index map[string]*element // parsed elements, keyed by XML element name
// makeBuilder returns a builder for the DTD configuration d, starting with
// an empty element index.
// NOTE(review): how w and d are stored is outside this excerpt; presumably
// they become the builder's w and info fields — confirm.
203 func makeBuilder(w io.Writer, d dtd) builder {
206 index: make(map[string]*element),
212 // parseDTD parses a DTD file.
// It decodes r as XML and processes directive tokens, recording what it
// finds on the elements in b.index. Malformed input terminates the program
// through log.Fatalf.
213 func (b *builder) parseDTD(r io.Reader) {
214 for d := xml.NewDecoder(r); ; {
// The type assertion picks out xml.Directive tokens.
220 dir, ok := t.(xml.Directive)
// NOTE(review): m is indexed on the next line with no nil check, so a
// directive that reHead does not match would panic here.
224 m := reHead.FindSubmatch(dir)
// Drop the matched head so dir holds only the remainder of the directive.
225 dir = dir[len(m[0]):]
226 ename := string(m[2])
227 el, elementFound := b.index[ename]
228 switch string(m[1]) {
// Fatalf, not Fatal: the message contains a formatting verb.
231 log.Fatalf("parseDTD: duplicate entry for element %q", ename)
233 m := reElem.FindSubmatch(dir)
235 log.Fatalf("parseDTD: invalid element %q", string(dir))
237 if len(m[0]) != len(dir) {
// Fatalf with one verb per argument; Fatal would print the raw format
// string followed by the unformatted operands.
238 log.Fatalf("parseDTD: invalid element %q (length %d, matched %d bytes: %q)", string(dir), len(dir), len(m[0]), string(m[0]))
248 log.Fatalf("parseDTD: unknown element %q", ename)
251 m := reAttr.FindStringSubmatch(s)
253 log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
259 case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
260 case "type", "choice":
262 el.attr = append(el.attr, &attribute{
265 list: reToken.FindAllString(m[3], -1),
// Extend the signature with the first two reAttr groups (attribute name and
// type word).
267 el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
// reCat matches one token of a category string: optional leading separators
// (space, ',' or '|'), then optionally a '(' , a ')' or a name that may start
// with '#' (group 1), followed by an optional '*', '+' or '?' suffix
// (group 2).
274 var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
276 // resolve takes a parsed element and converts it into structured data
277 // that can be used to generate the XML code.
278 func (b *builder) resolve(e *element) {
282 b.elem = append(b.elem, e)
// found records the sub-element names already added, so that each name is
// appended to e.sub at most once.
285 found := make(map[string]bool)
// sequenceStart is a stack of e.sub lengths, one entry per group that is
// still open.
286 sequenceStart := []int{}
288 m := reCat.FindStringSubmatch(s)
290 log.Fatalf("%s: invalid category string %q", e.name, s)
// A '*' or '+' suffix, or membership in forceRepeat, marks the token as
// repeating.
292 repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
// Remember where the group's sub-elements begin (presumably on "(" — the
// case label is outside this excerpt).
296 sequenceStart = append(sequenceStart, len(e.sub))
298 if len(sequenceStart) == 0 {
299 log.Fatalf("%s: unmatched closing parenthesis", e.name)
// Propagate the group's repeat flag to every sub-element added since the
// group began, then pop the group.
301 for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
302 e.sub[i].repeat = e.sub[i].repeat || repeat
304 sequenceStart = sequenceStart[:len(sequenceStart)-1]
// Names listed in skipElem are ignored.
306 if in(b.info.skipElem, m[1]) {
307 } else if sub, ok := b.index[m[1]]; ok {
308 if !found[sub.name] {
309 e.sub = append(e.sub, struct {
313 found[sub.name] = true
// #PCDATA and ANY contribute no sub-element; any other name except EMPTY is
// unknown and fatal.
316 } else if m[1] == "#PCDATA" || m[1] == "ANY" {
317 } else if m[1] != "EMPTY" {
318 log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
325 // in reports whether s is contained in set.
326 func in(set []string, s string) bool {
327 for _, v := range set {
// repl replaces every '-' and '_' with a space; title uses it to turn those
// characters into word boundaries.
335 var repl = strings.NewReplacer("-", " ", "_", " ")
337 // title puts the first letter of s and each letter following '-' or '_' in
338 // title case and removes all occurrences of '-' and '_' (and of any spaces
// already present in s).
339 func title(s string) string {
// repl turns '-' and '_' into spaces so that strings.Title sees separate
// words; the outer Replace then deletes every space.
340 return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
343 // writeElem generates Go code for a single element, recursively.
// Output goes to b.w; tab is the indentation depth applied by p.
344 func (b *builder) writeElem(tab int, e *element) {
// p formats to b.w after inserting tab tab characters behind every newline
// in the format string, so that emitted lines are indented.
345 p := func(f string, x ...interface{}) {
346 f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
347 fmt.Fprintf(b.w, f, x...)
349 if len(e.sub) == 0 && len(e.attr) == 0 {
// Each attribute not listed in skipAttr becomes a string field tagged
// xml:"<name>,attr".
356 for _, attr := range e.attr {
357 if !in(b.info.skipAttr, attr.name) {
358 p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
361 for _, sub := range e.sub {
// Predefined elements are emitted as a bare <name>Elem line.
362 if in(b.info.predefined, sub.e.name) {
363 p("\n%sElem", sub.e.name)
366 if in(b.info.skipElem, sub.e.name) {
// Start the field with its title-cased Go name.
369 p("\n%s ", title(sub.e.name))
374 if in(b.info.top, sub.e.name) {
// Recurse into the sub-element.
377 b.writeElem(tab, sub.e)
// Tag the field with the XML element name.
379 p(" `xml:\"%s\"`", sub.e.name)
385 // write generates the Go XML code.
// For every element named in b.info.top it emits that element's entry from
// the comments map and then starts a type declaration under the element's
// title-cased name.
386 func (b *builder) write() {
387 for i, name := range b.info.top {
// Fprint, not Fprintf: the comment text is data, not a format string, so a
// '%' in it must be written verbatim.
390 fmt.Fprint(b.w, comments[name])
391 name := title(e.name)
395 fmt.Fprintf(b.w, "type %s ", name)
397 fmt.Fprint(b.w, "\n")