src/ceph/src/common/ConfUtils.cc

   1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
   2 // vim: ts=8 sw=2 smarttab
   3 /*
   4  * Ceph - scalable distributed file system
   5  *
   6  * Copyright (C) 2011 New Dream Network
   7  *
   8  * This is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License version 2.1, as published by the Free Software
  11  * Foundation.  See file COPYING.
  12  *
  13  */
  14
  15 #include <algorithm>
  16 #include <map>
  17 #include <sstream>
  18 #include <sys/stat.h>
  19 #include <iostream>
  20
  21 #include "include/buffer.h"
  22 #include "common/errno.h"
  23 #include "common/utf8.h"
  24 #include "common/ConfUtils.h"
  25
  26 using std::ostringstream;
  27 using std::pair;
  28 using std::string;
  29
// Largest file, in bytes, that ConfFile::parse_file() agrees to load
// (0x40000000 == 1 GiB).  Bigger files are rejected with -EINVAL.
#define MAX_CONFIG_FILE_SZ 0x40000000
  31
  32 ////////////////////////////// ConfLine //////////////////////////////
  33 ConfLine::
  34 ConfLine(const std::string &key_, const std::string &val_,
  35       const std::string &newsection_, const std::string &comment_, int line_no_)
  36   : key(key_), val(val_), newsection(newsection_)
  37 {
  38   // If you want to implement writable ConfFile support, you'll need to save
  39   // the comment and line_no arguments here.
  40 }
  41
  42 bool ConfLine::
  43 operator<(const ConfLine &rhs) const
  44 {
  45   // We only compare keys.
  46   // If you have more than one line with the same key in a given section, the
  47   // last one wins.
  48   if (key < rhs.key)
  49     return true;
  50   else
  51     return false;
  52 }
  53
  54 std::ostream &operator<<(std::ostream& oss, const ConfLine &l)
  55 {
  56   oss << "ConfLine(key = '" << l.key << "', val='"
  57       << l.val << "', newsection='" << l.newsection << "')";
  58   return oss;
  59 }
  60 ///////////////////////// ConfFile //////////////////////////
  61 ConfFile::
  62 ConfFile()
  63 {
  64 }
  65
  66 ConfFile::
  67 ~ConfFile()
  68 {
  69 }
  70
  71 void ConfFile::
  72 clear()
  73 {
  74   sections.clear();
  75 }
  76
  77 /* We load the whole file into memory and then parse it.  Although this is not
  78  * the optimal approach, it does mean that most of this code can be shared with
  79  * the bufferlist loading function. Since bufferlists are always in-memory, the
  80  * load_from_buffer interface works well for them.
  81  * In general, configuration files should be a few kilobytes at maximum, so
  82  * loading the whole configuration into memory shouldn't be a problem.
  83  */
  84 int ConfFile::
  85 parse_file(const std::string &fname, std::deque<std::string> *errors,
  86            std::ostream *warnings)
  87 {
  88   clear();
  89
  90   int ret = 0;
  91   size_t sz;
  92   char *buf = NULL;
  93   FILE *fp = fopen(fname.c_str(), "r");
  94   if (!fp) {
  95     ostringstream oss;
  96     oss << __func__ << ": cannot open " << fname << ": " << cpp_strerror(errno);
  97     errors->push_back(oss.str());
  98     ret = -errno;
  99     return ret;
 100   }
 101
 102   struct stat st_buf;
 103   if (fstat(fileno(fp), &st_buf)) {
 104     ret = -errno;
 105     ostringstream oss;
 106     oss << __func__ << ": failed to fstat '" << fname << "': " << cpp_strerror(ret);
 107     errors->push_back(oss.str());
 108     goto done;
 109   }
 110
 111   if (st_buf.st_size > MAX_CONFIG_FILE_SZ) {
 112     ostringstream oss;
 113     oss << __func__ << ": config file '" << fname << "' is " << st_buf.st_size
 114         << " bytes, but the maximum is " << MAX_CONFIG_FILE_SZ;
 115     errors->push_back(oss.str());
 116     ret = -EINVAL;
 117     goto done;
 118   }
 119
 120   sz = (size_t)st_buf.st_size;
 121   buf = (char*)malloc(sz);
 122   if (!buf) {
 123     ret = -ENOMEM;
 124     goto done;
 125   }
 126
 127   if (fread(buf, 1, sz, fp) != sz) {
 128     if (ferror(fp)) {
 129       ret = -errno;
 130       ostringstream oss;
 131       oss << __func__ << ": fread error while reading '" << fname << "': "
 132           << cpp_strerror(ret);
 133       errors->push_back(oss.str());
 134       goto done;
 135     }
 136     else {
 137       ostringstream oss;
 138       oss << __func__ << ": unexpected EOF while reading '" << fname << "': "
 139           << "possible concurrent modification?";
 140       errors->push_back(oss.str());
 141       ret = -EIO;
 142       goto done;
 143     }
 144   }
 145
 146   load_from_buffer(buf, sz, errors, warnings);
 147   ret = 0;
 148
 149 done:
 150   free(buf);
 151   fclose(fp);
 152   return ret;
 153 }
 154
 155 int ConfFile::
 156 parse_bufferlist(ceph::bufferlist *bl, std::deque<std::string> *errors,
 157                  std::ostream *warnings)
 158 {
 159   clear();
 160
 161   load_from_buffer(bl->c_str(), bl->length(), errors, warnings);
 162   return 0;
 163 }
 164
 165 int ConfFile::
 166 read(const std::string &section, const std::string &key, std::string &val) const
 167 {
 168   string k(normalize_key_name(key));
 169
 170   const_section_iter_t s = sections.find(section);
 171   if (s == sections.end())
 172     return -ENOENT;
 173   ConfLine exemplar(k, "", "", "", 0);
 174   ConfSection::const_line_iter_t l = s->second.lines.find(exemplar);
 175   if (l == s->second.lines.end())
 176     return -ENOENT;
 177   val = l->val;
 178   return 0;
 179 }
 180
 181 ConfFile::const_section_iter_t ConfFile::
 182 sections_begin() const
 183 {
 184   return sections.begin();
 185 }
 186
 187 ConfFile::const_section_iter_t ConfFile::
 188 sections_end() const
 189 {
 190   return sections.end();
 191 }
 192
 193 void ConfFile::
 194 trim_whitespace(std::string &str, bool strip_internal)
 195 {
 196   // strip preceding
 197   const char *in = str.c_str();
 198   while (true) {
 199     char c = *in;
 200     if ((!c) || (!isspace(c)))
 201       break;
 202     ++in;
 203   }
 204   char output[strlen(in) + 1];
 205   strcpy(output, in);
 206
 207   // strip trailing
 208   char *o = output + strlen(output);
 209   while (true) {
 210     if (o == output)
 211       break;
 212     --o;
 213     if (!isspace(*o)) {
 214       ++o;
 215       *o = '\0';
 216       break;
 217     }
 218   }
 219
 220   if (!strip_internal) {
 221     str.assign(output);
 222     return;
 223   }
 224
 225   // strip internal
 226   char output2[strlen(output) + 1];
 227   char *out2 = output2;
 228   bool prev_was_space = false;
 229   for (char *u = output; *u; ++u) {
 230     char c = *u;
 231     if (isspace(c)) {
 232       if (!prev_was_space)
 233         *out2++ = c;
 234       prev_was_space = true;
 235     }
 236     else {
 237       *out2++ = c;
 238       prev_was_space = false;
 239     }
 240   }
 241   *out2++ = '\0';
 242   str.assign(output2);
 243 }
 244
 245 /* Normalize a key name.
 246  *
 247  * Normalized key names have no leading or trailing whitespace, and all
 248  * whitespace is stored as underscores.  The main reason for selecting this
 249  * normal form is so that in common/config.cc, we can use a macro to stringify
 250  * the field names of md_config_t and get a key in normal form.
 251  */
 252 std::string ConfFile::
 253 normalize_key_name(const std::string &key)
 254 {
 255   string k(key);
 256   ConfFile::trim_whitespace(k, true);
 257   std::replace(k.begin(), k.end(), ' ', '_');
 258   return k;
 259 }
 260
 261 std::ostream &operator<<(std::ostream &oss, const ConfFile &cf)
 262 {
 263   for (ConfFile::const_section_iter_t s = cf.sections_begin();
 264        s != cf.sections_end(); ++s) {
 265     oss << "[" << s->first << "]\n";
 266     for (ConfSection::const_line_iter_t l = s->second.lines.begin();
 267          l != s->second.lines.end(); ++l) {
 268       if (!l->key.empty()) {
 269         oss << "\t" << l->key << " = \"" << l->val << "\"\n";
 270       }
 271     }
 272   }
 273   return oss;
 274 }
 275
 276 void ConfFile::
 277 load_from_buffer(const char *buf, size_t sz, std::deque<std::string> *errors,
 278                  std::ostream *warnings)
 279 {
 280   errors->clear();
 281
 282   section_iter_t::value_type vt("global", ConfSection());
 283   pair < section_iter_t, bool > vr(sections.insert(vt));
 284   assert(vr.second);
 285   section_iter_t cur_section = vr.first;
 286   std::string acc;
 287
 288   const char *b = buf;
 289   int line_no = 0;
 290   size_t line_len = -1;
 291   size_t rem = sz;
 292   while (1) {
 293     b += line_len + 1;
 294     if ((line_len + 1) > rem)
 295       break;
 296     rem -= line_len + 1;
 297     if (rem == 0)
 298       break;
 299     line_no++;
 300
 301     // look for the next newline
 302     const char *end = (const char*)memchr(b, '\n', rem);
 303     if (!end) {
 304       ostringstream oss;
 305       oss << "read_conf: ignoring line " << line_no << " because it doesn't "
 306           << "end with a newline! Please end the config file with a newline.";
 307       errors->push_back(oss.str());
 308       break;
 309     }
 310
 311     // find length of line, and search for NULLs
 312     line_len = 0;
 313     bool found_null = false;
 314     for (const char *tmp = b; tmp != end; ++tmp) {
 315       line_len++;
 316       if (*tmp == '\0') {
 317         found_null = true;
 318       }
 319     }
 320
 321     if (found_null) {
 322       ostringstream oss;
 323       oss << "read_conf: ignoring line " << line_no << " because it has "
 324           << "an embedded null.";
 325       errors->push_back(oss.str());
 326       acc.clear();
 327       continue;
 328     }
 329
 330     if (check_utf8(b, line_len)) {
 331       ostringstream oss;
 332       oss << "read_conf: ignoring line " << line_no << " because it is not "
 333           << "valid UTF8.";
 334       errors->push_back(oss.str());
 335       acc.clear();
 336       continue;
 337     }
 338
 339     if ((line_len >= 1) && (b[line_len-1] == '\\')) {
 340       // A backslash at the end of a line serves as a line continuation marker.
 341       // Combine the next line with this one.
 342       // Remove the backslash itself from the text.
 343       acc.append(b, line_len - 1);
 344       continue;
 345     }
 346
 347     acc.append(b, line_len);
 348
 349     //cerr << "acc = '" << acc << "'" << std::endl;
 350     ConfLine *cline = process_line(line_no, acc.c_str(), errors);
 351     acc.clear();
 352     if (!cline)
 353       continue;
 354     const std::string &csection(cline->newsection);
 355     if (!csection.empty()) {
 356       std::map <std::string, ConfSection>::value_type nt(csection, ConfSection());
 357       pair < section_iter_t, bool > nr(sections.insert(nt));
 358       cur_section = nr.first;
 359     }
 360     else {
 361       if (cur_section->second.lines.count(*cline)) {
 362         // replace an existing key/line in this section, so that
 363         //  [mysection]
 364         //    foo = 1
 365         //    foo = 2
 366         // will result in foo = 2.
 367         cur_section->second.lines.erase(*cline);
 368         if (cline->key.length() && warnings)
 369           *warnings << "warning: line " << line_no << ": '" << cline->key << "' in section '"
 370                     << cur_section->first << "' redefined " << std::endl;
 371       }
 372       // add line to current section
 373       //std::cerr << "cur_section = " << cur_section->first << ", " << *cline << std::endl;
 374       cur_section->second.lines.insert(*cline);
 375     }
 376     delete cline;
 377   }
 378
 379   if (!acc.empty()) {
 380     ostringstream oss;
 381     oss << "read_conf: don't end with lines that end in backslashes!";
 382     errors->push_back(oss.str());
 383   }
 384 }
 385
 386 /*
 387  * A simple state-machine based parser.
 388  * This probably could/should be rewritten with something like boost::spirit
 389  * or yacc if the grammar ever gets more complex.
 390  */
 391 ConfLine* ConfFile::
 392 process_line(int line_no, const char *line, std::deque<std::string> *errors)
 393 {
 394   enum acceptor_state_t {
 395     ACCEPT_INIT,
 396     ACCEPT_SECTION_NAME,
 397     ACCEPT_KEY,
 398     ACCEPT_VAL_START,
 399     ACCEPT_UNQUOTED_VAL,
 400     ACCEPT_QUOTED_VAL,
 401     ACCEPT_COMMENT_START,
 402     ACCEPT_COMMENT_TEXT,
 403   };
 404   const char *l = line;
 405   acceptor_state_t state = ACCEPT_INIT;
 406   string key, val, newsection, comment;
 407   bool escaping = false;
 408   while (true) {
 409     char c = *l++;
 410     switch (state) {
 411       case ACCEPT_INIT:
 412         if (c == '\0')
 413           return NULL; // blank line. Not an error, but not interesting either.
 414         else if (c == '[')
 415           state = ACCEPT_SECTION_NAME;
 416         else if ((c == '#') || (c == ';'))
 417           state = ACCEPT_COMMENT_TEXT;
 418         else if (c == ']') {
 419           ostringstream oss;
 420           oss << "unexpected right bracket at char " << (l - line)
 421               << ", line " << line_no;
 422           errors->push_back(oss.str());
 423           return NULL;
 424         }
 425         else if (isspace(c)) {
 426           // ignore whitespace here
 427         }
 428         else {
 429           // try to accept this character as a key
 430           state = ACCEPT_KEY;
 431           --l;
 432         }
 433         break;
 434       case ACCEPT_SECTION_NAME:
 435         if (c == '\0') {
 436           ostringstream oss;
 437           oss << "error parsing new section name: expected right bracket "
 438               << "at char " << (l - line) << ", line " << line_no;
 439           errors->push_back(oss.str());
 440           return NULL;
 441         }
 442         else if ((c == ']') && (!escaping)) {
 443           trim_whitespace(newsection, true);
 444           if (newsection.empty()) {
 445             ostringstream oss;
 446             oss << "error parsing new section name: no section name found? "
 447                 << "at char " << (l - line) << ", line " << line_no;
 448             errors->push_back(oss.str());
 449             return NULL;
 450           }
 451           state = ACCEPT_COMMENT_START;
 452         }
 453         else if (((c == '#') || (c == ';')) && (!escaping)) {
 454           ostringstream oss;
 455           oss << "unexpected comment marker while parsing new section name, at "
 456               << "char " << (l - line) << ", line " << line_no;
 457           errors->push_back(oss.str());
 458           return NULL;
 459         }
 460         else if ((c == '\\') && (!escaping)) {
 461           escaping = true;
 462         }
 463         else {
 464           escaping = false;
 465           newsection += c;
 466         }
 467         break;
 468       case ACCEPT_KEY:
 469         if ((((c == '#') || (c == ';')) && (!escaping)) || (c == '\0')) {
 470           ostringstream oss;
 471           if (c == '\0') {
 472             oss << "end of key=val line " << line_no
 473                 << " reached, no \"=val\" found...missing =?";
 474           } else {
 475             oss << "unexpected character while parsing putative key value, "
 476                 << "at char " << (l - line) << ", line " << line_no;
 477           }
 478           errors->push_back(oss.str());
 479           return NULL;
 480         }
 481         else if ((c == '=') && (!escaping)) {
 482           key = normalize_key_name(key);
 483           if (key.empty()) {
 484             ostringstream oss;
 485             oss << "error parsing key name: no key name found? "
 486                 << "at char " << (l - line) << ", line " << line_no;
 487             errors->push_back(oss.str());
 488             return NULL;
 489           }
 490           state = ACCEPT_VAL_START;
 491         }
 492         else if ((c == '\\') && (!escaping)) {
 493           escaping = true;
 494         }
 495         else {
 496           escaping = false;
 497           key += c;
 498         }
 499         break;
 500       case ACCEPT_VAL_START:
 501         if (c == '\0')
 502           return new ConfLine(key, val, newsection, comment, line_no);
 503         else if ((c == '#') || (c == ';'))
 504           state = ACCEPT_COMMENT_TEXT;
 505         else if (c == '"')
 506           state = ACCEPT_QUOTED_VAL;
 507         else if (isspace(c)) {
 508           // ignore whitespace
 509         }
 510         else {
 511           // try to accept character as a val
 512           state = ACCEPT_UNQUOTED_VAL;
 513           --l;
 514         }
 515         break;
 516       case ACCEPT_UNQUOTED_VAL:
 517         if (c == '\0') {
 518           if (escaping) {
 519             ostringstream oss;
 520             oss << "error parsing value name: unterminated escape sequence "
 521                 << "at char " << (l - line) << ", line " << line_no;
 522             errors->push_back(oss.str());
 523             return NULL;
 524           }
 525           trim_whitespace(val, false);
 526           return new ConfLine(key, val, newsection, comment, line_no);
 527         }
 528         else if (((c == '#') || (c == ';')) && (!escaping)) {
 529           trim_whitespace(val, false);
 530           state = ACCEPT_COMMENT_TEXT;
 531         }
 532         else if ((c == '\\') && (!escaping)) {
 533           escaping = true;
 534         }
 535         else {
 536           escaping = false;
 537           val += c;
 538         }
 539         break;
 540       case ACCEPT_QUOTED_VAL:
 541         if (c == '\0') {
 542           ostringstream oss;
 543           oss << "found opening quote for value, but not the closing quote. "
 544               << "line " << line_no;
 545           errors->push_back(oss.str());
 546           return NULL;
 547         }
 548         else if ((c == '"') && (!escaping)) {
 549           state = ACCEPT_COMMENT_START;
 550         }
 551         else if ((c == '\\') && (!escaping)) {
 552           escaping = true;
 553         }
 554         else {
 555           escaping = false;
 556           // Add anything, including whitespace.
 557           val += c;
 558         }
 559         break;
 560       case ACCEPT_COMMENT_START:
 561         if (c == '\0') {
 562           return new ConfLine(key, val, newsection, comment, line_no);
 563         }
 564         else if ((c == '#') || (c == ';')) {
 565           state = ACCEPT_COMMENT_TEXT;
 566         }
 567         else if (isspace(c)) {
 568           // ignore whitespace
 569         }
 570         else {
 571           ostringstream oss;
 572           oss << "unexpected character at char " << (l - line) << " of line "
 573               << line_no;
 574           errors->push_back(oss.str());
 575           return NULL;
 576         }
 577         break;
 578       case ACCEPT_COMMENT_TEXT:
 579         if (c == '\0')
 580           return new ConfLine(key, val, newsection, comment, line_no);
 581         else
 582           comment += c;
 583         break;
 584       default:
 585         ceph_abort();
 586         break;
 587     }
 588     assert(c != '\0'); // We better not go past the end of the input string.
 589   }
 590 }