src/ceph/src/dmclock/sim/src/ConfUtils.cc

   1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
   2 // vim: ts=8 sw=2 smarttab
   3 /*
   4  * Ceph - scalable distributed file system
   5  *
   6  * Copyright (C) 2011 New Dream Network
   7  *
   8  * This is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License version 2.1, as published by the Free Software
  11  * Foundation.  See file COPYING.
  12  *
  13  */
  14
#include <algorithm>
#include <ctype.h>
#include <errno.h>
#include <list>
#include <map>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <iostream>

#include <assert.h>
#include "ConfUtils.h"
  31
  32 using std::cerr;
  33 using std::ostringstream;
  34 using std::pair;
  35 using std::string;
  36
// Largest config file, in bytes, that parse_file() is willing to load
// (0x40000000 == 1 GiB).  Bigger files are rejected with -EINVAL.
#define MAX_CONFIG_FILE_SZ 0x40000000
  38
  39 ////////////////////////////// ConfLine //////////////////////////////
  40 ConfLine::
  41 ConfLine(const std::string &key_, const std::string val_,
  42       const std::string newsection_, const std::string comment_, int line_no_)
  43   : key(key_), val(val_), newsection(newsection_)
  44 {
  45   // If you want to implement writable ConfFile support, you'll need to save
  46   // the comment and line_no arguments here.
  47 }
  48
  49 bool ConfLine::
  50 operator<(const ConfLine &rhs) const
  51 {
  52   // We only compare keys.
  53   // If you have more than one line with the same key in a given section, the
  54   // last one wins.
  55   if (key < rhs.key)
  56     return true;
  57   else
  58     return false;
  59 }
  60
  61 std::ostream &operator<<(std::ostream& oss, const ConfLine &l)
  62 {
  63   oss << "ConfLine(key = '" << l.key << "', val='"
  64       << l.val << "', newsection='" << l.newsection << "')";
  65   return oss;
  66 }
  67 ///////////////////////// ConfFile //////////////////////////
  68 ConfFile::
  69 ConfFile()
  70 {
  71 }
  72
  73 ConfFile::
  74 ~ConfFile()
  75 {
  76 }
  77
  78 void ConfFile::
  79 clear()
  80 {
  81   sections.clear();
  82 }
  83
  84 /* We load the whole file into memory and then parse it.  Although this is not
  85  * the optimal approach, it does mean that most of this code can be shared with
  86  * the bufferlist loading function. Since bufferlists are always in-memory, the
  87  * load_from_buffer interface works well for them.
  88  * In general, configuration files should be a few kilobytes at maximum, so
  89  * loading the whole configuration into memory shouldn't be a problem.
  90  */
  91 int ConfFile::
  92 parse_file(const std::string &fname, std::deque<std::string> *errors,
  93            std::ostream *warnings)
  94 {
  95   clear();
  96
  97   int ret = 0;
  98   size_t sz;
  99   char *buf = NULL;
 100   char buf2[128];
 101   FILE *fp = fopen(fname.c_str(), "r");
 102   if (!fp) {
 103     ret = -errno;
 104     return ret;
 105   }
 106
 107   struct stat st_buf;
 108   if (fstat(fileno(fp), &st_buf)) {
 109     ret = -errno;
 110     ostringstream oss;
 111     oss << "read_conf: failed to fstat '" << fname << "': " << strerror_r(ret, buf2, sizeof(buf2));
 112     errors->push_back(oss.str());
 113     goto done;
 114   }
 115
 116   if (st_buf.st_size > MAX_CONFIG_FILE_SZ) {
 117     ostringstream oss;
 118     oss << "read_conf: config file '" << fname << "' is " << st_buf.st_size
 119         << " bytes, but the maximum is " << MAX_CONFIG_FILE_SZ;
 120     errors->push_back(oss.str());
 121     ret = -EINVAL;
 122     goto done;
 123   }
 124
 125   sz = (size_t)st_buf.st_size;
 126   buf = (char*)malloc(sz);
 127   if (!buf) {
 128     ret = -ENOMEM;
 129     goto done;
 130   }
 131
 132   if (fread(buf, 1, sz, fp) != sz) {
 133     if (ferror(fp)) {
 134       ret = -errno;
 135       ostringstream oss;
 136       oss << "read_conf: fread error while reading '" << fname << "': "
 137           << strerror_r(ret, buf2, sizeof(buf2));
 138       errors->push_back(oss.str());
 139       goto done;
 140     }
 141     else {
 142       ostringstream oss;
 143       oss << "read_conf: unexpected EOF while reading '" << fname << "': "
 144           << "possible concurrent modification?";
 145       errors->push_back(oss.str());
 146       ret = -EIO;
 147       goto done;
 148     }
 149   }
 150
 151   load_from_buffer(buf, sz, errors, warnings);
 152   ret = 0;
 153
 154 done:
 155   free(buf);
 156   fclose(fp);
 157   return ret;
 158 }
 159
 160 int ConfFile::
 161 read(const std::string &section, const std::string &key, std::string &val) const
 162 {
 163   string k(normalize_key_name(key));
 164
 165   const_section_iter_t s = sections.find(section);
 166   if (s == sections.end())
 167     return -ENOENT;
 168   ConfLine exemplar(k, "", "", "", 0);
 169   ConfSection::const_line_iter_t l = s->second.lines.find(exemplar);
 170   if (l == s->second.lines.end())
 171     return -ENOENT;
 172   val = l->val;
 173   return 0;
 174 }
 175
 176 ConfFile::const_section_iter_t ConfFile::
 177 sections_begin() const
 178 {
 179   return sections.begin();
 180 }
 181
 182 ConfFile::const_section_iter_t ConfFile::
 183 sections_end() const
 184 {
 185   return sections.end();
 186 }
 187
 188 void ConfFile::
 189 trim_whitespace(std::string &str, bool strip_internal)
 190 {
 191   // strip preceding
 192   const char *in = str.c_str();
 193   while (true) {
 194     char c = *in;
 195     if ((!c) || (!isspace(c)))
 196       break;
 197     ++in;
 198   }
 199   char output[strlen(in) + 1];
 200   strcpy(output, in);
 201
 202   // strip trailing
 203   char *o = output + strlen(output);
 204   while (true) {
 205     if (o == output)
 206       break;
 207     --o;
 208     if (!isspace(*o)) {
 209       ++o;
 210       *o = '\0';
 211       break;
 212     }
 213   }
 214
 215   if (!strip_internal) {
 216     str.assign(output);
 217     return;
 218   }
 219
 220   // strip internal
 221   char output2[strlen(output) + 1];
 222   char *out2 = output2;
 223   bool prev_was_space = false;
 224   for (char *u = output; *u; ++u) {
 225     char c = *u;
 226     if (isspace(c)) {
 227       if (!prev_was_space)
 228         *out2++ = c;
 229       prev_was_space = true;
 230     }
 231     else {
 232       *out2++ = c;
 233       prev_was_space = false;
 234     }
 235   }
 236   *out2++ = '\0';
 237   str.assign(output2);
 238 }
 239
 240 /* Normalize a key name.
 241  *
 242  * Normalized key names have no leading or trailing whitespace, and all
 243  * whitespace is stored as underscores.  The main reason for selecting this
 244  * normal form is so that in common/config.cc, we can use a macro to stringify
 245  * the field names of md_config_t and get a key in normal form.
 246  */
 247 std::string ConfFile::
 248 normalize_key_name(const std::string &key)
 249 {
 250   string k(key);
 251   ConfFile::trim_whitespace(k, true);
 252   std::replace(k.begin(), k.end(), ' ', '_');
 253   return k;
 254 }
 255
 256 std::ostream &operator<<(std::ostream &oss, const ConfFile &cf)
 257 {
 258   for (ConfFile::const_section_iter_t s = cf.sections_begin();
 259        s != cf.sections_end(); ++s) {
 260     oss << "[" << s->first << "]\n";
 261     for (ConfSection::const_line_iter_t l = s->second.lines.begin();
 262          l != s->second.lines.end(); ++l) {
 263       if (!l->key.empty()) {
 264         oss << "\t" << l->key << " = \"" << l->val << "\"\n";
 265       }
 266     }
 267   }
 268   return oss;
 269 }
 270
 271 void ConfFile::
 272 load_from_buffer(const char *buf, size_t sz, std::deque<std::string> *errors,
 273                  std::ostream *warnings)
 274 {
 275   errors->clear();
 276
 277   section_iter_t::value_type vt("global", ConfSection());
 278   pair < section_iter_t, bool > vr(sections.insert(vt));
 279   assert(vr.second);
 280   section_iter_t cur_section = vr.first;
 281   std::string acc;
 282
 283   const char *b = buf;
 284   int line_no = 0;
 285   size_t line_len = -1;
 286   size_t rem = sz;
 287   while (1) {
 288     b += line_len + 1;
 289     rem -= line_len + 1;
 290     if (rem == 0)
 291       break;
 292     line_no++;
 293
 294     // look for the next newline
 295     const char *end = (const char*)memchr(b, '\n', rem);
 296     if (!end) {
 297       ostringstream oss;
 298       oss << "read_conf: ignoring line " << line_no << " because it doesn't "
 299           << "end with a newline! Please end the config file with a newline.";
 300       errors->push_back(oss.str());
 301       break;
 302     }
 303
 304     // find length of line, and search for NULLs
 305     line_len = 0;
 306     bool found_null = false;
 307     for (const char *tmp = b; tmp != end; ++tmp) {
 308       line_len++;
 309       if (*tmp == '\0') {
 310         found_null = true;
 311       }
 312     }
 313
 314     if (found_null) {
 315       ostringstream oss;
 316       oss << "read_conf: ignoring line " << line_no << " because it has "
 317           << "an embedded null.";
 318       errors->push_back(oss.str());
 319       acc.clear();
 320       continue;
 321     }
 322
 323     if ((line_len >= 1) && (b[line_len-1] == '\\')) {
 324       // A backslash at the end of a line serves as a line continuation marker.
 325       // Combine the next line with this one.
 326       // Remove the backslash itself from the text.
 327       acc.append(b, line_len - 1);
 328       continue;
 329     }
 330
 331     acc.append(b, line_len);
 332
 333     //cerr << "acc = '" << acc << "'" << std::endl;
 334     ConfLine *cline = process_line(line_no, acc.c_str(), errors);
 335     acc.clear();
 336     if (!cline)
 337       continue;
 338     const std::string &csection(cline->newsection);
 339     if (!csection.empty()) {
 340       std::map <std::string, ConfSection>::value_type nt(csection, ConfSection());
 341       pair < section_iter_t, bool > nr(sections.insert(nt));
 342       cur_section = nr.first;
 343     }
 344     else {
 345       if (cur_section->second.lines.count(*cline)) {
 346         // replace an existing key/line in this section, so that
 347         //  [mysection]
 348         //    foo = 1
 349         //    foo = 2
 350         // will result in foo = 2.
 351         cur_section->second.lines.erase(*cline);
 352         if (cline->key.length() && warnings)
 353           *warnings << "warning: line " << line_no << ": '" << cline->key << "' in section '"
 354                     << cur_section->first << "' redefined " << std::endl;
 355       }
 356       // add line to current section
 357       //std::cerr << "cur_section = " << cur_section->first << ", " << *cline << std::endl;
 358       cur_section->second.lines.insert(*cline);
 359     }
 360     delete cline;
 361   }
 362
 363   if (!acc.empty()) {
 364     ostringstream oss;
 365     oss << "read_conf: don't end with lines that end in backslashes!";
 366     errors->push_back(oss.str());
 367   }
 368 }
 369
 370 /*
 371  * A simple state-machine based parser.
 372  * This probably could/should be rewritten with something like boost::spirit
 373  * or yacc if the grammar ever gets more complex.
 374  */
 375 ConfLine* ConfFile::
 376 process_line(int line_no, const char *line, std::deque<std::string> *errors)
 377 {
 378   enum acceptor_state_t {
 379     ACCEPT_INIT,
 380     ACCEPT_SECTION_NAME,
 381     ACCEPT_KEY,
 382     ACCEPT_VAL_START,
 383     ACCEPT_UNQUOTED_VAL,
 384     ACCEPT_QUOTED_VAL,
 385     ACCEPT_COMMENT_START,
 386     ACCEPT_COMMENT_TEXT,
 387   };
 388   const char *l = line;
 389   acceptor_state_t state = ACCEPT_INIT;
 390   string key, val, newsection, comment;
 391   bool escaping = false;
 392   while (true) {
 393     char c = *l++;
 394     switch (state) {
 395       case ACCEPT_INIT:
 396         if (c == '\0')
 397           return NULL; // blank line. Not an error, but not interesting either.
 398         else if (c == '[')
 399           state = ACCEPT_SECTION_NAME;
 400         else if ((c == '#') || (c == ';'))
 401           state = ACCEPT_COMMENT_TEXT;
 402         else if (c == ']') {
 403           ostringstream oss;
 404           oss << "unexpected right bracket at char " << (l - line)
 405               << ", line " << line_no;
 406           errors->push_back(oss.str());
 407           return NULL;
 408         }
 409         else if (isspace(c)) {
 410           // ignore whitespace here
 411         }
 412         else {
 413           // try to accept this character as a key
 414           state = ACCEPT_KEY;
 415           --l;
 416         }
 417         break;
 418       case ACCEPT_SECTION_NAME:
 419         if (c == '\0') {
 420           ostringstream oss;
 421           oss << "error parsing new section name: expected right bracket "
 422               << "at char " << (l - line) << ", line " << line_no;
 423           errors->push_back(oss.str());
 424           return NULL;
 425         }
 426         else if ((c == ']') && (!escaping)) {
 427           trim_whitespace(newsection, true);
 428           if (newsection.empty()) {
 429             ostringstream oss;
 430             oss << "error parsing new section name: no section name found? "
 431                 << "at char " << (l - line) << ", line " << line_no;
 432             errors->push_back(oss.str());
 433             return NULL;
 434           }
 435           state = ACCEPT_COMMENT_START;
 436         }
 437         else if (((c == '#') || (c == ';')) && (!escaping)) {
 438           ostringstream oss;
 439           oss << "unexpected comment marker while parsing new section name, at "
 440               << "char " << (l - line) << ", line " << line_no;
 441           errors->push_back(oss.str());
 442           return NULL;
 443         }
 444         else if ((c == '\\') && (!escaping)) {
 445           escaping = true;
 446         }
 447         else {
 448           escaping = false;
 449           newsection += c;
 450         }
 451         break;
 452       case ACCEPT_KEY:
 453         if ((((c == '#') || (c == ';')) && (!escaping)) || (c == '\0')) {
 454           ostringstream oss;
 455           if (c == '\0') {
 456             oss << "end of key=val line " << line_no
 457                 << " reached, no \"=val\" found...missing =?";
 458           } else {
 459             oss << "unexpected character while parsing putative key value, "
 460                 << "at char " << (l - line) << ", line " << line_no;
 461           }
 462           errors->push_back(oss.str());
 463           return NULL;
 464         }
 465         else if ((c == '=') && (!escaping)) {
 466           key = normalize_key_name(key);
 467           if (key.empty()) {
 468             ostringstream oss;
 469             oss << "error parsing key name: no key name found? "
 470                 << "at char " << (l - line) << ", line " << line_no;
 471             errors->push_back(oss.str());
 472             return NULL;
 473           }
 474           state = ACCEPT_VAL_START;
 475         }
 476         else if ((c == '\\') && (!escaping)) {
 477           escaping = true;
 478         }
 479         else {
 480           escaping = false;
 481           key += c;
 482         }
 483         break;
 484       case ACCEPT_VAL_START:
 485         if (c == '\0')
 486           return new ConfLine(key, val, newsection, comment, line_no);
 487         else if ((c == '#') || (c == ';'))
 488           state = ACCEPT_COMMENT_TEXT;
 489         else if (c == '"')
 490           state = ACCEPT_QUOTED_VAL;
 491         else if (isspace(c)) {
 492           // ignore whitespace
 493         }
 494         else {
 495           // try to accept character as a val
 496           state = ACCEPT_UNQUOTED_VAL;
 497           --l;
 498         }
 499         break;
 500       case ACCEPT_UNQUOTED_VAL:
 501         if (c == '\0') {
 502           if (escaping) {
 503             ostringstream oss;
 504             oss << "error parsing value name: unterminated escape sequence "
 505                 << "at char " << (l - line) << ", line " << line_no;
 506             errors->push_back(oss.str());
 507             return NULL;
 508           }
 509           trim_whitespace(val, false);
 510           return new ConfLine(key, val, newsection, comment, line_no);
 511         }
 512         else if (((c == '#') || (c == ';')) && (!escaping)) {
 513           trim_whitespace(val, false);
 514           state = ACCEPT_COMMENT_TEXT;
 515         }
 516         else if ((c == '\\') && (!escaping)) {
 517           escaping = true;
 518         }
 519         else {
 520           escaping = false;
 521           val += c;
 522         }
 523         break;
 524       case ACCEPT_QUOTED_VAL:
 525         if (c == '\0') {
 526           ostringstream oss;
 527           oss << "found opening quote for value, but not the closing quote. "
 528               << "line " << line_no;
 529           errors->push_back(oss.str());
 530           return NULL;
 531         }
 532         else if ((c == '"') && (!escaping)) {
 533           state = ACCEPT_COMMENT_START;
 534         }
 535         else if ((c == '\\') && (!escaping)) {
 536           escaping = true;
 537         }
 538         else {
 539           escaping = false;
 540           // Add anything, including whitespace.
 541           val += c;
 542         }
 543         break;
 544       case ACCEPT_COMMENT_START:
 545         if (c == '\0') {
 546           return new ConfLine(key, val, newsection, comment, line_no);
 547         }
 548         else if ((c == '#') || (c == ';')) {
 549           state = ACCEPT_COMMENT_TEXT;
 550         }
 551         else if (isspace(c)) {
 552           // ignore whitespace
 553         }
 554         else {
 555           ostringstream oss;
 556           oss << "unexpected character at char " << (l - line) << " of line "
 557               << line_no;
 558           errors->push_back(oss.str());
 559           return NULL;
 560         }
 561         break;
 562       case ACCEPT_COMMENT_TEXT:
 563         if (c == '\0')
 564           return new ConfLine(key, val, newsection, comment, line_no);
 565         else
 566           comment += c;
 567         break;
 568       default:
 569         assert(0);
 570         break;
 571     }
 572     assert(c != '\0'); // We better not go past the end of the input string.
 573   }
 574 }