/* spot/lbtt/src/StringUtil.cc */

/*
 *  Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
 *  Heikki Tauriainen <Heikki.Tauriainen@tkk.fi>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

#include <config.h>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include "StringUtil.h"

namespace StringUtil
{

/* ========================================================================= */
string toString(const double d, const int precision, const ios::fmtflags flags)
/* ----------------------------------------------------------------------------
 *
 * Description:   Formats a floating-point value as text.  The value is written
 *                to an in-memory output stream whose precision has been set
 *                to `precision' and whose complete set of format flags has
 *                been replaced by `flags'.
 *
 * Arguments:     d          --  The value to format.
 *                precision  --  Stream precision used for the conversion.
 *                flags      --  Format flags installed on the stream before
 *                               the value is written.
 *
 * Returns:       The textual representation of `d'.
 *
 * ------------------------------------------------------------------------- */
{
  /* Pick the stream implementation available on this platform. */
#ifdef HAVE_SSTREAM
  ostringstream out;
#else
  ostrstream out;
#endif /* HAVE_SSTREAM */

  out.flags(flags);
  out.precision(precision);
  out << d;

#ifdef HAVE_SSTREAM
  return out.str();
#else
  /* The strstream buffer is not null-terminated automatically, and calling
     str() freezes it; terminate it, copy it and then unfreeze the buffer so
     that the stream releases the memory when it is destroyed. */
  out << ends;
  string text(out.str());
  out.freeze(0);
  return text;
#endif /* HAVE_SSTREAM */
}

/* ========================================================================= */
void sliceString
  (const string& s, const char* slice_chars, vector<string>& slices)
/* ----------------------------------------------------------------------------
 *
 * Description:   Splits a string into its maximal runs of non-separator
 *                characters.  Consecutive separators are treated as a single
 *                separator, so the result never contains empty strings.
 *
 * Arguments:     s            --  The string to split.
 *                slice_chars  --  A C-style string listing every character
 *                                 that acts as a separator.
 *                slices       --  Vector receiving the components.  Any
 *                                 previous contents of the vector are
 *                                 discarded.
 *
 * Returns:       Nothing.
 *
 * ------------------------------------------------------------------------- */
{
  slices.clear();

  /* Skip any leading separators to find the start of the first component. */
  string::size_type piece_begin = s.find_first_not_of(slice_chars, 0);

  while (piece_begin != string::npos)
  {
    const string::size_type piece_end
      = s.find_first_of(slice_chars, piece_begin);

    if (piece_end == string::npos)
    {
      /* The last component extends to the end of the string. */
      slices.push_back(s.substr(piece_begin));
      break;
    }

    slices.push_back(s.substr(piece_begin, piece_end - piece_begin));

    /* Advance past the separator run that follows this component. */
    piece_begin = s.find_first_not_of(slice_chars, piece_end);
  }
}

/* ========================================================================= */
string toLowerCase(const string& s)
/* ----------------------------------------------------------------------------
 *
 * Description:   Converts a string to lower case, one character at a time,
 *                using the C library function `tolower'.
 *
 * Argument:      s  --  String to process.
 *
 * Returns:       A copy of `s' in which every character has been replaced by
 *                the result of `tolower' for that character.
 *
 * ------------------------------------------------------------------------- */
{
  string result;
  result.reserve(s.length());

  for (string::size_type pos = 0; pos < s.length(); ++pos)
  {
    /* `tolower' is only defined for EOF and values representable as an
       unsigned char; passing a negative plain char is undefined behaviour,
       so the character is converted to unsigned char first. */
    const unsigned char c = static_cast<unsigned char>(s[pos]);
    result += static_cast<char>(tolower(c));
  }

  return result;
}

/* ========================================================================= */
bool interpretSpecialCharacters(const char c, bool& escape, char& quotechar)
/* ----------------------------------------------------------------------------
 *
 * Description:   Advances a small quote/escape state machine by one character.
 *                The state consists of `escape' and `quotechar'; both are
 *                updated in place according to `c'.  Intended for scanning
 *                strings that may contain quotes and backslash escapes.
 *
 * Arguments:     c          --  The character being scanned.
 *                escape     --  On entry, true if `c' is escaped by a
 *                               preceding backslash.  On exit, true if `c' is
 *                               itself a backslash that escapes the next
 *                               character.
 *                quotechar  --   0  ==  currently outside of quotes.
 *                               `'' ==  currently inside single quotes.
 *                               `"' ==  currently inside double quotes.
 *
 * Returns:       True if `c' acted as a special character in the state given
 *                on entry (an escaping backslash, or an opening or closing
 *                quote); false if `c' is an ordinary character in that state.
 *
 * ------------------------------------------------------------------------- */
{
  /* An escaped character is always ordinary; it just consumes the escape. */
  if (escape)
  {
    escape = false;
    return false;
  }

  if (c == '\\')
  {
    /* Inside single quotes a backslash has no special meaning. */
    if (quotechar == '\'')
      return false;

    escape = true;
    return true;
  }

  if (c != '\'' && c != '"')
    return false;

  /* `c' is a quote character: it opens a quotation when none is active and
     closes the active one when it matches the opening quote. */
  if (quotechar == 0)
  {
    quotechar = c;
    return true;
  }

  if (quotechar == c)
  {
    quotechar = 0;
    return true;
  }

  /* The other kind of quote inside a quotation is an ordinary character. */
  return false;
}

/* ========================================================================= */
string unquoteString(const string& s)
/* ----------------------------------------------------------------------------
 *
 * Description:   Strips the quoting syntax from a string: every character that
 *                `interpretSpecialCharacters' reports as special (unescaped
 *                opening and closing quotes and escaping backslashes) is
 *                dropped, and all other characters are kept in order.
 *
 * Argument:      s  --  String to process.
 *
 * Returns:       A copy of `s' without the quotes and escape characters.
 *
 * ------------------------------------------------------------------------- */
{
  char active_quote = 0;
  bool escaped = false;
  string unquoted;

  for (string::const_iterator ch = s.begin(); ch != s.end(); ++ch)
  {
    /* The call also advances the quote/escape state for the next character. */
    const bool is_special
      = interpretSpecialCharacters(*ch, escaped, active_quote);

    if (is_special)
      continue;

    unquoted += *ch;
  }

  return unquoted;
}

/* ========================================================================= */
string::size_type findInQuotedString
  (const string& s, const string& chars, QuoteMode type)
/* ----------------------------------------------------------------------------
 *
 * Description:   Searches a string for the first occurrence of any character
 *                from a given set, optionally restricting the search to the
 *                quoted or the unquoted parts of the string.
 *
 * Arguments:     s      --  String to process.
 *                chars  --  A string of characters to be searched in `s'.
 *                type   --  The extent of the search.
 *                             GLOBAL         - Apply the search to the entire
 *                                              string.
 *                             INSIDE_QUOTES  - Restrict the search to
 *                                              unescaped characters between
 *                                              quotes.
 *                             OUTSIDE_QUOTES - Restrict the search to
 *                                              unescaped characters outside
 *                                              quotes.
 *
 *                Each character is tested against the quoting state that was
 *                in effect before the character itself was interpreted.
 *
 * Returns:       The position of the first character of `s' that occurs in
 *                `chars' and lies in a part of the string matching `type', or
 *                string::npos if there is no such character.
 *
 * ------------------------------------------------------------------------- */
{
  bool escaped = false;
  char active_quote = 0;

  const string::size_type length = s.size();
  for (string::size_type index = 0; index < length; ++index)
  {
    const char current = s[index];

    /* Decide whether the current position belongs to the searched region. */
    bool in_scope;
    if (type == GLOBAL)
      in_scope = true;
    else if (escaped)
      in_scope = false;
    else if (type == INSIDE_QUOTES)
      in_scope = (active_quote != 0);
    else
      in_scope = (type == OUTSIDE_QUOTES && active_quote == 0);

    if (in_scope && chars.find(current) != string::npos)
      return index;

    /* Only the state update matters here; the return value is not needed. */
    interpretSpecialCharacters(current, escaped, active_quote);
  }

  return string::npos;
}

/* ========================================================================= */
string substituteInQuotedString
  (const string& s, const string& chars, const string& substitutions,
   QuoteMode type)
/* ----------------------------------------------------------------------------
 *
 * Description:   Substitutes characters in a string with other characters,
 *                optionally restricting the substitution to the quoted or the
 *                unquoted parts of the string.
 *
 * Arguments:     s              --  String to process.
 *                chars          --  A string of characters, each of which
 *                                   should be substituted in `s' with the
 *                                   character at the corresponding
 *                                   position of the string `substitutions'.
 *                substitutions  --  Characters to substitute. The length of
 *                                   this string should equal the length of
 *                                   `chars'.  A character of `chars' that has
 *                                   no counterpart in `substitutions' (because
 *                                   `substitutions' is shorter) is left
 *                                   unchanged in the result.
 *                type           --  The extent of substitution.
 *                                     GLOBAL         - Apply the substitutions
 *                                                      globally.
 *                                     INSIDE_QUOTES  - Apply the substitutions
 *                                                      to unescaped characters
 *                                                      only inside quotes that
 *                                                      have not been escaped
 *                                                      with a backslash.
 *                                     OUTSIDE_QUOTES - Apply the substitutions
 *                                                      to unescaped characters
 *                                                      only outside quotes
 *                                                      that have not been
 *                                                      escaped with a
 *                                                      backslash.
 *                                   It is not recommended to substitute the
 *                                   special characters ', " and \ with other
 *                                   characters if they have special meaning in
 *                                   `s'.
 *
 * Returns:       A string with the substitutions.  The result always has the
 *                same length as `s'.
 *
 * ------------------------------------------------------------------------- */
{
  string result;
  result.reserve(s.size());

  char quotechar = 0;
  bool escape = false;

  for (string::size_type pos = 0; pos < s.size(); ++pos)
  {
    char c = s[pos];
    if (type == GLOBAL || (!escape &&
                           ((type == INSIDE_QUOTES && quotechar != 0)
                            || (type == OUTSIDE_QUOTES && quotechar == 0))))
    {
      const string::size_type subst_pos = chars.find(c);

      /* Guard against a `substitutions' string that is shorter than `chars':
         indexing past its end would be an out-of-range read. */
      if (subst_pos != string::npos && subst_pos < substitutions.size())
        c = substitutions[subst_pos];
    }
    result += c;

    /* The quoting state is tracked on the original character, not on the
       substituted one. */
    interpretSpecialCharacters(s[pos], escape, quotechar);
  }

  return result;
}

/* ========================================================================= */
unsigned long int parseNumber(const string& number_string)
/* ----------------------------------------------------------------------------
 *
 * Description:   Converts a string to an unsigned long integer (base 10)
 *                using `strtoul'.
 *
 * Argument:      number_string  --  A reference to a constant string.
 *
 * Returns:       The number contained in the string.  A NotANumberException
 *                is thrown if the string is empty, contains a minus sign,
 *                has characters left over after the number, or denotes a
 *                value too large to fit in an unsigned long integer.
 *
 * ------------------------------------------------------------------------- */
{
  /* A minus sign is rejected explicitly: `strtoul' would accept it and
     return the negated value converted to an unsigned type. */
  if (number_string.empty() || number_string.find('-') != string::npos)
    throw NotANumberException("expected a nonnegative integer, got `"
                              + number_string + "'");

  char* endptr;

  /* `strtoul' reports overflow only through errno, so errno has to be
     cleared before the call to make the check below reliable. */
  errno = 0;
  const unsigned long int number
    = strtoul(number_string.c_str(), &endptr, 10);

  if (*endptr != '\0')
    throw NotANumberException("expected a nonnegative integer, got `"
                              + number_string + "'");

  /* Without this check an oversized value would silently be returned as
     ULONG_MAX. */
  if (errno == ERANGE)
    throw NotANumberException("integer out of range: `"
                              + number_string + "'");

  return number;
}

/* ========================================================================= */
int parseInterval
  (const string& token, unsigned long int& min, unsigned long int& max)
/* ----------------------------------------------------------------------------
 *
 * Description:   Extracts the lower and upper bound of an interval from its
 *                textual form.
 *
 * Arguments:     token     --  An "interval string" of the format
 *                                  <interval string>
 *                                    ::=  "*"                  // 0
 *                                      |  <ulong>              // 1
 *                                      |  <sep><ulong>         // 2
 *                                      |  <ulong><sep>         // 3
 *                                      |  <ulong><sep><ulong>  // 4
 *                              where <ulong> is an unsigned long integer (not
 *                              containing a minus sign), and <sep> is either
 *                              "-" or "...".  If the string contains a "-",
 *                              the first "-" is taken as the separator;
 *                              otherwise the first "..." is.  The cases mean:
 *                                  0  All integers between 0 and ULONG_MAX.
 *                                  1  A point interval consisting of a single
 *                                     value.
 *                                  2  An interval from 0 to a given upper
 *                                     bound.
 *                                  3  An interval from a given lower bound to
 *                                     ULONG_MAX.
 *                                  4  A bounded interval.
 *                min, max  --  Receive the lower and upper bound of the
 *                              interval.  They are written only if the whole
 *                              string was parsed successfully.
 *
 * Returns:       The type of the interval: a bitwise or of LEFT_BOUNDED and
 *                RIGHT_BOUNDED, telling which bounds were given explicitly
 *                (a point interval has both).  A NotANumberException is
 *                thrown if the interval string is of an invalid format.
 *
 * ------------------------------------------------------------------------- */
{
  unsigned long int lower = 0;
  unsigned long int upper = ULONG_MAX;
  int bounds = UNBOUNDED;

  if (token != "*")
  {
    /* Locate the separator; a dash takes priority over an ellipsis. */
    string::size_type sep_length = 1;
    string::size_type sep_pos = token.find('-');
    if (sep_pos == string::npos)
    {
      sep_length = 3;
      sep_pos = token.find("...");
    }

    if (sep_pos == string::npos)
    {
      /* No separator: the whole token must be a single number, which is
         both the lower and the upper bound. */
      if (token.empty())
        throw NotANumberException("invalid format for interval");

      lower = upper = parseNumber(token);
      bounds |= LEFT_BOUNDED;
      bounds |= RIGHT_BOUNDED;
    }
    else
    {
      const string lower_text(token.substr(0, sep_pos));
      const string upper_text(token.substr(sep_pos + sep_length));

      if (!lower_text.empty())
      {
        lower = parseNumber(lower_text);
        bounds |= LEFT_BOUNDED;
      }

      if (!upper_text.empty())
      {
        upper = parseNumber(upper_text);
        bounds |= RIGHT_BOUNDED;
      }
      else if (!(bounds & LEFT_BOUNDED))
      {
        /* A separator on its own does not denote an interval. */
        throw NotANumberException("invalid format for interval");
      }
    }
  }

  min = lower;
  max = upper;

  return bounds;
}

/* ========================================================================= */
void parseIntervalList
  (const string& token, IntervalList& intervals, unsigned long int min,
   unsigned long int max, vector<string>* extra_tokens)
/* ----------------------------------------------------------------------------
 *
 * Description:   Parses a comma-separated list of interval strings and merges
 *                the intervals into an IntervalList.
 *
 * Arguments:     token         --  A comma-separated list of interval strings
 *                                  (see the documentation of the
 *                                  parseInterval function).
 *                intervals     --  The IntervalList receiving the result.  It
 *                                  is cleared before parsing starts.
 *                min           --  Absolute lower bound for the numbers.  An
 *                                  interval without an explicit lower bound
 *                                  is cut to start at `min'.
 *                max           --  Absolute upper bound for the numbers.  An
 *                                  interval without an explicit upper bound
 *                                  is cut to end at `max'.
 *                extra_tokens  --  If not 0, every list element that is not a
 *                                  valid interval string is appended to the
 *                                  vector this points to and parsing goes on
 *                                  with the next element.  If 0, the
 *                                  NotANumberException raised for such an
 *                                  element is passed on to the caller.
 *
 * Returns:       Nothing.  Throws an IntervalRangeException if an explicitly
 *                given interval bound lies outside the closed range
 *                [min,max].
 *
 * ------------------------------------------------------------------------- */
{
  intervals.clear();

  vector<string> pieces;
  sliceString(token, ",", pieces);

  for (vector<string>::size_type index = 0; index < pieces.size(); ++index)
  {
    const string& piece = pieces[index];
    unsigned long int lower, upper;
    int bounds;

    try
    {
      bounds = parseInterval(piece, lower, upper);
    }
    catch (const NotANumberException&)
    {
      /* Without a place to collect unrecognized tokens the error is fatal. */
      if (extra_tokens == 0)
        throw;

      extra_tokens->push_back(piece);
      continue;
    }

    /* An explicitly given bound has to lie within [min,max]; an implicit
       bound is silently narrowed to that range instead. */
    if (!(bounds & LEFT_BOUNDED))
    {
      if (lower < min)
        lower = min;
    }
    else if (lower < min || lower > max)
      throw IntervalRangeException(lower);

    if (!(bounds & RIGHT_BOUNDED))
    {
      if (upper > max)
        upper = max;
    }
    else if (upper < min || upper > max)
      throw IntervalRangeException(upper);

    intervals.merge(lower, upper);
  }
}

/* ========================================================================= */
void parseTime
  (const string& time_string, unsigned long int& hours,
   unsigned long int& minutes, unsigned long int& seconds)
/* ----------------------------------------------------------------------------
 *
 * Description:   Parses a "time string", i.e., a string of the form
 *                    ([0-9]+"h")([0-9]+"min")?([0-9]+"s")?
 *                  | ([0-9]+"min")([0-9]+"s")?
 *                  | ([0-9]+"s")
 *                (where 'h', 'min' and 's' correspond to hours, minutes and
 *                seconds, respectively) and stores the numbers into three
 *                unsigned long integers.  The case of the unit symbols is not
 *                relevant.
 *
 * Arguments:     time_string  --  String to process.
 *                hours        --  A reference to an unsigned long integer for
 *                                 storing the number of hours.
 *                minutes      --  A reference to an unsigned long integer for
 *                                 storing the number of minutes.
 *                seconds      --  A reference to an unsigned long integer for
 *                                 storing the number of seconds.
 *
 *                Time components left unspecified in `time_string' will get
 *                the value 0.  All three output variables are reset to 0
 *                before parsing starts and are updated as the components are
 *                read, so their values are unspecified if the function
 *                throws.
 *
 * Returns:       Nothing. Throws an Exception if the given string is not of
 *                the correct format.
 *
 * ------------------------------------------------------------------------- */
{
  bool hours_present = false, minutes_present = false, seconds_present = false;
  hours = minutes = seconds = 0;

  if (time_string.empty())
    throw Exception("invalid time format");

  string::size_type pos1 = 0;

  while (pos1 < time_string.length())
  {
    /* [pos1, pos2) is the run of digits; pos2 is the start of the unit. */
    const string::size_type pos2
      = time_string.find_first_not_of("0123456789", pos1);

    /* A number that is not followed by a unit symbol is an error. */
    if (pos2 >= time_string.length())
      throw Exception("invalid time format");

    unsigned long int val;

    try
    {
      /* An empty digit run makes parseNumber throw as well. */
      val = parseNumber(time_string.substr(pos1, pos2 - pos1));
    }
    catch (const NotANumberException&)
    {
      throw Exception("invalid time format");
    }

    /* Number of characters in the unit symbol that follows the number. */
    string::size_type unit_length = 1;

    /* Characters are converted to unsigned char before being passed to
       `tolower': calling it with a negative plain char is undefined. */
    switch (tolower(static_cast<unsigned char>(time_string[pos2])))
    {
      case 'h' :
        /* Hours must come first and may be given only once. */
        if (hours_present || minutes_present || seconds_present)
          throw Exception("invalid time format");
        hours_present = true;
        hours = val;
        break;

      case 'm' :
        /* Minutes must precede seconds; the unit has to be spelled "min". */
        if (minutes_present
            || seconds_present
            || pos2 + 2 >= time_string.length()
            || tolower(static_cast<unsigned char>(time_string[pos2 + 1]))
                 != 'i'
            || tolower(static_cast<unsigned char>(time_string[pos2 + 2]))
                 != 'n')
          throw Exception("invalid time format");
        minutes_present = true;
        minutes = val;
        unit_length = 3;
        break;

      case 's' :
        if (seconds_present)
          throw Exception("invalid time format");
        seconds_present = true;
        seconds = val;
        break;

      default : /* unknown unit symbol */
        throw Exception("invalid time format");
        break;
    }

    /* Continue with the next component right after the unit symbol. */
    pos1 = pos2 + unit_length;
  }
}

}