spot/lbtt/src/StringUtil.cc

586 lines
21 KiB
C++

/*
* Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
* Heikki Tauriainen <Heikki.Tauriainen@tkk.fi>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <config.h>
#include <cctype>
#include <climits>
#include <cstdlib>
#include "StringUtil.h"
namespace StringUtil
{
/* ========================================================================= */
string toString(const double d, const int precision, const ios::fmtflags flags)
/* ----------------------------------------------------------------------------
*
* Description: Converts a double to a string with a given precision and
* format. The function defaults to fixed-point format with a
* precision of two decimals.
*
* Arguments: d -- The double to be converted.
* precision -- Precision.
* flags -- Formatting flags.
*
* Returns: The double as a string.
*
* ------------------------------------------------------------------------- */
{
#ifdef HAVE_SSTREAM
ostringstream stream;
stream.precision(precision);
stream.flags(flags);
stream << d;
return stream.str();
#else
ostrstream stream;
stream.precision(precision);
stream.flags(flags);
stream << d << ends;
string result(stream.str());
stream.freeze(0);
return result;
#endif /* HAVE_SSTREAM */
}
/* ========================================================================= */
void sliceString
(const string& s, const char* slice_chars, vector<string>& slices)
/* ----------------------------------------------------------------------------
*
* Description: Slices a string into a vector of strings, using a given set
* of characters as separators.
*
* Arguments: s -- A reference to the constant original string.
* slice_chars -- A C-style string containing the characters
* to be used as separators.
* slices -- A reference to a vector for storing the
* string components.
*
* Returns: Nothing.
*
* ------------------------------------------------------------------------- */
{
string::size_type last_non_slicechar_pos = 0;
string::size_type last_slicechar_pos = 0;
slices.clear();
do
{
last_non_slicechar_pos =
s.find_first_not_of(slice_chars, last_slicechar_pos);
if (last_non_slicechar_pos != s.npos)
{
last_slicechar_pos = s.find_first_of(slice_chars,
last_non_slicechar_pos);
if (last_slicechar_pos == s.npos)
slices.push_back(s.substr(last_non_slicechar_pos));
else
slices.push_back(s.substr(last_non_slicechar_pos,
last_slicechar_pos
- last_non_slicechar_pos));
}
}
while (last_non_slicechar_pos != s.npos && last_slicechar_pos != s.npos);
}
/* ========================================================================= */
string toLowerCase(const string& s)
/* ----------------------------------------------------------------------------
*
* Description: Converts a string to lower case.
*
* Argument: s -- String to process.
*
* Returns: The string in lower case.
*
* ------------------------------------------------------------------------- */
{
string result;
for (string::size_type pos = 0; pos < s.length(); ++pos)
result += tolower(s[pos]);
return result;
}
/* ========================================================================= */
bool interpretSpecialCharacters(const char c, bool& escape, char& quotechar)
/* ----------------------------------------------------------------------------
*
* Description: Updates the values of `escape' and `quotechar' based on their
* original values and the value of `c'. Used for scanning
* through a string possibly containing quotes and escaped
* characters.
*
* Arguments: c -- A character.
* escape -- A truth value telling whether `c' was escaped.
* quotechar -- 0 == `c' was read outside of quotes.
* `'' == `c' was read inside single quotes.
* `"' == `c' was read inside double quotes.
*
* Returns: True if `c' had a special meaning (for example, if it was a
* begin/end quote character) in the state determined by the
* original values of `escape' and `quotechar'.
*
* ------------------------------------------------------------------------- */
{
if (escape)
{
escape = false;
return false;
}
switch (c)
{
case '\\' :
if (quotechar != '\'')
{
escape = true;
return true;
}
break;
case '\'' : case '"' :
if (quotechar == 0)
{
quotechar = c;
return true;
}
else if (c == quotechar)
{
quotechar = 0;
return true;
}
break;
default :
break;
}
return false;
}
/* ========================================================================= */
string unquoteString(const string& s)
/* ----------------------------------------------------------------------------
*
* Description: Removes (unescaped) single and double quotes and escape
* characters from a string.
*
* Argument: s -- String to process.
*
* Returns: A string with the quotes and escape characters removed.
*
* ------------------------------------------------------------------------- */
{
string result;
char quotechar = 0;
bool escape = false;
for (string::size_type pos = 0; pos < s.size(); ++pos)
{
if (!interpretSpecialCharacters(s[pos], escape, quotechar))
result += s[pos];
}
return result;
}
/* ========================================================================= */
string::size_type findInQuotedString
(const string& s, const string& chars, QuoteMode type)
/* ----------------------------------------------------------------------------
*
* Description: Finds a character in a string (respecting quotes).
*
* Arguments: s -- String to process.
* chars -- A sting of characters to be searched in `s'.
* type -- The extent of the search.
* GLOBAL - Apply the search to the entire
* string.
* INSIDE_QUOTES - Restrict the search to
* unescaped characters between
* quotes.
* OUTSIDE_QUOTES - Restrict the search to
* unescaped characters outside
* quotes.
*
* Returns: If `s' contains one of the characters in `chars' in a part
* of the string that matches `type', the position of the
* character in `s', and string::npos otherwise.
*
* ------------------------------------------------------------------------- */
{
char quotechar = 0;
bool escape = false;
for (string::size_type pos = 0; pos < s.size(); ++pos)
{
if ((type == GLOBAL || (!escape &&
((type == INSIDE_QUOTES && quotechar != 0)
|| (type == OUTSIDE_QUOTES && quotechar == 0))))
&& chars.find_first_of(s[pos]) != string::npos)
return pos;
interpretSpecialCharacters(s[pos], escape, quotechar);
}
return string::npos;
}
/* ========================================================================= */
string substituteInQuotedString
(const string& s, const string& chars, const string& substitutions,
QuoteMode type)
/* ----------------------------------------------------------------------------
*
* Description: Substitutes characters in a string with other characters.
*
* Arguments: s -- String to process.
* chars -- A string of characters, each of which
* should be substituted in `s' with the
* character at the corresponding
* position of the string `substitutions'.
* substitutions -- Characters to substitute. The length of
* this string should equal the length of
* `chars'.
* type -- The extent of substitution.
* GLOBAL - Apply the substitutions
* globally (the default).
* INSIDE_QUOTES - Apply the substitutions
* to unescaped characters
* only inside quotes that
* have not been escaped
* with a backslash.
* OUTSIDE_QUOTES - Apply the substitutions
* to unescaped characters
* only outside quotes
* that have not been
* escaped with a
* backslash.
* It is not recommended to substitute the
* special characters ', " and \ with other
* characters if they have special meaning in
* `s'.
*
* Returns: A string with the substitutions.
*
* ------------------------------------------------------------------------- */
{
string result;
char quotechar = 0;
bool escape = false;
for (string::size_type pos = 0; pos < s.size(); ++pos)
{
char c = s[pos];
if (type == GLOBAL || (!escape &&
((type == INSIDE_QUOTES && quotechar != 0)
|| (type == OUTSIDE_QUOTES && quotechar == 0))))
{
string::size_type subst_pos = chars.find_first_of(c);
if (subst_pos != string::npos)
c = substitutions[subst_pos];
}
result += c;
interpretSpecialCharacters(s[pos], escape, quotechar);
}
return result;
}
/* ========================================================================= */
unsigned long int parseNumber(const string& number_string)
/* ----------------------------------------------------------------------------
*
* Description: Converts a string to an unsigned long integer.
*
* Argument: number_string -- A reference to a constant string.
*
* Returns: The number contained in the string, unless the string could
* not be converted to a number, in which case an exception is
* thrown.
*
* ------------------------------------------------------------------------- */
{
char* endptr;
unsigned long int number = strtoul(number_string.c_str(), &endptr, 10);
if (*endptr != '\0' || number_string.empty()
|| number_string.find_first_of("-") != string::npos)
throw NotANumberException("expected a nonnegative integer, got `"
+ number_string + "'");
return number;
}
/* ========================================================================= */
int parseInterval
(const string& token, unsigned long int& min, unsigned long int& max)
/* ----------------------------------------------------------------------------
*
* Description: Reads the lower and upper bound from an "interval string"
* into two unsigned long integer variables.
*
* Arguments: token -- A reference to a constant "interval string" of
* the format
* <interval string>
* ::= "*" // 0
* | <ulong> // 1
* | <sep><ulong> // 2
* | <ulong><sep> // 3
* | <ulong><sep><ulong> // 4
* where <ulong> is an unsigned long integer (not
* containing a minus sign), and <sep> is either
* "-" or "...". The meaning of the various cases
* is as follows:
* 0 All integers between 0 and ULONG_MAX.
* 1 A point interval consisting of a single
* value.
* 2 An interval from 0 to a given upper
* bound.
* 3 An interval from a given lower bound to
* ULONG_MAX.
* 4 A bounded interval.
* min, max -- References to two unsigned long integers for
* storing the lower and upper bound of the
* interval.
*
* Returns: A value telling the type of the specified interval, which is
* a bitwise or of the values LEFT_BOUNDED and RIGHT_BOUNDED
* depending on which bounds were given explicitly for the
* interval. (The lower and upper bounds of the interval itself
* are stored in the variables `min' and `max', respectively.)
* The function will throw a NotANumberException if the
* interval string is of an invalid format.
*
* ------------------------------------------------------------------------- */
{
unsigned long int tmp_min = 0;
unsigned long int tmp_max = ULONG_MAX;
int interval_type = UNBOUNDED;
if (token != "*")
{
string::size_type pos(token.find_first_of("-"));
if (pos == string::npos)
pos = token.find("...");
string value(token.substr(0, pos));
if (!value.empty())
{
tmp_min = parseNumber(value);
if (pos == string::npos)
tmp_max = tmp_min;
interval_type |= LEFT_BOUNDED;
}
if (pos != string::npos)
value = token.substr(pos + (token[pos] == '-' ? 1 : 3));
if (!value.empty())
{
tmp_max = parseNumber(value);
interval_type |= RIGHT_BOUNDED;
}
else if (!(interval_type & LEFT_BOUNDED))
throw NotANumberException("invalid format for interval");
}
min = tmp_min;
max = tmp_max;
return interval_type;
}
/* ========================================================================= */
void parseIntervalList
(const string& token, IntervalList& intervals, unsigned long int min,
unsigned long int max, vector<string>* extra_tokens)
/* ----------------------------------------------------------------------------
*
* Description: Parses a string of number intervals into an IntervalList.
*
* Arguments: token -- A reference to a constant comma-separated
* list of interval strings (see documentation
* for the parseInterval function).
* intervals -- A reference to an IntervalList to be used
* for storing the result.
* min -- Absolute lower bound for the numbers.
* Numbers lower than this bound will not be
* stored in the result set.
* max -- Absolute upper bound for the numbers.
* Numbers greater than this bound will not be
* stored in the result set.
* extra_tokens -- If not 0, all tokens that cannot be
* recognized as valid interval strings will
* be stored in the vector of strings to which
* this variable points. Otherwise the
* function will throw a NotANumberException.
*
* Returns: Nothing. Throws an IntervalRangeException if any of the
* intervals in the list does not fit in the closed range
* [min,max].
*
* ------------------------------------------------------------------------- */
{
vector<string> interval_strings;
int interval_type;
intervals.clear();
sliceString(token, ",", interval_strings);
for (vector<string>::const_iterator i = interval_strings.begin();
i != interval_strings.end();
++i)
{
unsigned long int i_start, i_end;
try
{
interval_type = parseInterval(*i, i_start, i_end);
}
catch (const NotANumberException&)
{
if (extra_tokens != 0)
{
extra_tokens->push_back(*i);
continue;
}
else
throw;
}
if (interval_type & LEFT_BOUNDED)
{
if (i_start < min || i_start > max)
throw IntervalRangeException(i_start);
}
else if (i_start < min)
i_start = min;
if (interval_type & RIGHT_BOUNDED)
{
if (i_end < min || i_end > max)
throw IntervalRangeException(i_end);
}
else if (i_end > max)
i_end = max;
intervals.merge(i_start, i_end);
}
}
/* ========================================================================= */
void parseTime
(const string& time_string, unsigned long int& hours,
unsigned long int& minutes, unsigned long int& seconds)
/* ----------------------------------------------------------------------------
*
* Description: Parses a "time string", i.e., a string of the form
* ([0-9]+"h")([0-9]+"min")?([0-9]+"s")?
* | ([0-9]+"min")([0-9]+"s")?
* | ([0-9]+"s")
* (where 'h', 'min' and 's' correspond to hours, minutes and
* seconds, respectively) and stores the numbers into three
* unsigned long integers. The case of the unit symbols is not
* relevant.
*
* Arguments: time_string -- String to process.
* hours -- A reference to an unsigned long integer for
* storing the number of hours.
* minutes -- A reference to an unsigned long integer for
* storing the number of minutes.
* seconds -- A reference to an unsigned long integer for
* storing the number of seconds.
*
* Time components left unspecified in `time_string' will get
* the value 0.
*
* Returns: Nothing. Throws an Exception if the given string is not of
* the correct format.
*
* ------------------------------------------------------------------------- */
{
bool hours_present = false, minutes_present = false, seconds_present = false;
hours = minutes = seconds = 0;
if (time_string.empty())
throw Exception("invalid time format");
string::size_type pos1 = 0;
string s;
while (pos1 < time_string.length())
{
string::size_type pos2 = time_string.find_first_not_of("0123456789", pos1);
if (pos2 >= time_string.length())
throw Exception("invalid time format");
unsigned long int val;
try
{
val = parseNumber(time_string.substr(pos1, pos2 - pos1));
}
catch (const NotANumberException&)
{
throw Exception("invalid time format");
}
switch (tolower(time_string[pos2]))
{
case 'h' :
if (hours_present || minutes_present || seconds_present)
throw Exception("invalid time format");
hours_present = true;
hours = val;
break;
case 'm' :
if (minutes_present
|| seconds_present
|| pos2 + 2 >= time_string.length()
|| tolower(time_string[pos2 + 1]) != 'i'
|| tolower(time_string[pos2 + 2]) != 'n')
throw Exception("invalid time format");
minutes_present = true;
minutes = val;
pos2 += 2;
break;
case 's' :
if (seconds_present)
throw Exception("invalid time format");
seconds_present = true;
seconds = val;
break;
default : /* 's' */
throw Exception("invalid time format");
break;
}
pos1 = pos2 + 1;
}
}
}