/* * Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005 * Heikki Tauriainen * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include #include "StringUtil.h" namespace StringUtil { /* ========================================================================= */ string toString(const double d, const int precision, const ios::fmtflags flags) /* ---------------------------------------------------------------------------- * * Description: Converts a double to a string with a given precision and * format. The function defaults to fixed-point format with a * precision of two decimals. * * Arguments: d -- The double to be converted. * precision -- Precision. * flags -- Formatting flags. * * Returns: The double as a string. * * ------------------------------------------------------------------------- */ { #ifdef HAVE_SSTREAM ostringstream stream; stream.precision(precision); stream.flags(flags); stream << d; return stream.str(); #else ostrstream stream; stream.precision(precision); stream.flags(flags); stream << d << ends; string result(stream.str()); stream.freeze(0); return result; #endif /* HAVE_SSTREAM */ } /* ========================================================================= */ void sliceString (const string& s, const char* slice_chars, vector& slices) /* ---------------------------------------------------------------------------- * * Description: Slices a string into a vector of strings, using a given set * of characters as separators. * * Arguments: s -- A reference to the constant original string. * slice_chars -- A C-style string containing the characters * to be used as separators. * slices -- A reference to a vector for storing the * string components. * * Returns: Nothing. * * ------------------------------------------------------------------------- */ { string::size_type last_non_slicechar_pos = 0; string::size_type last_slicechar_pos = 0; slices.clear(); do { last_non_slicechar_pos = s.find_first_not_of(slice_chars, last_slicechar_pos); if (last_non_slicechar_pos != s.npos) { last_slicechar_pos = s.find_first_of(slice_chars, last_non_slicechar_pos); if (last_slicechar_pos == s.npos) slices.push_back(s.substr(last_non_slicechar_pos)); else slices.push_back(s.substr(last_non_slicechar_pos, last_slicechar_pos - last_non_slicechar_pos)); } } while (last_non_slicechar_pos != s.npos && last_slicechar_pos != s.npos); } /* ========================================================================= */ string toLowerCase(const string& s) /* ---------------------------------------------------------------------------- * * Description: Converts a string to lower case. * * Argument: s -- String to process. * * Returns: The string in lower case. * * ------------------------------------------------------------------------- */ { string result; for (string::size_type pos = 0; pos < s.length(); ++pos) result += tolower(s[pos]); return result; } /* ========================================================================= */ bool interpretSpecialCharacters(const char c, bool& escape, char& quotechar) /* ---------------------------------------------------------------------------- * * Description: Updates the values of `escape' and `quotechar' based on their * original values and the value of `c'. Used for scanning * through a string possibly containing quotes and escaped * characters. * * Arguments: c -- A character. * escape -- A truth value telling whether `c' was escaped. * quotechar -- 0 == `c' was read outside of quotes. * `'' == `c' was read inside single quotes. * `"' == `c' was read inside double quotes. * * Returns: True if `c' had a special meaning (for example, if it was a * begin/end quote character) in the state determined by the * original values of `escape' and `quotechar'. * * ------------------------------------------------------------------------- */ { if (escape) { escape = false; return false; } switch (c) { case '\\' : if (quotechar != '\'') { escape = true; return true; } break; case '\'' : case '"' : if (quotechar == 0) { quotechar = c; return true; } else if (c == quotechar) { quotechar = 0; return true; } break; default : break; } return false; } /* ========================================================================= */ string unquoteString(const string& s) /* ---------------------------------------------------------------------------- * * Description: Removes (unescaped) single and double quotes and escape * characters from a string. * * Argument: s -- String to process. * * Returns: A string with the quotes and escape characters removed. * * ------------------------------------------------------------------------- */ { string result; char quotechar = 0; bool escape = false; for (string::size_type pos = 0; pos < s.size(); ++pos) { if (!interpretSpecialCharacters(s[pos], escape, quotechar)) result += s[pos]; } return result; } /* ========================================================================= */ string::size_type findInQuotedString (const string& s, const string& chars, QuoteMode type) /* ---------------------------------------------------------------------------- * * Description: Finds a character in a string (respecting quotes). * * Arguments: s -- String to process. * chars -- A sting of characters to be searched in `s'. * type -- The extent of the search. * GLOBAL - Apply the search to the entire * string. * INSIDE_QUOTES - Restrict the search to * unescaped characters between * quotes. * OUTSIDE_QUOTES - Restrict the search to * unescaped characters outside * quotes. * * Returns: If `s' contains one of the characters in `chars' in a part * of the string that matches `type', the position of the * character in `s', and string::npos otherwise. * * ------------------------------------------------------------------------- */ { char quotechar = 0; bool escape = false; for (string::size_type pos = 0; pos < s.size(); ++pos) { if ((type == GLOBAL || (!escape && ((type == INSIDE_QUOTES && quotechar != 0) || (type == OUTSIDE_QUOTES && quotechar == 0)))) && chars.find_first_of(s[pos]) != string::npos) return pos; interpretSpecialCharacters(s[pos], escape, quotechar); } return string::npos; } /* ========================================================================= */ string substituteInQuotedString (const string& s, const string& chars, const string& substitutions, QuoteMode type) /* ---------------------------------------------------------------------------- * * Description: Substitutes characters in a string with other characters. * * Arguments: s -- String to process. * chars -- A string of characters, each of which * should be substituted in `s' with the * character at the corresponding * position of the string `substitutions'. * substitutions -- Characters to substitute. The length of * this string should equal the length of * `chars'. * type -- The extent of substitution. * GLOBAL - Apply the substitutions * globally (the default). * INSIDE_QUOTES - Apply the substitutions * to unescaped characters * only inside quotes that * have not been escaped * with a backslash. * OUTSIDE_QUOTES - Apply the substitutions * to unescaped characters * only outside quotes * that have not been * escaped with a * backslash. * It is not recommended to substitute the * special characters ', " and \ with other * characters if they have special meaning in * `s'. * * Returns: A string with the substitutions. * * ------------------------------------------------------------------------- */ { string result; char quotechar = 0; bool escape = false; for (string::size_type pos = 0; pos < s.size(); ++pos) { char c = s[pos]; if (type == GLOBAL || (!escape && ((type == INSIDE_QUOTES && quotechar != 0) || (type == OUTSIDE_QUOTES && quotechar == 0)))) { string::size_type subst_pos = chars.find_first_of(c); if (subst_pos != string::npos) c = substitutions[subst_pos]; } result += c; interpretSpecialCharacters(s[pos], escape, quotechar); } return result; } /* ========================================================================= */ unsigned long int parseNumber(const string& number_string) /* ---------------------------------------------------------------------------- * * Description: Converts a string to an unsigned long integer. * * Argument: number_string -- A reference to a constant string. * * Returns: The number contained in the string, unless the string could * not be converted to a number, in which case an exception is * thrown. * * ------------------------------------------------------------------------- */ { char* endptr; unsigned long int number = strtoul(number_string.c_str(), &endptr, 10); if (*endptr != '\0' || number_string.empty() || number_string.find_first_of("-") != string::npos) throw NotANumberException("expected a nonnegative integer, got `" + number_string + "'"); return number; } /* ========================================================================= */ int parseInterval (const string& token, unsigned long int& min, unsigned long int& max) /* ---------------------------------------------------------------------------- * * Description: Reads the lower and upper bound from an "interval string" * into two unsigned long integer variables. * * Arguments: token -- A reference to a constant "interval string" of * the format * * ::= "*" // 0 * | // 1 * | // 2 * | // 3 * | // 4 * where is an unsigned long integer (not * containing a minus sign), and is either * "-" or "...". The meaning of the various cases * is as follows: * 0 All integers between 0 and ULONG_MAX. * 1 A point interval consisting of a single * value. * 2 An interval from 0 to a given upper * bound. * 3 An interval from a given lower bound to * ULONG_MAX. * 4 A bounded interval. * min, max -- References to two unsigned long integers for * storing the lower and upper bound of the * interval. * * Returns: A value telling the type of the specified interval, which is * a bitwise or of the values LEFT_BOUNDED and RIGHT_BOUNDED * depending on which bounds were given explicitly for the * interval. (The lower and upper bounds of the interval itself * are stored in the variables `min' and `max', respectively.) * The function will throw a NotANumberException if the * interval string is of an invalid format. * * ------------------------------------------------------------------------- */ { unsigned long int tmp_min = 0; unsigned long int tmp_max = ULONG_MAX; int interval_type = UNBOUNDED; if (token != "*") { string::size_type pos(token.find_first_of("-")); if (pos == string::npos) pos = token.find("..."); string value(token.substr(0, pos)); if (!value.empty()) { tmp_min = parseNumber(value); if (pos == string::npos) tmp_max = tmp_min; interval_type |= LEFT_BOUNDED; } if (pos != string::npos) value = token.substr(pos + (token[pos] == '-' ? 1 : 3)); if (!value.empty()) { tmp_max = parseNumber(value); interval_type |= RIGHT_BOUNDED; } else if (!(interval_type & LEFT_BOUNDED)) throw NotANumberException("invalid format for interval"); } min = tmp_min; max = tmp_max; return interval_type; } /* ========================================================================= */ void parseIntervalList (const string& token, IntervalList& intervals, unsigned long int min, unsigned long int max, vector* extra_tokens) /* ---------------------------------------------------------------------------- * * Description: Parses a string of number intervals into an IntervalList. * * Arguments: token -- A reference to a constant comma-separated * list of interval strings (see documentation * for the parseInterval function). * intervals -- A reference to an IntervalList to be used * for storing the result. * min -- Absolute lower bound for the numbers. * Numbers lower than this bound will not be * stored in the result set. * max -- Absolute upper bound for the numbers. * Numbers greater than this bound will not be * stored in the result set. * extra_tokens -- If not 0, all tokens that cannot be * recognized as valid interval strings will * be stored in the vector of strings to which * this variable points. Otherwise the * function will throw a NotANumberException. * * Returns: Nothing. Throws an IntervalRangeException if any of the * intervals in the list does not fit in the closed range * [min,max]. * * ------------------------------------------------------------------------- */ { vector interval_strings; int interval_type; intervals.clear(); sliceString(token, ",", interval_strings); for (vector::const_iterator i = interval_strings.begin(); i != interval_strings.end(); ++i) { unsigned long int i_start, i_end; try { interval_type = parseInterval(*i, i_start, i_end); } catch (const NotANumberException&) { if (extra_tokens != 0) { extra_tokens->push_back(*i); continue; } else throw; } if (interval_type & LEFT_BOUNDED) { if (i_start < min || i_start > max) throw IntervalRangeException(i_start); } else if (i_start < min) i_start = min; if (interval_type & RIGHT_BOUNDED) { if (i_end < min || i_end > max) throw IntervalRangeException(i_end); } else if (i_end > max) i_end = max; intervals.merge(i_start, i_end); } } /* ========================================================================= */ void parseTime (const string& time_string, unsigned long int& hours, unsigned long int& minutes, unsigned long int& seconds) /* ---------------------------------------------------------------------------- * * Description: Parses a "time string", i.e., a string of the form * ([0-9]+"h")([0-9]+"min")?([0-9]+"s")? * | ([0-9]+"min")([0-9]+"s")? * | ([0-9]+"s") * (where 'h', 'min' and 's' correspond to hours, minutes and * seconds, respectively) and stores the numbers into three * unsigned long integers. The case of the unit symbols is not * relevant. * * Arguments: time_string -- String to process. * hours -- A reference to an unsigned long integer for * storing the number of hours. * minutes -- A reference to an unsigned long integer for * storing the number of minutes. * seconds -- A reference to an unsigned long integer for * storing the number of seconds. * * Time components left unspecified in `time_string' will get * the value 0. * * Returns: Nothing. Throws an Exception if the given string is not of * the correct format. * * ------------------------------------------------------------------------- */ { bool hours_present = false, minutes_present = false, seconds_present = false; hours = minutes = seconds = 0; if (time_string.empty()) throw Exception("invalid time format"); string::size_type pos1 = 0; string s; while (pos1 < time_string.length()) { string::size_type pos2 = time_string.find_first_not_of("0123456789", pos1); if (pos2 >= time_string.length()) throw Exception("invalid time format"); unsigned long int val; try { val = parseNumber(time_string.substr(pos1, pos2 - pos1)); } catch (const NotANumberException&) { throw Exception("invalid time format"); } switch (tolower(time_string[pos2])) { case 'h' : if (hours_present || minutes_present || seconds_present) throw Exception("invalid time format"); hours_present = true; hours = val; break; case 'm' : if (minutes_present || seconds_present || pos2 + 2 >= time_string.length() || tolower(time_string[pos2 + 1]) != 'i' || tolower(time_string[pos2 + 2]) != 'n') throw Exception("invalid time format"); minutes_present = true; minutes = val; pos2 += 2; break; case 's' : if (seconds_present) throw Exception("invalid time format"); seconds_present = true; seconds = val; break; default : /* 's' */ throw Exception("invalid time format"); break; } pos1 = pos2 + 1; } } }