dc/d73/strutil_8cc_source.html

 // Copyright (C) 2011-2020 Internet Systems Consortium, Inc. ("ISC")

 //

 // This Source Code Form is subject to the terms of the Mozilla Public

 // License, v. 2.0. If a copy of the MPL was not distributed with this

 // file, You can obtain one at http://mozilla.org/MPL/2.0/.


 #include <config.h>


 #include <util/encode/hex.h>

 #include <util/strutil.h>


 #include <boost/algorithm/string/classification.hpp>

 #include <boost/algorithm/string/constants.hpp>

 #include <boost/algorithm/string/split.hpp>


 #include <numeric>

 #include <iostream>

 #include <sstream>


 // Early versions of C++11 regex were buggy.  We use it if we can;
 // otherwise, we fall back to regcomp/regexec.  For more info see:

 // https://stackoverflow.com/questions/12530406/is-gcc-4-8-or-earlier-buggy-about-regular-expressions

 #ifdef USE_REGEX

 #include <regex>

 #else

 #include <sys/types.h>

 #include <regex.h>

 #endif


 #include <string.h>


 using namespace std;


 namespace isc {

 namespace util {

 namespace str {


 // Normalize slashes


 // Replaces every backslash in the given string with a forward slash.
 // The string is modified in place; an empty string is left untouched.
 void
 normalizeSlash(std::string& name) {
     for (char& ch : name) {
         if (ch == '\\') {
             ch = '/';
         }
     }
 }


 // Trim String


 // Returns a copy of the input with leading and trailing blanks removed.
 // Only space, tab and newline count as blanks.  An input that is empty
 // or consists solely of blanks yields an empty string.
 string
 trim(const string& instring) {
     static const char* const whitespace = " \t\n";

     // Locate the first character worth keeping; none means all blanks.
     const size_t head = instring.find_first_not_of(whitespace);
     if (head == string::npos) {
         return (string());
     }

     // A non-blank exists, so the backwards search is bound to succeed.
     const size_t tail = instring.find_last_not_of(whitespace);
     return (instring.substr(head, tail - head + 1));
 }


 // Tokenize string.  As noted in the header, this is locally written to avoid

 // another dependency on a Boost library.


 // Splits text into tokens separated by any character found in delim.
 //
 // Runs of delimiters are collapsed and empty tokens are never returned.
 // When escape is true the backslash acts as an escape character:
 //  - a backslash followed by a delimiter keeps the delimiter in the token,
 //  - two backslashes produce a single backslash,
 //  - a backslash followed by any other character keeps both characters,
 //  - a backslash at the very end of the input is kept as is.
 vector<string>
 tokens(const std::string& text, const std::string& delim, bool escape) {
     vector<string> pieces;
     string current;
     bool inside = false;          // a token is currently being assembled
     bool pending_escape = false;  // previous character was an unused escape

     for (const char ch : text) {
         // Classify the character; a delimiter is never treated as an escape.
         const bool is_delim = (delim.find(ch) != string::npos);
         const bool is_escape = (!is_delim && escape && (ch == '\\'));

         if (is_delim && !inside) {
             // Delimiter outside of any token: just swallow it.
             continue;
         }

         if (is_delim && !pending_escape) {
             // Plain delimiter closes the token; keep it unless it is empty.
             if (!current.empty()) {
                 pieces.push_back(current);
             }
             current.clear();
             inside = false;
             continue;
         }

         // Anything reaching this point starts or continues a token.
         inside = true;

         if (is_escape) {
             // A second escape in a row yields one literal backslash,
             // a first one is only remembered.
             if (pending_escape) {
                 current.push_back(ch);
             }
             pending_escape = !pending_escape;
             continue;
         }

         // Escaped delimiter or ordinary character.  An escape in front of
         // an ordinary character was not really an escape, so put it back.
         if (pending_escape && !is_delim) {
             current.push_back('\\');
         }
         pending_escape = false;
         current.push_back(ch);
     }

     // Input exhausted: a dangling escape is kept verbatim.
     if (pending_escape) {
         current.push_back('\\');
     }
     if (!current.empty()) {
         pieces.push_back(current);
     }

     return (pieces);
 }


 // Local function to pass to accumulate() for summing up string lengths.

 namespace {

 // Adds the size of cur_string to the running total curlen.
 size_t
 lengthSum(string::size_type curlen, const string& cur_string) {
     return (curlen + cur_string.size());
 }

 }

 // Provide printf-style formatting.
 //
 // Every "%s" in the format string is replaced, from left to right, by the
 // next element of args.  Surplus arguments are ignored and surplus "%s"
 // placeholders are left in the result unchanged.  Text coming from an
 // argument is never scanned for placeholders, so an argument that itself
 // contains "%s" is copied verbatim.
 //
 // @param format format string containing "%s" placeholders
 // @param args strings substituted for the placeholders, in order
 // @return the format string with the placeholders substituted
 std::string
 format(const std::string& format, const std::vector<std::string>& args) {

     static const string flag = "%s";

     // Reserve an upper bound for the result: the format string plus all
     // arguments.  Nothing is subtracted for the replaced placeholders
     // because there may be fewer placeholders than arguments, in which
     // case an unsigned subtraction could wrap around.
     string result;
     result.reserve(accumulate(args.begin(), args.end(), format.size(),
                               lengthSum));

     // Walk the format string once, copying the literal text in front of
     // each placeholder followed by the matching argument.
     size_t copied_to = 0;   // First format character not yet copied
     for (std::vector<std::string>::size_type i = 0; i < args.size(); ++i) {
         const size_t tokenpos = format.find(flag, copied_to);
         if (tokenpos == string::npos) {
             // No more placeholders: remaining arguments are ignored.
             break;
         }
         result.append(format, copied_to, tokenpos - copied_to);
         result.append(args[i]);
         copied_to = tokenpos + flag.size();
     }

     // Whatever is left of the format string is copied unchanged.
     result.append(format, copied_to, string::npos);

     return (result);
 }


 // Extracts the next whitespace-delimited token from the stream.
 //
 // @param iss stream to read from
 // @return the extracted token
 // @throw StringTokenError if the extraction leaves the stream in a failed
 //        or bad state (for instance because no token is left)
 std::string
 getToken(std::istringstream& iss) {
     string word;
     // The stream converts to false exactly when failbit or badbit is set.
     if (!(iss >> word)) {
         isc_throw(StringTokenError, "could not read token from string");
     }
     return (word);
 }


 // Converts a string enclosed in single quotes into a vector of bytes.
 //
 // Blanks around the quotes and blanks directly inside the quotes are
 // removed using trim().  If the trimmed input does not both start and end
 // with a single quote, an empty vector is returned.
 //
 // @param quoted_string text expected to have the form 'some text'
 // @return bytes of the text between the quotes, or an empty vector
 std::vector<uint8_t>
 quotedStringToBinary(const std::string& quoted_string) {
     // Strip whatever surrounds the quotes.
     const std::string outer = trim(quoted_string);

     // An opening and a closing quote are needed, hence two characters
     // at the very least.
     const bool is_quoted = (outer.length() > 1) &&
                            (outer[0] == '\'') &&
                            (outer[outer.length() - 1] == '\'');
     if (!is_quoted) {
         return (std::vector<uint8_t>());
     }

     // Drop the quotes, trim what was between them and copy the bytes.
     const std::string inner = trim(outer.substr(1, outer.length() - 2));
     return (std::vector<uint8_t>(inner.begin(), inner.end()));
 }


 // Decodes a string of colon-separated hexadecimal digits.
 //
 // Forwards to decodeSeparatedHexString() using ":" as the separator, so
 // validation, errors and the way the output vector is filled are those of
 // that function.
 //
 // @param hex_string text to be decoded
 // @param binary vector passed on to decodeSeparatedHexString() to receive
 //        the result
 void

 decodeColonSeparatedHexString(const std::string& hex_string,

                               std::vector<uint8_t>& binary) {

     decodeSeparatedHexString(hex_string, ":", binary);

 }


 // Decodes groups of hexadecimal digits divided by separator characters.
 //
 // The input is split at every character contained in sep.  Each group
 // must consist of one or two hexadecimal digits and yields one byte.  The
 // output vector is only modified when the whole input decoded correctly.
 // An input without any digits and without separators yields an empty
 // vector.
 //
 // @param hex_string text to be decoded
 // @param sep set of characters accepted as separators
 // @param binary vector receiving the decoded bytes on success
 // @throw isc::BadValue if there is an empty group while more than one
 //        group exists, if a group has more than two characters, or if a
 //        character is not a hexadecimal digit
 void
 decodeSeparatedHexString(const std::string& hex_string, const std::string& sep,
                          std::vector<uint8_t>& binary) {
     std::vector<std::string> split_text;
     boost::split(split_text, hex_string, boost::is_any_of(sep),
                  boost::algorithm::token_compress_off);

     std::vector<uint8_t> binary_vec;
     for (size_t i = 0; i < split_text.size(); ++i) {
         const std::string& group = split_text[i];

         // If there are multiple tokens and the current one is empty, it
         // means that two consecutive separators were specified (or the
         // string starts or ends with one). This is not allowed.
         if ((split_text.size() > 1) && group.empty()) {
             isc_throw(isc::BadValue, "two consecutive separators ('" << sep << "') specified in"
                       " a decoded string '" << hex_string << "'");

         // Between separators we expect at most two characters.
         } else if (group.size() > 2) {
             isc_throw(isc::BadValue, "invalid format of the decoded string"
                       << " '" << hex_string << "'");

         } else if (!group.empty()) {
             std::stringstream s;
             s << "0x";

             for (size_t j = 0; j < group.length(); ++j) {
                 // Check if we're dealing with hexadecimal digit.  The
                 // character is converted to unsigned char first because
                 // isxdigit() is undefined for negative values, which
                 // plain char may hold for non-ASCII input.
                 if (!isxdigit(static_cast<unsigned char>(group[j]))) {
                     isc_throw(isc::BadValue, "'" << group[j]
                               << "' is not a valid hexadecimal digit in"
                               << " decoded string '" << hex_string << "'");
                 }
                 s << group[j];
             }

             // The stream now has one or two hexadecimal digits.  Convert
             // them to a number and store it in the temporary vector.
             unsigned int binary_value = 0;
             s >> std::hex >> binary_value;

             binary_vec.push_back(static_cast<uint8_t>(binary_value));
         }
     }

     // All ok, replace the data in the output vector with a result.
     binary.swap(binary_vec);
 }


 // Decodes a hexadecimal string given in one of several notations.
 //
 // The notation is picked as follows:
 //  - if the string contains a colon, it is decoded as colon-separated
 //    groups (e.g. MAC address notation),
 //  - otherwise, if it contains a space, as space-separated groups,
 //  - otherwise as one run of hexadecimal digits which may start with
 //    "0x"; a leading '0' is added when the number of characters is odd.
 //
 // @param hex_string text to be decoded
 // @param binary vector receiving the decoded bytes
 // @throw isc::BadValue if a plain run of digits cannot be decoded; the
 //        separated notations report errors through
 //        decodeSeparatedHexString()
 void
 decodeFormattedHexString(const std::string& hex_string,
                          std::vector<uint8_t>& binary) {
     // Separated notations are handled elsewhere; colon wins over space.
     if (hex_string.find(':') != std::string::npos) {
         decodeSeparatedHexString(hex_string, ":", binary);
         return;
     }
     if (hex_string.find(' ') != std::string::npos) {
         decodeSeparatedHexString(hex_string, " ", binary);
         return;
     }

     std::string digits;

     // The decoder wants an even number of digits, so pad at the front.
     // The two characters of an optional prefix do not change the parity.
     if ((hex_string.length() % 2) != 0) {
         digits.push_back('0');
     }

     // A leading '0x' is allowed but is not part of the data.
     const bool has_prefix = (hex_string.length() > 2) &&
                             (hex_string.compare(0, 2, "0x") == 0);
     digits.append(hex_string, (has_prefix ? 2 : 0), std::string::npos);

     try {
         // Decode the hex string.
         encode::decodeHex(digits, binary);

     } catch (...) {
         isc_throw(isc::BadValue, "'" << hex_string << "' is not a valid"
                   " string of hexadecimal digits");
     }
 }


 // Implementation of the string sanitizer.
 //
 // Holds a compiled regular expression describing unwanted characters and
 // the text that replaces them.  Depending on USE_REGEX the expression is
 // handled either by std::regex or by the POSIX regcomp()/regexec()
 // functions.
 class StringSanitizerImpl {
 public:
     // Constructor.
     //
     // Compiles char_set as an extended regular expression.
     //
     // @param char_set regular expression matching the characters to
     //        replace
     // @param char_replacement text inserted in place of each match
     // @throw isc::BadValue if either argument is longer than
     //        StringSanitizer::MAX_DATA_SIZE or if char_set does not compile
     StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement)
         : char_set_(char_set), char_replacement_(char_replacement) {
         if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) {
             isc_throw(isc::BadValue, "char set size: '" << char_set.size()
                       << "' exceeds max size: '"
                       << StringSanitizer::MAX_DATA_SIZE << "'");
         }

         if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) {
             isc_throw(isc::BadValue, "char replacement size: '"
                       << char_replacement.size() << "' exceeds max size: '"
                       << StringSanitizer::MAX_DATA_SIZE << "'");
         }
 #ifdef USE_REGEX
         try {
             scrub_exp_ = std::regex(char_set, std::regex::extended);
         } catch (const std::exception& ex) {
             isc_throw(isc::BadValue, "invalid regex: '"
                       << char_set_ << "', " << ex.what());
         }
 #else
         int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED);
         if (ec) {
             char errbuf[512] = "";
             static_cast<void>(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf)));
             regfree(&scrub_exp_);
             isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf);
         }
 #endif
     }

     // Destructor.  Releases the compiled POSIX expression, if one is used.
     ~StringSanitizerImpl() {
 #ifndef USE_REGEX
         regfree(&scrub_exp_);
 #endif
     }

     // Copying is disabled: in the POSIX variant two objects would end up
     // calling regfree() on the same compiled expression.
     StringSanitizerImpl(const StringSanitizerImpl&) = delete;
     StringSanitizerImpl& operator=(const StringSanitizerImpl&) = delete;

     // Returns a copy of the given string with unwanted characters replaced.
     //
     // In the POSIX variant the input is processed in nul-terminated chunks
     // and every nul character inside the input is replaced as well.  After
     // a match the scan resumes one character past the start of the match.
     //
     // @param original string to sanitize
     // @return the sanitized copy
     // @throw isc::BadValue if std::regex_replace fails (USE_REGEX variant)
     // @throw isc::Unexpected if regexec() reports an error or an
     //        inconsistent match (POSIX variant)
     std::string scrub(const std::string& original) {
 #ifdef USE_REGEX
         std::stringstream result;
         try {
             std::regex_replace(std::ostream_iterator<char>(result),
                                original.begin(), original.end(),
                                scrub_exp_, char_replacement_);
         } catch (const std::exception& ex) {
             isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '"
                    << char_replacement_ << "' in '" << original << "' failed: ,"
                    << ex.what());
         }

         return (result.str());
 #else
         // In order to handle embedded nuls, we have to process in nul-terminated
         // chunks.  We iterate over the original data, doing pattern replacement
         // on each chunk.
         const char* orig_data = original.data();
         const char* dead_end = orig_data + original.size();
         const char* start_from = orig_data;
         stringstream result;

         while (start_from < dead_end) {
             // Iterate over original string, match by match.
             regmatch_t matches[2];  // n matches + 1
             const char* end_at = start_from + strlen(start_from);

             while (start_from < end_at) {
                 // Look for the next match
                 const int rc = regexec(&scrub_exp_, start_from, 1, matches, 0);
                 if (rc == REG_NOMATCH) {
                     // No matches, so add in the remainder of the chunk
                     result << start_from;
                     break;
                 }

                 // Any other non-zero value is a failure of the regex
                 // engine; the match array is not usable in that case.
                 if (rc != 0) {
                     isc_throw(isc::Unexpected, "regexec failed with error code: " << rc);
                 }

                 // Shouldn't happen, but one never knows eh?
                 if (matches[0].rm_so == -1) {
                     isc_throw(isc::Unexpected, "matched but so is -1?");
                 }

                 // Add everything from starting point up to the current match
                 const char* match_at = start_from + matches[0].rm_so;
                 while (start_from < match_at) {
                     result << *start_from;
                     ++start_from;
                 }

                 // Add in the replacement
                 result << char_replacement_;

                 // Move past the match.
                 ++start_from;
             }

             // The chunk is done.  If it ended before the end of the data,
             // it was terminated by an embedded nul: replace the nul and
             // continue behind it.  The decision is based on end_at so a
             // nul is treated the same way wherever it is located and
             // however the chunk in front of it was consumed.
             if (end_at < dead_end) {
                 result << char_replacement_;
                 start_from = end_at + 1;
             } else {
                 start_from = dead_end;
             }
         }

         return (result.str());
 #endif
     }

 private:
     // The regular expression text describing unwanted characters.
     std::string char_set_;

     // The text inserted in place of each match.
     std::string char_replacement_;

     // The compiled form of char_set_.
 #ifdef USE_REGEX
     std::regex scrub_exp_;
 #else
     regex_t scrub_exp_;
 #endif
 };


 // Maximum size accepted by the StringSanitizerImpl constructor for its
 // char_set and char_replacement arguments.
 //
 // @note The regex engine is implemented using recursion and can cause

 // stack overflow if the input data is too large. An arbitrary size of

 // 4096 should be enough for all cases.

 const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096;


 // Constructor.
 //
 // Allocates a StringSanitizerImpl built from the two arguments and stores
 // it in impl_.  Exceptions thrown by the StringSanitizerImpl constructor
 // propagate to the caller.
 //
 // @param char_set regular expression passed to the implementation
 // @param char_replacement replacement text passed to the implementation
 StringSanitizer::StringSanitizer(const std::string& char_set,

                                  const std::string& char_replacement)

     : impl_(new StringSanitizerImpl(char_set, char_replacement)) {

 }


 // Destructor.  Deletes the implementation object allocated by the
 // constructor.
 StringSanitizer::~StringSanitizer() {

     delete impl_;

 }


 // Returns a scrubbed copy of the given string.
 //
 // Delegates to StringSanitizerImpl::scrub() of the implementation object.
 //
 // @param original string to sanitize
 // @return the string returned by the implementation
 std::string

 StringSanitizer::scrub(const std::string& original) {

     return (impl_->scrub(original));

 }


 } // namespace str

 } // namespace util

 } // namespace isc

isc::util::str::StringSanitizer::~StringSanitizer
~StringSanitizer()
Destructor.
Definition: strutil.cc:442

isc::util::str::StringSanitizerImpl
Definition: strutil.cc:310

std
STL namespace.

isc::util::str::decodeSeparatedHexString
void decodeSeparatedHexString(const std::string &hex_string, const std::string &sep, std::vector< uint8_t > &binary)
Converts a string of separated hexadecimal digits into a vector.
Definition: strutil.cc:221

strutil.h

isc::util::str::decodeFormattedHexString
void decodeFormattedHexString(const std::string &hex_string, std::vector< uint8_t > &binary)
Converts a formatted string of hexadecimal digits into a vector.
Definition: strutil.cc:273

isc::util::str::StringSanitizerImpl::~StringSanitizerImpl
~StringSanitizerImpl()
Destructor.
Definition: strutil.cc:344

isc_throw
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
Definition: exceptions/exceptions.h:202

isc::BadValue
A generic exception that is thrown if a parameter given to a method is considered invalid in that context.
Definition: exceptions/exceptions.h:132

isc::util::str::StringSanitizerImpl::scrub
std::string scrub(const std::string &original)
Definition: strutil.cc:350

isc::util::encode::decodeHex
void decodeHex(const string &input, vector< uint8_t > &result)
Decode a text encoded in the base16 ('hex') format into the original data.
Definition: base_n.cc:474

isc::Unexpected
A generic exception that is thrown when an unexpected error condition occurs.
Definition: exceptions/exceptions.h:153

hex.h

isc::util::str::getToken
std::string getToken(std::istringstream &iss)
Returns one token from the given stringstream.
Definition: strutil.cc:186

isc::util::str::normalizeSlash
void normalizeSlash(std::string &name)
Normalize Backslash.
Definition: strutil.cc:41

isc::util::str::tokens
vector< string > tokens(const std::string &text, const std::string &delim, bool escape)
Split String into Tokens.
Definition: strutil.cc:77

isc::Exception::what
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
Definition: exceptions/exceptions.cc:32

isc::util::str::StringTokenError
A Set of C++ Utilities for Manipulating Strings.
Definition: strutil.h:30

isc::util::str::quotedStringToBinary
std::vector< uint8_t > quotedStringToBinary(const std::string &quoted_string)
Converts a string in quotes into vector.
Definition: strutil.cc:196

isc
Defines the logger used by the top-level component of kea-dhcp-ddns.
Definition: agent_parser.cc:143

isc::util::str::decodeColonSeparatedHexString
void decodeColonSeparatedHexString(const std::string &hex_string, std::vector< uint8_t > &binary)
Converts a string of hexadecimal digits with colons into a vector.
Definition: strutil.cc:215

isc::util::str::StringSanitizer::scrub
std::string scrub(const std::string &original)
Returns a scrubbed copy of a given string.
Definition: strutil.cc:447

isc::util::str::trim
string trim(const string &instring)
Trim Leading and Trailing Spaces.
Definition: strutil.cc:53

isc::util::str::StringSanitizerImpl::StringSanitizerImpl
StringSanitizerImpl(const std::string &char_set, const std::string &char_replacement)
Definition: strutil.cc:312

isc::util::str::format
std::string format(const std::string &format, const std::vector< std::string > &args)
Apply Formatting.
Definition: strutil.cc:157