Kea  1.9.9-git
base_n.cc
Go to the documentation of this file.
1 // Copyright (C) 2010-2020 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 
13 #include <util/encode/base32hex.h>
14 #include <util/encode/base64.h>
15 
16 #include <exceptions/exceptions.h>
17 #include <exceptions/isc_assert.h>
18 
19 #include <boost/archive/iterators/base64_from_binary.hpp>
20 #include <boost/archive/iterators/binary_from_base64.hpp>
21 #include <boost/archive/iterators/transform_width.hpp>
22 #ifdef HAVE_BOOST_INTEGER_COMMON_FACTOR_HPP
23 #include <boost/integer/common_factor.hpp>
24 #else
25 #include <boost/math/common_factor.hpp>
26 #endif
27 
28 #include <stdint.h>
29 #include <stdexcept>
30 #include <iterator>
31 #include <string>
32 #include <vector>
33 
34 using namespace std;
35 using namespace boost::archive::iterators;
36 
37 namespace isc {
38 namespace util {
39 namespace encode {
40 
41 // Some versions of clang cannot handle exceptions in unnamed namespaces
42 // so this exception is defined in an 'internal' namespace
43 namespace clang_unnamed_namespace_workaround {
44 // An internally caught exception to unify a few possible cases of the same
45 // error.
46 class IncompleteBaseInput : public std::exception {
47 };
48 } // end namespace internal
49 
50 // In the following anonymous namespace, we provide a generic framework
51 // to encode/decode baseN format. We use the following tools:
52 // - boost base64_from_binary/binary_from_base64: provide mapping table for
53 // base64.
54 // These classes take another iterator (Base) as a template argument, and
55 // their dereference operator (operator*()) first retrieves an input value
56 // from Base via Base::operator* and converts the value using their mapping
57 // table. The converted value is returned as their own operator*.
58 // - base{32hex,16}_from_binary/binary_from_base{32hex,16}: provide mapping
59 // table for base32hex and base16. A straightforward variation of their
60 // base64 counterparts.
61 // - EncodeNormalizer/DecodeNormalizer: supplemental filter handling baseN
62 // padding characters (=)
63 // - boost transform_width: an iterator framework for handling data stream
64 // per bit-group. It takes another iterator (Base) and output/input bit
65 // numbers (BitsOut/BitsIn) template arguments. A transform_width object
66 // internally maintains a bit stream, which can be retrieved per BitsOut
67 // bits via its dereference operator (operator*()). It builds the stream
68 // by internally iterating over the Base object via Base::operator++ and
69 // Base::operator*, using the least BitsIn bits of the result of
70 // Base::operator*. In our usage BitsIn for encoding and BitsOut for
71 // decoding are always 8 (# of bits for one byte).
72 //
73 // Its dereference operator
74 // retrieves BitsIn bits from the result of "*Base" (if necessary it
75 // internally calls ++Base)
76 //
77 // A conceptual description of how the encoding and decoding work is as
78 // follows:
79 // Encoding:
80 // input binary data => Normalizer (append sufficient number of 0 bits)
81 // => transform_width (extract bit groups from the original
82 // stream)
83 // => baseXX_from_binary (convert each bit group to an
84 // encoded byte using the mapping)
85 // Decoding:
86 // input baseXX text => Normalizer (convert '='s to the encoded characters
87 // corresponding to 0, e.g. 'A's in base64)
88 // => binary_from_baseXX (convert each encoded byte into
89 // the original group bit)
90 // => transform_width (build original byte stream by
91 // concatenating the decoded bit
92 // stream)
93 //
94 // Below, we define a set of templated classes to handle different parameters
95 // for different encoding algorithms.
96 namespace {
97 // Common constants used for all baseN encoding.
98 const char BASE_PADDING_CHAR = '=';
99 const uint8_t BINARY_ZERO_CODE = 0;
100 
101 // EncodeNormalizer is an input iterator intended to be used as a filter
102 // between the binary stream and baseXX_from_binary translator (via
103 // transform_width). An EncodeNormalizer object is configured with two
104 // iterators (base and base_end), specifying the head and end of the input
105 // stream. It internally iterators over the original stream, and return
106 // each byte of the stream intact via its dereference operator until it
107 // reaches the end of the stream. After that the EncodeNormalizer object
108 // will return 0 no matter how many times it is subsequently incremented.
109 // This is necessary because the input binary stream may not contain
110 // sufficient bits for a full encoded text while baseXX_from_binary expects
111 // a sufficient length of input.
112 // Note: this class is intended to be used within this implementation file,
113 // and assumes "base < base_end" on construction without validating the
114 // arguments. The behavior is undefined if this assumption doesn't hold.
115 class EncodeNormalizer : public iterator<input_iterator_tag, uint8_t> {
116 public:
117  EncodeNormalizer(const vector<uint8_t>::const_iterator& base,
118  const vector<uint8_t>::const_iterator& base_end) :
119  base_(base), base_end_(base_end), in_pad_(false)
120  {}
121  EncodeNormalizer& operator++() { // prefix version
122  increment();
123  return (*this);
124  }
125  EncodeNormalizer operator++(int) { // postfix version
126  const EncodeNormalizer copy = *this;
127  increment();
128  return (copy);
129  }
130  const uint8_t& operator*() const {
131  if (in_pad_) {
132  return (BINARY_ZERO_CODE);
133  } else {
134  return (*base_);
135  }
136  }
137  bool operator==(const EncodeNormalizer& other) const {
138  return (base_ == other.base_);
139  }
140 private:
141  void increment() {
142  if (!in_pad_) {
143  ++base_;
144  }
145  if (base_ == base_end_) {
146  in_pad_ = true;
147  }
148  }
149  vector<uint8_t>::const_iterator base_;
150  const vector<uint8_t>::const_iterator base_end_;
151  bool in_pad_;
152 };
153 
154 // DecodeNormalizer is an input iterator intended to be used as a filter
155 // between the encoded baseX stream and binary_from_baseXX.
156 // A DecodeNormalizer object is configured with three string iterators
157 // (base, base_beginpad, and base_end), specifying the head of the string,
158 // the beginning position of baseX padding (when there's padding), and
159 // end of the string, respectively. It internally iterators over the original
160 // stream, and return each character of the encoded string via its dereference
161 // operator until it reaches base_beginpad. After that the DecodeNormalizer
162 // will return the encoding character corresponding to the all-0 value
163 // (which is specified on construction via base_zero_code. see also
164 // BaseZeroCode below). This translation is necessary because
165 // binary_from_baseXX doesn't accept the padding character (i.e. '=').
166 // Note: this class is intended to be used within this implementation file,
167 // and for simplicity assumes "base < base_beginpad <= base_end" on
168 // construction without validating the arguments. The behavior is undefined
169 // if this assumption doesn't hold.
170 class DecodeNormalizer : public iterator<input_iterator_tag, char> {
171 public:
172  DecodeNormalizer(const char base_zero_code,
173  const string::const_iterator& base,
174  const string::const_iterator& base_beginpad,
175  const string::const_iterator& base_end,
176  size_t* char_count) :
177  base_zero_code_(base_zero_code),
178  base_(base), base_beginpad_(base_beginpad), base_end_(base_end),
179  in_pad_(false), char_count_(char_count)
180  {
181  // Skip beginning spaces, if any. We need do it here because
182  // otherwise the first call to operator*() would be confused.
183  skipSpaces();
184  }
185  DecodeNormalizer& operator++() {
186  if (base_ < base_end_) {
187  ++*char_count_;
188  }
189  ++base_;
190  skipSpaces();
191  if (base_ == base_beginpad_) {
192  in_pad_ = true;
193  }
194  return (*this);
195  }
196  void skipSpaces() {
197  // If (char is signed and) *base_ < 0, on Windows platform with Visual
198  // Studio compiler it may trigger _ASSERTE((unsigned)(c + 1) <= 256);
199  // so make sure that the parameter of isspace() is larger than 0.
200  // We don't simply cast it to unsigned char to avoid confusing the
201  // isspace() implementation with a possible extension for values
202  // larger than 127. Also note the check is not ">= 0"; for systems
203  // where char is unsigned that would always be true and would possibly
204  // trigger a compiler warning that could stop the build.
205  while (base_ != base_end_ && *base_ > 0 && isspace(*base_)) {
206  ++base_;
207  }
208  }
209  const char& operator*() const {
210  if (base_ == base_end_) {
211  // binary_from_baseX can call this operator when it needs more bits
212  // even if the internal iterator (base_) has reached its end
213  // (if that happens it means the input is an incomplete baseX
214  // string and should be rejected). So this is the only point
215  // we can catch and reject this type of invalid input.
216  //
217  // More recent versions of Boost fixed the behavior and the
218  // out-of-range call to this operator doesn't happen. It's good,
219  // but in that case we need to catch incomplete baseX input in
220  // a different way. It's done via char_count_ and after the
221  // completion of decoding.
222 
223  // throw this now and convert it
224  throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
225  }
226  if (*base_ == BASE_PADDING_CHAR) {
227  // Padding can only happen at the end of the input string. We can
228  // detect any violation of this by checking in_pad_, which is
229  // true iff we are on or after the first valid sequence of padding
230  // characters.
231  if (in_pad_) {
232  return (base_zero_code_);
233  } else {
234  isc_throw(BadValue, "Intermediate padding found");
235  }
236  } else {
237  return (*base_);
238  }
239  }
240  bool operator==(const DecodeNormalizer& other) const {
241  return (base_ == other.base_);
242  }
243 private:
244  const char base_zero_code_;
245  string::const_iterator base_;
246  const string::const_iterator base_beginpad_;
247  const string::const_iterator base_end_;
248  bool in_pad_;
249  // Store number of non-space decoded characters (incl. pad) here. Define
250  // it as a pointer so we can carry it over to any copied objects.
251  size_t* char_count_;
252 };
253 
254 // BitsPerChunk: number of bits to be converted using the baseN mapping table.
255 // e.g. 6 for base64.
256 // BaseZeroCode: the byte character that represents a value of 0 in
257 // the corresponding encoding. e.g. 'A' for base64.
258 // Encoder: baseX_from_binary<transform_width<EncodeNormalizer,
259 // BitsPerChunk, 8> >
260 // Decoder: transform_width<binary_from_baseX<DecodeNormalizer>,
261 // 8, BitsPerChunk>
262 template <int BitsPerChunk, char BaseZeroCode,
263  typename Encoder, typename Decoder>
264 struct BaseNTransformer {
265  static string encode(const vector<uint8_t>& binary);
266  static void decode(const char* algorithm,
267  const string& base64, vector<uint8_t>& result);
268 
269  // BITS_PER_GROUP is the number of bits for the smallest possible (non
270  // empty) bit string that can be converted to a valid baseN encoded text
271  // without padding. It's the least common multiple of 8 and BitsPerChunk,
272  // e.g. 24 for base64.
273  static const int BITS_PER_GROUP =
274 #ifdef HAVE_BOOST_INTEGER_COMMON_FACTOR_HPP
275  boost::integer::static_lcm<BitsPerChunk, 8>::value;
276 #else
277  boost::math::static_lcm<BitsPerChunk, 8>::value;
278 #endif
279 
280  // MAX_PADDING_CHARS is the maximum number of padding characters
281  // that can appear in a valid baseN encoded text.
282  // It's group_len - chars_for_byte, where group_len is the number of
283  // encoded characters to represent BITS_PER_GROUP bits, and
284  // chars_for_byte is the number of encoded character that is needed to
285  // represent a single byte, which is ceil(8 / BitsPerChunk).
286  // For example, for base64 we need two encoded characters to represent a
287  // byte, and each group consists of 4 encoded characters, so
288  // MAX_PADDING_CHARS is 4 - 2 = 2.
289  static const int MAX_PADDING_CHARS =
290  BITS_PER_GROUP / BitsPerChunk -
291  (8 / BitsPerChunk + ((8 % BitsPerChunk) == 0 ? 0 : 1));
292 };
293 
294 template <int BitsPerChunk, char BaseZeroCode,
295  typename Encoder, typename Decoder>
296 string
297 BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::encode(
298  const vector<uint8_t>& binary)
299 {
300  // calculate the resulting length.
301  size_t bits = binary.size() * 8;
302  if (bits % BITS_PER_GROUP > 0) {
303  bits += (BITS_PER_GROUP - (bits % BITS_PER_GROUP));
304  }
305  const size_t len = bits / BitsPerChunk;
306 
307  string result;
308  result.reserve(len);
309  result.assign(Encoder(EncodeNormalizer(binary.begin(), binary.end())),
310  Encoder(EncodeNormalizer(binary.end(), binary.end())));
311  isc_throw_assert(len >= result.length());
312  result.append(len - result.length(), BASE_PADDING_CHAR);
313  return (result);
314 }
315 
316 template <int BitsPerChunk, char BaseZeroCode,
317  typename Encoder, typename Decoder>
318 void
319 BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::decode(
320  const char* const algorithm,
321  const string& input,
322  vector<uint8_t>& result)
323 {
324  // enumerate the number of trailing padding characters (=), ignoring
325  // white spaces. since baseN_from_binary doesn't accept padding,
326  // we handle it explicitly.
327  size_t padchars = 0;
328  string::const_reverse_iterator srit = input.rbegin();
329  string::const_reverse_iterator srit_end = input.rend();
330  while (srit != srit_end) {
331  char ch = *srit;
332  if (ch == BASE_PADDING_CHAR) {
333  if (++padchars > MAX_PADDING_CHARS) {
334  isc_throw(BadValue, "Too many " << algorithm
335  << " padding characters: " << input);
336  }
337  } else if (!(ch > 0 && isspace(ch))) {
338  // see the note for DecodeNormalizer::skipSpaces() above for ch > 0
339  break;
340  }
341  ++srit;
342  }
343  // then calculate the number of padding bits corresponding to the padding
344  // characters. In general, the padding bits consist of all-zero
345  // trailing bits of the last encoded character followed by zero bits
346  // represented by the padding characters:
347  // 1st pad 2nd pad 3rd pad...
348  // +++===== ======= ===... (+: from encoded chars, =: from pad chars)
349  // 0000...0 0......0 000...
350  // 0 7 8 15 16.... (bits)
351  // The number of bits for the '==...' part is padchars * BitsPerChunk.
352  // So the total number of padding bits is the smallest multiple of 8
353  // that is >= padchars * BitsPerChunk.
354  // (Below, note the common idiom of the bitwise AND with ~7. It clears the
355  // lowest three bits, so has the effect of rounding the result down to the
356  // nearest multiple of 8)
357  const size_t padbits = (padchars * BitsPerChunk + 7) & ~7;
358 
359  // In some encoding algorithm, it could happen that a padding byte would
360  // contain a full set of encoded bits, which is not allowed by definition
361  // of padding. For example, if BitsPerChunk is 5, the following
362  // representation could happen:
363  // ++00000= (+: from encoded chars, 0: encoded char for '0', =: pad chars)
364  // 0 7 (bits)
365  // This must actually be encoded as follows:
366  // ++======
367  // 0 7 (bits)
368  // The following check rejects this type of invalid encoding.
369  if (padbits > BitsPerChunk * (padchars + 1)) {
370  isc_throw(BadValue, "Invalid " << algorithm << " padding: " << input);
371  }
372 
373  // convert the number of bits in bytes for convenience.
374  const size_t padbytes = padbits / 8;
375 
376  try {
377  size_t char_count = 0;
378  result.assign(Decoder(DecodeNormalizer(BaseZeroCode, input.begin(),
379  srit.base(), input.end(),
380  &char_count)),
381  Decoder(DecodeNormalizer(BaseZeroCode, input.end(),
382  input.end(), input.end(),
383  NULL)));
384 
385  // Number of bits of the conversion result including padding must be
386  // a multiple of 8; otherwise the decoder reaches the end of input
387  // with some incomplete bits of data, which is invalid.
388  if (((char_count * BitsPerChunk) % 8) != 0) {
389  // catch this immediately below
390  throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
391  }
392  } catch (const clang_unnamed_namespace_workaround::IncompleteBaseInput&) {
393  // we unify error handling for incomplete input here.
394  isc_throw(BadValue, "Incomplete input for " << algorithm
395  << ": " << input);
396  } catch (const dataflow_exception& ex) {
397  // convert any boost exceptions into our local one.
398  isc_throw(BadValue, ex.what());
399  }
400 
401  // Confirm the original BaseX text is the canonical encoding of the
402  // data, that is, that the first byte of padding is indeed 0.
403  // (DecodeNormalizer and binary_from_baseXX ensure that the rest of the
404  // padding is all zero).
405  isc_throw_assert(result.size() >= padbytes);
406  if (padbytes > 0 && *(result.end() - padbytes) != 0) {
407  isc_throw(BadValue, "Non 0 bits included in " << algorithm
408  << " padding: " << input);
409  }
410 
411  // strip the padded zero-bit fields
412  result.resize(result.size() - padbytes);
413 }
414 
415 //
416 // Instantiation for BASE-64
417 //
418 typedef
419 base64_from_binary<transform_width<EncodeNormalizer, 6, 8> > base64_encoder;
420 typedef
421 transform_width<binary_from_base64<DecodeNormalizer>, 8, 6> base64_decoder;
422 typedef BaseNTransformer<6, 'A', base64_encoder, base64_decoder>
423 Base64Transformer;
424 
425 //
426 // Instantiation for BASE-32HEX
427 //
428 typedef
430 base32hex_encoder;
431 typedef
432 transform_width<binary_from_base32hex<DecodeNormalizer>, 8, 5>
433 base32hex_decoder;
434 typedef BaseNTransformer<5, '0', base32hex_encoder, base32hex_decoder>
435 Base32HexTransformer;
436 
437 //
438 // Instantiation for BASE-16 (HEX)
439 //
440 typedef
442 typedef
443 transform_width<binary_from_base16<DecodeNormalizer>, 8, 4> base16_decoder;
444 typedef BaseNTransformer<4, '0', base16_encoder, base16_decoder>
445 Base16Transformer;
446 }
447 
448 string
449 encodeBase64(const vector<uint8_t>& binary) {
450  return (Base64Transformer::encode(binary));
451 }
452 
453 void
454 decodeBase64(const string& input, vector<uint8_t>& result) {
455  Base64Transformer::decode("base64", input, result);
456 }
457 
458 string
459 encodeBase32Hex(const vector<uint8_t>& binary) {
460  return (Base32HexTransformer::encode(binary));
461 }
462 
463 void
464 decodeBase32Hex(const string& input, vector<uint8_t>& result) {
465  Base32HexTransformer::decode("base32hex", input, result);
466 }
467 
468 string
469 encodeHex(const vector<uint8_t>& binary) {
470  return (Base16Transformer::encode(binary));
471 }
472 
473 void
474 decodeHex(const string& input, vector<uint8_t>& result) {
475  Base16Transformer::decode("base16", input, result);
476 }
477 
478 } // namespace encode
479 } // namespace util
480 } // namespace isc
#define isc_throw_assert(expr)
Replacement for assert() that throws if the expression is false.
Definition: isc_assert.h:18
size_t * char_count_
Definition: base_n.cc:251
STL namespace.
bool operator==(const Element &a, const Element &b)
Definition: data.cc:210
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
ElementPtr copy(ConstElementPtr from, int level)
Copy the data up to a nesting level.
Definition: data.cc:1097
void decodeHex(const string &input, vector< uint8_t > &result)
Decode a text encoded in the base16 ('hex') format into the original data.
Definition: base_n.cc:474
const vector< uint8_t >::const_iterator base_end_
Definition: base_n.cc:150
void decodeBase64(const std::string &input, std::vector< uint8_t > &result)
Decode a text encoded in the base64 format into the original data.
Definition: base_n.cc:454
Defines the logger used by the top-level component of kea-dhcp-ddns.
string encodeHex(const vector< uint8_t > &binary)
Encode binary data in the base16 ('hex') format.
Definition: base_n.cc:469
std::string encodeBase64(const std::vector< uint8_t > &binary)
Encode binary data in the base64 format.
Definition: base_n.cc:449
bool in_pad_
Definition: base_n.cc:151
const char base_zero_code_
Definition: base_n.cc:244
std::string encodeBase32Hex(const std::vector< uint8_t > &binary)
Encode binary data in the base32hex format.
Definition: base_n.cc:459
vector< uint8_t >::const_iterator base_
Definition: base_n.cc:149
void decodeBase32Hex(const std::string &input, std::vector< uint8_t > &result)
Decode a text encoded in the base32hex format into the original data.
Definition: base_n.cc:464
const string::const_iterator base_beginpad_
Definition: base_n.cc:246