From 47767f72b47c9914deaf0de0908f816edfcc9709 Mon Sep 17 00:00:00 2001 From: Francois Kritzinger Date: Tue, 6 Feb 2024 10:52:18 +0200 Subject: Add base64url_encode() --- libbutl/base64.cxx | 81 +++++++++++++++++++++++++++++++++++++++++++------ libbutl/base64.hxx | 19 ++++++++++++ tests/base64/driver.cxx | 69 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 160 insertions(+), 9 deletions(-) diff --git a/libbutl/base64.cxx b/libbutl/base64.cxx index 4466f24..03191c8 100644 --- a/libbutl/base64.cxx +++ b/libbutl/base64.cxx @@ -17,18 +17,16 @@ namespace butl "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // base64-encode the data in the iterator range [i, e). Write the encoded - // data starting at the iterator position o. + // data starting at the iterator position o. If url is true, encode using + // base64url. // template static void - base64_encode (I& i, const I& e, O& o) + base64_encode (I& i, const I& e, O& o, bool url = false) { const size_t un (65); // Non-existing index of the codes string. for (size_t n (0); i != e; ++n) { - if (n && n % 19 == 0) - *o++ = '\n'; // Split into lines, like the base64 utility does. - auto next = [&i] () {return static_cast (*i++);}; unsigned char c (next ()); @@ -51,10 +49,34 @@ namespace butl i4 = c & 0x3F; } - *o++ = codes[i1]; - *o++ = codes[i2]; - *o++ = i3 == un ? '=' : codes[i3]; - *o++ = i4 == un ? '=' : codes[i4]; + // @@ TMP Lots of redundant branches. Would making it a template + // parameter help? + // + if (!url) + { + if (n && n % 19 == 0) + *o++ = '\n'; // Split into lines, like the base64 utility does. + + *o++ = codes[i1]; + *o++ = codes[i2]; + *o++ = i3 == un ? '=' : codes[i3]; + *o++ = i4 == un ? '=' : codes[i4]; + } + // base64url: different 63rd and 64th characters and no padding or + // newlines. + // + else + { + auto code = [] (size_t i) + { + return i == 62 ? '-' : i == 63 ? 
'_' : codes[i]; + }; + + *o++ = code (i1); + *o++ = code (i2); + if (i3 != un) *o++ = code (i3); + if (i4 != un) *o++ = code (i4); + } } } @@ -170,6 +192,47 @@ namespace butl return r; } + string + base64url_encode (istream& is) + { + if (!is.good ()) + throw invalid_argument ("bad stream"); + + string r; + istreambuf_iterator i (is); + back_insert_iterator o (r); + + base64_encode (i, istreambuf_iterator (), o, true /* url */); + is.setstate (istream::eofbit); + return r; + } + + void + base64url_encode (ostream& os, istream& is) + { + if (!os.good () || !is.good ()) + throw invalid_argument ("bad stream"); + + istreambuf_iterator i (is); + ostreambuf_iterator o (os); + base64_encode (i, istreambuf_iterator (), o, true /* url */); + + if (o.failed ()) + os.setstate (istream::badbit); + + is.setstate (istream::eofbit); + } + + string + base64url_encode (const std::vector& v) + { + string r; + back_insert_iterator o (r); + auto i (v.begin ()); + base64_encode (i, v.end (), o, true /* url */); + return r; + } + void base64_decode (ostream& os, istream& is) { diff --git a/libbutl/base64.hxx b/libbutl/base64.hxx index f38e62f..6f8ef02 100644 --- a/libbutl/base64.hxx +++ b/libbutl/base64.hxx @@ -27,6 +27,25 @@ namespace butl LIBBUTL_SYMEXPORT std::string base64_encode (const std::vector&); + // Encode a stream or a buffer using base64url (RFC 4648), a base64 variant + // with different 62nd and 63rd alphabet characters (`-` and `_` instead of + // `+` and `/`) to make it filesystem safe, and optional padding because the + // padding character `=` would have to be percent-encoded to be safe in + // URLs. This implementation does not output any padding, newlines or any + // other whitespace in order to conform to RFC7519: JSON Web Token (JWT) and + // RFC7515: JSON Web Signature (JWS). + // + // Note that base64url decoding has not yet been implemented. 
+ // + LIBBUTL_SYMEXPORT void + base64url_encode (std::ostream&, std::istream&); + + LIBBUTL_SYMEXPORT std::string + base64url_encode (std::istream&); + + LIBBUTL_SYMEXPORT std::string + base64url_encode (const std::vector&); + // Base64-decode a stream or a string. Throw invalid_argument if the input // is not a valid base64 representation. If reading from a stream, check if // it has badbit, failbit, or eofbit set and throw invalid_argument if diff --git a/tests/base64/driver.cxx b/tests/base64/driver.cxx index a37a238..32d5236 100644 --- a/tests/base64/driver.cxx +++ b/tests/base64/driver.cxx @@ -13,6 +13,8 @@ using namespace std; using namespace butl; +// Test base64 encoding and decoding. +// static bool encode (const string& i, const string& o) { @@ -68,9 +70,44 @@ encode (const string& i, const string& o) return r; } +// Test base64url encoding only (decoding not yet implemented). +// +static bool +encode_url (const string& i, const string& o) +{ + istringstream is (i); + string s (base64url_encode (is)); + bool r (s == o && is.eof ()); + + if (r) + { + is.seekg (0); + + // VC15 seekg() doesn't clear eofbit. + // +#if defined(_MSC_VER) && _MSC_VER < 1920 + is.clear (); +#endif + + assert (!is.eof ()); + + ostringstream os; + base64url_encode (os, is); + r = os.str () == o && is.eof (); + } + + if (r) + r = base64url_encode (vector (i.begin (), i.end ())) == o; + + return r; +} + + int main () { + // base64 + // assert (encode ("", "")); assert (encode ("B", "Qg==")); assert (encode ("BX", "Qlg=")); @@ -80,6 +117,19 @@ main () assert (encode ("BXzS@#", "Qlh6U0Aj")); assert (encode ("BXzS@#/", "Qlh6U0AjLw==")); + // base64url: no padding in output. 
+ // + assert (encode_url ("", "")); + assert (encode_url ("B", "Qg")); + assert (encode_url ("BX", "Qlg")); + assert (encode_url ("BXz", "Qlh6")); + assert (encode_url ("BXzS", "Qlh6Uw")); + assert (encode_url ("BXzS@", "Qlh6U0A")); + assert (encode_url ("BXzS@#", "Qlh6U0Aj")); + assert (encode_url ("BXzS@#/", "Qlh6U0AjLw")); + + // Multi-line input. + // const char* s ( "class fdstream_base\n" "{\n" @@ -91,10 +141,29 @@ main () " fdbuf buf_;\n" "};\n"); + // base64 + // const char* r ( "Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm\n" "YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK\n" "ICBmZGJ1ZiBidWZfOwp9Owo="); assert (encode (s, r)); + + // base64url: no newlines or padding in output. + // + r = +"Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm" +"YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK" +"ICBmZGJ1ZiBidWZfOwp9Owo"; + + assert (encode_url (s, r)); + + // Test 63rd and 64th characters: `>` maps to `+` or `-`; `?` maps to `/` or + // `_`. + // + assert (encode (">>>>>>", "Pj4+Pj4+")); + assert (encode_url (">>>>>>", "Pj4-Pj4-")); + assert (encode ("??????", "Pz8/Pz8/")); + assert (encode_url ("??????", "Pz8_Pz8_")); } -- cgit v1.1