aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrancois Kritzinger <francois@codesynthesis.com>2024-02-06 10:52:18 +0200
committerFrancois Kritzinger <francois@codesynthesis.com>2024-02-09 13:47:53 +0200
commit47767f72b47c9914deaf0de0908f816edfcc9709 (patch)
treed1ecc399ff223215801f106435db80d482e61b27
parentd249dff880edd1520a979279934afc0a6370bd67 (diff)
Add base64url_encode()
-rw-r--r--libbutl/base64.cxx81
-rw-r--r--libbutl/base64.hxx19
-rw-r--r--tests/base64/driver.cxx69
3 files changed, 160 insertions, 9 deletions
diff --git a/libbutl/base64.cxx b/libbutl/base64.cxx
index 4466f24..03191c8 100644
--- a/libbutl/base64.cxx
+++ b/libbutl/base64.cxx
@@ -17,18 +17,16 @@ namespace butl
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// base64-encode the data in the iterator range [i, e). Write the encoded
- // data starting at the iterator position o.
+ // data starting at the iterator position o. If url is true, encode using
+ // base64url.
//
template <typename I, typename O>
static void
- base64_encode (I& i, const I& e, O& o)
+ base64_encode (I& i, const I& e, O& o, bool url = false)
{
const size_t un (65); // Non-existing index of the codes string.
for (size_t n (0); i != e; ++n)
{
- if (n && n % 19 == 0)
- *o++ = '\n'; // Split into lines, like the base64 utility does.
-
auto next = [&i] () {return static_cast<unsigned char> (*i++);};
unsigned char c (next ());
@@ -51,10 +49,34 @@ namespace butl
i4 = c & 0x3F;
}
- *o++ = codes[i1];
- *o++ = codes[i2];
- *o++ = i3 == un ? '=' : codes[i3];
- *o++ = i4 == un ? '=' : codes[i4];
+ // @@ TMP Lots of redundant branches. Would making it a template
+ // parameter help?
+ //
+ if (!url)
+ {
+ if (n && n % 19 == 0)
+ *o++ = '\n'; // Split into lines, like the base64 utility does.
+
+ *o++ = codes[i1];
+ *o++ = codes[i2];
+ *o++ = i3 == un ? '=' : codes[i3];
+ *o++ = i4 == un ? '=' : codes[i4];
+ }
+ // base64url: different 63rd and 64th characters and no padding or
+ // newlines.
+ //
+ else
+ {
+ auto code = [] (size_t i)
+ {
+ return i == 62 ? '-' : i == 63 ? '_' : codes[i];
+ };
+
+ *o++ = code (i1);
+ *o++ = code (i2);
+ if (i3 != un) *o++ = code (i3);
+ if (i4 != un) *o++ = code (i4);
+ }
}
}
@@ -170,6 +192,47 @@ namespace butl
return r;
}
+ string
+ base64url_encode (istream& is)
+ {
+ if (!is.good ())
+ throw invalid_argument ("bad stream");
+
+ string r;
+ istreambuf_iterator<char> i (is);
+ back_insert_iterator<string> o (r);
+
+ base64_encode (i, istreambuf_iterator<char> (), o, true /* url */);
+ is.setstate (istream::eofbit);
+ return r;
+ }
+
+ void
+ base64url_encode (ostream& os, istream& is)
+ {
+ if (!os.good () || !is.good ())
+ throw invalid_argument ("bad stream");
+
+ istreambuf_iterator<char> i (is);
+ ostreambuf_iterator<char> o (os);
+ base64_encode (i, istreambuf_iterator<char> (), o, true /* url */);
+
+ if (o.failed ())
+ os.setstate (istream::badbit);
+
+ is.setstate (istream::eofbit);
+ }
+
+ string
+ base64url_encode (const std::vector<char>& v)
+ {
+ string r;
+ back_insert_iterator<string> o (r);
+ auto i (v.begin ());
+ base64_encode (i, v.end (), o, true /* url */);
+ return r;
+ }
+
void
base64_decode (ostream& os, istream& is)
{
diff --git a/libbutl/base64.hxx b/libbutl/base64.hxx
index f38e62f..6f8ef02 100644
--- a/libbutl/base64.hxx
+++ b/libbutl/base64.hxx
@@ -27,6 +27,25 @@ namespace butl
LIBBUTL_SYMEXPORT std::string
base64_encode (const std::vector<char>&);
+ // Encode a stream or a buffer using base64url (RFC 4648), a base64 variant
+ // with different 62nd and 63rd alphabet characters (- and _ instead of ~
+ // and .) to make it filesystem safe, and optional padding because the
+ // padding character `=` would have to be percent-encoded to be safe in
+ // URLs. This implementation does not output any padding, newlines or any
+ // other whitespace in order to conform to RFC7519: JSON Web Token (JWT) and
+ // RFC7515: JSON Web Signature (JWS).
+ //
+ // Note that base64url decoding has not yet been implemented.
+ //
+ LIBBUTL_SYMEXPORT void
+ base64url_encode (std::ostream&, std::istream&);
+
+ LIBBUTL_SYMEXPORT std::string
+ base64url_encode (std::istream&);
+
+ LIBBUTL_SYMEXPORT std::string
+ base64url_encode (const std::vector<char>&);
+
// Base64-decode a stream or a string. Throw invalid_argument if the input
// is not a valid base64 representation. If reading from a stream, check if
// it has badbit, failbit, or eofbit set and throw invalid_argument if
diff --git a/tests/base64/driver.cxx b/tests/base64/driver.cxx
index a37a238..32d5236 100644
--- a/tests/base64/driver.cxx
+++ b/tests/base64/driver.cxx
@@ -13,6 +13,8 @@
using namespace std;
using namespace butl;
+// Test base64 encoding and decoding.
+//
static bool
encode (const string& i, const string& o)
{
@@ -68,9 +70,44 @@ encode (const string& i, const string& o)
return r;
}
+// Test base64url encoding only (decoding not yet implemented).
+//
+static bool
+encode_url (const string& i, const string& o)
+{
+ istringstream is (i);
+ string s (base64url_encode (is));
+ bool r (s == o && is.eof ());
+
+ if (r)
+ {
+ is.seekg (0);
+
+ // VC15 seekg() doesn't clear eofbit.
+ //
+#if defined(_MSC_VER) && _MSC_VER < 1920
+ is.clear ();
+#endif
+
+ assert (!is.eof ());
+
+ ostringstream os;
+ base64url_encode (os, is);
+ r = os.str () == o && is.eof ();
+ }
+
+ if (r)
+ r = base64url_encode (vector<char> (i.begin (), i.end ())) == o;
+
+ return r;
+}
+
+
int
main ()
{
+ // base64
+ //
assert (encode ("", ""));
assert (encode ("B", "Qg=="));
assert (encode ("BX", "Qlg="));
@@ -80,6 +117,19 @@ main ()
assert (encode ("BXzS@#", "Qlh6U0Aj"));
assert (encode ("BXzS@#/", "Qlh6U0AjLw=="));
+ // base64url: no padding in output.
+ //
+ assert (encode_url ("", ""));
+ assert (encode_url ("B", "Qg"));
+ assert (encode_url ("BX", "Qlg"));
+ assert (encode_url ("BXz", "Qlh6"));
+ assert (encode_url ("BXzS", "Qlh6Uw"));
+ assert (encode_url ("BXzS@", "Qlh6U0A"));
+ assert (encode_url ("BXzS@#", "Qlh6U0Aj"));
+ assert (encode_url ("BXzS@#/", "Qlh6U0AjLw"));
+
+ // Multi-line input.
+ //
const char* s (
"class fdstream_base\n"
"{\n"
@@ -91,10 +141,29 @@ main ()
" fdbuf buf_;\n"
"};\n");
+ // base64
+ //
const char* r (
"Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm\n"
"YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK\n"
"ICBmZGJ1ZiBidWZfOwp9Owo=");
assert (encode (s, r));
+
+ // base64url: no newlines or padding in output.
+ //
+ r =
+"Y2xhc3MgZmRzdHJlYW1fYmFzZQp7CnByb3RlY3RlZDoKICBmZHN0cmVhbV9iYXNlICgpID0gZGVm"
+"YXVsdDsKICBmZHN0cmVhbV9iYXNlIChpbnQgZmQpOiBidWZfIChmZCkge30KCnByb3RlY3RlZDoK"
+"ICBmZGJ1ZiBidWZfOwp9Owo";
+
+ assert (encode_url (s, r));
+
+ // Test 63rd and 64th characters: `>` maps to `+` or `-`; `?` maps to `/` or
+ // `_`.
+ //
+ assert (encode (">>>>>>", "Pj4+Pj4+"));
+ assert (encode_url (">>>>>>", "Pj4-Pj4-"));
+ assert (encode ("??????", "Pz8/Pz8/"));
+ assert (encode_url ("??????", "Pz8_Pz8_"));
}