summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Daniel Gustafsson <dgustafsson@postgresql.org> 2025-09-20 23:19:32 +0200
committer: Daniel Gustafsson <dgustafsson@postgresql.org> 2025-09-20 23:19:32 +0200
commit: e1d917182c1953b16b32a39ed2fe38e3d0823047 (patch)
tree: a107b80202efa1d95f6d994dec3665bd7652e679 /src
parent: 261f89a976bf3dbf25e43bab9983fdd28f20b49b (diff)
Add support for base64url encoding and decoding
This adds support for base64url encoding and decoding, a base64 variant which is safe to use in filenames and URLs. base64url replaces '+' in the base64 alphabet with '-' and '/' with '_', thus making it safe for URL addresses and file systems. Support for base64url was originally suggested by Przemysław Sztoch. Author: Florents Tselai <florents.tselai@gmail.com> Reviewed-by: Aleksander Alekseev <aleksander@timescale.com> Reviewed-by: David E. Wheeler <david@justatheory.com> Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: Daniel Gustafsson <daniel@yesql.se> Reviewed-by: Chao Li (Evan) <li.evan.chao@gmail.com> Discussion: https://postgr.es/m/70f2b6a8-486a-4fdb-a951-84cef35e22ab@sztoch.pl
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/adt/encode.c 157
-rw-r--r-- src/test/regress/expected/strings.out 150
-rw-r--r-- src/test/regress/sql/strings.sql 54
3 files changed, 340 insertions, 21 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4ccaed815d1..9a9c7e8da99 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -267,12 +267,15 @@ hex_dec_len(const char *src, size_t srclen)
}
/*
- * BASE64
+ * BASE64 and BASE64URL
*/
static const char _base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char _base64url[] =
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
static const int8 b64lookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -284,8 +287,15 @@ static const int8 b64lookup[128] = {
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
+/*
+ * pg_base64_encode_internal
+ *
+ * Helper for encoding base64 or base64url. When url is passed as true the
+ * input will be encoded using base64url. len bytes in src are encoded into
+ * dst.
+ */
static uint64
-pg_base64_encode(const char *src, size_t len, char *dst)
+pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
{
char *p,
*lend = dst + 76;
@@ -293,6 +303,7 @@ pg_base64_encode(const char *src, size_t len, char *dst)
*end = src + len;
int pos = 2;
uint32 buf = 0;
+ const char *alphabet = url ? _base64url : _base64;
s = src;
p = dst;
@@ -306,33 +317,64 @@ pg_base64_encode(const char *src, size_t len, char *dst)
/* write it out */
if (pos < 0)
{
- *p++ = _base64[(buf >> 18) & 0x3f];
- *p++ = _base64[(buf >> 12) & 0x3f];
- *p++ = _base64[(buf >> 6) & 0x3f];
- *p++ = _base64[buf & 0x3f];
+ *p++ = alphabet[(buf >> 18) & 0x3f];
+ *p++ = alphabet[(buf >> 12) & 0x3f];
+ *p++ = alphabet[(buf >> 6) & 0x3f];
+ *p++ = alphabet[buf & 0x3f];
pos = 2;
buf = 0;
- }
- if (p >= lend)
- {
- *p++ = '\n';
- lend = p + 76;
+
+ if (!url && p >= lend)
+ {
+ *p++ = '\n';
+ lend = p + 76;
+ }
}
}
+
+ /* Handle remaining bytes in buf */
if (pos != 2)
{
- *p++ = _base64[(buf >> 18) & 0x3f];
- *p++ = _base64[(buf >> 12) & 0x3f];
- *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
- *p++ = '=';
+ *p++ = alphabet[(buf >> 18) & 0x3f];
+ *p++ = alphabet[(buf >> 12) & 0x3f];
+
+ if (pos == 0)
+ {
+ *p++ = alphabet[(buf >> 6) & 0x3f];
+ if (!url)
+ *p++ = '=';
+ }
+ else if (!url)
+ {
+ *p++ = '=';
+ *p++ = '=';
+ }
}
return p - dst;
}
static uint64
-pg_base64_decode(const char *src, size_t len, char *dst)
+pg_base64_encode(const char *src, size_t len, char *dst)
+{
+ return pg_base64_encode_internal(src, len, dst, false);
+}
+
+static uint64
+pg_base64url_encode(const char *src, size_t len, char *dst)
+{
+ return pg_base64_encode_internal(src, len, dst, true);
+}
+
+/*
+ * pg_base64_decode_internal
+ *
+ * Helper for decoding base64 or base64url. When url is passed as true the
+ * input will be assumed to be encoded using base64url.
+ */
+static uint64
+pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
{
const char *srcend = src + len,
*s = src;
@@ -350,6 +392,15 @@ pg_base64_decode(const char *src, size_t len, char *dst)
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
continue;
+ /* convert base64url to base64 */
+ if (url)
+ {
+ if (c == '-')
+ c = '+';
+ else if (c == '_')
+ c = '/';
+ }
+
if (c == '=')
{
/* end sequence */
@@ -360,9 +411,12 @@ pg_base64_decode(const char *src, size_t len, char *dst)
else if (pos == 3)
end = 2;
else
+ {
+ /* translator: %s is the name of an encoding scheme */
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("unexpected \"=\" while decoding base64 sequence")));
+ errmsg("unexpected \"=\" while decoding %s sequence", url ? "base64url" : "base64")));
+ }
}
b = 0;
}
@@ -372,10 +426,14 @@ pg_base64_decode(const char *src, size_t len, char *dst)
if (c > 0 && c < 127)
b = b64lookup[(unsigned char) c];
if (b < 0)
+ {
+ /* translator: %s is the name of an encoding scheme */
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
- pg_mblen(s - 1), s - 1)));
+ errmsg("invalid symbol \"%.*s\" found while decoding %s sequence",
+ pg_mblen(s - 1), s - 1,
+ url ? "base64url" : "base64")));
+ }
}
/* add it to buffer */
buf = (buf << 6) + b;
@@ -392,15 +450,40 @@ pg_base64_decode(const char *src, size_t len, char *dst)
}
}
- if (pos != 0)
+ if (pos == 2)
+ {
+ buf <<= 12;
+ *p++ = (buf >> 16) & 0xFF;
+ }
+ else if (pos == 3)
+ {
+ buf <<= 6;
+ *p++ = (buf >> 16) & 0xFF;
+ *p++ = (buf >> 8) & 0xFF;
+ }
+ else if (pos != 0)
+ {
+ /* translator: %s is the name of an encoding scheme */
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("invalid base64 end sequence"),
+ errmsg("invalid %s end sequence", url ? "base64url" : "base64"),
errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
+ }
return p - dst;
}
+static uint64
+pg_base64_decode(const char *src, size_t len, char *dst)
+{
+ return pg_base64_decode_internal(src, len, dst, false);
+}
+
+static uint64
+pg_base64url_decode(const char *src, size_t len, char *dst)
+{
+ return pg_base64_decode_internal(src, len, dst, true);
+}
static uint64
pg_base64_enc_len(const char *src, size_t srclen)
@@ -415,6 +498,32 @@ pg_base64_dec_len(const char *src, size_t srclen)
return ((uint64) srclen * 3) >> 2;
}
+static uint64
+pg_base64url_enc_len(const char *src, size_t srclen)
+{
+ /*
+ * Unlike standard base64, base64url emits no padding or line breaks, so the
+ * padded length computed here is an upper bound on the actual output size
+ */
+ return (srclen + 2) / 3 * 4;
+}
+
+static uint64
+pg_base64url_dec_len(const char *src, size_t srclen)
+{
+ /*
+ * For base64, each 4 characters of input produce at most 3 bytes of
+ * output. base64url input may lack padding, so round the input length up
+ * to the next multiple of 4 before applying that ratio
+ */
+ size_t adjusted_len = srclen;
+
+ if (srclen % 4 != 0)
+ adjusted_len += 4 - (srclen % 4);
+
+ return (adjusted_len * 3) / 4;
+}
+
/*
* Escape
* Minimally escape bytea to text.
@@ -607,6 +716,12 @@ static const struct
}
},
{
+ "base64url",
+ {
+ pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
+ }
+ },
+ {
"escape",
{
esc_enc_len, esc_dec_len, esc_encode, esc_decode
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 2d6cb02ad60..691e475bce3 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2518,6 +2518,156 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
(1 row)
--
+-- base64url encoding/decoding
+--
+SET bytea_output TO hex;
+-- Simple encoding/decoding
+SELECT encode('\x69b73eff', 'base64url'); -- abc-_w
+ encode
+--------
+ abc-_w
+(1 row)
+
+SELECT decode('abc-_w', 'base64url'); -- \x69b73eff
+ decode
+------------
+ \x69b73eff
+(1 row)
+
+-- Round-trip: decode(encode(x)) = x
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- \x1234567890abcdef00
+ decode
+----------------------
+ \x1234567890abcdef00
+(1 row)
+
+-- Empty input
+SELECT encode('', 'base64url'); -- ''
+ encode
+--------
+
+(1 row)
+
+SELECT decode('', 'base64url'); -- ''
+ decode
+--------
+ \x
+(1 row)
+
+-- 1 byte input
+SELECT encode('\x01', 'base64url'); -- AQ
+ encode
+--------
+ AQ
+(1 row)
+
+SELECT decode('AQ', 'base64url'); -- \x01
+ decode
+--------
+ \x01
+(1 row)
+
+-- 2 byte input
+SELECT encode('\x0102'::bytea, 'base64url'); -- AQI
+ encode
+--------
+ AQI
+(1 row)
+
+SELECT decode('AQI', 'base64url'); -- \x0102
+ decode
+--------
+ \x0102
+(1 row)
+
+-- 3 byte input (no padding needed)
+SELECT encode('\x010203'::bytea, 'base64url'); -- AQID
+ encode
+--------
+ AQID
+(1 row)
+
+SELECT decode('AQID', 'base64url'); -- \x010203
+ decode
+----------
+ \x010203
+(1 row)
+
+-- 4 byte input (results in 6 base64 chars)
+SELECT encode('\xdeadbeef'::bytea, 'base64url'); -- 3q2-7w
+ encode
+--------
+ 3q2-7w
+(1 row)
+
+SELECT decode('3q2-7w', 'base64url'); -- \xdeadbeef
+ decode
+------------
+ \xdeadbeef
+(1 row)
+
+-- Round-trip test for all lengths from 0–4
+SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url');
+ encode
+--------
+
+(1 row)
+
+SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url');
+ encode
+--------
+ AA
+(1 row)
+
+SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url');
+ encode
+--------
+ AAE
+(1 row)
+
+SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url');
+ encode
+--------
+ AAEC
+(1 row)
+
+SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url');
+ encode
+--------
+ AAECAw
+(1 row)
+
+-- Invalid inputs (should ERROR)
+-- invalid character '@'
+SELECT decode('QQ@=', 'base64url');
+ERROR: invalid symbol "@" found while decoding base64url sequence
+-- missing characters (incomplete group)
+SELECT decode('QQ', 'base64url'); -- ok (1 byte)
+ decode
+--------
+ \x41
+(1 row)
+
+SELECT decode('QQI', 'base64url'); -- ok (2 bytes)
+ decode
+--------
+ \x4102
+(1 row)
+
+SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64url end sequence
+ERROR: invalid base64url end sequence
+HINT: Input data is missing padding, is truncated, or is otherwise corrupted.
+-- unexpected '=' at start
+SELECT decode('=QQQ', 'base64url');
+ERROR: unexpected "=" while decoding base64url sequence
+-- valid base64 padding in base64url (optional, but accepted)
+SELECT decode('abc-_w==', 'base64url'); -- should decode to \x69b73eff
+ decode
+------------
+ \x69b73eff
+(1 row)
+
+--
-- get_bit/set_bit etc
--
SELECT get_bit('\x1234567890abcdef00'::bytea, 43);
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 5ed421d6205..c05f3413699 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -800,6 +800,60 @@ SELECT encode('\x1234567890abcdef00', 'escape');
SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
--
+-- base64url encoding/decoding
+--
+SET bytea_output TO hex;
+
+-- Simple encoding/decoding
+SELECT encode('\x69b73eff', 'base64url'); -- abc-_w
+SELECT decode('abc-_w', 'base64url'); -- \x69b73eff
+
+-- Round-trip: decode(encode(x)) = x
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- \x1234567890abcdef00
+
+-- Empty input
+SELECT encode('', 'base64url'); -- ''
+SELECT decode('', 'base64url'); -- ''
+
+-- 1 byte input
+SELECT encode('\x01', 'base64url'); -- AQ
+SELECT decode('AQ', 'base64url'); -- \x01
+
+-- 2 byte input
+SELECT encode('\x0102'::bytea, 'base64url'); -- AQI
+SELECT decode('AQI', 'base64url'); -- \x0102
+
+-- 3 byte input (no padding needed)
+SELECT encode('\x010203'::bytea, 'base64url'); -- AQID
+SELECT decode('AQID', 'base64url'); -- \x010203
+
+-- 4 byte input (results in 6 base64 chars)
+SELECT encode('\xdeadbeef'::bytea, 'base64url'); -- 3q2-7w
+SELECT decode('3q2-7w', 'base64url'); -- \xdeadbeef
+
+-- Round-trip test for all lengths from 0–4
+SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url');
+
+-- Invalid inputs (should ERROR)
+-- invalid character '@'
+SELECT decode('QQ@=', 'base64url');
+
+-- missing characters (incomplete group)
+SELECT decode('QQ', 'base64url'); -- ok (1 byte)
+SELECT decode('QQI', 'base64url'); -- ok (2 bytes)
+SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64url end sequence
+
+-- unexpected '=' at start
+SELECT decode('=QQQ', 'base64url');
+
+-- valid base64 padding in base64url (optional, but accepted)
+SELECT decode('abc-_w==', 'base64url'); -- should decode to \x69b73eff
+
+--
-- get_bit/set_bit etc
--
SELECT get_bit('\x1234567890abcdef00'::bytea, 43);