diff options
author | Nathan Bossart <nathan@postgresql.org> | 2025-10-06 12:28:50 -0500 |
---|---|---|
committer | Nathan Bossart <nathan@postgresql.org> | 2025-10-06 12:28:50 -0500 |
commit | ec8719ccbfcd78c6b40b5f2b94f60769f3bd08aa (patch) | |
tree | 606d4b39e51f4535b30074778962c19ba071bb19 /src/backend/utils/adt/encode.c | |
parent | 5b5e8a29c13aa583f8a674cb65f92a204da24be8 (diff) |
Optimize hex_encode() and hex_decode() using SIMD.
The hex_encode() and hex_decode() functions serve as the workhorses
for hexadecimal data for bytea's text format conversion functions,
and some workloads are sensitive to their performance. This commit
adds new implementations that use routines from port/simd.h, which
testing indicates are much faster for larger inputs. For small or
invalid inputs, we fall back on the existing scalar versions.
Since we are using port/simd.h, these optimizations apply to both
x86-64 and AArch64.
Author: Nathan Bossart <nathandbossart@gmail.com>
Co-authored-by: Chiranmoy Bhattacharya <chiranmoy.bhattacharya@fujitsu.com>
Co-authored-by: Susmitha Devanga <devanga.susmitha@fujitsu.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/aLhVWTRy0QPbW2tl%40nathan
Diffstat (limited to 'src/backend/utils/adt/encode.c')
-rw-r--r-- | src/backend/utils/adt/encode.c | 137 |
1 files changed, 133 insertions, 4 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 9a9c7e8da99..aabe9913eee 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -16,6 +16,7 @@ #include <ctype.h> #include "mb/pg_wchar.h" +#include "port/simd.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "varatt.h" @@ -177,8 +178,8 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; -uint64 -hex_encode(const char *src, size_t len, char *dst) +static inline uint64 +hex_encode_scalar(const char *src, size_t len, char *dst) { const char *end = src + len; @@ -193,6 +194,55 @@ hex_encode(const char *src, size_t len, char *dst) return (uint64) len * 2; } +uint64 +hex_encode(const char *src, size_t len, char *dst) +{ +#ifdef USE_NO_SIMD + return hex_encode_scalar(src, len, dst); +#else + const uint64 tail_idx = len & ~(sizeof(Vector8) - 1); + uint64 i; + + /* + * This splits the high and low nibbles of each byte into separate + * vectors, adds the vectors to a mask that converts the nibbles to their + * equivalent ASCII bytes, and interleaves those bytes back together to + * form the final hex-encoded string. + */ + for (i = 0; i < tail_idx; i += sizeof(Vector8)) + { + Vector8 srcv; + Vector8 lo; + Vector8 hi; + Vector8 mask; + + vector8_load(&srcv, (const uint8 *) &src[i]); + + lo = vector8_and(srcv, vector8_broadcast(0x0f)); + mask = vector8_gt(lo, vector8_broadcast(0x9)); + mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10)); + mask = vector8_add(mask, vector8_broadcast('0')); + lo = vector8_add(lo, mask); + + hi = vector8_and(srcv, vector8_broadcast(0xf0)); + hi = vector8_shift_right(hi, 4); + mask = vector8_gt(hi, vector8_broadcast(0x9)); + mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10)); + mask = vector8_add(mask, vector8_broadcast('0')); + hi = vector8_add(hi, mask); + + vector8_store((uint8 *) &dst[i * 2], + vector8_interleave_low(hi, lo)); + vector8_store((uint8 *) &dst[i * 2 + sizeof(Vector8)], + vector8_interleave_high(hi, lo)); + } + + (void) hex_encode_scalar(src + i, len - i, dst + i * 2); + + return (uint64) len * 2; +#endif +} + static inline bool get_hex(const char *cp, char *out) { @@ -213,8 +263,8 @@ hex_decode(const char *src, size_t len, char *dst) return hex_decode_safe(src, len, dst, NULL); } -uint64 -hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) +static inline uint64 +hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext) { const char *s, *srcend; @@ -254,6 +304,85 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) return p - dst; } +/* + * This helper converts each byte to its binary-equivalent nibble by + * subtraction and combines them to form the return bytes (separated by zero + * bytes). Returns false if any input bytes are outside the expected ranges of + * ASCII values. Otherwise, returns true. + */ +#ifndef USE_NO_SIMD +static inline bool +hex_decode_simd_helper(const Vector8 src, Vector8 *dst) +{ + Vector8 sub; + Vector8 mask_hi = vector8_interleave_low(vector8_broadcast(0), vector8_broadcast(0x0f)); + Vector8 mask_lo = vector8_interleave_low(vector8_broadcast(0x0f), vector8_broadcast(0)); + Vector8 tmp; + bool ret; + + tmp = vector8_gt(vector8_broadcast('9' + 1), src); + sub = vector8_and(tmp, vector8_broadcast('0')); + + tmp = vector8_gt(src, vector8_broadcast('A' - 1)); + tmp = vector8_and(tmp, vector8_broadcast('A' - 10)); + sub = vector8_add(sub, tmp); + + tmp = vector8_gt(src, vector8_broadcast('a' - 1)); + tmp = vector8_and(tmp, vector8_broadcast('a' - 'A')); + sub = vector8_add(sub, tmp); + + *dst = vector8_issub(src, sub); + ret = !vector8_has_ge(*dst, 0x10); + + tmp = vector8_and(*dst, mask_hi); + tmp = vector8_shift_right(tmp, 8); + *dst = vector8_and(*dst, mask_lo); + *dst = vector8_shift_left(*dst, 4); + *dst = vector8_or(*dst, tmp); + return ret; +} +#endif /* ! USE_NO_SIMD */ + +uint64 +hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) +{ +#ifdef USE_NO_SIMD + return hex_decode_safe_scalar(src, len, dst, escontext); +#else + const uint64 tail_idx = len & ~(sizeof(Vector8) * 2 - 1); + uint64 i; + bool success = true; + + /* + * We must process 2 vectors at a time since the output will be half the + * length of the input. + */ + for (i = 0; i < tail_idx; i += sizeof(Vector8) * 2) + { + Vector8 srcv; + Vector8 dstv1; + Vector8 dstv2; + + vector8_load(&srcv, (const uint8 *) &src[i]); + success &= hex_decode_simd_helper(srcv, &dstv1); + + vector8_load(&srcv, (const uint8 *) &src[i + sizeof(Vector8)]); + success &= hex_decode_simd_helper(srcv, &dstv2); + + vector8_store((uint8 *) &dst[i / 2], vector8_pack_16(dstv1, dstv2)); + } + + /* + * If something didn't look right in the vector path, try again in the + * scalar path so that we can handle it correctly. + */ + if (!success) + i = 0; + + return i / 2 + hex_decode_safe_scalar(src + i, len - i, dst + i / 2, escontext); +#endif +} + static uint64 hex_enc_len(const char *src, size_t srclen) { |