diff options
| author | Nick Terrell <terrelln@meta.com> | 2025-03-08 12:09:33 -0800 |
|---|---|---|
| committer | Nick Terrell <terrelln@meta.com> | 2025-03-13 13:25:58 -0700 |
| commit | 65d1f5507ed2c78c64fce40e44e5574a9419eb09 (patch) | |
| tree | 4a1b819db2ea7f2a322e9bcc7582d946d0a4ea29 /lib/zstd/compress/fse_compress.c | |
| parent | 7eb172143d5508b4da468ed59ee857c6e5e01da6 (diff) | |
zstd: Import upstream v1.5.7
In addition to keeping the kernel's copy of zstd up to date, this update
was requested by Intel to expose upstream's APIs that allow QAT to accelerate
the LZ match finding stage of Zstd.
This patch is imported from the upstream tag v1.5.7-kernel [0], which is signed
with upstream's signing key EF8FE99528B52FFD [1]. It was imported from upstream
using this command:
export ZSTD=/path/to/repo/zstd/
export LINUX=/path/to/repo/linux/
cd "$ZSTD/contrib/linux-kernel"
git checkout v1.5.7-kernel
make import LINUX="$LINUX"
This patch has been tested on x86-64, and has been boot tested with
a zstd compressed kernel & initramfs on i386 and aarch64. I benchmarked
the patch on x86-64 with gcc-14.2.1 on an Intel i9-9900K by measruing the
performance of compressed filesystem reads and writes.
Component, Level, Size delta, C. time delta, D. time delta
Btrfs , 1, +0.00%, -6.1%, +1.4%
Btrfs , 3, +0.00%, -9.8%, +3.0%
Btrfs , 5, +0.00%, +1.7%, +1.4%
Btrfs , 7, +0.00%, -1.9%, +2.7%
Btrfs , 9, +0.00%, -3.4%, +3.7%
Btrfs , 15, +0.00%, -0.3%, +3.6%
SquashFS , 1, +0.00%, N/A, +1.9%
The major changes that impact the kernel use cases for each version are:
v1.5.7: https://github.com/facebook/zstd/releases/tag/v1.5.7
* Add zstd_compress_sequences_and_literals() for use by Intel's QAT driver
to implement Zstd compression acceleration in the kernel.
* Fix an underflow bug in 32-bit builds that can cause data corruption when
processing more than 4GB of data with a single `ZSTD_CCtx` object, when an
input crosses the 4GB boundry. I don't believe this impacts any current kernel
use cases, because the `ZSTD_CCtx` is typically reconstructed between
compressions.
* Levels 1-4 see 5-10% compression speed improvements for inputs smaller than
128KB.
v1.5.6: https://github.com/facebook/zstd/releases/tag/v1.5.6
* Improved compression ratio for the highest compression levels. I don't expect
these see much use however, due to their slow speeds.
v1.5.5: https://github.com/facebook/zstd/releases/tag/v1.5.5
* Fix a rare corruption bug that can trigger on levels 13 and above.
* Improve compression speed of levels 5-11 on incompressible data.
v1.5.4: https://github.com/facebook/zstd/releases/tag/v1.5.4
* Improve copmression speed of levels 5-11 on ARM.
* Improve dictionary compression speed.
Signed-off-by: Nick Terrell <terrelln@fb.com>
Diffstat (limited to 'lib/zstd/compress/fse_compress.c')
| -rw-r--r-- | lib/zstd/compress/fse_compress.c | 74 |
1 files changed, 16 insertions, 58 deletions
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c index ec5b1ca6d71a..44a3c10becf2 100644 --- a/lib/zstd/compress/fse_compress.c +++ b/lib/zstd/compress/fse_compress.c @@ -1,6 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause /* ****************************************************************** * FSE : Finite State Entropy encoder - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -25,7 +26,8 @@ #include "../common/error_private.h" #define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MATH64 -#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ +#include "../common/zstd_deps.h" /* ZSTD_memset */ +#include "../common/bits.h" /* ZSTD_highbit32 */ /* ************************************************************** @@ -90,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, assert(tableLog < 16); /* required for threshold strategy to work */ /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ #ifdef __clang_analyzer__ ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ @@ -191,7 +193,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, break; default : assert(normalizedCounter[s] > 1); - { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); + { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1); U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); @@ -224,8 +226,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog + 4 /* bitCount initialized at 4 */ + 2 /* first two symbols may use one additional bit each */) / 8) - + 1 /* round up to whole nb bytes */ - + 2 /* additional two bytes for bitstream flush */; + + 1 /* round up to whole nb bytes */ + + 2 /* additional two bytes for bitstream flush */; return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ } @@ -254,7 +256,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, /* Init */ remaining = tableSize+1; /* +1 for extra accuracy */ threshold = tableSize; - nbBits = tableLog+1; + nbBits = (int)tableLog+1; while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ if (previousIs0) { @@ -273,7 +275,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, } while (symbol >= start+3) { start+=3; - bitStream += 3 << bitCount; + bitStream += 3U << bitCount; bitCount += 2; } bitStream += (symbol-start) << bitCount; @@ -293,7 +295,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, count++; /* +1 for extra accuracy */ if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ - bitStream += count << bitCount; + bitStream += (U32)count << bitCount; bitCount += nbBits; bitCount -= (count<max); previousIs0 = (count==1); @@ -321,7 +323,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize, out[1] = (BYTE)(bitStream>>8); out+= (bitCount+7) /8; - return (out-ostart); + assert(out >= ostart); + return (size_t)(out-ostart); } @@ -342,21 +345,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, * FSE Compression Code ****************************************************************/ -FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) -{ - size_t size; - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); - return (FSE_CTable*)ZSTD_malloc(size); -} - -void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } - /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) { - U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; assert(srcSize > 1); /* Not supported, RLE should be used instead */ return minBits; @@ -364,7 +357,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) { - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus; U32 tableLog = maxTableLog; U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); assert(srcSize > 1); /* Not supported, RLE should be used instead */ @@ -532,40 +525,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, return tableLog; } - -/* fake FSE_CTable, for raw (uncompressed) input */ -size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) -{ - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSymbolValue = tableMask; - void* const ptr = ct; - U16* const tableU16 = ( (U16*) ptr) + 2; - void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* header */ - tableU16[-2] = (U16) nbBits; - tableU16[-1] = (U16) maxSymbolValue; - - /* Build table */ - for (s=0; s<tableSize; s++) - tableU16[s] = (U16)(tableSize + s); - - /* Build Symbol Transformation Table */ - { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); - for (s=0; s<=maxSymbolValue; s++) { - symbolTT[s].deltaNbBits = deltaNbBits; - symbolTT[s].deltaFindState = s-1; - } } - - return 0; -} - /* fake FSE_CTable, for rle input (always same symbol) */ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) { @@ -664,5 +623,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } - #endif /* FSE_COMMONDEFS_ONLY */ |
