diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.global.in | 4 | ||||
-rw-r--r-- | src/include/pg_config.h.in | 15 | ||||
-rw-r--r-- | src/include/pg_config.h.win32 | 23 | ||||
-rw-r--r-- | src/include/port/pg_crc32c.h | 44 | ||||
-rw-r--r-- | src/port/Makefile | 8 | ||||
-rw-r--r-- | src/port/pg_crc32c_choose.c | 63 | ||||
-rw-r--r-- | src/port/pg_crc32c_sse42.c | 52 | ||||
-rw-r--r-- | src/tools/msvc/Mkvcbuild.pm | 13 |
8 files changed, 217 insertions, 5 deletions
diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 7c39d827246..4b06fc2d962 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -225,6 +225,7 @@ GCC = @GCC@ SUN_STUDIO_CC = @SUN_STUDIO_CC@ CFLAGS = @CFLAGS@ CFLAGS_VECTOR = @CFLAGS_VECTOR@ +CFLAGS_SSE42 = @CFLAGS_SSE42@ # Kind-of compilers @@ -548,6 +549,9 @@ endif LIBOBJS = @LIBOBJS@ +# files needed for the chosen CRC-32C implementation +PG_CRC32C_OBJS = @PG_CRC32C_OBJS@ + LIBS := -lpgcommon -lpgport $(LIBS) # to make ws2_32.lib the last library diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 202c51a34a5..5688f750af9 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -675,6 +675,12 @@ /* Define to 1 if your compiler understands __builtin_unreachable. */ #undef HAVE__BUILTIN_UNREACHABLE +/* Define to 1 if you have __cpuid. */ +#undef HAVE__CPUID + +/* Define to 1 if you have __get_cpuid. */ +#undef HAVE__GET_CPUID + /* Define to 1 if your compiler understands _Static_assert. */ #undef HAVE__STATIC_ASSERT @@ -818,6 +824,15 @@ /* Use replacement snprintf() functions. */ #undef USE_REPL_SNPRINTF +/* Define to 1 to use software CRC-32C implementation (slicing-by-8). */ +#undef USE_SLICING_BY_8_CRC32C + +/* Define to 1 to use Intel SSE 4.2 CRC instructions. */ +#undef USE_SSE42_CRC32C + +/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */ +#undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK + /* Define to select SysV-style semaphores. 
*/ #undef USE_SYSV_SEMAPHORES diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 1baf64f0056..d9fa711ab57 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -6,8 +6,8 @@ * * HAVE_CBRT, HAVE_FUNCNAME_FUNC, HAVE_GETOPT, HAVE_GETOPT_H, HAVE_INTTYPES_H, * HAVE_GETOPT_LONG, HAVE_LOCALE_T, HAVE_RINT, HAVE_STRINGS_H, HAVE_STRTOLL, - * HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, - * PG_USE_INLINE, inline + * HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, PG_USE_INLINE, + * inline, USE_SSE42_CRC32C_WITH_RUNTIME_CHECK */ /* Define to the type of arg 1 of 'accept' */ @@ -529,6 +529,12 @@ /* Define to 1 if your compiler understands __builtin_unreachable. */ /* #undef HAVE__BUILTIN_UNREACHABLE */ +/* Define to 1 if you have __cpuid. */ +#define HAVE__CPUID 1 + +/* Define to 1 if you have __get_cpuid. */ +/* #undef HAVE__GET_CPUID */ + /* Define to 1 if your compiler understands _Static_assert. */ /* #undef HAVE__STATIC_ASSERT */ @@ -639,6 +645,19 @@ /* Use replacement snprintf() functions. */ #define USE_REPL_SNPRINTF 1 +/* Define to 1 to use software CRC-32C implementation (slicing-by-8). */ +#if (_MSC_VER < 1500) +#define USE_SLICING_BY_8_CRC32C 1 +#endif + +/* Define to 1 to use Intel SSE 4.2 CRC instructions. */ +/* #undef USE_SSE42_CRC32C */ + +/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */ +#if (_MSC_VER >= 1500) +#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1 +#endif + /* Define to select SysV-style semaphores. */ /* #undef USE_SYSV_SEMAPHORES */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index d07c0cb623d..b14d194fb33 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -3,6 +3,25 @@ * pg_crc32c.h * Routines for computing CRC-32C checksums. * + * The speed of CRC-32C calculation has a big impact on performance, so we + * jump through some hoops to get the best implementation for each + * platform. 
Some CPU architectures have special instructions for speeding + * up CRC calculations (e.g. Intel SSE 4.2), on other platforms we use the + * Slicing-by-8 algorithm which uses lookup tables. + * + * The public interface consists of four macros: + * + * INIT_CRC32C(crc) + * Initialize a CRC accumulator + * + * COMP_CRC32C(crc, data, len) + * Accumulate some (more) bytes into a CRC + * + * FIN_CRC32C(crc) + * Finish a CRC calculation + * + * EQ_CRC32C(c1, c2) + * Check for equality of two CRCs. * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -16,9 +35,32 @@ typedef uint32 pg_crc32c; +/* The INIT and EQ macros are the same for all implementations. */ #define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) #define EQ_CRC32C(c1, c2) ((c1) == (c2)) +#if defined(USE_SSE42_CRC32C) +/* Use SSE 4.2 instructions unconditionally (no runtime check). */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) +/* + * Call through the pg_comp_crc32c function pointer, so that SSE 4.2 + * instructions are used only when a runtime check finds them available. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); + +#else /* * Use slicing-by-8 algorithm. 
* @@ -46,4 +88,6 @@ typedef uint32 pg_crc32c; extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +#endif + #endif /* PG_CRC32C_H */ diff --git a/src/port/Makefile b/src/port/Makefile index d1c9c8a9877..bc9b63add04 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -30,10 +30,10 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) -OBJS = $(LIBOBJS) chklocale.o erand48.o inet_net_ntop.o \ +OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \ pgstrcasecmp.o pqsignal.o \ - qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o pg_crc32c_sb8.o + qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o # foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND OBJS_SRV = $(OBJS:%.o=%_srv.o) @@ -57,6 +57,10 @@ libpgport.a: $(OBJS) # thread.o needs PTHREAD_CFLAGS (but thread_srv.o does not) thread.o: CFLAGS+=$(PTHREAD_CFLAGS) +# pg_crc32c_sse42.o and its _srv.o version need CFLAGS_SSE42 +pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42) +pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42) + # # Server versions of object files # diff --git a/src/port/pg_crc32c_choose.c b/src/port/pg_crc32c_choose.c new file mode 100644 index 00000000000..ba0d1670f82 --- /dev/null +++ b/src/port/pg_crc32c_choose.c @@ -0,0 +1,63 @@ +/*------------------------------------------------------------------------- + * + * pg_crc32c_choose.c + * Choose which CRC-32C implementation to use, at runtime. + * + * Try to use the special CRC instructions introduced in Intel SSE 4.2, + * if available on the platform we're running on, but fall back to the + * slicing-by-8 implementation otherwise. 
+ * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/port/pg_crc32c_choose.c + * + *------------------------------------------------------------------------- + */ + +#include "c.h" + +#ifdef HAVE__GET_CPUID +#include <cpuid.h> +#endif + +#ifdef HAVE__CPUID +#include <intrin.h> +#endif + +#include "port/pg_crc32c.h" + +static bool +pg_crc32c_sse42_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif + + return (exx[2] & (1 << 20)) != 0; /* CPUID leaf 1, ECX bit 20: SSE 4.2 */ +} + +/* + * Initial target of the pg_comp_crc32c pointer, so it runs on the first call. + * It repoints pg_comp_crc32c at the chosen implementation and forwards to it. + */ +static pg_crc32c +pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) +{ + if (pg_crc32c_sse42_available()) + pg_comp_crc32c = pg_comp_crc32c_sse42; + else + pg_comp_crc32c = pg_comp_crc32c_sb8; + + return pg_comp_crc32c(crc, data, len); +} + +pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c new file mode 100644 index 00000000000..b6107103bef --- /dev/null +++ b/src/port/pg_crc32c_sse42.c @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * pg_crc32c_sse42.c + * Compute CRC-32C checksum using Intel SSE 4.2 instructions. 
+ * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/port/pg_crc32c_sse42.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "port/pg_crc32c.h" + +#include <nmmintrin.h> + +pg_crc32c +pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) +{ + const unsigned char *p = data; + const uint64 *p8; + + /* + * Process eight bytes of data at a time. + * + * NB: We do unaligned 8-byte accesses here, which the Intel architecture + * allows; testing showed no gain from aligning the beginning address. + * NOTE(review): _mm_crc32_u64 presumably needs a 64-bit target - confirm. + */ + p8 = (const uint64 *) p; + while (len >= 8) + { + crc = (uint32) _mm_crc32_u64(crc, *p8++); + len -= 8; + } + + /* + * Handle the remaining bytes (fewer than eight) one at a time. + */ + p = (const unsigned char *) p8; + while (len > 0) + { + crc = _mm_crc32_u8(crc, *p++); + len--; + } + + return crc; +} diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index b2c0dfbd7b7..39281db9011 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -92,10 +92,21 @@ sub mkvcbuild pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c pqsignal.c mkdtemp.c qsort.c qsort_arg.c quotes.c system.c sprompt.c tar.c thread.c getopt.c getopt_long.c dirent.c - win32env.c win32error.c win32setlocale.c pg_crc32c_sb8.c); + win32env.c win32error.c win32setlocale.c); push(@pgportfiles, 'rint.c') if ($vsVersion < '12.00'); + if ($vsVersion >= '9.00') + { + push(@pgportfiles, 'pg_crc32c_choose.c'); + push(@pgportfiles, 'pg_crc32c_sse42.c'); + push(@pgportfiles, 'pg_crc32c_sb8.c'); + } + else + { + push(@pgportfiles, 'pg_crc32c_sb8.c') + } + our @pgcommonallfiles = qw( exec.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c string.c username.c wait_error.c); |