author    Tom Lane <tgl@sss.pgh.pa.us>  2002-12-29 22:29:03 +0000
committer Tom Lane <tgl@sss.pgh.pa.us>  2002-12-29 22:29:03 +0000
commit    629df5f4895048249806170765358089708e7e83 (patch)
tree      7f7f2652e3cb8cdfd5ba5e2835b84c57aea2127f /src
parent    b37d6373f0b2fc9f9779b2722f3a6095645cc9d3 (diff)
Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch(). Fixes core dump on large hash joins, as in the
example from Rae Stiening.
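
For context: ExecHashJoinGetBatch() maps a virtual bucket number to a
batch number using plain int arithmetic, and the intermediate product
scales with nbatch * totalbuckets. A minimal sketch of the failure mode
(the helper name and exact formula below are illustrative, not copied
from nodeHashjoin.c):

    #include <limits.h>
    #include <stdio.h>

    /* Illustrative stand-in for the batch-number computation: the
     * intermediate product (bucketno - nbuckets) * nbatch is evaluated
     * in int arithmetic, so it can exceed INT_MAX long before the
     * final division brings the result back into range. */
    static int
    get_batch_sketch(int bucketno, int nbuckets, int totalbuckets, int nbatch)
    {
        if (bucketno < nbuckets)
            return 0;           /* tuple belongs to the in-memory batch */
        return (bucketno - nbuckets) * nbatch /
            (totalbuckets - nbuckets) + 1;
    }

    int
    main(void)
    {
        /* Estimates a very large join could produce before this fix:
         * the intermediate product here is roughly 5e12, far beyond
         * INT_MAX (~2.1e9), so the quotient -- and hence the batch
         * file chosen for the tuple -- is garbage. */
        printf("batch = %d\n",
               get_batch_sketch(100000000, 10000, 200000000, 50000));
        return 0;
    }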
Diffstat (limited to 'src')
-rw-r--r--  src/backend/executor/nodeHash.c  |  36
1 file changed, 23 insertions(+), 13 deletions(-)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 8bb5bde84c0..4ac8aecd2d0 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
- * $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $
+ * $Id: nodeHash.c,v 1.66.2.1 2002/12/29 22:29:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -342,7 +343,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
     int         tupsize;
     double      inner_rel_bytes;
-    double      hash_table_bytes;
+    long        hash_table_bytes;
+    double      dtmp;
     int         nbatch;
     int         nbuckets;
     int         totalbuckets;
@@ -360,20 +362,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
     inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
     /*
-     * Target hashtable size is SortMem kilobytes, but not less than
-     * sqrt(estimated inner rel size), so as to avoid horrible
-     * performance.
+     * Target in-memory hashtable size is SortMem kilobytes.
      */
-    hash_table_bytes = sqrt(inner_rel_bytes);
-    if (hash_table_bytes < (SortMem * 1024L))
-        hash_table_bytes = SortMem * 1024L;
+    hash_table_bytes = SortMem * 1024L;
 
     /*
      * Count the number of hash buckets we want for the whole relation,
      * for an average bucket load of NTUP_PER_BUCKET (per virtual
-     * bucket!).
+     * bucket!).  It has to fit in an int, however.
      */
-    totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+    dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+    if (dtmp < INT_MAX)
+        totalbuckets = (int) dtmp;
+    else
+        totalbuckets = INT_MAX;
+    if (totalbuckets <= 0)
+        totalbuckets = 1;
 
     /*
      * Count the number of buckets we think will actually fit in the
@@ -407,10 +411,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
      * that nbatch doesn't have to have anything to do with the ratio
      * totalbuckets/nbuckets; in fact, it is the number of groups we
      * will use for the part of the data that doesn't fall into the
-     * first nbuckets hash buckets.
+     * first nbuckets hash buckets.  We try to set it to make all the
+     * batches the same size.  But we have to keep nbatch small
+     * enough to avoid integer overflow in ExecHashJoinGetBatch().
      */
-    nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-                        hash_table_bytes);
+    dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+                hash_table_bytes);
+    if (dtmp < INT_MAX / totalbuckets)
+        nbatch = (int) dtmp;
+    else
+        nbatch = INT_MAX / totalbuckets;
     if (nbatch <= 0)
         nbatch = 1;
 }
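
Both clamps in this patch follow the same pattern: keep the estimate in
a double, compare it against the cap while still in floating point, and
only then narrow to int, so the cast itself can never overflow. A
standalone sketch of that guard (the function name is illustrative, not
part of the patch):

    #include <limits.h>
    #include <math.h>

    /* Clamp an integer-valued double estimate to [1, cap], mirroring
     * the patched logic: INT_MAX is exactly representable as a double,
     * so the comparison is safe and the cast that follows is always
     * in range. */
    static int
    clamp_to_int(double estimate, int cap)
    {
        int     result;

        if (estimate < (double) cap)
            result = (int) estimate;
        else
            result = cap;
        if (result <= 0)
            result = 1;
        return result;
    }

    /* Usage mirroring the patch:
     *   totalbuckets = clamp_to_int(ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET),
     *                               INT_MAX);
     *   nbatch = clamp_to_int(ceil((inner_rel_bytes - hash_table_bytes) /
     *                              hash_table_bytes),
     *                         INT_MAX / totalbuckets);
     * Capping nbatch at INT_MAX / totalbuckets is what guarantees that
     * the product nbatch * totalbuckets stays within int range inside
     * ExecHashJoinGetBatch(). */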