| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
 | /*-------------------------------------------------------------------------
 *
 * hash.h
 *	  header file for postgres hash access method implementation
 *
 *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $Id: hash.h,v 1.38 2001/03/22 04:00:27 momjian Exp $
 *
 * NOTES
 *		modeled after Margo Seltzer's hash implementation for unix.
 *
 *-------------------------------------------------------------------------
 */
#ifndef HASH_H
#define HASH_H
#include "access/itup.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xlog.h"
#include "fmgr.h"
/*
 * An overflow page is a spare page allocated for storing data whose
 * bucket doesn't have room to store it. We use overflow pages rather
 * than just splitting the bucket because there is a linear order in
 * the way we split buckets. In other words, if there isn't enough space
 * in the bucket itself, put it in an overflow page.
 *
 * Overflow page addresses are stored in form: (Splitnumber, Page offset).
 *
 * A splitnumber is the number of the generation where the table doubles
 * in size. The ovflpage's offset within the splitnumber; offsets start
 * at 1.
 *
 * We convert the stored bitmap address into a page address with the
 * macro OADDR_OF(S, O) where S is the splitnumber and O is the page
 * offset.
 */
typedef uint32 Bucket;
typedef bits16 OverflowPageAddress;
typedef uint32 SplitNumber;
typedef uint32 PageOffset;
/* A valid overflow address will always have a page offset >= 1 */
#define InvalidOvflAddress		0
#define SPLITSHIFT		11
#define SPLITMASK		0x7FF
#define SPLITNUM(N)		((SplitNumber)(((uint32)(N)) >> SPLITSHIFT))
#define OPAGENUM(N)		((PageOffset)((N) & SPLITMASK))
#define OADDR_OF(S,O)	((OverflowPageAddress)((uint32)((uint32)(S) << SPLITSHIFT) + (O)))
#define BUCKET_TO_BLKNO(B) \
		((Bucket) ((B) + ((B) ? metap->SPARES[_hash_log2((B)+1)-1] : 0)) + 1)
#define OADDR_TO_BLKNO(B)		 \
		((BlockNumber) \
		 (BUCKET_TO_BLKNO ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B))));
/*
 * hasho_flag tells us which type of page we're looking at.  For
 * example, knowing overflow pages from bucket pages is necessary
 * information when you're deleting tuples from a page. If all the
 * tuples are deleted from an overflow page, the overflow is made
 * available to other buckets by calling _hash_freeovflpage(). If all
 * the tuples are deleted from a bucket page, no additional action is
 * necessary.
 */
#define LH_UNUSED_PAGE			(0)
#define LH_OVERFLOW_PAGE		(1 << 0)
#define LH_BUCKET_PAGE			(1 << 1)
#define LH_BITMAP_PAGE			(1 << 2)
#define LH_META_PAGE			(1 << 3)
typedef struct HashPageOpaqueData
{
	bits16		hasho_flag;		/* is this page a bucket or ovfl */
	Bucket		hasho_bucket;	/* bucket number this pg belongs to */
	OverflowPageAddress hasho_oaddr;	/* ovfl address of this ovfl pg */
	BlockNumber hasho_nextblkno;/* next ovfl blkno */
	BlockNumber hasho_prevblkno;/* previous ovfl (or bucket) blkno */
} HashPageOpaqueData;
typedef HashPageOpaqueData *HashPageOpaque;
/*
 *	ScanOpaqueData is used to remember which buffers we're currently
 *	examining in the scan.	We keep these buffers locked and pinned and
 *	recorded in the opaque entry of the scan in order to avoid doing a
 *	ReadBuffer() for every tuple in the index.	This avoids semop() calls,
 *	which are expensive.
 */
typedef struct HashScanOpaqueData
{
	Buffer		hashso_curbuf;
	Buffer		hashso_mrkbuf;
} HashScanOpaqueData;
typedef HashScanOpaqueData *HashScanOpaque;
/*
 * Definitions for metapage.
 */
#define HASH_METAPAGE	0		/* metapage is always block 0 */
#define HASH_MAGIC		0x6440640
#define HASH_VERSION	0
/*
 * NCACHED is used to set the array sizeof spares[] & bitmaps[].
 *
 * Spares[] is used to hold the number overflow pages currently
 * allocated at a certain splitpoint. For example, if spares[3] = 7
 * then there are a maximum of 7 ovflpages available at splitpoint 3.
 * The value in spares[] will change as ovflpages are added within
 * a splitpoint.
 *
 * Within a splitpoint, one can find which ovflpages are available and
 * which are used by looking at a bitmaps that are stored on the ovfl
 * pages themselves. There is at least one bitmap for every splitpoint's
 * ovflpages. Bitmaps[] contains the ovflpage addresses of the ovflpages
 * that hold the ovflpage bitmaps.
 *
 * The reason that the size is restricted to NCACHED (32) is because
 * the bitmaps are 16 bits: upper 5 represent the splitpoint, lower 11
 * indicate the page number within the splitpoint. Since there are
 * only 5 bits to store the splitpoint, there can only be 32 splitpoints.
 * Both spares[] and bitmaps[] use splitpoints as there indices, so there
 * can only be 32 of them.
 */
#define NCACHED			32
typedef struct HashMetaPageData
{
	PageHeaderData hashm_phdr;	/* pad for page header (do not use) */
	uint32		hashm_magic;	/* magic no. for hash tables */
	uint32		hashm_version;	/* version ID */
	uint32		hashm_nkeys;	/* number of keys stored in the table */
	uint16		hashm_ffactor;	/* fill factor */
	uint16		hashm_bsize;	/* bucket size (bytes) - must be a power
								 * of 2 */
	uint16		hashm_bshift;	/* bucket shift */
	uint16		hashm_bmsize;	/* bitmap array size (bytes) - must be a
								 * power of 2 */
	uint32		hashm_maxbucket;/* ID of maximum bucket in use */
	uint32		hashm_highmask; /* mask to modulo into entire table */
	uint32		hashm_lowmask;	/* mask to modulo into lower half of table */
	uint32		hashm_ovflpoint;/* pageno. from which ovflpgs being
								 * allocated */
	uint32		hashm_lastfreed;/* last ovflpage freed */
	uint32		hashm_nmaps;	/* Initial number of bitmaps */
	uint32		hashm_spares[NCACHED];	/* spare pages available at
										 * splitpoints */
	BlockNumber hashm_mapp[NCACHED];	/* blknumbers of ovfl page maps */
	RegProcedure hashm_procid;	/* hash procedure id from pg_proc */
} HashMetaPageData;
typedef HashMetaPageData *HashMetaPage;
/* Short hands for accessing structure */
#define OVFL_POINT		hashm_ovflpoint
#define LAST_FREED		hashm_lastfreed
#define MAX_BUCKET		hashm_maxbucket
#define FFACTOR			hashm_ffactor
#define HIGH_MASK		hashm_highmask
#define LOW_MASK		hashm_lowmask
#define NKEYS			hashm_nkeys
#define SPARES			hashm_spares
extern bool BuildingHash;
typedef struct HashItemData
{
	IndexTupleData hash_itup;
} HashItemData;
typedef HashItemData *HashItem;
/*
 * Constants
 */
#define DEFAULT_FFACTOR			300
#define SPLITMAX				8
#define BYTE_TO_BIT				3		/* 2^3 bits/byte */
#define INT_TO_BYTE				2		/* 2^2 bytes/int */
#define INT_TO_BIT				5		/* 2^5 bits/int */
#define ALL_SET					((uint32) ~0)
/*
 * bitmap pages do not contain tuples.	they do contain the standard
 * page headers and trailers; however, everything in between is a
 * giant bit array.  the number of bits that fit on a page obviously
 * depends on the page size and the header/trailer overhead.
 */
#define BMPGSZ_BYTE(metap)		((metap)->hashm_bmsize)
#define BMPGSZ_BIT(metap)		((metap)->hashm_bmsize << BYTE_TO_BIT)
#define HashPageGetBitmap(pg) \
	((uint32 *) (((char *) (pg)) + MAXALIGN(sizeof(PageHeaderData))))
/*
 * The number of bits in an ovflpage bitmap which
 * tells which ovflpages are empty versus in use (NOT the number of
 * bits in an overflow page *address* bitmap).
 */
#define BITS_PER_MAP	32		/* Number of bits in ovflpage bitmap */
/* Given the address of the beginning of a big map, clear/set the nth bit */
#define CLRBIT(A, N)	((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
#define SETBIT(A, N)	((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
#define ISSET(A, N)		((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
/*
 * page locking modes
 */
#define HASH_READ		0
#define HASH_WRITE		1
/*
 *	In general, the hash code tries to localize its knowledge about page
 *	layout to a couple of routines.  However, we need a special value to
 *	indicate "no page number" in those places where we expect page numbers.
 */
#define P_NONE			0
/*
 *	Strategy number. There's only one valid strategy for hashing: equality.
 */
#define HTEqualStrategyNumber			1
#define HTMaxStrategyNumber				1
/*
 *	When a new operator class is declared, we require that the user supply
 *	us with an amproc procudure for hashing a key of the new type.
 *	Since we only have one such proc in amproc, it's number 1.
 */
#define HASHPROC		1
/* public routines */
extern Datum hashbuild(PG_FUNCTION_ARGS);
extern Datum hashinsert(PG_FUNCTION_ARGS);
extern Datum hashgettuple(PG_FUNCTION_ARGS);
extern Datum hashbeginscan(PG_FUNCTION_ARGS);
extern Datum hashrescan(PG_FUNCTION_ARGS);
extern Datum hashendscan(PG_FUNCTION_ARGS);
extern Datum hashmarkpos(PG_FUNCTION_ARGS);
extern Datum hashrestrpos(PG_FUNCTION_ARGS);
extern Datum hashdelete(PG_FUNCTION_ARGS);
/*
 * Datatype-specific hash functions in hashfunc.c.
 *
 * NOTE: some of these are also used by catcache operations, without
 * any direct connection to hash indexes.
 */
extern Datum hashchar(PG_FUNCTION_ARGS);
extern Datum hashint2(PG_FUNCTION_ARGS);
extern Datum hashint4(PG_FUNCTION_ARGS);
extern Datum hashint8(PG_FUNCTION_ARGS);
extern Datum hashoid(PG_FUNCTION_ARGS);
extern Datum hashfloat4(PG_FUNCTION_ARGS);
extern Datum hashfloat8(PG_FUNCTION_ARGS);
extern Datum hashoidvector(PG_FUNCTION_ARGS);
extern Datum hashint2vector(PG_FUNCTION_ARGS);
extern Datum hashname(PG_FUNCTION_ARGS);
extern Datum hashvarlena(PG_FUNCTION_ARGS);
extern Datum hash_any(char *keydata, int keylen);
/* private routines */
/* hashinsert.c */
extern InsertIndexResult _hash_doinsert(Relation rel, HashItem hitem);
/* hashovfl.c */
extern Buffer _hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf);
extern Buffer _hash_freeovflpage(Relation rel, Buffer ovflbuf);
extern int32 _hash_initbitmap(Relation rel, HashMetaPage metap, int32 pnum,
				 int32 nbits, int32 ndx);
extern void _hash_squeezebucket(Relation rel, HashMetaPage metap,
					Bucket bucket);
/* hashpage.c */
extern void _hash_metapinit(Relation rel);
extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access);
extern void _hash_relbuf(Relation rel, Buffer buf, int access);
extern void _hash_wrtbuf(Relation rel, Buffer buf);
extern void _hash_wrtnorelbuf(Relation rel, Buffer buf);
extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access,
				   int to_access);
extern void _hash_pageinit(Page page, Size size);
extern void _hash_pagedel(Relation rel, ItemPointer tid);
extern void _hash_expandtable(Relation rel, Buffer metabuf);
/* hashscan.c */
extern void _hash_regscan(IndexScanDesc scan);
extern void _hash_dropscan(IndexScanDesc scan);
extern void _hash_adjscans(Relation rel, ItemPointer tid);
/* hashsearch.c */
extern void _hash_search(Relation rel, int keysz, ScanKey scankey,
			 Buffer *bufP, HashMetaPage metap);
extern RetrieveIndexResult _hash_next(IndexScanDesc scan, ScanDirection dir);
extern RetrieveIndexResult _hash_first(IndexScanDesc scan, ScanDirection dir);
extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir,
		   Buffer metabuf);
/* hashutil.c */
extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup,
				HashMetaPage metap);
extern void _hash_freeskey(ScanKey skey);
extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
extern HashItem _hash_formitem(IndexTuple itup);
extern Bucket _hash_call(Relation rel, HashMetaPage metap, Datum key);
extern uint32 _hash_log2(uint32 num);
extern void _hash_checkpage(Page page, int flags);
/* hash.c */
extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
extern void hash_undo(XLogRecPtr lsn, XLogRecord *record);
extern void hash_desc(char *buf, uint8 xl_info, char *rec);
#endif	 /* HASH_H */
 |