diff options
Diffstat (limited to 'src/backend/access/brin/brin_tuple.c')
-rw-r--r-- | src/backend/access/brin/brin_tuple.c | 554 |
1 files changed, 554 insertions, 0 deletions
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c new file mode 100644 index 00000000000..d895cb715cb --- /dev/null +++ b/src/backend/access/brin/brin_tuple.c @@ -0,0 +1,554 @@ +/* + * brin_tuple.c + * Method implementations for tuples in BRIN indexes. + * + * Intended usage is that code outside this file only deals with + * BrinMemTuples, and converts to and from the on-disk representation through + * functions in this file. + * + * NOTES + * + * A BRIN tuple is similar to a heap tuple, with a few key differences. The + * first interesting difference is that the tuple header is much simpler, only + * containing its total length and a small area for flags. Also, the stored + * data does not match the relation tuple descriptor exactly: for each + * attribute in the descriptor, the index tuple carries an arbitrary number + * of values, depending on the opclass. + * + * Also, for each column of the index relation there are two null bits: one + * (hasnulls) stores whether any tuple within the page range has that column + * set to null; the other one (allnulls) stores whether the column values are + * all null. If allnulls is true, then the tuple data area does not contain + * values for that column at all; whereas it does if the hasnulls is set. + * Note the size of the null bitmask may not be the same as that of the + * datum array. 
+ * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/brin/brin_tuple.c + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/brin_tuple.h" +#include "access/tupdesc.h" +#include "access/tupmacs.h" +#include "utils/datum.h" +#include "utils/memutils.h" + + +static inline void brin_deconstruct_tuple(BrinDesc *brdesc, + char *tp, bits8 *nullbits, bool nulls, + Datum *values, bool *allnulls, bool *hasnulls); + + +/* + * Return a tuple descriptor used for on-disk storage of BRIN tuples. + */ +static TupleDesc +brtuple_disk_tupdesc(BrinDesc *brdesc) +{ + /* We cache these in the BrinDesc */ + if (brdesc->bd_disktdesc == NULL) + { + int i; + int j; + AttrNumber attno = 1; + TupleDesc tupdesc; + MemoryContext oldcxt; + + /* make sure it's in the bdesc's context */ + oldcxt = MemoryContextSwitchTo(brdesc->bd_context); + + tupdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored, false); + + for (i = 0; i < brdesc->bd_tupdesc->natts; i++) + { + for (j = 0; j < brdesc->bd_info[i]->oi_nstored; j++) + TupleDescInitEntry(tupdesc, attno++, NULL, + brdesc->bd_info[i]->oi_typids[j], + -1, 0); + } + + MemoryContextSwitchTo(oldcxt); + + brdesc->bd_disktdesc = tupdesc; + } + + return brdesc->bd_disktdesc; +} + +/* + * Generate a new on-disk tuple to be inserted in a BRIN index. + * + * See brin_form_placeholder_tuple if you touch this. 
+ */ +BrinTuple * +brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, + Size *size) +{ + Datum *values; + bool *nulls; + bool anynulls = false; + BrinTuple *rettuple; + int keyno; + int idxattno; + uint16 phony_infomask; + bits8 *phony_nullbitmap; + Size len, + hoff, + data_len; + + Assert(brdesc->bd_totalstored > 0); + + values = palloc(sizeof(Datum) * brdesc->bd_totalstored); + nulls = palloc0(sizeof(bool) * brdesc->bd_totalstored); + phony_nullbitmap = palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored)); + + /* + * Set up the values/nulls arrays for heap_fill_tuple + */ + idxattno = 0; + for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) + { + int datumno; + + /* + * "allnulls" is set when there's no nonnull value in any row in the + * column; when this happens, there is no data to store. Thus set the + * nullable bits for all data elements of this column and we're done. + */ + if (tuple->bt_columns[keyno].bv_allnulls) + { + for (datumno = 0; + datumno < brdesc->bd_info[keyno]->oi_nstored; + datumno++) + nulls[idxattno++] = true; + anynulls = true; + continue; + } + + /* + * The "hasnulls" bit is set when there are some null values in the + * data. We still need to store a real value, but the presence of + * this means we need a null bitmap. + */ + if (tuple->bt_columns[keyno].bv_hasnulls) + anynulls = true; + + for (datumno = 0; + datumno < brdesc->bd_info[keyno]->oi_nstored; + datumno++) + values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno]; + } + + /* compute total space needed */ + len = SizeOfBrinTuple; + if (anynulls) + { + /* + * We need a double-length bitmap on an on-disk BRIN index tuple; the + * first half stores the "allnulls" bits, the second stores + * "hasnulls". 
+ */ + len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2); + } + + len = hoff = MAXALIGN(len); + + data_len = heap_compute_data_size(brtuple_disk_tupdesc(brdesc), + values, nulls); + + len += data_len; + + rettuple = palloc0(len); + rettuple->bt_blkno = blkno; + rettuple->bt_info = hoff; + Assert((rettuple->bt_info & BRIN_OFFSET_MASK) == hoff); + + /* + * The infomask and null bitmap as computed by heap_fill_tuple are useless + * to us. However, that function will not accept a null infomask; and we + * need to pass a valid null bitmap so that it will correctly skip + * outputting null attributes in the data area. + */ + heap_fill_tuple(brtuple_disk_tupdesc(brdesc), + values, + nulls, + (char *) rettuple + hoff, + data_len, + &phony_infomask, + phony_nullbitmap); + + /* done with these */ + pfree(values); + pfree(nulls); + pfree(phony_nullbitmap); + + /* + * Now fill in the real null bitmasks. allnulls first. + */ + if (anynulls) + { + bits8 *bitP; + int bitmask; + + rettuple->bt_info |= BRIN_NULLS_MASK; + + /* + * Note that we reverse the sense of null bits in this module: we + * store a 1 for a null attribute rather than a 0. So we must reverse + * the sense of the att_isnull test in br_deconstruct_tuple as well. 
+ */ + bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1; + bitmask = HIGHBIT; + for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) + { + if (bitmask != HIGHBIT) + bitmask <<= 1; + else + { + bitP += 1; + *bitP = 0x0; + bitmask = 1; + } + + if (!tuple->bt_columns[keyno].bv_allnulls) + continue; + + *bitP |= bitmask; + } + /* hasnulls bits follow */ + for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) + { + if (bitmask != HIGHBIT) + bitmask <<= 1; + else + { + bitP += 1; + *bitP = 0x0; + bitmask = 1; + } + + if (!tuple->bt_columns[keyno].bv_hasnulls) + continue; + + *bitP |= bitmask; + } + bitP = ((bits8 *) (rettuple + SizeOfBrinTuple)) - 1; + } + + if (tuple->bt_placeholder) + rettuple->bt_info |= BRIN_PLACEHOLDER_MASK; + + *size = len; + return rettuple; +} + +/* + * Generate a new on-disk tuple with no data values, marked as placeholder. + * + * This is a cut-down version of brin_form_tuple. + */ +BrinTuple * +brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size) +{ + Size len; + Size hoff; + BrinTuple *rettuple; + int keyno; + bits8 *bitP; + int bitmask; + + /* compute total space needed: always add nulls */ + len = SizeOfBrinTuple; + len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2); + len = hoff = MAXALIGN(len); + + rettuple = palloc0(len); + rettuple->bt_blkno = blkno; + rettuple->bt_info = hoff; + rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK; + + bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1; + bitmask = HIGHBIT; + /* set allnulls true for all attributes */ + for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) + { + if (bitmask != HIGHBIT) + bitmask <<= 1; + else + { + bitP += 1; + *bitP = 0x0; + bitmask = 1; + } + + *bitP |= bitmask; + } + /* no need to set hasnulls */ + + *size = len; + return rettuple; +} + +/* + * Free a tuple created by brin_form_tuple + */ +void +brin_free_tuple(BrinTuple *tuple) +{ + pfree(tuple); +} + +/* + * Create an palloc'd copy of a 
BrinTuple. + */ +BrinTuple * +brin_copy_tuple(BrinTuple *tuple, Size len) +{ + BrinTuple *newtup; + + newtup = palloc(len); + memcpy(newtup, tuple, len); + + return newtup; +} + +/* + * Return whether two BrinTuples are bitwise identical. + */ +bool +brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen) +{ + if (alen != blen) + return false; + if (memcmp(a, b, alen) != 0) + return false; + return true; +} + +/* + * Create a new BrinMemTuple from scratch, and initialize it to an empty + * state. + * + * Note: we don't provide any means to free a deformed tuple, so make sure to + * use a temporary memory context. + */ +BrinMemTuple * +brin_new_memtuple(BrinDesc *brdesc) +{ + BrinMemTuple *dtup; + char *currdatum; + long basesize; + int i; + + basesize = MAXALIGN(sizeof(BrinMemTuple) + + sizeof(BrinValues) * brdesc->bd_tupdesc->natts); + dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored); + currdatum = (char *) dtup + basesize; + for (i = 0; i < brdesc->bd_tupdesc->natts; i++) + { + dtup->bt_columns[i].bv_attno = i + 1; + dtup->bt_columns[i].bv_allnulls = true; + dtup->bt_columns[i].bv_hasnulls = false; + dtup->bt_columns[i].bv_values = (Datum *) currdatum; + currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored; + } + + dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext, + "brin dtuple", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + return dtup; +} + +/* + * Reset a BrinMemTuple to initial state + */ +void +brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc) +{ + int i; + + MemoryContextReset(dtuple->bt_context); + for (i = 0; i < brdesc->bd_tupdesc->natts; i++) + { + dtuple->bt_columns[i].bv_allnulls = true; + dtuple->bt_columns[i].bv_hasnulls = false; + } +} + +/* + * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of + * brin_form_tuple. 
+ * + * Note we don't need the "on disk tupdesc" here; we rely on our own routine to + * deconstruct the tuple from the on-disk format. + */ +BrinMemTuple * +brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple) +{ + BrinMemTuple *dtup; + Datum *values; + bool *allnulls; + bool *hasnulls; + char *tp; + bits8 *nullbits; + int keyno; + int valueno; + MemoryContext oldcxt; + + dtup = brin_new_memtuple(brdesc); + + if (BrinTupleIsPlaceholder(tuple)) + dtup->bt_placeholder = true; + dtup->bt_blkno = tuple->bt_blkno; + + values = palloc(sizeof(Datum) * brdesc->bd_totalstored); + allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); + hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); + + tp = (char *) tuple + BrinTupleDataOffset(tuple); + + if (BrinTupleHasNulls(tuple)) + nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple); + else + nullbits = NULL; + brin_deconstruct_tuple(brdesc, + tp, nullbits, BrinTupleHasNulls(tuple), + values, allnulls, hasnulls); + + /* + * Iterate to assign each of the values to the corresponding item in the + * values array of each column. The copies occur in the tuple's context. + */ + oldcxt = MemoryContextSwitchTo(dtup->bt_context); + for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) + { + int i; + + if (allnulls[keyno]) + { + valueno += brdesc->bd_info[keyno]->oi_nstored; + continue; + } + + /* + * We would like to skip datumCopy'ing the values datum in some cases, + * caller permitting ... 
+ */ + for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++) + dtup->bt_columns[keyno].bv_values[i] = + datumCopy(values[valueno++], + brdesc->bd_tupdesc->attrs[keyno]->attbyval, + brdesc->bd_tupdesc->attrs[keyno]->attlen); + + dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno]; + dtup->bt_columns[keyno].bv_allnulls = false; + } + + MemoryContextSwitchTo(oldcxt); + + pfree(values); + pfree(allnulls); + pfree(hasnulls); + + return dtup; +} + +/* + * brin_deconstruct_tuple + * Guts of attribute extraction from an on-disk BRIN tuple. + * + * Its arguments are: + * brdesc BRIN descriptor for the stored tuple + * tp pointer to the tuple data area + * nullbits pointer to the tuple nulls bitmask + * nulls "has nulls" bit in tuple infomask + * values output values, array of size brdesc->bd_totalstored + * allnulls output "allnulls", size brdesc->bd_tupdesc->natts + * hasnulls output "hasnulls", size brdesc->bd_tupdesc->natts + * + * Output arrays must have been allocated by caller. + */ +static inline void +brin_deconstruct_tuple(BrinDesc *brdesc, + char *tp, bits8 *nullbits, bool nulls, + Datum *values, bool *allnulls, bool *hasnulls) +{ + int attnum; + int stored; + TupleDesc diskdsc; + long off; + + /* + * First iterate to natts to obtain both null flags for each attribute. + * Note that we reverse the sense of the att_isnull test, because we store + * 1 for a null value (rather than a 1 for a not null value as is the + * att_isnull convention used elsewhere.) See brin_form_tuple. + */ + for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++) + { + /* + * the "all nulls" bit means that all values in the page range for + * this column are nulls. Therefore there are no values in the tuple + * data area. + */ + allnulls[attnum] = nulls && !att_isnull(attnum, nullbits); + + /* + * the "has nulls" bit means that some tuples have nulls, but others + * have not-null values. Therefore we know the tuple contains data + * for this column. 
+ * + * The hasnulls bits follow the allnulls bits in the same bitmask. + */ + hasnulls[attnum] = + nulls && !att_isnull(brdesc->bd_tupdesc->natts + attnum, nullbits); + } + + /* + * Iterate to obtain each attribute's stored values. Note that since we + * may reuse attribute entries for more than one column, we cannot cache + * offsets here. + */ + diskdsc = brtuple_disk_tupdesc(brdesc); + stored = 0; + off = 0; + for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++) + { + int datumno; + + if (allnulls[attnum]) + { + stored += brdesc->bd_info[attnum]->oi_nstored; + continue; + } + + for (datumno = 0; + datumno < brdesc->bd_info[attnum]->oi_nstored; + datumno++) + { + Form_pg_attribute thisatt = diskdsc->attrs[stored]; + + if (thisatt->attlen == -1) + { + off = att_align_pointer(off, thisatt->attalign, -1, + tp + off); + } + else + { + /* not varlena, so safe to use att_align_nominal */ + off = att_align_nominal(off, thisatt->attalign); + } + + values[stored++] = fetchatt(thisatt, tp + off); + + off = att_addlength_pointer(off, thisatt->attlen, tp + off); + } + } +} |