Diffstat (limited to 'reftable/block.c')
-rw-r--r-- | reftable/block.c | 655
1 file changed, 655 insertions, 0 deletions
diff --git a/reftable/block.c b/reftable/block.c
new file mode 100644
index 0000000000..920b3f4486
--- /dev/null
+++ b/reftable/block.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file or at
+ * https://developers.google.com/open-source/licenses/bsd
+ */
+
+#include "block.h"
+
+#include "blocksource.h"
+#include "constants.h"
+#include "iter.h"
+#include "record.h"
+#include "reftable-error.h"
+#include "system.h"
+
+size_t header_size(int version)
+{
+	switch (version) {
+	case 1:
+		return 24;
+	case 2:
+		return 28;
+	}
+	abort();
+}
+
+size_t footer_size(int version)
+{
+	switch (version) {
+	case 1:
+		return 68;
+	case 2:
+		return 72;
+	}
+	abort();
+}
+
+static int block_writer_register_restart(struct block_writer *w, int n,
+					 int is_restart, struct reftable_buf *key)
+{
+	uint32_t rlen;
+	int err;
+
+	rlen = w->restart_len;
+	if (rlen >= MAX_RESTARTS)
+		is_restart = 0;
+
+	if (is_restart)
+		rlen++;
+	if (2 + 3 * rlen + n > w->block_size - w->next)
+		return REFTABLE_ENTRY_TOO_BIG_ERROR;
+	if (is_restart) {
+		REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1,
+					    w->restart_cap);
+		if (!w->restarts)
+			return REFTABLE_OUT_OF_MEMORY_ERROR;
+		w->restarts[w->restart_len++] = w->next;
+	}
+
+	w->next += n;
+
+	reftable_buf_reset(&w->last_key);
+	err = reftable_buf_add(&w->last_key, key->buf, key->len);
+	if (err < 0)
+		return err;
+
+	w->entries++;
+	return 0;
+}
+
+int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
+		      uint32_t block_size, uint32_t header_off, uint32_t hash_size)
+{
+	bw->block = block;
+	bw->hash_size = hash_size;
+	bw->block_size = block_size;
+	bw->header_off = header_off;
+	bw->block[header_off] = typ;
+	bw->next = header_off + 4;
+	bw->restart_interval = 16;
+	bw->entries = 0;
+	bw->restart_len = 0;
+	bw->last_key.len = 0;
+	if (!bw->zstream) {
+		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
+		if (!bw->zstream)
+			return REFTABLE_OUT_OF_MEMORY_ERROR;
+		deflateInit(bw->zstream, 9);
+	}
+
+	return 0;
+}
+
+uint8_t block_writer_type(struct block_writer *bw)
+{
+	return bw->block[bw->header_off];
+}
+
+/*
+ * Adds the reftable_record to the block. Returns 0 on success and
+ * appropriate error codes on failure.
+ */
+int block_writer_add(struct block_writer *w, struct reftable_record *rec)
+{
+	struct reftable_buf empty = REFTABLE_BUF_INIT;
+	struct reftable_buf last =
+		w->entries % w->restart_interval == 0 ? empty : w->last_key;
+	struct string_view out = {
+		.buf = w->block + w->next,
+		.len = w->block_size - w->next,
+	};
+	struct string_view start = out;
+	int is_restart = 0;
+	int n = 0;
+	int err;
+
+	err = reftable_record_key(rec, &w->scratch);
+	if (err < 0)
+		goto done;
+
+	if (!w->scratch.len) {
+		err = REFTABLE_API_ERROR;
+		goto done;
+	}
+
+	n = reftable_encode_key(&is_restart, out, last, w->scratch,
+				reftable_record_val_type(rec));
+	if (n < 0) {
+		err = n;
+		goto done;
+	}
+	string_view_consume(&out, n);
+
+	n = reftable_record_encode(rec, out, w->hash_size);
+	if (n < 0) {
+		err = n;
+		goto done;
+	}
+	string_view_consume(&out, n);
+
+	err = block_writer_register_restart(w, start.len - out.len, is_restart,
+					    &w->scratch);
+done:
+	return err;
+}
+
+int block_writer_finish(struct block_writer *w)
+{
+	for (uint32_t i = 0; i < w->restart_len; i++) {
+		reftable_put_be24(w->block + w->next, w->restarts[i]);
+		w->next += 3;
+	}
+
+	reftable_put_be16(w->block + w->next, w->restart_len);
+	w->next += 2;
+	reftable_put_be24(w->block + 1 + w->header_off, w->next);
+
+	/*
+	 * Log records are stored zlib-compressed. Note that the compression
+	 * also spans over the restart points we have just written.
+	 */
+	if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) {
+		int block_header_skip = 4 + w->header_off;
+		uLongf src_len = w->next - block_header_skip, compressed_len;
+		int ret;
+
+		ret = deflateReset(w->zstream);
+		if (ret != Z_OK)
+			return REFTABLE_ZLIB_ERROR;
+
+		/*
+		 * Precompute the upper bound of how many bytes the compressed
+		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
+		 * is guaranteed to return `Z_STREAM_END`.
+		 */
+		compressed_len = deflateBound(w->zstream, src_len);
+		REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len,
+					    w->compressed_cap);
+		if (!w->compressed) {
+			ret = REFTABLE_OUT_OF_MEMORY_ERROR;
+			return ret;
+		}
+
+		w->zstream->next_out = w->compressed;
+		w->zstream->avail_out = compressed_len;
+		w->zstream->next_in = w->block + block_header_skip;
+		w->zstream->avail_in = src_len;
+
+		/*
+		 * We want to perform all compression in a single step, which
+		 * is why we can pass Z_FINISH here. As we have precomputed the
+		 * deflated buffer's size via `deflateBound()` this function is
+		 * guaranteed to succeed according to the zlib documentation.
+		 */
+		ret = deflate(w->zstream, Z_FINISH);
+		if (ret != Z_STREAM_END)
+			return REFTABLE_ZLIB_ERROR;
+
+		/*
+		 * Overwrite the uncompressed data we have already written and
+		 * adjust the `next` pointer to point right after the
+		 * compressed data.
+		 */
+		memcpy(w->block + block_header_skip, w->compressed,
+		       w->zstream->total_out);
+		w->next = w->zstream->total_out + block_header_skip;
+	}
+
+	return w->next;
+}
+
+static int read_block(struct reftable_block_source *source,
+		      struct reftable_block_data *dest, uint64_t off,
+		      uint32_t sz)
+{
+	size_t size = block_source_size(source);
+	block_source_release_data(dest);
+	if (off >= size)
+		return 0;
+	if (off + sz > size)
+		sz = size - off;
+	return block_source_read_data(source, dest, off, sz);
+}
+
+int reftable_block_init(struct reftable_block *block,
+			struct reftable_block_source *source,
+			uint32_t offset, uint32_t header_size,
+			uint32_t table_block_size, uint32_t hash_size,
+			uint8_t want_type)
+{
+	uint32_t guess_block_size = table_block_size ?
+		table_block_size : DEFAULT_BLOCK_SIZE;
+	uint32_t full_block_size = table_block_size;
+	uint16_t restart_count;
+	uint32_t restart_off;
+	uint32_t block_size;
+	uint8_t block_type;
+	int err;
+
+	err = read_block(source, &block->block_data, offset, guess_block_size);
+	if (err < 0)
+		goto done;
+
+	block_type = block->block_data.data[header_size];
+	if (!reftable_is_block_type(block_type)) {
+		err = REFTABLE_FORMAT_ERROR;
+		goto done;
+	}
+	if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) {
+		err = 1;
+		goto done;
+	}
+
+	block_size = reftable_get_be24(block->block_data.data + header_size + 1);
+	if (block_size > guess_block_size) {
+		err = read_block(source, &block->block_data, offset, block_size);
+		if (err < 0)
+			goto done;
+	}
+
+	if (block_type == REFTABLE_BLOCK_TYPE_LOG) {
+		uint32_t block_header_skip = 4 + header_size;
+		uLong dst_len = block_size - block_header_skip;
+		uLong src_len = block->block_data.len - block_header_skip;
+
+		/* Log blocks specify the *uncompressed* size in their header. */
+		REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size,
+					    block->uncompressed_cap);
+		if (!block->uncompressed_data) {
+			err = REFTABLE_OUT_OF_MEMORY_ERROR;
+			goto done;
+		}
+
+		/* Copy over the block header verbatim. It's not compressed. */
+		memcpy(block->uncompressed_data, block->block_data.data, block_header_skip);
+
+		if (!block->zstream) {
+			REFTABLE_CALLOC_ARRAY(block->zstream, 1);
+			if (!block->zstream) {
+				err = REFTABLE_OUT_OF_MEMORY_ERROR;
+				goto done;
+			}
+
+			err = inflateInit(block->zstream);
+		} else {
+			err = inflateReset(block->zstream);
+		}
+		if (err != Z_OK) {
+			err = REFTABLE_ZLIB_ERROR;
+			goto done;
+		}
+
+		block->zstream->next_in = block->block_data.data + block_header_skip;
+		block->zstream->avail_in = src_len;
+		block->zstream->next_out = block->uncompressed_data + block_header_skip;
+		block->zstream->avail_out = dst_len;
+
+		/*
+		 * We know both input as well as output size, and we know that
+		 * the sizes should never be bigger than `uInt_MAX` because
+		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
+		 * here to instruct zlib to inflate the data in one go, which
+		 * is more efficient than using `Z_NO_FLUSH`.
+		 */
+		err = inflate(block->zstream, Z_FINISH);
+		if (err != Z_STREAM_END) {
+			err = REFTABLE_ZLIB_ERROR;
+			goto done;
+		}
+		err = 0;
+
+		if (block->zstream->total_out + block_header_skip != block_size) {
+			err = REFTABLE_FORMAT_ERROR;
+			goto done;
+		}
+
+		/* We're done with the input data. */
+		block_source_release_data(&block->block_data);
+		block->block_data.data = block->uncompressed_data;
+		block->block_data.len = block_size;
+		full_block_size = src_len + block_header_skip - block->zstream->avail_in;
+	} else if (full_block_size == 0) {
+		full_block_size = block_size;
+	} else if (block_size < full_block_size && block_size < block->block_data.len &&
+		   block->block_data.data[block_size] != 0) {
+		/* If the block is smaller than the full block size, it is
+		   padded (data followed by '\0') or the next block is
+		   unaligned.
+		*/
+		full_block_size = block_size;
+	}
+
+	restart_count = reftable_get_be16(block->block_data.data + block_size - 2);
+	restart_off = block_size - 2 - 3 * restart_count;
+
+	block->block_type = block_type;
+	block->hash_size = hash_size;
+	block->restart_off = restart_off;
+	block->full_block_size = full_block_size;
+	block->header_off = header_size;
+	block->restart_count = restart_count;
+
+	err = 0;
+
+done:
+	if (err < 0)
+		reftable_block_release(block);
+	return err;
+}
+
+void reftable_block_release(struct reftable_block *block)
+{
+	inflateEnd(block->zstream);
+	reftable_free(block->zstream);
+	reftable_free(block->uncompressed_data);
+	block_source_release_data(&block->block_data);
+	memset(block, 0, sizeof(*block));
+}
+
+uint8_t reftable_block_type(const struct reftable_block *b)
+{
+	return b->block_data.data[b->header_off];
+}
+
+int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key)
+{
+	int off = block->header_off + 4, n;
+	struct string_view in = {
+		.buf = block->block_data.data + off,
+		.len = block->restart_off - off,
+	};
+	uint8_t extra = 0;
+
+	reftable_buf_reset(key);
+
+	n = reftable_decode_key(key, &extra, in);
+	if (n < 0)
+		return n;
+	if (!key->len)
+		return REFTABLE_FORMAT_ERROR;
+
+	return 0;
+}
+
+static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx)
+{
+	return reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx);
+}
+
+void block_iter_init(struct block_iter *it, const struct reftable_block *block)
+{
+	it->block = block;
+	block_iter_seek_start(it);
+}
+
+void block_iter_seek_start(struct block_iter *it)
+{
+	reftable_buf_reset(&it->last_key);
+	it->next_off = it->block->header_off + 4;
+}
+
+struct restart_needle_less_args {
+	int error;
+	struct reftable_buf needle;
+	const struct reftable_block *block;
+};
+
+static int restart_needle_less(size_t idx, void *_args)
+{
+	struct restart_needle_less_args *args = _args;
+	uint32_t off = block_restart_offset(args->block, idx);
+	struct string_view in = {
+		.buf = args->block->block_data.data + off,
+		.len = args->block->restart_off - off,
+	};
+	uint64_t prefix_len, suffix_len;
+	uint8_t extra;
+	int n;
+
+	/*
+	 * Records at restart points are stored without prefix compression, so
+	 * there is no need to fully decode the record key here. This removes
+	 * the need for allocating memory.
+	 */
+	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
+	if (n < 0 || prefix_len) {
+		args->error = 1;
+		return -1;
+	}
+
+	string_view_consume(&in, n);
+	if (suffix_len > in.len) {
+		args->error = 1;
+		return -1;
+	}
+
+	n = memcmp(args->needle.buf, in.buf,
+		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
+	if (n)
+		return n < 0;
+	return args->needle.len < suffix_len;
+}
+
+int block_iter_next(struct block_iter *it, struct reftable_record *rec)
+{
+	struct string_view in = {
+		.buf = (unsigned char *) it->block->block_data.data + it->next_off,
+		.len = it->block->restart_off - it->next_off,
+	};
+	struct string_view start = in;
+	uint8_t extra = 0;
+	int n = 0;
+
+	if (it->next_off >= it->block->restart_off)
+		return 1;
+
+	n = reftable_decode_key(&it->last_key, &extra, in);
+	if (n < 0)
+		return -1;
+	if (!it->last_key.len)
+		return REFTABLE_FORMAT_ERROR;
+
+	string_view_consume(&in, n);
+	n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size,
+				   &it->scratch);
+	if (n < 0)
+		return -1;
+	string_view_consume(&in, n);
+
+	it->next_off += start.len - in.len;
+	return 0;
+}
+
+void block_iter_reset(struct block_iter *it)
+{
+	reftable_buf_reset(&it->last_key);
+	it->next_off = 0;
+	it->block = NULL;
+}
+
+void block_iter_close(struct block_iter *it)
+{
+	reftable_buf_release(&it->last_key);
+	reftable_buf_release(&it->scratch);
+}
+
+int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want)
+{
+	struct restart_needle_less_args args = {
+		.needle = *want,
+		.block = it->block,
+	};
+	struct reftable_record rec;
+	int err = 0;
+	size_t i;
+
+	/*
+	 * Perform a binary search over the block's restart points, which
+	 * avoids doing a linear scan over the whole block. Like this, we
+	 * identify the section of the block that should contain our key.
+	 *
+	 * Note that we explicitly search for the first restart point _greater_
+	 * than the sought-after record, not _greater or equal_ to it. In case
+	 * the sought-after record is located directly at the restart point we
+	 * would otherwise start doing the linear search at the preceding
+	 * restart point. While that works alright, we would end up scanning
+	 * too many records.
+	 */
+	i = binsearch(it->block->restart_count, &restart_needle_less, &args);
+	if (args.error) {
+		err = REFTABLE_FORMAT_ERROR;
+		goto done;
+	}
+
+	/*
+	 * Now there are multiple cases:
+	 *
+	 * - `i == 0`: The wanted record is smaller than the record found at
+	 *   the first restart point. As the first restart point is the first
+	 *   record in the block, our wanted record cannot be located in this
+	 *   block at all. We still need to position the iterator so that the
+	 *   next call to `block_iter_next()` will yield an end-of-iterator
+	 *   signal.
+	 *
+	 * - `i == restart_count`: The wanted record was not found at any of
+	 *   the restart points. As there is no restart point at the end of
+	 *   the section the record may thus be contained in the last section.
+	 *
+	 * - `i > 0`: The wanted record must be contained in the section
+	 *   before the found restart point. We thus do a linear search
+	 *   starting from the preceding restart point.
+	 */
+	if (i > 0)
+		it->next_off = block_restart_offset(it->block, i - 1);
+	else
+		it->next_off = it->block->header_off + 4;
+
+	err = reftable_record_init(&rec, reftable_block_type(it->block));
+	if (err < 0)
+		goto done;
+
+	/*
+	 * We're looking for the last entry less than the wanted key so that
+	 * the next call to `block_iter_next()` would yield the wanted
+	 * record. We thus don't want to position our iterator at the sought
+	 * after record, but one before. To do so, we have to go one entry too
+	 * far and then back up.
+	 */
+	while (1) {
+		size_t prev_off = it->next_off;
+
+		err = block_iter_next(it, &rec);
+		if (err < 0)
+			goto done;
+		if (err > 0) {
+			it->next_off = prev_off;
+			err = 0;
+			goto done;
+		}
+
+		err = reftable_record_key(&rec, &it->last_key);
+		if (err < 0)
+			goto done;
+
+		/*
+		 * Check whether the current key is greater or equal to the
+		 * sought-after key. In case it is greater we know that the
+		 * record does not exist in the block and can thus abort early.
+		 * In case it is equal to the sought-after key we have found
+		 * the desired record.
+		 *
+		 * Note that we store the next record's key directly in
+		 * `last_key` without restoring the key of the preceding record
+		 * in case we need to go one record back. This is safe to do as
+		 * `block_iter_next()` would return the ref whose key is equal
+		 * to `last_key` now, and naturally all keys share a prefix
+		 * with themselves.
+		 */
+		if (reftable_buf_cmp(&it->last_key, want) >= 0) {
+			it->next_off = prev_off;
+			goto done;
+		}
+	}
+
+done:
+	reftable_record_release(&rec);
+	return err;
+}
+
+static int block_iter_seek_void(void *it, struct reftable_record *want)
+{
+	struct reftable_buf buf = REFTABLE_BUF_INIT;
+	struct block_iter *bi = it;
+	int err;
+
+	if (bi->block->block_type != want->type)
+		return REFTABLE_API_ERROR;
+
+	err = reftable_record_key(want, &buf);
+	if (err < 0)
+		goto out;
+
+	err = block_iter_seek_key(it, &buf);
+	if (err < 0)
+		goto out;
+
+	err = 0;
+
+out:
+	reftable_buf_release(&buf);
+	return err;
+}
+
+static int block_iter_next_void(void *it, struct reftable_record *rec)
+{
+	return block_iter_next(it, rec);
+}
+
+static void block_iter_close_void(void *it)
+{
+	block_iter_close(it);
+}
+
+static struct reftable_iterator_vtable block_iter_vtable = {
+	.seek = &block_iter_seek_void,
+	.next = &block_iter_next_void,
+	.close = &block_iter_close_void,
+};
+
+int reftable_block_init_iterator(const struct reftable_block *b,
+				 struct reftable_iterator *it)
+{
+	struct block_iter *bi;
+
+	REFTABLE_CALLOC_ARRAY(bi, 1);
+	block_iter_init(bi, b);
+
+	assert(!it->ops);
+	it->iter_arg = bi;
+	it->ops = &block_iter_vtable;
+
+	return 0;
+}
+
+void block_writer_release(struct block_writer *bw)
+{
+	deflateEnd(bw->zstream);
+	REFTABLE_FREE_AND_NULL(bw->zstream);
+	REFTABLE_FREE_AND_NULL(bw->restarts);
+	REFTABLE_FREE_AND_NULL(bw->compressed);
+	reftable_buf_release(&bw->scratch);
+	reftable_buf_release(&bw->last_key);
+	/* the block is not owned. */
+}
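
The restart-point machinery implemented by block_writer_register_restart(), restart_needle_less() and block_iter_seek_key() above boils down to this: each key is stored as a prefix length plus the suffix that differs from the preceding key, except at every Nth entry (a restart point), where the full key is written and its offset recorded in a table at the block's end; the table costs 3 bytes per offset plus a 2-byte count, which is the `2 + 3 * rlen` bound checked by the writer. A seek binary-searches the restart table, then scans linearly from the preceding restart point. The following standalone sketch is not part of the patch; it uses made-up keys and a restart interval of 2 (reftable uses 16), and elides the varint and value-type encoding, to show just the prefix-compression bookkeeping:

#include <stdio.h>
#include <string.h>

/* Length of the longest common prefix of two NUL-terminated keys. */
static size_t common_prefix(const char *a, const char *b)
{
	size_t i = 0;
	while (a[i] && a[i] == b[i])
		i++;
	return i;
}

int main(void)
{
	static const char *keys[] = {
		"refs/heads/main",
		"refs/heads/maint",
		"refs/heads/master",
		"refs/tags/v1.0.0",
		"refs/tags/v1.1.0",
	};
	const int restart_interval = 2; /* reftable uses 16 */
	const char *last = "";

	for (size_t i = 0; i < sizeof(keys) / sizeof(*keys); i++) {
		/* Restart points reset the shared prefix to zero, so
		 * the full key is stored and can be decoded without
		 * knowing any preceding record. */
		size_t prefix = i % restart_interval == 0 ?
			0 : common_prefix(last, keys[i]);

		printf("%s prefix=%zu suffix=\"%s\"\n",
		       i % restart_interval == 0 ? "restart:" : "entry:  ",
		       prefix, keys[i] + prefix);
		last = keys[i];
	}

	return 0;
}

Since keys at restart points carry no prefix, restart_needle_less() can compare the needle against the stored suffix directly, without allocating or reconstructing a full key.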
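For readers unfamiliar with the zlib pattern that block_writer_finish() relies on, here is a minimal standalone sketch, not part of the patch: when the output buffer is sized with deflateBound(), a single deflate(Z_FINISH) call is guaranteed to consume all input and return Z_STREAM_END, so no streaming loop is needed. The buffer size and input contents are arbitrary choices for illustration; build with `cc demo.c -lz`.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
	unsigned char src[4096];
	z_stream z = { 0 };
	unsigned char *out;
	uLong bound;

	memset(src, 'x', sizeof(src)); /* highly compressible input */

	if (deflateInit(&z, 9) != Z_OK)
		return 1;

	/* Upper bound on the compressed size for this input length. */
	bound = deflateBound(&z, sizeof(src));
	out = malloc(bound);
	if (!out)
		return 1;

	z.next_in = src;
	z.avail_in = sizeof(src);
	z.next_out = out;
	z.avail_out = bound;

	/*
	 * Because avail_out >= deflateBound(), Z_FINISH compresses
	 * everything in one call and reports end-of-stream.
	 */
	if (deflate(&z, Z_FINISH) != Z_STREAM_END)
		return 1;

	printf("%zu -> %lu bytes\n", sizeof(src), z.total_out);

	deflateEnd(&z);
	free(out);
	return 0;
}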