Diffstat (limited to 'reftable/table.c')
-rw-r--r--	reftable/table.c	779
1 file changed, 779 insertions(+), 0 deletions(-)
diff --git a/reftable/table.c b/reftable/table.c
new file mode 100644
index 0000000000..56362df0ed
--- /dev/null
+++ b/reftable/table.c
@@ -0,0 +1,779 @@
+/*
+ * Copyright 2020 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file or at
+ * https://developers.google.com/open-source/licenses/bsd
+ */
+
+#include "table.h"
+
+#include "system.h"
+#include "block.h"
+#include "blocksource.h"
+#include "constants.h"
+#include "iter.h"
+#include "record.h"
+#include "reftable-error.h"
+
+static struct reftable_table_offsets *
+table_offsets_for(struct reftable_table *t, uint8_t typ)
+{
+ switch (typ) {
+ case REFTABLE_BLOCK_TYPE_REF:
+ return &t->ref_offsets;
+ case REFTABLE_BLOCK_TYPE_LOG:
+ return &t->log_offsets;
+ case REFTABLE_BLOCK_TYPE_OBJ:
+ return &t->obj_offsets;
+ }
+ abort();
+}
+
+enum reftable_hash reftable_table_hash_id(struct reftable_table *t)
+{
+ return t->hash_id;
+}
+
+const char *reftable_table_name(struct reftable_table *t)
+{
+ return t->name;
+}
+
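+/*
+ * Parse the footer, which repeats the file header and then encodes the
+ * positions of the auxiliary sections. As parsed below, the repeated
+ * header is followed by five big-endian 64-bit values -- the ref index
+ * position, the obj position (with the length of abbreviated object IDs
+ * packed into its low 5 bits), the obj index position, the log position
+ * and the log index position -- and a CRC-32 checksum over the preceding
+ * footer bytes.
+ */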
+static int parse_footer(struct reftable_table *t, uint8_t *footer,
+ uint8_t *header)
+{
+ uint8_t *f = footer;
+ uint8_t first_block_typ;
+ int err = 0;
+ uint32_t computed_crc;
+ uint32_t file_crc;
+
+ if (memcmp(f, "REFT", 4)) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+ f += 4;
+
+ if (memcmp(footer, header, header_size(t->version))) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ f++;
+ t->block_size = reftable_get_be24(f);
+
+ f += 3;
+ t->min_update_index = reftable_get_be64(f);
+ f += 8;
+ t->max_update_index = reftable_get_be64(f);
+ f += 8;
+
+ if (t->version == 1) {
+ t->hash_id = REFTABLE_HASH_SHA1;
+ } else {
+ switch (reftable_get_be32(f)) {
+ case REFTABLE_FORMAT_ID_SHA1:
+ t->hash_id = REFTABLE_HASH_SHA1;
+ break;
+ case REFTABLE_FORMAT_ID_SHA256:
+ t->hash_id = REFTABLE_HASH_SHA256;
+ break;
+ default:
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ f += 4;
+ }
+
+ t->ref_offsets.index_offset = reftable_get_be64(f);
+ f += 8;
+
+ t->obj_offsets.offset = reftable_get_be64(f);
+ f += 8;
+
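+ /*
+ * The low 5 bits of the obj section's position encode the length of
+ * the abbreviated object IDs it stores; the remaining bits are the
+ * actual offset.
+ */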
+ t->object_id_len = t->obj_offsets.offset & ((1 << 5) - 1);
+ t->obj_offsets.offset >>= 5;
+
+ t->obj_offsets.index_offset = reftable_get_be64(f);
+ f += 8;
+ t->log_offsets.offset = reftable_get_be64(f);
+ f += 8;
+ t->log_offsets.index_offset = reftable_get_be64(f);
+ f += 8;
+
+ computed_crc = crc32(0, footer, f - footer);
+ file_crc = reftable_get_be32(f);
+ f += 4;
+ if (computed_crc != file_crc) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ first_block_typ = header[header_size(t->version)];
+ t->ref_offsets.is_present = (first_block_typ == REFTABLE_BLOCK_TYPE_REF);
+ t->ref_offsets.offset = 0;
+ t->log_offsets.is_present = (first_block_typ == REFTABLE_BLOCK_TYPE_LOG ||
+ t->log_offsets.offset > 0);
+ t->obj_offsets.is_present = t->obj_offsets.offset > 0;
+ if (t->obj_offsets.is_present && !t->object_id_len) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ err = 0;
+done:
+ return err;
+}
+
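+/*
+ * An iterator over the blocks of a single record type within one table.
+ * It holds a reference on the table for as long as it is in use.
+ */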
+struct table_iter {
+ struct reftable_table *table;
+ uint8_t typ;
+ uint64_t block_off;
+ struct reftable_block block;
+ struct block_iter bi;
+ int is_finished;
+};
+
+static int table_iter_init(struct table_iter *ti, struct reftable_table *t)
+{
+ struct block_iter bi = BLOCK_ITER_INIT;
+ memset(ti, 0, sizeof(*ti));
+ reftable_table_incref(t);
+ ti->table = t;
+ ti->bi = bi;
+ return 0;
+}
+
+static int table_iter_next_in_block(struct table_iter *ti,
+ struct reftable_record *rec)
+{
+ int res = block_iter_next(&ti->bi, rec);
+ if (res == 0 && reftable_record_type(rec) == REFTABLE_BLOCK_TYPE_REF) {
+ rec->u.ref.update_index += ti->table->min_update_index;
+ }
+
+ return res;
+}
+
+static void table_iter_block_done(struct table_iter *ti)
+{
+ reftable_block_release(&ti->block);
+ block_iter_reset(&ti->bi);
+}
+
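+/*
+ * Initialize `block` with the block at `next_off`. Returns a positive
+ * value when the offset lies at or beyond the end of the table, which
+ * callers treat as the end of iteration, and a negative error code on
+ * failure.
+ */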
+int table_init_block(struct reftable_table *t, struct reftable_block *block,
+ uint64_t next_off, uint8_t want_typ)
+{
+ uint32_t header_off = next_off ? 0 : header_size(t->version);
+ int err;
+
+ if (next_off >= t->size)
+ return 1;
+
+ err = reftable_block_init(block, &t->source, next_off, header_off,
+ t->block_size, hash_size(t->hash_id), want_typ);
+ if (err)
+ reftable_block_release(block);
+ return err;
+}
+
+static void table_iter_close(struct table_iter *ti)
+{
+ table_iter_block_done(ti);
+ block_iter_close(&ti->bi);
+ reftable_table_decref(ti->table);
+}
+
+static int table_iter_next_block(struct table_iter *ti)
+{
+ uint64_t next_block_off = ti->block_off + ti->block.full_block_size;
+ int err;
+
+ err = table_init_block(ti->table, &ti->block, next_block_off, ti->typ);
+ if (err > 0)
+ ti->is_finished = 1;
+ if (err)
+ return err;
+
+ ti->block_off = next_block_off;
+ ti->is_finished = 0;
+ block_iter_init(&ti->bi, &ti->block);
+
+ return 0;
+}
+
+static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
+{
+ if (reftable_record_type(rec) != ti->typ)
+ return REFTABLE_API_ERROR;
+
+ while (1) {
+ int err;
+
+ if (ti->is_finished)
+ return 1;
+
+ /*
+ * Check whether the current block still has more records. If
+ * so, return the next one. If the iterator returns a positive
+ * value, the current block has been exhausted.
+ */
+ err = table_iter_next_in_block(ti, rec);
+ if (err <= 0)
+ return err;
+
+ /*
+ * Otherwise, we need to continue to the next block in the
+ * table and retry. If there are no more blocks then the
+ * iterator is drained.
+ */
+ err = table_iter_next_block(ti);
+ if (err) {
+ ti->is_finished = 1;
+ return err;
+ }
+ }
+}
+
+static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ)
+{
+ int err;
+
+ err = table_init_block(ti->table, &ti->block, off, typ);
+ if (err != 0)
+ return err;
+
+ ti->typ = reftable_block_type(&ti->block);
+ ti->block_off = off;
+ block_iter_init(&ti->bi, &ti->block);
+ ti->is_finished = 0;
+ return 0;
+}
+
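+/*
+ * Position the iterator at the start of the section holding records of
+ * type `typ`, or at the start of that section's index if `index` is set.
+ * Returns a positive value when an index was requested but none exists.
+ */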
+static int table_iter_seek_start(struct table_iter *ti, uint8_t typ, int index)
+{
+ struct reftable_table_offsets *offs = table_offsets_for(ti->table, typ);
+ uint64_t off = offs->offset;
+ if (index) {
+ off = offs->index_offset;
+ if (off == 0) {
+ return 1;
+ }
+ typ = REFTABLE_BLOCK_TYPE_INDEX;
+ }
+
+ return table_iter_seek_to(ti, off, typ);
+}
+
+static int table_iter_seek_linear(struct table_iter *ti,
+ struct reftable_record *want)
+{
+ struct reftable_buf want_key = REFTABLE_BUF_INIT;
+ struct reftable_buf got_key = REFTABLE_BUF_INIT;
+ struct reftable_record rec;
+ int err;
+
+ err = reftable_record_init(&rec, reftable_record_type(want));
+ if (err < 0)
+ goto done;
+
+ err = reftable_record_key(want, &want_key);
+ if (err < 0)
+ goto done;
+
+ /*
+ * First we need to locate the block that must contain our record. To
+ * do so we scan through blocks linearly until we find the first block
+ * whose first key is bigger than our wanted key. Once we have found
+ * that block we know that the key must be contained in the preceding
+ * block.
+ *
+ * This algorithm is somewhat unfortunate because it means that we
+ * always have to seek one block too far and then back up. But as we
+ * can only decode the _first_ key of a block, not its _last_ key, we
+ * have no other way to do this.
+ */
+ while (1) {
+ struct table_iter next = *ti;
+
+ /*
+ * We must be careful not to modify the underlying data of `ti`
+ * because we may find that `next` does not contain our desired
+ * block, but that `ti` does. In that case, we would discard
+ * `next` and continue with `ti`.
+ *
+ * This also means that we cannot reuse allocated memory for
+ * `next` here. While it would be great if we could, it should
+ * in practice not be too bad given that we should only ever
+ * end up doing linear seeks with at most three blocks. As soon
+ * as we have more than three blocks we would have an index, so
+ * we would not do a linear search there anymore.
+ */
+ memset(&next.block.block_data, 0, sizeof(next.block.block_data));
+ next.block.zstream = NULL;
+ next.block.uncompressed_data = NULL;
+ next.block.uncompressed_cap = 0;
+
+ err = table_iter_next_block(&next);
+ if (err < 0)
+ goto done;
+ if (err > 0)
+ break;
+
+ err = reftable_block_first_key(&next.block, &got_key);
+ if (err < 0)
+ goto done;
+
+ if (reftable_buf_cmp(&got_key, &want_key) > 0) {
+ table_iter_block_done(&next);
+ break;
+ }
+
+ table_iter_block_done(ti);
+ *ti = next;
+ }
+
+ /*
+ * We have located the block that must contain our record, so we seek
+ * the wanted key inside of it. If the block does not contain our key
+ * we know that the corresponding record does not exist.
+ */
+ block_iter_init(&ti->bi, &ti->block);
+ err = block_iter_seek_key(&ti->bi, &want_key);
+ if (err < 0)
+ goto done;
+ err = 0;
+
+done:
+ reftable_record_release(&rec);
+ reftable_buf_release(&want_key);
+ reftable_buf_release(&got_key);
+ return err;
+}
+
+static int table_iter_seek_indexed(struct table_iter *ti,
+ struct reftable_record *rec)
+{
+ struct reftable_record want_index = {
+ .type = REFTABLE_BLOCK_TYPE_INDEX,
+ .u.idx = { .last_key = REFTABLE_BUF_INIT },
+ };
+ struct reftable_record index_result = {
+ .type = REFTABLE_BLOCK_TYPE_INDEX,
+ .u.idx = { .last_key = REFTABLE_BUF_INIT },
+ };
+ int err;
+
+ err = reftable_record_key(rec, &want_index.u.idx.last_key);
+ if (err < 0)
+ goto done;
+
+ /*
+ * The index may consist of multiple levels, where each level may have
+ * multiple index blocks. We start by doing a linear search in the
+ * highest level, which identifies the relevant index block as well as
+ * the record inside that block that corresponds to our wanted key.
+ */
+ err = table_iter_seek_linear(ti, &want_index);
+ if (err < 0)
+ goto done;
+
+ /*
+ * Traverse down the levels until we find a non-index entry.
+ */
+ while (1) {
+ /*
+ * In case we seek a record that does not exist, the index
+ * iterator will tell us that the iteration is over. This works
+ * because the last index entry of the current level contains
+ * the last key it knows about. So in case the key we seek is
+ * larger than the last indexed key, we know that it won't
+ * exist.
+ *
+ * There is one subtlety in the layout of the index section
+ * that makes this work as expected: the highest-level index is
+ * at the end of the section and points backwards, so we start
+ * reading from the end of the index section, not the
+ * beginning.
+ *
+ * If that wasn't the case and the order was reversed, then the
+ * linear seek would seek into the lower levels and traverse
+ * all levels of the index only to find out that the key does
+ * not exist.
+ */
+ err = table_iter_next(ti, &index_result);
+ if (err != 0)
+ goto done;
+
+ err = table_iter_seek_to(ti, index_result.u.idx.offset, 0);
+ if (err != 0)
+ goto done;
+
+ block_iter_init(&ti->bi, &ti->block);
+
+ err = block_iter_seek_key(&ti->bi, &want_index.u.idx.last_key);
+ if (err < 0)
+ goto done;
+
+ if (ti->typ == reftable_record_type(rec)) {
+ err = 0;
+ break;
+ }
+
+ if (ti->typ != REFTABLE_BLOCK_TYPE_INDEX) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+ }
+
+done:
+ reftable_record_release(&want_index);
+ reftable_record_release(&index_result);
+ return err;
+}
+
+static int table_iter_seek(struct table_iter *ti,
+ struct reftable_record *want)
+{
+ uint8_t typ = reftable_record_type(want);
+ struct reftable_table_offsets *offs = table_offsets_for(ti->table, typ);
+ int err;
+
+ err = table_iter_seek_start(ti, reftable_record_type(want),
+ !!offs->index_offset);
+ if (err < 0)
+ goto out;
+
+ if (offs->index_offset)
+ err = table_iter_seek_indexed(ti, want);
+ else
+ err = table_iter_seek_linear(ti, want);
+
+out:
+ return err;
+}
+
+static int table_iter_seek_void(void *ti, struct reftable_record *want)
+{
+ return table_iter_seek(ti, want);
+}
+
+static int table_iter_next_void(void *ti, struct reftable_record *rec)
+{
+ return table_iter_next(ti, rec);
+}
+
+static void table_iter_close_void(void *ti)
+{
+ table_iter_close(ti);
+}
+
+static struct reftable_iterator_vtable table_iter_vtable = {
+ .seek = &table_iter_seek_void,
+ .next = &table_iter_next_void,
+ .close = &table_iter_close_void,
+};
+
+static void iterator_from_table_iter(struct reftable_iterator *it,
+ struct table_iter *ti)
+{
+ assert(!it->ops);
+ it->iter_arg = ti;
+ it->ops = &table_iter_vtable;
+}
+
+int table_init_iter(struct reftable_table *t,
+ struct reftable_iterator *it,
+ uint8_t typ)
+{
+ struct reftable_table_offsets *offs = table_offsets_for(t, typ);
+
+ if (offs->is_present) {
+ struct table_iter *ti;
+ REFTABLE_ALLOC_ARRAY(ti, 1);
+ if (!ti)
+ return REFTABLE_OUT_OF_MEMORY_ERROR;
+
+ table_iter_init(ti, t);
+ iterator_from_table_iter(it, ti);
+ } else {
+ iterator_set_empty(it);
+ }
+
+ return 0;
+}
+
+int reftable_table_init_ref_iterator(struct reftable_table *t,
+ struct reftable_iterator *it)
+{
+ return table_init_iter(t, it, REFTABLE_BLOCK_TYPE_REF);
+}
+
+int reftable_table_init_log_iterator(struct reftable_table *t,
+ struct reftable_iterator *it)
+{
+ return table_init_iter(t, it, REFTABLE_BLOCK_TYPE_LOG);
+}
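+
+/*
+ * A minimal usage sketch for the public iterator API above; illustrative
+ * only. It assumes the helpers declared in the public reftable headers
+ * (reftable_block_source_from_file(), reftable_iterator_seek_ref(),
+ * reftable_iterator_next_ref()); error handling is abbreviated:
+ *
+ *    struct reftable_block_source src = { 0 };
+ *    struct reftable_iterator it = { 0 };
+ *    struct reftable_ref_record ref = { 0 };
+ *    struct reftable_table *t = NULL;
+ *    int err;
+ *
+ *    err = reftable_block_source_from_file(&src, "foo.ref");
+ *    if (!err)
+ *        err = reftable_table_new(&t, &src, "foo.ref");
+ *    if (!err)
+ *        err = reftable_table_init_ref_iterator(t, &it);
+ *    if (!err)
+ *        err = reftable_iterator_seek_ref(&it, "");
+ *    while (!err && !(err = reftable_iterator_next_ref(&it, &ref)))
+ *        puts(ref.refname);
+ *    reftable_ref_record_release(&ref);
+ *    reftable_iterator_destroy(&it);
+ *    reftable_table_decref(t);
+ */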
+
+int reftable_table_new(struct reftable_table **out,
+ struct reftable_block_source *source, char const *name)
+{
+ struct reftable_block_data footer = { 0 };
+ struct reftable_block_data header = { 0 };
+ struct reftable_table *t;
+ uint64_t file_size = block_source_size(source);
+ uint32_t read_size;
+ ssize_t bytes_read;
+ int err;
+
+ REFTABLE_CALLOC_ARRAY(t, 1);
+ if (!t) {
+ err = REFTABLE_OUT_OF_MEMORY_ERROR;
+ goto done;
+ }
+
+ /*
+ * We need one extra byte to read the type of the first block. We also
+ * always read a v2-sized header, as it is the larger of the two
+ * header formats.
+ */
+ read_size = header_size(2) + 1;
+ if (read_size > file_size) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ bytes_read = block_source_read_data(source, &header, 0, read_size);
+ if (bytes_read < 0 || (size_t)bytes_read != read_size) {
+ err = REFTABLE_IO_ERROR;
+ goto done;
+ }
+
+ if (memcmp(header.data, "REFT", 4)) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+ t->version = header.data[4];
+ if (t->version != 1 && t->version != 2) {
+ err = REFTABLE_FORMAT_ERROR;
+ goto done;
+ }
+
+ t->size = file_size - footer_size(t->version);
+ t->source = *source;
+ t->name = reftable_strdup(name);
+ if (!t->name) {
+ err = REFTABLE_OUT_OF_MEMORY_ERROR;
+ goto done;
+ }
+ t->hash_id = 0;
+ t->refcount = 1;
+
+ bytes_read = block_source_read_data(source, &footer, t->size,
+ footer_size(t->version));
+ if (bytes_read < 0 || (size_t)bytes_read != footer_size(t->version)) {
+ err = REFTABLE_IO_ERROR;
+ goto done;
+ }
+
+ err = parse_footer(t, footer.data, header.data);
+ if (err)
+ goto done;
+
+ *out = t;
+
+done:
+ block_source_release_data(&footer);
+ block_source_release_data(&header);
+ if (err) {
+ if (t)
+ reftable_free(t->name);
+ reftable_free(t);
+ block_source_close(source);
+ }
+ return err;
+}
+
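+/*
+ * Tables are reference-counted: reftable_table_new() hands out a table
+ * with a refcount of 1, every iterator created from it takes another
+ * reference, and the block source is only closed once the last reference
+ * is dropped.
+ */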
+void reftable_table_incref(struct reftable_table *t)
+{
+ t->refcount++;
+}
+
+void reftable_table_decref(struct reftable_table *t)
+{
+ if (!t)
+ return;
+ if (--t->refcount)
+ return;
+ block_source_close(&t->source);
+ REFTABLE_FREE_AND_NULL(t->name);
+ reftable_free(t);
+}
+
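+/*
+ * Resolve refs pointing at the given object via the obj section, which
+ * maps abbreviated object IDs to the offsets of the ref blocks that
+ * reference them.
+ */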
+static int reftable_table_refs_for_indexed(struct reftable_table *t,
+ struct reftable_iterator *it,
+ uint8_t *oid)
+{
+ struct reftable_record want = {
+ .type = REFTABLE_BLOCK_TYPE_OBJ,
+ .u.obj = {
+ .hash_prefix = oid,
+ .hash_prefix_len = t->object_id_len,
+ },
+ };
+ struct reftable_iterator oit = { NULL };
+ struct reftable_record got = {
+ .type = REFTABLE_BLOCK_TYPE_OBJ,
+ .u.obj = { 0 },
+ };
+ int err = 0;
+ struct indexed_table_ref_iter *itr = NULL;
+
+ /* Look through the reverse index. */
+ err = table_init_iter(t, &oit, REFTABLE_BLOCK_TYPE_OBJ);
+ if (err < 0)
+ goto done;
+
+ err = iterator_seek(&oit, &want);
+ if (err != 0)
+ goto done;
+
+ /* read out the reftable_obj_record */
+ err = iterator_next(&oit, &got);
+ if (err < 0)
+ goto done;
+
+ if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix,
+ t->object_id_len)) {
+ /* didn't find it; return empty iterator */
+ iterator_set_empty(it);
+ err = 0;
+ goto done;
+ }
+
+ err = indexed_table_ref_iter_new(&itr, t, oid, hash_size(t->hash_id),
+ got.u.obj.offsets,
+ got.u.obj.offset_len);
+ if (err < 0)
+ goto done;
+ got.u.obj.offsets = NULL;
+ iterator_from_indexed_table_ref_iter(it, itr);
+
+done:
+ reftable_iterator_destroy(&oit);
+ reftable_record_release(&got);
+ return err;
+}
+
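+/*
+ * Without an obj section we have to scan all refs and filter out those
+ * that do not point at the wanted object ID.
+ */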
+static int reftable_table_refs_for_unindexed(struct reftable_table *t,
+ struct reftable_iterator *it,
+ uint8_t *oid)
+{
+ struct table_iter *ti;
+ struct filtering_ref_iterator *filter = NULL;
+ struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT;
+ uint32_t oid_len = hash_size(t->hash_id);
+ int err;
+
+ REFTABLE_ALLOC_ARRAY(ti, 1);
+ if (!ti) {
+ err = REFTABLE_OUT_OF_MEMORY_ERROR;
+ goto out;
+ }
+
+ table_iter_init(ti, t);
+ err = table_iter_seek_start(ti, REFTABLE_BLOCK_TYPE_REF, 0);
+ if (err < 0)
+ goto out;
+
+ filter = reftable_malloc(sizeof(*filter));
+ if (!filter) {
+ err = REFTABLE_OUT_OF_MEMORY_ERROR;
+ goto out;
+ }
+ *filter = empty;
+
+ err = reftable_buf_add(&filter->oid, oid, oid_len);
+ if (err < 0)
+ goto out;
+
+ iterator_from_table_iter(&filter->it, ti);
+
+ iterator_from_filtering_ref_iterator(it, filter);
+
+ err = 0;
+
+out:
+ if (err < 0) {
+ if (ti)
+ table_iter_close(ti);
+ reftable_free(ti);
+ }
+ return err;
+}
+
+int reftable_table_refs_for(struct reftable_table *t,
+ struct reftable_iterator *it, uint8_t *oid)
+{
+ if (t->obj_offsets.is_present)
+ return reftable_table_refs_for_indexed(t, it, oid);
+ return reftable_table_refs_for_unindexed(t, it, oid);
+}
+
+uint64_t reftable_table_max_update_index(struct reftable_table *t)
+{
+ return t->max_update_index;
+}
+
+uint64_t reftable_table_min_update_index(struct reftable_table *t)
+{
+ return t->min_update_index;
+}
+
+int reftable_table_iterator_init(struct reftable_table_iterator *it,
+ struct reftable_table *t)
+{
+ struct table_iter *ti;
+ int err;
+
+ REFTABLE_ALLOC_ARRAY(ti, 1);
+ if (!ti)
+ return REFTABLE_OUT_OF_MEMORY_ERROR;
+
+ err = table_iter_init(ti, t);
+ if (err < 0)
+ goto out;
+
+ it->iter_arg = ti;
+ err = 0;
+
+out:
+ if (err < 0)
+ reftable_free(ti);
+ return err;
+}
+
+void reftable_table_iterator_release(struct reftable_table_iterator *it)
+{
+ if (!it->iter_arg)
+ return;
+ table_iter_close(it->iter_arg);
+ reftable_free(it->iter_arg);
+ it->iter_arg = NULL;
+}
+
+int reftable_table_iterator_next(struct reftable_table_iterator *it,
+ const struct reftable_block **out)
+{
+ struct table_iter *ti = it->iter_arg;
+ int err;
+
+ err = table_iter_next_block(ti);
+ if (err)
+ return err;
+
+ *out = &ti->block;
+
+ return 0;
+}
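+
+/*
+ * A minimal sketch of walking a table block by block with the iterator
+ * above (illustrative only; error handling elided, `t` is an open
+ * reftable_table):
+ *
+ *    struct reftable_table_iterator ti = { 0 };
+ *    const struct reftable_block *block;
+ *
+ *    if (!reftable_table_iterator_init(&ti, t))
+ *        while (!reftable_table_iterator_next(&ti, &block))
+ *            inspect(block); /* hypothetical consumer */
+ *    reftable_table_iterator_release(&ti);
+ */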