Diffstat (limited to 'reftable')
38 files changed, 8942 insertions, 0 deletions
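Before the per-file diffs, a brief orientation may help: the new reftable/basics.{c,h} added below provide pluggable allocators (reftable_set_alloc()), a small growable buffer type (struct reftable_buf) and helpers such as parse_names(). The following stand-alone sketch shows how these primitives fit together. It is not part of the patch; it assumes it is compiled inside the Git source tree so the reftable headers (and the system.h they pull in) are on the include path, and main(), the counting_* wrappers and the sample ref names are made up purely for illustration.

/*
 * Hypothetical demo, not part of this patch: main(), the counting_* wrappers
 * and the sample ref names are invented for illustration only.
 */
#define REFTABLE_ALLOW_BANNED_ALLOCATORS /* same trick basics.c uses below */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "reftable/basics.h"

static size_t live_allocations;

/* Counting wrappers installed via reftable_set_alloc(). */
static void *counting_malloc(size_t sz)
{
	live_allocations++;
	return malloc(sz);
}

static void *counting_realloc(void *p, size_t sz)
{
	if (!p)
		live_allocations++;
	return realloc(p, sz);
}

static void counting_free(void *p)
{
	if (p)
		live_allocations--;
	free(p);
}

int main(void)
{
	struct reftable_buf buf = REFTABLE_BUF_INIT;
	char list[] = "refs/heads/main\nrefs/tags/v1.0\n";
	char **names;

	/* Route every reftable_malloc()/reftable_free() through the wrappers. */
	reftable_set_alloc(counting_malloc, counting_realloc, counting_free);

	/* reftable_buf_addstr() returns 0 on success, negative on allocation failure. */
	if (reftable_buf_addstr(&buf, "refs/heads/main") < 0)
		return 1;
	printf("buffer holds '%s' (%lu bytes)\n", buf.buf, (unsigned long)buf.len);

	/* parse_names() splits a newline-separated list; empty names are discarded. */
	names = parse_names(list, strlen(list));
	if (names) {
		printf("parsed %lu names\n",
		       (unsigned long)names_length((const char **)names));
		free_names(names);
	}

	reftable_buf_release(&buf);
	printf("outstanding allocations: %lu\n", (unsigned long)live_allocations);
	return 0;
}

The same allocator hooks are what the rest of the library (block.c, iter.c, merged.c, ...) uses via reftable_malloc()/reftable_free(), which is why basics.h bans direct calls to malloc() and friends everywhere except basics.c itself.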
diff --git a/reftable/LICENSE b/reftable/LICENSE new file mode 100644 index 0000000000..402e0f9356 --- /dev/null +++ b/reftable/LICENSE @@ -0,0 +1,31 @@ +BSD License + +Copyright (c) 2020, Google LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +* Neither the name of Google LLC nor the names of its contributors may +be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/reftable/basics.c b/reftable/basics.c new file mode 100644 index 0000000000..9988ebd635 --- /dev/null +++ b/reftable/basics.c @@ -0,0 +1,267 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#define REFTABLE_ALLOW_BANNED_ALLOCATORS +#include "basics.h" +#include "reftable-basics.h" +#include "reftable-error.h" + +static void *(*reftable_malloc_ptr)(size_t sz); +static void *(*reftable_realloc_ptr)(void *, size_t); +static void (*reftable_free_ptr)(void *); + +void *reftable_malloc(size_t sz) +{ + if (!sz) + return NULL; + if (reftable_malloc_ptr) + return (*reftable_malloc_ptr)(sz); + return malloc(sz); +} + +void *reftable_realloc(void *p, size_t sz) +{ + if (!sz) { + reftable_free(p); + return NULL; + } + + if (reftable_realloc_ptr) + return (*reftable_realloc_ptr)(p, sz); + return realloc(p, sz); +} + +void reftable_free(void *p) +{ + if (reftable_free_ptr) + reftable_free_ptr(p); + else + free(p); +} + +void *reftable_calloc(size_t nelem, size_t elsize) +{ + void *p; + + if (nelem && elsize > SIZE_MAX / nelem) + return NULL; + + p = reftable_malloc(nelem * elsize); + if (!p) + return NULL; + + memset(p, 0, nelem * elsize); + return p; +} + +char *reftable_strdup(const char *str) +{ + size_t len = strlen(str); + char *result = reftable_malloc(len + 1); + if (!result) + return NULL; + memcpy(result, str, len + 1); + return result; +} + +void reftable_set_alloc(void *(*malloc)(size_t), + void *(*realloc)(void *, size_t), void (*free)(void *)) +{ + reftable_malloc_ptr = malloc; + reftable_realloc_ptr = realloc; + reftable_free_ptr = free; +} + +void reftable_buf_init(struct reftable_buf *buf) +{ + struct reftable_buf empty = REFTABLE_BUF_INIT; + *buf = empty; +} + +void 
reftable_buf_release(struct reftable_buf *buf) +{ + reftable_free(buf->buf); + reftable_buf_init(buf); +} + +void reftable_buf_reset(struct reftable_buf *buf) +{ + if (buf->alloc) { + buf->len = 0; + buf->buf[0] = '\0'; + } +} + +int reftable_buf_setlen(struct reftable_buf *buf, size_t len) +{ + if (len > buf->len) + return -1; + if (len == buf->len) + return 0; + buf->buf[len] = '\0'; + buf->len = len; + return 0; +} + +int reftable_buf_cmp(const struct reftable_buf *a, const struct reftable_buf *b) +{ + size_t len = a->len < b->len ? a->len : b->len; + if (len) { + int cmp = memcmp(a->buf, b->buf, len); + if (cmp) + return cmp; + } + return a->len < b->len ? -1 : a->len != b->len; +} + +int reftable_buf_add(struct reftable_buf *buf, const void *data, size_t len) +{ + size_t newlen = buf->len + len; + + if (newlen + 1 > buf->alloc) { + if (REFTABLE_ALLOC_GROW(buf->buf, newlen + 1, buf->alloc)) + return REFTABLE_OUT_OF_MEMORY_ERROR; + } + + memcpy(buf->buf + buf->len, data, len); + buf->buf[newlen] = '\0'; + buf->len = newlen; + + return 0; +} + +int reftable_buf_addstr(struct reftable_buf *buf, const char *s) +{ + return reftable_buf_add(buf, s, strlen(s)); +} + +char *reftable_buf_detach(struct reftable_buf *buf) +{ + char *result = buf->buf; + reftable_buf_init(buf); + return result; +} + +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) +{ + size_t lo = 0; + size_t hi = sz; + + /* Invariants: + * + * (hi == sz) || f(hi) == true + * (lo == 0 && f(0) == true) || fi(lo) == false + */ + while (hi - lo > 1) { + size_t mid = lo + (hi - lo) / 2; + int ret = f(mid, args); + if (ret < 0) + return sz; + + if (ret > 0) + hi = mid; + else + lo = mid; + } + + if (lo) + return hi; + + return f(0, args) ? 0 : 1; +} + +void free_names(char **a) +{ + char **p; + if (!a) { + return; + } + for (p = a; *p; p++) { + reftable_free(*p); + } + reftable_free(a); +} + +size_t names_length(const char **names) +{ + const char **p = names; + while (*p) + p++; + return p - names; +} + +char **parse_names(char *buf, int size) +{ + char **names = NULL; + size_t names_cap = 0; + size_t names_len = 0; + char *p = buf; + char *end = buf + size; + + while (p < end) { + char *next = strchr(p, '\n'); + if (next && next < end) { + *next = 0; + } else { + next = end; + } + if (p < next) { + if (REFTABLE_ALLOC_GROW(names, names_len + 1, + names_cap)) + goto err; + + names[names_len] = reftable_strdup(p); + if (!names[names_len++]) + goto err; + } + p = next + 1; + } + + if (REFTABLE_ALLOC_GROW(names, names_len + 1, names_cap)) + goto err; + names[names_len] = NULL; + + return names; + +err: + for (size_t i = 0; i < names_len; i++) + reftable_free(names[i]); + reftable_free(names); + return NULL; +} + +int names_equal(const char **a, const char **b) +{ + size_t i = 0; + for (; a[i] && b[i]; i++) + if (strcmp(a[i], b[i])) + return 0; + return a[i] == b[i]; +} + +size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b) +{ + size_t p = 0; + for (; p < a->len && p < b->len; p++) + if (a->buf[p] != b->buf[p]) + break; + return p; +} + +uint32_t hash_size(enum reftable_hash id) +{ + if (!id) + return REFTABLE_HASH_SIZE_SHA1; + switch (id) { + case REFTABLE_HASH_SHA1: + return REFTABLE_HASH_SIZE_SHA1; + case REFTABLE_HASH_SHA256: + return REFTABLE_HASH_SIZE_SHA256; + } + abort(); +} diff --git a/reftable/basics.h b/reftable/basics.h new file mode 100644 index 0000000000..7d22f96261 --- /dev/null +++ b/reftable/basics.h @@ -0,0 +1,290 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this 
source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef BASICS_H +#define BASICS_H + +/* + * miscellaneous utilities that are not provided by Git. + */ + +#include "system.h" +#include "reftable-basics.h" + +#ifdef __GNUC__ +#define REFTABLE_UNUSED __attribute__((__unused__)) +#else +#define REFTABLE_UNUSED +#endif + +/* + * Initialize the buffer such that it is ready for use. This is equivalent to + * using REFTABLE_BUF_INIT for stack-allocated variables. + */ +void reftable_buf_init(struct reftable_buf *buf); + +/* + * Release memory associated with the buffer. The buffer is reinitialized such + * that it can be reused for subsequent operations. + */ +void reftable_buf_release(struct reftable_buf *buf); + +/* + * Reset the buffer such that it is effectively empty, without releasing the + * memory that this structure holds on to. This is equivalent to calling + * `reftable_buf_setlen(buf, 0)`. + */ +void reftable_buf_reset(struct reftable_buf *buf); + +/* + * Trim the buffer to a shorter length by updating the `len` member and writing + * a NUL byte to `buf[len]`. Returns 0 on success, -1 when `len` points outside + * of the array. + */ +int reftable_buf_setlen(struct reftable_buf *buf, size_t len); + +/* + * Lexicographically compare the two buffers. Returns 0 when both buffers have + * the same contents, -1 when `a` is lexicographically smaller than `b`, and 1 + * otherwise. + */ +int reftable_buf_cmp(const struct reftable_buf *a, const struct reftable_buf *b); + +/* + * Append `len` bytes from `data` to the buffer. This function works with + * arbitrary byte sequences, including ones that contain embedded NUL + * characters. As such, we use `void *` as input type. Returns 0 on success, + * REFTABLE_OUT_OF_MEMORY_ERROR on allocation failure. + */ +int reftable_buf_add(struct reftable_buf *buf, const void *data, size_t len); + +/* Equivalent to `reftable_buf_add(buf, s, strlen(s))`. */ +int reftable_buf_addstr(struct reftable_buf *buf, const char *s); + +/* + * Detach the buffer from the structure such that the underlying memory is now + * owned by the caller. The buffer is reinitialized such that it can be reused + * for subsequent operations. 
+ */ +char *reftable_buf_detach(struct reftable_buf *buf); + +/* Bigendian en/decoding of integers */ + +static inline void reftable_put_be16(void *out, uint16_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 8) & 0xff); + p[1] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be24(void *out, uint32_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 16) & 0xff); + p[1] = (uint8_t)((i >> 8) & 0xff); + p[2] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be32(void *out, uint32_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 24) & 0xff); + p[1] = (uint8_t)((i >> 16) & 0xff); + p[2] = (uint8_t)((i >> 8) & 0xff); + p[3] = (uint8_t)((i >> 0) & 0xff); +} + +static inline void reftable_put_be64(void *out, uint64_t i) +{ + unsigned char *p = out; + p[0] = (uint8_t)((i >> 56) & 0xff); + p[1] = (uint8_t)((i >> 48) & 0xff); + p[2] = (uint8_t)((i >> 40) & 0xff); + p[3] = (uint8_t)((i >> 32) & 0xff); + p[4] = (uint8_t)((i >> 24) & 0xff); + p[5] = (uint8_t)((i >> 16) & 0xff); + p[6] = (uint8_t)((i >> 8) & 0xff); + p[7] = (uint8_t)((i >> 0) & 0xff); +} + +static inline uint16_t reftable_get_be16(const void *in) +{ + const unsigned char *p = in; + return (uint16_t)(p[0]) << 8 | + (uint16_t)(p[1]) << 0; +} + +static inline uint32_t reftable_get_be24(const void *in) +{ + const unsigned char *p = in; + return (uint32_t)(p[0]) << 16 | + (uint32_t)(p[1]) << 8 | + (uint32_t)(p[2]) << 0; +} + +static inline uint32_t reftable_get_be32(const void *in) +{ + const unsigned char *p = in; + return (uint32_t)(p[0]) << 24 | + (uint32_t)(p[1]) << 16 | + (uint32_t)(p[2]) << 8| + (uint32_t)(p[3]) << 0; +} + +static inline uint64_t reftable_get_be64(const void *in) +{ + const unsigned char *p = in; + return (uint64_t)(p[0]) << 56 | + (uint64_t)(p[1]) << 48 | + (uint64_t)(p[2]) << 40 | + (uint64_t)(p[3]) << 32 | + (uint64_t)(p[4]) << 24 | + (uint64_t)(p[5]) << 16 | + (uint64_t)(p[6]) << 8 | + (uint64_t)(p[7]) << 0; +} + +/* + * find smallest index i in [0, sz) at which `f(i) > 0`, assuming that f is + * ascending. Return sz if `f(i) == 0` for all indices. The search is aborted + * and `sz` is returned in case `f(i) < 0`. + * + * Contrary to bsearch(3), this returns something useful if the argument is not + * found. + */ +size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); + +/* + * Frees a NULL terminated array of malloced strings. The array itself is also + * freed. + */ +void free_names(char **a); + +/* + * Parse a newline separated list of names. `size` is the length of the buffer, + * without terminating '\0'. Empty names are discarded. Returns a `NULL` + * pointer when allocations fail. + */ +char **parse_names(char *buf, int size); + +/* compares two NULL-terminated arrays of strings. */ +int names_equal(const char **a, const char **b); + +/* returns the array size of a NULL-terminated array of strings. 
*/ +size_t names_length(const char **names); + +/* Allocation routines; they invoke the functions set through + * reftable_set_alloc() */ +void *reftable_malloc(size_t sz); +void *reftable_realloc(void *p, size_t sz); +void reftable_free(void *p); +void *reftable_calloc(size_t nelem, size_t elsize); +char *reftable_strdup(const char *str); + +static inline int reftable_alloc_size(size_t nelem, size_t elsize, size_t *out) +{ + if (nelem && elsize > SIZE_MAX / nelem) + return -1; + *out = nelem * elsize; + return 0; +} + +#define REFTABLE_ALLOC_ARRAY(x, alloc) do { \ + size_t alloc_size; \ + if (reftable_alloc_size(sizeof(*(x)), (alloc), &alloc_size) < 0) { \ + errno = ENOMEM; \ + (x) = NULL; \ + } else { \ + (x) = reftable_malloc(alloc_size); \ + } \ + } while (0) +#define REFTABLE_CALLOC_ARRAY(x, alloc) (x) = reftable_calloc((alloc), sizeof(*(x))) +#define REFTABLE_REALLOC_ARRAY(x, alloc) do { \ + size_t alloc_size; \ + if (reftable_alloc_size(sizeof(*(x)), (alloc), &alloc_size) < 0) { \ + errno = ENOMEM; \ + (x) = NULL; \ + } else { \ + (x) = reftable_realloc((x), alloc_size); \ + } \ + } while (0) + +static inline void *reftable_alloc_grow(void *p, size_t nelem, size_t elsize, + size_t *allocp) +{ + void *new_p; + size_t alloc = *allocp * 2 + 1, alloc_bytes; + if (alloc < nelem) + alloc = nelem; + if (reftable_alloc_size(elsize, alloc, &alloc_bytes) < 0) { + errno = ENOMEM; + return p; + } + new_p = reftable_realloc(p, alloc_bytes); + if (!new_p) + return p; + *allocp = alloc; + return new_p; +} + +#define REFTABLE_ALLOC_GROW(x, nr, alloc) ( \ + (nr) > (alloc) && ( \ + (x) = reftable_alloc_grow((x), (nr), sizeof(*(x)), &(alloc)), \ + (nr) > (alloc) \ + ) \ +) + +#define REFTABLE_ALLOC_GROW_OR_NULL(x, nr, alloc) do { \ + size_t reftable_alloc_grow_or_null_alloc = alloc; \ + if (REFTABLE_ALLOC_GROW((x), (nr), reftable_alloc_grow_or_null_alloc)) { \ + REFTABLE_FREE_AND_NULL(x); \ + alloc = 0; \ + } else { \ + alloc = reftable_alloc_grow_or_null_alloc; \ + } \ +} while (0) + +#define REFTABLE_FREE_AND_NULL(p) do { reftable_free(p); (p) = NULL; } while (0) + +#ifndef REFTABLE_ALLOW_BANNED_ALLOCATORS +# define REFTABLE_BANNED(func) use_reftable_##func##_instead +# undef malloc +# define malloc(sz) REFTABLE_BANNED(malloc) +# undef realloc +# define realloc(ptr, sz) REFTABLE_BANNED(realloc) +# undef free +# define free(ptr) REFTABLE_BANNED(free) +# undef calloc +# define calloc(nelem, elsize) REFTABLE_BANNED(calloc) +# undef strdup +# define strdup(str) REFTABLE_BANNED(strdup) +#endif + +#define REFTABLE_SWAP(a, b) do { \ + void *_swap_a_ptr = &(a); \ + void *_swap_b_ptr = &(b); \ + unsigned char _swap_buffer[sizeof(a) - 2 * sizeof(a) * (sizeof(a) != sizeof(b))]; \ + memcpy(_swap_buffer, _swap_a_ptr, sizeof(a)); \ + memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a)); \ + memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \ +} while (0) + +/* Find the longest shared prefix size of `a` and `b` */ +size_t common_prefix_size(struct reftable_buf *a, struct reftable_buf *b); + +uint32_t hash_size(enum reftable_hash id); + +/* + * Format IDs that identify the hash function used by a reftable. Note that + * these constants end up on disk and thus mustn't change. The format IDs are + * "sha1" and "s256" in big endian, respectively. 
+ */ +#define REFTABLE_FORMAT_ID_SHA1 ((uint32_t) 0x73686131) +#define REFTABLE_FORMAT_ID_SHA256 ((uint32_t) 0x73323536) + +#endif diff --git a/reftable/block.c b/reftable/block.c new file mode 100644 index 0000000000..920b3f4486 --- /dev/null +++ b/reftable/block.c @@ -0,0 +1,655 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "block.h" + +#include "blocksource.h" +#include "constants.h" +#include "iter.h" +#include "record.h" +#include "reftable-error.h" +#include "system.h" + +size_t header_size(int version) +{ + switch (version) { + case 1: + return 24; + case 2: + return 28; + } + abort(); +} + +size_t footer_size(int version) +{ + switch (version) { + case 1: + return 68; + case 2: + return 72; + } + abort(); +} + +static int block_writer_register_restart(struct block_writer *w, int n, + int is_restart, struct reftable_buf *key) +{ + uint32_t rlen; + int err; + + rlen = w->restart_len; + if (rlen >= MAX_RESTARTS) + is_restart = 0; + + if (is_restart) + rlen++; + if (2 + 3 * rlen + n > w->block_size - w->next) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + if (is_restart) { + REFTABLE_ALLOC_GROW_OR_NULL(w->restarts, w->restart_len + 1, + w->restart_cap); + if (!w->restarts) + return REFTABLE_OUT_OF_MEMORY_ERROR; + w->restarts[w->restart_len++] = w->next; + } + + w->next += n; + + reftable_buf_reset(&w->last_key); + err = reftable_buf_add(&w->last_key, key->buf, key->len); + if (err < 0) + return err; + + w->entries++; + return 0; +} + +int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, + uint32_t block_size, uint32_t header_off, uint32_t hash_size) +{ + bw->block = block; + bw->hash_size = hash_size; + bw->block_size = block_size; + bw->header_off = header_off; + bw->block[header_off] = typ; + bw->next = header_off + 4; + bw->restart_interval = 16; + bw->entries = 0; + bw->restart_len = 0; + bw->last_key.len = 0; + if (!bw->zstream) { + REFTABLE_CALLOC_ARRAY(bw->zstream, 1); + if (!bw->zstream) + return REFTABLE_OUT_OF_MEMORY_ERROR; + deflateInit(bw->zstream, 9); + } + + return 0; +} + +uint8_t block_writer_type(struct block_writer *bw) +{ + return bw->block[bw->header_off]; +} + +/* + * Adds the reftable_record to the block. Returns 0 on success and + * appropriate error codes on failure. + */ +int block_writer_add(struct block_writer *w, struct reftable_record *rec) +{ + struct reftable_buf empty = REFTABLE_BUF_INIT; + struct reftable_buf last = + w->entries % w->restart_interval == 0 ? 
empty : w->last_key; + struct string_view out = { + .buf = w->block + w->next, + .len = w->block_size - w->next, + }; + struct string_view start = out; + int is_restart = 0; + int n = 0; + int err; + + err = reftable_record_key(rec, &w->scratch); + if (err < 0) + goto done; + + if (!w->scratch.len) { + err = REFTABLE_API_ERROR; + goto done; + } + + n = reftable_encode_key(&is_restart, out, last, w->scratch, + reftable_record_val_type(rec)); + if (n < 0) { + err = n; + goto done; + } + string_view_consume(&out, n); + + n = reftable_record_encode(rec, out, w->hash_size); + if (n < 0) { + err = n; + goto done; + } + string_view_consume(&out, n); + + err = block_writer_register_restart(w, start.len - out.len, is_restart, + &w->scratch); +done: + return err; +} + +int block_writer_finish(struct block_writer *w) +{ + for (uint32_t i = 0; i < w->restart_len; i++) { + reftable_put_be24(w->block + w->next, w->restarts[i]); + w->next += 3; + } + + reftable_put_be16(w->block + w->next, w->restart_len); + w->next += 2; + reftable_put_be24(w->block + 1 + w->header_off, w->next); + + /* + * Log records are stored zlib-compressed. Note that the compression + * also spans over the restart points we have just written. + */ + if (block_writer_type(w) == REFTABLE_BLOCK_TYPE_LOG) { + int block_header_skip = 4 + w->header_off; + uLongf src_len = w->next - block_header_skip, compressed_len; + int ret; + + ret = deflateReset(w->zstream); + if (ret != Z_OK) + return REFTABLE_ZLIB_ERROR; + + /* + * Precompute the upper bound of how many bytes the compressed + * data may end up with. Combined with `Z_FINISH`, `deflate()` + * is guaranteed to return `Z_STREAM_END`. + */ + compressed_len = deflateBound(w->zstream, src_len); + REFTABLE_ALLOC_GROW_OR_NULL(w->compressed, compressed_len, + w->compressed_cap); + if (!w->compressed) { + ret = REFTABLE_OUT_OF_MEMORY_ERROR; + return ret; + } + + w->zstream->next_out = w->compressed; + w->zstream->avail_out = compressed_len; + w->zstream->next_in = w->block + block_header_skip; + w->zstream->avail_in = src_len; + + /* + * We want to perform all decompression in a single step, which + * is why we can pass Z_FINISH here. As we have precomputed the + * deflated buffer's size via `deflateBound()` this function is + * guaranteed to succeed according to the zlib documentation. + */ + ret = deflate(w->zstream, Z_FINISH); + if (ret != Z_STREAM_END) + return REFTABLE_ZLIB_ERROR; + + /* + * Overwrite the uncompressed data we have already written and + * adjust the `next` pointer to point right after the + * compressed data. + */ + memcpy(w->block + block_header_skip, w->compressed, + w->zstream->total_out); + w->next = w->zstream->total_out + block_header_skip; + } + + return w->next; +} + +static int read_block(struct reftable_block_source *source, + struct reftable_block_data *dest, uint64_t off, + uint32_t sz) +{ + size_t size = block_source_size(source); + block_source_release_data(dest); + if (off >= size) + return 0; + if (off + sz > size) + sz = size - off; + return block_source_read_data(source, dest, off, sz); +} + +int reftable_block_init(struct reftable_block *block, + struct reftable_block_source *source, + uint32_t offset, uint32_t header_size, + uint32_t table_block_size, uint32_t hash_size, + uint8_t want_type) +{ + uint32_t guess_block_size = table_block_size ? 
+ table_block_size : DEFAULT_BLOCK_SIZE; + uint32_t full_block_size = table_block_size; + uint16_t restart_count; + uint32_t restart_off; + uint32_t block_size; + uint8_t block_type; + int err; + + err = read_block(source, &block->block_data, offset, guess_block_size); + if (err < 0) + goto done; + + block_type = block->block_data.data[header_size]; + if (!reftable_is_block_type(block_type)) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + if (want_type != REFTABLE_BLOCK_TYPE_ANY && block_type != want_type) { + err = 1; + goto done; + } + + block_size = reftable_get_be24(block->block_data.data + header_size + 1); + if (block_size > guess_block_size) { + err = read_block(source, &block->block_data, offset, block_size); + if (err < 0) + goto done; + } + + if (block_type == REFTABLE_BLOCK_TYPE_LOG) { + uint32_t block_header_skip = 4 + header_size; + uLong dst_len = block_size - block_header_skip; + uLong src_len = block->block_data.len - block_header_skip; + + /* Log blocks specify the *uncompressed* size in their header. */ + REFTABLE_ALLOC_GROW_OR_NULL(block->uncompressed_data, block_size, + block->uncompressed_cap); + if (!block->uncompressed_data) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + /* Copy over the block header verbatim. It's not compressed. */ + memcpy(block->uncompressed_data, block->block_data.data, block_header_skip); + + if (!block->zstream) { + REFTABLE_CALLOC_ARRAY(block->zstream, 1); + if (!block->zstream) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + err = inflateInit(block->zstream); + } else { + err = inflateReset(block->zstream); + } + if (err != Z_OK) { + err = REFTABLE_ZLIB_ERROR; + goto done; + } + + block->zstream->next_in = block->block_data.data + block_header_skip; + block->zstream->avail_in = src_len; + block->zstream->next_out = block->uncompressed_data + block_header_skip; + block->zstream->avail_out = dst_len; + + /* + * We know both input as well as output size, and we know that + * the sizes should never be bigger than `uInt_MAX` because + * blocks can at most be 16MB large. We can thus use `Z_FINISH` + * here to instruct zlib to inflate the data in one go, which + * is more efficient than using `Z_NO_FLUSH`. + */ + err = inflate(block->zstream, Z_FINISH); + if (err != Z_STREAM_END) { + err = REFTABLE_ZLIB_ERROR; + goto done; + } + err = 0; + + if (block->zstream->total_out + block_header_skip != block_size) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + /* We're done with the input data. */ + block_source_release_data(&block->block_data); + block->block_data.data = block->uncompressed_data; + block->block_data.len = block_size; + full_block_size = src_len + block_header_skip - block->zstream->avail_in; + } else if (full_block_size == 0) { + full_block_size = block_size; + } else if (block_size < full_block_size && block_size < block->block_data.len && + block->block_data.data[block_size] != 0) { + /* If the block is smaller than the full block size, it is + padded (data followed by '\0') or the next block is + unaligned. 
*/ + full_block_size = block_size; + } + + restart_count = reftable_get_be16(block->block_data.data + block_size - 2); + restart_off = block_size - 2 - 3 * restart_count; + + block->block_type = block_type; + block->hash_size = hash_size; + block->restart_off = restart_off; + block->full_block_size = full_block_size; + block->header_off = header_size; + block->restart_count = restart_count; + + err = 0; + +done: + if (err < 0) + reftable_block_release(block); + return err; +} + +void reftable_block_release(struct reftable_block *block) +{ + inflateEnd(block->zstream); + reftable_free(block->zstream); + reftable_free(block->uncompressed_data); + block_source_release_data(&block->block_data); + memset(block, 0, sizeof(*block)); +} + +uint8_t reftable_block_type(const struct reftable_block *b) +{ + return b->block_data.data[b->header_off]; +} + +int reftable_block_first_key(const struct reftable_block *block, struct reftable_buf *key) +{ + int off = block->header_off + 4, n; + struct string_view in = { + .buf = block->block_data.data + off, + .len = block->restart_off - off, + }; + uint8_t extra = 0; + + reftable_buf_reset(key); + + n = reftable_decode_key(key, &extra, in); + if (n < 0) + return n; + if (!key->len) + return REFTABLE_FORMAT_ERROR; + + return 0; +} + +static uint32_t block_restart_offset(const struct reftable_block *b, size_t idx) +{ + return reftable_get_be24(b->block_data.data + b->restart_off + 3 * idx); +} + +void block_iter_init(struct block_iter *it, const struct reftable_block *block) +{ + it->block = block; + block_iter_seek_start(it); +} + +void block_iter_seek_start(struct block_iter *it) +{ + reftable_buf_reset(&it->last_key); + it->next_off = it->block->header_off + 4; +} + +struct restart_needle_less_args { + int error; + struct reftable_buf needle; + const struct reftable_block *block; +}; + +static int restart_needle_less(size_t idx, void *_args) +{ + struct restart_needle_less_args *args = _args; + uint32_t off = block_restart_offset(args->block, idx); + struct string_view in = { + .buf = args->block->block_data.data + off, + .len = args->block->restart_off - off, + }; + uint64_t prefix_len, suffix_len; + uint8_t extra; + int n; + + /* + * Records at restart points are stored without prefix compression, so + * there is no need to fully decode the record key here. This removes + * the need for allocating memory. + */ + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra); + if (n < 0 || prefix_len) { + args->error = 1; + return -1; + } + + string_view_consume(&in, n); + if (suffix_len > in.len) { + args->error = 1; + return -1; + } + + n = memcmp(args->needle.buf, in.buf, + args->needle.len < suffix_len ? 
args->needle.len : suffix_len); + if (n) + return n < 0; + return args->needle.len < suffix_len; +} + +int block_iter_next(struct block_iter *it, struct reftable_record *rec) +{ + struct string_view in = { + .buf = (unsigned char *) it->block->block_data.data + it->next_off, + .len = it->block->restart_off - it->next_off, + }; + struct string_view start = in; + uint8_t extra = 0; + int n = 0; + + if (it->next_off >= it->block->restart_off) + return 1; + + n = reftable_decode_key(&it->last_key, &extra, in); + if (n < 0) + return -1; + if (!it->last_key.len) + return REFTABLE_FORMAT_ERROR; + + string_view_consume(&in, n); + n = reftable_record_decode(rec, it->last_key, extra, in, it->block->hash_size, + &it->scratch); + if (n < 0) + return -1; + string_view_consume(&in, n); + + it->next_off += start.len - in.len; + return 0; +} + +void block_iter_reset(struct block_iter *it) +{ + reftable_buf_reset(&it->last_key); + it->next_off = 0; + it->block = NULL; +} + +void block_iter_close(struct block_iter *it) +{ + reftable_buf_release(&it->last_key); + reftable_buf_release(&it->scratch); +} + +int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want) +{ + struct restart_needle_less_args args = { + .needle = *want, + .block = it->block, + }; + struct reftable_record rec; + int err = 0; + size_t i; + + /* + * Perform a binary search over the block's restart points, which + * avoids doing a linear scan over the whole block. Like this, we + * identify the section of the block that should contain our key. + * + * Note that we explicitly search for the first restart point _greater_ + * than the sought-after record, not _greater or equal_ to it. In case + * the sought-after record is located directly at the restart point we + * would otherwise start doing the linear search at the preceding + * restart point. While that works alright, we would end up scanning + * too many record. + */ + i = binsearch(it->block->restart_count, &restart_needle_less, &args); + if (args.error) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + /* + * Now there are multiple cases: + * + * - `i == 0`: The wanted record is smaller than the record found at + * the first restart point. As the first restart point is the first + * record in the block, our wanted record cannot be located in this + * block at all. We still need to position the iterator so that the + * next call to `block_iter_next()` will yield an end-of-iterator + * signal. + * + * - `i == restart_count`: The wanted record was not found at any of + * the restart points. As there is no restart point at the end of + * the section the record may thus be contained in the last block. + * + * - `i > 0`: The wanted record must be contained in the section + * before the found restart point. We thus do a linear search + * starting from the preceding restart point. + */ + if (i > 0) + it->next_off = block_restart_offset(it->block, i - 1); + else + it->next_off = it->block->header_off + 4; + + err = reftable_record_init(&rec, reftable_block_type(it->block)); + if (err < 0) + goto done; + + /* + * We're looking for the last entry less than the wanted key so that + * the next call to `block_reader_next()` would yield the wanted + * record. We thus don't want to position our iterator at the sought + * after record, but one before. To do so, we have to go one entry too + * far and then back up. 
+ */ + while (1) { + size_t prev_off = it->next_off; + + err = block_iter_next(it, &rec); + if (err < 0) + goto done; + if (err > 0) { + it->next_off = prev_off; + err = 0; + goto done; + } + + err = reftable_record_key(&rec, &it->last_key); + if (err < 0) + goto done; + + /* + * Check whether the current key is greater or equal to the + * sought-after key. In case it is greater we know that the + * record does not exist in the block and can thus abort early. + * In case it is equal to the sought-after key we have found + * the desired record. + * + * Note that we store the next record's key record directly in + * `last_key` without restoring the key of the preceding record + * in case we need to go one record back. This is safe to do as + * `block_iter_next()` would return the ref whose key is equal + * to `last_key` now, and naturally all keys share a prefix + * with themselves. + */ + if (reftable_buf_cmp(&it->last_key, want) >= 0) { + it->next_off = prev_off; + goto done; + } + } + +done: + reftable_record_release(&rec); + return err; +} + +static int block_iter_seek_void(void *it, struct reftable_record *want) +{ + struct reftable_buf buf = REFTABLE_BUF_INIT; + struct block_iter *bi = it; + int err; + + if (bi->block->block_type != want->type) + return REFTABLE_API_ERROR; + + err = reftable_record_key(want, &buf); + if (err < 0) + goto out; + + err = block_iter_seek_key(it, &buf); + if (err < 0) + goto out; + + err = 0; + +out: + reftable_buf_release(&buf); + return err; +} + +static int block_iter_next_void(void *it, struct reftable_record *rec) +{ + return block_iter_next(it, rec); +} + +static void block_iter_close_void(void *it) +{ + block_iter_close(it); +} + +static struct reftable_iterator_vtable block_iter_vtable = { + .seek = &block_iter_seek_void, + .next = &block_iter_next_void, + .close = &block_iter_close_void, +}; + +int reftable_block_init_iterator(const struct reftable_block *b, + struct reftable_iterator *it) +{ + struct block_iter *bi; + + REFTABLE_CALLOC_ARRAY(bi, 1); + block_iter_init(bi, b); + + assert(!it->ops); + it->iter_arg = bi; + it->ops = &block_iter_vtable; + + return 0; +} + +void block_writer_release(struct block_writer *bw) +{ + deflateEnd(bw->zstream); + REFTABLE_FREE_AND_NULL(bw->zstream); + REFTABLE_FREE_AND_NULL(bw->restarts); + REFTABLE_FREE_AND_NULL(bw->compressed); + reftable_buf_release(&bw->scratch); + reftable_buf_release(&bw->last_key); + /* the block is not owned. */ +} diff --git a/reftable/block.h b/reftable/block.h new file mode 100644 index 0000000000..d6dfaae33e --- /dev/null +++ b/reftable/block.h @@ -0,0 +1,115 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef BLOCK_H +#define BLOCK_H + +#include "basics.h" +#include "record.h" +#include "reftable-block.h" +#include "reftable-blocksource.h" + +/* + * Writes reftable blocks. The block_writer is reused across blocks to minimize + * allocation overhead. + */ +struct block_writer { + struct z_stream_s *zstream; + unsigned char *compressed; + size_t compressed_cap; + + uint8_t *block; + uint32_t block_size; + + /* Offset of the global header. Nonzero in the first block only. */ + uint32_t header_off; + + /* How often to restart keys. */ + uint16_t restart_interval; + uint32_t hash_size; + + /* Offset of next uint8_t to write. 
*/ + uint32_t next; + uint32_t *restarts; + uint32_t restart_len; + uint32_t restart_cap; + + struct reftable_buf last_key; + /* Scratch buffer used to avoid allocations. */ + struct reftable_buf scratch; + int entries; +}; + +/* + * initializes the blockwriter to write `typ` entries, using `block` as temporary + * storage. `block` is not owned by the block_writer. */ +int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block, + uint32_t block_size, uint32_t header_off, uint32_t hash_size); + +/* returns the block type (eg. 'r' for ref records. */ +uint8_t block_writer_type(struct block_writer *bw); + +/* Attempts to append the record. Returns 0 on success or error code on failure. */ +int block_writer_add(struct block_writer *w, struct reftable_record *rec); + +/* appends the key restarts, and compress the block if necessary. */ +int block_writer_finish(struct block_writer *w); + +/* clears out internally allocated block_writer members. */ +void block_writer_release(struct block_writer *bw); + +/* Iterator for records contained in a single block. */ +struct block_iter { + /* offset within the block of the next entry to read. */ + uint32_t next_off; + const struct reftable_block *block; + + /* key for last entry we read. */ + struct reftable_buf last_key; + struct reftable_buf scratch; +}; + +#define BLOCK_ITER_INIT { \ + .last_key = REFTABLE_BUF_INIT, \ + .scratch = REFTABLE_BUF_INIT, \ +} + +/* + * Initialize the block iterator with the given block. The iterator will be + * positioned at the first record contained in the block. The block must remain + * valid until the end of the iterator's lifetime. It is valid to re-initialize + * iterators multiple times. + */ +void block_iter_init(struct block_iter *it, const struct reftable_block *block); + +/* Position the initialized iterator at the first record of its block. */ +void block_iter_seek_start(struct block_iter *it); + +/* + * Position the initialized iterator at the desired record key. It is not an + * error in case the record cannot be found. If so, a subsequent call to + * `block_iter_next()` will indicate that the iterator is exhausted. + */ +int block_iter_seek_key(struct block_iter *it, struct reftable_buf *want); + +/* return < 0 for error, 0 for OK, > 0 for EOF. */ +int block_iter_next(struct block_iter *it, struct reftable_record *rec); + +/* Reset the block iterator to pristine state without releasing its memory. */ +void block_iter_reset(struct block_iter *it); + +/* deallocate memory for `it`. The block reader and its block is left intact. 
*/ +void block_iter_close(struct block_iter *it); + +/* size of file header, depending on format version */ +size_t header_size(int version); + +/* size of file footer, depending on format version */ +size_t footer_size(int version); + +#endif diff --git a/reftable/blocksource.c b/reftable/blocksource.c new file mode 100644 index 0000000000..573c81287f --- /dev/null +++ b/reftable/blocksource.c @@ -0,0 +1,179 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "system.h" + +#include "basics.h" +#include "blocksource.h" +#include "reftable-blocksource.h" +#include "reftable-error.h" + +void block_source_release_data(struct reftable_block_data *data) +{ + struct reftable_block_source source = data->source; + if (data && source.ops) + source.ops->release_data(source.arg, data); + data->data = NULL; + data->len = 0; + data->source.ops = NULL; + data->source.arg = NULL; +} + +void block_source_close(struct reftable_block_source *source) +{ + if (!source->ops) { + return; + } + + source->ops->close(source->arg); + source->ops = NULL; +} + +ssize_t block_source_read_data(struct reftable_block_source *source, + struct reftable_block_data *dest, uint64_t off, + uint32_t size) +{ + ssize_t result = source->ops->read_data(source->arg, dest, off, size); + dest->source = *source; + return result; +} + +uint64_t block_source_size(struct reftable_block_source *source) +{ + return source->ops->size(source->arg); +} + +static void reftable_buf_release_data(void *b REFTABLE_UNUSED, struct reftable_block_data *dest) +{ + if (dest->len) + memset(dest->data, 0xff, dest->len); + reftable_free(dest->data); +} + +static void reftable_buf_close(void *b REFTABLE_UNUSED) +{ +} + +static ssize_t reftable_buf_read_data(void *v, struct reftable_block_data *dest, + uint64_t off, uint32_t size) +{ + struct reftable_buf *b = v; + assert(off + size <= b->len); + REFTABLE_CALLOC_ARRAY(dest->data, size); + if (!dest->data) + return -1; + memcpy(dest->data, b->buf + off, size); + dest->len = size; + return size; +} + +static uint64_t reftable_buf_size(void *b) +{ + return ((struct reftable_buf *)b)->len; +} + +static struct reftable_block_source_vtable reftable_buf_vtable = { + .size = &reftable_buf_size, + .read_data = &reftable_buf_read_data, + .release_data = &reftable_buf_release_data, + .close = &reftable_buf_close, +}; + +void block_source_from_buf(struct reftable_block_source *bs, + struct reftable_buf *buf) +{ + assert(!bs->ops); + bs->ops = &reftable_buf_vtable; + bs->arg = buf; +} + +struct file_block_source { + uint64_t size; + unsigned char *data; +}; + +static uint64_t file_size(void *b) +{ + return ((struct file_block_source *)b)->size; +} + +static void file_release_data(void *b REFTABLE_UNUSED, struct reftable_block_data *dest REFTABLE_UNUSED) +{ +} + +static void file_close(void *v) +{ + struct file_block_source *b = v; + munmap(b->data, b->size); + reftable_free(b); +} + +static ssize_t file_read_data(void *v, struct reftable_block_data *dest, uint64_t off, + uint32_t size) +{ + struct file_block_source *b = v; + assert(off + size <= b->size); + dest->data = b->data + off; + dest->len = size; + return size; +} + +static struct reftable_block_source_vtable file_vtable = { + .size = &file_size, + .read_data = &file_read_data, + .release_data = &file_release_data, + .close = &file_close, +}; + +int reftable_block_source_from_file(struct 
reftable_block_source *bs, + const char *name) +{ + struct file_block_source *p = NULL; + struct stat st; + int fd, err; + + fd = open(name, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) + return REFTABLE_NOT_EXIST_ERROR; + err = -1; + goto out; + } + + if (fstat(fd, &st) < 0) { + err = REFTABLE_IO_ERROR; + goto out; + } + + REFTABLE_CALLOC_ARRAY(p, 1); + if (!p) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + + p->size = st.st_size; + p->data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (p->data == MAP_FAILED) { + err = REFTABLE_IO_ERROR; + p->data = NULL; + goto out; + } + + assert(!bs->ops); + bs->ops = &file_vtable; + bs->arg = p; + + err = 0; + +out: + if (fd >= 0) + close(fd); + if (err < 0) + reftable_free(p); + return err; +} diff --git a/reftable/blocksource.h b/reftable/blocksource.h new file mode 100644 index 0000000000..a110e05958 --- /dev/null +++ b/reftable/blocksource.h @@ -0,0 +1,46 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef BLOCKSOURCE_H +#define BLOCKSOURCE_H + +#include "system.h" + +struct reftable_block_source; +struct reftable_block_data; +struct reftable_buf; + +/* + * Close the block source and the underlying resource. This is a no-op in case + * the block source is zero-initialized. + */ +void block_source_close(struct reftable_block_source *source); + +/* + * Read a block of length `size` from the source at the given `off`. + */ +ssize_t block_source_read_data(struct reftable_block_source *source, + struct reftable_block_data *dest, uint64_t off, + uint32_t size); + +/* + * Return the total length of the underlying resource. + */ +uint64_t block_source_size(struct reftable_block_source *source); + +/* + * Return a block to its original source, releasing any resources associated + * with it. + */ +void block_source_release_data(struct reftable_block_data *data); + +/* Create an in-memory block source for reading reftables. 
*/ +void block_source_from_buf(struct reftable_block_source *bs, + struct reftable_buf *buf); + +#endif diff --git a/reftable/constants.h b/reftable/constants.h new file mode 100644 index 0000000000..e3b1aaa516 --- /dev/null +++ b/reftable/constants.h @@ -0,0 +1,18 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef CONSTANTS_H +#define CONSTANTS_H + +#include "reftable-constants.h" + +#define MAX_RESTARTS ((1 << 16) - 1) +#define DEFAULT_BLOCK_SIZE 4096 +#define DEFAULT_GEOMETRIC_FACTOR 2 + +#endif diff --git a/reftable/error.c b/reftable/error.c new file mode 100644 index 0000000000..c7cab2dbc4 --- /dev/null +++ b/reftable/error.c @@ -0,0 +1,46 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "system.h" +#include "reftable-error.h" + +#include <stdio.h> + +const char *reftable_error_str(int err) +{ + static char buf[250]; + switch (err) { + case REFTABLE_IO_ERROR: + return "I/O error"; + case REFTABLE_FORMAT_ERROR: + return "corrupt reftable file"; + case REFTABLE_NOT_EXIST_ERROR: + return "file does not exist"; + case REFTABLE_LOCK_ERROR: + return "data is locked"; + case REFTABLE_API_ERROR: + return "misuse of the reftable API"; + case REFTABLE_ZLIB_ERROR: + return "zlib failure"; + case REFTABLE_EMPTY_TABLE_ERROR: + return "wrote empty table"; + case REFTABLE_REFNAME_ERROR: + return "invalid refname"; + case REFTABLE_ENTRY_TOO_BIG_ERROR: + return "entry too large"; + case REFTABLE_OUTDATED_ERROR: + return "data concurrently modified"; + case REFTABLE_OUT_OF_MEMORY_ERROR: + return "out of memory"; + case -1: + return "general error"; + default: + snprintf(buf, sizeof(buf), "unknown error code %d", err); + return buf; + } +} diff --git a/reftable/iter.c b/reftable/iter.c new file mode 100644 index 0000000000..2ecc52b336 --- /dev/null +++ b/reftable/iter.c @@ -0,0 +1,302 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "iter.h" + +#include "system.h" + +#include "block.h" +#include "blocksource.h" +#include "constants.h" +#include "reftable-error.h" +#include "table.h" + +int iterator_seek(struct reftable_iterator *it, struct reftable_record *want) +{ + return it->ops->seek(it->iter_arg, want); +} + +int iterator_next(struct reftable_iterator *it, struct reftable_record *rec) +{ + return it->ops->next(it->iter_arg, rec); +} + +static int empty_iterator_seek(void *arg REFTABLE_UNUSED, struct reftable_record *want REFTABLE_UNUSED) +{ + return 0; +} + +static int empty_iterator_next(void *arg REFTABLE_UNUSED, struct reftable_record *rec REFTABLE_UNUSED) +{ + return 1; +} + +static void empty_iterator_close(void *arg REFTABLE_UNUSED) +{ +} + +static struct reftable_iterator_vtable empty_vtable = { + .seek = &empty_iterator_seek, + .next = &empty_iterator_next, + .close = &empty_iterator_close, +}; + +void iterator_set_empty(struct reftable_iterator *it) +{ + assert(!it->ops); + it->iter_arg = NULL; + it->ops = &empty_vtable; +} + +static void filtering_ref_iterator_close(void *iter_arg) +{ + struct filtering_ref_iterator *fri = iter_arg; + 
reftable_buf_release(&fri->oid); + reftable_iterator_destroy(&fri->it); +} + +static int filtering_ref_iterator_seek(void *iter_arg, + struct reftable_record *want) +{ + struct filtering_ref_iterator *fri = iter_arg; + return iterator_seek(&fri->it, want); +} + +static int filtering_ref_iterator_next(void *iter_arg, + struct reftable_record *rec) +{ + struct filtering_ref_iterator *fri = iter_arg; + struct reftable_ref_record *ref = &rec->u.ref; + int err = 0; + while (1) { + err = reftable_iterator_next_ref(&fri->it, ref); + if (err != 0) { + break; + } + + if (ref->value_type == REFTABLE_REF_VAL2 && + (!memcmp(fri->oid.buf, ref->value.val2.target_value, + fri->oid.len) || + !memcmp(fri->oid.buf, ref->value.val2.value, + fri->oid.len))) + return 0; + + if (ref->value_type == REFTABLE_REF_VAL1 && + !memcmp(fri->oid.buf, ref->value.val1, fri->oid.len)) { + return 0; + } + } + + reftable_ref_record_release(ref); + return err; +} + +static struct reftable_iterator_vtable filtering_ref_iterator_vtable = { + .seek = &filtering_ref_iterator_seek, + .next = &filtering_ref_iterator_next, + .close = &filtering_ref_iterator_close, +}; + +void iterator_from_filtering_ref_iterator(struct reftable_iterator *it, + struct filtering_ref_iterator *fri) +{ + assert(!it->ops); + it->iter_arg = fri; + it->ops = &filtering_ref_iterator_vtable; +} + +static void indexed_table_ref_iter_close(void *p) +{ + struct indexed_table_ref_iter *it = p; + block_iter_close(&it->cur); + block_source_release_data(&it->block.block_data); + reftable_free(it->offsets); + reftable_buf_release(&it->oid); +} + +static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it) +{ + uint64_t off; + int err = 0; + if (it->offset_idx == it->offset_len) { + it->is_finished = 1; + return 1; + } + + block_source_release_data(&it->block.block_data); + + off = it->offsets[it->offset_idx++]; + err = table_init_block(it->table, &it->block, off, REFTABLE_BLOCK_TYPE_REF); + if (err < 0) { + return err; + } + if (err > 0) { + /* indexed block does not exist. 
*/ + return REFTABLE_FORMAT_ERROR; + } + block_iter_init(&it->cur, &it->block); + return 0; +} + +static int indexed_table_ref_iter_seek(void *p REFTABLE_UNUSED, + struct reftable_record *want REFTABLE_UNUSED) +{ + return REFTABLE_API_ERROR; +} + +static int indexed_table_ref_iter_next(void *p, struct reftable_record *rec) +{ + struct indexed_table_ref_iter *it = p; + struct reftable_ref_record *ref = &rec->u.ref; + + while (1) { + int err = block_iter_next(&it->cur, rec); + if (err < 0) { + return err; + } + + if (err > 0) { + err = indexed_table_ref_iter_next_block(it); + if (err < 0) { + return err; + } + + if (it->is_finished) { + return 1; + } + continue; + } + /* BUG */ + if (!memcmp(it->oid.buf, ref->value.val2.target_value, + it->oid.len) || + !memcmp(it->oid.buf, ref->value.val2.value, it->oid.len)) { + return 0; + } + } +} + +int indexed_table_ref_iter_new(struct indexed_table_ref_iter **dest, + struct reftable_table *t, uint8_t *oid, + int oid_len, uint64_t *offsets, int offset_len) +{ + struct indexed_table_ref_iter empty = INDEXED_TABLE_REF_ITER_INIT; + struct indexed_table_ref_iter *itr; + int err = 0; + + itr = reftable_calloc(1, sizeof(*itr)); + if (!itr) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + + *itr = empty; + itr->table = t; + + err = reftable_buf_add(&itr->oid, oid, oid_len); + if (err < 0) + goto out; + + itr->offsets = offsets; + itr->offset_len = offset_len; + + err = indexed_table_ref_iter_next_block(itr); + if (err < 0) + goto out; + + *dest = itr; + err = 0; + +out: + if (err < 0) { + *dest = NULL; + reftable_free(itr); + } + return err; +} + +static struct reftable_iterator_vtable indexed_table_ref_iter_vtable = { + .seek = &indexed_table_ref_iter_seek, + .next = &indexed_table_ref_iter_next, + .close = &indexed_table_ref_iter_close, +}; + +void iterator_from_indexed_table_ref_iter(struct reftable_iterator *it, + struct indexed_table_ref_iter *itr) +{ + assert(!it->ops); + it->iter_arg = itr; + it->ops = &indexed_table_ref_iter_vtable; +} + +void reftable_iterator_destroy(struct reftable_iterator *it) +{ + if (!it->ops) + return; + it->ops->close(it->iter_arg); + it->ops = NULL; + REFTABLE_FREE_AND_NULL(it->iter_arg); +} + +int reftable_iterator_seek_ref(struct reftable_iterator *it, + const char *name) +{ + struct reftable_record want = { + .type = REFTABLE_BLOCK_TYPE_REF, + .u.ref = { + .refname = (char *)name, + }, + }; + return it->ops->seek(it->iter_arg, &want); +} + +int reftable_iterator_next_ref(struct reftable_iterator *it, + struct reftable_ref_record *ref) +{ + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_REF, + .u = { + .ref = *ref + }, + }; + int err = iterator_next(it, &rec); + *ref = rec.u.ref; + return err; +} + +int reftable_iterator_seek_log_at(struct reftable_iterator *it, + const char *name, uint64_t update_index) +{ + struct reftable_record want = { + .type = REFTABLE_BLOCK_TYPE_LOG, + .u.log = { + .refname = (char *)name, + .update_index = update_index, + }, + }; + return it->ops->seek(it->iter_arg, &want); +} + +int reftable_iterator_seek_log(struct reftable_iterator *it, + const char *name) +{ + return reftable_iterator_seek_log_at(it, name, ~((uint64_t) 0)); +} + +int reftable_iterator_next_log(struct reftable_iterator *it, + struct reftable_log_record *log) +{ + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_LOG, + .u = { + .log = *log, + }, + }; + int err = iterator_next(it, &rec); + *log = rec.u.log; + return err; +} diff --git a/reftable/iter.h b/reftable/iter.h new file mode 100644 
index 0000000000..cc920970a5 --- /dev/null +++ b/reftable/iter.h @@ -0,0 +1,89 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef ITER_H +#define ITER_H + +#include "system.h" +#include "block.h" +#include "record.h" + +#include "reftable-iterator.h" + +/* + * The virtual function table for implementing generic reftable iterators. + */ +struct reftable_iterator_vtable { + int (*seek)(void *iter_arg, struct reftable_record *want); + int (*next)(void *iter_arg, struct reftable_record *rec); + void (*close)(void *iter_arg); +}; + +/* + * Position the iterator at the wanted record such that a call to + * `iterator_next()` would return that record, if it exists. + */ +int iterator_seek(struct reftable_iterator *it, struct reftable_record *want); + +/* + * Yield the next record and advance the iterator. Returns <0 on error, 0 when + * a record was yielded, and >0 when the iterator hit an error. + */ +int iterator_next(struct reftable_iterator *it, struct reftable_record *rec); + +/* + * Set up the iterator such that it behaves the same as an iterator with no + * entries. + */ +void iterator_set_empty(struct reftable_iterator *it); + +/* iterator that produces only ref records that point to `oid` */ +struct filtering_ref_iterator { + struct reftable_buf oid; + struct reftable_iterator it; +}; +#define FILTERING_REF_ITERATOR_INIT \ + { \ + .oid = REFTABLE_BUF_INIT \ + } + +void iterator_from_filtering_ref_iterator(struct reftable_iterator *, + struct filtering_ref_iterator *); + +/* iterator that produces only ref records that point to `oid`, + * but using the object index. + */ +struct indexed_table_ref_iter { + struct reftable_table *table; + struct reftable_buf oid; + + /* mutable */ + uint64_t *offsets; + + /* Points to the next offset to read. 
*/ + int offset_idx; + int offset_len; + struct reftable_block block; + struct block_iter cur; + int is_finished; +}; + +#define INDEXED_TABLE_REF_ITER_INIT { \ + .cur = BLOCK_ITER_INIT, \ + .oid = REFTABLE_BUF_INIT, \ +} + +void iterator_from_indexed_table_ref_iter(struct reftable_iterator *it, + struct indexed_table_ref_iter *itr); + +/* Takes ownership of `offsets` */ +int indexed_table_ref_iter_new(struct indexed_table_ref_iter **dest, + struct reftable_table *t, uint8_t *oid, + int oid_len, uint64_t *offsets, int offset_len); + +#endif diff --git a/reftable/merged.c b/reftable/merged.c new file mode 100644 index 0000000000..733de07454 --- /dev/null +++ b/reftable/merged.c @@ -0,0 +1,316 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "merged.h" + +#include "constants.h" +#include "iter.h" +#include "pq.h" +#include "record.h" +#include "reftable-merged.h" +#include "reftable-error.h" +#include "system.h" +#include "table.h" + +struct merged_subiter { + struct reftable_iterator iter; + struct reftable_record rec; +}; + +struct merged_iter { + struct merged_subiter *subiters; + struct merged_iter_pqueue pq; + size_t subiters_len; + int suppress_deletions; + ssize_t advance_index; +}; + +static void merged_iter_close(void *p) +{ + struct merged_iter *mi = p; + + merged_iter_pqueue_release(&mi->pq); + for (size_t i = 0; i < mi->subiters_len; i++) { + reftable_iterator_destroy(&mi->subiters[i].iter); + reftable_record_release(&mi->subiters[i].rec); + } + reftable_free(mi->subiters); +} + +static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) +{ + struct pq_entry e = { + .index = idx, + .rec = &mi->subiters[idx].rec, + }; + int err; + + err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec); + if (err) + return err; + + err = merged_iter_pqueue_add(&mi->pq, &e); + if (err) + return err; + + return 0; +} + +static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want) +{ + int err; + + mi->advance_index = -1; + while (!merged_iter_pqueue_is_empty(mi->pq)) { + err = merged_iter_pqueue_remove(&mi->pq, NULL); + if (err < 0) + return err; + } + + for (size_t i = 0; i < mi->subiters_len; i++) { + err = iterator_seek(&mi->subiters[i].iter, want); + if (err < 0) + return err; + if (err > 0) + continue; + + err = merged_iter_advance_subiter(mi, i); + if (err < 0) + return err; + } + + return 0; +} + +static int merged_iter_next_entry(struct merged_iter *mi, + struct reftable_record *rec) +{ + struct pq_entry entry = { 0 }; + int err = 0, empty; + + empty = merged_iter_pqueue_is_empty(mi->pq); + + if (mi->advance_index >= 0) { + /* + * When there are no pqueue entries then we only have a single + * subiter left. There is no need to use the pqueue in that + * case anymore as we know that the subiter will return entries + * in the correct order already. + * + * While this may sound like a very specific edge case, it may + * happen more frequently than you think. Most repositories + * will end up having a single large base table that contains + * most of the refs. It's thus likely that we exhaust all + * subiters but the one from that base ref. 
+ */ + if (empty) + return iterator_next(&mi->subiters[mi->advance_index].iter, + rec); + + err = merged_iter_advance_subiter(mi, mi->advance_index); + if (err < 0) + return err; + if (!err) + empty = 0; + mi->advance_index = -1; + } + + if (empty) + return 1; + + err = merged_iter_pqueue_remove(&mi->pq, &entry); + if (err < 0) + return err; + + /* + One can also use reftable as datacenter-local storage, where the ref + database is maintained in globally consistent database (eg. + CockroachDB or Spanner). In this scenario, replication delays together + with compaction may cause newer tables to contain older entries. In + such a deployment, the loop below must be changed to collect all + entries for the same key, and return new the newest one. + */ + while (!merged_iter_pqueue_is_empty(mi->pq)) { + struct pq_entry top = merged_iter_pqueue_top(mi->pq); + int cmp; + + err = reftable_record_cmp(top.rec, entry.rec, &cmp); + if (err < 0) + return err; + if (cmp > 0) + break; + + err = merged_iter_pqueue_remove(&mi->pq, NULL); + if (err < 0) + return err; + + err = merged_iter_advance_subiter(mi, top.index); + if (err < 0) + return err; + } + + mi->advance_index = entry.index; + REFTABLE_SWAP(*rec, *entry.rec); + return 0; +} + +static int merged_iter_seek_void(void *it, struct reftable_record *want) +{ + return merged_iter_seek(it, want); +} + +static int merged_iter_next_void(void *p, struct reftable_record *rec) +{ + struct merged_iter *mi = p; + while (1) { + int err = merged_iter_next_entry(mi, rec); + if (err) + return err; + if (mi->suppress_deletions && reftable_record_is_deletion(rec)) + continue; + return 0; + } +} + +static struct reftable_iterator_vtable merged_iter_vtable = { + .seek = merged_iter_seek_void, + .next = &merged_iter_next_void, + .close = &merged_iter_close, +}; + +static void iterator_from_merged_iter(struct reftable_iterator *it, + struct merged_iter *mi) +{ + assert(!it->ops); + it->iter_arg = mi; + it->ops = &merged_iter_vtable; +} + +int reftable_merged_table_new(struct reftable_merged_table **dest, + struct reftable_table **tables, size_t n, + enum reftable_hash hash_id) +{ + struct reftable_merged_table *m = NULL; + uint64_t last_max = 0; + uint64_t first_min = 0; + + for (size_t i = 0; i < n; i++) { + uint64_t min = reftable_table_min_update_index(tables[i]); + uint64_t max = reftable_table_max_update_index(tables[i]); + + if (reftable_table_hash_id(tables[i]) != hash_id) { + return REFTABLE_FORMAT_ERROR; + } + if (i == 0 || min < first_min) { + first_min = min; + } + if (i == 0 || max > last_max) { + last_max = max; + } + } + + REFTABLE_CALLOC_ARRAY(m, 1); + if (!m) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + m->tables = tables; + m->tables_len = n; + m->min = first_min; + m->max = last_max; + m->hash_id = hash_id; + *dest = m; + return 0; +} + +void reftable_merged_table_free(struct reftable_merged_table *mt) +{ + if (!mt) + return; + reftable_free(mt); +} + +uint64_t +reftable_merged_table_max_update_index(struct reftable_merged_table *mt) +{ + return mt->max; +} + +uint64_t +reftable_merged_table_min_update_index(struct reftable_merged_table *mt) +{ + return mt->min; +} + +int merged_table_init_iter(struct reftable_merged_table *mt, + struct reftable_iterator *it, + uint8_t typ) +{ + struct merged_subiter *subiters = NULL; + struct merged_iter *mi = NULL; + int ret; + + if (mt->tables_len) { + REFTABLE_CALLOC_ARRAY(subiters, mt->tables_len); + if (!subiters) { + ret = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + } + + for (size_t i = 0; i < mt->tables_len; 
i++) { + ret = reftable_record_init(&subiters[i].rec, typ); + if (ret < 0) + goto out; + + ret = table_init_iter(mt->tables[i], &subiters[i].iter, typ); + if (ret < 0) + goto out; + } + + REFTABLE_CALLOC_ARRAY(mi, 1); + if (!mi) { + ret = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + mi->advance_index = -1; + mi->suppress_deletions = mt->suppress_deletions; + mi->subiters = subiters; + mi->subiters_len = mt->tables_len; + + iterator_from_merged_iter(it, mi); + ret = 0; + +out: + if (ret < 0) { + for (size_t i = 0; subiters && i < mt->tables_len; i++) { + reftable_iterator_destroy(&subiters[i].iter); + reftable_record_release(&subiters[i].rec); + } + reftable_free(subiters); + reftable_free(mi); + } + + return ret; +} + +int reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it) +{ + return merged_table_init_iter(mt, it, REFTABLE_BLOCK_TYPE_REF); +} + +int reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it) +{ + return merged_table_init_iter(mt, it, REFTABLE_BLOCK_TYPE_LOG); +} + +enum reftable_hash reftable_merged_table_hash_id(struct reftable_merged_table *mt) +{ + return mt->hash_id; +} diff --git a/reftable/merged.h b/reftable/merged.h new file mode 100644 index 0000000000..4317e5f5f6 --- /dev/null +++ b/reftable/merged.h @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef MERGED_H +#define MERGED_H + +#include "system.h" +#include "reftable-basics.h" + +struct reftable_merged_table { + struct reftable_table **tables; + size_t tables_len; + enum reftable_hash hash_id; + + /* If unset, produce deletions. This is useful for compaction. For the + * full stack, deletions should be produced. 
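+ *
+ * When set, the merged iterator silently skips records for which
+ * reftable_record_is_deletion() returns true, so callers only ever
+ * see live entries.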
*/ + int suppress_deletions; + + uint64_t min; + uint64_t max; +}; + +struct reftable_iterator; + +int merged_table_init_iter(struct reftable_merged_table *mt, + struct reftable_iterator *it, + uint8_t typ); + +#endif diff --git a/reftable/pq.c b/reftable/pq.c new file mode 100644 index 0000000000..9a79f5c5ee --- /dev/null +++ b/reftable/pq.c @@ -0,0 +1,95 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "pq.h" + +#include "reftable-error.h" +#include "reftable-record.h" +#include "system.h" +#include "basics.h" + +int pq_less(struct pq_entry *a, struct pq_entry *b) +{ + int cmp, err; + + err = reftable_record_cmp(a->rec, b->rec, &cmp); + if (err < 0) + return err; + + if (cmp == 0) + return a->index > b->index; + return cmp < 0; +} + +int merged_iter_pqueue_remove(struct merged_iter_pqueue *pq, struct pq_entry *out) +{ + size_t i = 0; + struct pq_entry e = pq->heap[0]; + pq->heap[0] = pq->heap[pq->len - 1]; + pq->len--; + + while (i < pq->len) { + size_t min = i; + size_t j = 2 * i + 1; + size_t k = 2 * i + 2; + int cmp; + + if (j < pq->len) { + cmp = pq_less(&pq->heap[j], &pq->heap[i]); + if (cmp < 0) + return -1; + else if (cmp) + min = j; + } + + if (k < pq->len) { + cmp = pq_less(&pq->heap[k], &pq->heap[min]); + if (cmp < 0) + return -1; + else if (cmp) + min = k; + } + + if (min == i) + break; + REFTABLE_SWAP(pq->heap[i], pq->heap[min]); + i = min; + } + + if (out) + *out = e; + + return 0; +} + +int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e) +{ + size_t i = 0; + + REFTABLE_ALLOC_GROW_OR_NULL(pq->heap, pq->len + 1, pq->cap); + if (!pq->heap) + return REFTABLE_OUT_OF_MEMORY_ERROR; + pq->heap[pq->len++] = *e; + + i = pq->len - 1; + while (i > 0) { + size_t j = (i - 1) / 2; + if (pq_less(&pq->heap[j], &pq->heap[i])) + break; + REFTABLE_SWAP(pq->heap[j], pq->heap[i]); + i = j; + } + + return 0; +} + +void merged_iter_pqueue_release(struct merged_iter_pqueue *pq) +{ + REFTABLE_FREE_AND_NULL(pq->heap); + memset(pq, 0, sizeof(*pq)); +} diff --git a/reftable/pq.h b/reftable/pq.h new file mode 100644 index 0000000000..42310670b0 --- /dev/null +++ b/reftable/pq.h @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef PQ_H +#define PQ_H + +#include "record.h" + +struct pq_entry { + size_t index; + struct reftable_record *rec; +}; + +struct merged_iter_pqueue { + struct pq_entry *heap; + size_t len; + size_t cap; +}; + +int merged_iter_pqueue_remove(struct merged_iter_pqueue *pq, struct pq_entry *out); +int merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); +void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); +int pq_less(struct pq_entry *a, struct pq_entry *b); + +static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) +{ + return pq.heap[0]; +} + +static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) +{ + return pq.len == 0; +} + +#endif diff --git a/reftable/record.c b/reftable/record.c new file mode 100644 index 0000000000..fcd387ba5d --- /dev/null +++ b/reftable/record.c @@ -0,0 +1,1322 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be 
found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +/* record.c - methods for different types of records. */ + +#include "record.h" + +#include "system.h" +#include "constants.h" +#include "reftable-error.h" +#include "basics.h" + +static struct reftable_record_vtable * +reftable_record_vtable(struct reftable_record *rec); +static void *reftable_record_data(struct reftable_record *rec); + +int get_var_int(uint64_t *dest, struct string_view *in) +{ + const unsigned char *buf = in->buf; + unsigned char c; + uint64_t val; + + if (!in->len) + return -1; + c = *buf++; + val = c & 0x7f; + + while (c & 0x80) { + /* + * We use a micro-optimization here: whenever we see that the + * 0x80 bit is set, we know that the remainder of the value + * cannot be 0. The zero-values thus doesn't need to be encoded + * at all, which is why we subtract 1 when encoding and add 1 + * when decoding. + * + * This allows us to save a byte in some edge cases. + */ + val += 1; + if (!val || (val & (uint64_t)(~0ULL << (64 - 7)))) + return -1; /* overflow */ + if (buf >= in->buf + in->len) + return -1; + c = *buf++; + val = (val << 7) + (c & 0x7f); + } + + *dest = val; + return buf - in->buf; +} + +int put_var_int(struct string_view *dest, uint64_t value) +{ + unsigned char varint[10]; + unsigned pos = sizeof(varint) - 1; + varint[pos] = value & 0x7f; + while (value >>= 7) + varint[--pos] = 0x80 | (--value & 0x7f); + if (dest->len < sizeof(varint) - pos) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + memcpy(dest->buf, varint + pos, sizeof(varint) - pos); + return sizeof(varint) - pos; +} + +int reftable_is_block_type(uint8_t typ) +{ + switch (typ) { + case REFTABLE_BLOCK_TYPE_REF: + case REFTABLE_BLOCK_TYPE_LOG: + case REFTABLE_BLOCK_TYPE_OBJ: + case REFTABLE_BLOCK_TYPE_INDEX: + return 1; + } + return 0; +} + +const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec) +{ + switch (rec->value_type) { + case REFTABLE_REF_VAL1: + return rec->value.val1; + case REFTABLE_REF_VAL2: + return rec->value.val2.value; + default: + return NULL; + } +} + +const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec) +{ + switch (rec->value_type) { + case REFTABLE_REF_VAL2: + return rec->value.val2.target_value; + default: + return NULL; + } +} + +static int decode_string(struct reftable_buf *dest, struct string_view in) +{ + int start_len = in.len; + uint64_t tsize = 0; + int n, err; + + n = get_var_int(&tsize, &in); + if (n <= 0) + return -1; + string_view_consume(&in, n); + if (in.len < tsize) + return -1; + + reftable_buf_reset(dest); + err = reftable_buf_add(dest, in.buf, tsize); + if (err < 0) + return err; + + string_view_consume(&in, tsize); + + return start_len - in.len; +} + +static int encode_string(const char *str, struct string_view s) +{ + struct string_view start = s; + size_t l = strlen(str); + int n = put_var_int(&s, l); + if (n < 0) + return n; + string_view_consume(&s, n); + if (s.len < l) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + memcpy(s.buf, str, l); + string_view_consume(&s, l); + + return start.len - s.len; +} + +int reftable_encode_key(int *restart, struct string_view dest, + struct reftable_buf prev_key, struct reftable_buf key, + uint8_t extra) +{ + struct string_view start = dest; + size_t prefix_len = common_prefix_size(&prev_key, &key); + uint64_t suffix_len = key.len - prefix_len; + int n = put_var_int(&dest, prefix_len); + if (n < 0) + return n; + string_view_consume(&dest, n); + + *restart = (prefix_len == 0); + + 
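/*
+ * The value type is packed into the low three bits of the suffix-length
+ * varint; reftable_decode_keylen() splits the two apart again when
+ * reading the key back.
+ */
+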
n = put_var_int(&dest, suffix_len << 3 | (uint64_t)extra); + if (n < 0) + return n; + string_view_consume(&dest, n); + + if (dest.len < suffix_len) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + memcpy(dest.buf, key.buf + prefix_len, suffix_len); + string_view_consume(&dest, suffix_len); + + return start.len - dest.len; +} + +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra) +{ + size_t start_len = in.len; + int n; + + n = get_var_int(prefix_len, &in); + if (n < 0) + return -1; + string_view_consume(&in, n); + + n = get_var_int(suffix_len, &in); + if (n <= 0) + return -1; + string_view_consume(&in, n); + + *extra = (uint8_t)(*suffix_len & 0x7); + *suffix_len >>= 3; + + return start_len - in.len; +} + +int reftable_decode_key(struct reftable_buf *last_key, uint8_t *extra, + struct string_view in) +{ + int start_len = in.len; + uint64_t prefix_len = 0; + uint64_t suffix_len = 0; + int err, n; + + n = reftable_decode_keylen(in, &prefix_len, &suffix_len, extra); + if (n < 0) + return -1; + string_view_consume(&in, n); + + if (in.len < suffix_len || + prefix_len > last_key->len) + return -1; + + err = reftable_buf_setlen(last_key, prefix_len); + if (err < 0) + return err; + + err = reftable_buf_add(last_key, in.buf, suffix_len); + if (err < 0) + return err; + + string_view_consume(&in, suffix_len); + + return start_len - in.len; +} + +static int reftable_ref_record_key(const void *r, struct reftable_buf *dest) +{ + const struct reftable_ref_record *rec = + (const struct reftable_ref_record *)r; + reftable_buf_reset(dest); + return reftable_buf_addstr(dest, rec->refname); +} + +static int reftable_ref_record_copy_from(void *rec, const void *src_rec, + uint32_t hash_size) +{ + struct reftable_ref_record *ref = rec; + const struct reftable_ref_record *src = src_rec; + char *refname = NULL; + size_t refname_cap = 0; + int err; + + REFTABLE_SWAP(refname, ref->refname); + REFTABLE_SWAP(refname_cap, ref->refname_cap); + reftable_ref_record_release(ref); + REFTABLE_SWAP(ref->refname, refname); + REFTABLE_SWAP(ref->refname_cap, refname_cap); + + if (src->refname) { + size_t refname_len = strlen(src->refname); + + REFTABLE_ALLOC_GROW_OR_NULL(ref->refname, refname_len + 1, + ref->refname_cap); + if (!ref->refname) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + + memcpy(ref->refname, src->refname, refname_len); + ref->refname[refname_len] = 0; + } + + ref->update_index = src->update_index; + ref->value_type = src->value_type; + switch (src->value_type) { + case REFTABLE_REF_DELETION: + break; + case REFTABLE_REF_VAL1: + memcpy(ref->value.val1, src->value.val1, hash_size); + break; + case REFTABLE_REF_VAL2: + memcpy(ref->value.val2.value, src->value.val2.value, hash_size); + memcpy(ref->value.val2.target_value, + src->value.val2.target_value, hash_size); + break; + case REFTABLE_REF_SYMREF: + ref->value.symref = reftable_strdup(src->value.symref); + if (!ref->value.symref) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + break; + } + + err = 0; +out: + return err; +} + +static void reftable_ref_record_release_void(void *rec) +{ + reftable_ref_record_release(rec); +} + +void reftable_ref_record_release(struct reftable_ref_record *ref) +{ + switch (ref->value_type) { + case REFTABLE_REF_SYMREF: + reftable_free(ref->value.symref); + break; + case REFTABLE_REF_VAL2: + break; + case REFTABLE_REF_VAL1: + break; + case REFTABLE_REF_DELETION: + break; + default: + abort(); + } + + reftable_free(ref->refname); + memset(ref, 0, 
sizeof(struct reftable_ref_record)); +} + +static uint8_t reftable_ref_record_val_type(const void *rec) +{ + const struct reftable_ref_record *r = + (const struct reftable_ref_record *)rec; + return r->value_type; +} + +static int reftable_ref_record_encode(const void *rec, struct string_view s, + uint32_t hash_size) +{ + const struct reftable_ref_record *r = + (const struct reftable_ref_record *)rec; + struct string_view start = s; + int n = put_var_int(&s, r->update_index); + if (n < 0) + return n; + string_view_consume(&s, n); + + switch (r->value_type) { + case REFTABLE_REF_SYMREF: + n = encode_string(r->value.symref, s); + if (n < 0) + return n; + string_view_consume(&s, n); + break; + case REFTABLE_REF_VAL2: + if (s.len < 2 * hash_size) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + memcpy(s.buf, r->value.val2.value, hash_size); + string_view_consume(&s, hash_size); + memcpy(s.buf, r->value.val2.target_value, hash_size); + string_view_consume(&s, hash_size); + break; + case REFTABLE_REF_VAL1: + if (s.len < hash_size) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + memcpy(s.buf, r->value.val1, hash_size); + string_view_consume(&s, hash_size); + break; + case REFTABLE_REF_DELETION: + break; + default: + abort(); + } + + return start.len - s.len; +} + +static int reftable_ref_record_decode(void *rec, struct reftable_buf key, + uint8_t val_type, struct string_view in, + uint32_t hash_size, struct reftable_buf *scratch) +{ + struct reftable_ref_record *r = rec; + struct string_view start = in; + uint64_t update_index = 0; + const char *refname = NULL; + size_t refname_cap = 0; + int n, err; + + n = get_var_int(&update_index, &in); + if (n < 0) + return n; + string_view_consume(&in, n); + + REFTABLE_SWAP(refname, r->refname); + REFTABLE_SWAP(refname_cap, r->refname_cap); + reftable_ref_record_release(r); + REFTABLE_SWAP(r->refname, refname); + REFTABLE_SWAP(r->refname_cap, refname_cap); + + REFTABLE_ALLOC_GROW_OR_NULL(r->refname, key.len + 1, r->refname_cap); + if (!r->refname) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + memcpy(r->refname, key.buf, key.len); + r->refname[key.len] = 0; + + r->update_index = update_index; + r->value_type = val_type; + switch (val_type) { + case REFTABLE_REF_VAL1: + if (in.len < hash_size) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + memcpy(r->value.val1, in.buf, hash_size); + string_view_consume(&in, hash_size); + break; + + case REFTABLE_REF_VAL2: + if (in.len < 2 * hash_size) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + memcpy(r->value.val2.value, in.buf, hash_size); + string_view_consume(&in, hash_size); + + memcpy(r->value.val2.target_value, in.buf, hash_size); + string_view_consume(&in, hash_size); + break; + + case REFTABLE_REF_SYMREF: { + int n = decode_string(scratch, in); + if (n < 0) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + string_view_consume(&in, n); + r->value.symref = reftable_buf_detach(scratch); + } break; + + case REFTABLE_REF_DELETION: + break; + default: + abort(); + break; + } + + return start.len - in.len; + +done: + return err; +} + +static int reftable_ref_record_is_deletion_void(const void *p) +{ + return reftable_ref_record_is_deletion( + (const struct reftable_ref_record *)p); +} + +static int reftable_ref_record_equal_void(const void *a, + const void *b, uint32_t hash_size) +{ + struct reftable_ref_record *ra = (struct reftable_ref_record *) a; + struct reftable_ref_record *rb = (struct reftable_ref_record *) b; + return reftable_ref_record_equal(ra, rb, hash_size); +} + +static int 
reftable_ref_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_ref_record *a = _a; + const struct reftable_ref_record *b = _b; + return strcmp(a->refname, b->refname); +} + +static struct reftable_record_vtable reftable_ref_record_vtable = { + .key = &reftable_ref_record_key, + .type = REFTABLE_BLOCK_TYPE_REF, + .copy_from = &reftable_ref_record_copy_from, + .val_type = &reftable_ref_record_val_type, + .encode = &reftable_ref_record_encode, + .decode = &reftable_ref_record_decode, + .release = &reftable_ref_record_release_void, + .is_deletion = &reftable_ref_record_is_deletion_void, + .equal = &reftable_ref_record_equal_void, + .cmp = &reftable_ref_record_cmp_void, +}; + +static int reftable_obj_record_key(const void *r, struct reftable_buf *dest) +{ + const struct reftable_obj_record *rec = + (const struct reftable_obj_record *)r; + reftable_buf_reset(dest); + return reftable_buf_add(dest, rec->hash_prefix, rec->hash_prefix_len); +} + +static void reftable_obj_record_release(void *rec) +{ + struct reftable_obj_record *obj = rec; + REFTABLE_FREE_AND_NULL(obj->hash_prefix); + REFTABLE_FREE_AND_NULL(obj->offsets); + memset(obj, 0, sizeof(struct reftable_obj_record)); +} + +static int reftable_obj_record_copy_from(void *rec, const void *src_rec, + uint32_t hash_size REFTABLE_UNUSED) +{ + struct reftable_obj_record *obj = rec; + const struct reftable_obj_record *src = src_rec; + + reftable_obj_record_release(obj); + + REFTABLE_ALLOC_ARRAY(obj->hash_prefix, src->hash_prefix_len); + if (!obj->hash_prefix) + return REFTABLE_OUT_OF_MEMORY_ERROR; + obj->hash_prefix_len = src->hash_prefix_len; + if (src->hash_prefix_len) + memcpy(obj->hash_prefix, src->hash_prefix, obj->hash_prefix_len); + + if (src->offset_len) { + if (sizeof(*src->offsets) > SIZE_MAX / src->offset_len) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len); + if (!obj->offsets) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + memcpy(obj->offsets, src->offsets, sizeof(*src->offsets) * src->offset_len); + obj->offset_len = src->offset_len; + } + + return 0; +} + +static uint8_t reftable_obj_record_val_type(const void *rec) +{ + const struct reftable_obj_record *r = rec; + if (r->offset_len > 0 && r->offset_len < 8) + return r->offset_len; + return 0; +} + +static int reftable_obj_record_encode(const void *rec, struct string_view s, + uint32_t hash_size REFTABLE_UNUSED) +{ + const struct reftable_obj_record *r = rec; + struct string_view start = s; + int i = 0; + int n = 0; + uint64_t last = 0; + if (r->offset_len == 0 || r->offset_len >= 8) { + n = put_var_int(&s, r->offset_len); + if (n < 0) + return n; + string_view_consume(&s, n); + } + if (r->offset_len == 0) + return start.len - s.len; + n = put_var_int(&s, r->offsets[0]); + if (n < 0) + return n; + string_view_consume(&s, n); + + last = r->offsets[0]; + for (i = 1; i < r->offset_len; i++) { + int n = put_var_int(&s, r->offsets[i] - last); + if (n < 0) + return n; + string_view_consume(&s, n); + last = r->offsets[i]; + } + return start.len - s.len; +} + +static int reftable_obj_record_decode(void *rec, struct reftable_buf key, + uint8_t val_type, struct string_view in, + uint32_t hash_size REFTABLE_UNUSED, + struct reftable_buf *scratch REFTABLE_UNUSED) +{ + struct string_view start = in; + struct reftable_obj_record *r = rec; + uint64_t count = val_type; + int n = 0; + uint64_t last; + + reftable_obj_record_release(r); + + REFTABLE_ALLOC_ARRAY(r->hash_prefix, key.len); + if (!r->hash_prefix) + return 
REFTABLE_OUT_OF_MEMORY_ERROR; + memcpy(r->hash_prefix, key.buf, key.len); + r->hash_prefix_len = key.len; + + if (val_type == 0) { + n = get_var_int(&count, &in); + if (n < 0) { + return n; + } + + string_view_consume(&in, n); + } + + r->offsets = NULL; + r->offset_len = 0; + if (count == 0) + return start.len - in.len; + + REFTABLE_ALLOC_ARRAY(r->offsets, count); + if (!r->offsets) + return REFTABLE_OUT_OF_MEMORY_ERROR; + r->offset_len = count; + + n = get_var_int(&r->offsets[0], &in); + if (n < 0) + return n; + string_view_consume(&in, n); + + last = r->offsets[0]; + for (uint64_t j = 1; j < count; j++) { + uint64_t delta = 0; + int n = get_var_int(&delta, &in); + if (n < 0) { + return n; + } + string_view_consume(&in, n); + + last = r->offsets[j] = (delta + last); + } + return start.len - in.len; +} + +static int not_a_deletion(const void *p REFTABLE_UNUSED) +{ + return 0; +} + +static int reftable_obj_record_equal_void(const void *a, const void *b, + uint32_t hash_size REFTABLE_UNUSED) +{ + struct reftable_obj_record *ra = (struct reftable_obj_record *) a; + struct reftable_obj_record *rb = (struct reftable_obj_record *) b; + + if (ra->hash_prefix_len != rb->hash_prefix_len + || ra->offset_len != rb->offset_len) + return 0; + + if (ra->hash_prefix_len && + memcmp(ra->hash_prefix, rb->hash_prefix, ra->hash_prefix_len)) + return 0; + if (ra->offset_len && + memcmp(ra->offsets, rb->offsets, ra->offset_len * sizeof(uint64_t))) + return 0; + + return 1; +} + +static int reftable_obj_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_obj_record *a = _a; + const struct reftable_obj_record *b = _b; + int cmp; + + cmp = memcmp(a->hash_prefix, b->hash_prefix, + a->hash_prefix_len > b->hash_prefix_len ? + a->hash_prefix_len : b->hash_prefix_len); + if (cmp) + return cmp; + + /* + * When the prefix is the same then the object record that is longer is + * considered to be bigger. 
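+ * Only the sign of the returned value matters; reftable_record_cmp()
+ * and its callers treat it like a memcmp()-style result.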
+ */ + return a->hash_prefix_len - b->hash_prefix_len; +} + +static struct reftable_record_vtable reftable_obj_record_vtable = { + .key = &reftable_obj_record_key, + .type = REFTABLE_BLOCK_TYPE_OBJ, + .copy_from = &reftable_obj_record_copy_from, + .val_type = &reftable_obj_record_val_type, + .encode = &reftable_obj_record_encode, + .decode = &reftable_obj_record_decode, + .release = &reftable_obj_record_release, + .is_deletion = ¬_a_deletion, + .equal = &reftable_obj_record_equal_void, + .cmp = &reftable_obj_record_cmp_void, +}; + +static int reftable_log_record_key(const void *r, struct reftable_buf *dest) +{ + const struct reftable_log_record *rec = + (const struct reftable_log_record *)r; + int len = strlen(rec->refname), err; + uint8_t i64[8]; + uint64_t ts = 0; + + reftable_buf_reset(dest); + err = reftable_buf_add(dest, (uint8_t *)rec->refname, len + 1); + if (err < 0) + return err; + + ts = (~ts) - rec->update_index; + reftable_put_be64(&i64[0], ts); + + err = reftable_buf_add(dest, i64, sizeof(i64)); + if (err < 0) + return err; + + return 0; +} + +static int reftable_log_record_copy_from(void *rec, const void *src_rec, + uint32_t hash_size) +{ + struct reftable_log_record *dst = rec; + const struct reftable_log_record *src = + (const struct reftable_log_record *)src_rec; + int ret; + + reftable_log_record_release(dst); + *dst = *src; + + if (dst->refname) { + dst->refname = reftable_strdup(dst->refname); + if (!dst->refname) { + ret = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + } + + switch (dst->value_type) { + case REFTABLE_LOG_DELETION: + break; + case REFTABLE_LOG_UPDATE: + if (dst->value.update.email) + dst->value.update.email = + reftable_strdup(dst->value.update.email); + if (dst->value.update.name) + dst->value.update.name = + reftable_strdup(dst->value.update.name); + if (dst->value.update.message) + dst->value.update.message = + reftable_strdup(dst->value.update.message); + + if (!dst->value.update.email || + !dst->value.update.name || + !dst->value.update.message) { + ret = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + + memcpy(dst->value.update.new_hash, + src->value.update.new_hash, hash_size); + memcpy(dst->value.update.old_hash, + src->value.update.old_hash, hash_size); + break; + } + + ret = 0; +out: + return ret; +} + +static void reftable_log_record_release_void(void *rec) +{ + struct reftable_log_record *r = rec; + reftable_log_record_release(r); +} + +void reftable_log_record_release(struct reftable_log_record *r) +{ + reftable_free(r->refname); + switch (r->value_type) { + case REFTABLE_LOG_DELETION: + break; + case REFTABLE_LOG_UPDATE: + reftable_free(r->value.update.name); + reftable_free(r->value.update.email); + reftable_free(r->value.update.message); + break; + } + memset(r, 0, sizeof(struct reftable_log_record)); +} + +static uint8_t reftable_log_record_val_type(const void *rec) +{ + const struct reftable_log_record *log = + (const struct reftable_log_record *)rec; + + return reftable_log_record_is_deletion(log) ? 0 : 1; +} + +static int reftable_log_record_encode(const void *rec, struct string_view s, + uint32_t hash_size) +{ + const struct reftable_log_record *r = rec; + struct string_view start = s; + int n = 0; + if (reftable_log_record_is_deletion(r)) + return 0; + + if (s.len < 2 * hash_size) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + + memcpy(s.buf, r->value.update.old_hash, hash_size); + memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size); + string_view_consume(&s, 2 * hash_size); + + n = encode_string(r->value.update.name ? 
r->value.update.name : "", s); + if (n < 0) + return n; + string_view_consume(&s, n); + + n = encode_string(r->value.update.email ? r->value.update.email : "", + s); + if (n < 0) + return n; + string_view_consume(&s, n); + + n = put_var_int(&s, r->value.update.time); + if (n < 0) + return n; + string_view_consume(&s, n); + + if (s.len < 2) + return REFTABLE_ENTRY_TOO_BIG_ERROR; + + reftable_put_be16(s.buf, r->value.update.tz_offset); + string_view_consume(&s, 2); + + n = encode_string( + r->value.update.message ? r->value.update.message : "", s); + if (n < 0) + return n; + string_view_consume(&s, n); + + return start.len - s.len; +} + +static int reftable_log_record_decode(void *rec, struct reftable_buf key, + uint8_t val_type, struct string_view in, + uint32_t hash_size, struct reftable_buf *scratch) +{ + struct string_view start = in; + struct reftable_log_record *r = rec; + uint64_t max = 0; + uint64_t ts = 0; + int err, n; + + if (key.len <= 9 || key.buf[key.len - 9] != 0) + return REFTABLE_FORMAT_ERROR; + + REFTABLE_ALLOC_GROW_OR_NULL(r->refname, key.len - 8, r->refname_cap); + if (!r->refname) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + memcpy(r->refname, key.buf, key.len - 8); + ts = reftable_get_be64((unsigned char *)key.buf + key.len - 8); + + r->update_index = (~max) - ts; + + if (val_type != r->value_type) { + switch (r->value_type) { + case REFTABLE_LOG_UPDATE: + REFTABLE_FREE_AND_NULL(r->value.update.message); + r->value.update.message_cap = 0; + REFTABLE_FREE_AND_NULL(r->value.update.email); + REFTABLE_FREE_AND_NULL(r->value.update.name); + break; + case REFTABLE_LOG_DELETION: + break; + } + } + + r->value_type = val_type; + if (val_type == REFTABLE_LOG_DELETION) + return 0; + + if (in.len < 2 * hash_size) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + memcpy(r->value.update.old_hash, in.buf, hash_size); + memcpy(r->value.update.new_hash, in.buf + hash_size, hash_size); + + string_view_consume(&in, 2 * hash_size); + + n = decode_string(scratch, in); + if (n < 0) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + string_view_consume(&in, n); + + /* + * In almost all cases we can expect the reflog name to not change for + * reflog entries as they are tied to the local identity, not to the + * target commits. As an optimization for this common case we can thus + * skip copying over the name in case it's accurate already. + */ + if (!r->value.update.name || + strcmp(r->value.update.name, scratch->buf)) { + char *name = reftable_realloc(r->value.update.name, scratch->len + 1); + if (!name) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + r->value.update.name = name; + memcpy(r->value.update.name, scratch->buf, scratch->len); + r->value.update.name[scratch->len] = 0; + } + + n = decode_string(scratch, in); + if (n < 0) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + string_view_consume(&in, n); + + /* Same as above, but for the reflog email. 
*/ + if (!r->value.update.email || + strcmp(r->value.update.email, scratch->buf)) { + char *email = reftable_realloc(r->value.update.email, scratch->len + 1); + if (!email) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + r->value.update.email = email; + memcpy(r->value.update.email, scratch->buf, scratch->len); + r->value.update.email[scratch->len] = 0; + } + + ts = 0; + n = get_var_int(&ts, &in); + if (n < 0) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + string_view_consume(&in, n); + r->value.update.time = ts; + if (in.len < 2) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + r->value.update.tz_offset = reftable_get_be16(in.buf); + string_view_consume(&in, 2); + + n = decode_string(scratch, in); + if (n < 0) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + string_view_consume(&in, n); + + REFTABLE_ALLOC_GROW_OR_NULL(r->value.update.message, scratch->len + 1, + r->value.update.message_cap); + if (!r->value.update.message) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + memcpy(r->value.update.message, scratch->buf, scratch->len); + r->value.update.message[scratch->len] = 0; + + return start.len - in.len; + +done: + return err; +} + +static int null_streq(const char *a, const char *b) +{ + const char *empty = ""; + if (!a) + a = empty; + + if (!b) + b = empty; + + return 0 == strcmp(a, b); +} + +static int reftable_log_record_equal_void(const void *a, + const void *b, uint32_t hash_size) +{ + return reftable_log_record_equal((struct reftable_log_record *) a, + (struct reftable_log_record *) b, + hash_size); +} + +static int reftable_log_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_log_record *a = _a; + const struct reftable_log_record *b = _b; + int cmp = strcmp(a->refname, b->refname); + if (cmp) + return cmp; + + /* + * Note that the comparison here is reversed. This is because the + * update index is reversed when comparing keys. For reference, see how + * we handle this in reftable_log_record_key()`. 
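+ * The net effect is that, for the same refname, the record with the
+ * higher update_index (the newer entry) sorts first.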
+ */ + return b->update_index - a->update_index; +} + +int reftable_log_record_equal(const struct reftable_log_record *a, + const struct reftable_log_record *b, uint32_t hash_size) +{ + if (!(null_streq(a->refname, b->refname) && + a->update_index == b->update_index && + a->value_type == b->value_type)) + return 0; + + switch (a->value_type) { + case REFTABLE_LOG_DELETION: + return 1; + case REFTABLE_LOG_UPDATE: + return null_streq(a->value.update.name, b->value.update.name) && + a->value.update.time == b->value.update.time && + a->value.update.tz_offset == b->value.update.tz_offset && + null_streq(a->value.update.email, + b->value.update.email) && + null_streq(a->value.update.message, + b->value.update.message) && + !memcmp(a->value.update.old_hash, + b->value.update.old_hash, hash_size) && + !memcmp(a->value.update.new_hash, + b->value.update.new_hash, hash_size); + } + + abort(); +} + +static int reftable_log_record_is_deletion_void(const void *p) +{ + return reftable_log_record_is_deletion( + (const struct reftable_log_record *)p); +} + +static struct reftable_record_vtable reftable_log_record_vtable = { + .key = &reftable_log_record_key, + .type = REFTABLE_BLOCK_TYPE_LOG, + .copy_from = &reftable_log_record_copy_from, + .val_type = &reftable_log_record_val_type, + .encode = &reftable_log_record_encode, + .decode = &reftable_log_record_decode, + .release = &reftable_log_record_release_void, + .is_deletion = &reftable_log_record_is_deletion_void, + .equal = &reftable_log_record_equal_void, + .cmp = &reftable_log_record_cmp_void, +}; + +static int reftable_index_record_key(const void *r, struct reftable_buf *dest) +{ + const struct reftable_index_record *rec = r; + reftable_buf_reset(dest); + return reftable_buf_add(dest, rec->last_key.buf, rec->last_key.len); +} + +static int reftable_index_record_copy_from(void *rec, const void *src_rec, + uint32_t hash_size REFTABLE_UNUSED) +{ + struct reftable_index_record *dst = rec; + const struct reftable_index_record *src = src_rec; + int err; + + reftable_buf_reset(&dst->last_key); + err = reftable_buf_add(&dst->last_key, src->last_key.buf, src->last_key.len); + if (err < 0) + return err; + dst->offset = src->offset; + + return 0; +} + +static void reftable_index_record_release(void *rec) +{ + struct reftable_index_record *idx = rec; + reftable_buf_release(&idx->last_key); +} + +static uint8_t reftable_index_record_val_type(const void *rec REFTABLE_UNUSED) +{ + return 0; +} + +static int reftable_index_record_encode(const void *rec, struct string_view out, + uint32_t hash_size REFTABLE_UNUSED) +{ + const struct reftable_index_record *r = + (const struct reftable_index_record *)rec; + struct string_view start = out; + + int n = put_var_int(&out, r->offset); + if (n < 0) + return n; + + string_view_consume(&out, n); + + return start.len - out.len; +} + +static int reftable_index_record_decode(void *rec, struct reftable_buf key, + uint8_t val_type REFTABLE_UNUSED, + struct string_view in, + uint32_t hash_size REFTABLE_UNUSED, + struct reftable_buf *scratch REFTABLE_UNUSED) +{ + struct string_view start = in; + struct reftable_index_record *r = rec; + int err, n = 0; + + reftable_buf_reset(&r->last_key); + err = reftable_buf_add(&r->last_key, key.buf, key.len); + if (err < 0) + return err; + + n = get_var_int(&r->offset, &in); + if (n < 0) + return n; + + string_view_consume(&in, n); + return start.len - in.len; +} + +static int reftable_index_record_equal(const void *a, const void *b, + uint32_t hash_size REFTABLE_UNUSED) +{ + struct 
reftable_index_record *ia = (struct reftable_index_record *) a; + struct reftable_index_record *ib = (struct reftable_index_record *) b; + + return ia->offset == ib->offset && !reftable_buf_cmp(&ia->last_key, &ib->last_key); +} + +static int reftable_index_record_cmp(const void *_a, const void *_b) +{ + const struct reftable_index_record *a = _a; + const struct reftable_index_record *b = _b; + return reftable_buf_cmp(&a->last_key, &b->last_key); +} + +static struct reftable_record_vtable reftable_index_record_vtable = { + .key = &reftable_index_record_key, + .type = REFTABLE_BLOCK_TYPE_INDEX, + .copy_from = &reftable_index_record_copy_from, + .val_type = &reftable_index_record_val_type, + .encode = &reftable_index_record_encode, + .decode = &reftable_index_record_decode, + .release = &reftable_index_record_release, + .is_deletion = ¬_a_deletion, + .equal = &reftable_index_record_equal, + .cmp = &reftable_index_record_cmp, +}; + +int reftable_record_key(struct reftable_record *rec, struct reftable_buf *dest) +{ + return reftable_record_vtable(rec)->key(reftable_record_data(rec), dest); +} + +int reftable_record_encode(struct reftable_record *rec, struct string_view dest, + uint32_t hash_size) +{ + return reftable_record_vtable(rec)->encode(reftable_record_data(rec), + dest, hash_size); +} + +int reftable_record_copy_from(struct reftable_record *rec, + struct reftable_record *src, uint32_t hash_size) +{ + assert(src->type == rec->type); + + return reftable_record_vtable(rec)->copy_from(reftable_record_data(rec), + reftable_record_data(src), + hash_size); +} + +uint8_t reftable_record_val_type(struct reftable_record *rec) +{ + return reftable_record_vtable(rec)->val_type(reftable_record_data(rec)); +} + +int reftable_record_decode(struct reftable_record *rec, struct reftable_buf key, + uint8_t extra, struct string_view src, uint32_t hash_size, + struct reftable_buf *scratch) +{ + return reftable_record_vtable(rec)->decode(reftable_record_data(rec), + key, extra, src, hash_size, + scratch); +} + +void reftable_record_release(struct reftable_record *rec) +{ + reftable_record_vtable(rec)->release(reftable_record_data(rec)); +} + +int reftable_record_is_deletion(struct reftable_record *rec) +{ + return reftable_record_vtable(rec)->is_deletion( + reftable_record_data(rec)); +} + +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b, + int *cmp) +{ + if (a->type != b->type) + return -1; + *cmp = reftable_record_vtable(a)->cmp(reftable_record_data(a), + reftable_record_data(b)); + return 0; +} + +int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size) +{ + if (a->type != b->type) + return 0; + return reftable_record_vtable(a)->equal( + reftable_record_data(a), reftable_record_data(b), hash_size); +} + +static int hash_equal(const unsigned char *a, const unsigned char *b, uint32_t hash_size) +{ + if (a && b) + return !memcmp(a, b, hash_size); + + return a == b; +} + +int reftable_ref_record_equal(const struct reftable_ref_record *a, + const struct reftable_ref_record *b, uint32_t hash_size) +{ + if (!null_streq(a->refname, b->refname)) + return 0; + + if (a->update_index != b->update_index || + a->value_type != b->value_type) + return 0; + + switch (a->value_type) { + case REFTABLE_REF_SYMREF: + return !strcmp(a->value.symref, b->value.symref); + case REFTABLE_REF_VAL2: + return hash_equal(a->value.val2.value, b->value.val2.value, + hash_size) && + hash_equal(a->value.val2.target_value, + b->value.val2.target_value, hash_size); + 
case REFTABLE_REF_VAL1: + return hash_equal(a->value.val1, b->value.val1, hash_size); + case REFTABLE_REF_DELETION: + return 1; + default: + abort(); + } +} + +int reftable_ref_record_compare_name(const void *a, const void *b) +{ + return strcmp(((struct reftable_ref_record *)a)->refname, + ((struct reftable_ref_record *)b)->refname); +} + +int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref) +{ + return ref->value_type == REFTABLE_REF_DELETION; +} + +int reftable_log_record_compare_key(const void *a, const void *b) +{ + const struct reftable_log_record *la = a; + const struct reftable_log_record *lb = b; + + int cmp = strcmp(la->refname, lb->refname); + if (cmp) + return cmp; + if (la->update_index > lb->update_index) + return -1; + return (la->update_index < lb->update_index) ? 1 : 0; +} + +int reftable_log_record_is_deletion(const struct reftable_log_record *log) +{ + return (log->value_type == REFTABLE_LOG_DELETION); +} + +static void *reftable_record_data(struct reftable_record *rec) +{ + switch (rec->type) { + case REFTABLE_BLOCK_TYPE_REF: + return &rec->u.ref; + case REFTABLE_BLOCK_TYPE_LOG: + return &rec->u.log; + case REFTABLE_BLOCK_TYPE_INDEX: + return &rec->u.idx; + case REFTABLE_BLOCK_TYPE_OBJ: + return &rec->u.obj; + } + abort(); +} + +static struct reftable_record_vtable * +reftable_record_vtable(struct reftable_record *rec) +{ + switch (rec->type) { + case REFTABLE_BLOCK_TYPE_REF: + return &reftable_ref_record_vtable; + case REFTABLE_BLOCK_TYPE_LOG: + return &reftable_log_record_vtable; + case REFTABLE_BLOCK_TYPE_INDEX: + return &reftable_index_record_vtable; + case REFTABLE_BLOCK_TYPE_OBJ: + return &reftable_obj_record_vtable; + } + abort(); +} + +int reftable_record_init(struct reftable_record *rec, uint8_t typ) +{ + memset(rec, 0, sizeof(*rec)); + rec->type = typ; + + switch (typ) { + case REFTABLE_BLOCK_TYPE_REF: + case REFTABLE_BLOCK_TYPE_LOG: + case REFTABLE_BLOCK_TYPE_OBJ: + return 0; + case REFTABLE_BLOCK_TYPE_INDEX: + reftable_buf_init(&rec->u.idx.last_key); + return 0; + default: + return REFTABLE_API_ERROR; + } +} diff --git a/reftable/record.h b/reftable/record.h new file mode 100644 index 0000000000..7953f352a3 --- /dev/null +++ b/reftable/record.h @@ -0,0 +1,164 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef RECORD_H +#define RECORD_H + +#include "basics.h" +#include "system.h" + +#include <stdint.h> + +#include "reftable-record.h" + +/* + * A substring of existing string data. This structure takes no responsibility + * for the lifetime of the data it points to. + */ +struct string_view { + uint8_t *buf; + size_t len; +}; + +/* Advance `s.buf` by `n`, and decrease length. */ +static inline void string_view_consume(struct string_view *s, int n) +{ + s->buf += n; + s->len -= n; +} + +/* + * Decode and encode a varint. Returns the number of bytes read/written, or a + * negative value in case encoding/decoding the varint has failed. + */ +int get_var_int(uint64_t *dest, struct string_view *in); +int put_var_int(struct string_view *dest, uint64_t val); + +/* Methods for records. */ +struct reftable_record_vtable { + /* encode the key of to a uint8_t reftable_buf. */ + int (*key)(const void *rec, struct reftable_buf *dest); + + /* The record type of ('r' for ref). 
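+ * See the REFTABLE_BLOCK_TYPE_* constants in reftable-constants.h.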
*/ + uint8_t type; + + int (*copy_from)(void *dest, const void *src, uint32_t hash_size); + + /* a value of [0..7], indicating record subvariants (eg. ref vs. symref + * vs ref deletion) */ + uint8_t (*val_type)(const void *rec); + + /* encodes rec into dest, returning how much space was used. */ + int (*encode)(const void *rec, struct string_view dest, uint32_t hash_size); + + /* decode data from `src` into the record. */ + int (*decode)(void *rec, struct reftable_buf key, uint8_t extra, + struct string_view src, uint32_t hash_size, + struct reftable_buf *scratch); + + /* deallocate and null the record. */ + void (*release)(void *rec); + + /* is this a tombstone? */ + int (*is_deletion)(const void *rec); + + /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ + int (*equal)(const void *a, const void *b, uint32_t hash_size); + + /* + * Compare keys of two records with each other. The records must have + * the same type. + */ + int (*cmp)(const void *a, const void *b); +}; + +/* returns true for recognized block types. Block start with the block type. */ +int reftable_is_block_type(uint8_t typ); + +/* Encode `key` into `dest`. Sets `is_restart` to indicate a restart. Returns + * number of bytes written. */ +int reftable_encode_key(int *is_restart, struct string_view dest, + struct reftable_buf prev_key, struct reftable_buf key, + uint8_t extra); + +/* Decode a record's key lengths. */ +int reftable_decode_keylen(struct string_view in, + uint64_t *prefix_len, + uint64_t *suffix_len, + uint8_t *extra); + +/* + * Decode into `last_key` and `extra` from `in`. `last_key` is expected to + * contain the decoded key of the preceding record, if any. + */ +int reftable_decode_key(struct reftable_buf *last_key, uint8_t *extra, + struct string_view in); + +/* reftable_index_record are used internally to speed up lookups. */ +struct reftable_index_record { + uint64_t offset; /* Offset of block */ + struct reftable_buf last_key; /* Last key of the block. */ +}; + +/* reftable_obj_record stores an object ID => ref mapping. */ +struct reftable_obj_record { + uint8_t *hash_prefix; /* leading bytes of the object ID */ + int hash_prefix_len; /* number of leading bytes. Constant + * across a single table. */ + uint64_t *offsets; /* a vector of file offsets. */ + int offset_len; +}; + +/* record is a generic wrapper for different types of records. It is normally + * created on the stack, or embedded within another struct. If the type is + * known, a fresh instance can be initialized explicitly. Otherwise, use + * `reftable_record_init()` to initialize generically (as the index_record is + * not valid as 0-initialized structure) + */ +struct reftable_record { + uint8_t type; + union { + struct reftable_ref_record ref; + struct reftable_log_record log; + struct reftable_obj_record obj; + struct reftable_index_record idx; + } u; +}; + +/* Initialize the reftable record for the given type. 
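+ *
+ * An illustrative lifecycle sketch (error handling kept minimal):
+ *
+ *   struct reftable_record rec;
+ *   int err = reftable_record_init(&rec, REFTABLE_BLOCK_TYPE_REF);
+ *   if (err < 0)
+ *           return err;
+ *   ... fill or decode the record ...
+ *   reftable_record_release(&rec);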
*/ +int reftable_record_init(struct reftable_record *rec, uint8_t typ); + +/* see struct record_vtable */ +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b, int *cmp); +int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, uint32_t hash_size); +int reftable_record_key(struct reftable_record *rec, struct reftable_buf *dest); +int reftable_record_copy_from(struct reftable_record *rec, + struct reftable_record *src, uint32_t hash_size); +uint8_t reftable_record_val_type(struct reftable_record *rec); +int reftable_record_encode(struct reftable_record *rec, struct string_view dest, + uint32_t hash_size); +int reftable_record_decode(struct reftable_record *rec, struct reftable_buf key, + uint8_t extra, struct string_view src, + uint32_t hash_size, struct reftable_buf *scratch); +int reftable_record_is_deletion(struct reftable_record *rec); + +static inline uint8_t reftable_record_type(struct reftable_record *rec) +{ + return rec->type; +} + +/* frees and zeroes out the embedded record */ +void reftable_record_release(struct reftable_record *rec); + +/* for qsort. */ +int reftable_ref_record_compare_name(const void *a, const void *b); + +/* for qsort. */ +int reftable_log_record_compare_key(const void *a, const void *b); + +#endif diff --git a/reftable/reftable-basics.h b/reftable/reftable-basics.h new file mode 100644 index 0000000000..6d73f19c85 --- /dev/null +++ b/reftable/reftable-basics.h @@ -0,0 +1,39 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_BASICS_H +#define REFTABLE_BASICS_H + +#include <stddef.h> + +/* A buffer that contains arbitrary byte slices. */ +struct reftable_buf { + size_t alloc; + size_t len; + char *buf; +}; +#define REFTABLE_BUF_INIT { 0 } + +/* + * Hash functions understood by the reftable library. Note that the values are + * arbitrary and somewhat random such that we can easily detect cases where the + * hash hasn't been properly set up. + */ +enum reftable_hash { + REFTABLE_HASH_SHA1 = 89, + REFTABLE_HASH_SHA256 = 247, +}; +#define REFTABLE_HASH_SIZE_SHA1 20 +#define REFTABLE_HASH_SIZE_SHA256 32 +#define REFTABLE_HASH_SIZE_MAX REFTABLE_HASH_SIZE_SHA256 + +/* Overrides the functions to use for memory management. */ +void reftable_set_alloc(void *(*malloc)(size_t), + void *(*realloc)(void *, size_t), void (*free)(void *)); + +#endif diff --git a/reftable/reftable-block.h b/reftable/reftable-block.h new file mode 100644 index 0000000000..0b05a8f7e3 --- /dev/null +++ b/reftable/reftable-block.h @@ -0,0 +1,75 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_BLOCK_H +#define REFTABLE_BLOCK_H + +#include <stdint.h> + +#include "reftable-basics.h" +#include "reftable-blocksource.h" +#include "reftable-iterator.h" + +struct z_stream_s; + +/* + * A block part of a reftable. Contains records as well as some metadata + * describing them. + */ +struct reftable_block { + /* + * Offset of the block header; nonzero for the first block in a + * reftable. + */ + uint32_t header_off; + + /* The memory block. */ + struct reftable_block_data block_data; + uint32_t hash_size; + + /* Uncompressed data for log entries. 
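+ * Log blocks may be stored zlib-deflated; these fields hold the
+ * inflated copy and the stream state used to produce it.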
*/ + struct z_stream_s *zstream; + unsigned char *uncompressed_data; + size_t uncompressed_cap; + + /* + * Restart point data. Restart points are located after the block's + * record data. + */ + uint16_t restart_count; + uint32_t restart_off; + + /* + * Size of the data in the file. For log blocks, this is the compressed + * size. + */ + uint32_t full_block_size; + uint8_t block_type; +}; + +/* Initialize a reftable block from the given block source. */ +int reftable_block_init(struct reftable_block *b, + struct reftable_block_source *source, + uint32_t offset, uint32_t header_size, + uint32_t table_block_size, uint32_t hash_size, + uint8_t want_type); + +/* Release resources allocated by the block. */ +void reftable_block_release(struct reftable_block *b); + +/* Initialize a generic record iterator from the given block. */ +int reftable_block_init_iterator(const struct reftable_block *b, + struct reftable_iterator *it); + +/* Returns the block type (eg. 'r' for refs). */ +uint8_t reftable_block_type(const struct reftable_block *b); + +/* Decodes the first key in the block. */ +int reftable_block_first_key(const struct reftable_block *b, struct reftable_buf *key); + +#endif /* REFTABLE_BLOCK_H */ diff --git a/reftable/reftable-blocksource.h b/reftable/reftable-blocksource.h new file mode 100644 index 0000000000..f5ba867bd6 --- /dev/null +++ b/reftable/reftable-blocksource.h @@ -0,0 +1,53 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_BLOCKSOURCE_H +#define REFTABLE_BLOCKSOURCE_H + +#include <stdint.h> + +/* + * Generic wrapper for a seekable readable file. + */ +struct reftable_block_source { + struct reftable_block_source_vtable *ops; + void *arg; +}; + +/* a contiguous segment of bytes. It keeps track of its generating block_source + * so it can return itself into the pool. */ +struct reftable_block_data { + uint8_t *data; + size_t len; + struct reftable_block_source source; +}; + +/* block_source_vtable are the operations that make up block_source */ +struct reftable_block_source_vtable { + /* Returns the size of a block source. */ + uint64_t (*size)(void *source); + + /* + * Reads a segment from the block source. It is an error to read beyond + * the end of the block. + */ + ssize_t (*read_data)(void *source, struct reftable_block_data *dest, + uint64_t off, uint32_t size); + + /* Mark the block as read; may release the data. */ + void (*release_data)(void *source, struct reftable_block_data *data); + + /* Release all resources associated with the block source. 
*/ + void (*close)(void *source); +}; + +/* opens a file on the file system as a block_source */ +int reftable_block_source_from_file(struct reftable_block_source *block_src, + const char *name); + +#endif diff --git a/reftable/reftable-constants.h b/reftable/reftable-constants.h new file mode 100644 index 0000000000..4ae9ba4bac --- /dev/null +++ b/reftable/reftable-constants.h @@ -0,0 +1,18 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_CONSTANTS_H +#define REFTABLE_CONSTANTS_H + +#define REFTABLE_BLOCK_TYPE_LOG 'g' +#define REFTABLE_BLOCK_TYPE_INDEX 'i' +#define REFTABLE_BLOCK_TYPE_REF 'r' +#define REFTABLE_BLOCK_TYPE_OBJ 'o' +#define REFTABLE_BLOCK_TYPE_ANY 0 + +#endif /* REFTABLE_CONSTANTS_H */ diff --git a/reftable/reftable-error.h b/reftable/reftable-error.h new file mode 100644 index 0000000000..d100e0df92 --- /dev/null +++ b/reftable/reftable-error.h @@ -0,0 +1,70 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_ERROR_H +#define REFTABLE_ERROR_H + +/* + * Errors in reftable calls are signaled with negative integer return values. 0 + * means success. + */ +enum reftable_error { + /* Unexpected file system behavior */ + REFTABLE_IO_ERROR = -2, + + /* Format inconsistency on reading data */ + REFTABLE_FORMAT_ERROR = -3, + + /* File does not exist. Returned from block_source_from_file(), because + * it needs special handling in stack. + */ + REFTABLE_NOT_EXIST_ERROR = -4, + + /* Trying to access locked data. */ + REFTABLE_LOCK_ERROR = -5, + + /* Misuse of the API: + * - on writing a record with NULL refname. + * - on writing a record before setting the writer limits. + * - on writing a reftable_ref_record outside the table limits + * - on writing a ref or log record before the stack's + * next_update_inde*x + * - on writing a log record with multiline message with + * exact_log_message unset + * - on reading a reftable_ref_record from log iterator, or vice versa. + * + * When a call misuses the API, the internal state of the library is + * kept unchanged. + */ + REFTABLE_API_ERROR = -6, + + /* Decompression error */ + REFTABLE_ZLIB_ERROR = -7, + + /* Wrote a table without blocks. */ + REFTABLE_EMPTY_TABLE_ERROR = -8, + + /* Invalid ref name. */ + REFTABLE_REFNAME_ERROR = -10, + + /* Entry does not fit. This can happen when writing outsize reflog + messages. */ + REFTABLE_ENTRY_TOO_BIG_ERROR = -11, + + /* Trying to write out-of-date data. */ + REFTABLE_OUTDATED_ERROR = -12, + + /* An allocation has failed due to an out-of-memory situation. */ + REFTABLE_OUT_OF_MEMORY_ERROR = -13, +}; + +/* convert the numeric error code to a string. The string should not be + * deallocated. 
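+ *
+ * For example (illustrative):
+ *
+ *   if (err < 0)
+ *           fprintf(stderr, "reftable: %s\n", reftable_error_str(err));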
*/ +const char *reftable_error_str(int err); + +#endif diff --git a/reftable/reftable-iterator.h b/reftable/reftable-iterator.h new file mode 100644 index 0000000000..af582028c2 --- /dev/null +++ b/reftable/reftable-iterator.h @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_ITERATOR_H +#define REFTABLE_ITERATOR_H + +#include "reftable-record.h" + +struct reftable_iterator_vtable; + +/* iterator is the generic interface for walking over data stored in a + * reftable. + */ +struct reftable_iterator { + struct reftable_iterator_vtable *ops; + void *iter_arg; +}; + +/* + * Position the iterator at the ref record with given name such that the next + * call to `next_ref()` would yield the record. + */ +int reftable_iterator_seek_ref(struct reftable_iterator *it, + const char *name); + +/* reads the next reftable_ref_record. Returns < 0 for error, 0 for OK and > 0: + * end of iteration. + */ +int reftable_iterator_next_ref(struct reftable_iterator *it, + struct reftable_ref_record *ref); + +/* + * Position the iterator at the log record with given name and update index + * such that the next call to `next_log()` would yield the record. + */ +int reftable_iterator_seek_log_at(struct reftable_iterator *it, + const char *name, uint64_t update_index); + +/* + * Position the iterator at the newest log record with given name such that the + * next call to `next_log()` would yield the record. + */ +int reftable_iterator_seek_log(struct reftable_iterator *it, + const char *name); + +/* reads the next reftable_log_record. Returns < 0 for error, 0 for OK and > 0: + * end of iteration. + */ +int reftable_iterator_next_log(struct reftable_iterator *it, + struct reftable_log_record *log); + +/* releases resources associated with an iterator. */ +void reftable_iterator_destroy(struct reftable_iterator *it); + +#endif diff --git a/reftable/reftable-merged.h b/reftable/reftable-merged.h new file mode 100644 index 0000000000..e5af846b32 --- /dev/null +++ b/reftable/reftable-merged.h @@ -0,0 +1,61 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_MERGED_H +#define REFTABLE_MERGED_H + +#include "reftable-iterator.h" + +/* + * Merged tables + * + * A ref database kept in a sequence of table files. The merged_table presents a + * unified view to reading (seeking, iterating) a sequence of immutable tables. + * + * The merged tables are on purpose kept disconnected from their actual storage + * (eg. files on disk), because it is useful to merge tables aren't files. For + * example, the per-workspace and global ref namespace can be implemented as a + * merged table of two stacks of file-backed reftables. + */ + +/* A merged table is implements seeking/iterating over a stack of tables. */ +struct reftable_merged_table; + +struct reftable_table; + +/* + * reftable_merged_table_new creates a new merged table. The tables must be + * kept alive as long as the merged table is still in use. + */ +int reftable_merged_table_new(struct reftable_merged_table **dest, + struct reftable_table **tables, size_t n, + enum reftable_hash hash_id); + +/* Initialize a merged table iterator for reading refs. 
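+ *
+ * A minimal usage sketch (error handling elided; "refs/heads/main" is just
+ * an example name):
+ *
+ *   struct reftable_iterator it = { 0 };
+ *   struct reftable_ref_record ref = { 0 };
+ *   reftable_merged_table_init_ref_iterator(mt, &it);
+ *   reftable_iterator_seek_ref(&it, "refs/heads/main");
+ *   while (reftable_iterator_next_ref(&it, &ref) == 0)
+ *           ... use ref ...
+ *   reftable_ref_record_release(&ref);
+ *   reftable_iterator_destroy(&it);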
*/ +int reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it); + +/* Initialize a merged table iterator for reading logs. */ +int reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt, + struct reftable_iterator *it); + +/* returns the max update_index covered by this merged table. */ +uint64_t +reftable_merged_table_max_update_index(struct reftable_merged_table *mt); + +/* returns the min update_index covered by this merged table. */ +uint64_t +reftable_merged_table_min_update_index(struct reftable_merged_table *mt); + +/* releases memory for the merged_table */ +void reftable_merged_table_free(struct reftable_merged_table *m); + +/* return the hash ID of the merged table. */ +enum reftable_hash reftable_merged_table_hash_id(struct reftable_merged_table *m); + +#endif diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h new file mode 100644 index 0000000000..385a74cc86 --- /dev/null +++ b/reftable/reftable-record.h @@ -0,0 +1,110 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_RECORD_H +#define REFTABLE_RECORD_H + +#include "reftable-basics.h" +#include <stdint.h> + +/* + * Basic data types + * + * Reftables store the state of each ref in struct reftable_ref_record, and they + * store a sequence of reflog updates in struct reftable_log_record. + */ + +/* reftable_ref_record holds a ref database entry target_value */ +struct reftable_ref_record { + char *refname; /* Name of the ref, malloced. */ + size_t refname_cap; + uint64_t update_index; /* Logical timestamp at which this value is + * written */ + + enum { + /* tombstone to hide deletions from earlier tables */ + REFTABLE_REF_DELETION = 0x0, + + /* a simple ref */ + REFTABLE_REF_VAL1 = 0x1, + /* a tag, plus its peeled hash */ + REFTABLE_REF_VAL2 = 0x2, + + /* a symbolic reference */ + REFTABLE_REF_SYMREF = 0x3, +#define REFTABLE_NR_REF_VALUETYPES 4 + } value_type; + union { + unsigned char val1[REFTABLE_HASH_SIZE_MAX]; + struct { + unsigned char value[REFTABLE_HASH_SIZE_MAX]; /* first hash */ + unsigned char target_value[REFTABLE_HASH_SIZE_MAX]; /* second hash */ + } val2; + char *symref; /* referent, malloced 0-terminated string */ + } value; +}; + +/* Returns the first hash, or NULL if `rec` is not of type + * REFTABLE_REF_VAL1 or REFTABLE_REF_VAL2. */ +const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec); + +/* Returns the second hash, or NULL if `rec` is not of type + * REFTABLE_REF_VAL2. */ +const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec); + +/* returns whether 'ref' represents a deletion */ +int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref); + +/* frees and nulls all pointer values inside `ref`. */ +void reftable_ref_record_release(struct reftable_ref_record *ref); + +/* returns whether two reftable_ref_records are the same. Useful for testing. */ +int reftable_ref_record_equal(const struct reftable_ref_record *a, + const struct reftable_ref_record *b, uint32_t hash_size); + +/* reftable_log_record holds a reflog entry */ +struct reftable_log_record { + char *refname; + size_t refname_cap; + uint64_t update_index; /* logical timestamp of a transactional update. 
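+ * For a given refname, this index is stored inverted
+ * (UINT64_MAX - update_index) in the record key, so the newest entry
+ * sorts first.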
+ */ + + enum { + /* tombstone to hide deletions from earlier tables */ + REFTABLE_LOG_DELETION = 0x0, + + /* a simple update */ + REFTABLE_LOG_UPDATE = 0x1, +#define REFTABLE_NR_LOG_VALUETYPES 2 + } value_type; + + union { + struct { + unsigned char new_hash[REFTABLE_HASH_SIZE_MAX]; + unsigned char old_hash[REFTABLE_HASH_SIZE_MAX]; + char *name; + char *email; + uint64_t time; + int16_t tz_offset; + char *message; + size_t message_cap; + } update; + } value; +}; + +/* returns whether 'ref' represents the deletion of a log record. */ +int reftable_log_record_is_deletion(const struct reftable_log_record *log); + +/* frees and nulls all pointer values. */ +void reftable_log_record_release(struct reftable_log_record *log); + +/* returns whether two records are equal. Useful for testing. */ +int reftable_log_record_equal(const struct reftable_log_record *a, + const struct reftable_log_record *b, uint32_t hash_size); + +#endif diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h new file mode 100644 index 0000000000..910ec6ef3a --- /dev/null +++ b/reftable/reftable-stack.h @@ -0,0 +1,155 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_STACK_H +#define REFTABLE_STACK_H + +#include "reftable-writer.h" + +/* + * The stack presents an interface to a mutable sequence of reftables. + + * A stack can be mutated by pushing a table to the top of the stack. + + * The reftable_stack automatically compacts files on disk to ensure good + * amortized performance. + * + * For windows and other platforms that cannot have open files as rename + * destinations, concurrent access from multiple processes needs the rand() + * random seed to be randomized. + */ +struct reftable_stack; + +/* open a new reftable stack. The tables along with the table list will be + * stored in 'dir'. Typically, this should be .git/reftables. + */ +int reftable_new_stack(struct reftable_stack **dest, const char *dir, + const struct reftable_write_options *opts); + +/* returns the update_index at which a next table should be written. */ +uint64_t reftable_stack_next_update_index(struct reftable_stack *st); + +/* holds a transaction to add tables at the top of a stack. */ +struct reftable_addition; + +enum { + /* + * Reload the stack when the stack is out-of-date after locking it. + */ + REFTABLE_STACK_NEW_ADDITION_RELOAD = (1 << 0), +}; + +/* + * returns a new transaction to add reftables to the given stack. As a side + * effect, the ref database is locked. Accepts REFTABLE_STACK_NEW_ADDITION_* + * flags. + */ +int reftable_stack_new_addition(struct reftable_addition **dest, + struct reftable_stack *st, + unsigned int flags); + +/* Adds a reftable to transaction. */ +int reftable_addition_add(struct reftable_addition *add, + int (*write_table)(struct reftable_writer *wr, + void *arg), + void *arg); + +/* Commits the transaction, releasing the lock. After calling this, + * reftable_addition_destroy should still be called. + */ +int reftable_addition_commit(struct reftable_addition *add); + +/* Release all non-committed data from the transaction, and deallocate the + * transaction. Releases the lock if held. */ +void reftable_addition_destroy(struct reftable_addition *add); + +/* add a new table to the stack. The write_table function must call + * reftable_writer_set_limits, add refs and return an error value. 
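A sketch of such a callback, illustrative only; the record to be written is passed through the opaque `arg` pointer:

	static int write_one_ref(struct reftable_writer *wr, void *arg)
	{
		struct reftable_ref_record *rec = arg;
		int err = reftable_writer_set_limits(wr, rec->update_index,
						     rec->update_index);
		if (err < 0)
			return err;
		return reftable_writer_add_ref(wr, rec);
	}

It would then be handed to reftable_stack_add() or reftable_addition_add() together with the record as `arg`.
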
*/ +int reftable_stack_add(struct reftable_stack *st, + int (*write_table)(struct reftable_writer *wr, + void *write_arg), + void *write_arg); + +struct reftable_iterator; + +/* + * Initialize an iterator for the merged tables contained in the stack that can + * be used to iterate through refs. The iterator is valid until the next reload + * or write. + */ +int reftable_stack_init_ref_iterator(struct reftable_stack *st, + struct reftable_iterator *it); + +/* + * Initialize an iterator for the merged tables contained in the stack that can + * be used to iterate through logs. The iterator is valid until the next reload + * or write. + */ +int reftable_stack_init_log_iterator(struct reftable_stack *st, + struct reftable_iterator *it); + +/* returns the merged_table for seeking. This table is valid until the + * next write or reload, and should not be closed or deleted. + */ +struct reftable_merged_table * +reftable_stack_merged_table(struct reftable_stack *st); + +/* frees all resources associated with the stack. */ +void reftable_stack_destroy(struct reftable_stack *st); + +/* Reloads the stack if necessary. This is very cheap to run if the stack was up + * to date */ +int reftable_stack_reload(struct reftable_stack *st); + +/* Policy for expiring reflog entries. */ +struct reftable_log_expiry_config { + /* Drop entries older than this timestamp */ + uint64_t time; + + /* Drop older entries */ + uint64_t min_update_index; +}; + +/* compacts all reftables into a giant table. Expire reflog entries if config is + * non-NULL */ +int reftable_stack_compact_all(struct reftable_stack *st, + struct reftable_log_expiry_config *config); + +/* heuristically compact unbalanced table stack. */ +int reftable_stack_auto_compact(struct reftable_stack *st); + +/* delete stale .ref tables. */ +int reftable_stack_clean(struct reftable_stack *st); + +/* convenience function to read a single ref. Returns < 0 for error, 0 for + * success, and 1 if ref not found. */ +int reftable_stack_read_ref(struct reftable_stack *st, const char *refname, + struct reftable_ref_record *ref); + +/* convenience function to read a single log. Returns < 0 for error, 0 for + * success, and 1 if ref not found. */ +int reftable_stack_read_log(struct reftable_stack *st, const char *refname, + struct reftable_log_record *log); + +/* statistics on past compactions. */ +struct reftable_compaction_stats { + uint64_t bytes; /* total number of bytes written */ + uint64_t entries_written; /* total number of entries written, including + failures. */ + int attempts; /* how often we tried to compact */ + int failures; /* failures happen on concurrent updates */ +}; + +/* return statistics for compaction up till now. */ +struct reftable_compaction_stats * +reftable_stack_compaction_stats(struct reftable_stack *st); + +/* Return the hash of the stack. 
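Putting the above together, a transactional update could look roughly like this (a sketch only; `st` is an open stack, `rec` a prepared ref record, write_one_ref the callback sketched earlier, and stdio is assumed for the error message):

	struct reftable_addition *add = NULL;
	int err = reftable_stack_new_addition(&add, st, 0);
	if (!err) {
		rec.update_index = reftable_stack_next_update_index(st);
		err = reftable_addition_add(add, write_one_ref, &rec);
	}
	if (!err)
		err = reftable_addition_commit(add);
	reftable_addition_destroy(add);
	if (err < 0)
		fprintf(stderr, "reftable: %s\n", reftable_error_str(err));
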
*/ +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st); + +#endif diff --git a/reftable/reftable-table.h b/reftable/reftable-table.h new file mode 100644 index 0000000000..5f935d02e3 --- /dev/null +++ b/reftable/reftable-table.h @@ -0,0 +1,115 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_TABLE_H +#define REFTABLE_TABLE_H + +#include "reftable-iterator.h" +#include "reftable-block.h" +#include "reftable-blocksource.h" + +/* + * Reading single tables + * + * The follow routines are for reading single files. For an + * application-level interface, skip ahead to struct + * reftable_merged_table and struct reftable_stack. + */ + +/* Metadata for a block type. */ +struct reftable_table_offsets { + int is_present; + uint64_t offset; + uint64_t index_offset; +}; + +/* The table struct is a handle to an open reftable file. */ +struct reftable_table { + /* for convenience, associate a name with the instance. */ + char *name; + struct reftable_block_source source; + + /* Size of the file, excluding the footer. */ + uint64_t size; + + /* The hash function used for ref records. */ + enum reftable_hash hash_id; + + uint32_t block_size; + uint64_t min_update_index; + uint64_t max_update_index; + /* Length of the OID keys in the 'o' section */ + int object_id_len; + int version; + + struct reftable_table_offsets ref_offsets; + struct reftable_table_offsets obj_offsets; + struct reftable_table_offsets log_offsets; + + uint64_t refcount; +}; + +/* reftable_table_new opens a reftable for reading. If successful, + * returns 0 code and sets pp. The name is used for creating a + * stack. Typically, it is the basename of the file. The block source + * `src` is owned by the table, and is closed on calling + * reftable_table_destroy(). On error, the block source `src` is + * closed as well. + */ +int reftable_table_new(struct reftable_table **out, + struct reftable_block_source *src, const char *name); + +/* + * Manage the reference count of the reftable table. A newly initialized + * table starts with a refcount of 1 and will be deleted once the refcount has + * reached 0. + * + * This is required because tables may have longer lifetimes than the stack + * they belong to. The stack may for example be reloaded while the old tables + * are still being accessed by an iterator. + */ +void reftable_table_incref(struct reftable_table *table); +void reftable_table_decref(struct reftable_table *table); + +/* Initialize a reftable iterator for reading refs. */ +int reftable_table_init_ref_iterator(struct reftable_table *t, + struct reftable_iterator *it); + +/* Initialize a reftable iterator for reading logs. */ +int reftable_table_init_log_iterator(struct reftable_table *t, + struct reftable_iterator *it); + +/* returns the hash ID used in this table. */ +enum reftable_hash reftable_table_hash_id(struct reftable_table *t); + +/* return an iterator for the refs pointing to `oid`. */ +int reftable_table_refs_for(struct reftable_table *t, + struct reftable_iterator *it, uint8_t *oid); + +/* return the max_update_index for a table */ +uint64_t reftable_table_max_update_index(struct reftable_table *t); + +/* return the min_update_index for a table */ +uint64_t reftable_table_min_update_index(struct reftable_table *t); + +/* + * An iterator that iterates through the blocks contained in a given table. 
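For example (a sketch; the file name is made up), opening one table file directly through a file block source and dropping it again:

	struct reftable_block_source src = { NULL };
	struct reftable_table *table = NULL;
	const char *name = "0x000000000001-0x000000000001-a1b2c3d4.ref";
	int err = reftable_block_source_from_file(&src, name);
	if (!err)
		err = reftable_table_new(&table, &src, name); /* closes src on error */
	if (!err) {
		/* ... reftable_table_init_ref_iterator(), etc. ... */
		reftable_table_decref(table);
	}
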
+ */ +struct reftable_table_iterator { + void *iter_arg; +}; + +int reftable_table_iterator_init(struct reftable_table_iterator *it, + struct reftable_table *t); + +void reftable_table_iterator_release(struct reftable_table_iterator *it); + +int reftable_table_iterator_next(struct reftable_table_iterator *it, + const struct reftable_block **out); + +#endif diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h new file mode 100644 index 0000000000..0fbeff17f4 --- /dev/null +++ b/reftable/reftable-writer.h @@ -0,0 +1,189 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef REFTABLE_WRITER_H +#define REFTABLE_WRITER_H + +#include "reftable-record.h" + +#include <stdint.h> +#include <unistd.h> /* ssize_t */ + +/* Writing single reftables */ + +/* reftable_write_options sets options for writing a single reftable. */ +struct reftable_write_options { + /* boolean: do not pad out blocks to block size. */ + unsigned unpadded : 1; + + /* the blocksize. Should be less than 2^24. */ + uint32_t block_size; + + /* boolean: do not generate a SHA1 => ref index. */ + unsigned skip_index_objects : 1; + + /* how often to write complete keys in each block. */ + uint16_t restart_interval; + + /* 4-byte identifier ("sha1", "s256") of the hash. + * Defaults to SHA1 if unset + */ + enum reftable_hash hash_id; + + /* Default mode for creating files. If unset, use 0666 (+umask) */ + unsigned int default_permissions; + + /* boolean: copy log messages exactly. If unset, check that the message + * is a single line, and add '\n' if missing. + */ + unsigned exact_log_message : 1; + + /* boolean: Prevent auto-compaction of tables. */ + unsigned disable_auto_compact : 1; + + /* + * Geometric sequence factor used by auto-compaction to decide which + * tables to compact. Defaults to 2 if unset. + */ + uint8_t auto_compaction_factor; + + /* + * The number of milliseconds to wait when trying to lock "tables.list". + * Note that this does not apply to locking individual tables, as these + * should only ever be locked when already holding the "tables.list" + * lock. + * + * Passing 0 will fail immediately when the file is locked, passing a + * negative value will cause us to block indefinitely. + */ + long lock_timeout_ms; + + /* + * Optional callback used to fsync files to disk. Falls back to using + * fsync(3P) when unset. + */ + int (*fsync)(int fd); + + /* + * Callback function to execute whenever the stack is being reloaded. + * This can be used e.g. to discard cached information that relies on + * the old stack's data. The payload data will be passed as argument to + * the callback. + */ + void (*on_reload)(void *payload); + void *on_reload_payload; +}; + +/* reftable_block_stats holds statistics for a single block type */ +struct reftable_block_stats { + /* total number of entries written */ + int entries; + /* total number of key restarts */ + uint32_t restarts; + /* total number of blocks */ + int blocks; + /* total number of index blocks */ + int index_blocks; + /* depth of the index */ + int max_index_level; + + /* offset of the first block for this type */ + uint64_t offset; + /* offset of the top level index block for this type, or 0 if not + * present */ + uint64_t index_offset; +}; + +/* stats holds overall statistics for a single reftable */ +struct reftable_stats { + /* total number of blocks written. 
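By way of example (all values are illustrative), a configuration for 4 KiB blocks, SHA-1 hashes, world-readable files and a short lock timeout might look like:

	struct reftable_write_options opts = {
		.block_size = 4096,
		.hash_id = REFTABLE_HASH_SHA1,
		.default_permissions = 0644,
		.lock_timeout_ms = 100,
	};
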
*/ + int blocks; + /* stats for ref data */ + struct reftable_block_stats ref_stats; + /* stats for the SHA1 to ref map. */ + struct reftable_block_stats obj_stats; + /* stats for index blocks */ + struct reftable_block_stats idx_stats; + /* stats for log blocks */ + struct reftable_block_stats log_stats; + + /* disambiguation length of shortened object IDs. */ + int object_id_len; +}; + +struct reftable_writer; + +/* Create a new writer. */ +int reftable_writer_new(struct reftable_writer **out, + ssize_t (*writer_func)(void *, const void *, size_t), + int (*flush_func)(void *), + void *writer_arg, const struct reftable_write_options *opts); + +/* + * Set the range of update indices for the records we will add. When writing a + * table into a stack, the min should be at least + * reftable_stack_next_update_index(), or REFTABLE_API_ERROR is returned. + * + * For transactional updates to a stack, typically min==max, and the + * update_index can be obtained by inspeciting the stack. When converting an + * existing ref database into a single reftable, this would be a range of + * update-index timestamps. + * + * The function should be called before adding any records to the writer. If not + * it will fail with REFTABLE_API_ERROR. + */ +int reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, + uint64_t max); + +/* + Add a reftable_ref_record. The record should have names that come after + already added records. + + The update_index must be within the limits set by + reftable_writer_set_limits(), or REFTABLE_API_ERROR is returned. It is an + REFTABLE_API_ERROR error to write a ref record after a log record. +*/ +int reftable_writer_add_ref(struct reftable_writer *w, + struct reftable_ref_record *ref); + +/* + Convenience function to add multiple reftable_ref_records; the function sorts + the records before adding them, reordering the records array passed in. +*/ +int reftable_writer_add_refs(struct reftable_writer *w, + struct reftable_ref_record *refs, int n); + +/* + adds reftable_log_records. Log records are keyed by (refname, decreasing + update_index). The key for the record added must come after the already added + log records. +*/ +int reftable_writer_add_log(struct reftable_writer *w, + struct reftable_log_record *log); + +/* + Convenience function to add multiple reftable_log_records; the function sorts + the records before adding them, reordering records array passed in. +*/ +int reftable_writer_add_logs(struct reftable_writer *w, + struct reftable_log_record *logs, int n); + +/* reftable_writer_close finalizes the reftable. The writer is retained so + * statistics can be inspected. */ +int reftable_writer_close(struct reftable_writer *w); + +/* writer_stats returns the statistics on the reftable being written. + + This struct becomes invalid when the writer is freed. 
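Tying the writer API together, a sketch that writes one ref record to an already-open file descriptor; `out_fd`, `opts` and `rec` are assumed to be set up by the caller, and short writes are not handled here:

	static ssize_t write_cb(void *arg, const void *data, size_t sz)
	{
		return write(*(int *) arg, data, sz);
	}

	static int flush_cb(void *arg)
	{
		return fsync(*(int *) arg);
	}

	/* ... */
	struct reftable_writer *wr = NULL;
	int err = reftable_writer_new(&wr, write_cb, flush_cb, &out_fd, &opts);
	if (!err)
		err = reftable_writer_set_limits(wr, rec.update_index, rec.update_index);
	if (!err)
		err = reftable_writer_add_ref(wr, &rec);
	if (!err)
		err = reftable_writer_close(wr);
	reftable_writer_free(wr);
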
+ */ +const struct reftable_stats *reftable_writer_stats(struct reftable_writer *w); + +/* reftable_writer_free deallocates memory for the writer */ +void reftable_writer_free(struct reftable_writer *w); + +#endif diff --git a/reftable/stack.c b/reftable/stack.c new file mode 100644 index 0000000000..4caf96aa1d --- /dev/null +++ b/reftable/stack.c @@ -0,0 +1,1841 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "stack.h" + +#include "system.h" +#include "constants.h" +#include "merged.h" +#include "reftable-error.h" +#include "reftable-record.h" +#include "reftable-merged.h" +#include "table.h" +#include "writer.h" + +static int stack_try_add(struct reftable_stack *st, + int (*write_table)(struct reftable_writer *wr, + void *arg), + void *arg); +static int stack_write_compact(struct reftable_stack *st, + struct reftable_writer *wr, + size_t first, size_t last, + struct reftable_log_expiry_config *config); +static void reftable_addition_close(struct reftable_addition *add); +static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, + int reuse_open); + +static int stack_filename(struct reftable_buf *dest, struct reftable_stack *st, + const char *name) +{ + int err; + reftable_buf_reset(dest); + if ((err = reftable_buf_addstr(dest, st->reftable_dir)) < 0 || + (err = reftable_buf_addstr(dest, "/")) < 0 || + (err = reftable_buf_addstr(dest, name)) < 0) + return err; + return 0; +} + +static int stack_fsync(const struct reftable_write_options *opts, int fd) +{ + if (opts->fsync) + return opts->fsync(fd); + return fsync(fd); +} + +static ssize_t reftable_write_data(int fd, const void *data, size_t size) +{ + size_t total_written = 0; + const char *p = data; + + while (total_written < size) { + ssize_t bytes_written = write(fd, p, size - total_written); + if (bytes_written < 0 && (errno == EAGAIN || errno == EINTR)) + continue; + if (bytes_written < 0) + return REFTABLE_IO_ERROR; + + total_written += bytes_written; + p += bytes_written; + } + + return total_written; +} + +struct fd_writer { + const struct reftable_write_options *opts; + int fd; +}; + +static ssize_t fd_writer_write(void *arg, const void *data, size_t sz) +{ + struct fd_writer *writer = arg; + return reftable_write_data(writer->fd, data, sz); +} + +static int fd_writer_flush(void *arg) +{ + struct fd_writer *writer = arg; + return stack_fsync(writer->opts, writer->fd); +} + +int reftable_new_stack(struct reftable_stack **dest, const char *dir, + const struct reftable_write_options *_opts) +{ + struct reftable_buf list_file_name = REFTABLE_BUF_INIT; + struct reftable_write_options opts = { 0 }; + struct reftable_stack *p; + int err; + + p = reftable_calloc(1, sizeof(*p)); + if (!p) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + + if (_opts) + opts = *_opts; + if (opts.hash_id == 0) + opts.hash_id = REFTABLE_HASH_SHA1; + + *dest = NULL; + + reftable_buf_reset(&list_file_name); + if ((err = reftable_buf_addstr(&list_file_name, dir)) < 0 || + (err = reftable_buf_addstr(&list_file_name, "/tables.list")) < 0) + goto out; + + p->list_file = reftable_buf_detach(&list_file_name); + p->list_fd = -1; + p->opts = opts; + p->reftable_dir = reftable_strdup(dir); + if (!p->reftable_dir) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + + err = reftable_stack_reload_maybe_reuse(p, 1); + if (err < 0) + goto out; + + *dest = p; + err = 0; + 
+out: + if (err < 0) + reftable_stack_destroy(p); + return err; +} + +static int fd_read_lines(int fd, char ***namesp) +{ + char *buf = NULL; + int err = 0; + off_t size; + + size = lseek(fd, 0, SEEK_END); + if (size < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + err = lseek(fd, 0, SEEK_SET); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + REFTABLE_ALLOC_ARRAY(buf, size + 1); + if (!buf) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + for (off_t total_read = 0; total_read < size; ) { + ssize_t bytes_read = read(fd, buf + total_read, size - total_read); + if (bytes_read < 0 && (errno == EAGAIN || errno == EINTR)) + continue; + if (bytes_read < 0 || !bytes_read) { + err = REFTABLE_IO_ERROR; + goto done; + } + + total_read += bytes_read; + } + buf[size] = 0; + + *namesp = parse_names(buf, size); + if (!*namesp) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + +done: + reftable_free(buf); + return err; +} + +int read_lines(const char *filename, char ***namesp) +{ + int fd = open(filename, O_RDONLY); + int err = 0; + if (fd < 0) { + if (errno == ENOENT) { + REFTABLE_CALLOC_ARRAY(*namesp, 1); + if (!*namesp) + return REFTABLE_OUT_OF_MEMORY_ERROR; + return 0; + } + + return REFTABLE_IO_ERROR; + } + err = fd_read_lines(fd, namesp); + close(fd); + return err; +} + +int reftable_stack_init_ref_iterator(struct reftable_stack *st, + struct reftable_iterator *it) +{ + return merged_table_init_iter(reftable_stack_merged_table(st), + it, REFTABLE_BLOCK_TYPE_REF); +} + +int reftable_stack_init_log_iterator(struct reftable_stack *st, + struct reftable_iterator *it) +{ + return merged_table_init_iter(reftable_stack_merged_table(st), + it, REFTABLE_BLOCK_TYPE_LOG); +} + +struct reftable_merged_table * +reftable_stack_merged_table(struct reftable_stack *st) +{ + return st->merged; +} + +static int has_name(char **names, const char *name) +{ + while (*names) { + if (!strcmp(*names, name)) + return 1; + names++; + } + return 0; +} + +/* Close and free the stack */ +void reftable_stack_destroy(struct reftable_stack *st) +{ + char **names = NULL; + int err = 0; + + if (!st) + return; + + if (st->merged) { + reftable_merged_table_free(st->merged); + st->merged = NULL; + } + + err = read_lines(st->list_file, &names); + if (err < 0) { + REFTABLE_FREE_AND_NULL(names); + } + + if (st->tables) { + struct reftable_buf filename = REFTABLE_BUF_INIT; + + for (size_t i = 0; i < st->tables_len; i++) { + const char *name = reftable_table_name(st->tables[i]); + int try_unlinking = 1; + + reftable_buf_reset(&filename); + if (names && !has_name(names, name)) { + if (stack_filename(&filename, st, name) < 0) + try_unlinking = 0; + } + reftable_table_decref(st->tables[i]); + + if (try_unlinking && filename.len) { + /* On Windows, can only unlink after closing. 
*/ + unlink(filename.buf); + } + } + + reftable_buf_release(&filename); + st->tables_len = 0; + REFTABLE_FREE_AND_NULL(st->tables); + } + + if (st->list_fd >= 0) { + close(st->list_fd); + st->list_fd = -1; + } + + REFTABLE_FREE_AND_NULL(st->list_file); + REFTABLE_FREE_AND_NULL(st->reftable_dir); + reftable_free(st); + free_names(names); +} + +static struct reftable_table **stack_copy_tables(struct reftable_stack *st, + size_t cur_len) +{ + struct reftable_table **cur = reftable_calloc(cur_len, sizeof(*cur)); + if (!cur) + return NULL; + for (size_t i = 0; i < cur_len; i++) + cur[i] = st->tables[i]; + return cur; +} + +static int reftable_stack_reload_once(struct reftable_stack *st, + const char **names, + int reuse_open) +{ + size_t cur_len = !st->merged ? 0 : st->merged->tables_len; + struct reftable_table **cur = NULL; + struct reftable_table **reused = NULL; + struct reftable_table **new_tables = NULL; + size_t reused_len = 0, reused_alloc = 0, names_len; + size_t new_tables_len = 0; + struct reftable_merged_table *new_merged = NULL; + struct reftable_buf table_path = REFTABLE_BUF_INIT; + int err = 0; + size_t i; + + if (cur_len) { + cur = stack_copy_tables(st, cur_len); + if (!cur) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + } + + names_len = names_length(names); + + if (names_len) { + new_tables = reftable_calloc(names_len, sizeof(*new_tables)); + if (!new_tables) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + } + + while (*names) { + struct reftable_table *table = NULL; + const char *name = *names++; + + /* this is linear; we assume compaction keeps the number of + tables under control so this is not quadratic. */ + for (i = 0; reuse_open && i < cur_len; i++) { + if (cur[i] && 0 == strcmp(cur[i]->name, name)) { + table = cur[i]; + cur[i] = NULL; + + /* + * When reloading the stack fails, we end up + * releasing all new tables. This also + * includes the reused tables, even though + * they are still in used by the old stack. We + * thus need to keep them alive here, which we + * do by bumping their refcount. + */ + REFTABLE_ALLOC_GROW_OR_NULL(reused, + reused_len + 1, + reused_alloc); + if (!reused) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + reused[reused_len++] = table; + reftable_table_incref(table); + break; + } + } + + if (!table) { + struct reftable_block_source src = { NULL }; + + err = stack_filename(&table_path, st, name); + if (err < 0) + goto done; + + err = reftable_block_source_from_file(&src, + table_path.buf); + if (err < 0) + goto done; + + err = reftable_table_new(&table, &src, name); + if (err < 0) + goto done; + } + + new_tables[new_tables_len] = table; + new_tables_len++; + } + + /* success! */ + err = reftable_merged_table_new(&new_merged, new_tables, + new_tables_len, st->opts.hash_id); + if (err < 0) + goto done; + + /* + * Close the old, non-reused tables and proactively try to unlink + * them. This is done for systems like Windows, where the underlying + * file of such an open table wouldn't have been possible to be + * unlinked by the compacting process. + */ + for (i = 0; i < cur_len; i++) { + if (cur[i]) { + const char *name = reftable_table_name(cur[i]); + + err = stack_filename(&table_path, st, name); + if (err < 0) + goto done; + + reftable_table_decref(cur[i]); + unlink(table_path.buf); + } + } + + /* Update the stack to point to the new tables. 
*/ + if (st->merged) + reftable_merged_table_free(st->merged); + new_merged->suppress_deletions = 1; + st->merged = new_merged; + + if (st->tables) + reftable_free(st->tables); + st->tables = new_tables; + st->tables_len = new_tables_len; + new_tables = NULL; + new_tables_len = 0; + + /* + * Decrement the refcount of reused tables again. This only needs to + * happen on the successful case, because on the unsuccessful one we + * decrement their refcount via `new_tables`. + */ + for (i = 0; i < reused_len; i++) + reftable_table_decref(reused[i]); + +done: + for (i = 0; i < new_tables_len; i++) + reftable_table_decref(new_tables[i]); + reftable_free(new_tables); + reftable_free(reused); + reftable_free(cur); + reftable_buf_release(&table_path); + return err; +} + +/* return negative if a before b. */ +static int tv_cmp(struct timeval *a, struct timeval *b) +{ + time_t diff = a->tv_sec - b->tv_sec; + int udiff = a->tv_usec - b->tv_usec; + + if (diff != 0) + return diff; + + return udiff; +} + +static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, + int reuse_open) +{ + char **names = NULL, **names_after = NULL; + struct timeval deadline; + int64_t delay = 0; + int tries = 0, err; + int fd = -1; + + err = gettimeofday(&deadline, NULL); + if (err < 0) + goto out; + deadline.tv_sec += 3; + + while (1) { + struct timeval now; + + err = gettimeofday(&now, NULL); + if (err < 0) + goto out; + + /* + * Only look at deadlines after the first few times. This + * simplifies debugging in GDB. + */ + tries++; + if (tries > 3 && tv_cmp(&now, &deadline) >= 0) + goto out; + + fd = open(st->list_file, O_RDONLY); + if (fd < 0) { + if (errno != ENOENT) { + err = REFTABLE_IO_ERROR; + goto out; + } + + REFTABLE_CALLOC_ARRAY(names, 1); + if (!names) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + } else { + err = fd_read_lines(fd, &names); + if (err < 0) + goto out; + } + + err = reftable_stack_reload_once(st, (const char **) names, reuse_open); + if (!err) + break; + if (err != REFTABLE_NOT_EXIST_ERROR) + goto out; + + /* + * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent + * writer. Check if there was one by checking if the name list + * changed. + */ + err = read_lines(st->list_file, &names_after); + if (err < 0) + goto out; + if (names_equal((const char **) names_after, + (const char **) names)) { + err = REFTABLE_NOT_EXIST_ERROR; + goto out; + } + + free_names(names); + names = NULL; + free_names(names_after); + names_after = NULL; + close(fd); + fd = -1; + + delay = delay + (delay * reftable_rand()) / UINT32_MAX + 1; + poll(NULL, 0, delay); + } + +out: + /* + * Invalidate the stat cache. It is sufficient to only close the file + * descriptor and keep the cached stat info because we never use the + * latter when the former is negative. + */ + if (st->list_fd >= 0) { + close(st->list_fd); + st->list_fd = -1; + } + + /* + * Cache stat information in case it provides a useful signal to us. + * According to POSIX, "The st_ino and st_dev fields taken together + * uniquely identify the file within the system." That being said, + * Windows is not POSIX compliant and we do not have these fields + * available. So the information we have there is insufficient to + * determine whether two file descriptors point to the same file. + * + * While we could fall back to using other signals like the file's + * mtime, those are not sufficient to avoid races. 
We thus refrain from + * using the stat cache on such systems and fall back to the secondary + * caching mechanism, which is to check whether contents of the file + * have changed. + * + * On other systems which are POSIX compliant we must keep the file + * descriptor open. This is to avoid a race condition where two + * processes access the reftable stack at the same point in time: + * + * 1. A reads the reftable stack and caches its stat info. + * + * 2. B updates the stack, appending a new table to "tables.list". + * This will both use a new inode and result in a different file + * size, thus invalidating A's cache in theory. + * + * 3. B decides to auto-compact the stack and merges two tables. The + * file size now matches what A has cached again. Furthermore, the + * filesystem may decide to recycle the inode number of the file + * we have replaced in (2) because it is not in use anymore. + * + * 4. A reloads the reftable stack. Neither the inode number nor the + * file size changed. If the timestamps did not change either then + * we think the cached copy of our stack is up-to-date. + * + * By keeping the file descriptor open the inode number cannot be + * recycled, mitigating the race. + */ + if (!err && fd >= 0 && !fstat(fd, &st->list_st) && + st->list_st.st_dev && st->list_st.st_ino) { + st->list_fd = fd; + fd = -1; + } + + if (fd >= 0) + close(fd); + free_names(names); + free_names(names_after); + + if (st->opts.on_reload) + st->opts.on_reload(st->opts.on_reload_payload); + + return err; +} + +/* -1 = error + 0 = up to date + 1 = changed. */ +static int stack_uptodate(struct reftable_stack *st) +{ + char **names = NULL; + int err; + + /* + * When we have cached stat information available then we use it to + * verify whether the file has been rewritten. + * + * Note that we explicitly do not want to use `stat_validity_check()` + * and friends here because they may end up not comparing the `st_dev` + * and `st_ino` fields. These functions thus cannot guarantee that we + * indeed still have the same file. + */ + if (st->list_fd >= 0) { + struct stat list_st; + + if (stat(st->list_file, &list_st) < 0) { + /* + * It's fine for "tables.list" to not exist. In that + * case, we have to refresh when the loaded stack has + * any tables. + */ + if (errno == ENOENT) + return !!st->tables_len; + return REFTABLE_IO_ERROR; + } + + /* + * When "tables.list" refers to the same file we can assume + * that it didn't change. This is because we always use + * rename(3P) to update the file and never write to it + * directly. + */ + if (st->list_st.st_dev == list_st.st_dev && + st->list_st.st_ino == list_st.st_ino) + return 0; + } + + err = read_lines(st->list_file, &names); + if (err < 0) + return err; + + for (size_t i = 0; i < st->tables_len; i++) { + if (!names[i]) { + err = 1; + goto done; + } + + if (strcmp(st->tables[i]->name, names[i])) { + err = 1; + goto done; + } + } + + if (names[st->merged->tables_len]) { + err = 1; + goto done; + } + +done: + free_names(names); + return err; +} + +int reftable_stack_reload(struct reftable_stack *st) +{ + int err = stack_uptodate(st); + if (err > 0) + return reftable_stack_reload_maybe_reuse(st, 1); + return err; +} + +int reftable_stack_add(struct reftable_stack *st, + int (*write)(struct reftable_writer *wr, void *arg), + void *arg) +{ + int err = stack_try_add(st, write, arg); + if (err < 0) { + if (err == REFTABLE_OUTDATED_ERROR) { + /* Ignore error return, we want to propagate + REFTABLE_OUTDATED_ERROR. 
+ */ + reftable_stack_reload(st); + } + return err; + } + + return 0; +} + +static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max) +{ + char buf[100]; + uint32_t rnd = reftable_rand(); + snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", + min, max, rnd); + reftable_buf_reset(dest); + return reftable_buf_addstr(dest, buf); +} + +struct reftable_addition { + struct reftable_flock tables_list_lock; + struct reftable_stack *stack; + + char **new_tables; + size_t new_tables_len, new_tables_cap; + uint64_t next_update_index; +}; + +#define REFTABLE_ADDITION_INIT {0} + +static int reftable_stack_init_addition(struct reftable_addition *add, + struct reftable_stack *st, + unsigned int flags) +{ + struct reftable_buf lock_file_name = REFTABLE_BUF_INIT; + int err; + + add->stack = st; + + err = flock_acquire(&add->tables_list_lock, st->list_file, + st->opts.lock_timeout_ms); + if (err < 0) { + if (errno == EEXIST) { + err = REFTABLE_LOCK_ERROR; + } else { + err = REFTABLE_IO_ERROR; + } + goto done; + } + if (st->opts.default_permissions) { + if (chmod(add->tables_list_lock.path, + st->opts.default_permissions) < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + } + + err = stack_uptodate(st); + if (err < 0) + goto done; + if (err > 0 && flags & REFTABLE_STACK_NEW_ADDITION_RELOAD) { + err = reftable_stack_reload_maybe_reuse(add->stack, 1); + if (err) + goto done; + } + if (err > 0) { + err = REFTABLE_OUTDATED_ERROR; + goto done; + } + + add->next_update_index = reftable_stack_next_update_index(st); +done: + if (err) + reftable_addition_close(add); + reftable_buf_release(&lock_file_name); + return err; +} + +static void reftable_addition_close(struct reftable_addition *add) +{ + struct reftable_buf nm = REFTABLE_BUF_INIT; + size_t i; + + for (i = 0; i < add->new_tables_len; i++) { + if (!stack_filename(&nm, add->stack, add->new_tables[i])) + unlink(nm.buf); + reftable_free(add->new_tables[i]); + add->new_tables[i] = NULL; + } + reftable_free(add->new_tables); + add->new_tables = NULL; + add->new_tables_len = 0; + add->new_tables_cap = 0; + + flock_release(&add->tables_list_lock); + reftable_buf_release(&nm); +} + +void reftable_addition_destroy(struct reftable_addition *add) +{ + if (!add) { + return; + } + reftable_addition_close(add); + reftable_free(add); +} + +int reftable_addition_commit(struct reftable_addition *add) +{ + struct reftable_buf table_list = REFTABLE_BUF_INIT; + int err = 0; + size_t i; + + if (add->new_tables_len == 0) + goto done; + + for (i = 0; i < add->stack->merged->tables_len; i++) { + if ((err = reftable_buf_addstr(&table_list, add->stack->tables[i]->name)) < 0 || + (err = reftable_buf_addstr(&table_list, "\n")) < 0) + goto done; + } + for (i = 0; i < add->new_tables_len; i++) { + if ((err = reftable_buf_addstr(&table_list, add->new_tables[i])) < 0 || + (err = reftable_buf_addstr(&table_list, "\n")) < 0) + goto done; + } + + err = reftable_write_data(add->tables_list_lock.fd, + table_list.buf, table_list.len); + reftable_buf_release(&table_list); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + err = stack_fsync(&add->stack->opts, add->tables_list_lock.fd); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + err = flock_commit(&add->tables_list_lock); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + /* success, no more state to clean up. 
*/ + for (i = 0; i < add->new_tables_len; i++) + reftable_free(add->new_tables[i]); + reftable_free(add->new_tables); + add->new_tables = NULL; + add->new_tables_len = 0; + add->new_tables_cap = 0; + + err = reftable_stack_reload_maybe_reuse(add->stack, 1); + if (err) + goto done; + + if (!add->stack->opts.disable_auto_compact) { + /* + * Auto-compact the stack to keep the number of tables in + * control. It is possible that a concurrent writer is already + * trying to compact parts of the stack, which would lead to a + * `REFTABLE_LOCK_ERROR` because parts of the stack are locked + * already. This is a benign error though, so we ignore it. + */ + err = reftable_stack_auto_compact(add->stack); + if (err < 0 && err != REFTABLE_LOCK_ERROR) + goto done; + err = 0; + } + +done: + reftable_addition_close(add); + return err; +} + +int reftable_stack_new_addition(struct reftable_addition **dest, + struct reftable_stack *st, + unsigned int flags) +{ + int err = 0; + struct reftable_addition empty = REFTABLE_ADDITION_INIT; + + REFTABLE_CALLOC_ARRAY(*dest, 1); + if (!*dest) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + **dest = empty; + err = reftable_stack_init_addition(*dest, st, flags); + if (err) { + reftable_free(*dest); + *dest = NULL; + } + return err; +} + +static int stack_try_add(struct reftable_stack *st, + int (*write_table)(struct reftable_writer *wr, + void *arg), + void *arg) +{ + struct reftable_addition add = REFTABLE_ADDITION_INIT; + int err = reftable_stack_init_addition(&add, st, 0); + if (err < 0) + goto done; + + err = reftable_addition_add(&add, write_table, arg); + if (err < 0) + goto done; + + err = reftable_addition_commit(&add); +done: + reftable_addition_close(&add); + return err; +} + +int reftable_addition_add(struct reftable_addition *add, + int (*write_table)(struct reftable_writer *wr, + void *arg), + void *arg) +{ + struct reftable_buf temp_tab_file_name = REFTABLE_BUF_INIT; + struct reftable_buf tab_file_name = REFTABLE_BUF_INIT; + struct reftable_buf next_name = REFTABLE_BUF_INIT; + struct reftable_writer *wr = NULL; + struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT; + struct fd_writer writer = { + .opts = &add->stack->opts, + }; + int err = 0; + + reftable_buf_reset(&next_name); + + err = format_name(&next_name, add->next_update_index, add->next_update_index); + if (err < 0) + goto done; + + err = stack_filename(&temp_tab_file_name, add->stack, next_name.buf); + if (err < 0) + goto done; + + err = reftable_buf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); + if (err < 0) + goto done; + + err = tmpfile_from_pattern(&tab_file, temp_tab_file_name.buf); + if (err < 0) + goto done; + if (add->stack->opts.default_permissions) { + if (chmod(tab_file.path, + add->stack->opts.default_permissions)) { + err = REFTABLE_IO_ERROR; + goto done; + } + } + + writer.fd = tab_file.fd; + err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush, + &writer, &add->stack->opts); + if (err < 0) + goto done; + + err = write_table(wr, arg); + if (err < 0) + goto done; + + err = reftable_writer_close(wr); + if (err == REFTABLE_EMPTY_TABLE_ERROR) { + err = 0; + goto done; + } + if (err < 0) + goto done; + + err = tmpfile_close(&tab_file); + if (err < 0) + goto done; + + if (wr->min_update_index < add->next_update_index) { + err = REFTABLE_API_ERROR; + goto done; + } + + err = format_name(&next_name, wr->min_update_index, wr->max_update_index); + if (err < 0) + goto done; + + err = reftable_buf_addstr(&next_name, ".ref"); + if (err < 0) + goto done; + + err = 
stack_filename(&tab_file_name, add->stack, next_name.buf); + if (err < 0) + goto done; + + /* + On windows, this relies on rand() picking a unique destination name. + Maybe we should do retry loop as well? + */ + err = tmpfile_rename(&tab_file, tab_file_name.buf); + if (err < 0) + goto done; + + REFTABLE_ALLOC_GROW_OR_NULL(add->new_tables, add->new_tables_len + 1, + add->new_tables_cap); + if (!add->new_tables) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + add->new_tables[add->new_tables_len++] = reftable_buf_detach(&next_name); + +done: + tmpfile_delete(&tab_file); + reftable_buf_release(&temp_tab_file_name); + reftable_buf_release(&tab_file_name); + reftable_buf_release(&next_name); + reftable_writer_free(wr); + return err; +} + +uint64_t reftable_stack_next_update_index(struct reftable_stack *st) +{ + int sz = st->merged->tables_len; + if (sz > 0) + return reftable_table_max_update_index(st->tables[sz - 1]) + + 1; + return 1; +} + +static int stack_compact_locked(struct reftable_stack *st, + size_t first, size_t last, + struct reftable_log_expiry_config *config, + struct reftable_tmpfile *tab_file_out) +{ + struct reftable_buf next_name = REFTABLE_BUF_INIT; + struct reftable_buf tab_file_path = REFTABLE_BUF_INIT; + struct reftable_writer *wr = NULL; + struct fd_writer writer= { + .opts = &st->opts, + }; + struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT; + int err = 0; + + err = format_name(&next_name, reftable_table_min_update_index(st->tables[first]), + reftable_table_max_update_index(st->tables[last])); + if (err < 0) + goto done; + + err = stack_filename(&tab_file_path, st, next_name.buf); + if (err < 0) + goto done; + + err = reftable_buf_addstr(&tab_file_path, ".temp.XXXXXX"); + if (err < 0) + goto done; + + err = tmpfile_from_pattern(&tab_file, tab_file_path.buf); + if (err < 0) + goto done; + + if (st->opts.default_permissions && + chmod(tab_file.path, st->opts.default_permissions) < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + writer.fd = tab_file.fd; + err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush, + &writer, &st->opts); + if (err < 0) + goto done; + + err = stack_write_compact(st, wr, first, last, config); + if (err < 0) + goto done; + + err = reftable_writer_close(wr); + if (err < 0) + goto done; + + err = tmpfile_close(&tab_file); + if (err < 0) + goto done; + + *tab_file_out = tab_file; + tab_file = REFTABLE_TMPFILE_INIT; + +done: + tmpfile_delete(&tab_file); + reftable_writer_free(wr); + reftable_buf_release(&next_name); + reftable_buf_release(&tab_file_path); + return err; +} + +static int stack_write_compact(struct reftable_stack *st, + struct reftable_writer *wr, + size_t first, size_t last, + struct reftable_log_expiry_config *config) +{ + struct reftable_merged_table *mt = NULL; + struct reftable_iterator it = { NULL }; + struct reftable_ref_record ref = { NULL }; + struct reftable_log_record log = { NULL }; + size_t subtabs_len = last - first + 1; + uint64_t entries = 0; + int err = 0; + + for (size_t i = first; i <= last; i++) + st->stats.bytes += st->tables[i]->size; + err = reftable_writer_set_limits(wr, st->tables[first]->min_update_index, + st->tables[last]->max_update_index); + if (err < 0) + goto done; + + err = reftable_merged_table_new(&mt, st->tables + first, subtabs_len, + st->opts.hash_id); + if (err < 0) + goto done; + + err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_REF); + if (err < 0) + goto done; + + err = reftable_iterator_seek_ref(&it, ""); + if (err < 0) + goto done; + + while (1) { + err 
= reftable_iterator_next_ref(&it, &ref); + if (err > 0) { + err = 0; + break; + } + if (err < 0) + goto done; + + if (first == 0 && reftable_ref_record_is_deletion(&ref)) { + continue; + } + + err = reftable_writer_add_ref(wr, &ref); + if (err < 0) + goto done; + entries++; + } + reftable_iterator_destroy(&it); + + err = merged_table_init_iter(mt, &it, REFTABLE_BLOCK_TYPE_LOG); + if (err < 0) + goto done; + + err = reftable_iterator_seek_log(&it, ""); + if (err < 0) + goto done; + + while (1) { + err = reftable_iterator_next_log(&it, &log); + if (err > 0) { + err = 0; + break; + } + if (err < 0) + goto done; + if (first == 0 && reftable_log_record_is_deletion(&log)) { + continue; + } + + if (config && config->min_update_index > 0 && + log.update_index < config->min_update_index) { + continue; + } + + if (config && config->time > 0 && + log.value.update.time < config->time) { + continue; + } + + err = reftable_writer_add_log(wr, &log); + if (err < 0) + goto done; + entries++; + } + +done: + reftable_iterator_destroy(&it); + if (mt) + reftable_merged_table_free(mt); + reftable_ref_record_release(&ref); + reftable_log_record_release(&log); + st->stats.entries_written += entries; + return err; +} + +enum stack_compact_range_flags { + /* + * Perform a best-effort compaction. That is, even if we cannot lock + * all tables in the specified range, we will try to compact the + * remaining slice. + */ + STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0), +}; + +/* + * Compact all tables in the range `[first, last)` into a single new table. + * + * This function returns `0` on success or a code `< 0` on failure. When the + * stack or any of the tables in the specified range are already locked then + * this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that + * callers can either ignore, or they may choose to retry compaction after some + * amount of time. + */ +static int stack_compact_range(struct reftable_stack *st, + size_t first, size_t last, + struct reftable_log_expiry_config *expiry, + unsigned int flags) +{ + struct reftable_buf tables_list_buf = REFTABLE_BUF_INIT; + struct reftable_buf new_table_name = REFTABLE_BUF_INIT; + struct reftable_buf new_table_path = REFTABLE_BUF_INIT; + struct reftable_buf table_name = REFTABLE_BUF_INIT; + struct reftable_flock tables_list_lock = REFTABLE_FLOCK_INIT; + struct reftable_flock *table_locks = NULL; + struct reftable_tmpfile new_table = REFTABLE_TMPFILE_INIT; + int is_empty_table = 0, err = 0; + size_t first_to_replace, last_to_replace; + size_t i, nlocks = 0; + char **names = NULL; + + if (first > last || (!expiry && first == last)) { + err = 0; + goto done; + } + + st->stats.attempts++; + + /* + * Hold the lock so that we can read "tables.list" and lock all tables + * which are part of the user-specified range. + */ + err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else + err = REFTABLE_IO_ERROR; + goto done; + } + + err = stack_uptodate(st); + if (err) + goto done; + + /* + * Lock all tables in the user-provided range. This is the slice of our + * stack which we'll compact. + * + * Note that we lock tables in reverse order from last to first. The + * intent behind this is to allow a newer process to perform best + * effort compaction of tables that it has added in the case where an + * older process is still busy compacting tables which are preexisting + * from the point of view of the newer process. 
+ */ + REFTABLE_ALLOC_ARRAY(table_locks, last - first + 1); + if (!table_locks) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + for (i = 0; i < last - first + 1; i++) + table_locks[i] = REFTABLE_FLOCK_INIT; + + for (i = last + 1; i > first; i--) { + err = stack_filename(&table_name, st, reftable_table_name(st->tables[i - 1])); + if (err < 0) + goto done; + + err = flock_acquire(&table_locks[nlocks], table_name.buf, 0); + if (err < 0) { + /* + * When the table is locked already we may do a + * best-effort compaction and compact only the tables + * that we have managed to lock so far. This of course + * requires that we have been able to lock at least two + * tables, otherwise there would be nothing to compact. + * In that case, we return a lock error to our caller. + */ + if (errno == EEXIST && last - (i - 1) >= 2 && + flags & STACK_COMPACT_RANGE_BEST_EFFORT) { + err = 0; + /* + * The subtraction is to offset the index, the + * addition is to only compact up to the table + * of the preceding iteration. They obviously + * cancel each other out, but that may be + * non-obvious when it was omitted. + */ + first = (i - 1) + 1; + break; + } else if (errno == EEXIST) { + err = REFTABLE_LOCK_ERROR; + goto done; + } else { + err = REFTABLE_IO_ERROR; + goto done; + } + } + + /* + * We need to close the lockfiles as we might otherwise easily + * run into file descriptor exhaustion when we compress a lot + * of tables. + */ + err = flock_close(&table_locks[nlocks++]); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + } + + /* + * We have locked all tables in our range and can thus release the + * "tables.list" lock while compacting the locked tables. This allows + * concurrent updates to the stack to proceed. + */ + err = flock_release(&tables_list_lock); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + /* + * Compact the now-locked tables into a new table. Note that compacting + * these tables may end up with an empty new table in case tombstones + * end up cancelling out all refs in that range. + */ + err = stack_compact_locked(st, first, last, expiry, &new_table); + if (err < 0) { + if (err != REFTABLE_EMPTY_TABLE_ERROR) + goto done; + is_empty_table = 1; + } + + /* + * Now that we have written the new, compacted table we need to re-lock + * "tables.list". We'll then replace the compacted range of tables with + * the new table. + */ + err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms); + if (err < 0) { + if (errno == EEXIST) + err = REFTABLE_LOCK_ERROR; + else + err = REFTABLE_IO_ERROR; + goto done; + } + + if (st->opts.default_permissions) { + if (chmod(tables_list_lock.path, + st->opts.default_permissions) < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + } + + /* + * As we have unlocked the stack while compacting our slice of tables + * it may have happened that a concurrently running process has updated + * the stack while we were compacting. In that case, we need to check + * whether the tables that we have just compacted still exist in the + * stack in the exact same order as we have compacted them. + * + * If they do exist, then it is fine to continue and replace those + * tables with our compacted version. If they don't, then we need to + * abort. 
+ */ + err = stack_uptodate(st); + if (err < 0) + goto done; + if (err > 0) { + ssize_t new_offset = -1; + int fd; + + fd = open(st->list_file, O_RDONLY); + if (fd < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + err = fd_read_lines(fd, &names); + close(fd); + if (err < 0) + goto done; + + /* + * Search for the offset of the first table that we have + * compacted in the updated "tables.list" file. + */ + for (size_t i = 0; names[i]; i++) { + if (strcmp(names[i], st->tables[first]->name)) + continue; + + /* + * We have found the first entry. Verify that all the + * subsequent tables we have compacted still exist in + * the modified stack in the exact same order as we + * have compacted them. + */ + for (size_t j = 1; j < last - first + 1; j++) { + const char *old = first + j < st->merged->tables_len ? + st->tables[first + j]->name : NULL; + const char *new = names[i + j]; + + /* + * If some entries are missing or in case the tables + * have changed then we need to bail out. Again, this + * shouldn't ever happen because we have locked the + * tables we are compacting. + */ + if (!old || !new || strcmp(old, new)) { + err = REFTABLE_OUTDATED_ERROR; + goto done; + } + } + + new_offset = i; + break; + } + + /* + * In case we didn't find our compacted tables in the stack we + * need to bail out. In theory, this should have never happened + * because we locked the tables we are compacting. + */ + if (new_offset < 0) { + err = REFTABLE_OUTDATED_ERROR; + goto done; + } + + /* + * We have found the new range that we want to replace, so + * let's update the range of tables that we want to replace. + */ + first_to_replace = new_offset; + last_to_replace = last + (new_offset - first); + } else { + /* + * `fd_read_lines()` uses a `NULL` sentinel to indicate that + * the array is at its end. As we use `free_names()` to free + * the array, we need to include this sentinel value here and + * thus have to allocate `tables_len + 1` many entries. + */ + REFTABLE_CALLOC_ARRAY(names, st->merged->tables_len + 1); + if (!names) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + for (size_t i = 0; i < st->merged->tables_len; i++) { + names[i] = reftable_strdup(st->tables[i]->name); + if (!names[i]) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + } + first_to_replace = first; + last_to_replace = last; + } + + /* + * If the resulting compacted table is not empty, then we need to move + * it into place now. + */ + if (!is_empty_table) { + err = format_name(&new_table_name, st->tables[first]->min_update_index, + st->tables[last]->max_update_index); + if (err < 0) + goto done; + + err = reftable_buf_addstr(&new_table_name, ".ref"); + if (err < 0) + goto done; + + err = stack_filename(&new_table_path, st, new_table_name.buf); + if (err < 0) + goto done; + + err = tmpfile_rename(&new_table, new_table_path.buf); + if (err < 0) + goto done; + } + + /* + * Write the new "tables.list" contents with the compacted table we + * have just written. In case the compacted table became empty we + * simply skip writing it. 
+ */ + for (i = 0; i < first_to_replace; i++) { + if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 || + (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0) + goto done; + } + if (!is_empty_table) { + if ((err = reftable_buf_addstr(&tables_list_buf, new_table_name.buf)) < 0 || + (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0) + goto done; + } + for (i = last_to_replace + 1; names[i]; i++) { + if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 || + (err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0) + goto done; + } + + err = reftable_write_data(tables_list_lock.fd, + tables_list_buf.buf, tables_list_buf.len); + if (err < 0) { + err = REFTABLE_IO_ERROR; + unlink(new_table_path.buf); + goto done; + } + + err = stack_fsync(&st->opts, tables_list_lock.fd); + if (err < 0) { + err = REFTABLE_IO_ERROR; + unlink(new_table_path.buf); + goto done; + } + + err = flock_commit(&tables_list_lock); + if (err < 0) { + err = REFTABLE_IO_ERROR; + unlink(new_table_path.buf); + goto done; + } + + /* + * Reload the stack before deleting the compacted tables. We can only + * delete the files after we closed them on Windows, so this needs to + * happen first. + */ + err = reftable_stack_reload_maybe_reuse(st, first < last); + if (err < 0) + goto done; + + /* + * Delete the old tables. They may still be in use by concurrent + * readers, so it is expected that unlinking tables may fail. + */ + for (i = 0; i < nlocks; i++) { + struct reftable_flock *table_lock = &table_locks[i]; + + reftable_buf_reset(&table_name); + err = reftable_buf_add(&table_name, table_lock->path, + strlen(table_lock->path) - strlen(".lock")); + if (err) + continue; + + unlink(table_name.buf); + } + +done: + flock_release(&tables_list_lock); + for (i = 0; table_locks && i < nlocks; i++) + flock_release(&table_locks[i]); + reftable_free(table_locks); + + tmpfile_delete(&new_table); + reftable_buf_release(&new_table_name); + reftable_buf_release(&new_table_path); + reftable_buf_release(&tables_list_buf); + reftable_buf_release(&table_name); + free_names(names); + + if (err == REFTABLE_LOCK_ERROR) + st->stats.failures++; + + return err; +} + +int reftable_stack_compact_all(struct reftable_stack *st, + struct reftable_log_expiry_config *config) +{ + size_t last = st->merged->tables_len ? st->merged->tables_len - 1 : 0; + return stack_compact_range(st, 0, last, config, 0); +} + +static int segment_size(struct segment *s) +{ + return s->end - s->start; +} + +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n, + uint8_t factor) +{ + struct segment seg = { 0 }; + uint64_t bytes; + size_t i; + + if (!factor) + factor = DEFAULT_GEOMETRIC_FACTOR; + + /* + * If there are no tables or only a single one then we don't have to + * compact anything. The sequence is geometric by definition already. + */ + if (n <= 1) + return seg; + + /* + * Find the ending table of the compaction segment needed to restore the + * geometric sequence. Note that the segment end is exclusive. + * + * To do so, we iterate backwards starting from the most recent table + * until a valid segment end is found. If the preceding table is smaller + * than the current table multiplied by the geometric factor (2), the + * compaction segment end has been identified. + * + * Tables after the ending point are not added to the byte count because + * they are already valid members of the geometric sequence. 
Due to the + * properties of a geometric sequence, it is not possible for the sum of + * these tables to exceed the value of the ending point table. + * + * Example table size sequence requiring no compaction: + * 64, 32, 16, 8, 4, 2, 1 + * + * Example table size sequence where compaction segment end is set to + * the last table. Since the segment end is exclusive, the last table is + * excluded during subsequent compaction and the table with size 3 is + * the final table included: + * 64, 32, 16, 8, 4, 3, 1 + */ + for (i = n - 1; i > 0; i--) { + if (sizes[i - 1] < sizes[i] * factor) { + seg.end = i + 1; + bytes = sizes[i]; + break; + } + } + + /* + * Find the starting table of the compaction segment by iterating + * through the remaining tables and keeping track of the accumulated + * size of all tables seen from the segment end table. The previous + * table is compared to the accumulated size because the tables from the + * segment end are merged backwards recursively. + * + * Note that we keep iterating even after we have found the first + * starting point. This is because there may be tables in the stack + * preceding that first starting point which violate the geometric + * sequence. + * + * Example compaction segment start set to table with size 32: + * 128, 32, 16, 8, 4, 3, 1 + */ + for (; i > 0; i--) { + uint64_t curr = bytes; + bytes += sizes[i - 1]; + + if (sizes[i - 1] < curr * factor) { + seg.start = i - 1; + seg.bytes = bytes; + } + } + + return seg; +} + +static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st) +{ + int version = (st->opts.hash_id == REFTABLE_HASH_SHA1) ? 1 : 2; + int overhead = header_size(version) - 1; + uint64_t *sizes; + + REFTABLE_CALLOC_ARRAY(sizes, st->merged->tables_len); + if (!sizes) + return NULL; + + for (size_t i = 0; i < st->merged->tables_len; i++) + sizes[i] = st->tables[i]->size - overhead; + + return sizes; +} + +int reftable_stack_auto_compact(struct reftable_stack *st) +{ + struct segment seg; + uint64_t *sizes; + + if (st->merged->tables_len < 2) + return 0; + + sizes = stack_table_sizes_for_compaction(st); + if (!sizes) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + seg = suggest_compaction_segment(sizes, st->merged->tables_len, + st->opts.auto_compaction_factor); + reftable_free(sizes); + + if (segment_size(&seg) > 0) + return stack_compact_range(st, seg.start, seg.end - 1, + NULL, STACK_COMPACT_RANGE_BEST_EFFORT); + + return 0; +} + +struct reftable_compaction_stats * +reftable_stack_compaction_stats(struct reftable_stack *st) +{ + return &st->stats; +} + +int reftable_stack_read_ref(struct reftable_stack *st, const char *refname, + struct reftable_ref_record *ref) +{ + struct reftable_iterator it = { 0 }; + int ret; + + ret = reftable_merged_table_init_ref_iterator(st->merged, &it); + if (ret) + goto out; + + ret = reftable_iterator_seek_ref(&it, refname); + if (ret) + goto out; + + ret = reftable_iterator_next_ref(&it, ref); + if (ret) + goto out; + + if (strcmp(ref->refname, refname) || + reftable_ref_record_is_deletion(ref)) { + reftable_ref_record_release(ref); + ret = 1; + goto out; + } + +out: + reftable_iterator_destroy(&it); + return ret; +} + +int reftable_stack_read_log(struct reftable_stack *st, const char *refname, + struct reftable_log_record *log) +{ + struct reftable_iterator it = {0}; + int err; + + err = reftable_stack_init_log_iterator(st, &it); + if (err) + goto done; + + err = reftable_iterator_seek_log(&it, refname); + if (err) + goto done; + + err = reftable_iterator_next_log(&it, log); + if (err) 
+ goto done; + + if (strcmp(log->refname, refname) || + reftable_log_record_is_deletion(log)) { + err = 1; + goto done; + } + +done: + if (err) { + reftable_log_record_release(log); + } + reftable_iterator_destroy(&it); + return err; +} + +static int is_table_name(const char *s) +{ + const char *dot = strrchr(s, '.'); + return dot && !strcmp(dot, ".ref"); +} + +static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max, + const char *name) +{ + int err = 0; + uint64_t update_idx = 0; + struct reftable_block_source src = { NULL }; + struct reftable_table *table = NULL; + struct reftable_buf table_path = REFTABLE_BUF_INIT; + + err = stack_filename(&table_path, st, name); + if (err < 0) + goto done; + + err = reftable_block_source_from_file(&src, table_path.buf); + if (err < 0) + goto done; + + err = reftable_table_new(&table, &src, name); + if (err < 0) + goto done; + + update_idx = reftable_table_max_update_index(table); + reftable_table_decref(table); + + if (update_idx <= max) { + unlink(table_path.buf); + } +done: + reftable_buf_release(&table_path); +} + +static int reftable_stack_clean_locked(struct reftable_stack *st) +{ + uint64_t max = reftable_merged_table_max_update_index( + reftable_stack_merged_table(st)); + DIR *dir = opendir(st->reftable_dir); + struct dirent *d = NULL; + if (!dir) { + return REFTABLE_IO_ERROR; + } + + while ((d = readdir(dir))) { + int found = 0; + if (!is_table_name(d->d_name)) + continue; + + for (size_t i = 0; !found && i < st->tables_len; i++) + found = !strcmp(reftable_table_name(st->tables[i]), d->d_name); + if (found) + continue; + + remove_maybe_stale_table(st, max, d->d_name); + } + + closedir(dir); + return 0; +} + +int reftable_stack_clean(struct reftable_stack *st) +{ + struct reftable_addition *add = NULL; + int err = reftable_stack_new_addition(&add, st, 0); + if (err < 0) { + goto done; + } + + err = reftable_stack_reload(st); + if (err < 0) { + goto done; + } + + err = reftable_stack_clean_locked(st); + +done: + reftable_addition_destroy(add); + return err; +} + +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st) +{ + return reftable_merged_table_hash_id(st->merged); +} diff --git a/reftable/stack.h b/reftable/stack.h new file mode 100644 index 0000000000..bc28f2998a --- /dev/null +++ b/reftable/stack.h @@ -0,0 +1,41 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef STACK_H +#define STACK_H + +#include "system.h" +#include "reftable-writer.h" +#include "reftable-stack.h" + +struct reftable_stack { + struct stat list_st; + char *list_file; + int list_fd; + + char *reftable_dir; + + struct reftable_write_options opts; + + struct reftable_table **tables; + size_t tables_len; + struct reftable_merged_table *merged; + struct reftable_compaction_stats stats; +}; + +int read_lines(const char *filename, char ***lines); + +struct segment { + size_t start, end; + uint64_t bytes; +}; + +struct segment suggest_compaction_segment(uint64_t *sizes, size_t n, + uint8_t factor); + +#endif diff --git a/reftable/system.c b/reftable/system.c new file mode 100644 index 0000000000..1ee268b125 --- /dev/null +++ b/reftable/system.c @@ -0,0 +1,133 @@ +#include "../git-compat-util.h" + +#include "system.h" +#include "basics.h" +#include "reftable-error.h" +#include "../lockfile.h" +#include "../tempfile.h" + +uint32_t reftable_rand(void) +{ + return 
git_rand(CSPRNG_BYTES_INSECURE); +} + +int tmpfile_from_pattern(struct reftable_tmpfile *out, const char *pattern) +{ + struct tempfile *tempfile; + + tempfile = mks_tempfile(pattern); + if (!tempfile) + return REFTABLE_IO_ERROR; + + out->path = tempfile->filename.buf; + out->fd = tempfile->fd; + out->priv = tempfile; + + return 0; +} + +int tmpfile_close(struct reftable_tmpfile *t) +{ + struct tempfile *tempfile = t->priv; + int ret = close_tempfile_gently(tempfile); + t->fd = -1; + if (ret < 0) + return REFTABLE_IO_ERROR; + return 0; +} + +int tmpfile_delete(struct reftable_tmpfile *t) +{ + struct tempfile *tempfile = t->priv; + int ret = delete_tempfile(&tempfile); + *t = REFTABLE_TMPFILE_INIT; + if (ret < 0) + return REFTABLE_IO_ERROR; + return 0; +} + +int tmpfile_rename(struct reftable_tmpfile *t, const char *path) +{ + struct tempfile *tempfile = t->priv; + int ret = rename_tempfile(&tempfile, path); + *t = REFTABLE_TMPFILE_INIT; + if (ret < 0) + return REFTABLE_IO_ERROR; + return 0; +} + +int flock_acquire(struct reftable_flock *l, const char *target_path, + long timeout_ms) +{ + struct lock_file *lockfile; + int err; + + lockfile = reftable_malloc(sizeof(*lockfile)); + if (!lockfile) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + err = hold_lock_file_for_update_timeout(lockfile, target_path, LOCK_NO_DEREF, + timeout_ms); + if (err < 0) { + reftable_free(lockfile); + if (errno == EEXIST) + return REFTABLE_LOCK_ERROR; + return -1; + } + + l->fd = get_lock_file_fd(lockfile); + l->path = get_lock_file_path(lockfile); + l->priv = lockfile; + + return 0; +} + +int flock_close(struct reftable_flock *l) +{ + struct lock_file *lockfile = l->priv; + int ret; + + if (!lockfile) + return REFTABLE_API_ERROR; + + ret = close_lock_file_gently(lockfile); + l->fd = -1; + if (ret < 0) + return REFTABLE_IO_ERROR; + + return 0; +} + +int flock_release(struct reftable_flock *l) +{ + struct lock_file *lockfile = l->priv; + int ret; + + if (!lockfile) + return 0; + + ret = rollback_lock_file(lockfile); + reftable_free(lockfile); + *l = REFTABLE_FLOCK_INIT; + if (ret < 0) + return REFTABLE_IO_ERROR; + + return 0; +} + +int flock_commit(struct reftable_flock *l) +{ + struct lock_file *lockfile = l->priv; + int ret; + + if (!lockfile) + return REFTABLE_API_ERROR; + + ret = commit_lock_file(lockfile); + reftable_free(lockfile); + *l = REFTABLE_FLOCK_INIT; + if (ret < 0) + return REFTABLE_IO_ERROR; + + return 0; +} diff --git a/reftable/system.h b/reftable/system.h new file mode 100644 index 0000000000..beb9d2431f --- /dev/null +++ b/reftable/system.h @@ -0,0 +1,109 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef SYSTEM_H +#define SYSTEM_H + +/* This header glues the reftable library to the rest of Git */ + +#define MINGW_DONT_HANDLE_IN_USE_ERROR +#include "compat/posix.h" +#include "compat/zlib-compat.h" + +/* + * Return a random 32 bit integer. This function is expected to return + * pre-seeded data. + */ +uint32_t reftable_rand(void); + +/* + * An implementation-specific temporary file. By making this specific to the + * implementation it becomes possible to tie temporary files into any kind of + * signal or atexit handlers for cleanup on abnormal situations. 
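+ * In this glue layer they are backed by Git's tempfile API (see system.c
+ * above), which provides exactly that kind of cleanup on abnormal exits.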
+ */
+struct reftable_tmpfile {
+	const char *path;
+	int fd;
+	void *priv;
+};
+#define REFTABLE_TMPFILE_INIT ((struct reftable_tmpfile) { .fd = -1, })
+
+/*
+ * Create a temporary file from a pattern similar to how mkstemp(3p) would.
+ * The `pattern` shall not be modified. On success, the structure at `out` has
+ * been initialized such that it is ready for use. Returns 0 on success, a
+ * reftable error code on error.
+ */
+int tmpfile_from_pattern(struct reftable_tmpfile *out, const char *pattern);
+
+/*
+ * Close the temporary file's file descriptor without removing the file itself.
+ * This is a no-op in case the file has already been closed beforehand. Returns
+ * 0 on success, a reftable error code on error.
+ */
+int tmpfile_close(struct reftable_tmpfile *t);
+
+/*
+ * Close the temporary file and delete it. This is a no-op in case the file has
+ * already been deleted or renamed beforehand. Returns 0 on success, a reftable
+ * error code on error.
+ */
+int tmpfile_delete(struct reftable_tmpfile *t);
+
+/*
+ * Rename the temporary file to the provided path. The temporary file must be
+ * active. Returns 0 on success, a reftable error code on error. Deactivates
+ * the temporary file.
+ */
+int tmpfile_rename(struct reftable_tmpfile *t, const char *path);
+
+/*
+ * An implementation-specific file lock. Same as with `reftable_tmpfile`,
+ * making this specific to the implementation makes it possible to tie this
+ * into signal or atexit handlers such that we know to clean up stale locks on
+ * abnormal exits.
+ */
+struct reftable_flock {
+	const char *path;
+	int fd;
+	void *priv;
+};
+#define REFTABLE_FLOCK_INIT ((struct reftable_flock){ .fd = -1, })
+
+/*
+ * Acquire the lock for the given target path by exclusively creating a file
+ * with ".lock" appended to it. If that lock exists, we wait up to `timeout_ms`
+ * to acquire the lock. If `timeout_ms` is 0 we don't wait, if it is negative
+ * we block indefinitely.
+ *
+ * Returns 0 on success, a reftable error code on error.
+ */
+int flock_acquire(struct reftable_flock *l, const char *target_path,
+		  long timeout_ms);
+
+/*
+ * Close the lockfile's file descriptor without removing the lock itself. This
+ * is a no-op in case the lockfile has already been closed beforehand. Returns
+ * 0 on success, a reftable error code on error.
+ */
+int flock_close(struct reftable_flock *l);
+
+/*
+ * Release the lock by unlinking the lockfile. This is a no-op in case the
+ * lockfile has already been released or committed beforehand. Returns 0 on
+ * success, a reftable error code on error.
+ */
+int flock_release(struct reftable_flock *l);
+
+/*
+ * Commit the lock by renaming the lockfile into place. Returns 0 on success, a
+ * reftable error code on error.
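+ * Like flock_release(), this deactivates the lock and resets the structure
+ * to REFTABLE_FLOCK_INIT.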
+ */ +int flock_commit(struct reftable_flock *l); + +#endif diff --git a/reftable/table.c b/reftable/table.c new file mode 100644 index 0000000000..56362df0ed --- /dev/null +++ b/reftable/table.c @@ -0,0 +1,779 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "table.h" + +#include "system.h" +#include "block.h" +#include "blocksource.h" +#include "constants.h" +#include "iter.h" +#include "record.h" +#include "reftable-error.h" + +static struct reftable_table_offsets * +table_offsets_for(struct reftable_table *t, uint8_t typ) +{ + switch (typ) { + case REFTABLE_BLOCK_TYPE_REF: + return &t->ref_offsets; + case REFTABLE_BLOCK_TYPE_LOG: + return &t->log_offsets; + case REFTABLE_BLOCK_TYPE_OBJ: + return &t->obj_offsets; + } + abort(); +} + +enum reftable_hash reftable_table_hash_id(struct reftable_table *t) +{ + return t->hash_id; +} + +const char *reftable_table_name(struct reftable_table *t) +{ + return t->name; +} + +static int parse_footer(struct reftable_table *t, uint8_t *footer, + uint8_t *header) +{ + uint8_t *f = footer; + uint8_t first_block_typ; + int err = 0; + uint32_t computed_crc; + uint32_t file_crc; + + if (memcmp(f, "REFT", 4)) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + f += 4; + + if (memcmp(footer, header, header_size(t->version))) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + f++; + t->block_size = reftable_get_be24(f); + + f += 3; + t->min_update_index = reftable_get_be64(f); + f += 8; + t->max_update_index = reftable_get_be64(f); + f += 8; + + if (t->version == 1) { + t->hash_id = REFTABLE_HASH_SHA1; + } else { + switch (reftable_get_be32(f)) { + case REFTABLE_FORMAT_ID_SHA1: + t->hash_id = REFTABLE_HASH_SHA1; + break; + case REFTABLE_FORMAT_ID_SHA256: + t->hash_id = REFTABLE_HASH_SHA256; + break; + default: + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + f += 4; + } + + t->ref_offsets.index_offset = reftable_get_be64(f); + f += 8; + + t->obj_offsets.offset = reftable_get_be64(f); + f += 8; + + t->object_id_len = t->obj_offsets.offset & ((1 << 5) - 1); + t->obj_offsets.offset >>= 5; + + t->obj_offsets.index_offset = reftable_get_be64(f); + f += 8; + t->log_offsets.offset = reftable_get_be64(f); + f += 8; + t->log_offsets.index_offset = reftable_get_be64(f); + f += 8; + + computed_crc = crc32(0, footer, f - footer); + file_crc = reftable_get_be32(f); + f += 4; + if (computed_crc != file_crc) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + first_block_typ = header[header_size(t->version)]; + t->ref_offsets.is_present = (first_block_typ == REFTABLE_BLOCK_TYPE_REF); + t->ref_offsets.offset = 0; + t->log_offsets.is_present = (first_block_typ == REFTABLE_BLOCK_TYPE_LOG || + t->log_offsets.offset > 0); + t->obj_offsets.is_present = t->obj_offsets.offset > 0; + if (t->obj_offsets.is_present && !t->object_id_len) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + err = 0; +done: + return err; +} + +struct table_iter { + struct reftable_table *table; + uint8_t typ; + uint64_t block_off; + struct reftable_block block; + struct block_iter bi; + int is_finished; +}; + +static int table_iter_init(struct table_iter *ti, struct reftable_table *t) +{ + struct block_iter bi = BLOCK_ITER_INIT; + memset(ti, 0, sizeof(*ti)); + reftable_table_incref(t); + ti->table = t; + ti->bi = bi; + return 0; +} + +static int table_iter_next_in_block(struct table_iter *ti, + struct reftable_record 
*rec) +{ + int res = block_iter_next(&ti->bi, rec); + if (res == 0 && reftable_record_type(rec) == REFTABLE_BLOCK_TYPE_REF) { + rec->u.ref.update_index += ti->table->min_update_index; + } + + return res; +} + +static void table_iter_block_done(struct table_iter *ti) +{ + reftable_block_release(&ti->block); + block_iter_reset(&ti->bi); +} + +int table_init_block(struct reftable_table *t, struct reftable_block *block, + uint64_t next_off, uint8_t want_typ) +{ + uint32_t header_off = next_off ? 0 : header_size(t->version); + int err; + + if (next_off >= t->size) + return 1; + + err = reftable_block_init(block, &t->source, next_off, header_off, + t->block_size, hash_size(t->hash_id), want_typ); + if (err) + reftable_block_release(block); + return err; +} + +static void table_iter_close(struct table_iter *ti) +{ + table_iter_block_done(ti); + block_iter_close(&ti->bi); + reftable_table_decref(ti->table); +} + +static int table_iter_next_block(struct table_iter *ti) +{ + uint64_t next_block_off = ti->block_off + ti->block.full_block_size; + int err; + + err = table_init_block(ti->table, &ti->block, next_block_off, ti->typ); + if (err > 0) + ti->is_finished = 1; + if (err) + return err; + + ti->block_off = next_block_off; + ti->is_finished = 0; + block_iter_init(&ti->bi, &ti->block); + + return 0; +} + +static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) +{ + if (reftable_record_type(rec) != ti->typ) + return REFTABLE_API_ERROR; + + while (1) { + int err; + + if (ti->is_finished) + return 1; + + /* + * Check whether the current block still has more records. If + * so, return it. If the iterator returns positive then the + * current block has been exhausted. + */ + err = table_iter_next_in_block(ti, rec); + if (err <= 0) + return err; + + /* + * Otherwise, we need to continue to the next block in the + * table and retry. If there are no more blocks then the + * iterator is drained. + */ + err = table_iter_next_block(ti); + if (err) { + ti->is_finished = 1; + return err; + } + } +} + +static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) +{ + int err; + + err = table_init_block(ti->table, &ti->block, off, typ); + if (err != 0) + return err; + + ti->typ = reftable_block_type(&ti->block); + ti->block_off = off; + block_iter_init(&ti->bi, &ti->block); + ti->is_finished = 0; + return 0; +} + +static int table_iter_seek_start(struct table_iter *ti, uint8_t typ, int index) +{ + struct reftable_table_offsets *offs = table_offsets_for(ti->table, typ); + uint64_t off = offs->offset; + if (index) { + off = offs->index_offset; + if (off == 0) { + return 1; + } + typ = REFTABLE_BLOCK_TYPE_INDEX; + } + + return table_iter_seek_to(ti, off, typ); +} + +static int table_iter_seek_linear(struct table_iter *ti, + struct reftable_record *want) +{ + struct reftable_buf want_key = REFTABLE_BUF_INIT; + struct reftable_buf got_key = REFTABLE_BUF_INIT; + struct reftable_record rec; + int err; + + err = reftable_record_init(&rec, reftable_record_type(want)); + if (err < 0) + goto done; + + err = reftable_record_key(want, &want_key); + if (err < 0) + goto done; + + /* + * First we need to locate the block that must contain our record. To + * do so we scan through blocks linearly until we find the first block + * whose first key is bigger than our wanted key. Once we have found + * that block we know that the key must be contained in the preceding + * block. 
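+ * For example, with blocks whose first keys are "a", "m" and "t", a seek
+ * for "p" keeps scanning until it sees "t" and then settles on the block
+ * starting at "m".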
+ * + * This algorithm is somewhat unfortunate because it means that we + * always have to seek one block too far and then back up. But as we + * can only decode the _first_ key of a block but not its _last_ key we + * have no other way to do this. + */ + while (1) { + struct table_iter next = *ti; + + /* + * We must be careful to not modify underlying data of `ti` + * because we may find that `next` does not contain our desired + * block, but that `ti` does. In that case, we would discard + * `next` and continue with `ti`. + * + * This also means that we cannot reuse allocated memory for + * `next` here. While it would be great if we could, it should + * in practice not be too bad given that we should only ever + * end up doing linear seeks with at most three blocks. As soon + * as we have more than three blocks we would have an index, so + * we would not do a linear search there anymore. + */ + memset(&next.block.block_data, 0, sizeof(next.block.block_data)); + next.block.zstream = NULL; + next.block.uncompressed_data = NULL; + next.block.uncompressed_cap = 0; + + err = table_iter_next_block(&next); + if (err < 0) + goto done; + if (err > 0) + break; + + err = reftable_block_first_key(&next.block, &got_key); + if (err < 0) + goto done; + + if (reftable_buf_cmp(&got_key, &want_key) > 0) { + table_iter_block_done(&next); + break; + } + + table_iter_block_done(ti); + *ti = next; + } + + /* + * We have located the block that must contain our record, so we seek + * the wanted key inside of it. If the block does not contain our key + * we know that the corresponding record does not exist. + */ + block_iter_init(&ti->bi, &ti->block); + err = block_iter_seek_key(&ti->bi, &want_key); + if (err < 0) + goto done; + err = 0; + +done: + reftable_record_release(&rec); + reftable_buf_release(&want_key); + reftable_buf_release(&got_key); + return err; +} + +static int table_iter_seek_indexed(struct table_iter *ti, + struct reftable_record *rec) +{ + struct reftable_record want_index = { + .type = REFTABLE_BLOCK_TYPE_INDEX, .u.idx = { .last_key = REFTABLE_BUF_INIT } + }; + struct reftable_record index_result = { + .type = REFTABLE_BLOCK_TYPE_INDEX, + .u.idx = { .last_key = REFTABLE_BUF_INIT }, + }; + int err; + + err = reftable_record_key(rec, &want_index.u.idx.last_key); + if (err < 0) + goto done; + + /* + * The index may consist of multiple levels, where each level may have + * multiple index blocks. We start by doing a linear search in the + * highest layer that identifies the relevant index block as well as + * the record inside that block that corresponds to our wanted key. + */ + err = table_iter_seek_linear(ti, &want_index); + if (err < 0) + goto done; + + /* + * Traverse down the levels until we find a non-index entry. + */ + while (1) { + /* + * In case we seek a record that does not exist the index iter + * will tell us that the iterator is over. This works because + * the last index entry of the current level will contain the + * last key it knows about. So in case our seeked key is larger + * than the last indexed key we know that it won't exist. + * + * There is one subtlety in the layout of the index section + * that makes this work as expected: the highest-level index is + * at end of the section and will point backwards and thus we + * start reading from the end of the index section, not the + * beginning. 
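+ * (This matches writer_finish_section(), which emits the lowest index
+ * level first and the highest level last.)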
+ * + * If that wasn't the case and the order was reversed then the + * linear seek would seek into the lower levels and traverse + * all levels of the index only to find out that the key does + * not exist. + */ + err = table_iter_next(ti, &index_result); + if (err != 0) + goto done; + + err = table_iter_seek_to(ti, index_result.u.idx.offset, 0); + if (err != 0) + goto done; + + block_iter_init(&ti->bi, &ti->block); + + err = block_iter_seek_key(&ti->bi, &want_index.u.idx.last_key); + if (err < 0) + goto done; + + if (ti->typ == reftable_record_type(rec)) { + err = 0; + break; + } + + if (ti->typ != REFTABLE_BLOCK_TYPE_INDEX) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + } + +done: + reftable_record_release(&want_index); + reftable_record_release(&index_result); + return err; +} + +static int table_iter_seek(struct table_iter *ti, + struct reftable_record *want) +{ + uint8_t typ = reftable_record_type(want); + struct reftable_table_offsets *offs = table_offsets_for(ti->table, typ); + int err; + + err = table_iter_seek_start(ti, reftable_record_type(want), + !!offs->index_offset); + if (err < 0) + goto out; + + if (offs->index_offset) + err = table_iter_seek_indexed(ti, want); + else + err = table_iter_seek_linear(ti, want); + if (err) + goto out; + +out: + return err; +} + +static int table_iter_seek_void(void *ti, struct reftable_record *want) +{ + return table_iter_seek(ti, want); +} + +static int table_iter_next_void(void *ti, struct reftable_record *rec) +{ + return table_iter_next(ti, rec); +} + +static void table_iter_close_void(void *ti) +{ + table_iter_close(ti); +} + +static struct reftable_iterator_vtable table_iter_vtable = { + .seek = &table_iter_seek_void, + .next = &table_iter_next_void, + .close = &table_iter_close_void, +}; + +static void iterator_from_table_iter(struct reftable_iterator *it, + struct table_iter *ti) +{ + assert(!it->ops); + it->iter_arg = ti; + it->ops = &table_iter_vtable; +} + +int table_init_iter(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t typ) +{ + struct reftable_table_offsets *offs = table_offsets_for(t, typ); + + if (offs->is_present) { + struct table_iter *ti; + REFTABLE_ALLOC_ARRAY(ti, 1); + if (!ti) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + table_iter_init(ti, t); + iterator_from_table_iter(it, ti); + } else { + iterator_set_empty(it); + } + + return 0; +} + +int reftable_table_init_ref_iterator(struct reftable_table *t, + struct reftable_iterator *it) +{ + return table_init_iter(t, it, REFTABLE_BLOCK_TYPE_REF); +} + +int reftable_table_init_log_iterator(struct reftable_table *t, + struct reftable_iterator *it) +{ + return table_init_iter(t, it, REFTABLE_BLOCK_TYPE_LOG); +} + +int reftable_table_new(struct reftable_table **out, + struct reftable_block_source *source, char const *name) +{ + struct reftable_block_data footer = { 0 }; + struct reftable_block_data header = { 0 }; + struct reftable_table *t; + uint64_t file_size = block_source_size(source); + uint32_t read_size; + ssize_t bytes_read; + int err; + + REFTABLE_CALLOC_ARRAY(t, 1); + if (!t) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + + /* + * We need one extra byte to read the type of first block. We also + * pretend to always be reading v2 of the format because it is larger. 
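+ * The v2 header is four bytes larger than the v1 header because it
+ * additionally embeds the hash format ID.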
+ */ + read_size = header_size(2) + 1; + if (read_size > file_size) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + bytes_read = block_source_read_data(source, &header, 0, read_size); + if (bytes_read < 0 || (size_t)bytes_read != read_size) { + err = REFTABLE_IO_ERROR; + goto done; + } + + if (memcmp(header.data, "REFT", 4)) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + t->version = header.data[4]; + if (t->version != 1 && t->version != 2) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + + t->size = file_size - footer_size(t->version); + t->source = *source; + t->name = reftable_strdup(name); + if (!t->name) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto done; + } + t->hash_id = 0; + t->refcount = 1; + + bytes_read = block_source_read_data(source, &footer, t->size, + footer_size(t->version)); + if (bytes_read < 0 || (size_t)bytes_read != footer_size(t->version)) { + err = REFTABLE_IO_ERROR; + goto done; + } + + err = parse_footer(t, footer.data, header.data); + if (err) + goto done; + + *out = t; + +done: + block_source_release_data(&footer); + block_source_release_data(&header); + if (err) { + if (t) + reftable_free(t->name); + reftable_free(t); + block_source_close(source); + } + return err; +} + +void reftable_table_incref(struct reftable_table *t) +{ + t->refcount++; +} + +void reftable_table_decref(struct reftable_table *t) +{ + if (!t) + return; + if (--t->refcount) + return; + block_source_close(&t->source); + REFTABLE_FREE_AND_NULL(t->name); + reftable_free(t); +} + +static int reftable_table_refs_for_indexed(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t *oid) +{ + struct reftable_record want = { + .type = REFTABLE_BLOCK_TYPE_OBJ, + .u.obj = { + .hash_prefix = oid, + .hash_prefix_len = t->object_id_len, + }, + }; + struct reftable_iterator oit = { NULL }; + struct reftable_record got = { + .type = REFTABLE_BLOCK_TYPE_OBJ, + .u.obj = { 0 }, + }; + int err = 0; + struct indexed_table_ref_iter *itr = NULL; + + /* Look through the reverse index. 
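+	 * Each OBJ record maps an abbreviated object ID to the offsets of the
+	 * ref blocks that mention it.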
*/ + err = table_init_iter(t, &oit, REFTABLE_BLOCK_TYPE_OBJ); + if (err < 0) + goto done; + + err = iterator_seek(&oit, &want); + if (err != 0) + goto done; + + /* read out the reftable_obj_record */ + err = iterator_next(&oit, &got); + if (err < 0) + goto done; + + if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix, + t->object_id_len)) { + /* didn't find it; return empty iterator */ + iterator_set_empty(it); + err = 0; + goto done; + } + + err = indexed_table_ref_iter_new(&itr, t, oid, hash_size(t->hash_id), + got.u.obj.offsets, + got.u.obj.offset_len); + if (err < 0) + goto done; + got.u.obj.offsets = NULL; + iterator_from_indexed_table_ref_iter(it, itr); + +done: + reftable_iterator_destroy(&oit); + reftable_record_release(&got); + return err; +} + +static int reftable_table_refs_for_unindexed(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t *oid) +{ + struct table_iter *ti; + struct filtering_ref_iterator *filter = NULL; + struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT; + uint32_t oid_len = hash_size(t->hash_id); + int err; + + REFTABLE_ALLOC_ARRAY(ti, 1); + if (!ti) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + + table_iter_init(ti, t); + err = table_iter_seek_start(ti, REFTABLE_BLOCK_TYPE_REF, 0); + if (err < 0) + goto out; + + filter = reftable_malloc(sizeof(*filter)); + if (!filter) { + err = REFTABLE_OUT_OF_MEMORY_ERROR; + goto out; + } + *filter = empty; + + err = reftable_buf_add(&filter->oid, oid, oid_len); + if (err < 0) + goto out; + + iterator_from_table_iter(&filter->it, ti); + + iterator_from_filtering_ref_iterator(it, filter); + + err = 0; + +out: + if (err < 0) { + if (ti) + table_iter_close(ti); + reftable_free(ti); + } + return err; +} + +int reftable_table_refs_for(struct reftable_table *t, + struct reftable_iterator *it, uint8_t *oid) +{ + if (t->obj_offsets.is_present) + return reftable_table_refs_for_indexed(t, it, oid); + return reftable_table_refs_for_unindexed(t, it, oid); +} + +uint64_t reftable_table_max_update_index(struct reftable_table *t) +{ + return t->max_update_index; +} + +uint64_t reftable_table_min_update_index(struct reftable_table *t) +{ + return t->min_update_index; +} + +int reftable_table_iterator_init(struct reftable_table_iterator *it, + struct reftable_table *t) +{ + struct table_iter *ti; + int err; + + REFTABLE_ALLOC_ARRAY(ti, 1); + if (!ti) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + err = table_iter_init(ti, t); + if (err < 0) + goto out; + + it->iter_arg = ti; + err = 0; + +out: + if (err < 0) + reftable_free(ti); + return err; +} + +void reftable_table_iterator_release(struct reftable_table_iterator *it) +{ + if (!it->iter_arg) + return; + table_iter_close(it->iter_arg); + reftable_free(it->iter_arg); + it->iter_arg = NULL; +} + +int reftable_table_iterator_next(struct reftable_table_iterator *it, + const struct reftable_block **out) +{ + struct table_iter *ti = it->iter_arg; + int err; + + err = table_iter_next_block(ti); + if (err) + return err; + + *out = &ti->block; + + return 0; +} diff --git a/reftable/table.h b/reftable/table.h new file mode 100644 index 0000000000..c54703e621 --- /dev/null +++ b/reftable/table.h @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef TABLE_H +#define TABLE_H + +#include "block.h" +#include "record.h" +#include "reftable-iterator.h" +#include 
"reftable-table.h" + +const char *reftable_table_name(struct reftable_table *t); + +int table_init_iter(struct reftable_table *t, + struct reftable_iterator *it, + uint8_t typ); + +/* + * Initialize a block by reading from the given table and offset. + */ +int table_init_block(struct reftable_table *t, struct reftable_block *block, + uint64_t next_off, uint8_t want_typ); + +#endif diff --git a/reftable/tree.c b/reftable/tree.c new file mode 100644 index 0000000000..a52f7c0c7d --- /dev/null +++ b/reftable/tree.c @@ -0,0 +1,74 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "system.h" +#include "tree.h" + +#include "basics.h" + +struct tree_node *tree_search(struct tree_node *tree, + void *key, + int (*compare)(const void *, const void *)) +{ + int res; + if (!tree) + return NULL; + res = compare(key, tree->key); + if (res < 0) + return tree_search(tree->left, key, compare); + else if (res > 0) + return tree_search(tree->right, key, compare); + return tree; +} + +struct tree_node *tree_insert(struct tree_node **rootp, + void *key, + int (*compare)(const void *, const void *)) +{ + int res; + + if (!*rootp) { + struct tree_node *n; + + REFTABLE_CALLOC_ARRAY(n, 1); + if (!n) + return NULL; + + n->key = key; + *rootp = n; + return *rootp; + } + + res = compare(key, (*rootp)->key); + if (res < 0) + return tree_insert(&(*rootp)->left, key, compare); + else if (res > 0) + return tree_insert(&(*rootp)->right, key, compare); + return *rootp; +} + +void infix_walk(struct tree_node *t, void (*action)(void *arg, void *key), + void *arg) +{ + if (t->left) + infix_walk(t->left, action, arg); + action(arg, t->key); + if (t->right) + infix_walk(t->right, action, arg); +} + +void tree_free(struct tree_node *t) +{ + if (!t) + return; + if (t->left) + tree_free(t->left); + if (t->right) + tree_free(t->right); + reftable_free(t); +} diff --git a/reftable/tree.h b/reftable/tree.h new file mode 100644 index 0000000000..2c9c465299 --- /dev/null +++ b/reftable/tree.h @@ -0,0 +1,45 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef TREE_H +#define TREE_H + +/* tree_node is a generic binary search tree. */ +struct tree_node { + void *key; + struct tree_node *left, *right; +}; + +/* + * Search the tree for the node matching the given key using `compare` as + * comparison function. Returns the node whose key matches or `NULL` in case + * the key does not exist in the tree. + */ +struct tree_node *tree_search(struct tree_node *tree, + void *key, + int (*compare)(const void *, const void *)); + +/* + * Insert a node into the tree. Returns the newly inserted node if the key does + * not yet exist. Otherwise it returns the preexisting node. Returns `NULL` + * when allocating the new node fails. + */ +struct tree_node *tree_insert(struct tree_node **rootp, + void *key, + int (*compare)(const void *, const void *)); + +/* performs an infix walk of the tree. */ +void infix_walk(struct tree_node *t, void (*action)(void *arg, void *key), + void *arg); + +/* + * deallocates the tree nodes recursively. Keys should be deallocated separately + * by walking over the tree. 
*/ +void tree_free(struct tree_node *t); + +#endif diff --git a/reftable/writer.c b/reftable/writer.c new file mode 100644 index 0000000000..3b4ebdd6dc --- /dev/null +++ b/reftable/writer.c @@ -0,0 +1,884 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#include "writer.h" + +#include "system.h" + +#include "block.h" +#include "constants.h" +#include "record.h" +#include "tree.h" +#include "reftable-error.h" + +/* finishes a block, and writes it to storage */ +static int writer_flush_block(struct reftable_writer *w); + +/* deallocates memory related to the index */ +static void writer_clear_index(struct reftable_writer *w); + +/* finishes writing a 'r' (refs) or 'g' (reflogs) section */ +static int writer_finish_public_section(struct reftable_writer *w); + +static struct reftable_block_stats * +writer_reftable_block_stats(struct reftable_writer *w, uint8_t typ) +{ + switch (typ) { + case 'r': + return &w->stats.ref_stats; + case 'o': + return &w->stats.obj_stats; + case 'i': + return &w->stats.idx_stats; + case 'g': + return &w->stats.log_stats; + } + abort(); + return NULL; +} + +/* write data, queuing the padding for the next write. Returns negative for + * error. */ +static int padded_write(struct reftable_writer *w, uint8_t *data, size_t len, + int padding) +{ + int n = 0; + if (w->pending_padding > 0) { + uint8_t *zeroed; + int n; + + zeroed = reftable_calloc(w->pending_padding, sizeof(*zeroed)); + if (!zeroed) + return -1; + + n = w->write(w->write_arg, zeroed, w->pending_padding); + if (n < 0) { + reftable_free(zeroed); + return n; + } + + w->pending_padding = 0; + reftable_free(zeroed); + } + + w->pending_padding = padding; + n = w->write(w->write_arg, data, len); + if (n < 0) + return n; + n += padding; + return 0; +} + +static void options_set_defaults(struct reftable_write_options *opts) +{ + if (opts->restart_interval == 0) { + opts->restart_interval = 16; + } + + if (opts->hash_id == 0) { + opts->hash_id = REFTABLE_HASH_SHA1; + } + if (opts->block_size == 0) { + opts->block_size = DEFAULT_BLOCK_SIZE; + } +} + +static int writer_version(struct reftable_writer *w) +{ + return (w->opts.hash_id == 0 || w->opts.hash_id == REFTABLE_HASH_SHA1) ? 
+ 1 : + 2; +} + +static int writer_write_header(struct reftable_writer *w, uint8_t *dest) +{ + memcpy(dest, "REFT", 4); + + dest[4] = writer_version(w); + + reftable_put_be24(dest + 5, w->opts.block_size); + reftable_put_be64(dest + 8, w->min_update_index); + reftable_put_be64(dest + 16, w->max_update_index); + if (writer_version(w) == 2) { + uint32_t hash_id; + + switch (w->opts.hash_id) { + case REFTABLE_HASH_SHA1: + hash_id = REFTABLE_FORMAT_ID_SHA1; + break; + case REFTABLE_HASH_SHA256: + hash_id = REFTABLE_FORMAT_ID_SHA256; + break; + default: + return -1; + } + + reftable_put_be32(dest + 24, hash_id); + } + + return header_size(writer_version(w)); +} + +static int writer_reinit_block_writer(struct reftable_writer *w, uint8_t typ) +{ + int block_start = 0, ret; + + if (w->next == 0) + block_start = header_size(writer_version(w)); + + reftable_buf_reset(&w->last_key); + ret = block_writer_init(&w->block_writer_data, typ, w->block, + w->opts.block_size, block_start, + hash_size(w->opts.hash_id)); + if (ret < 0) + return ret; + + w->block_writer = &w->block_writer_data; + w->block_writer->restart_interval = w->opts.restart_interval; + + return 0; +} + +int reftable_writer_new(struct reftable_writer **out, + ssize_t (*writer_func)(void *, const void *, size_t), + int (*flush_func)(void *), + void *writer_arg, const struct reftable_write_options *_opts) +{ + struct reftable_write_options opts = {0}; + struct reftable_writer *wp; + + wp = reftable_calloc(1, sizeof(*wp)); + if (!wp) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + if (_opts) + opts = *_opts; + options_set_defaults(&opts); + if (opts.block_size >= (1 << 24)) + return REFTABLE_API_ERROR; + + reftable_buf_init(&wp->block_writer_data.last_key); + reftable_buf_init(&wp->last_key); + reftable_buf_init(&wp->scratch); + REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size); + if (!wp->block) { + reftable_free(wp); + return REFTABLE_OUT_OF_MEMORY_ERROR; + } + wp->write = writer_func; + wp->write_arg = writer_arg; + wp->opts = opts; + wp->flush = flush_func; + writer_reinit_block_writer(wp, REFTABLE_BLOCK_TYPE_REF); + + *out = wp; + + return 0; +} + +int reftable_writer_set_limits(struct reftable_writer *w, uint64_t min, + uint64_t max) +{ + /* + * Set the min/max update index limits for the reftable writer. + * This must be called before adding any records, since: + * - The 'next' field gets set after writing the first block. + * - The 'last_key' field updates with each new record (but resets + * after sections). + * Returns REFTABLE_API_ERROR if called after writing has begun. 
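+	 * Both limits end up in the table header and footer, and ref records
+	 * are stored with their update_index relative to min_update_index.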
+ */ + if (w->next || w->last_key.len) + return REFTABLE_API_ERROR; + + w->min_update_index = min; + w->max_update_index = max; + + return 0; +} + +static void writer_release(struct reftable_writer *w) +{ + if (w) { + reftable_free(w->block); + w->block = NULL; + block_writer_release(&w->block_writer_data); + w->block_writer = NULL; + writer_clear_index(w); + reftable_buf_release(&w->last_key); + reftable_buf_release(&w->scratch); + } +} + +void reftable_writer_free(struct reftable_writer *w) +{ + writer_release(w); + reftable_free(w); +} + +struct obj_index_tree_node { + struct reftable_buf hash; + uint64_t *offsets; + size_t offset_len; + size_t offset_cap; +}; + +#define OBJ_INDEX_TREE_NODE_INIT \ + { \ + .hash = REFTABLE_BUF_INIT \ + } + +static int obj_index_tree_node_compare(const void *a, const void *b) +{ + return reftable_buf_cmp(&((const struct obj_index_tree_node *)a)->hash, + &((const struct obj_index_tree_node *)b)->hash); +} + +static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *hash) +{ + uint64_t off = w->next; + struct obj_index_tree_node want = { .hash = *hash }; + struct obj_index_tree_node *key; + struct tree_node *node; + + node = tree_search(w->obj_index_tree, &want, &obj_index_tree_node_compare); + if (!node) { + struct obj_index_tree_node empty = OBJ_INDEX_TREE_NODE_INIT; + int err; + + key = reftable_malloc(sizeof(*key)); + if (!key) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + *key = empty; + + reftable_buf_reset(&key->hash); + err = reftable_buf_add(&key->hash, hash->buf, hash->len); + if (err < 0) { + reftable_free(key); + return err; + } + tree_insert(&w->obj_index_tree, key, + &obj_index_tree_node_compare); + } else { + key = node->key; + } + + if (key->offset_len > 0 && key->offsets[key->offset_len - 1] == off) + return 0; + + REFTABLE_ALLOC_GROW_OR_NULL(key->offsets, key->offset_len + 1, + key->offset_cap); + if (!key->offsets) + return REFTABLE_OUT_OF_MEMORY_ERROR; + key->offsets[key->offset_len++] = off; + + return 0; +} + +static int writer_add_record(struct reftable_writer *w, + struct reftable_record *rec) +{ + int err; + + err = reftable_record_key(rec, &w->scratch); + if (err < 0) + goto done; + + if (reftable_buf_cmp(&w->last_key, &w->scratch) >= 0) { + err = REFTABLE_API_ERROR; + goto done; + } + + reftable_buf_reset(&w->last_key); + err = reftable_buf_add(&w->last_key, w->scratch.buf, w->scratch.len); + if (err < 0) + goto done; + + if (!w->block_writer) { + err = writer_reinit_block_writer(w, reftable_record_type(rec)); + if (err < 0) + goto done; + } + + if (block_writer_type(w->block_writer) != reftable_record_type(rec)) + return REFTABLE_API_ERROR; + + /* + * Try to add the record to the writer. If this succeeds then we're + * done. Otherwise the block writer may have hit the block size limit + * and needs to be flushed. + */ + err = block_writer_add(w->block_writer, rec); + if (err == 0) + goto done; + + if (err != REFTABLE_ENTRY_TOO_BIG_ERROR) + goto done; + /* + * The current block is full, so we need to flush and reinitialize the + * writer to start writing the next block. + */ + err = writer_flush_block(w); + if (err < 0) + goto done; + err = writer_reinit_block_writer(w, reftable_record_type(rec)); + if (err < 0) + goto done; + + /* + * Try to add the record to the writer again. If this still fails then + * the record does not fit into the block size. 
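+	 * In that case the error (typically REFTABLE_ENTRY_TOO_BIG_ERROR) is
+	 * returned to the caller unchanged.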
+ */ + err = block_writer_add(w->block_writer, rec); + if (err) + goto done; + +done: + return err; +} + +int reftable_writer_add_ref(struct reftable_writer *w, + struct reftable_ref_record *ref) +{ + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_REF, + .u = { + .ref = *ref + }, + }; + int err; + + if (!ref->refname || + ref->update_index < w->min_update_index || + ref->update_index > w->max_update_index) + return REFTABLE_API_ERROR; + + rec.u.ref.update_index -= w->min_update_index; + + err = writer_add_record(w, &rec); + if (err < 0) + goto out; + + if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) { + reftable_buf_reset(&w->scratch); + err = reftable_buf_add(&w->scratch, (char *)reftable_ref_record_val1(ref), + hash_size(w->opts.hash_id)); + if (err < 0) + goto out; + + err = writer_index_hash(w, &w->scratch); + if (err < 0) + goto out; + } + + if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) { + reftable_buf_reset(&w->scratch); + err = reftable_buf_add(&w->scratch, reftable_ref_record_val2(ref), + hash_size(w->opts.hash_id)); + if (err < 0) + goto out; + + err = writer_index_hash(w, &w->scratch); + if (err < 0) + goto out; + } + + err = 0; + +out: + return err; +} + +int reftable_writer_add_refs(struct reftable_writer *w, + struct reftable_ref_record *refs, int n) +{ + int err = 0; + int i = 0; + QSORT(refs, n, reftable_ref_record_compare_name); + for (i = 0; err == 0 && i < n; i++) { + err = reftable_writer_add_ref(w, &refs[i]); + } + return err; +} + +static int reftable_writer_add_log_verbatim(struct reftable_writer *w, + struct reftable_log_record *log) +{ + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_LOG, + .u = { + .log = *log, + }, + }; + if (w->block_writer && + block_writer_type(w->block_writer) == REFTABLE_BLOCK_TYPE_REF) { + int err = writer_finish_public_section(w); + if (err < 0) + return err; + } + + w->next -= w->pending_padding; + w->pending_padding = 0; + return writer_add_record(w, &rec); +} + +int reftable_writer_add_log(struct reftable_writer *w, + struct reftable_log_record *log) +{ + char *input_log_message = NULL; + struct reftable_buf cleaned_message = REFTABLE_BUF_INIT; + int err = 0; + + if (log->value_type == REFTABLE_LOG_DELETION) + return reftable_writer_add_log_verbatim(w, log); + + /* + * Verify only the upper limit of the update_index. Each reflog entry + * is tied to a specific update_index. Entries in the reflog can be + * replaced by adding a new entry with the same update_index, + * effectively canceling the old one. + * + * Consequently, reflog updates may include update_index values lower + * than the writer's min_update_index. + */ + if (log->update_index > w->max_update_index) + return REFTABLE_API_ERROR; + + if (!log->refname) + return REFTABLE_API_ERROR; + + input_log_message = log->value.update.message; + if (!w->opts.exact_log_message && log->value.update.message) { + err = reftable_buf_addstr(&cleaned_message, log->value.update.message); + if (err < 0) + goto done; + + while (cleaned_message.len && + cleaned_message.buf[cleaned_message.len - 1] == '\n') { + err = reftable_buf_setlen(&cleaned_message, + cleaned_message.len - 1); + if (err < 0) + goto done; + } + if (strchr(cleaned_message.buf, '\n')) { + /* multiple lines not allowed. 
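+			 * trailing newlines were already stripped above, so
+			 * any '\n' left here is an interior one.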
*/ + err = REFTABLE_API_ERROR; + goto done; + } + + err = reftable_buf_addstr(&cleaned_message, "\n"); + if (err < 0) + goto done; + + log->value.update.message = cleaned_message.buf; + } + + err = reftable_writer_add_log_verbatim(w, log); + log->value.update.message = input_log_message; +done: + reftable_buf_release(&cleaned_message); + return err; +} + +int reftable_writer_add_logs(struct reftable_writer *w, + struct reftable_log_record *logs, int n) +{ + int err = 0; + int i = 0; + QSORT(logs, n, reftable_log_record_compare_key); + + for (i = 0; err == 0 && i < n; i++) { + err = reftable_writer_add_log(w, &logs[i]); + } + return err; +} + +static int writer_finish_section(struct reftable_writer *w) +{ + struct reftable_block_stats *bstats = NULL; + uint8_t typ = block_writer_type(w->block_writer); + uint64_t index_start = 0; + int max_level = 0; + size_t threshold = w->opts.unpadded ? 1 : 3; + int before_blocks = w->stats.idx_stats.blocks; + int err; + + err = writer_flush_block(w); + if (err < 0) + return err; + + /* + * When the section we are about to index has a lot of blocks then the + * index itself may span across multiple blocks, as well. This would + * require a linear scan over index blocks only to find the desired + * indexed block, which is inefficient. Instead, we write a multi-level + * index where index records of level N+1 will refer to index blocks of + * level N. This isn't constant time, either, but at least logarithmic. + * + * This loop handles writing this multi-level index. Note that we write + * the lowest-level index pointing to the indexed blocks first. We then + * continue writing additional index levels until the current level has + * less blocks than the threshold so that the highest level will be at + * the end of the index section. + * + * Readers are thus required to start reading the index section from + * its end, which is why we set `index_start` to the beginning of the + * last index section. + */ + while (w->index_len > threshold) { + struct reftable_index_record *idx = NULL; + size_t i, idx_len; + + max_level++; + index_start = w->next; + err = writer_reinit_block_writer(w, REFTABLE_BLOCK_TYPE_INDEX); + if (err < 0) + return err; + + idx = w->index; + idx_len = w->index_len; + + w->index = NULL; + w->index_len = 0; + w->index_cap = 0; + for (i = 0; i < idx_len; i++) { + struct reftable_record rec = { + .type = REFTABLE_BLOCK_TYPE_INDEX, + .u = { + .idx = idx[i], + }, + }; + + err = writer_add_record(w, &rec); + if (err < 0) + return err; + } + + err = writer_flush_block(w); + if (err < 0) + return err; + + for (i = 0; i < idx_len; i++) + reftable_buf_release(&idx[i].last_key); + reftable_free(idx); + } + + /* + * The index may still contain a number of index blocks lower than the + * threshold. Clear it so that these entries don't leak into the next + * index section. + */ + writer_clear_index(w); + + bstats = writer_reftable_block_stats(w, typ); + bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks; + bstats->index_offset = index_start; + bstats->max_index_level = max_level; + + /* Reinit lastKey, as the next section can start with any key. 
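+	 * without this, the ordering check in writer_add_record() would reject
+	 * keys that sort before the last key of the previous section.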
*/ + reftable_buf_reset(&w->last_key); + + return 0; +} + +struct common_prefix_arg { + struct reftable_buf *last; + size_t max; +}; + +static void update_common(void *void_arg, void *key) +{ + struct common_prefix_arg *arg = void_arg; + struct obj_index_tree_node *entry = key; + if (arg->last) { + size_t n = common_prefix_size(&entry->hash, arg->last); + if (n > arg->max) + arg->max = n; + } + arg->last = &entry->hash; +} + +struct write_record_arg { + struct reftable_writer *w; + int err; +}; + +static void write_object_record(void *void_arg, void *key) +{ + struct write_record_arg *arg = void_arg; + struct obj_index_tree_node *entry = key; + struct reftable_record + rec = { .type = REFTABLE_BLOCK_TYPE_OBJ, + .u.obj = { + .hash_prefix = (uint8_t *)entry->hash.buf, + .hash_prefix_len = arg->w->stats.object_id_len, + .offsets = entry->offsets, + .offset_len = entry->offset_len, + } }; + if (arg->err < 0) + goto done; + + /* + * Try to add the record to the writer. If this succeeds then we're + * done. Otherwise the block writer may have hit the block size limit + * and needs to be flushed. + */ + arg->err = block_writer_add(arg->w->block_writer, &rec); + if (arg->err == 0) + goto done; + + if (arg->err != REFTABLE_ENTRY_TOO_BIG_ERROR) + goto done; + + /* + * The current block is full, so we need to flush and reinitialize the + * writer to start writing the next block. + */ + arg->err = writer_flush_block(arg->w); + if (arg->err < 0) + goto done; + + arg->err = writer_reinit_block_writer(arg->w, REFTABLE_BLOCK_TYPE_OBJ); + if (arg->err < 0) + goto done; + + /* + * If this still fails then we may need to reset record's offset + * length to reduce the data size to be written. + */ + arg->err = block_writer_add(arg->w->block_writer, &rec); + if (arg->err == 0) + goto done; + + if (arg->err != REFTABLE_ENTRY_TOO_BIG_ERROR) + goto done; + + rec.u.obj.offset_len = 0; + arg->err = block_writer_add(arg->w->block_writer, &rec); + + /* Should be able to write into a fresh block. */ + assert(arg->err == 0); + +done:; +} + +static void object_record_free(void *void_arg REFTABLE_UNUSED, void *key) +{ + struct obj_index_tree_node *entry = key; + + REFTABLE_FREE_AND_NULL(entry->offsets); + reftable_buf_release(&entry->hash); + reftable_free(entry); +} + +static int writer_dump_object_index(struct reftable_writer *w) +{ + struct write_record_arg closure = { .w = w }; + struct common_prefix_arg common = { + .max = 1, /* obj_id_len should be >= 2. 
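+		 * starting at 1 guarantees this, as object_id_len is set to
+		 * common.max + 1 below.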
*/ + }; + int err; + + if (w->obj_index_tree) + infix_walk(w->obj_index_tree, &update_common, &common); + w->stats.object_id_len = common.max + 1; + + err = writer_reinit_block_writer(w, REFTABLE_BLOCK_TYPE_OBJ); + if (err < 0) + return err; + + if (w->obj_index_tree) + infix_walk(w->obj_index_tree, &write_object_record, &closure); + + if (closure.err < 0) + return closure.err; + return writer_finish_section(w); +} + +static int writer_finish_public_section(struct reftable_writer *w) +{ + uint8_t typ = 0; + int err = 0; + + if (!w->block_writer) + return 0; + + typ = block_writer_type(w->block_writer); + err = writer_finish_section(w); + if (err < 0) + return err; + if (typ == REFTABLE_BLOCK_TYPE_REF && !w->opts.skip_index_objects && + w->stats.ref_stats.index_blocks > 0) { + err = writer_dump_object_index(w); + if (err < 0) + return err; + } + + if (w->obj_index_tree) { + infix_walk(w->obj_index_tree, &object_record_free, NULL); + tree_free(w->obj_index_tree); + w->obj_index_tree = NULL; + } + + w->block_writer = NULL; + return 0; +} + +int reftable_writer_close(struct reftable_writer *w) +{ + uint8_t footer[72]; + uint8_t *p = footer; + int err = writer_finish_public_section(w); + int empty_table = w->next == 0; + if (err != 0) + goto done; + w->pending_padding = 0; + if (empty_table) { + /* Empty tables need a header anyway. */ + uint8_t header[28]; + int n = writer_write_header(w, header); + err = padded_write(w, header, n, 0); + if (err < 0) + goto done; + } + + p += writer_write_header(w, footer); + reftable_put_be64(p, w->stats.ref_stats.index_offset); + p += 8; + reftable_put_be64(p, (w->stats.obj_stats.offset) << 5 | w->stats.object_id_len); + p += 8; + reftable_put_be64(p, w->stats.obj_stats.index_offset); + p += 8; + + reftable_put_be64(p, w->stats.log_stats.offset); + p += 8; + reftable_put_be64(p, w->stats.log_stats.index_offset); + p += 8; + + reftable_put_be32(p, crc32(0, footer, p - footer)); + p += 4; + + err = w->flush(w->write_arg); + if (err < 0) { + err = REFTABLE_IO_ERROR; + goto done; + } + + err = padded_write(w, footer, footer_size(writer_version(w)), 0); + if (err < 0) + goto done; + + if (empty_table) { + err = REFTABLE_EMPTY_TABLE_ERROR; + goto done; + } + +done: + writer_release(w); + return err; +} + +static void writer_clear_index(struct reftable_writer *w) +{ + for (size_t i = 0; w->index && i < w->index_len; i++) + reftable_buf_release(&w->index[i].last_key); + REFTABLE_FREE_AND_NULL(w->index); + w->index_len = 0; + w->index_cap = 0; +} + +static int writer_flush_nonempty_block(struct reftable_writer *w) +{ + struct reftable_index_record index_record = { + .last_key = REFTABLE_BUF_INIT, + }; + uint8_t typ = block_writer_type(w->block_writer); + struct reftable_block_stats *bstats; + int raw_bytes, padding = 0, err; + uint64_t block_typ_off; + + /* + * Finish the current block. This will cause the block writer to emit + * restart points and potentially compress records in case we are + * writing a log block. + * + * Note that this is still happening in memory. + */ + raw_bytes = block_writer_finish(w->block_writer); + if (raw_bytes < 0) + return raw_bytes; + + /* + * By default, all records except for log records are padded to the + * block size. + */ + if (!w->opts.unpadded && typ != REFTABLE_BLOCK_TYPE_LOG) + padding = w->opts.block_size - raw_bytes; + + bstats = writer_reftable_block_stats(w, typ); + block_typ_off = (bstats->blocks == 0) ? 
w->next : 0; + if (block_typ_off > 0) + bstats->offset = block_typ_off; + bstats->entries += w->block_writer->entries; + bstats->restarts += w->block_writer->restart_len; + bstats->blocks++; + w->stats.blocks++; + + /* + * If this is the first block we're writing to the table then we need + * to also write the reftable header. + */ + if (!w->next) + writer_write_header(w, w->block); + + err = padded_write(w, w->block, raw_bytes, padding); + if (err < 0) + return err; + + /* + * Add an index record for every block that we're writing. If we end up + * having more than a threshold of index records we will end up writing + * an index section in `writer_finish_section()`. Each index record + * contains the last record key of the block it is indexing as well as + * the offset of that block. + * + * Note that this also applies when flushing index blocks, in which + * case we will end up with a multi-level index. + */ + REFTABLE_ALLOC_GROW_OR_NULL(w->index, w->index_len + 1, w->index_cap); + if (!w->index) + return REFTABLE_OUT_OF_MEMORY_ERROR; + + index_record.offset = w->next; + reftable_buf_reset(&index_record.last_key); + err = reftable_buf_add(&index_record.last_key, w->block_writer->last_key.buf, + w->block_writer->last_key.len); + if (err < 0) + return err; + w->index[w->index_len] = index_record; + w->index_len++; + + w->next += padding + raw_bytes; + w->block_writer = NULL; + + return 0; +} + +static int writer_flush_block(struct reftable_writer *w) +{ + if (!w->block_writer) + return 0; + if (w->block_writer->entries == 0) + return 0; + return writer_flush_nonempty_block(w); +} + +const struct reftable_stats *reftable_writer_stats(struct reftable_writer *w) +{ + return &w->stats; +} diff --git a/reftable/writer.h b/reftable/writer.h new file mode 100644 index 0000000000..9f53610b27 --- /dev/null +++ b/reftable/writer.h @@ -0,0 +1,53 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file or at + * https://developers.google.com/open-source/licenses/bsd + */ + +#ifndef WRITER_H +#define WRITER_H + +#include "basics.h" +#include "block.h" +#include "tree.h" +#include "reftable-writer.h" + +struct reftable_writer { + ssize_t (*write)(void *, const void *, size_t); + int (*flush)(void *); + void *write_arg; + int pending_padding; + struct reftable_buf last_key; + /* Scratch buffer used to avoid allocations. */ + struct reftable_buf scratch; + + /* offset of next block to write. */ + uint64_t next; + uint64_t min_update_index, max_update_index; + struct reftable_write_options opts; + + /* memory buffer for writing */ + uint8_t *block; + + /* writer for the current section. NULL or points to + * block_writer_data */ + struct block_writer *block_writer; + + struct block_writer block_writer_data; + + /* pending index records for the current section */ + struct reftable_index_record *index; + size_t index_len; + size_t index_cap; + + /* + * tree for use with tsearch; used to populate the 'o' inverse OID + * map */ + struct tree_node *obj_index_tree; + + struct reftable_stats stats; +}; + +#endif |