Diffstat (limited to 'bulk-checkin.c')
-rw-r--r--  bulk-checkin.c | 403
1 file changed, 0 insertions, 403 deletions
diff --git a/bulk-checkin.c b/bulk-checkin.c
deleted file mode 100644
index 124c493067..0000000000
--- a/bulk-checkin.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright (c) 2011, Google Inc.
- */
-
-#define USE_THE_REPOSITORY_VARIABLE
-
-#include "git-compat-util.h"
-#include "bulk-checkin.h"
-#include "environment.h"
-#include "gettext.h"
-#include "hex.h"
-#include "lockfile.h"
-#include "repository.h"
-#include "csum-file.h"
-#include "pack.h"
-#include "strbuf.h"
-#include "tmp-objdir.h"
-#include "packfile.h"
-#include "object-file.h"
-#include "odb.h"
-
-struct bulk_checkin_packfile {
-	char *pack_tmp_name;
-	struct hashfile *f;
-	off_t offset;
-	struct pack_idx_option pack_idx_opts;
-
-	struct pack_idx_entry **written;
-	uint32_t alloc_written;
-	uint32_t nr_written;
-};
-
-struct odb_transaction {
-	struct object_database *odb;
-
-	int nesting;
-	struct tmp_objdir *objdir;
-	struct bulk_checkin_packfile packfile;
-};
-
-static void finish_tmp_packfile(struct odb_transaction *transaction,
-				struct strbuf *basename,
-				unsigned char hash[])
-{
-	struct bulk_checkin_packfile *state = &transaction->packfile;
-	struct repository *repo = transaction->odb->repo;
-	char *idx_tmp_name = NULL;
-
-	stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
-			    state->written, state->nr_written, NULL,
-			    &state->pack_idx_opts, hash, &idx_tmp_name);
-	rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
-
-	free(idx_tmp_name);
-}
-
-static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
-{
-	struct bulk_checkin_packfile *state = &transaction->packfile;
-	struct repository *repo = transaction->odb->repo;
-	unsigned char hash[GIT_MAX_RAWSZ];
-	struct strbuf packname = STRBUF_INIT;
-
-	if (!state->f)
-		return;
-
-	if (state->nr_written == 0) {
-		close(state->f->fd);
-		free_hashfile(state->f);
-		unlink(state->pack_tmp_name);
-		goto clear_exit;
-	} else if (state->nr_written == 1) {
-		finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
-				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
-	} else {
-		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
-		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
-					 state->nr_written, hash,
-					 state->offset);
-		close(fd);
-	}
-
-	strbuf_addf(&packname, "%s/pack/pack-%s.",
-		    repo_get_object_directory(transaction->odb->repo),
-		    hash_to_hex_algop(hash, repo->hash_algo));
-
-	finish_tmp_packfile(transaction, &packname, hash);
-	for (uint32_t i = 0; i < state->nr_written; i++)
-		free(state->written[i]);
-
-clear_exit:
-	free(state->pack_tmp_name);
-	free(state->written);
-	memset(state, 0, sizeof(*state));
-
-	strbuf_release(&packname);
-	/* Make objects we just wrote available to ourselves */
-	reprepare_packed_git(repo);
-}
-
-/*
- * Cleanup after batch-mode fsync_object_files.
- */
-static void flush_batch_fsync(struct odb_transaction *transaction)
-{
-	struct strbuf temp_path = STRBUF_INIT;
-	struct tempfile *temp;
-
-	if (!transaction->objdir)
-		return;
-
-	/*
-	 * Issue a full hardware flush against a temporary file to ensure
-	 * that all objects are durable before any renames occur. The code in
-	 * fsync_loose_object_bulk_checkin has already issued a writeout
-	 * request, but it has not flushed any writeback cache in the storage
-	 * hardware or any filesystem logs. This fsync call acts as a barrier
-	 * to ensure that the data in each new object file is durable before
-	 * the final name is visible.
-	 */
-	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
-		    repo_get_object_directory(transaction->odb->repo));
-	temp = xmks_tempfile(temp_path.buf);
-	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
-	delete_tempfile(&temp);
-	strbuf_release(&temp_path);
-
-	/*
-	 * Make the object files visible in the primary ODB after their data is
-	 * fully durable.
-	 */
-	tmp_objdir_migrate(transaction->objdir);
-	transaction->objdir = NULL;
-}
-
-static int already_written(struct odb_transaction *transaction,
-			   struct object_id *oid)
-{
-	/* The object may already exist in the repository */
-	if (odb_has_object(transaction->odb, oid,
-			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
-		return 1;
-
-	/* Might want to keep the list sorted */
-	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
-		if (oideq(&transaction->packfile.written[i]->oid, oid))
-			return 1;
-
-	/* This is a new object we need to keep */
-	return 0;
-}
-
-/*
- * Read the contents from fd for size bytes, streaming it to the
- * packfile in state while updating the hash in ctx. Signal a failure
- * by returning a negative value when the resulting pack would exceed
- * the pack size limit and this is not the first object in the pack,
- * so that the caller can discard what we wrote from the current pack
- * by truncating it and opening a new one. The caller will then call
- * us again after rewinding the input fd.
- *
- * The already_hashed_to pointer is kept untouched by the caller to
- * make sure we do not hash the same byte when we are called
- * again. This way, the caller does not have to checkpoint its hash
- * status before calling us just in case we ask it to call us again
- * with a new pack.
- */
-static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
-			       struct git_hash_ctx *ctx, off_t *already_hashed_to,
-			       int fd, size_t size, const char *path,
-			       unsigned flags)
-{
-	git_zstream s;
-	unsigned char ibuf[16384];
-	unsigned char obuf[16384];
-	unsigned hdrlen;
-	int status = Z_OK;
-	int write_object = (flags & INDEX_WRITE_OBJECT);
-	off_t offset = 0;
-
-	git_deflate_init(&s, pack_compression_level);
-
-	hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
-	s.next_out = obuf + hdrlen;
-	s.avail_out = sizeof(obuf) - hdrlen;
-
-	while (status != Z_STREAM_END) {
-		if (size && !s.avail_in) {
-			size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
-			ssize_t read_result = read_in_full(fd, ibuf, rsize);
-			if (read_result < 0)
-				die_errno("failed to read from '%s'", path);
-			if ((size_t)read_result != rsize)
-				die("failed to read %u bytes from '%s'",
-				    (unsigned)rsize, path);
-			offset += rsize;
-			if (*already_hashed_to < offset) {
-				size_t hsize = offset - *already_hashed_to;
-				if (rsize < hsize)
-					hsize = rsize;
-				if (hsize)
-					git_hash_update(ctx, ibuf, hsize);
-				*already_hashed_to = offset;
-			}
-			s.next_in = ibuf;
-			s.avail_in = rsize;
-			size -= rsize;
-		}
-
-		status = git_deflate(&s, size ? 0 : Z_FINISH);
-
-		if (!s.avail_out || status == Z_STREAM_END) {
-			if (write_object) {
-				size_t written = s.next_out - obuf;
-
-				/* would we bust the size limit? */
-				if (state->nr_written &&
-				    pack_size_limit_cfg &&
-				    pack_size_limit_cfg < state->offset + written) {
-					git_deflate_abort(&s);
-					return -1;
-				}
-
-				hashwrite(state->f, obuf, written);
-				state->offset += written;
-			}
-			s.next_out = obuf;
-			s.avail_out = sizeof(obuf);
-		}
-
-		switch (status) {
-		case Z_OK:
-		case Z_BUF_ERROR:
-		case Z_STREAM_END:
-			continue;
-		default:
-			die("unexpected deflate failure: %d", status);
-		}
-	}
-	git_deflate_end(&s);
-	return 0;
-}
-
-/* Lazily create backing packfile for the state */
-static void prepare_to_stream(struct odb_transaction *transaction,
-			      unsigned flags)
-{
-	struct bulk_checkin_packfile *state = &transaction->packfile;
-	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
-		return;
-
-	state->f = create_tmp_packfile(transaction->odb->repo,
-				       &state->pack_tmp_name);
-	reset_pack_idx_option(&state->pack_idx_opts);
-
-	/* Pretend we are going to write only one object */
-	state->offset = write_pack_header(state->f, 1);
-	if (!state->offset)
-		die_errno("unable to write pack header");
-}
-
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
-			    struct object_id *result_oid, int fd, size_t size,
-			    const char *path, unsigned flags)
-{
-	struct bulk_checkin_packfile *state = &transaction->packfile;
-	off_t seekback, already_hashed_to;
-	struct git_hash_ctx ctx;
-	unsigned char obuf[16384];
-	unsigned header_len;
-	struct hashfile_checkpoint checkpoint;
-	struct pack_idx_entry *idx = NULL;
-
-	seekback = lseek(fd, 0, SEEK_CUR);
-	if (seekback == (off_t) -1)
-		return error("cannot find the current offset");
-
-	header_len = format_object_header((char *)obuf, sizeof(obuf),
-					  OBJ_BLOB, size);
-	transaction->odb->repo->hash_algo->init_fn(&ctx);
-	git_hash_update(&ctx, obuf, header_len);
-
-	/* Note: idx is non-NULL when we are writing */
-	if ((flags & INDEX_WRITE_OBJECT) != 0) {
-		CALLOC_ARRAY(idx, 1);
-
-		prepare_to_stream(transaction, flags);
-		hashfile_checkpoint_init(state->f, &checkpoint);
-	}
-
-	already_hashed_to = 0;
-
-	while (1) {
-		prepare_to_stream(transaction, flags);
-		if (idx) {
-			hashfile_checkpoint(state->f, &checkpoint);
-			idx->offset = state->offset;
-			crc32_begin(state->f);
-		}
-		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
-					 fd, size, path, flags))
-			break;
-		/*
-		 * Writing this object to the current pack will make
-		 * it too big; we need to truncate it, start a new
-		 * pack, and write into it.
-		 */
-		if (!idx)
-			BUG("should not happen");
-		hashfile_truncate(state->f, &checkpoint);
-		state->offset = checkpoint.offset;
-		flush_bulk_checkin_packfile(transaction);
-		if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
-			return error("cannot seek back");
-	}
-	git_hash_final_oid(result_oid, &ctx);
-	if (!idx)
-		return 0;
-
-	idx->crc32 = crc32_end(state->f);
-	if (already_written(transaction, result_oid)) {
-		hashfile_truncate(state->f, &checkpoint);
-		state->offset = checkpoint.offset;
-		free(idx);
-	} else {
-		oidcpy(&idx->oid, result_oid);
-		ALLOC_GROW(state->written,
-			   state->nr_written + 1,
-			   state->alloc_written);
-		state->written[state->nr_written++] = idx;
-	}
-	return 0;
-}
-
-void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction)
-{
-	/*
-	 * We lazily create the temporary object directory
-	 * the first time an object might be added, since
-	 * callers may not know whether any objects will be
-	 * added at the time they call begin_odb_transaction.
-	 */
-	if (!transaction || transaction->objdir)
-		return;
-
-	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
-	if (transaction->objdir)
-		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
-}
-
-void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
-				     int fd, const char *filename)
-{
-	/*
-	 * If we have an active ODB transaction, we issue a call that
-	 * cleans the filesystem page cache but avoids a hardware flush
-	 * command. Later on we will issue a single hardware flush
-	 * before renaming the objects to their final names as part of
-	 * flush_batch_fsync.
-	 */
-	if (!transaction || !transaction->objdir ||
-	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
-		if (errno == ENOSYS)
-			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
-		fsync_or_die(fd, filename);
-	}
-}
-
-struct odb_transaction *begin_odb_transaction(struct object_database *odb)
-{
-	if (!odb->transaction) {
-		CALLOC_ARRAY(odb->transaction, 1);
-		odb->transaction->odb = odb;
-	}
-
-	odb->transaction->nesting += 1;
-
-	return odb->transaction;
-}
-
-void flush_odb_transaction(struct odb_transaction *transaction)
-{
-	if (!transaction)
-		return;
-
-	flush_batch_fsync(transaction);
-	flush_bulk_checkin_packfile(transaction);
-}
-
-void end_odb_transaction(struct odb_transaction *transaction)
-{
-	if (!transaction || transaction->nesting == 0)
-		BUG("Unbalanced ODB transaction nesting");
-
-	transaction->nesting -= 1;
-
-	if (transaction->nesting)
-		return;
-
-	flush_odb_transaction(transaction);
-	transaction->odb->transaction = NULL;
-	free(transaction);
-}
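
A note on the "Pretend we are going to write only one object" trick in prepare_to_stream(): the pack header's object count is a fixed-width big-endian field, so flush_bulk_checkin_packfile() can patch it in place via fixup_pack_header_footer() once the real count is known (that helper also rewrites the pack's trailing checksum, which the sketch below omits). A minimal illustration of the on-disk layout and the in-place patch; patch_object_count() is a hypothetical helper, not git's API:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <unistd.h>

	/* The version-2 pack header, per gitformat-pack(5). */
	struct pack_header_sketch {
		uint32_t signature;	/* "PACK" */
		uint32_t version;	/* htonl(2) */
		uint32_t nr_objects;	/* htonl(count); patched after the fact */
	};

	/* Overwrite the object count at byte offset 8 once the pack is complete. */
	static int patch_object_count(int fd, uint32_t nr_objects)
	{
		uint32_t be = htonl(nr_objects);

		if (pwrite(fd, &be, sizeof(be), 8) != sizeof(be))
			return -1;
		return 0;
	}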
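The comments in fsync_loose_object_bulk_checkin() and flush_batch_fsync() describe the core.fsyncMethod = batch strategy: a cheap writeout-only sync per object file inside a temporary object directory, then one full hardware flush as the durability barrier before any rename makes a final name visible. A sketch of that pattern in raw Linux syscalls, assuming sync_file_range(2) is available; git's own git_fsync() wrapper and tempfile API abstract the per-platform details:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	/*
	 * Per object file: push dirty pages toward the device, but issue
	 * no cache-flush command (the cheap half of the batch).
	 */
	static int writeout_only(int fd)
	{
		return sync_file_range(fd, 0, 0,
				       SYNC_FILE_RANGE_WAIT_BEFORE |
				       SYNC_FILE_RANGE_WRITE |
				       SYNC_FILE_RANGE_WAIT_AFTER);
	}

	/*
	 * Once per batch: a full fsync on a scratch file in the same
	 * filesystem acts as the durability barrier for everything
	 * written out above; only afterwards are the objects renamed
	 * into place.
	 */
	static void flush_batch(const char *tmpl /* e.g. "objects/bulk_fsync_XXXXXX" */)
	{
		char *path = strdup(tmpl);
		int fd = mkstemp(path);

		if (fd >= 0) {
			fsync(fd);
			close(fd);
			unlink(path);
		}
		free(path);
	}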
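stream_blob_to_pack() and index_blob_bulk_checkin() implement a checkpoint-and-retry protocol around pack.packSizeLimit: the callee returns a negative value when appending the object would overflow a non-empty pack, and the caller truncates back to its checkpoint, finalizes the current pack, rewinds the input, and streams again. The control flow in isolation, with git's hashfile machinery replaced by hypothetical stubs (checkpoint_output, truncate_output, start_new_pack, stream_once are stand-ins, not real git functions):

	#include <sys/types.h>
	#include <unistd.h>

	extern off_t checkpoint_output(void);    /* like hashfile_checkpoint() */
	extern void truncate_output(off_t mark); /* like hashfile_truncate() */
	extern void start_new_pack(void);        /* like flush_bulk_checkin_packfile() */
	extern int stream_once(int fd);          /* like stream_blob_to_pack(): < 0 on overflow */

	static int stream_with_retry(int fd)
	{
		off_t seekback = lseek(fd, 0, SEEK_CUR);

		if (seekback == (off_t)-1)
			return -1;

		for (;;) {
			off_t mark = checkpoint_output();

			if (stream_once(fd) == 0)
				return 0;	/* the object fit in the current pack */

			/*
			 * The object would push the pack past the size limit:
			 * discard the partial write, finalize the current pack,
			 * rewind the input, and stream again into a fresh one.
			 * The running object hash is not rewound; the callee
			 * skips already-hashed bytes via already_hashed_to.
			 */
			truncate_output(mark);
			start_new_pack();
			if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
				return -1;
		}
	}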
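Finally, the transaction API at the bottom of the file is nesting-counted: begin_odb_transaction() hands back a per-database singleton and bumps nesting, only the outermost end_odb_transaction() flushes and frees, and flush_odb_transaction() lets a caller publish objects mid-transaction. A hypothetical caller (the function name is made up; the calls are the ones deleted above, declared in bulk-checkin.h):

	#include "bulk-checkin.h"

	static void add_many_objects(struct object_database *odb)
	{
		struct odb_transaction *outer = begin_odb_transaction(odb);

		/* ... index many objects; none are visible in the ODB yet ... */

		/* Nested pairs only touch the counter; same transaction object. */
		struct odb_transaction *inner = begin_odb_transaction(odb);
		end_odb_transaction(inner);	/* nesting 2 -> 1: no flush */

		/* Publish what we have so far without ending the transaction. */
		flush_odb_transaction(outer);

		end_odb_transaction(outer);	/* nesting 1 -> 0: flush and free */
	}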
