Diffstat (limited to 'object-file.c')
-rw-r--r--	object-file.c	863
1 file changed, 611 insertions(+), 252 deletions(-)
diff --git a/object-file.c b/object-file.c
index 1ac04c2891..84c9249dab 100644
--- a/object-file.c
+++ b/object-file.c
@@ -8,10 +8,8 @@
  */
 #define USE_THE_REPOSITORY_VARIABLE
-#define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "git-compat-util.h"
-#include "bulk-checkin.h"
 #include "convert.h"
 #include "dir.h"
 #include "environment.h"
@@ -21,13 +19,16 @@
 #include "loose.h"
 #include "object-file-convert.h"
 #include "object-file.h"
-#include "object-store.h"
+#include "odb.h"
 #include "oidtree.h"
 #include "pack.h"
 #include "packfile.h"
 #include "path.h"
+#include "read-cache-ll.h"
 #include "setup.h"
 #include "streaming.h"
+#include "tempfile.h"
+#include "tmp-objdir.h"
 
 /* The maximum size for an object header. */
 #define MAX_HEADER_LEN 32
@@ -42,10 +43,11 @@ static int get_conv_flags(unsigned flags)
 	return 0;
 }
 
-static void fill_loose_path(struct strbuf *buf, const struct object_id *oid)
+static void fill_loose_path(struct strbuf *buf,
+			    const struct object_id *oid,
+			    const struct git_hash_algo *algop)
 {
-	int i;
-	for (i = 0; i < the_hash_algo->rawsz; i++) {
+	for (size_t i = 0; i < algop->rawsz; i++) {
 		static char hex[] = "0123456789abcdef";
 		unsigned int val = oid->hash[i];
 		strbuf_addch(buf, hex[val >> 4]);
@@ -55,14 +57,14 @@ static void fill_loose_path(struct strbuf *buf, const struct object_id *oid)
 	}
 }
 
-const char *odb_loose_path(struct object_directory *odb,
+const char *odb_loose_path(struct odb_source *source,
 			   struct strbuf *buf,
 			   const struct object_id *oid)
 {
 	strbuf_reset(buf);
-	strbuf_addstr(buf, odb->path);
+	strbuf_addstr(buf, source->path);
 	strbuf_addch(buf, '/');
-	fill_loose_path(buf, oid);
+	fill_loose_path(buf, oid, source->odb->repo->hash_algo);
 	return buf->buf;
 }
 
@@ -88,46 +90,19 @@ int check_and_freshen_file(const char *fn, int freshen)
 	return 1;
 }
 
-static int check_and_freshen_odb(struct object_directory *odb,
-				 const struct object_id *oid,
-				 int freshen)
+static int check_and_freshen_source(struct odb_source *source,
+				    const struct object_id *oid,
+				    int freshen)
 {
 	static struct strbuf path = STRBUF_INIT;
-	odb_loose_path(odb, &path, oid);
+	odb_loose_path(source, &path, oid);
 	return check_and_freshen_file(path.buf, freshen);
 }
 
-static int check_and_freshen_local(const struct object_id *oid, int freshen)
+int odb_source_loose_has_object(struct odb_source *source,
+				const struct object_id *oid)
 {
-	return check_and_freshen_odb(the_repository->objects->odb, oid, freshen);
-}
-
-static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen)
-{
-	struct object_directory *odb;
-
-	prepare_alt_odb(the_repository);
-	for (odb = the_repository->objects->odb->next; odb; odb = odb->next) {
-		if (check_and_freshen_odb(odb, oid, freshen))
-			return 1;
-	}
-	return 0;
-}
-
-static int check_and_freshen(const struct object_id *oid, int freshen)
-{
-	return check_and_freshen_local(oid, freshen) ||
-	       check_and_freshen_nonlocal(oid, freshen);
-}
-
-int has_loose_object_nonlocal(const struct object_id *oid)
-{
-	return check_and_freshen_nonlocal(oid, 0);
-}
-
-int has_loose_object(const struct object_id *oid)
-{
-	return check_and_freshen(oid, 0);
+	return check_and_freshen_source(source, oid, 0);
 }
 
 int format_object_header(char *str, size_t size, enum object_type type,
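With the per-source API above, a single call no longer walks the alternates; consulting every object source becomes the caller's job. A minimal caller-side sketch (not part of this patch; "odb" stands for a repository's struct object_database):

    static int any_source_has_loose_object(struct object_database *odb,
                                           const struct object_id *oid)
    {
            /* the old check_and_freshen() fallback loop, now written by the caller */
            for (struct odb_source *source = odb->sources; source; source = source->next)
                    if (odb_source_loose_has_object(source, oid))
                            return 1;
            return 0;
    }

The force_object_loose() hunk further down uses exactly this loop.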
* * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). */ -static int stat_loose_object(struct repository *r, const struct object_id *oid, +static int stat_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, struct stat *st, const char **path) { - struct object_directory *odb; static struct strbuf buf = STRBUF_INIT; - prepare_alt_odb(r); - for (odb = r->objects->odb; odb; odb = odb->next) { - *path = odb_loose_path(odb, &buf, oid); - if (!lstat(*path, st)) - return 0; - } + *path = odb_loose_path(loose->source, &buf, oid); + if (!lstat(*path, st)) + return 0; return -1; } @@ -219,39 +191,24 @@ static int stat_loose_object(struct repository *r, const struct object_id *oid, * Like stat_loose_object(), but actually open the object and return the * descriptor. See the caveats on the "path" parameter above. */ -static int open_loose_object(struct repository *r, +static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { - int fd; - struct object_directory *odb; - int most_interesting_errno = ENOENT; static struct strbuf buf = STRBUF_INIT; + int fd; - prepare_alt_odb(r); - for (odb = r->objects->odb; odb; odb = odb->next) { - *path = odb_loose_path(odb, &buf, oid); - fd = git_open(*path); - if (fd >= 0) - return fd; + *path = odb_loose_path(loose->source, &buf, oid); + fd = git_open(*path); + if (fd >= 0) + return fd; - if (most_interesting_errno == ENOENT) - most_interesting_errno = errno; - } - errno = most_interesting_errno; return -1; } -static int quick_has_loose(struct repository *r, +static int quick_has_loose(struct odb_source_loose *loose, const struct object_id *oid) { - struct object_directory *odb; - - prepare_alt_odb(r); - for (odb = r->objects->odb; odb; odb = odb->next) { - if (oidtree_contains(odb_loose_cache(odb, oid), oid)) - return 1; - } - return 0; + return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid); } /* @@ -277,12 +234,12 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *map_loose_object(struct repository *r, - const struct object_id *oid, - unsigned long *size) +void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; - int fd = open_loose_object(r, oid, &p); + int fd = open_loose_object(source->loose, oid, &p); if (fd < 0) return NULL; @@ -327,9 +284,8 @@ static void *unpack_loose_rest(git_zstream *stream, void *buffer, unsigned long size, const struct object_id *oid) { - int bytes = strlen(buffer) + 1; + size_t bytes = strlen(buffer) + 1, n; unsigned char *buf = xmallocz(size); - unsigned long n; int status = Z_OK; n = stream->total_out - bytes; @@ -433,9 +389,9 @@ int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags) +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags) { int status = 0; int fd; @@ -448,7 +404,7 @@ int loose_object_info(struct repository *r, enum object_type type_scratch; if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, the_repository->hash_algo); + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); /* * If we don't care about type or size, then we don't @@ -461,15 +417,15 @@ int 
 
 /*
@@ -277,12 +234,12 @@ static void *map_fd(int fd, const char *path, unsigned long *size)
 	return map;
 }
 
-void *map_loose_object(struct repository *r,
-		       const struct object_id *oid,
-		       unsigned long *size)
+void *odb_source_loose_map_object(struct odb_source *source,
+				  const struct object_id *oid,
+				  unsigned long *size)
 {
 	const char *p;
-	int fd = open_loose_object(r, oid, &p);
+	int fd = open_loose_object(source->loose, oid, &p);
 
 	if (fd < 0)
 		return NULL;
@@ -327,9 +284,8 @@ static void *unpack_loose_rest(git_zstream *stream,
 			       void *buffer, unsigned long size,
 			       const struct object_id *oid)
 {
-	int bytes = strlen(buffer) + 1;
+	size_t bytes = strlen(buffer) + 1, n;
 	unsigned char *buf = xmallocz(size);
-	unsigned long n;
 	int status = Z_OK;
 
 	n = stream->total_out - bytes;
@@ -433,9 +389,9 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
 	return 0;
 }
 
-int loose_object_info(struct repository *r,
-		      const struct object_id *oid,
-		      struct object_info *oi, int flags)
+int odb_source_loose_read_object_info(struct odb_source *source,
+				      const struct object_id *oid,
+				      struct object_info *oi, int flags)
 {
 	int status = 0;
 	int fd;
@@ -448,7 +404,7 @@ int loose_object_info(struct repository *r,
 	enum object_type type_scratch;
 
 	if (oi->delta_base_oid)
-		oidclr(oi->delta_base_oid, the_repository->hash_algo);
+		oidclr(oi->delta_base_oid, source->odb->repo->hash_algo);
 
 	/*
 	 * If we don't care about type or size, then we don't
@@ -461,15 +417,15 @@ int loose_object_info(struct repository *r,
 	if (!oi->typep && !oi->sizep && !oi->contentp) {
 		struct stat st;
 		if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK))
-			return quick_has_loose(r, oid) ? 0 : -1;
-		if (stat_loose_object(r, oid, &st, &path) < 0)
+			return quick_has_loose(source->loose, oid) ? 0 : -1;
+		if (stat_loose_object(source->loose, oid, &st, &path) < 0)
 			return -1;
 		if (oi->disk_sizep)
 			*oi->disk_sizep = st.st_size;
 		return 0;
 	}
 
-	fd = open_loose_object(r, oid, &path);
+	fd = open_loose_object(source->loose, oid, &path);
 	if (fd < 0) {
 		if (errno != ENOENT)
 			error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
@@ -596,7 +552,7 @@ static int check_collision(const char *source, const char *dest)
 			goto out;
 		}
 
-		if (sz_a < sizeof(buf_source))
+		if ((size_t) sz_a < sizeof(buf_source))
 			break;
 	}
@@ -611,12 +567,14 @@ out:
 /*
 * Move the just written object into its final resting place.
 */
-int finalize_object_file(const char *tmpfile, const char *filename)
+int finalize_object_file(struct repository *repo,
+			 const char *tmpfile, const char *filename)
 {
-	return finalize_object_file_flags(tmpfile, filename, 0);
+	return finalize_object_file_flags(repo, tmpfile, filename, 0);
 }
 
-int finalize_object_file_flags(const char *tmpfile, const char *filename,
+int finalize_object_file_flags(struct repository *repo,
+			       const char *tmpfile, const char *filename,
 			       enum finalize_object_file_flags flags)
 {
 	unsigned retries = 0;
@@ -676,7 +634,7 @@ retry:
 	}
 
 out:
-	if (adjust_shared_perm(the_repository, filename))
+	if (adjust_shared_perm(repo, filename))
 		return error(_("unable to set permission to '%s'"), filename);
 	return 0;
 }
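For context, the object_info out-parameters decide which path odb_source_loose_read_object_info() takes: with none of typep/sizep/contentp set it can answer from lstat() or the loose-object cache alone, otherwise it must open the file and inflate at least the header. A minimal size-only query against one source (a sketch using the existing object_info API):

    struct object_info oi = OBJECT_INFO_INIT;
    unsigned long size;

    oi.sizep = &size;       /* header is inflated, contents are not */
    if (!odb_source_loose_read_object_info(source, oid, &oi, 0))
            printf("%s: %lu bytes\n", oid_to_hex(oid), size);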
@@ -691,14 +649,102 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf,
 	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
 }
 
+struct transaction_packfile {
+	char *pack_tmp_name;
+	struct hashfile *f;
+	off_t offset;
+	struct pack_idx_option pack_idx_opts;
+
+	struct pack_idx_entry **written;
+	uint32_t alloc_written;
+	uint32_t nr_written;
+};
+
+struct odb_transaction {
+	struct object_database *odb;
+
+	struct tmp_objdir *objdir;
+	struct transaction_packfile packfile;
+};
+
+static void prepare_loose_object_transaction(struct odb_transaction *transaction)
+{
+	/*
+	 * We lazily create the temporary object directory
+	 * the first time an object might be added, since
+	 * callers may not know whether any objects will be
+	 * added at the time they call object_file_transaction_begin.
+	 */
+	if (!transaction || transaction->objdir)
+		return;
+
+	transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync");
+	if (transaction->objdir)
+		tmp_objdir_replace_primary_odb(transaction->objdir, 0);
+}
+
+static void fsync_loose_object_transaction(struct odb_transaction *transaction,
+					   int fd, const char *filename)
+{
+	/*
+	 * If we have an active ODB transaction, we issue a call that
+	 * cleans the filesystem page cache but avoids a hardware flush
+	 * command. Later on we will issue a single hardware flush
+	 * before renaming the objects to their final names as part of
+	 * flush_batch_fsync.
+	 */
+	if (!transaction || !transaction->objdir ||
+	    git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
+		if (errno == ENOSYS)
+			warning(_("core.fsyncMethod = batch is unsupported on this platform"));
+		fsync_or_die(fd, filename);
+	}
+}
+
+/*
+ * Cleanup after batch-mode fsync_object_files.
+ */
+static void flush_loose_object_transaction(struct odb_transaction *transaction)
+{
+	struct strbuf temp_path = STRBUF_INIT;
+	struct tempfile *temp;
+
+	if (!transaction->objdir)
+		return;
+
+	/*
+	 * Issue a full hardware flush against a temporary file to ensure
+	 * that all objects are durable before any renames occur. The code in
+	 * fsync_loose_object_transaction has already issued a writeout
+	 * request, but it has not flushed any writeback cache in the storage
+	 * hardware or any filesystem logs. This fsync call acts as a barrier
+	 * to ensure that the data in each new object file is durable before
+	 * the final name is visible.
+	 */
+	strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
+		    repo_get_object_directory(transaction->odb->repo));
+	temp = xmks_tempfile(temp_path.buf);
+	fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
+	delete_tempfile(&temp);
+	strbuf_release(&temp_path);
+
+	/*
+	 * Make the object files visible in the primary ODB after their data is
+	 * fully durable.
+	 */
+	tmp_objdir_migrate(transaction->objdir);
+	transaction->objdir = NULL;
+}
+
 /* Finalize a file on disk, and close it. */
-static void close_loose_object(int fd, const char *filename)
+static void close_loose_object(struct odb_source *source,
+			       int fd, const char *filename)
 {
-	if (the_repository->objects->odb->will_destroy)
+	if (source->will_destroy)
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_bulk_checkin(fd, filename);
+		fsync_loose_object_transaction(source->odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
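Taken together these helpers implement the batch-fsync scheme formerly housed in bulk-checkin.c: objects are written into a temporary object directory with page-cache writeout only, one hardware flush against a dummy tempfile makes everything durable, and only then does tmp_objdir_migrate() rename the objects into the primary ODB. A hypothetical caller, assuming core.fsyncMethod=batch is configured:

    struct odb_transaction *tx = odb_transaction_begin(repo->objects);
    for (size_t i = 0; i < nr; i++)
            odb_write_object(repo->objects, buf[i], len[i], OBJ_BLOB, &oid[i]);
    odb_transaction_commit(tx);     /* single fsync barrier, then migrate */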
@@ -726,7 +772,8 @@ static inline int directory_size(const char *filename)
 * We want to avoid cross-directory filename renames, because those
 * can have problems on various filesystems (FAT, NFS, Coda).
 */
-static int create_tmpfile(struct strbuf *tmp, const char *filename)
+static int create_tmpfile(struct repository *repo,
+			  struct strbuf *tmp, const char *filename)
 {
 	int fd, dirlen = directory_size(filename);
@@ -745,7 +792,7 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename)
 	strbuf_add(tmp, filename, dirlen - 1);
 	if (mkdir(tmp->buf, 0777) && errno != EEXIST)
 		return -1;
-	if (adjust_shared_perm(the_repository, tmp->buf))
+	if (adjust_shared_perm(repo, tmp->buf))
 		return -1;
 
 	/* Try again */
@@ -766,26 +813,26 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename)
 * Returns a "fd", which should later be provided to
 * end_loose_object_common().
 */
-static int start_loose_object_common(struct strbuf *tmp_file,
+static int start_loose_object_common(struct odb_source *source,
+				     struct strbuf *tmp_file,
 				     const char *filename, unsigned flags,
 				     git_zstream *stream,
 				     unsigned char *buf, size_t buflen,
 				     struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 				     char *hdr, int hdrlen)
 {
-	struct repository *repo = the_repository;
-	const struct git_hash_algo *algo = repo->hash_algo;
-	const struct git_hash_algo *compat = repo->compat_hash_algo;
+	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
+	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	int fd;
 
-	fd = create_tmpfile(tmp_file, filename);
+	fd = create_tmpfile(source->odb->repo, tmp_file, filename);
 	if (fd < 0) {
-		if (flags & WRITE_OBJECT_FILE_SILENT)
+		if (flags & WRITE_OBJECT_SILENT)
 			return -1;
 		else if (errno == EACCES)
 			return error(_("insufficient permission for adding "
				       "an object to repository database %s"),
-				     repo_get_object_directory(the_repository));
+				     source->path);
 		else
 			return error_errno(
 				_("unable to create temporary file"));
@@ -815,14 +862,14 @@ static int start_loose_object_common(struct strbuf *tmp_file,
 * Common steps for the inner git_deflate() loop for writing loose
 * objects. Returns what git_deflate() returns.
 */
-static int write_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
+static int write_loose_object_common(struct odb_source *source,
+				     struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 				     git_zstream *stream, const int flush,
 				     unsigned char *in0, const int fd,
 				     unsigned char *compressed,
 				     const size_t compressed_len)
 {
-	struct repository *repo = the_repository;
-	const struct git_hash_algo *compat = repo->compat_hash_algo;
+	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	int ret;
 
 	ret = git_deflate(stream, flush ? Z_FINISH : 0);
@@ -843,12 +890,12 @@ static int write_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 * - End the compression of zlib stream.
 * - Get the calculated oid to "oid".
 */
-static int end_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
+static int end_loose_object_common(struct odb_source *source,
+				   struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 				   git_zstream *stream, struct object_id *oid,
 				   struct object_id *compat_oid)
 {
-	struct repository *repo = the_repository;
-	const struct git_hash_algo *compat = repo->compat_hash_algo;
+	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	int ret;
 
 	ret = git_deflate_end_gently(stream);
@@ -861,7 +908,8 @@ static int end_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 	return Z_OK;
 }
 
-static int write_loose_object(const struct object_id *oid, char *hdr,
+static int write_loose_object(struct odb_source *source,
+			      const struct object_id *oid, char *hdr,
 			      int hdrlen, const void *buf, unsigned long len,
 			      time_t mtime, unsigned flags)
 {
@@ -874,11 +922,11 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_transaction(source->odb->transaction);
 
-	odb_loose_path(the_repository->objects->odb, &filename, oid);
+	odb_loose_path(source, &filename, oid);
 
-	fd = start_loose_object_common(&tmp_file, filename.buf, flags,
+	fd = start_loose_object_common(source, &tmp_file, filename.buf, flags,
 				       &stream, compressed, sizeof(compressed),
 				       &c, NULL, hdr, hdrlen);
 	if (fd < 0)
@@ -890,14 +938,14 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
 	do {
 		unsigned char *in0 = stream.next_in;
-		ret = write_loose_object_common(&c, NULL, &stream, 1, in0, fd,
+		ret = write_loose_object_common(source, &c, NULL, &stream, 1, in0, fd,
 						compressed, sizeof(compressed));
 	} while (ret == Z_OK);
 
 	if (ret != Z_STREAM_END)
 		die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
 		    ret);
-	ret = end_loose_object_common(&c, NULL, &stream, &parano_oid, NULL);
+	ret = end_loose_object_common(source, &c, NULL, &stream, &parano_oid, NULL);
 	if (ret != Z_OK)
 		die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
 		    ret);
@@ -905,45 +953,32 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
 		die(_("confused by unstable object source data for %s"),
 		    oid_to_hex(oid));
 
-	close_loose_object(fd, tmp_file.buf);
+	close_loose_object(source, fd, tmp_file.buf);
 
 	if (mtime) {
 		struct utimbuf utb;
 		utb.actime = mtime;
 		utb.modtime = mtime;
 		if (utime(tmp_file.buf, &utb) < 0 &&
-		    !(flags & WRITE_OBJECT_FILE_SILENT))
+		    !(flags & WRITE_OBJECT_SILENT))
 			warning_errno(_("failed utime() on %s"), tmp_file.buf);
 	}
 
-	return finalize_object_file_flags(tmp_file.buf, filename.buf,
+	return finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf,
 					  FOF_SKIP_COLLISION_CHECK);
 }
 
-static int freshen_loose_object(const struct object_id *oid)
+int odb_source_loose_freshen_object(struct odb_source *source,
+				    const struct object_id *oid)
 {
-	return check_and_freshen(oid, 1);
+	return !!check_and_freshen_source(source, oid, 1);
 }
 
-static int freshen_packed_object(const struct object_id *oid)
+int odb_source_loose_write_stream(struct odb_source *source,
+				  struct odb_write_stream *in_stream, size_t len,
+				  struct object_id *oid)
 {
-	struct pack_entry e;
-	if (!find_pack_entry(the_repository, oid, &e))
-		return 0;
-	if (e.p->is_cruft)
-		return 0;
-	if (e.p->freshened)
-		return 1;
-	if (!freshen_file(e.p->pack_name))
-		return 0;
-	e.p->freshened = 1;
-	return 1;
-}
-
-int stream_loose_object(struct input_stream *in_stream, size_t len,
-			struct object_id *oid)
-{
-	const struct git_hash_algo *compat = the_repository->compat_hash_algo;
+	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	struct object_id compat_oid;
 	int fd, ret, err = 0, flush = 0;
 	unsigned char compressed[4096];
@@ -956,10 +991,10 @@ int stream_loose_object(struct input_stream *in_stream, size_t len,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_bulk_checkin();
+		prepare_loose_object_transaction(source->odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
-	strbuf_addf(&filename, "%s/", repo_get_object_directory(the_repository));
+	strbuf_addf(&filename, "%s/", source->path);
 
 	hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len);
 
 	/*
@@ -970,7 +1005,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len,
 	 * - Setup zlib stream for compression.
 	 * - Start to feed header to zlib stream.
 	 */
-	fd = start_loose_object_common(&tmp_file, filename.buf, 0,
+	fd = start_loose_object_common(source, &tmp_file, filename.buf, 0,
 				       &stream, compressed, sizeof(compressed),
 				       &c, &compat_c, hdr, hdrlen);
 	if (fd < 0) {
@@ -990,7 +1025,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len,
 			if (in_stream->is_finished)
 				flush = 1;
 		}
-		ret = write_loose_object_common(&c, &compat_c, &stream, flush, in0, fd,
+		ret = write_loose_object_common(source, &c, &compat_c, &stream, flush, in0, fd,
 						compressed, sizeof(compressed));
 		/*
 		 * Unlike write_loose_object(), we do not have the entire
@@ -1013,17 +1048,16 @@ int stream_loose_object(struct input_stream *in_stream, size_t len,
 	 */
 	if (ret != Z_STREAM_END)
 		die(_("unable to stream deflate new object (%d)"), ret);
-	ret = end_loose_object_common(&c, &compat_c, &stream, oid, &compat_oid);
+	ret = end_loose_object_common(source, &c, &compat_c, &stream, oid, &compat_oid);
 	if (ret != Z_OK)
 		die(_("deflateEnd on stream object failed (%d)"), ret);
 
-	close_loose_object(fd, tmp_file.buf);
+	close_loose_object(source, fd, tmp_file.buf);
 
-	if (freshen_packed_object(oid) || freshen_loose_object(oid)) {
+	if (odb_freshen_object(source->odb, oid)) {
 		unlink_or_warn(tmp_file.buf);
 		goto cleanup;
 	}
-
-	odb_loose_path(the_repository->objects->odb, &filename, oid);
+	odb_loose_path(source, &filename, oid);
 
 	/* We finally know the object path, and create the missing dir. */
 	dirlen = directory_size(filename.buf);
@@ -1031,7 +1065,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len,
 		struct strbuf dir = STRBUF_INIT;
 		strbuf_add(&dir, filename.buf, dirlen);
 
-		if (safe_create_dir_in_gitdir(the_repository, dir.buf) &&
+		if (safe_create_dir_in_gitdir(source->odb->repo, dir.buf) &&
 		    errno != EEXIST) {
 			err = error_errno(_("unable to create directory %s"), dir.buf);
 			strbuf_release(&dir);
@@ -1040,23 +1074,23 @@ int stream_loose_object(struct input_stream *in_stream, size_t len,
 		strbuf_release(&dir);
 	}
 
-	err = finalize_object_file_flags(tmp_file.buf, filename.buf,
+	err = finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf,
 					 FOF_SKIP_COLLISION_CHECK);
 	if (!err && compat)
-		err = repo_add_loose_object_map(the_repository, oid, &compat_oid);
+		err = repo_add_loose_object_map(source, oid, &compat_oid);
 cleanup:
 	strbuf_release(&tmp_file);
 	strbuf_release(&filename);
 	return err;
 }
 
-int write_object_file_flags(const void *buf, unsigned long len,
-			    enum object_type type, struct object_id *oid,
-			    struct object_id *compat_oid_in, unsigned flags)
+int odb_source_loose_write_object(struct odb_source *source,
+				  const void *buf, unsigned long len,
+				  enum object_type type, struct object_id *oid,
+				  struct object_id *compat_oid_in, unsigned flags)
 {
-	struct repository *repo = the_repository;
-	const struct git_hash_algo *algo = repo->hash_algo;
-	const struct git_hash_algo *compat = repo->compat_hash_algo;
+	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
+	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	struct object_id compat_oid;
 	char hdr[MAX_HEADER_LEN];
 	int hdrlen = sizeof(hdr);
@@ -1069,7 +1103,7 @@ int write_object_file_flags(const void *buf, unsigned long len,
 			hash_object_file(compat, buf, len, type, &compat_oid);
 		else {
 			struct strbuf converted = STRBUF_INIT;
-			convert_object_file(the_repository, &converted, algo, compat,
+			convert_object_file(source->odb->repo, &converted, algo, compat,
 					    buf, len, type, 0);
 			hash_object_file(compat, converted.buf, converted.len,
 					 type, &compat_oid);
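The freshen_packed_object()/freshen_loose_object() pair removed earlier is folded into odb_freshen_object(), which is defined elsewhere in this series. Judging from the removed helpers, its job amounts to the following sketch (the wrapper name and structure are an assumption; only the two per-source checks are taken from this file):

    static int odb_freshen_object_sketch(struct object_database *odb,
                                         const struct object_id *oid)
    {
            /* pack mtime bump first (cruft packs excluded), then each loose source */
            if (freshen_packed_object(oid))
                    return 1;
            for (struct odb_source *source = odb->sources; source; source = source->next)
                    if (odb_source_loose_freshen_object(source, oid))
                            return 1;
            return 0;
    }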
@@ -1081,19 +1115,19 @@ int write_object_file_flags(const void *buf, unsigned long len,
 	 * it out into .git/objects/??/?{38} file.
 	 */
 	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
-	if (freshen_packed_object(oid) || freshen_loose_object(oid))
+	if (odb_freshen_object(source->odb, oid))
 		return 0;
-	if (write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags))
+	if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags))
 		return -1;
 	if (compat)
-		return repo_add_loose_object_map(repo, oid, &compat_oid);
+		return repo_add_loose_object_map(source, oid, &compat_oid);
 	return 0;
 }
 
-int force_object_loose(const struct object_id *oid, time_t mtime)
+int force_object_loose(struct odb_source *source,
+		       const struct object_id *oid, time_t mtime)
 {
-	struct repository *repo = the_repository;
-	const struct git_hash_algo *compat = repo->compat_hash_algo;
+	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	void *buf;
 	unsigned long len;
 	struct object_info oi = OBJECT_INFO_INIT;
@@ -1103,22 +1137,24 @@ int force_object_loose(const struct object_id *oid, time_t mtime)
 	int hdrlen;
 	int ret;
 
-	if (has_loose_object(oid))
-		return 0;
+	for (struct odb_source *s = source->odb->sources; s; s = s->next)
+		if (odb_source_loose_has_object(s, oid))
+			return 0;
+
 	oi.typep = &type;
 	oi.sizep = &len;
 	oi.contentp = &buf;
-	if (oid_object_info_extended(the_repository, oid, &oi, 0))
+	if (odb_read_object_info_extended(source->odb, oid, &oi, 0))
 		return error(_("cannot read object for %s"), oid_to_hex(oid));
 
 	if (compat) {
-		if (repo_oid_to_algop(repo, oid, compat, &compat_oid))
+		if (repo_oid_to_algop(source->odb->repo, oid, compat, &compat_oid))
 			return error(_("cannot map object %s to %s"), oid_to_hex(oid),
 				     compat->name);
 	}
 
 	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
-	ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime, 0);
+	ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0);
 	if (!ret && compat)
-		ret = repo_add_loose_object_map(the_repository, oid, &compat_oid);
+		ret = repo_add_loose_object_map(source, oid, &compat_oid);
 	free(buf);
 
 	return ret;
@@ -1168,15 +1204,15 @@ static int index_mem(struct index_state *istate,
 		opts.strict = 1;
 		opts.error_func = hash_format_check_report;
-		if (fsck_buffer(null_oid(the_hash_algo), type, buf, size, &opts))
+		if (fsck_buffer(null_oid(istate->repo->hash_algo), type, buf, size, &opts))
 			die(_("refusing to create malformed object"));
 		fsck_finish(&opts);
 	}
 
 	if (write_object)
-		ret = write_object_file(buf, size, type, oid);
+		ret = odb_write_object(istate->repo->objects, buf, size, type, oid);
 	else
-		hash_object_file(the_hash_algo, buf, size, type, oid);
+		hash_object_file(istate->repo->hash_algo, buf, size, type, oid);
 
 	strbuf_release(&nbuf);
 	return ret;
@@ -1199,10 +1235,10 @@ static int index_stream_convert_blob(struct index_state *istate,
 				 get_conv_flags(flags));
 
 	if (write_object)
-		ret = write_object_file(sbuf.buf, sbuf.len, OBJ_BLOB,
-					oid);
+		ret = odb_write_object(istate->repo->objects, sbuf.buf, sbuf.len, OBJ_BLOB,
+				       oid);
 	else
-		hash_object_file(the_hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB,
+		hash_object_file(istate->repo->hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB,
 				 oid);
 	strbuf_release(&sbuf);
 	return ret;
@@ -1240,7 +1276,7 @@ static int index_core(struct index_state *istate,
 		if (read_result < 0)
 			ret = error_errno(_("read error while indexing %s"),
 					  path ? path : "<unknown>");
-		else if (read_result != size)
+		else if ((size_t) read_result != size)
 			ret = error(_("short read while indexing %s"),
 				    path ? path : "<unknown>");
 		else
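The indexing helpers now go through the repository-scoped odb_write_object() instead of the the_repository-bound write_object_file(); hashing without storing stays a separate call. In isolation (a sketch using the calls exactly as they appear above):

    struct object_id oid;

    if (write_object)
            ret = odb_write_object(repo->objects, buf, len, OBJ_BLOB, &oid);
    else
            hash_object_file(repo->hash_algo, buf, len, OBJ_BLOB, &oid);  /* no disk I/O */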
path : "<unknown>"); else @@ -1254,6 +1290,274 @@ static int index_core(struct index_state *istate, return ret; } +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) +{ + /* The object may already exist in the repository */ + if (odb_has_object(transaction->odb, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + return 1; + + /* Might want to keep the list sorted */ + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) + return 1; + + /* This is a new object we need to keep */ + return 0; +} + +/* Lazily create backing packfile for the state */ +static void prepare_packfile_transaction(struct odb_transaction *transaction, + unsigned flags) +{ + struct transaction_packfile *state = &transaction->packfile; + if (!(flags & INDEX_WRITE_OBJECT) || state->f) + return; + + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); + reset_pack_idx_option(&state->pack_idx_opts); + + /* Pretend we are going to write only one object */ + state->offset = write_pack_header(state->f, 1); + if (!state->offset) + die_errno("unable to write pack header"); +} + +/* + * Read the contents from fd for size bytes, streaming it to the + * packfile in state while updating the hash in ctx. Signal a failure + * by returning a negative value when the resulting pack would exceed + * the pack size limit and this is not the first object in the pack, + * so that the caller can discard what we wrote from the current pack + * by truncating it and opening a new one. The caller will then call + * us again after rewinding the input fd. + * + * The already_hashed_to pointer is kept untouched by the caller to + * make sure we do not hash the same byte when we are called + * again. This way, the caller does not have to checkpoint its hash + * status before calling us just in case we ask it to call us again + * with a new pack. + */ +static int stream_blob_to_pack(struct transaction_packfile *state, + struct git_hash_ctx *ctx, off_t *already_hashed_to, + int fd, size_t size, const char *path, + unsigned flags) +{ + git_zstream s; + unsigned char ibuf[16384]; + unsigned char obuf[16384]; + unsigned hdrlen; + int status = Z_OK; + int write_object = (flags & INDEX_WRITE_OBJECT); + off_t offset = 0; + + git_deflate_init(&s, pack_compression_level); + + hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); + s.next_out = obuf + hdrlen; + s.avail_out = sizeof(obuf) - hdrlen; + + while (status != Z_STREAM_END) { + if (size && !s.avail_in) { + size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); + ssize_t read_result = read_in_full(fd, ibuf, rsize); + if (read_result < 0) + die_errno("failed to read from '%s'", path); + if ((size_t)read_result != rsize) + die("failed to read %u bytes from '%s'", + (unsigned)rsize, path); + offset += rsize; + if (*already_hashed_to < offset) { + size_t hsize = offset - *already_hashed_to; + if (rsize < hsize) + hsize = rsize; + if (hsize) + git_hash_update(ctx, ibuf, hsize); + *already_hashed_to = offset; + } + s.next_in = ibuf; + s.avail_in = rsize; + size -= rsize; + } + + status = git_deflate(&s, size ? 0 : Z_FINISH); + + if (!s.avail_out || status == Z_STREAM_END) { + if (write_object) { + size_t written = s.next_out - obuf; + + /* would we bust the size limit? 
+static void flush_packfile_transaction(struct odb_transaction *transaction)
+{
+	struct transaction_packfile *state = &transaction->packfile;
+	struct repository *repo = transaction->odb->repo;
+	unsigned char hash[GIT_MAX_RAWSZ];
+	struct strbuf packname = STRBUF_INIT;
+	char *idx_tmp_name = NULL;
+
+	if (!state->f)
+		return;
+
+	if (state->nr_written == 0) {
+		close(state->f->fd);
+		free_hashfile(state->f);
+		unlink(state->pack_tmp_name);
+		goto clear_exit;
+	} else if (state->nr_written == 1) {
+		finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
+				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+	} else {
+		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
+		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
+					 state->nr_written, hash,
+					 state->offset);
+		close(fd);
+	}
+
+	strbuf_addf(&packname, "%s/pack/pack-%s.",
+		    repo_get_object_directory(transaction->odb->repo),
+		    hash_to_hex_algop(hash, repo->hash_algo));
+
+	stage_tmp_packfiles(repo, &packname, state->pack_tmp_name,
+			    state->written, state->nr_written, NULL,
+			    &state->pack_idx_opts, hash, &idx_tmp_name);
+	rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name);
+
+	for (uint32_t i = 0; i < state->nr_written; i++)
+		free(state->written[i]);
+
+clear_exit:
+	free(idx_tmp_name);
+	free(state->pack_tmp_name);
+	free(state->written);
+	memset(state, 0, sizeof(*state));
+
+	strbuf_release(&packname);
+	/* Make objects we just wrote available to ourselves */
+	odb_reprepare(repo->objects);
+}
+
+/*
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
+ *
+ * This also bypasses the usual "convert-to-git" dance, and that is on
+ * purpose. We could write a streaming version of the converting
+ * functions and insert that before feeding the data to fast-import
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
+ */
+static int index_blob_packfile_transaction(struct odb_transaction *transaction,
+					   struct object_id *result_oid, int fd,
+					   size_t size, const char *path,
+					   unsigned flags)
+{
+	struct transaction_packfile *state = &transaction->packfile;
+	off_t seekback, already_hashed_to;
+	struct git_hash_ctx ctx;
+	unsigned char obuf[16384];
+	unsigned header_len;
+	struct hashfile_checkpoint checkpoint;
+	struct pack_idx_entry *idx = NULL;
+
+	seekback = lseek(fd, 0, SEEK_CUR);
+	if (seekback == (off_t)-1)
+		return error("cannot find the current offset");
+
+	header_len = format_object_header((char *)obuf, sizeof(obuf),
+					  OBJ_BLOB, size);
+	transaction->odb->repo->hash_algo->init_fn(&ctx);
+	git_hash_update(&ctx, obuf, header_len);
+
+	/* Note: idx is non-NULL when we are writing */
+	if ((flags & INDEX_WRITE_OBJECT) != 0) {
+		CALLOC_ARRAY(idx, 1);
+
+		prepare_packfile_transaction(transaction, flags);
+		hashfile_checkpoint_init(state->f, &checkpoint);
+	}
+
+	already_hashed_to = 0;
+
+	while (1) {
+		prepare_packfile_transaction(transaction, flags);
+		if (idx) {
+			hashfile_checkpoint(state->f, &checkpoint);
+			idx->offset = state->offset;
+			crc32_begin(state->f);
+		}
+		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
+					 fd, size, path, flags))
+			break;
+		/*
+		 * Writing this object to the current pack will make
+		 * it too big; we need to truncate it, start a new
+		 * pack, and write into it.
+		 */
+		if (!idx)
+			BUG("should not happen");
+		hashfile_truncate(state->f, &checkpoint);
+		state->offset = checkpoint.offset;
+		flush_packfile_transaction(transaction);
+		if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
+			return error("cannot seek back");
+	}
+	git_hash_final_oid(result_oid, &ctx);
+	if (!idx)
+		return 0;
+
+	idx->crc32 = crc32_end(state->f);
+	if (already_written(transaction, result_oid)) {
+		hashfile_truncate(state->f, &checkpoint);
+		state->offset = checkpoint.offset;
+		free(idx);
+	} else {
+		oidcpy(&idx->oid, result_oid);
+		ALLOC_GROW(state->written,
+			   state->nr_written + 1,
+			   state->alloc_written);
+		state->written[state->nr_written++] = idx;
+	}
+	return 0;
+}
+
 int index_fd(struct index_state *istate, struct object_id *oid,
 	     int fd, struct stat *st,
 	     enum object_type type, const char *path, unsigned flags)
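The retry protocol described above index_blob_packfile_transaction() relies on hashfile checkpoints so a too-large object can be backed out of the current pack. The pattern, reduced to its skeleton (a sketch; over_size_limit stands for the pack_size_limit_cfg test inside stream_blob_to_pack()):

    struct hashfile_checkpoint cp;

    hashfile_checkpoint_init(f, &cp);
    hashfile_checkpoint(f, &cp);    /* record offset and hash state */
    crc32_begin(f);
    /* ... stream one candidate object ... */
    if (over_size_limit) {
            hashfile_truncate(f, &cp);  /* drop the partial object */
            /* flush the pack, rewind the input fd, try again */
    }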
@@ -1264,18 +1568,27 @@ int index_fd(struct index_state *istate, struct object_id *oid,
 	 * Call xsize_t() only when needed to avoid potentially unnecessary
 	 * die() for large files.
 	 */
-	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
+	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
 		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-	else if (!S_ISREG(st->st_mode))
+	} else if (!S_ISREG(st->st_mode)) {
 		ret = index_pipe(istate, oid, fd, type, path, flags);
-	else if (st->st_size <= repo_settings_get_big_file_threshold(the_repository) ||
-		 type != OBJ_BLOB ||
-		 (path && would_convert_to_git(istate, path)))
+	} else if ((st->st_size >= 0 &&
+		    (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
+		   type != OBJ_BLOB ||
+		   (path && would_convert_to_git(istate, path))) {
 		ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path,
 				 flags);
-	else
-		ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
-					      flags);
+	} else {
+		struct odb_transaction *transaction;
+
+		transaction = odb_transaction_begin(the_repository->objects);
+		ret = index_blob_packfile_transaction(the_repository->objects->transaction,
+						      oid, fd,
+						      xsize_t(st->st_size),
+						      path, flags);
+		odb_transaction_commit(transaction);
+	}
+
 	close(fd);
 	return ret;
 }
@@ -1300,14 +1613,18 @@ int index_path(struct index_state *istate, struct object_id *oid,
 		if (strbuf_readlink(&sb, path, st->st_size))
 			return error_errno("readlink(\"%s\")", path);
 		if (!(flags & INDEX_WRITE_OBJECT))
-			hash_object_file(the_hash_algo, sb.buf, sb.len,
+			hash_object_file(istate->repo->hash_algo, sb.buf, sb.len,
 					 OBJ_BLOB, oid);
-		else if (write_object_file(sb.buf, sb.len, OBJ_BLOB, oid))
+		else if (odb_write_object(istate->repo->objects, sb.buf, sb.len, OBJ_BLOB, oid))
 			rc = error(_("%s: failed to insert into database"), path);
 		strbuf_release(&sb);
 		break;
 	case S_IFDIR:
-		return repo_resolve_gitlink_ref(the_repository, path, "HEAD", oid);
+		if (repo_resolve_gitlink_ref(istate->repo, path, "HEAD", oid))
+			return error(_("'%s' does not have a commit checked out"), path);
+		if (&hash_algos[oid->algo] != istate->repo->hash_algo)
+			return error(_("cannot add a submodule of a different hash algorithm"));
+		break;
 	default:
 		return error(_("%s: unsupported file type"), path);
 	}
@@ -1329,12 +1646,13 @@ int read_pack_header(int fd, struct pack_header *header)
 	return 0;
 }
 
-int for_each_file_in_obj_subdir(unsigned int subdir_nr,
-				struct strbuf *path,
-				each_loose_object_fn obj_cb,
-				each_loose_cruft_fn cruft_cb,
-				each_loose_subdir_fn subdir_cb,
-				void *data)
+static int for_each_file_in_obj_subdir(unsigned int subdir_nr,
+				       struct strbuf *path,
+				       const struct git_hash_algo *algop,
+				       each_loose_object_fn obj_cb,
+				       each_loose_cruft_fn cruft_cb,
+				       each_loose_subdir_fn subdir_cb,
+				       void *data)
 {
 	size_t origlen, baselen;
 	DIR *dir;
@@ -1367,12 +1685,12 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 		namelen = strlen(de->d_name);
 		strbuf_setlen(path, baselen);
 		strbuf_add(path, de->d_name, namelen);
-		if (namelen == the_hash_algo->hexsz - 2 &&
+		if (namelen == algop->hexsz - 2 &&
 		    !hex_to_bytes(oid.hash + 1, de->d_name,
-				  the_hash_algo->rawsz - 1)) {
-			oid_set_algo(&oid, the_hash_algo);
-			memset(oid.hash + the_hash_algo->rawsz, 0,
-			       GIT_MAX_RAWSZ - the_hash_algo->rawsz);
+				  algop->rawsz - 1)) {
+			oid_set_algo(&oid, algop);
+			memset(oid.hash + algop->rawsz, 0,
+			       GIT_MAX_RAWSZ - algop->rawsz);
 			if (obj_cb) {
 				r = obj_cb(&oid, path->buf, data);
 				if (r)
@@ -1398,26 +1716,7 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 	return r;
 }
 
-int for_each_loose_file_in_objdir_buf(struct strbuf *path,
-				      each_loose_object_fn obj_cb,
-				      each_loose_cruft_fn cruft_cb,
-				      each_loose_subdir_fn subdir_cb,
-				      void *data)
-{
-	int r = 0;
-	int i;
-
-	for (i = 0; i < 256; i++) {
-		r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
-						subdir_cb, data);
-		if (r)
-			break;
-	}
-
-	return r;
-}
-
-int for_each_loose_file_in_objdir(const char *path,
+int for_each_loose_file_in_source(struct odb_source *source,
 				  each_loose_object_fn obj_cb,
 				  each_loose_cruft_fn cruft_cb,
 				  each_loose_subdir_fn subdir_cb,
@@ -1426,22 +1725,27 @@ int for_each_loose_file_in_objdir(const char *path,
 	struct strbuf buf = STRBUF_INIT;
 	int r;
 
-	strbuf_addstr(&buf, path);
-	r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,
-					      subdir_cb, data);
-	strbuf_release(&buf);
+	strbuf_addstr(&buf, source->path);
+	for (int i = 0; i < 256; i++) {
+		r = for_each_file_in_obj_subdir(i, &buf, source->odb->repo->hash_algo,
+						obj_cb, cruft_cb, subdir_cb, data);
+		if (r)
+			break;
+	}
+	strbuf_release(&buf);
 
 	return r;
 }
 
-int for_each_loose_object(each_loose_object_fn cb, void *data,
+int for_each_loose_object(struct object_database *odb,
+			  each_loose_object_fn cb, void *data,
 			  enum for_each_object_flags flags)
 {
-	struct object_directory *odb;
+	struct odb_source *source;
 
-	prepare_alt_odb(the_repository);
-	for (odb = the_repository->objects->odb; odb; odb = odb->next) {
-		int r = for_each_loose_file_in_objdir(odb->path, cb, NULL,
+	odb_prepare_alternates(odb);
+	for (source = odb->sources; source; source = source->next) {
+		int r = for_each_loose_file_in_source(source, cb, NULL,
 						      NULL, data);
 		if (r)
 			return r;
@@ -1461,50 +1765,57 @@ static int append_loose_object(const struct object_id *oid,
 	return 0;
 }
 
-struct oidtree *odb_loose_cache(struct object_directory *odb,
-				const struct object_id *oid)
+struct oidtree *odb_source_loose_cache(struct odb_source *source,
+				       const struct object_id *oid)
 {
 	int subdir_nr = oid->hash[0];
 	struct strbuf buf = STRBUF_INIT;
-	size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]);
+	size_t word_bits = bitsizeof(source->loose->subdir_seen[0]);
 	size_t word_index = subdir_nr / word_bits;
 	size_t mask = (size_t)1u << (subdir_nr % word_bits);
 	uint32_t *bitmap;
 
 	if (subdir_nr < 0 ||
-	    subdir_nr >= bitsizeof(odb->loose_objects_subdir_seen))
+	    (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen))
 		BUG("subdir_nr out of range");
 
-	bitmap = &odb->loose_objects_subdir_seen[word_index];
+	bitmap = &source->loose->subdir_seen[word_index];
 	if (*bitmap & mask)
-		return odb->loose_objects_cache;
-	if (!odb->loose_objects_cache) {
-		ALLOC_ARRAY(odb->loose_objects_cache, 1);
-		oidtree_init(odb->loose_objects_cache);
+		return source->loose->cache;
+	if (!source->loose->cache) {
+		ALLOC_ARRAY(source->loose->cache, 1);
+		oidtree_init(source->loose->cache);
 	}
-	strbuf_addstr(&buf, odb->path);
+	strbuf_addstr(&buf, source->path);
 	for_each_file_in_obj_subdir(subdir_nr, &buf,
+				    source->odb->repo->hash_algo,
 				    append_loose_object,
 				    NULL, NULL,
-				    odb->loose_objects_cache);
+				    source->loose->cache);
 	*bitmap |= mask;
 	strbuf_release(&buf);
-	return odb->loose_objects_cache;
+	return source->loose->cache;
 }
 
-void odb_clear_loose_cache(struct object_directory *odb)
+static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
-	oidtree_clear(odb->loose_objects_cache);
-	FREE_AND_NULL(odb->loose_objects_cache);
-	memset(&odb->loose_objects_subdir_seen, 0,
-	       sizeof(odb->loose_objects_subdir_seen));
+	oidtree_clear(loose->cache);
+	FREE_AND_NULL(loose->cache);
+	memset(&loose->subdir_seen, 0,
+	       sizeof(loose->subdir_seen));
+}
+
+void odb_source_loose_reprepare(struct odb_source *source)
+{
+	odb_source_loose_clear_cache(source->loose);
+}
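As a worked example of the subdir_seen bookkeeping above: for an oid whose first byte is 0xab (fan-out directory "ab"), with a uint32_t bitmap:

    size_t word_bits = bitsizeof(uint32_t);          /* 32 */
    size_t word_index = 0xab / word_bits;            /* 5 */
    size_t mask = (size_t)1u << (0xab % word_bits);  /* bit 11 */

Once that bit is set, later lookups for "ab/..." return the cached oidtree without re-scanning the directory.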
 
 static int check_stream_oid(git_zstream *stream,
 			    const char *hdr,
 			    unsigned long size,
 			    const char *path,
-			    const struct object_id *expected_oid)
+			    const struct object_id *expected_oid,
+			    const struct git_hash_algo *algop)
 {
 	struct git_hash_ctx c;
 	struct object_id real_oid;
@@ -1512,7 +1823,7 @@ static int check_stream_oid(git_zstream *stream,
 	unsigned long total_read;
 	int status = Z_OK;
 
-	the_hash_algo->init_fn(&c);
+	algop->init_fn(&c);
 	git_hash_update(&c, hdr, stream->total_out);
 
 	/*
@@ -1557,7 +1868,8 @@ static int check_stream_oid(git_zstream *stream,
 	return 0;
 }
 
-int read_loose_object(const char *path,
+int read_loose_object(struct repository *repo,
+		      const char *path,
 		      const struct object_id *expected_oid,
 		      struct object_id *real_oid,
 		      void **contents,
@@ -1596,8 +1908,9 @@ int read_loose_object(const char *path,
 	}
 
 	if (*oi->typep == OBJ_BLOB &&
-	    *size > repo_settings_get_big_file_threshold(the_repository)) {
-		if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
+	    *size > repo_settings_get_big_file_threshold(repo)) {
+		if (check_stream_oid(&stream, hdr, *size, path, expected_oid,
+				     repo->hash_algo) < 0)
 			goto out_inflate;
 	} else {
 		*contents = unpack_loose_rest(&stream, hdr, *size, expected_oid);
@@ -1605,7 +1918,7 @@ int read_loose_object(const char *path,
 			error(_("unable to unpack contents of %s"), path);
 			goto out_inflate;
 		}
-		hash_object_file(the_repository->hash_algo,
+		hash_object_file(repo->hash_algo,
 				 *contents, *size, *oi->typep,
 				 real_oid);
 		if (!oideq(expected_oid, real_oid))
@@ -1621,3 +1934,49 @@ out:
 	munmap(map, mapsize);
 	return ret;
 }
+
+struct odb_transaction *object_file_transaction_begin(struct odb_source *source)
+{
+	struct object_database *odb = source->odb;
+
+	if (odb->transaction)
+		return NULL;
+
+	CALLOC_ARRAY(odb->transaction, 1);
+	odb->transaction->odb = odb;
+
+	return odb->transaction;
+}
+
+void object_file_transaction_commit(struct odb_transaction *transaction)
+{
+	if (!transaction)
+		return;
+
+	/*
+	 * Ensure the transaction ending matches the pending transaction.
+	 */
+	ASSERT(transaction == transaction->odb->transaction);
+
+	flush_loose_object_transaction(transaction);
+	flush_packfile_transaction(transaction);
+	transaction->odb->transaction = NULL;
+	free(transaction);
+}
+
+struct odb_source_loose *odb_source_loose_new(struct odb_source *source)
+{
+	struct odb_source_loose *loose;
+	CALLOC_ARRAY(loose, 1);
+	loose->source = source;
+	return loose;
+}
+
+void odb_source_loose_free(struct odb_source_loose *loose)
+{
+	if (!loose)
+		return;
+	odb_source_loose_clear_cache(loose);
+	loose_object_map_clear(&loose->map);
+	free(loose);
+}
