From ece43d9dc70b1717484ee78b66aef4f9390c2b2b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:00 +0100 Subject: object-file: introduce `struct odb_source_loose` Currently, all state that relates to loose objects is held directly by the `struct odb_source`. Introduce a new `struct odb_source_loose` to hold the state instead so that it is entirely self-contained. This structure will eventually morph into the backend for accessing loose objects. As such, this is part of the refactorings to introduce pluggable object databases. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index 4675c8ed6b..cd6aa561fa 100644 --- a/object-file.c +++ b/object-file.c @@ -1995,3 +1995,16 @@ void object_file_transaction_commit(struct odb_transaction *transaction) transaction->odb->transaction = NULL; free(transaction); } + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source) +{ + struct odb_source_loose *loose; + CALLOC_ARRAY(loose, 1); + loose->source = source; + return loose; +} + +void odb_source_loose_free(struct odb_source_loose *loose) +{ + free(loose); +} -- cgit v1.2.3 From 90a93f9dea88532623ef7422dbc21d8dc70a58dd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:01 +0100 Subject: object-file: move loose object cache into loose source Our loose objects use a cache that (optionally) stores all objects for each of the opened sharding directories. This cache is located in the `struct odb_source`, but now that we have `struct odb_source_loose` it makes sense to move it into the latter structure so that all state that relates to loose objects is entirely self-contained. Do so. While at it, rename corresponding functions to have a prefix that relates to `struct odb_source_loose`. 
Note that despite this prefix, the functions still accept a `struct odb_source` as input. This is done intentionally: once we introduce pluggable object databases, we will continue to accept this struct but then do a cast inside these functions to `struct odb_source_loose`. This design is similar to how we do it for our ref backends. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 9 +++++---- object-file.c | 35 +++++++++++++++++++---------------- object-file.h | 16 ++++++++++++++-- object-name.c | 2 +- odb.c | 1 - odb.h | 12 ------------ 6 files changed, 39 insertions(+), 36 deletions(-) (limited to 'object-file.c') diff --git a/loose.c b/loose.c index e8ea6e7e24..8cc7573ff2 100644 --- a/loose.c +++ b/loose.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "hash.h" #include "path.h" +#include "object-file.h" #include "odb.h" #include "hex.h" #include "repository.h" @@ -54,7 +55,7 @@ static int insert_loose_map(struct odb_source *source, inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(source->loose_objects_cache, compat_oid); + oidtree_insert(source->loose->cache, compat_oid); return inserted; } @@ -66,9 +67,9 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source if (!source->loose_map) loose_object_map_init(&source->loose_map); - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); diff --git a/object-file.c b/object-file.c index cd6aa561fa..fef00d6d3d 100644 --- a/object-file.c +++ b/object-file.c @@ -223,7 +223,7 @@ static int quick_has_loose(struct repository *r, odb_prepare_alternates(r->objects); for (source = 
r->objects->sources; source; source = source->next) { - if (oidtree_contains(odb_loose_cache(source, oid), oid)) + if (oidtree_contains(odb_source_loose_cache(source, oid), oid)) return 1; } return 0; @@ -1802,44 +1802,44 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_loose_cache(struct odb_source *source, - const struct object_id *oid) +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(source->loose_objects_subdir_seen[0]); + size_t word_bits = bitsizeof(source->loose->subdir_seen[0]); size_t word_index = subdir_nr / word_bits; size_t mask = (size_t)1u << (subdir_nr % word_bits); uint32_t *bitmap; if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(source->loose_objects_subdir_seen)) + (size_t) subdir_nr >= bitsizeof(source->loose->subdir_seen)) BUG("subdir_nr out of range"); - bitmap = &source->loose_objects_subdir_seen[word_index]; + bitmap = &source->loose->subdir_seen[word_index]; if (*bitmap & mask) - return source->loose_objects_cache; - if (!source->loose_objects_cache) { - ALLOC_ARRAY(source->loose_objects_cache, 1); - oidtree_init(source->loose_objects_cache); + return source->loose->cache; + if (!source->loose->cache) { + ALLOC_ARRAY(source->loose->cache, 1); + oidtree_init(source->loose->cache); } strbuf_addstr(&buf, source->path); for_each_file_in_obj_subdir(subdir_nr, &buf, source->odb->repo->hash_algo, append_loose_object, NULL, NULL, - source->loose_objects_cache); + source->loose->cache); *bitmap |= mask; strbuf_release(&buf); - return source->loose_objects_cache; + return source->loose->cache; } void odb_clear_loose_cache(struct odb_source *source) { - oidtree_clear(source->loose_objects_cache); - FREE_AND_NULL(source->loose_objects_cache); - memset(&source->loose_objects_subdir_seen, 0, - sizeof(source->loose_objects_subdir_seen)); + 
oidtree_clear(source->loose->cache); + FREE_AND_NULL(source->loose->cache); + memset(&source->loose->subdir_seen, 0, + sizeof(source->loose->subdir_seen)); } static int check_stream_oid(git_zstream *stream, @@ -2006,5 +2006,8 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source *source) void odb_source_loose_free(struct odb_source_loose *loose) { + if (!loose) + return; + odb_clear_loose_cache(loose->source); free(loose); } diff --git a/object-file.h b/object-file.h index 695a7e8e7c..90da69cf5f 100644 --- a/object-file.h +++ b/object-file.h @@ -20,6 +20,18 @@ struct odb_source; struct odb_source_loose { struct odb_source *source; + + /* + * Used to store the results of readdir(3) calls when we are OK + * sacrificing accuracy due to races for speed. That includes + * object existence with OBJECT_INFO_QUICK, as well as + * our search for unique abbreviated hashes. Don't use it for tasks + * requiring greater accuracy! + * + * Be sure to call odb_load_loose_cache() before using. + */ + uint32_t subdir_seen[8]; /* 256 bits */ + struct oidtree *cache; }; struct odb_source_loose *odb_source_loose_new(struct odb_source *source); @@ -29,8 +41,8 @@ void odb_source_loose_free(struct odb_source_loose *loose); * Populate and return the loose object cache array corresponding to the * given object ID. */ -struct oidtree *odb_loose_cache(struct odb_source *source, - const struct object_id *oid); +struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); /* Empty the loose object cache for the specified object directory. 
*/ void odb_clear_loose_cache(struct odb_source *source); diff --git a/object-name.c b/object-name.c index 766c757042..8ce0ef7c23 100644 --- a/object-name.c +++ b/object-name.c @@ -116,7 +116,7 @@ static void find_short_object_filename(struct disambiguate_state *ds) struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_loose_cache(source, &ds->bin_pfx), + oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), &ds->bin_pfx, ds->len, match_prefix, ds); } diff --git a/odb.c b/odb.c index 2d06ab0bb8..87d84688c6 100644 --- a/odb.c +++ b/odb.c @@ -370,7 +370,6 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); - odb_clear_loose_cache(source); loose_object_map_clear(&source->loose_map); free(source); } diff --git a/odb.h b/odb.h index 49b398beda..77104396af 100644 --- a/odb.h +++ b/odb.h @@ -51,18 +51,6 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; - /* - * Used to store the results of readdir(3) calls when we are OK - * sacrificing accuracy due to races for speed. That includes - * object existence with OBJECT_INFO_QUICK, as well as - * our search for unique abbreviated hashes. Don't use it for tasks - * requiring greater accuracy! - * - * Be sure to call odb_load_loose_cache() before using. - */ - uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ - struct oidtree *loose_objects_cache; - /* Map between object IDs for loose objects. */ struct loose_object_map *loose_map; -- cgit v1.2.3 From be659c97eae3b68e38b71f0a67067dede23903b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:02 +0100 Subject: object-file: hide internals when we need to reprepare loose sources There are two different situations where we have to clear the cache of loose objects: - When freeing the loose object source itself to avoid memory leaks. 
- When repreparing the loose object source so that any potentially-stale data is getting evicted from the cache. The former is already handled by `odb_source_loose_free()`. But the latter case is still done manually in `odb_reprepare()`, so we are leaking internals into that code. Introduce a new `odb_source_loose_reprepare()` function as an equivalent to `packfile_store_prepare()` to hide these implementation details. Furthermore, while at it, rename the function `odb_clear_loose_cache()` to `odb_source_loose_clear_cache()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 17 +++++++++++------ object-file.h | 6 +++--- odb.c | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index fef00d6d3d..20daa629a1 100644 --- a/object-file.c +++ b/object-file.c @@ -1834,12 +1834,17 @@ struct oidtree *odb_source_loose_cache(struct odb_source *source, return source->loose->cache; } -void odb_clear_loose_cache(struct odb_source *source) +static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { - oidtree_clear(source->loose->cache); - FREE_AND_NULL(source->loose->cache); - memset(&source->loose->subdir_seen, 0, - sizeof(source->loose->subdir_seen)); + oidtree_clear(loose->cache); + FREE_AND_NULL(loose->cache); + memset(&loose->subdir_seen, 0, + sizeof(loose->subdir_seen)); +} + +void odb_source_loose_reprepare(struct odb_source *source) +{ + odb_source_loose_clear_cache(source->loose); } static int check_stream_oid(git_zstream *stream, @@ -2008,6 +2013,6 @@ void odb_source_loose_free(struct odb_source_loose *loose) { if (!loose) return; - odb_clear_loose_cache(loose->source); + odb_source_loose_clear_cache(loose); free(loose); } diff --git a/object-file.h b/object-file.h index 90da69cf5f..bec855e8e5 100644 --- a/object-file.h +++ b/object-file.h @@ -37,6 +37,9 @@ struct odb_source_loose { struct odb_source_loose *odb_source_loose_new(struct odb_source
*source); void odb_source_loose_free(struct odb_source_loose *loose); +/* Reprepare the loose source by emptying the loose object cache. */ +void odb_source_loose_reprepare(struct odb_source *source); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -44,9 +47,6 @@ void odb_source_loose_free(struct odb_source_loose *loose); struct oidtree *odb_source_loose_cache(struct odb_source *source, const struct object_id *oid); -/* Empty the loose object cache for the specified object directory. */ -void odb_clear_loose_cache(struct odb_source *source); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. diff --git a/odb.c b/odb.c index 87d84688c6..b3e8d4a49c 100644 --- a/odb.c +++ b/odb.c @@ -1071,7 +1071,7 @@ void odb_reprepare(struct object_database *o) odb_prepare_alternates(o); for (source = o->sources; source; source = source->next) - odb_clear_loose_cache(source); + odb_source_loose_reprepare(source); o->approximate_object_count_valid = 0; -- cgit v1.2.3 From 376016ec71c3a6c883f2ca77a3f1c0245fd60dc2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:03 +0100 Subject: object-file: move loose object map into loose source The loose object map is used to map from the repository's canonical object hash to the compatibility hash. As the name indicates, this map is only used for loose objects, and as such it is tied to a specific loose object source. Same as with preceding commits, move this map into the loose object source accordingly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 10 +++++----- object-file.c | 1 + object-file.h | 3 +++ odb.c | 1 - odb.h | 3 --- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'object-file.c') diff --git a/loose.c b/loose.c index 8cc7573ff2..56cf64b648 100644 --- a/loose.c +++ b/loose.c @@ -49,7 +49,7 @@ static int insert_loose_map(struct odb_source *source, const struct object_id *oid, const struct object_id *compat_oid) { - struct loose_object_map *map = source->loose_map; + struct loose_object_map *map = source->loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); @@ -65,8 +65,8 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!source->loose_map) - loose_object_map_init(&source->loose_map); + if (!source->loose->map) + loose_object_map_init(&source->loose->map); if (!source->loose->cache) { ALLOC_ARRAY(source->loose->cache, 1); oidtree_init(source->loose->cache); @@ -125,7 +125,7 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - kh_oid_map_t *map = repo->objects->sources->loose_map->to_compat; + kh_oid_map_t *map = repo->objects->sources->loose->map->to_compat; struct lock_file lock; int fd; khiter_t iter; @@ -231,7 +231,7 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct loose_object_map *loose_map = source->loose_map; + struct loose_object_map *loose_map = source->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? 
diff --git a/object-file.c b/object-file.c index 20daa629a1..ccc67713fa 100644 --- a/object-file.c +++ b/object-file.c @@ -2014,5 +2014,6 @@ void odb_source_loose_free(struct odb_source_loose *loose) if (!loose) return; odb_source_loose_clear_cache(loose); + loose_object_map_clear(&loose->map); free(loose); } diff --git a/object-file.h b/object-file.h index bec855e8e5..f8a96a45f5 100644 --- a/object-file.h +++ b/object-file.h @@ -32,6 +32,9 @@ struct odb_source_loose { */ uint32_t subdir_seen[8]; /* 256 bits */ struct oidtree *cache; + + /* Map between object IDs for loose objects. */ + struct loose_object_map *map; }; struct odb_source_loose *odb_source_loose_new(struct odb_source *source); diff --git a/odb.c b/odb.c index b3e8d4a49c..d1df9609e2 100644 --- a/odb.c +++ b/odb.c @@ -370,7 +370,6 @@ static void odb_source_free(struct odb_source *source) { free(source->path); odb_source_loose_free(source->loose); - loose_object_map_clear(&source->loose_map); free(source); } diff --git a/odb.h b/odb.h index 77104396af..f9a3137a34 100644 --- a/odb.h +++ b/odb.h @@ -51,9 +51,6 @@ struct odb_source { /* Private state for loose objects. */ struct odb_source_loose *loose; - /* Map between object IDs for loose objects. */ - struct loose_object_map *loose_map; - /* * private data * -- cgit v1.2.3 From ff7ad5cb3936514ec0be32531ff6274b53dbe091 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:04 +0100 Subject: object-file: read objects via the loose object source When reading an object via `loose_object_info()` or `map_loose_object()` we hand in the whole repository. We then iterate through each of the object sources to figure out whether that source has the object in question. This logic is reversing responsibility though: a specific backend should only care about one specific source, where the object sources themselves are then managed by the object database. 
Refactor the code accordingly by passing an object source to both of these functions instead. The different sources are then handled by either `do_oid_object_info_extended()`, which sits on the object database level, and by `open_istream_loose()`. The latter function arguably is still at the wrong level, but this will be cleaned up at a later point in time. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 68 ++++++++++++++++++++++------------------------------------- object-file.h | 15 +++++++------ odb.c | 9 ++++++-- streaming.c | 11 +++++++++- 4 files changed, 50 insertions(+), 53 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index ccc67713fa..6d6e9a5a2a 100644 --- a/object-file.c +++ b/object-file.c @@ -167,25 +167,22 @@ int stream_object_signature(struct repository *r, const struct object_id *oid) } /* - * Find "oid" as a loose object in the local repository or in an alternate. + * Find "oid" as a loose object in given source. * Returns 0 on success, negative on failure. * * The "path" out-parameter will give the path of the object we found (if any). * Note that it may point to static storage and is only valid until another * call to stat_loose_object(). 
*/ -static int stat_loose_object(struct repository *r, const struct object_id *oid, +static int stat_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, struct stat *st, const char **path) { - struct odb_source *source; static struct strbuf buf = STRBUF_INIT; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - if (!lstat(*path, st)) - return 0; - } + *path = odb_loose_path(loose->source, &buf, oid); + if (!lstat(*path, st)) + return 0; return -1; } @@ -194,39 +191,24 @@ static int stat_loose_object(struct repository *r, const struct object_id *oid, * Like stat_loose_object(), but actually open the object and return the * descriptor. See the caveats on the "path" parameter above. */ -static int open_loose_object(struct repository *r, +static int open_loose_object(struct odb_source_loose *loose, const struct object_id *oid, const char **path) { - int fd; - struct odb_source *source; - int most_interesting_errno = ENOENT; static struct strbuf buf = STRBUF_INIT; + int fd; - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - *path = odb_loose_path(source, &buf, oid); - fd = git_open(*path); - if (fd >= 0) - return fd; + *path = odb_loose_path(loose->source, &buf, oid); + fd = git_open(*path); + if (fd >= 0) + return fd; - if (most_interesting_errno == ENOENT) - most_interesting_errno = errno; - } - errno = most_interesting_errno; return -1; } -static int quick_has_loose(struct repository *r, +static int quick_has_loose(struct odb_source_loose *loose, const struct object_id *oid) { - struct odb_source *source; - - odb_prepare_alternates(r->objects); - for (source = r->objects->sources; source; source = source->next) { - if (oidtree_contains(odb_source_loose_cache(source, oid), oid)) - return 1; - } - return 0; + return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid); } 
/* @@ -252,12 +234,12 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -void *map_loose_object(struct repository *r, - const struct object_id *oid, - unsigned long *size) +void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size) { const char *p; - int fd = open_loose_object(r, oid, &p); + int fd = open_loose_object(source->loose, oid, &p); if (fd < 0) return NULL; @@ -407,9 +389,9 @@ int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags) +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags) { int status = 0; int fd; @@ -422,7 +404,7 @@ int loose_object_info(struct repository *r, enum object_type type_scratch; if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, r->hash_algo); + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); /* * If we don't care about type or size, then we don't @@ -435,15 +417,15 @@ int loose_object_info(struct repository *r, if (!oi->typep && !oi->sizep && !oi->contentp) { struct stat st; if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK)) - return quick_has_loose(r, oid) ? 0 : -1; - if (stat_loose_object(r, oid, &st, &path) < 0) + return quick_has_loose(source->loose, oid) ? 
0 : -1; + if (stat_loose_object(source->loose, oid, &st, &path) < 0) return -1; if (oi->disk_sizep) *oi->disk_sizep = st.st_size; return 0; } - fd = open_loose_object(r, oid, &path); + fd = open_loose_object(source->loose, oid, &path); if (fd < 0) { if (errno != ENOENT) error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); diff --git a/object-file.h b/object-file.h index f8a96a45f5..ca13d3d64e 100644 --- a/object-file.h +++ b/object-file.h @@ -43,6 +43,14 @@ void odb_source_loose_free(struct odb_source_loose *loose); /* Reprepare the loose source by emptying the loose object cache. */ void odb_source_loose_reprepare(struct odb_source *source); +int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, int flags); + +void *odb_source_loose_map_object(struct odb_source *source, + const struct object_id *oid, + unsigned long *size); + /* * Populate and return the loose object cache array corresponding to the * given object ID. 
@@ -66,9 +74,6 @@ const char *odb_loose_path(struct odb_source *source, int has_loose_object(struct odb_source *source, const struct object_id *oid); -void *map_loose_object(struct repository *r, const struct object_id *oid, - unsigned long *size); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: @@ -196,10 +201,6 @@ int check_object_signature(struct repository *r, const struct object_id *oid, */ int stream_object_signature(struct repository *r, const struct object_id *oid); -int loose_object_info(struct repository *r, - const struct object_id *oid, - struct object_info *oi, int flags); - enum finalize_object_file_flags { FOF_SKIP_COLLISION_CHECK = 1, }; diff --git a/odb.c b/odb.c index d1df9609e2..4c0b4fdcd5 100644 --- a/odb.c +++ b/odb.c @@ -697,13 +697,18 @@ static int do_oid_object_info_extended(struct object_database *odb, return 0; } + odb_prepare_alternates(odb); + while (1) { + struct odb_source *source; + if (find_pack_entry(odb->repo, real, &e)) break; /* Most likely it's a loose object. */ - if (!loose_object_info(odb->repo, real, oi, flags)) - return 0; + for (source = odb->sources; source; source = source->next) + if (!odb_source_loose_read_object_info(source, real, oi, flags)) + return 0; /* Not a loose object; someone else may have just packed it. 
*/ if (!(flags & OBJECT_INFO_QUICK)) { diff --git a/streaming.c b/streaming.c index 4b13827668..00ad649ae3 100644 --- a/streaming.c +++ b/streaming.c @@ -230,12 +230,21 @@ static int open_istream_loose(struct git_istream *st, struct repository *r, enum object_type *type) { struct object_info oi = OBJECT_INFO_INIT; + struct odb_source *source; + oi.sizep = &st->size; oi.typep = type; - st->u.loose.mapped = map_loose_object(r, oid, &st->u.loose.mapsize); + odb_prepare_alternates(r->objects); + for (source = r->objects->sources; source; source = source->next) { + st->u.loose.mapped = odb_source_loose_map_object(source, oid, + &st->u.loose.mapsize); + if (st->u.loose.mapped) + break; + } if (!st->u.loose.mapped) return -1; + switch (unpack_loose_header(&st->z, st->u.loose.mapped, st->u.loose.mapsize, st->u.loose.hdr, sizeof(st->u.loose.hdr))) { -- cgit v1.2.3 From 05130c6c9eed9ff7450e9067d7215032eb914c10 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:05 +0100 Subject: object-file: rename `has_loose_object()` Rename `has_loose_object()` to `odb_source_loose_has_object()` so that it becomes clear that this is tied to a specific loose object source. This matches our modern naming schema for functions. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- object-file.c | 6 +++--- object-file.h | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'object-file.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b5454e5df1..69e80b1443 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1716,7 +1716,7 @@ static int want_object_in_pack_mtime(const struct object_id *oid, */ struct odb_source *source = the_repository->objects->sources->next; for (; source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) return 0; } @@ -3978,7 +3978,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type int found = 0; for (; !found && source; source = source->next) - if (has_loose_object(source, oid)) + if (odb_source_loose_has_object(source, oid)) found = 1; /* diff --git a/object-file.c b/object-file.c index 6d6e9a5a2a..79e7ab8d2e 100644 --- a/object-file.c +++ b/object-file.c @@ -99,8 +99,8 @@ static int check_and_freshen_source(struct odb_source *source, return check_and_freshen_file(path.buf, freshen); } -int has_loose_object(struct odb_source *source, - const struct object_id *oid) +int odb_source_loose_has_object(struct odb_source *source, + const struct object_id *oid) { return check_and_freshen_source(source, oid, 0); } @@ -1161,7 +1161,7 @@ int force_object_loose(struct odb_source *source, int ret; for (struct odb_source *s = source->odb->sources; s; s = s->next) - if (has_loose_object(s, oid)) + if (odb_source_loose_has_object(s, oid)) return 0; oi.typep = &type; diff --git a/object-file.h b/object-file.h index ca13d3d64e..065a44bb8a 100644 --- a/object-file.h +++ b/object-file.h @@ -51,6 +51,14 @@ void *odb_source_loose_map_object(struct odb_source *source, const struct object_id *oid, unsigned long *size); +/* + * Return true iff an object database source has a loose object 
+ * with the specified name. This function does not respect replace + * references. + */ +int odb_source_loose_has_object(struct odb_source *source, + const struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -66,14 +74,6 @@ const char *odb_loose_path(struct odb_source *source, struct strbuf *buf, const struct object_id *oid); -/* - * Return true iff an object database source has a loose object - * with the specified name. This function does not respect replace - * references. - */ -int has_loose_object(struct odb_source *source, - const struct object_id *oid); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: -- cgit v1.2.3 From f2bd88a308a2754e727cb462e03102307cdfe004 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:06 +0100 Subject: object-file: refactor freshening of objects When writing an object that already exists in our object database we skip the write and instead only update mtimes of the object, either in its packed or loose object format. This logic is wholly contained in "object-file.c", but that file is really only concerned with loose objects. So it does not really make sense that it also contains the logic to freshen a packed object. Introduce a new `odb_freshen_object()` function that sits on the object database level and two functions `packfile_store_freshen_object()` and `odb_source_loose_freshen_object()`. Like this, the format-specific functions can be part of their respective subsystems, while the backend agnostic function to freshen an object sits at the object database layer. Note that this change also moves the logic that iterates through object sources from the object source layer into the object database layer. 
This change is intentional: object sources should ideally only have to worry about themselves, and coordination of different sources should be handled on the object database level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 33 +++++---------------------------- object-file.h | 3 +++ odb.c | 16 ++++++++++++++++ odb.h | 3 +++ packfile.c | 16 ++++++++++++++++ packfile.h | 3 +++ 6 files changed, 46 insertions(+), 28 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index 79e7ab8d2e..893c32adcd 100644 --- a/object-file.c +++ b/object-file.c @@ -968,30 +968,10 @@ static int write_loose_object(struct odb_source *source, FOF_SKIP_COLLISION_CHECK); } -static int freshen_loose_object(struct object_database *odb, - const struct object_id *oid) +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid) { - odb_prepare_alternates(odb); - for (struct odb_source *source = odb->sources; source; source = source->next) - if (check_and_freshen_source(source, oid, 1)) - return 1; - return 0; -} - -static int freshen_packed_object(struct object_database *odb, - const struct object_id *oid) -{ - struct pack_entry e; - if (!find_pack_entry(odb->repo, oid, &e)) - return 0; - if (e.p->is_cruft) - return 0; - if (e.p->freshened) - return 1; - if (!freshen_file(e.p->pack_name)) - return 0; - e.p->freshened = 1; - return 1; + return !!check_and_freshen_source(source, oid, 1); } int stream_loose_object(struct odb_source *source, @@ -1073,12 +1053,10 @@ int stream_loose_object(struct odb_source *source, die(_("deflateEnd on stream object failed (%d)"), ret); close_loose_object(source, fd, tmp_file.buf); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) { + if (odb_freshen_object(source->odb, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } - odb_loose_path(source, &filename, oid); /* We finally know the object path, and create the missing 
dir. */ @@ -1137,8 +1115,7 @@ int write_object_file(struct odb_source *source, * it out into .git/objects/??/?{38} file. */ write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(source->odb, oid) || - freshen_loose_object(source->odb, oid)) + if (odb_freshen_object(source->odb, oid)) return 0; if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) return -1; diff --git a/object-file.h b/object-file.h index 065a44bb8a..ee5b24cec6 100644 --- a/object-file.h +++ b/object-file.h @@ -59,6 +59,9 @@ void *odb_source_loose_map_object(struct odb_source *source, int odb_source_loose_has_object(struct odb_source *source, const struct object_id *oid); +int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. diff --git a/odb.c b/odb.c index 4c0b4fdcd5..17734bdaff 100644 --- a/odb.c +++ b/odb.c @@ -987,6 +987,22 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, return odb_read_object_info_extended(odb, oid, NULL, object_info_flags) >= 0; } +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + if (packfile_store_freshen_object(odb->packfiles, oid)) + return 1; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (odb_source_loose_freshen_object(source, oid)) + return 1; + + return 0; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index f9a3137a34..2653247e0c 100644 --- a/odb.h +++ b/odb.h @@ -396,6 +396,9 @@ int odb_has_object(struct object_database *odb, const struct object_id *oid, unsigned flags); +int odb_freshen_object(struct object_database *odb, + const struct object_id *oid); + void odb_assert_oid_type(struct object_database *odb, const struct 
object_id *oid, enum object_type expect); diff --git a/packfile.c b/packfile.c index 1ae2b2fe1e..40f733dd23 100644 --- a/packfile.c +++ b/packfile.c @@ -819,6 +819,22 @@ struct packed_git *packfile_store_load_pack(struct packfile_store *store, return p; } +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid) +{ + struct pack_entry e; + if (!find_pack_entry(store->odb->repo, oid, &e)) + return 0; + if (e.p->is_cruft) + return 0; + if (e.p->freshened) + return 1; + if (utime(e.p->pack_name, NULL)) + return 0; + e.p->freshened = 1; + return 1; +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, diff --git a/packfile.h b/packfile.h index c9d0b93446..58fcc88e20 100644 --- a/packfile.h +++ b/packfile.h @@ -163,6 +163,9 @@ struct list_head *packfile_store_get_packs_mru(struct packfile_store *store); struct packed_git *packfile_store_load_pack(struct packfile_store *store, const char *idx_path, int local); +int packfile_store_freshen_object(struct packfile_store *store, + const struct object_id *oid); + struct pack_window { struct pack_window *next; unsigned char *base; -- cgit v1.2.3 From bfb1b2b4ac5cfa99f7d2503b404d282714d84bdf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:07 +0100 Subject: object-file: rename `write_object_file()` Rename `write_object_file()` to `odb_source_loose_write_object()` so that it becomes clear that this is tied to a specific loose object source. This matches our modern naming schema for functions. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 8 ++++---- object-file.h | 10 +++++----- odb.c | 3 ++- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index 893c32adcd..fdc644a427 100644 --- a/object-file.c +++ b/object-file.c @@ -1084,10 +1084,10 @@ cleanup: return err; } -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags) +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags) { const struct git_hash_algo *algo = source->odb->repo->hash_algo; const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; diff --git a/object-file.h b/object-file.h index ee5b24cec6..36a60e15c4 100644 --- a/object-file.h +++ b/object-file.h @@ -62,6 +62,11 @@ int odb_source_loose_has_object(struct odb_source *source, int odb_source_loose_freshen_object(struct odb_source *source, const struct object_id *oid); +int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags); + /* * Populate and return the loose object cache array corresponding to the * given object ID. 
@@ -168,11 +173,6 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -int write_object_file(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags); - struct input_stream { const void *(*read)(struct input_stream *, unsigned long *len); void *data; diff --git a/odb.c b/odb.c index 17734bdaff..da44f1d63b 100644 --- a/odb.c +++ b/odb.c @@ -1021,7 +1021,8 @@ int odb_write_object_ext(struct object_database *odb, struct object_id *compat_oid, unsigned flags) { - return write_object_file(odb->sources, buf, len, type, oid, compat_oid, flags); + return odb_source_loose_write_object(odb->sources, buf, len, type, + oid, compat_oid, flags); } struct object_database *odb_new(struct repository *repo) -- cgit v1.2.3 From 3e5e360888316ed1a44da69bf134bb6ec70aee1b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 3 Nov 2025 08:42:08 +0100 Subject: object-file: refactor writing objects via a stream We have two different ways to write an object into the database: - We either provide the full buffer and write the object all at once. - Or we provide an input stream that has a `read()` function so that we can chunk the object. The latter is especially used for large objects, where it may be too expensive to hold the complete object in memory all at once. While we already have `odb_write_object()` at the ODB-layer, we don't have an equivalent for streaming an object. Introduce a new function `odb_write_object_stream()` to address this gap so that callers don't have to be aware of the inner workings of how to stream an object to disk with a specific object source. Rename `stream_loose_object()` to `odb_source_loose_write_stream()` to clarify its scope. This matches our modern best practices around how to name functions. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/unpack-objects.c | 7 +++---- object-file.c | 6 +++--- object-file.h | 14 ++++---------- odb.c | 7 +++++++ odb.h | 10 ++++++++++ 5 files changed, 27 insertions(+), 17 deletions(-) (limited to 'object-file.c') diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index ef79e43715..6fc64e9e4b 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -363,7 +363,7 @@ struct input_zstream_data { int status; }; -static const void *feed_input_zstream(struct input_stream *in_stream, +static const void *feed_input_zstream(struct odb_write_stream *in_stream, unsigned long *readlen) { struct input_zstream_data *data = in_stream->data; @@ -393,7 +393,7 @@ static void stream_blob(unsigned long size, unsigned nr) { git_zstream zstream = { 0 }; struct input_zstream_data data = { 0 }; - struct input_stream in_stream = { + struct odb_write_stream in_stream = { .read = feed_input_zstream, .data = &data, }; @@ -402,8 +402,7 @@ static void stream_blob(unsigned long size, unsigned nr) data.zstream = &zstream; git_inflate_init(&zstream); - if (stream_loose_object(the_repository->objects->sources, - &in_stream, size, &info->oid)) + if (odb_write_object_stream(the_repository->objects, &in_stream, size, &info->oid)) die(_("failed to write object in stream")); if (data.status != Z_STREAM_END) diff --git a/object-file.c b/object-file.c index fdc644a427..811c569ed3 100644 --- a/object-file.c +++ b/object-file.c @@ -974,9 +974,9 @@ int odb_source_loose_freshen_object(struct odb_source *source, return !!check_and_freshen_source(source, oid, 1); } -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid) +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *in_stream, size_t len, + struct object_id *oid) { const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id 
compat_oid; diff --git a/object-file.h b/object-file.h index 36a60e15c4..eeffa67bbd 100644 --- a/object-file.h +++ b/object-file.h @@ -67,6 +67,10 @@ int odb_source_loose_write_object(struct odb_source *source, enum object_type type, struct object_id *oid, struct object_id *compat_oid_in, unsigned flags); +int odb_source_loose_write_stream(struct odb_source *source, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + /* * Populate and return the loose object cache array corresponding to the * given object ID. @@ -173,16 +177,6 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, struct object_info; int parse_loose_header(const char *hdr, struct object_info *oi); -struct input_stream { - const void *(*read)(struct input_stream *, unsigned long *len); - void *data; - int is_finished; -}; - -int stream_loose_object(struct odb_source *source, - struct input_stream *in_stream, size_t len, - struct object_id *oid); - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime); diff --git a/odb.c b/odb.c index da44f1d63b..3ec21ef24e 100644 --- a/odb.c +++ b/odb.c @@ -1025,6 +1025,13 @@ int odb_write_object_ext(struct object_database *odb, oid, compat_oid, flags); } +int odb_write_object_stream(struct object_database *odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid) +{ + return odb_source_loose_write_stream(odb->sources, stream, len, oid); +} + struct object_database *odb_new(struct repository *repo) { struct object_database *o = xmalloc(sizeof(*o)); diff --git a/odb.h b/odb.h index 2653247e0c..9bb28008b1 100644 --- a/odb.h +++ b/odb.h @@ -492,4 +492,14 @@ static inline int odb_write_object(struct object_database *odb, return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); } +struct odb_write_stream { + const void *(*read)(struct odb_write_stream *, unsigned long *len); + void *data; + int is_finished; +}; + +int odb_write_object_stream(struct object_database 
*odb, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); + #endif /* ODB_H */ -- cgit v1.2.3