diff options
Diffstat (limited to 'sha1_file.c')
| -rw-r--r-- | sha1_file.c | 232 |
1 files changed, 146 insertions, 86 deletions
diff --git a/sha1_file.c b/sha1_file.c index 88f2151ff3..40b23297b2 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -7,6 +7,7 @@ * creation etc. */ #include "cache.h" +#include "string-list.h" #include "delta.h" #include "pack.h" #include "blob.h" @@ -19,6 +20,7 @@ #include "pack-revindex.h" #include "sha1-lookup.h" #include "bulk-checkin.h" +#include "streaming.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -54,6 +56,8 @@ static struct cached_object empty_tree = { 0 }; +static struct packed_git *last_found_pack; + static struct cached_object *find_cached_object(const unsigned char *sha1) { int i; @@ -226,7 +230,6 @@ char *sha1_pack_index_name(const unsigned char *sha1) struct alternate_object_database *alt_odb_list; static struct alternate_object_database **alt_odb_tail; -static void read_info_alternates(const char * alternates, int depth); static int git_open_noatime(const char *name); /* @@ -244,7 +247,7 @@ static int git_open_noatime(const char *name); * SHA1, an extra slash for the first level indirection, and the * terminating NUL. */ -static int link_alt_odb_entry(const char * entry, int len, const char * relative_base, int depth) +static int link_alt_odb_entry(const char *entry, const char *relative_base, int depth) { const char *objdir = get_object_directory(); struct alternate_object_database *ent; @@ -256,7 +259,7 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative strbuf_addstr(&pathbuf, real_path(relative_base)); strbuf_addch(&pathbuf, '/'); } - strbuf_add(&pathbuf, entry, len); + strbuf_addstr(&pathbuf, entry); normalize_path_copy(pathbuf.buf, pathbuf.buf); @@ -296,7 +299,7 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative return -1; } } - if (!memcmp(ent->base, objdir, pfxlen)) { + if (!strcmp(ent->base, objdir)) { free(ent); return -1; } @@ -314,10 +317,12 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative return 0; } -static void link_alt_odb_entries(const char *alt, const char *ep, int sep, +static void link_alt_odb_entries(const char *alt, int len, int sep, const char *relative_base, int depth) { - const char *cp, *last; + struct string_list entries = STRING_LIST_INIT_NODUP; + char *alt_copy; + int i; if (depth > 5) { error("%s: ignoring alternate object stores, nesting too deep.", @@ -325,33 +330,24 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep, return; } - last = alt; - while (last < ep) { - cp = last; - if (cp < ep && *cp == '#') { - while (cp < ep && *cp != sep) - cp++; - last = cp + 1; + alt_copy = xmemdupz(alt, len); + string_list_split_in_place(&entries, alt_copy, sep, -1); + for (i = 0; i < entries.nr; i++) { + const char *entry = entries.items[i].string; + if (entry[0] == '\0' || entry[0] == '#') continue; + if (!is_absolute_path(entry) && depth) { + error("%s: ignoring relative alternate object store %s", + relative_base, entry); + } else { + link_alt_odb_entry(entry, relative_base, depth); } - while (cp < ep && *cp != sep) - cp++; - if (last != cp) { - if (!is_absolute_path(last) && depth) { - error("%s: ignoring relative alternate object store %s", - relative_base, last); - } else { - link_alt_odb_entry(last, cp - last, - relative_base, depth); - } - } - while (cp < ep && *cp == sep) - cp++; - last = cp; } + string_list_clear(&entries, 0); + free(alt_copy); } -static void read_info_alternates(const char * relative_base, int depth) +void read_info_alternates(const char * relative_base, int depth) { char *map; size_t mapsz; @@ -375,7 +371,7 @@ static void read_info_alternates(const char * relative_base, int depth) map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - link_alt_odb_entries(map, map + mapsz, '\n', relative_base, depth); + link_alt_odb_entries(map, mapsz, '\n', relative_base, depth); munmap(map, mapsz); } @@ -389,7 +385,7 @@ void add_to_alternates_file(const char *reference) if (commit_lock_file(lock)) die("could not close alternates file"); if (alt_odb_tail) - link_alt_odb_entries(alt, alt + strlen(alt), '\n', NULL, 0); + link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0); } void foreach_alt_odb(alt_odb_fn fn, void *cb) @@ -413,7 +409,7 @@ void prepare_alt_odb(void) if (!alt) alt = ""; alt_odb_tail = &alt_odb_list; - link_alt_odb_entries(alt, alt + strlen(alt), PATH_SEP, NULL, 0); + link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0); read_info_alternates(get_object_directory(), 0); } @@ -720,6 +716,8 @@ void free_pack_by_name(const char *pack_name) close_pack_index(p); free(p->bad_object_sha1); *pp = p->next; + if (last_found_pack == p) + last_found_pack = NULL; free(p); return; } @@ -727,6 +725,24 @@ void free_pack_by_name(const char *pack_name) } } +static unsigned int get_max_fd_limit(void) +{ +#ifdef RLIMIT_NOFILE + struct rlimit lim; + + if (getrlimit(RLIMIT_NOFILE, &lim)) + die_errno("cannot get RLIMIT_NOFILE"); + + return lim.rlim_cur; +#elif defined(_SC_OPEN_MAX) + return sysconf(_SC_OPEN_MAX); +#elif defined(OPEN_MAX) + return OPEN_MAX; +#else + return 1; /* see the caller ;-) */ +#endif +} + /* * Do not call this directly as this leaks p->pack_fd on error return; * call open_packed_git() instead. @@ -743,13 +759,7 @@ static int open_packed_git_1(struct packed_git *p) return error("packfile %s index unavailable", p->pack_name); if (!pack_max_fds) { - struct rlimit lim; - unsigned int max_fds; - - if (getrlimit(RLIMIT_NOFILE, &lim)) - die_errno("cannot get RLIMIT_NOFILE"); - - max_fds = lim.rlim_cur; + unsigned int max_fds = get_max_fd_limit(); /* Save 3 for stdin/stdout/stderr, 22 for work */ if (25 < max_fds) @@ -1142,10 +1152,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1) return NULL; } -int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type) +/* + * With an in-core object data in "map", rehash it to make sure the + * object name actually matches "sha1" to detect object corruption. + * With "map" == NULL, try reading the object named with "sha1" using + * the streaming interface and rehash it to do the same. + */ +int check_sha1_signature(const unsigned char *sha1, void *map, + unsigned long size, const char *type) { unsigned char real_sha1[20]; - hash_sha1_file(map, size, type, real_sha1); + enum object_type obj_type; + struct git_istream *st; + git_SHA_CTX c; + char hdr[32]; + int hdrlen; + + if (map) { + hash_sha1_file(map, size, type, real_sha1); + return hashcmp(sha1, real_sha1) ? -1 : 0; + } + + st = open_istream(sha1, &obj_type, &size, NULL); + if (!st) + return -1; + + /* Generate the header */ + hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1; + + /* Sha1.. */ + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, hdrlen); + for (;;) { + char buf[1024 * 16]; + ssize_t readlen = read_istream(st, buf, sizeof(buf)); + + if (!readlen) + break; + git_SHA1_Update(&c, buf, readlen); + } + git_SHA1_Final(real_sha1, &c); + close_istream(st); return hashcmp(sha1, real_sha1) ? -1 : 0; } @@ -1202,6 +1249,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) if (!fstat(fd, &st)) { *size = xsize_t(st.st_size); + if (!*size) { + /* mmap() is forbidden on empty files */ + error("object file %s is empty", sha1_file_name(sha1)); + return NULL; + } map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); } close(fd); @@ -2010,54 +2062,58 @@ int is_pack_valid(struct packed_git *p) return !open_packed_git(p); } +static int fill_pack_entry(const unsigned char *sha1, + struct pack_entry *e, + struct packed_git *p) +{ + off_t offset; + + if (p->num_bad_objects) { + unsigned i; + for (i = 0; i < p->num_bad_objects; i++) + if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) + return 0; + } + + offset = find_pack_entry_one(sha1, p); + if (!offset) + return 0; + + /* + * We are about to tell the caller where they can locate the + * requested object. We better make sure the packfile is + * still here and can be accessed before supplying that + * answer, as it may have been deleted since the index was + * loaded! + */ + if (!is_pack_valid(p)) { + warning("packfile %s cannot be accessed", p->pack_name); + return 0; + } + e->offset = offset; + e->p = p; + hashcpy(e->sha1, sha1); + return 1; +} + static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) { - static struct packed_git *last_found = (void *)1; struct packed_git *p; - off_t offset; prepare_packed_git(); if (!packed_git) return 0; - p = (last_found == (void *)1) ? packed_git : last_found; - do { - if (p->num_bad_objects) { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) - goto next; - } + if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack)) + return 1; - offset = find_pack_entry_one(sha1, p); - if (offset) { - /* - * We are about to tell the caller where they can - * locate the requested object. We better make - * sure the packfile is still here and can be - * accessed before supplying that answer, as - * it may have been deleted since the index - * was loaded! - */ - if (!is_pack_valid(p)) { - warning("packfile %s cannot be accessed", p->pack_name); - goto next; - } - e->offset = offset; - e->p = p; - hashcpy(e->sha1, sha1); - last_found = p; - return 1; - } + for (p = packed_git; p; p = p->next) { + if (p == last_found_pack || !fill_pack_entry(sha1, e, p)) + continue; - next: - if (p == last_found) - p = packed_git; - else - p = p->next; - if (p == last_found) - p = p->next; - } while (p); + last_found_pack = p; + return 1; + } return 0; } @@ -2366,7 +2422,7 @@ int move_temp_to_file(const char *tmpfile, const char *filename) unlink_or_warn(tmpfile); if (ret) { if (ret != EEXIST) { - return error("unable to write sha1 filename %s: %s\n", filename, strerror(ret)); + return error("unable to write sha1 filename %s: %s", filename, strerror(ret)); } /* FIXME!!! Collision check here ? */ } @@ -2458,9 +2514,9 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, fd = create_tmpfile(tmp_file, sizeof(tmp_file), filename); if (fd < 0) { if (errno == EACCES) - return error("insufficient permission for adding an object to repository database %s\n", get_object_directory()); + return error("insufficient permission for adding an object to repository database %s", get_object_directory()); else - return error("unable to create temporary sha1 filename %s: %s\n", tmp_file, strerror(errno)); + return error("unable to create temporary file: %s", strerror(errno)); } /* Set it up */ @@ -2687,10 +2743,13 @@ static int index_core(unsigned char *sha1, int fd, size_t size, * This also bypasses the usual "convert-to-git" dance, and that is on * purpose. We could write a streaming version of the converting * functions and insert that before feeding the data to fast-import - * (or equivalent in-core API described above), but the primary - * motivation for trying to stream from the working tree file and to - * avoid mmaping it in core is to deal with large binary blobs, and - * by definition they do _not_ want to get any conversion. + * (or equivalent in-core API described above). However, that is + * somewhat complicated, as we do not know the size of the filter + * result, which we need to know beforehand when writing a git object. + * Since the primary motivation for trying to stream from the working + * tree file and to avoid mmaping it in core is to deal with large + * binary blobs, they generally do not want to get any conversion, and + * callers should avoid this code path when filters are requested. */ static int index_stream(unsigned char *sha1, int fd, size_t size, enum object_type type, const char *path, @@ -2707,7 +2766,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, if (!S_ISREG(st->st_mode)) ret = index_pipe(sha1, fd, type, path, flags); - else if (size <= big_file_threshold || type != OBJ_BLOB) + else if (size <= big_file_threshold || type != OBJ_BLOB || + (path && would_convert_to_git(path, NULL, 0, 0))) ret = index_core(sha1, fd, size, type, path, flags); else ret = index_stream(sha1, fd, size, type, path, flags); |
