diff options
Diffstat (limited to 'builtin/unpack-objects.c')
| -rw-r--r-- | builtin/unpack-objects.c | 568 | 
1 files changed, 568 insertions, 0 deletions
| diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c new file mode 100644 index 0000000000..14e04e6795 --- /dev/null +++ b/builtin/unpack-objects.c @@ -0,0 +1,568 @@ +#include "builtin.h" +#include "cache.h" +#include "object.h" +#include "delta.h" +#include "pack.h" +#include "blob.h" +#include "commit.h" +#include "tag.h" +#include "tree.h" +#include "tree-walk.h" +#include "progress.h" +#include "decorate.h" +#include "fsck.h" + +static int dry_run, quiet, recover, has_errors, strict; +static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict] < pack-file"; + +/* We always read in 4kB chunks. */ +static unsigned char buffer[4096]; +static unsigned int offset, len; +static off_t consumed_bytes; +static git_SHA_CTX ctx; + +/* + * When running under --strict mode, objects whose reachability are + * suspect are kept in core without getting written in the object + * store. + */ +struct obj_buffer { +	char *buffer; +	unsigned long size; +}; + +static struct decoration obj_decorate; + +static struct obj_buffer *lookup_object_buffer(struct object *base) +{ +	return lookup_decoration(&obj_decorate, base); +} + +static void add_object_buffer(struct object *object, char *buffer, unsigned long size) +{ +	struct obj_buffer *obj; +	obj = xcalloc(1, sizeof(struct obj_buffer)); +	obj->buffer = buffer; +	obj->size = size; +	if (add_decoration(&obj_decorate, object, obj)) +		die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1)); +} + +/* + * Make sure at least "min" bytes are available in the buffer, and + * return the pointer to the buffer. + */ +static void *fill(int min) +{ +	if (min <= len) +		return buffer + offset; +	if (min > sizeof(buffer)) +		die("cannot fill %d bytes", min); +	if (offset) { +		git_SHA1_Update(&ctx, buffer, offset); +		memmove(buffer, buffer + offset, len); +		offset = 0; +	} +	do { +		ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len); +		if (ret <= 0) { +			if (!ret) +				die("early EOF"); +			die_errno("read error on input"); +		} +		len += ret; +	} while (len < min); +	return buffer; +} + +static void use(int bytes) +{ +	if (bytes > len) +		die("used more bytes than were available"); +	len -= bytes; +	offset += bytes; + +	/* make sure off_t is sufficiently large not to wrap */ +	if (signed_add_overflows(consumed_bytes, bytes)) +		die("pack too large for current definition of off_t"); +	consumed_bytes += bytes; +} + +static void *get_data(unsigned long size) +{ +	git_zstream stream; +	void *buf = xmalloc(size); + +	memset(&stream, 0, sizeof(stream)); + +	stream.next_out = buf; +	stream.avail_out = size; +	stream.next_in = fill(1); +	stream.avail_in = len; +	git_inflate_init(&stream); + +	for (;;) { +		int ret = git_inflate(&stream, 0); +		use(len - stream.avail_in); +		if (stream.total_out == size && ret == Z_STREAM_END) +			break; +		if (ret != Z_OK) { +			error("inflate returned %d\n", ret); +			free(buf); +			buf = NULL; +			if (!recover) +				exit(1); +			has_errors = 1; +			break; +		} +		stream.next_in = fill(1); +		stream.avail_in = len; +	} +	git_inflate_end(&stream); +	return buf; +} + +struct delta_info { +	unsigned char base_sha1[20]; +	unsigned nr; +	off_t base_offset; +	unsigned long size; +	void *delta; +	struct delta_info *next; +}; + +static struct delta_info *delta_list; + +static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1, +			      off_t base_offset, +			      void *delta, unsigned long size) +{ +	struct delta_info *info = xmalloc(sizeof(*info)); + +	hashcpy(info->base_sha1, base_sha1); +	info->base_offset = base_offset; +	info->size = size; +	info->delta = delta; +	info->nr = nr; +	info->next = delta_list; +	delta_list = info; +} + +struct obj_info { +	off_t offset; +	unsigned char sha1[20]; +	struct object *obj; +}; + +#define FLAG_OPEN (1u<<20) +#define FLAG_WRITTEN (1u<<21) + +static struct obj_info *obj_list; +static unsigned nr_objects; + +/* + * Called only from check_object() after it verified this object + * is Ok. + */ +static void write_cached_object(struct object *obj) +{ +	unsigned char sha1[20]; +	struct obj_buffer *obj_buf = lookup_object_buffer(obj); +	if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0) +		die("failed to write object %s", sha1_to_hex(obj->sha1)); +	obj->flags |= FLAG_WRITTEN; +} + +/* + * At the very end of the processing, write_rest() scans the objects + * that have reachability requirements and calls this function. + * Verify its reachability and validity recursively and write it out. + */ +static int check_object(struct object *obj, int type, void *data) +{ +	if (!obj) +		return 1; + +	if (obj->flags & FLAG_WRITTEN) +		return 0; + +	if (type != OBJ_ANY && obj->type != type) +		die("object type mismatch"); + +	if (!(obj->flags & FLAG_OPEN)) { +		unsigned long size; +		int type = sha1_object_info(obj->sha1, &size); +		if (type != obj->type || type <= 0) +			die("object of unexpected type"); +		obj->flags |= FLAG_WRITTEN; +		return 0; +	} + +	if (fsck_object(obj, 1, fsck_error_function)) +		die("Error in object"); +	if (fsck_walk(obj, check_object, NULL)) +		die("Error on reachable objects of %s", sha1_to_hex(obj->sha1)); +	write_cached_object(obj); +	return 0; +} + +static void write_rest(void) +{ +	unsigned i; +	for (i = 0; i < nr_objects; i++) { +		if (obj_list[i].obj) +			check_object(obj_list[i].obj, OBJ_ANY, NULL); +	} +} + +static void added_object(unsigned nr, enum object_type type, +			 void *data, unsigned long size); + +/* + * Write out nr-th object from the list, now we know the contents + * of it.  Under --strict, this buffers structured objects in-core, + * to be checked at the end. + */ +static void write_object(unsigned nr, enum object_type type, +			 void *buf, unsigned long size) +{ +	if (!strict) { +		if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) +			die("failed to write object"); +		added_object(nr, type, buf, size); +		free(buf); +		obj_list[nr].obj = NULL; +	} else if (type == OBJ_BLOB) { +		struct blob *blob; +		if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) +			die("failed to write object"); +		added_object(nr, type, buf, size); +		free(buf); + +		blob = lookup_blob(obj_list[nr].sha1); +		if (blob) +			blob->object.flags |= FLAG_WRITTEN; +		else +			die("invalid blob object"); +		obj_list[nr].obj = NULL; +	} else { +		struct object *obj; +		int eaten; +		hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1); +		added_object(nr, type, buf, size); +		obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten); +		if (!obj) +			die("invalid %s", typename(type)); +		add_object_buffer(obj, buf, size); +		obj->flags |= FLAG_OPEN; +		obj_list[nr].obj = obj; +	} +} + +static void resolve_delta(unsigned nr, enum object_type type, +			  void *base, unsigned long base_size, +			  void *delta, unsigned long delta_size) +{ +	void *result; +	unsigned long result_size; + +	result = patch_delta(base, base_size, +			     delta, delta_size, +			     &result_size); +	if (!result) +		die("failed to apply delta"); +	free(delta); +	write_object(nr, type, result, result_size); +} + +/* + * We now know the contents of an object (which is nr-th in the pack); + * resolve all the deltified objects that are based on it. + */ +static void added_object(unsigned nr, enum object_type type, +			 void *data, unsigned long size) +{ +	struct delta_info **p = &delta_list; +	struct delta_info *info; + +	while ((info = *p) != NULL) { +		if (!hashcmp(info->base_sha1, obj_list[nr].sha1) || +		    info->base_offset == obj_list[nr].offset) { +			*p = info->next; +			p = &delta_list; +			resolve_delta(info->nr, type, data, size, +				      info->delta, info->size); +			free(info); +			continue; +		} +		p = &info->next; +	} +} + +static void unpack_non_delta_entry(enum object_type type, unsigned long size, +				   unsigned nr) +{ +	void *buf = get_data(size); + +	if (!dry_run && buf) +		write_object(nr, type, buf, size); +	else +		free(buf); +} + +static int resolve_against_held(unsigned nr, const unsigned char *base, +				void *delta_data, unsigned long delta_size) +{ +	struct object *obj; +	struct obj_buffer *obj_buffer; +	obj = lookup_object(base); +	if (!obj) +		return 0; +	obj_buffer = lookup_object_buffer(obj); +	if (!obj_buffer) +		return 0; +	resolve_delta(nr, obj->type, obj_buffer->buffer, +		      obj_buffer->size, delta_data, delta_size); +	return 1; +} + +static void unpack_delta_entry(enum object_type type, unsigned long delta_size, +			       unsigned nr) +{ +	void *delta_data, *base; +	unsigned long base_size; +	unsigned char base_sha1[20]; + +	if (type == OBJ_REF_DELTA) { +		hashcpy(base_sha1, fill(20)); +		use(20); +		delta_data = get_data(delta_size); +		if (dry_run || !delta_data) { +			free(delta_data); +			return; +		} +		if (has_sha1_file(base_sha1)) +			; /* Ok we have this one */ +		else if (resolve_against_held(nr, base_sha1, +					      delta_data, delta_size)) +			return; /* we are done */ +		else { +			/* cannot resolve yet --- queue it */ +			hashcpy(obj_list[nr].sha1, null_sha1); +			add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size); +			return; +		} +	} else { +		unsigned base_found = 0; +		unsigned char *pack, c; +		off_t base_offset; +		unsigned lo, mid, hi; + +		pack = fill(1); +		c = *pack; +		use(1); +		base_offset = c & 127; +		while (c & 128) { +			base_offset += 1; +			if (!base_offset || MSB(base_offset, 7)) +				die("offset value overflow for delta base object"); +			pack = fill(1); +			c = *pack; +			use(1); +			base_offset = (base_offset << 7) + (c & 127); +		} +		base_offset = obj_list[nr].offset - base_offset; +		if (base_offset <= 0 || base_offset >= obj_list[nr].offset) +			die("offset value out of bound for delta base object"); + +		delta_data = get_data(delta_size); +		if (dry_run || !delta_data) { +			free(delta_data); +			return; +		} +		lo = 0; +		hi = nr; +		while (lo < hi) { +			mid = (lo + hi)/2; +			if (base_offset < obj_list[mid].offset) { +				hi = mid; +			} else if (base_offset > obj_list[mid].offset) { +				lo = mid + 1; +			} else { +				hashcpy(base_sha1, obj_list[mid].sha1); +				base_found = !is_null_sha1(base_sha1); +				break; +			} +		} +		if (!base_found) { +			/* +			 * The delta base object is itself a delta that +			 * has not been resolved yet. +			 */ +			hashcpy(obj_list[nr].sha1, null_sha1); +			add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size); +			return; +		} +	} + +	if (resolve_against_held(nr, base_sha1, delta_data, delta_size)) +		return; + +	base = read_sha1_file(base_sha1, &type, &base_size); +	if (!base) { +		error("failed to read delta-pack base object %s", +		      sha1_to_hex(base_sha1)); +		if (!recover) +			exit(1); +		has_errors = 1; +		return; +	} +	resolve_delta(nr, type, base, base_size, delta_data, delta_size); +	free(base); +} + +static void unpack_one(unsigned nr) +{ +	unsigned shift; +	unsigned char *pack; +	unsigned long size, c; +	enum object_type type; + +	obj_list[nr].offset = consumed_bytes; + +	pack = fill(1); +	c = *pack; +	use(1); +	type = (c >> 4) & 7; +	size = (c & 15); +	shift = 4; +	while (c & 0x80) { +		pack = fill(1); +		c = *pack; +		use(1); +		size += (c & 0x7f) << shift; +		shift += 7; +	} + +	switch (type) { +	case OBJ_COMMIT: +	case OBJ_TREE: +	case OBJ_BLOB: +	case OBJ_TAG: +		unpack_non_delta_entry(type, size, nr); +		return; +	case OBJ_REF_DELTA: +	case OBJ_OFS_DELTA: +		unpack_delta_entry(type, size, nr); +		return; +	default: +		error("bad object type %d", type); +		has_errors = 1; +		if (recover) +			return; +		exit(1); +	} +} + +static void unpack_all(void) +{ +	int i; +	struct progress *progress = NULL; +	struct pack_header *hdr = fill(sizeof(struct pack_header)); + +	nr_objects = ntohl(hdr->hdr_entries); + +	if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE) +		die("bad pack file"); +	if (!pack_version_ok(hdr->hdr_version)) +		die("unknown pack file version %"PRIu32, +			ntohl(hdr->hdr_version)); +	use(sizeof(struct pack_header)); + +	if (!quiet) +		progress = start_progress("Unpacking objects", nr_objects); +	obj_list = xcalloc(nr_objects, sizeof(*obj_list)); +	for (i = 0; i < nr_objects; i++) { +		unpack_one(i); +		display_progress(progress, i + 1); +	} +	stop_progress(&progress); + +	if (delta_list) +		die("unresolved deltas left after unpacking"); +} + +int cmd_unpack_objects(int argc, const char **argv, const char *prefix) +{ +	int i; +	unsigned char sha1[20]; + +	read_replace_refs = 0; + +	git_config(git_default_config, NULL); + +	quiet = !isatty(2); + +	for (i = 1 ; i < argc; i++) { +		const char *arg = argv[i]; + +		if (*arg == '-') { +			if (!strcmp(arg, "-n")) { +				dry_run = 1; +				continue; +			} +			if (!strcmp(arg, "-q")) { +				quiet = 1; +				continue; +			} +			if (!strcmp(arg, "-r")) { +				recover = 1; +				continue; +			} +			if (!strcmp(arg, "--strict")) { +				strict = 1; +				continue; +			} +			if (!prefixcmp(arg, "--pack_header=")) { +				struct pack_header *hdr; +				char *c; + +				hdr = (struct pack_header *)buffer; +				hdr->hdr_signature = htonl(PACK_SIGNATURE); +				hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10)); +				if (*c != ',') +					die("bad %s", arg); +				hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10)); +				if (*c) +					die("bad %s", arg); +				len = sizeof(*hdr); +				continue; +			} +			usage(unpack_usage); +		} + +		/* We don't take any non-flag arguments now.. Maybe some day */ +		usage(unpack_usage); +	} +	git_SHA1_Init(&ctx); +	unpack_all(); +	git_SHA1_Update(&ctx, buffer, offset); +	git_SHA1_Final(sha1, &ctx); +	if (strict) +		write_rest(); +	if (hashcmp(fill(20), sha1)) +		die("final sha1 did not match"); +	use(20); + +	/* Write the last part of the buffer to stdout */ +	while (len) { +		int ret = xwrite(1, buffer + offset, len); +		if (ret <= 0) +			break; +		len -= ret; +		offset += ret; +	} + +	/* All done */ +	return has_errors; +} | 
