diff options
Diffstat (limited to 'builtin/index-pack.c')
| -rw-r--r-- | builtin/index-pack.c | 262 | 
1 files changed, 214 insertions, 48 deletions
| diff --git a/builtin/index-pack.c b/builtin/index-pack.c index e40451ffb4..0945adbb3b 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -11,7 +11,7 @@  #include "exec_cmd.h"  static const char index_pack_usage[] = -"git index-pack [-v] [-o <index-file>] [ --keep | --keep=<msg> ] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])"; +"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";  struct object_entry {  	struct pack_idx_entry idx; @@ -19,6 +19,8 @@ struct object_entry {  	unsigned int hdr_size;  	enum object_type type;  	enum object_type real_type; +	unsigned delta_depth; +	int base_object_no;  };  union delta_base { @@ -66,6 +68,7 @@ static struct progress *progress;  static unsigned char input_buffer[4096];  static unsigned int input_offset, input_len;  static off_t consumed_bytes; +static unsigned deepest_delta;  static git_SHA_CTX input_ctx;  static uint32_t input_crc32;  static int input_fd, output_fd, pack_fd; @@ -265,7 +268,7 @@ static void unlink_base_data(struct base_data *c)  static void *unpack_entry_data(unsigned long offset, unsigned long size)  {  	int status; -	z_stream stream; +	git_zstream stream;  	void *buf = xmalloc(size);  	memset(&stream, 0, sizeof(stream)); @@ -355,7 +358,7 @@ static void *get_data_from_pack(struct object_entry *obj)  	off_t from = obj[0].idx.offset + obj[0].hdr_size;  	unsigned long len = obj[1].idx.offset - from;  	unsigned char *data, *inbuf; -	z_stream stream; +	git_zstream stream;  	int status;  	data = xmalloc(obj->size); @@ -389,7 +392,18 @@ static void *get_data_from_pack(struct object_entry *obj)  	return data;  } -static int find_delta(const union delta_base *base) +static int compare_delta_bases(const union delta_base *base1, +			       const union delta_base *base2, +			       enum object_type type1, +			       enum object_type type2) +{ +	int cmp = type1 - type2; +	if (cmp) +		return cmp; +	return memcmp(base1, base2, UNION_BASE_SZ); +} + +static int find_delta(const union delta_base *base, enum object_type type)  {  	int first = 0, last = nr_deltas; @@ -398,7 +412,8 @@ static int find_delta(const union delta_base *base)                  struct delta_entry *delta = &deltas[next];                  int cmp; -                cmp = memcmp(base, &delta->base, UNION_BASE_SZ); +		cmp = compare_delta_bases(base, &delta->base, +					  type, objects[delta->obj_no].type);                  if (!cmp)                          return next;                  if (cmp < 0) { @@ -411,9 +426,10 @@ static int find_delta(const union delta_base *base)  }  static void find_delta_children(const union delta_base *base, -				int *first_index, int *last_index) +				int *first_index, int *last_index, +				enum object_type type)  { -	int first = find_delta(base); +	int first = find_delta(base, type);  	int last = first;  	int end = nr_deltas - 1; @@ -483,12 +499,17 @@ static void sha1_object(const void *data, unsigned long size,  	}  } +static int is_delta_type(enum object_type type) +{ +	return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA); +} +  static void *get_base_data(struct base_data *c)  {  	if (!c->data) {  		struct object_entry *obj = c->obj; -		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { +		if (is_delta_type(obj->type)) {  			void *base = get_base_data(c->base);  			void *raw = get_data_from_pack(obj);  			c->data = patch_delta( @@ -515,6 +536,10 @@ static void resolve_delta(struct object_entry *delta_obj,  	void *base_data, *delta_data;  	delta_obj->real_type = base->obj->real_type; +	delta_obj->delta_depth = base->obj->delta_depth + 1; +	if (deepest_delta < delta_obj->delta_depth) +		deepest_delta = delta_obj->delta_depth; +	delta_obj->base_object_no = base->obj - objects;  	delta_data = get_data_from_pack(delta_obj);  	base_data = get_base_data(base);  	result->obj = delta_obj; @@ -541,11 +566,13 @@ static void find_unresolved_deltas(struct base_data *base,  		union delta_base base_spec;  		hashcpy(base_spec.sha1, base->obj->idx.sha1); -		find_delta_children(&base_spec, &ref_first, &ref_last); +		find_delta_children(&base_spec, +				    &ref_first, &ref_last, OBJ_REF_DELTA);  		memset(&base_spec, 0, sizeof(base_spec));  		base_spec.offset = base->obj->idx.offset; -		find_delta_children(&base_spec, &ofs_first, &ofs_last); +		find_delta_children(&base_spec, +				    &ofs_first, &ofs_last, OBJ_OFS_DELTA);  	}  	if (ref_last == -1 && ofs_last == -1) { @@ -557,24 +584,24 @@ static void find_unresolved_deltas(struct base_data *base,  	for (i = ref_first; i <= ref_last; i++) {  		struct object_entry *child = objects + deltas[i].obj_no; -		if (child->real_type == OBJ_REF_DELTA) { -			struct base_data result; -			resolve_delta(child, base, &result); -			if (i == ref_last && ofs_last == -1) -				free_base_data(base); -			find_unresolved_deltas(&result, base); -		} +		struct base_data result; + +		assert(child->real_type == OBJ_REF_DELTA); +		resolve_delta(child, base, &result); +		if (i == ref_last && ofs_last == -1) +			free_base_data(base); +		find_unresolved_deltas(&result, base);  	}  	for (i = ofs_first; i <= ofs_last; i++) {  		struct object_entry *child = objects + deltas[i].obj_no; -		if (child->real_type == OBJ_OFS_DELTA) { -			struct base_data result; -			resolve_delta(child, base, &result); -			if (i == ofs_last) -				free_base_data(base); -			find_unresolved_deltas(&result, base); -		} +		struct base_data result; + +		assert(child->real_type == OBJ_OFS_DELTA); +		resolve_delta(child, base, &result); +		if (i == ofs_last) +			free_base_data(base); +		find_unresolved_deltas(&result, base);  	}  	unlink_base_data(base); @@ -584,7 +611,11 @@ static int compare_delta_entry(const void *a, const void *b)  {  	const struct delta_entry *delta_a = a;  	const struct delta_entry *delta_b = b; -	return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ); + +	/* group by type (ref vs ofs) and then by value (sha-1 or offset) */ +	return compare_delta_bases(&delta_a->base, &delta_b->base, +				   objects[delta_a->obj_no].type, +				   objects[delta_b->obj_no].type);  }  /* Parse all objects and return the pack content SHA1 hash */ @@ -608,7 +639,7 @@ static void parse_pack_objects(unsigned char *sha1)  		struct object_entry *obj = &objects[i];  		void *data = unpack_raw_entry(obj, &delta->base);  		obj->real_type = obj->type; -		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { +		if (is_delta_type(obj->type)) {  			nr_deltas++;  			delta->obj_no = i;  			delta++; @@ -655,7 +686,7 @@ static void parse_pack_objects(unsigned char *sha1)  		struct object_entry *obj = &objects[i];  		struct base_data base_obj; -		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) +		if (is_delta_type(obj->type))  			continue;  		base_obj.obj = obj;  		base_obj.data = NULL; @@ -666,26 +697,26 @@ static void parse_pack_objects(unsigned char *sha1)  static int write_compressed(struct sha1file *f, void *in, unsigned int size)  { -	z_stream stream; +	git_zstream stream;  	int status;  	unsigned char outbuf[4096];  	memset(&stream, 0, sizeof(stream)); -	deflateInit(&stream, zlib_compression_level); +	git_deflate_init(&stream, zlib_compression_level);  	stream.next_in = in;  	stream.avail_in = size;  	do {  		stream.next_out = outbuf;  		stream.avail_out = sizeof(outbuf); -		status = deflate(&stream, Z_FINISH); +		status = git_deflate(&stream, Z_FINISH);  		sha1write(f, outbuf, sizeof(outbuf) - stream.avail_out);  	} while (status == Z_OK);  	if (status != Z_STREAM_END)  		die("unable to deflate appended object (%d)", status);  	size = stream.total_out; -	deflateEnd(&stream); +	git_deflate_end(&stream);  	return size;  } @@ -859,24 +890,137 @@ static void final(const char *final_pack_name, const char *curr_pack_name,  static int git_index_pack_config(const char *k, const char *v, void *cb)  { +	struct pack_idx_option *opts = cb; +  	if (!strcmp(k, "pack.indexversion")) { -		pack_idx_default_version = git_config_int(k, v); -		if (pack_idx_default_version > 2) -			die("bad pack.indexversion=%"PRIu32, -				pack_idx_default_version); +		opts->version = git_config_int(k, v); +		if (opts->version > 2) +			die("bad pack.indexversion=%"PRIu32, opts->version);  		return 0;  	}  	return git_default_config(k, v, cb);  } +static int cmp_uint32(const void *a_, const void *b_) +{ +	uint32_t a = *((uint32_t *)a_); +	uint32_t b = *((uint32_t *)b_); + +	return (a < b) ? -1 : (a != b); +} + +static void read_v2_anomalous_offsets(struct packed_git *p, +				      struct pack_idx_option *opts) +{ +	const uint32_t *idx1, *idx2; +	uint32_t i; + +	/* The address of the 4-byte offset table */ +	idx1 = (((const uint32_t *)p->index_data) +		+ 2 /* 8-byte header */ +		+ 256 /* fan out */ +		+ 5 * p->num_objects /* 20-byte SHA-1 table */ +		+ p->num_objects /* CRC32 table */ +		); + +	/* The address of the 8-byte offset table */ +	idx2 = idx1 + p->num_objects; + +	for (i = 0; i < p->num_objects; i++) { +		uint32_t off = ntohl(idx1[i]); +		if (!(off & 0x80000000)) +			continue; +		off = off & 0x7fffffff; +		if (idx2[off * 2]) +			continue; +		/* +		 * The real offset is ntohl(idx2[off * 2]) in high 4 +		 * octets, and ntohl(idx2[off * 2 + 1]) in low 4 +		 * octets.  But idx2[off * 2] is Zero!!! +		 */ +		ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc); +		opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]); +	} + +	if (1 < opts->anomaly_nr) +		qsort(opts->anomaly, opts->anomaly_nr, sizeof(uint32_t), cmp_uint32); +} + +static void read_idx_option(struct pack_idx_option *opts, const char *pack_name) +{ +	struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1); + +	if (!p) +		die("Cannot open existing pack file '%s'", pack_name); +	if (open_pack_index(p)) +		die("Cannot open existing pack idx file for '%s'", pack_name); + +	/* Read the attributes from the existing idx file */ +	opts->version = p->index_version; + +	if (opts->version == 2) +		read_v2_anomalous_offsets(p, opts); + +	/* +	 * Get rid of the idx file as we do not need it anymore. +	 * NEEDSWORK: extract this bit from free_pack_by_name() in +	 * sha1_file.c, perhaps?  It shouldn't matter very much as we +	 * know we haven't installed this pack (hence we never have +	 * read anything from it). +	 */ +	close_pack_index(p); +	free(p); +} + +static void show_pack_info(int stat_only) +{ +	int i, baseobjects = nr_objects - nr_deltas; +	unsigned long *chain_histogram = NULL; + +	if (deepest_delta) +		chain_histogram = xcalloc(deepest_delta, sizeof(unsigned long)); + +	for (i = 0; i < nr_objects; i++) { +		struct object_entry *obj = &objects[i]; + +		if (is_delta_type(obj->type)) +			chain_histogram[obj->delta_depth - 1]++; +		if (stat_only) +			continue; +		printf("%s %-6s %lu %lu %"PRIuMAX, +		       sha1_to_hex(obj->idx.sha1), +		       typename(obj->real_type), obj->size, +		       (unsigned long)(obj[1].idx.offset - obj->idx.offset), +		       (uintmax_t)obj->idx.offset); +		if (is_delta_type(obj->type)) { +			struct object_entry *bobj = &objects[obj->base_object_no]; +			printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1)); +		} +		putchar('\n'); +	} + +	if (baseobjects) +		printf("non delta: %d object%s\n", +		       baseobjects, baseobjects > 1 ? "s" : ""); +	for (i = 0; i < deepest_delta; i++) { +		if (!chain_histogram[i]) +			continue; +		printf("chain length = %d: %lu object%s\n", +		       i + 1, +		       chain_histogram[i], +		       chain_histogram[i] > 1 ? "s" : ""); +	} +} +  int cmd_index_pack(int argc, const char **argv, const char *prefix)  { -	int i, fix_thin_pack = 0; +	int i, fix_thin_pack = 0, verify = 0, stat_only = 0, stat = 0;  	const char *curr_pack, *curr_index;  	const char *index_name = NULL, *pack_name = NULL;  	const char *keep_name = NULL, *keep_msg = NULL;  	char *index_name_buf = NULL, *keep_name_buf = NULL;  	struct pack_idx_entry **idx_objects; +	struct pack_idx_option opts;  	unsigned char pack_sha1[20];  	if (argc == 2 && !strcmp(argv[1], "-h")) @@ -884,7 +1028,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)  	read_replace_refs = 0; -	git_config(git_index_pack_config, NULL); +	reset_pack_idx_option(&opts); +	git_config(git_index_pack_config, &opts);  	if (prefix && chdir(prefix))  		die("Cannot come back to cwd"); @@ -898,6 +1043,15 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)  				fix_thin_pack = 1;  			} else if (!strcmp(arg, "--strict")) {  				strict = 1; +			} else if (!strcmp(arg, "--verify")) { +				verify = 1; +			} else if (!strcmp(arg, "--verify-stat")) { +				verify = 1; +				stat = 1; +			} else if (!strcmp(arg, "--verify-stat-only")) { +				verify = 1; +				stat = 1; +				stat_only = 1;  			} else if (!strcmp(arg, "--keep")) {  				keep_msg = "";  			} else if (!prefixcmp(arg, "--keep=")) { @@ -923,12 +1077,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)  				index_name = argv[++i];  			} else if (!prefixcmp(arg, "--index-version=")) {  				char *c; -				pack_idx_default_version = strtoul(arg + 16, &c, 10); -				if (pack_idx_default_version > 2) +				opts.version = strtoul(arg + 16, &c, 10); +				if (opts.version > 2)  					die("bad %s", arg);  				if (*c == ',') -					pack_idx_off32_limit = strtoul(c+1, &c, 0); -				if (*c || pack_idx_off32_limit & 0x80000000) +					opts.off32_limit = strtoul(c+1, &c, 0); +				if (*c || opts.off32_limit & 0x80000000)  					die("bad %s", arg);  			} else  				usage(index_pack_usage); @@ -964,11 +1118,17 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)  		strcpy(keep_name_buf + len - 5, ".keep");  		keep_name = keep_name_buf;  	} +	if (verify) { +		if (!index_name) +			die("--verify with no packfile name given"); +		read_idx_option(&opts, index_name); +		opts.flags |= WRITE_IDX_VERIFY; +	}  	curr_pack = open_pack_file(pack_name);  	parse_pack_header(); -	objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry)); -	deltas = xmalloc(nr_objects * sizeof(struct delta_entry)); +	objects = xcalloc(nr_objects + 1, sizeof(struct object_entry)); +	deltas = xcalloc(nr_objects, sizeof(struct delta_entry));  	parse_pack_objects(pack_sha1);  	if (nr_deltas == nr_resolved_deltas) {  		stop_progress(&progress); @@ -1008,16 +1168,22 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)  	if (strict)  		check_objects(); +	if (stat) +		show_pack_info(stat_only); +  	idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *));  	for (i = 0; i < nr_objects; i++)  		idx_objects[i] = &objects[i].idx; -	curr_index = write_idx_file(index_name, idx_objects, nr_objects, pack_sha1); +	curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1);  	free(idx_objects); -	final(pack_name, curr_pack, -		index_name, curr_index, -		keep_name, keep_msg, -		pack_sha1); +	if (!verify) +		final(pack_name, curr_pack, +		      index_name, curr_index, +		      keep_name, keep_msg, +		      pack_sha1); +	else +		close(input_fd);  	free(objects);  	free(index_name_buf);  	free(keep_name_buf); | 
