summaryrefslogtreecommitdiff
path: root/t/t5620-backfill.sh
blob: 58c81556e72c899a6ff5a7a7d0169441bb62aa8f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/bin/sh

test_description='git backfill on partial clones'

# Pin the initial branch name to "main": the clones in this script select
# their branch explicitly with --branch=main.
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME

# Pull in the test framework. The helpers used below (test_expect_success,
# test_line_count, test_trace2_data, ...) are not defined in this file.
. ./test-lib.sh

# We create objects in the 'src' repo.
#
# Each pass of the inner loop rewrites six files (one per directory: the
# root, a, a/b, a/b/c, d and d/e) and commits them. With two versions of
# four file names in six directories this yields 2 * 4 * 6 = 48 distinct
# blobs, which is the number the later tests expect backfill to fetch.
test_expect_success 'setup repo for object creation' '
	git init src &&

	mkdir -p src/a/b/c &&
	mkdir -p src/d/e &&

	for i in 1 2
	do
		for n in 1 2 3 4
		do
			echo "Version $i of file $n" >src/file.$n.txt &&
			echo "Version $i of file a/$n" >src/a/file.$n.txt &&
			echo "Version $i of file a/b/$n" >src/a/b/file.$n.txt &&
			echo "Version $i of file a/b/c/$n" >src/a/b/c/file.$n.txt &&
			echo "Version $i of file d/$n" >src/d/file.$n.txt &&
			echo "Version $i of file d/e/$n" >src/d/e/file.$n.txt &&
			git -C src add . &&
			# A failure inside the loop must abort the whole test, not
			# just the current iteration.
			git -C src commit -m "Iteration $n" || return 1
		done
	done
'

# The partial clones below need an origin to fetch from: make a bare copy
# of 'src' named 'srv.bare' and enable the two upload-pack settings
# (allowfilter, allowanysha1inwant) on it.
test_expect_success 'setup bare clone for server' '
	git clone --bare "file://$(pwd)/src" srv.bare &&
	(
		cd srv.bare &&
		git config --local uploadpack.allowfilter 1 &&
		git config --local uploadpack.allowanysha1inwant 1
	)
'

# Baseline case: a blob-less partial clone of "srv.bare" followed by a
# plain "git backfill".
test_expect_success 'do partial clone 1, backfill gets all objects' '
	git clone --no-checkout --filter=blob:none \
		--single-branch --branch=main \
		"file://$(pwd)/srv.bare" backfill1 &&

	# With no options, backfill downloads everything reachable from HEAD.
	GIT_TRACE2_EVENT="$(pwd)/backfill-file-trace" \
		git -C backfill1 backfill &&

	# The trace must show the partial clone machinery fetching 48 objects.
	test_trace2_data promisor fetch_count 48 <backfill-file-trace &&

	# Afterwards nothing may be reported as missing.
	git -C backfill1 rev-list --quiet --objects --missing=print HEAD >revs2 &&
	test_line_count = 0 revs2
'

# Same clone as before, but backfill is asked to work in batches via
# --min-batch-size=20.
test_expect_success 'do partial clone 2, backfill min batch size' '
	git clone --no-checkout --filter=blob:none \
		--single-branch --branch=main \
		"file://$(pwd)/srv.bare" backfill2 &&

	GIT_TRACE2_EVENT="$(pwd)/batch-trace" \
		git -C backfill2 backfill --min-batch-size=20 &&

	# The trace must contain two fetches of 20 objects each, plus one
	# fetch of 8 objects.
	test_trace2_data promisor fetch_count 20 <batch-trace >matches &&
	test_line_count = 2 matches &&
	test_trace2_data promisor fetch_count 8 <batch-trace &&

	# Afterwards nothing may be reported as missing.
	git -C backfill2 rev-list --quiet --objects --missing=print HEAD >revs2 &&
	test_line_count = 0 revs2
'

# Asking for --sparse in a repository that has no sparse-checkout must
# fail with a recognizable error message.
test_expect_success 'backfill --sparse without sparse-checkout fails' '
	git init not-sparse &&
	(
		cd not-sparse &&
		test_must_fail git backfill --sparse 2>../err
	) &&
	grep "problem loading sparse-checkout" err
'

# Exercise --sparse on a clone made with "git clone --sparse": backfill is
# run once with the clone's initial sparse-checkout, again after the
# sparse-checkout is set to "d", and finally with --no-sparse. After each
# step the number of objects still reported missing is checked (40, then
# 24, then 0), together with the fetch and path-walk counts in the trace.
test_expect_success 'backfill --sparse' '
	git clone --sparse --filter=blob:none		\
		--single-branch --branch=main 		\
		"file://$(pwd)/srv.bare" backfill3 &&

	# Initial checkout includes four files at root.
	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 44 missing &&

	# Initial sparse-checkout is just the files at root, so we get the
	# older versions of the four files at tip.
	GIT_TRACE2_EVENT="$(pwd)/sparse-trace1" git \
		-C backfill3 backfill --sparse &&
	test_trace2_data promisor fetch_count 4 <sparse-trace1 &&
	test_trace2_data path-walk paths 5 <sparse-trace1 &&
	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 40 missing &&

	# Expand the sparse-checkout to include 'd' recursively. This
	# engages the algorithm to skip the trees for 'a'. Note that
	# the "sparse-checkout set" command downloads the objects at tip
	# to satisfy the current checkout.
	git -C backfill3 sparse-checkout set d &&
	GIT_TRACE2_EVENT="$(pwd)/sparse-trace2" git \
		-C backfill3 backfill --sparse &&
	test_trace2_data promisor fetch_count 8 <sparse-trace2 &&
	test_trace2_data path-walk paths 15 <sparse-trace2 &&
	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 24 missing &&

	# Disabling the --sparse option (on by default) will download everything
	git -C backfill3 backfill --no-sparse &&
	git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 0 missing
'

# Non-cone sparse-checkout with a single positive pattern
# ("**/file.1.txt"). Starting from 48 missing objects, the checkout brings
# the count to 42 and "backfill --sparse" is expected to fetch 6 more,
# leaving 36 missing.
test_expect_success 'backfill --sparse without cone mode (positive)' '
	git clone --no-checkout --filter=blob:none		\
		--single-branch --branch=main 		\
		"file://$(pwd)/srv.bare" backfill4 &&

	# No blobs yet
	git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 48 missing &&

	# Define sparse-checkout by filename regardless of parent directory.
	# This downloads 6 blobs to satisfy the checkout.
	git -C backfill4 sparse-checkout set --no-cone "**/file.1.txt" &&
	git -C backfill4 checkout main &&

	# Track new blob count
	git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 42 missing &&

	GIT_TRACE2_EVENT="$(pwd)/no-cone-trace1" git \
		-C backfill4 backfill --sparse &&
	test_trace2_data promisor fetch_count 6 <no-cone-trace1 &&

	# This walk needed to visit all directories to search for these paths.
	test_trace2_data path-walk paths 12 <no-cone-trace1 &&
	git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 36 missing
'

# Non-cone sparse-checkout combining a positive pattern with a negated one
# ("**/file*" and "!**/file.1.txt"). Starting from 48 missing objects, the
# checkout brings the count to 30 and "backfill --sparse" is expected to
# fetch 18 more, leaving 12 missing. The path-walk count checked here (24)
# is 12 higher than in the positive-pattern test before it.
test_expect_success 'backfill --sparse without cone mode (negative)' '
	git clone --no-checkout --filter=blob:none		\
		--single-branch --branch=main 		\
		"file://$(pwd)/srv.bare" backfill5 &&

	# No blobs yet
	git -C backfill5 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 48 missing &&

	# Define sparse-checkout by filename regardless of parent directory.
	# This downloads 18 blobs to satisfy the checkout
	git -C backfill5 sparse-checkout set --no-cone "**/file*" "!**/file.1.txt" &&
	git -C backfill5 checkout main &&

	# Track new blob count
	git -C backfill5 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 30 missing &&

	GIT_TRACE2_EVENT="$(pwd)/no-cone-trace2" git \
		-C backfill5 backfill --sparse &&
	test_trace2_data promisor fetch_count 18 <no-cone-trace2 &&

	# This walk needed to visit all directories to search for these paths, plus
	# 12 extra "file.?.txt" paths than the previous test.
	test_trace2_data path-walk paths 24 <no-cone-trace2 &&
	git -C backfill5 rev-list --quiet --objects --missing=print HEAD >missing &&
	test_line_count = 12 missing
'

# The remaining tests talk to a web server: load the httpd helper library
# and start the server. As the note at the end of this script says, what
# follows only runs when httpd is available and enabled.
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd

# Publish a bare copy of 'src' under the HTTP document root and make a
# blob-less partial clone of it ("backfill-http") through the smart HTTP
# endpoint. The next test backfills that clone from the same server.
test_expect_success 'create a partial clone over HTTP' '
	SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
	rm -rf "$SERVER" &&
	git clone --bare "file://$(pwd)/src" "$SERVER" &&

	# Use plain "git config" here, as for srv.bare above, rather than
	# test_config: test_config removes the setting again when this test
	# finishes, but the following test still fetches from this server.
	git -C "$SERVER" config --local uploadpack.allowfilter 1 &&
	git -C "$SERVER" config --local uploadpack.allowanysha1inwant 1 &&

	git clone --no-checkout --filter=blob:none \
		"$HTTPD_URL/smart/server" backfill-http
'

# Run "git backfill" in the clone made over HTTP and verify that it
# fetched the expected number of objects and left nothing missing.
test_expect_success 'backfilling over HTTP succeeds' '
	GIT_TRACE2_EVENT="$(pwd)/backfill-http-trace" git \
		-C backfill-http backfill &&

	# We should have engaged the partial clone machinery
	test_trace2_data promisor fetch_count 48 <backfill-http-trace &&

	# Confirm no objects are missing. Ask rev-list to report missing
	# objects, as the tests above do: probing with cat-file alone is not
	# enough, because in a partial clone it may fetch an absent object
	# on demand instead of reporting it.
	git -C backfill-http rev-list --quiet --objects --missing=print --all >missing &&
	test_line_count = 0 missing &&

	# cat-file must also be able to resolve every reachable object.
	git -C backfill-http rev-list --objects --all >rev-list-out &&
	awk "{print \$1;}" <rev-list-out >oids &&
	git -C backfill-http cat-file --batch-check <oids >batch-out &&
	! grep missing batch-out
'

# DO NOT add non-httpd-specific tests here, because the last part of this
# test script is only executed when httpd is available and enabled.

# Last command of the script. test_done is not defined in this file; it
# presumably comes from the test framework sourced at the top and wraps up
# the run (confirm in test-lib.sh).
test_done