Merge branch 'tb/midx-use-checksum'

When rebuilding the multi-pack index file reusing an existing one,
we used to blindly trust the existing file and ended up carrying
corrupted data into the updated file, which has been corrected.

* tb/midx-use-checksum:
  midx: report checksum mismatches during 'verify'
  midx: don't reuse corrupt MIDXs when writing
  commit-graph: rewrite to use checksum_valid()
  csum-file: introduce checksum_valid()
diff --git a/commit-graph.c b/commit-graph.c
index 2bcb4e0..1a2602d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -2422,14 +2422,16 @@
 #define GENERATION_ZERO_EXISTS 1
 #define GENERATION_NUMBER_EXISTS 2
 
+static int commit_graph_checksum_valid(struct commit_graph *g)
+{
+	return hashfile_checksum_valid(g->data, g->data_len);
+}
+
 int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
 {
 	uint32_t i, cur_fanout_pos = 0;
 	struct object_id prev_oid, cur_oid;
-	unsigned char checksum[GIT_MAX_HEXSZ];
 	int generation_zero = 0;
-	struct hashfile *f;
-	int devnull;
 	struct progress *progress = NULL;
 	int local_error = 0;
 
@@ -2442,11 +2444,7 @@
 	if (verify_commit_graph_error)
 		return verify_commit_graph_error;
 
-	devnull = open("/dev/null", O_WRONLY);
-	f = hashfd(devnull, NULL);
-	hashwrite(f, g->data, g->data_len - g->hash_len);
-	finalize_hashfile(f, checksum, CSUM_CLOSE);
-	if (!hasheq(checksum, g->data + g->data_len - g->hash_len)) {
+	if (!commit_graph_checksum_valid(g)) {
 		graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
 		verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
 	}
diff --git a/csum-file.c b/csum-file.c
index 3487d28..c951cf8 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -217,3 +217,19 @@
 	f->do_crc = 0;
 	return f->crc32;
 }
+
+int hashfile_checksum_valid(const unsigned char *data, size_t total_len)
+{
+	unsigned char got[GIT_MAX_RAWSZ];
+	git_hash_ctx ctx;
+	size_t data_len = total_len - the_hash_algo->rawsz;
+
+	if (total_len < the_hash_algo->rawsz)
+		return 0; /* say "too short"? */
+
+	the_hash_algo->init_fn(&ctx);
+	the_hash_algo->update_fn(&ctx, data, data_len);
+	the_hash_algo->final_fn(got, &ctx);
+
+	return hasheq(got, data + data_len);
+}
diff --git a/csum-file.h b/csum-file.h
index 3044bd1..291215b 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -44,6 +44,9 @@
 void crc32_begin(struct hashfile *);
 uint32_t crc32_end(struct hashfile *);
 
+/* Verify checksum validity while reading. Returns non-zero on success. */
+int hashfile_checksum_valid(const unsigned char *data, size_t len);
+
 /*
  * Returns the total number of bytes fed to the hashfile so far (including ones
  * that have not been written out to the descriptor yet).
diff --git a/midx.c b/midx.c
index 21d6a05..9a35b02 100644
--- a/midx.c
+++ b/midx.c
@@ -885,6 +885,11 @@
 static void clear_midx_files_ext(struct repository *r, const char *ext,
 				 unsigned char *keep_hash);
 
+static int midx_checksum_valid(struct multi_pack_index *m)
+{
+	return hashfile_checksum_valid(m->data, m->data_len);
+}
+
 static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
 			       struct string_list *packs_to_drop,
 			       const char *preferred_pack_name,
@@ -911,6 +916,11 @@
 	else
 		ctx.m = load_multi_pack_index(object_dir, 1);
 
+	if (ctx.m && !midx_checksum_valid(ctx.m)) {
+		warning(_("ignoring existing multi-pack-index; checksum mismatch"));
+		ctx.m = NULL;
+	}
+
 	ctx.nr = 0;
 	ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
 	ctx.info = NULL;
@@ -1218,6 +1228,9 @@
 		return result;
 	}
 
+	if (!midx_checksum_valid(m))
+		midx_report(_("incorrect checksum"));
+
 	if (flags & MIDX_PROGRESS)
 		progress = start_delayed_progress(_("Looking for referenced packfiles"),
 					  m->num_packs);
diff --git a/pack-check.c b/pack-check.c
index 4b089fe..c8e560d 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -164,22 +164,13 @@
 
 int verify_pack_index(struct packed_git *p)
 {
-	size_t len;
-	const unsigned char *index_base;
-	git_hash_ctx ctx;
-	unsigned char hash[GIT_MAX_RAWSZ];
 	int err = 0;
 
 	if (open_pack_index(p))
 		return error("packfile %s index not opened", p->pack_name);
-	index_base = p->index_data;
-	len = p->index_size - the_hash_algo->rawsz;
 
 	/* Verify SHA1 sum of the index file */
-	the_hash_algo->init_fn(&ctx);
-	the_hash_algo->update_fn(&ctx, index_base, len);
-	the_hash_algo->final_fn(hash, &ctx);
-	if (!hasheq(hash, index_base + len))
+	if (!hashfile_checksum_valid(p->index_data, p->index_size))
 		err = error("Packfile index for %s hash mismatch",
 			    p->pack_name);
 	return err;
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index 5641d15..7609f1e 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -410,6 +410,19 @@
 		"git -c core.multipackindex=true fsck"
 '
 
+test_expect_success 'corrupt MIDX is not reused' '
+	corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\377" $objdir \
+		"incorrect object offset" &&
+	git multi-pack-index write 2>err &&
+	test_i18ngrep checksum.mismatch err &&
+	git multi-pack-index verify
+'
+
+test_expect_success 'verify incorrect checksum' '
+	pos=$(($(wc -c <$objdir/pack/multi-pack-index) - 1)) &&
+	corrupt_midx_and_verify $pos "\377" $objdir "incorrect checksum"
+'
+
 test_expect_success 'repack progress off for redirected stderr' '
 	GIT_PROGRESS_DELAY=0 git multi-pack-index --object-dir=$objdir repack 2>err &&
 	test_line_count = 0 err