Merge branch 'tb/midx-use-checksum'
When rebuilding the multi-pack index file reusing an existing one,
we used to blindly trust the existing file and ended up carrying
corrupted data into the updated file, which has been corrected.
* tb/midx-use-checksum:
midx: report checksum mismatches during 'verify'
midx: don't reuse corrupt MIDXs when writing
commit-graph: rewrite to use checksum_valid()
csum-file: introduce checksum_valid()
diff --git a/commit-graph.c b/commit-graph.c
index 2bcb4e0..1a2602d 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -2422,14 +2422,16 @@
#define GENERATION_ZERO_EXISTS 1
#define GENERATION_NUMBER_EXISTS 2
+static int commit_graph_checksum_valid(struct commit_graph *g)
+{ /* Hands g->data and g->data_len to hashfile_checksum_valid() and returns its result. */
+ return hashfile_checksum_valid(g->data, g->data_len);
+}
+
int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
{
uint32_t i, cur_fanout_pos = 0;
struct object_id prev_oid, cur_oid;
- unsigned char checksum[GIT_MAX_HEXSZ];
int generation_zero = 0;
- struct hashfile *f;
- int devnull;
struct progress *progress = NULL;
int local_error = 0;
@@ -2442,11 +2444,7 @@
if (verify_commit_graph_error)
return verify_commit_graph_error;
- devnull = open("/dev/null", O_WRONLY);
- f = hashfd(devnull, NULL);
- hashwrite(f, g->data, g->data_len - g->hash_len);
- finalize_hashfile(f, checksum, CSUM_CLOSE);
- if (!hasheq(checksum, g->data + g->data_len - g->hash_len)) {
+ if (!commit_graph_checksum_valid(g)) {
graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
}
diff --git a/csum-file.c b/csum-file.c
index 3487d28..c951cf8 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -217,3 +217,19 @@
f->do_crc = 0;
return f->crc32;
}
+
+/* Non-zero iff the trailing rawsz bytes match the hash of the preceding data. */
+int hashfile_checksum_valid(const unsigned char *data, size_t total_len)
+{
+	const size_t trailer_len = the_hash_algo->rawsz;
+	unsigned char computed[GIT_MAX_RAWSZ];
+	git_hash_ctx hash_ctx;
+
+	if (total_len < trailer_len)
+		return 0; /* buffer is shorter than one checksum */
+	/* Hash everything in front of the trailer, then compare against it. */
+	the_hash_algo->init_fn(&hash_ctx);
+	the_hash_algo->update_fn(&hash_ctx, data, total_len - trailer_len);
+	the_hash_algo->final_fn(computed, &hash_ctx);
+	return hasheq(computed, data + total_len - trailer_len);
+}
diff --git a/csum-file.h b/csum-file.h
index 3044bd1..291215b 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -44,6 +44,9 @@
void crc32_begin(struct hashfile *);
uint32_t crc32_end(struct hashfile *);
+/* Verify checksum validity while reading. Returns non-zero on success. */
+int hashfile_checksum_valid(const unsigned char *data, size_t len);
+
/*
* Returns the total number of bytes fed to the hashfile so far (including ones
* that have not been written out to the descriptor yet).
diff --git a/midx.c b/midx.c
index 21d6a05..9a35b02 100644
--- a/midx.c
+++ b/midx.c
@@ -885,6 +885,11 @@
static void clear_midx_files_ext(struct repository *r, const char *ext,
unsigned char *keep_hash);
+static int midx_checksum_valid(struct multi_pack_index *m)
+{ /* Hands m->data and m->data_len to hashfile_checksum_valid() and returns its result. */
+ return hashfile_checksum_valid(m->data, m->data_len);
+}
+
static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
struct string_list *packs_to_drop,
const char *preferred_pack_name,
@@ -911,6 +916,11 @@
else
ctx.m = load_multi_pack_index(object_dir, 1);
+ if (ctx.m && !midx_checksum_valid(ctx.m)) {
+ warning(_("ignoring existing multi-pack-index; checksum mismatch"));
+ ctx.m = NULL;
+ }
+
ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
ctx.info = NULL;
@@ -1218,6 +1228,9 @@
return result;
}
+ if (!midx_checksum_valid(m))
+ midx_report(_("incorrect checksum"));
+
if (flags & MIDX_PROGRESS)
progress = start_delayed_progress(_("Looking for referenced packfiles"),
m->num_packs);
diff --git a/pack-check.c b/pack-check.c
index 4b089fe..c8e560d 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -164,22 +164,13 @@
int verify_pack_index(struct packed_git *p)
{
- size_t len;
- const unsigned char *index_base;
- git_hash_ctx ctx;
- unsigned char hash[GIT_MAX_RAWSZ];
int err = 0;
if (open_pack_index(p))
return error("packfile %s index not opened", p->pack_name);
- index_base = p->index_data;
- len = p->index_size - the_hash_algo->rawsz;
/* Verify SHA1 sum of the index file */
- the_hash_algo->init_fn(&ctx);
- the_hash_algo->update_fn(&ctx, index_base, len);
- the_hash_algo->final_fn(hash, &ctx);
- if (!hasheq(hash, index_base + len))
+ if (!hashfile_checksum_valid(p->index_data, p->index_size))
err = error("Packfile index for %s hash mismatch",
p->pack_name);
return err;
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index 5641d15..7609f1e 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -410,6 +410,19 @@
"git -c core.multipackindex=true fsck"
'
+test_expect_success 'corrupt MIDX is not reused' '
+ corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\377" $objdir \
+ "incorrect object offset" &&
+ git multi-pack-index write 2>err &&
+ test_i18ngrep checksum.mismatch err &&
+ git multi-pack-index verify
+'
+
+test_expect_success 'verify incorrect checksum' '
+ pos=$(($(wc -c <$objdir/pack/multi-pack-index) - 1)) &&
+ corrupt_midx_and_verify $pos "\377" $objdir "incorrect checksum"
+'
+
test_expect_success 'repack progress off for redirected stderr' '
GIT_PROGRESS_DELAY=0 git multi-pack-index --object-dir=$objdir repack 2>err &&
test_line_count = 0 err