Merge branch 'sg/commit-graph-cleanups' into master

The changed-path Bloom filter is improved using ideas from an
independent implementation.

* sg/commit-graph-cleanups:
  commit-graph: simplify write_commit_graph_file() #2
  commit-graph: simplify write_commit_graph_file() #1
  commit-graph: simplify parse_commit_graph() #2
  commit-graph: simplify parse_commit_graph() #1
  commit-graph: clean up #includes
  diff.h: drop diff_tree_oid() & friends' return value
  commit-slab: add a function to deep free entries on the slab
  commit-graph-format.txt: all multi-byte numbers are in network byte order
  commit-graph: fix parsing the Chunk Lookup table
  tree-walk.c: don't match submodule entries for 'submod/anything'
diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt
index 1beef17..4405410 100644
--- a/Documentation/technical/commit-graph-format.txt
+++ b/Documentation/technical/commit-graph-format.txt
@@ -32,7 +32,7 @@
 of the body. The header includes certain values, such as number of chunks
 and hash type.
 
-All 4-byte numbers are in network order.
+All multi-byte numbers are in network byte order.
 
 HEADER:
 
diff --git a/commit-graph.c b/commit-graph.c
index 1af68c2..1f37097 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1,7 +1,5 @@
-#include "cache.h"
-#include "config.h"
-#include "dir.h"
 #include "git-compat-util.h"
+#include "config.h"
 #include "lockfile.h"
 #include "pack.h"
 #include "packfile.h"
@@ -285,8 +283,7 @@
 	const unsigned char *data, *chunk_lookup;
 	uint32_t i;
 	struct commit_graph *graph;
-	uint64_t last_chunk_offset;
-	uint32_t last_chunk_id;
+	uint64_t next_chunk_offset;
 	uint32_t graph_signature;
 	unsigned char graph_version, hash_version;
 
@@ -326,24 +323,26 @@
 	graph->data = graph_map;
 	graph->data_len = graph_size;
 
-	last_chunk_id = 0;
-	last_chunk_offset = 8;
+	if (graph_size < GRAPH_HEADER_SIZE +
+			 (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
+			 GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
+		error(_("commit-graph file is too small to hold %u chunks"),
+		      graph->num_chunks);
+		free(graph);
+		return NULL;
+	}
+
 	chunk_lookup = data + 8;
+	next_chunk_offset = get_be64(chunk_lookup + 4);
 	for (i = 0; i < graph->num_chunks; i++) {
 		uint32_t chunk_id;
-		uint64_t chunk_offset;
+		uint64_t chunk_offset = next_chunk_offset;
 		int chunk_repeated = 0;
 
-		if (data + graph_size - chunk_lookup <
-		    GRAPH_CHUNKLOOKUP_WIDTH) {
-			error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
-			goto free_and_return;
-		}
-
 		chunk_id = get_be32(chunk_lookup + 0);
-		chunk_offset = get_be64(chunk_lookup + 4);
 
 		chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
+		next_chunk_offset = get_be64(chunk_lookup + 4);
 
 		if (chunk_offset > graph_size - the_hash_algo->rawsz) {
 			error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
@@ -362,8 +361,11 @@
 		case GRAPH_CHUNKID_OIDLOOKUP:
 			if (graph->chunk_oid_lookup)
 				chunk_repeated = 1;
-			else
+			else {
 				graph->chunk_oid_lookup = data + chunk_offset;
+				graph->num_commits = (next_chunk_offset - chunk_offset)
+						     / graph->hash_len;
+			}
 			break;
 
 		case GRAPH_CHUNKID_DATA:
@@ -417,15 +419,6 @@
 			error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
 			goto free_and_return;
 		}
-
-		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
-		{
-			graph->num_commits = (chunk_offset - last_chunk_offset)
-					     / graph->hash_len;
-		}
-
-		last_chunk_id = chunk_id;
-		last_chunk_offset = chunk_offset;
 	}
 
 	if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@@ -1586,17 +1579,22 @@
 	return 0;
 }
 
+struct chunk_info {
+	uint32_t id;
+	uint64_t size;
+};
+
 static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 {
 	uint32_t i;
 	int fd;
 	struct hashfile *f;
 	struct lock_file lk = LOCK_INIT;
-	uint32_t chunk_ids[MAX_NUM_CHUNKS + 1];
-	uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
+	struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
 	const unsigned hashsz = the_hash_algo->rawsz;
 	struct strbuf progress_title = STRBUF_INIT;
 	int num_chunks = 3;
+	uint64_t chunk_offset;
 	struct object_id file_hash;
 	const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
 
@@ -1644,51 +1642,34 @@
 		f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
 	}
 
-	chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
-	chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
-	chunk_ids[2] = GRAPH_CHUNKID_DATA;
+	chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
+	chunks[0].size = GRAPH_FANOUT_SIZE;
+	chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
+	chunks[1].size = hashsz * ctx->commits.nr;
+	chunks[2].id = GRAPH_CHUNKID_DATA;
+	chunks[2].size = (hashsz + 16) * ctx->commits.nr;
 	if (ctx->num_extra_edges) {
-		chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES;
+		chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
+		chunks[num_chunks].size = 4 * ctx->num_extra_edges;
 		num_chunks++;
 	}
 	if (ctx->changed_paths) {
-		chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES;
+		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
+		chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
 		num_chunks++;
-		chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA;
+		chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
+		chunks[num_chunks].size = sizeof(uint32_t) * 3
+					  + ctx->total_bloom_filter_data_size;
 		num_chunks++;
 	}
 	if (ctx->num_commit_graphs_after > 1) {
-		chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE;
+		chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
+		chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
 		num_chunks++;
 	}
 
-	chunk_ids[num_chunks] = 0;
-
-	chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
-	chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
-	chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
-	chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
-
-	num_chunks = 3;
-	if (ctx->num_extra_edges) {
-		chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
-						4 * ctx->num_extra_edges;
-		num_chunks++;
-	}
-	if (ctx->changed_paths) {
-		chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
-						sizeof(uint32_t) * ctx->commits.nr;
-		num_chunks++;
-
-		chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
-						sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
-		num_chunks++;
-	}
-	if (ctx->num_commit_graphs_after > 1) {
-		chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
-						hashsz * (ctx->num_commit_graphs_after - 1);
-		num_chunks++;
-	}
+	chunks[num_chunks].id = 0;
+	chunks[num_chunks].size = 0;
 
 	hashwrite_be32(f, GRAPH_SIGNATURE);
 
@@ -1697,13 +1678,16 @@
 	hashwrite_u8(f, num_chunks);
 	hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
 
+	chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
 	for (i = 0; i <= num_chunks; i++) {
 		uint32_t chunk_write[3];
 
-		chunk_write[0] = htonl(chunk_ids[i]);
-		chunk_write[1] = htonl(chunk_offsets[i] >> 32);
-		chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
+		chunk_write[0] = htonl(chunks[i].id);
+		chunk_write[1] = htonl(chunk_offset >> 32);
+		chunk_write[2] = htonl(chunk_offset & 0xffffffff);
 		hashwrite(f, chunk_write, 12);
+
+		chunk_offset += chunks[i].size;
 	}
 
 	if (ctx->report_progress) {
diff --git a/commit-graph.h b/commit-graph.h
index 28f89cd..cf8d3c9 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -2,9 +2,6 @@
 #define COMMIT_GRAPH_H
 
 #include "git-compat-util.h"
-#include "repository.h"
-#include "string-list.h"
-#include "cache.h"
 #include "object-store.h"
 #include "oidset.h"
 
@@ -23,6 +20,9 @@
 
 struct commit;
 struct bloom_filter_settings;
+struct repository;
+struct raw_object_store;
+struct string_list;
 
 char *get_commit_graph_filename(struct object_directory *odb);
 int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
diff --git a/commit-slab-decl.h b/commit-slab-decl.h
index bfbed15..98de2c9 100644
--- a/commit-slab-decl.h
+++ b/commit-slab-decl.h
@@ -32,6 +32,7 @@
 void init_ ##slabname## _with_stride(struct slabname *s, unsigned stride); \
 void init_ ##slabname(struct slabname *s);				\
 void clear_ ##slabname(struct slabname *s);				\
+void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *ptr)); \
 elemtype *slabname## _at_peek(struct slabname *s, const struct commit *c, int add_if_missing); \
 elemtype *slabname## _at(struct slabname *s, const struct commit *c);	\
 elemtype *slabname## _peek(struct slabname *s, const struct commit *c)
diff --git a/commit-slab-impl.h b/commit-slab-impl.h
index 5c0eb91..557738d 100644
--- a/commit-slab-impl.h
+++ b/commit-slab-impl.h
@@ -38,6 +38,19 @@
 	FREE_AND_NULL(s->slab);						\
 }									\
 									\
+scope void deep_clear_ ##slabname(struct slabname *s, void (*free_fn)(elemtype *)) \
+{									\
+	unsigned int i;							\
+	for (i = 0; i < s->slab_count; i++) {				\
+		unsigned int j;						\
+		if (!s->slab[i])					\
+			continue;					\
+		for (j = 0; j < s->slab_size; j++)			\
+			free_fn(&s->slab[i][j * s->stride]);		\
+	}								\
+	clear_ ##slabname(s);						\
+}									\
+									\
 scope elemtype *slabname## _at_peek(struct slabname *s,			\
 						  const struct commit *c, \
 						  int add_if_missing)   \
diff --git a/commit-slab.h b/commit-slab.h
index 05b3f28..8e72a30 100644
--- a/commit-slab.h
+++ b/commit-slab.h
@@ -47,6 +47,16 @@
  *
  *   Call this function before the slab falls out of scope to avoid
  *   leaking memory.
+ *
+ * - void deep_clear_indegree(struct indegree *, void (*free_fn)(int*))
+ *
+ *   Empties the slab, similar to clear_indegree(), but in addition it
+ *   calls the given 'free_fn' for each slab entry to release any
+ *   additional memory that might be owned by the entry (but not the
+ *   entry itself!).
+ *   Note that 'free_fn' might be called even for entries for which no
+ *   indegree_at() call has been made; in this case 'free_fn' is invoked
+ *   with a pointer to a zero-initialized location.
  */
 
 #define define_commit_slab(slabname, elemtype) \
diff --git a/diff.h b/diff.h
index 9443dc1..e0c0af6 100644
--- a/diff.h
+++ b/diff.h
@@ -431,11 +431,11 @@
 	struct combine_diff_path *p, const struct object_id *oid,
 	const struct object_id **parents_oid, int nparent,
 	struct strbuf *base, struct diff_options *opt);
-int diff_tree_oid(const struct object_id *old_oid,
-		  const struct object_id *new_oid,
-		  const char *base, struct diff_options *opt);
-int diff_root_tree_oid(const struct object_id *new_oid, const char *base,
-		       struct diff_options *opt);
+void diff_tree_oid(const struct object_id *old_oid,
+		   const struct object_id *new_oid,
+		   const char *base, struct diff_options *opt);
+void diff_root_tree_oid(const struct object_id *new_oid, const char *base,
+			struct diff_options *opt);
 
 struct combine_diff_path {
 	struct combine_diff_path *next;
diff --git a/revision.c b/revision.c
index 6aa7f4f..049008c 100644
--- a/revision.c
+++ b/revision.c
@@ -791,9 +791,7 @@
 
 	tree_difference = REV_TREE_SAME;
 	revs->pruning.flags.has_changes = 0;
-	if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "",
-			   &revs->pruning) < 0)
-		return REV_TREE_DIFFERENT;
+	diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
 
 	if (!nth_parent)
 		if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
@@ -804,7 +802,6 @@
 
 static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
 {
-	int retval;
 	struct tree *t1 = get_commit_tree(commit);
 
 	if (!t1)
@@ -812,9 +809,9 @@
 
 	tree_difference = REV_TREE_SAME;
 	revs->pruning.flags.has_changes = 0;
-	retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
+	diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
 
-	return retval >= 0 && (tree_difference == REV_TREE_SAME);
+	return tree_difference == REV_TREE_SAME;
 }
 
 struct treesame_state {
diff --git a/shallow.c b/shallow.c
index b826de9..91b9e10 100644
--- a/shallow.c
+++ b/shallow.c
@@ -110,6 +110,10 @@
  * supports a "valid" flag.
  */
 define_commit_slab(commit_depth, int *);
+static void free_depth_in_slab(int **ptr)
+{
+	FREE_AND_NULL(*ptr);
+}
 struct commit_list *get_shallow_commits(struct object_array *heads, int depth,
 		int shallow_flag, int not_shallow_flag)
 {
@@ -176,15 +180,7 @@
 			}
 		}
 	}
-	for (i = 0; i < depths.slab_count; i++) {
-		int j;
-
-		if (!depths.slab[i])
-			continue;
-		for (j = 0; j < depths.slab_size; j++)
-			free(depths.slab[i][j]);
-	}
-	clear_commit_depth(&depths);
+	deep_clear_commit_depth(&depths, free_depth_in_slab);
 
 	return result;
 }
diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh
index e5ca359..65cc703 100755
--- a/t/t4010-diff-pathspec.sh
+++ b/t/t4010-diff-pathspec.sh
@@ -125,7 +125,9 @@
 test_expect_success 'diff-tree ignores trailing slash on submodule path' '
 	git diff --name-only HEAD^ HEAD submod >expect &&
 	git diff --name-only HEAD^ HEAD submod/ >actual &&
-	test_cmp expect actual
+	test_cmp expect actual &&
+	git diff --name-only HEAD^ HEAD -- submod/whatever >actual &&
+	test_must_be_empty actual
 '
 
 test_expect_success 'diff multiple wildcard pathspecs' '
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index 26f332d..50d5f08 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -529,7 +529,7 @@
 '
 
 test_expect_success 'detect low chunk count' '
-	corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \
+	corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
 		"missing the .* chunk"
 '
 
@@ -615,7 +615,8 @@
 
 test_expect_success 'detect incorrect chunk count' '
 	corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
-		"chunk lookup table entry missing" $GRAPH_CHUNK_LOOKUP_OFFSET
+		"commit-graph file is too small to hold [0-9]* chunks" \
+		$GRAPH_CHUNK_LOOKUP_OFFSET
 '
 
 test_expect_success 'git fsck (checks commit-graph)' '
diff --git a/tree-diff.c b/tree-diff.c
index f3d303c..6ebad1a 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -29,9 +29,9 @@
 	struct combine_diff_path *p, const struct object_id *oid,
 	const struct object_id **parents_oid, int nparent,
 	struct strbuf *base, struct diff_options *opt);
-static int ll_diff_tree_oid(const struct object_id *old_oid,
-			    const struct object_id *new_oid,
-			    struct strbuf *base, struct diff_options *opt);
+static void ll_diff_tree_oid(const struct object_id *old_oid,
+			     const struct object_id *new_oid,
+			     struct strbuf *base, struct diff_options *opt);
 
 /*
  * Compare two tree entries, taking into account only path/S_ISDIR(mode),
@@ -679,9 +679,9 @@
 	q->nr = 1;
 }
 
-static int ll_diff_tree_oid(const struct object_id *old_oid,
-			    const struct object_id *new_oid,
-			    struct strbuf *base, struct diff_options *opt)
+static void ll_diff_tree_oid(const struct object_id *old_oid,
+			     const struct object_id *new_oid,
+			     struct strbuf *base, struct diff_options *opt)
 {
 	struct combine_diff_path phead, *p;
 	pathchange_fn_t pathchange_old = opt->pathchange;
@@ -697,29 +697,27 @@
 	}
 
 	opt->pathchange = pathchange_old;
-	return 0;
 }
 
-int diff_tree_oid(const struct object_id *old_oid,
-		  const struct object_id *new_oid,
-		  const char *base_str, struct diff_options *opt)
+void diff_tree_oid(const struct object_id *old_oid,
+		   const struct object_id *new_oid,
+		   const char *base_str, struct diff_options *opt)
 {
 	struct strbuf base;
-	int retval;
 
 	strbuf_init(&base, PATH_MAX);
 	strbuf_addstr(&base, base_str);
 
-	retval = ll_diff_tree_oid(old_oid, new_oid, &base, opt);
+	ll_diff_tree_oid(old_oid, new_oid, &base, opt);
 	if (!*base_str && opt->flags.follow_renames && diff_might_be_rename())
 		try_to_follow_renames(old_oid, new_oid, &base, opt);
 
 	strbuf_release(&base);
-
-	return retval;
 }
 
-int diff_root_tree_oid(const struct object_id *new_oid, const char *base, struct diff_options *opt)
+void diff_root_tree_oid(const struct object_id *new_oid,
+			const char *base,
+			struct diff_options *opt)
 {
-	return diff_tree_oid(NULL, new_oid, base, opt);
+	diff_tree_oid(NULL, new_oid, base, opt);
 }
diff --git a/tree-walk.c b/tree-walk.c
index bb0ad34..0160294 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -851,7 +851,14 @@
 	if (matchlen > pathlen) {
 		if (match[pathlen] != '/')
 			return 0;
-		if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
+		/*
+		 * Reject non-directories as partial pathnames, except
+		 * when match is a submodule with a trailing slash and
+		 * nothing else (to handle 'submod/' and 'submod'
+		 * uniformly).
+		 */
+		if (!S_ISDIR(entry->mode) &&
+		    (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
 			return 0;
 	}