commit-graph: persist existence of changed-paths

The changed-path Bloom filters were released in v2.27.0, but have a
significant drawback. A user can opt-in to writing the changed-path
filters using the "--changed-paths" option to "git commit-graph write"
but the next write will drop the filters unless that option is
specified.

This becomes even more important when considering the interaction with
gc.writeCommitGraph (on by default) or fetch.writeCommitGraph (part of
features.experimental). These config options trigger commit-graph writes
that the user did not signal, and hence there is no --changed-paths
option available.

Allow a user that opts-in to the changed-path filters to persist the
property of "my commit-graph has changed-path filters" automatically. A
user can drop filters using the --no-changed-paths option.

In the process, we need to be extremely careful to match the Bloom
filter settings as specified by the commit-graph. This will allow future
versions of Git to customize these settings, and the version with this
change will persist those settings as commit-graphs are rewritten on
top.

Use the trace2 API to signal the settings used during the write, and
check that output in a test after manually adjusting the correct bytes
in the commit-graph file.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index f4b13c0..369b222 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -60,7 +60,10 @@
 With the `--changed-paths` option, compute and write information about the
 paths changed between a commit and it's first parent. This operation can
 take a while on large repositories. It provides significant performance gains
-for getting history of a directory or a file with `git log -- <path>`.
+for getting history of a directory or a file with `git log -- <path>`. If
+this option is given, future commit-graph writes will automatically assume
+that this option was intended. Use `--no-changed-paths` to stop storing this
+data.
 +
 With the `--split` option, write the commit-graph as a chain of multiple
 commit-graph files stored in `<dir>/info/commit-graphs`. The new commits
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 5900983..ff7b177 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -151,6 +151,7 @@
 	};
 
 	opts.progress = isatty(2);
+	opts.enable_changed_paths = -1;
 	split_opts.size_multiple = 2;
 	split_opts.max_commits = 0;
 	split_opts.expire_time = 0;
@@ -171,7 +172,9 @@
 		flags |= COMMIT_GRAPH_WRITE_SPLIT;
 	if (opts.progress)
 		flags |= COMMIT_GRAPH_WRITE_PROGRESS;
-	if (opts.enable_changed_paths ||
+	if (!opts.enable_changed_paths)
+		flags |= COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS;
+	if (opts.enable_changed_paths == 1 ||
 	    git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
 		flags |= COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
 
diff --git a/commit-graph.c b/commit-graph.c
index 50ce039..6762704 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -16,6 +16,8 @@
 #include "progress.h"
 #include "bloom.h"
 #include "commit-slab.h"
+#include "json-writer.h"
+#include "trace2.h"
 
 void git_test_write_commit_graph_or_die(void)
 {
@@ -1108,6 +1110,21 @@
 	stop_progress(&progress);
 }
 
+static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
+{
+	struct json_writer jw = JSON_WRITER_INIT;
+
+	jw_object_begin(&jw, 0);
+	jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
+	jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
+	jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
+	jw_end(&jw);
+
+	trace2_data_json("bloom", ctx->r, "settings", &jw);
+
+	jw_release(&jw);
+}
+
 static void write_graph_chunk_bloom_data(struct hashfile *f,
 					 struct write_commit_graph_context *ctx)
 {
@@ -1116,6 +1133,8 @@
 	struct progress *progress = NULL;
 	int i = 0;
 
+	trace2_bloom_filter_settings(ctx);
+
 	if (ctx->report_progress)
 		progress = start_delayed_progress(
 			_("Writing changed paths Bloom filters data"),
@@ -1547,9 +1566,15 @@
 	int num_chunks = 3;
 	uint64_t chunk_offset;
 	struct object_id file_hash;
-	const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
+	struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
 
-	ctx->bloom_settings = &bloom_settings;
+	if (!ctx->bloom_settings) {
+		bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
+							      bloom_settings.bits_per_entry);
+		bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
+							  bloom_settings.num_hashes);
+		ctx->bloom_settings = &bloom_settings;
+	}
 
 	if (ctx->split) {
 		struct strbuf tmp_file = STRBUF_INIT;
@@ -1974,9 +1999,23 @@
 	ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
 	ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0;
 	ctx->split_opts = split_opts;
-	ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
 	ctx->total_bloom_filter_data_size = 0;
 
+	if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
+		ctx->changed_paths = 1;
+	if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
+		struct commit_graph *g;
+		prepare_commit_graph_one(ctx->r, ctx->odb);
+
+		g = ctx->r->objects->commit_graph;
+
+		/* We have changed-paths already. Keep them in the next graph */
+		if (g && g->chunk_bloom_data) {
+			ctx->changed_paths = 1;
+			ctx->bloom_settings = g->bloom_filter_settings;
+		}
+	}
+
 	if (ctx->split) {
 		struct commit_graph *g;
 		prepare_commit_graph(ctx->r);
diff --git a/commit-graph.h b/commit-graph.h
index f0fb13e..45b1e5b 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -96,6 +96,7 @@
 	/* Make sure that each OID in the input is a valid commit OID. */
 	COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3),
 	COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4),
+	COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 5),
 };
 
 struct split_commit_graph_opts {
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index 2761208..52ad998 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -126,7 +126,7 @@
 	test_commit c14 A/anotherFile2 &&
 	test_commit c15 A/B/anotherFile2 &&
 	test_commit c16 A/B/C/anotherFile2 &&
-	GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split &&
+	git commit-graph write --reachable --split --no-changed-paths &&
 	test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
 '
 
@@ -152,6 +152,21 @@
 	test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
 '
 
+test_expect_success 'persist filter settings' '
+	test_when_finished rm -rf .git/objects/info/commit-graph* &&
+	rm -rf .git/objects/info/commit-graph* &&
+	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
+		GIT_TRACE2_EVENT_NESTING=5 \
+		GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \
+		GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \
+		git commit-graph write --reachable --changed-paths &&
+	grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt &&
+	GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \
+		GIT_TRACE2_EVENT_NESTING=5 \
+		git commit-graph write --reachable --changed-paths &&
+	grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt
+'
+
 test_expect_success 'correctly report changes over limit' '
 	git init 513changes &&
 	(