Merge branch 'ab/pickaxe-pcre2'

Rewrite the backend for "diff -G/-S" to use pcre2 engine when
available.

* ab/pickaxe-pcre2: (22 commits)
  xdiff-interface: replace discard_hunk_line() with a flag
  xdiff users: use designated initializers for out_line
  pickaxe -G: don't special-case create/delete
  pickaxe -G: terminate early on matching lines
  xdiff-interface: allow early return from xdiff_emit_line_fn
  xdiff-interface: prepare for allowing early return
  pickaxe -S: slightly optimize contains()
  pickaxe: rename variables in has_changes() for brevity
  pickaxe -S: support content with NULs under --pickaxe-regex
  pickaxe: assert that we must have a needle under -G or -S
  pickaxe: refactor function selection in diffcore-pickaxe()
  perf: add performance test for pickaxe
  pickaxe/style: consolidate declarations and assignments
  diff.h: move pickaxe fields together again
  pickaxe: die when --find-object and --pickaxe-all are combined
  pickaxe: die when -G and --pickaxe-regex are combined
  pickaxe tests: add missing test for --no-pickaxe-regex being an error
  pickaxe tests: test for -G, -S and --find-object incompatibility
  pickaxe tests: add test for "log -S" not being a regex
  pickaxe tests: add test for diffgrep_consume() internals
  ...
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index de85207..5dc94d6 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -107,15 +107,12 @@
 	mmfile_t src, dst;
 	xpparam_t xpp;
 	xdemitconf_t xecfg;
-	xdemitcb_t ecb;
+	xdemitcb_t ecb = { .out_line = show_outf };
 
 	memset(&xpp, 0, sizeof(xpp));
 	xpp.flags = 0;
 	memset(&xecfg, 0, sizeof(xecfg));
 	xecfg.ctxlen = 3;
-	ecb.out_hunk = NULL;
-	ecb.out_line = show_outf;
-	ecb.priv = NULL;
 
 	src.ptr = origin(entry, &size);
 	if (!src.ptr)
diff --git a/builtin/rerere.c b/builtin/rerere.c
index fd3be17..83d7a77 100644
--- a/builtin/rerere.c
+++ b/builtin/rerere.c
@@ -28,7 +28,7 @@
 {
 	xpparam_t xpp;
 	xdemitconf_t xecfg;
-	xdemitcb_t ecb;
+	xdemitcb_t ecb = { .out_line = outf };
 	mmfile_t minus, plus;
 	int ret;
 
@@ -41,8 +41,6 @@
 	xpp.flags = 0;
 	memset(&xecfg, 0, sizeof(xecfg));
 	xecfg.ctxlen = 3;
-	ecb.out_hunk = NULL;
-	ecb.out_line = outf;
 	ret = xdi_diff(&minus, &plus, &xpp, &xecfg, &ecb);
 
 	free(minus.ptr);
diff --git a/combine-diff.c b/combine-diff.c
index 7d925ce..d93782d 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -403,11 +403,11 @@
 	state->sline[state->nb-1].p_lno[state->n] = state->ob;
 }
 
-static void consume_line(void *state_, char *line, unsigned long len)
+static int consume_line(void *state_, char *line, unsigned long len)
 {
 	struct combine_diff_state *state = state_;
 	if (!state->lost_bucket)
-		return; /* not in any hunk yet */
+		return 0; /* not in any hunk yet */
 	switch (line[0]) {
 	case '-':
 		append_lost(state->lost_bucket, state->n, line+1, len-1);
@@ -417,6 +417,7 @@
 		state->lno++;
 		break;
 	}
+	return 0;
 }
 
 static void combine_diff(struct repository *r,
diff --git a/diff.c b/diff.c
index 52c7915..260dc37 100644
--- a/diff.c
+++ b/diff.c
@@ -2340,7 +2340,7 @@
 	ecbdata->lno_in_postimage = strtol(p + 1, NULL, 10);
 }
 
-static void fn_out_consume(void *priv, char *line, unsigned long len)
+static int fn_out_consume(void *priv, char *line, unsigned long len)
 {
 	struct emit_callback *ecbdata = priv;
 	struct diff_options *o = ecbdata->opt;
@@ -2376,7 +2376,7 @@
 		len = sane_truncate_line(line, len);
 		find_lno(line, ecbdata);
 		emit_hunk_header(ecbdata, line, len);
-		return;
+		return 0;
 	}
 
 	if (ecbdata->diff_words) {
@@ -2386,11 +2386,11 @@
 		if (line[0] == '-') {
 			diff_words_append(line, len,
 					  &ecbdata->diff_words->minus);
-			return;
+			return 0;
 		} else if (line[0] == '+') {
 			diff_words_append(line, len,
 					  &ecbdata->diff_words->plus);
-			return;
+			return 0;
 		} else if (starts_with(line, "\\ ")) {
 			/*
 			 * Eat the "no newline at eof" marker as if we
@@ -2399,11 +2399,11 @@
 			 * defer processing. If this is the end of
 			 * preimage, more "+" lines may come after it.
 			 */
-			return;
+			return 0;
 		}
 		diff_words_flush(ecbdata);
 		emit_diff_symbol(o, s, line, len, 0);
-		return;
+		return 0;
 	}
 
 	switch (line[0]) {
@@ -2427,6 +2427,7 @@
 				 line, len, 0);
 		break;
 	}
+	return 0;
 }
 
 static void pprint_rename(struct strbuf *name, const char *a, const char *b)
@@ -2526,7 +2527,7 @@
 	return x;
 }
 
-static void diffstat_consume(void *priv, char *line, unsigned long len)
+static int diffstat_consume(void *priv, char *line, unsigned long len)
 {
 	struct diffstat_t *diffstat = priv;
 	struct diffstat_file *x = diffstat->files[diffstat->nr - 1];
@@ -2535,6 +2536,7 @@
 		x->added++;
 	else if (line[0] == '-')
 		x->deleted++;
+	return 0;
 }
 
 const char mime_boundary_leader[] = "------------";
@@ -3212,7 +3214,7 @@
 	data->lineno = nb - 1;
 }
 
-static void checkdiff_consume(void *priv, char *line, unsigned long len)
+static int checkdiff_consume(void *priv, char *line, unsigned long len)
 {
 	struct checkdiff_t *data = priv;
 	int marker_size = data->conflict_marker_size;
@@ -3236,7 +3238,7 @@
 		}
 		bad = ws_check(line + 1, len - 1, data->ws_rule);
 		if (!bad)
-			return;
+			return 0;
 		data->status |= bad;
 		err = whitespace_error_string(bad);
 		fprintf(data->o->file, "%s%s:%d: %s.\n",
@@ -3248,6 +3250,7 @@
 	} else if (line[0] == ' ') {
 		data->lineno++;
 	}
+	return 0;
 }
 
 static unsigned char *deflate_it(char *data,
@@ -3726,7 +3729,8 @@
 		xpp.anchors_nr = o->anchors_nr;
 		xecfg.ctxlen = o->context;
 		xecfg.interhunkctxlen = o->interhunkcontext;
-		if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line,
+		xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
+		if (xdi_diff_outf(&mf1, &mf2, NULL,
 				  diffstat_consume, diffstat, &xpp, &xecfg))
 			die("unable to generate diffstat for %s", one->path);
 
@@ -4632,6 +4636,12 @@
 	if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK))
 		die(_("-G, -S and --find-object are mutually exclusive"));
 
+	if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_G_REGEX_MASK))
+		die(_("-G and --pickaxe-regex are mutually exclusive, use --pickaxe-regex with -S"));
+
+	if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK))
+		die(_("---pickaxe-all and --find-object are mutually exclusive, use --pickaxe-all with -G and -S"));
+
 	/*
 	 * Most of the time we can say "there are changes"
 	 * only by checking if there are changed paths, but
@@ -6119,17 +6129,18 @@
 	}
 }
 
-static void patch_id_consume(void *priv, char *line, unsigned long len)
+static int patch_id_consume(void *priv, char *line, unsigned long len)
 {
 	struct patch_id_t *data = priv;
 	int new_len;
 
 	if (len > 12 && starts_with(line, "\\ "))
-		return;
+		return 0;
 	new_len = remove_space(line, len);
 
 	the_hash_algo->update_fn(data->ctx, line, new_len);
 	data->patchlen += new_len;
+	return 0;
 }
 
 static void patch_id_add_string(git_hash_ctx *ctx, const char *str)
@@ -6227,8 +6238,8 @@
 
 		xpp.flags = 0;
 		xecfg.ctxlen = 3;
-		xecfg.flags = 0;
-		if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line,
+		xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
+		if (xdi_diff_outf(&mf1, &mf2, NULL,
 				  patch_id_consume, &data, &xpp, &xecfg))
 			return error("unable to generate patch-id diff for %s",
 				     p->one->path);
diff --git a/diff.h b/diff.h
index c8f3fae..8ba85c5 100644
--- a/diff.h
+++ b/diff.h
@@ -265,6 +265,7 @@
 	 * postimage of the diff_queue.
 	 */
 	const char *pickaxe;
+	unsigned pickaxe_opts;
 
 	/* -I<regex> */
 	regex_t **ignore_regex;
@@ -304,8 +305,6 @@
 	/* The output format used when `diff_flush()` is run. */
 	int output_format;
 
-	unsigned pickaxe_opts;
-
 	/* Affects the way detection logic for complete rewrites, renames and
 	 * copies.
 	 */
@@ -556,6 +555,10 @@
 #define DIFF_PICKAXE_KINDS_MASK (DIFF_PICKAXE_KIND_S | \
 				 DIFF_PICKAXE_KIND_G | \
 				 DIFF_PICKAXE_KIND_OBJFIND)
+#define DIFF_PICKAXE_KINDS_G_REGEX_MASK (DIFF_PICKAXE_KIND_G | \
+					 DIFF_PICKAXE_REGEX)
+#define DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK (DIFF_PICKAXE_ALL | \
+					     DIFF_PICKAXE_KIND_OBJFIND)
 
 #define DIFF_PICKAXE_IGNORE_CASE	32
 
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index a9c6d60..c88e50c 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -19,38 +19,31 @@
 	int hit;
 };
 
-static void diffgrep_consume(void *priv, char *line, unsigned long len)
+static int diffgrep_consume(void *priv, char *line, unsigned long len)
 {
 	struct diffgrep_cb *data = priv;
 	regmatch_t regmatch;
 
 	if (line[0] != '+' && line[0] != '-')
-		return;
+		return 0;
 	if (data->hit)
-		/*
-		 * NEEDSWORK: we should have a way to terminate the
-		 * caller early.
-		 */
-		return;
-	data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
-				 &regmatch, 0);
+		BUG("Already matched in diffgrep_consume! Broken xdiff_emit_line_fn?");
+	if (!regexec_buf(data->regexp, line + 1, len - 1, 1,
+			 &regmatch, 0)) {
+		data->hit = 1;
+		return 1;
+	}
+	return 0;
 }
 
 static int diff_grep(mmfile_t *one, mmfile_t *two,
 		     struct diff_options *o,
 		     regex_t *regexp, kwset_t kws)
 {
-	regmatch_t regmatch;
 	struct diffgrep_cb ecbdata;
 	xpparam_t xpp;
 	xdemitconf_t xecfg;
-
-	if (!one)
-		return !regexec_buf(regexp, two->ptr, two->size,
-				    1, &regmatch, 0);
-	if (!two)
-		return !regexec_buf(regexp, one->ptr, one->size,
-				    1, &regmatch, 0);
+	int ret;
 
 	/*
 	 * We have both sides; need to run textual diff and see if
@@ -60,38 +53,47 @@
 	memset(&xecfg, 0, sizeof(xecfg));
 	ecbdata.regexp = regexp;
 	ecbdata.hit = 0;
+	xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
 	xecfg.ctxlen = o->context;
 	xecfg.interhunkctxlen = o->interhunkcontext;
-	if (xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume,
-			  &ecbdata, &xpp, &xecfg))
-		return 0;
-	return ecbdata.hit;
+
+	/*
+	 * An xdiff error might be our "data->hit" from above. See the
+	 * comment for xdiff_emit_line_fn in xdiff-interface.h
+	 */
+	ret = xdi_diff_outf(one, two, NULL, diffgrep_consume,
+			    &ecbdata, &xpp, &xecfg);
+	if (ecbdata.hit)
+		return 1;
+	if (ret)
+		return ret;
+	return 0;
 }
 
-static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
+static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws,
+			     unsigned int limit)
 {
-	unsigned int cnt;
-	unsigned long sz;
-	const char *data;
-
-	sz = mf->size;
-	data = mf->ptr;
-	cnt = 0;
+	unsigned int cnt = 0;
+	unsigned long sz = mf->size;
+	const char *data = mf->ptr;
 
 	if (regexp) {
 		regmatch_t regmatch;
 		int flags = 0;
 
-		while (sz && *data &&
+		while (sz &&
 		       !regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
 			flags |= REG_NOTBOL;
 			data += regmatch.rm_eo;
 			sz -= regmatch.rm_eo;
-			if (sz && *data && regmatch.rm_so == regmatch.rm_eo) {
+			if (sz && regmatch.rm_so == regmatch.rm_eo) {
 				data++;
 				sz--;
 			}
 			cnt++;
+
+			if (limit && cnt == limit)
+				return cnt;
 		}
 
 	} else { /* Classic exact string match */
@@ -103,6 +105,9 @@
 			sz -= offset + kwsm.size[0];
 			data += offset + kwsm.size[0];
 			cnt++;
+
+			if (limit && cnt == limit)
+				return cnt;
 		}
 	}
 	return cnt;
@@ -112,9 +117,9 @@
 		       struct diff_options *o,
 		       regex_t *regexp, kwset_t kws)
 {
-	unsigned int one_contains = one ? contains(one, regexp, kws) : 0;
-	unsigned int two_contains = two ? contains(two, regexp, kws) : 0;
-	return one_contains != two_contains;
+	unsigned int c1 = one ? contains(one, regexp, kws, 0) : 0;
+	unsigned int c2 = two ? contains(two, regexp, kws, c1 + 1) : 0;
+	return c1 != c2;
 }
 
 static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
@@ -136,9 +141,6 @@
 			 oidset_contains(o->objfind, &p->two->oid));
 	}
 
-	if (!o->pickaxe[0])
-		return 0;
-
 	if (o->flags.allow_textconv) {
 		textconv_one = get_textconv(o->repo, p->one);
 		textconv_two = get_textconv(o->repo, p->two);
@@ -163,9 +165,7 @@
 	mf1.size = fill_textconv(o->repo, textconv_one, p->one, &mf1.ptr);
 	mf2.size = fill_textconv(o->repo, textconv_two, p->two, &mf2.ptr);
 
-	ret = fn(DIFF_FILE_VALID(p->one) ? &mf1 : NULL,
-		 DIFF_FILE_VALID(p->two) ? &mf2 : NULL,
-		 o, regexp, kws);
+	ret = fn(&mf1, &mf2, o, regexp, kws);
 
 	if (textconv_one)
 		free(mf1.ptr);
@@ -232,13 +232,31 @@
 	int opts = o->pickaxe_opts;
 	regex_t regex, *regexp = NULL;
 	kwset_t kws = NULL;
+	pickaxe_fn fn;
 
+	if (opts & ~DIFF_PICKAXE_KIND_OBJFIND &&
+	    (!needle || !*needle))
+		BUG("should have needle under -G or -S");
 	if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
 		int cflags = REG_EXTENDED | REG_NEWLINE;
 		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE)
 			cflags |= REG_ICASE;
 		regcomp_or_die(&regex, needle, cflags);
 		regexp = &regex;
+
+		if (opts & DIFF_PICKAXE_KIND_G)
+			fn = diff_grep;
+		else if (opts & DIFF_PICKAXE_REGEX)
+			fn = has_changes;
+		else
+			/*
+			 * We don't need to check the combination of
+			 * -G and --pickaxe-regex, by the time we get
+			 * here diff.c has already died if they're
+			 * combined. See the usage tests in
+			 * t4209-log-pickaxe.sh.
+			 */
+			BUG("unreachable");
 	} else if (opts & DIFF_PICKAXE_KIND_S) {
 		if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
 		    has_non_ascii(needle)) {
@@ -255,10 +273,14 @@
 			kwsincr(kws, needle, strlen(needle));
 			kwsprep(kws);
 		}
+		fn = has_changes;
+	} else if (opts & DIFF_PICKAXE_KIND_OBJFIND) {
+		fn = NULL;
+	} else {
+		BUG("unknown pickaxe_opts flag");
 	}
 
-	pickaxe(&diff_queued_diff, o, regexp, kws,
-		(opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);
+	pickaxe(&diff_queued_diff, o, regexp, kws, fn);
 
 	if (regexp)
 		regfree(regexp);
diff --git a/range-diff.c b/range-diff.c
index 1a4471f..e947979 100644
--- a/range-diff.c
+++ b/range-diff.c
@@ -274,9 +274,10 @@
 	hashmap_clear(&map);
 }
 
-static void diffsize_consume(void *data, char *line, unsigned long len)
+static int diffsize_consume(void *data, char *line, unsigned long len)
 {
 	(*(int *)data)++;
+	return 0;
 }
 
 static void diffsize_hunk(void *data, long ob, long on, long nb, long nn,
diff --git a/t/perf/p4209-pickaxe.sh b/t/perf/p4209-pickaxe.sh
new file mode 100755
index 0000000..f585a44
--- /dev/null
+++ b/t/perf/p4209-pickaxe.sh
@@ -0,0 +1,70 @@
+#!/bin/sh
+
+test_description="Test pickaxe performance"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+# Not --max-count, as that's the number of matching commit, so it's
+# unbounded. We want to limit our revision walk here.
+from_rev_desc=
+from_rev=
+max_count=1000
+if test_have_prereq EXPENSIVE
+then
+	max_count=10000
+fi
+from_rev=" $(git rev-list HEAD | head -n $max_count | tail -n 1).."
+from_rev_desc=" <limit-rev>.."
+
+for icase in \
+	'' \
+	'-i '
+do
+	# -S (no regex)
+	for pattern in \
+		'int main' \
+		'æ'
+	do
+		for opts in \
+			'-S'
+		do
+			test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
+				git log --pretty=format:%H $icase$opts'$pattern'$from_rev
+			"
+		done
+	done
+
+	# -S (regex)
+	for pattern in  \
+		'(int|void|null)' \
+		'if *\([^ ]+ & ' \
+		'[àáâãäåæñøùúûüýþ]'
+	do
+		for opts in \
+			'--pickaxe-regex -S'
+		do
+			test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
+				git log --pretty=format:%H $icase$opts'$pattern'$from_rev
+			"
+		done
+	done
+
+	# -G
+	for pattern in  \
+		'(int|void|null)' \
+		'if *\([^ ]+ & ' \
+		'[àáâãäåæñøùúûüýþ]'
+	do
+		for opts in \
+			'-G'
+		do
+			test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
+				git log --pretty=format:%H $icase$opts'$pattern'$from_rev
+			"
+		done
+	done
+done
+
+test_done
diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh
index 5d06f5f..75795d0 100755
--- a/t/t4209-log-pickaxe.sh
+++ b/t/t4209-log-pickaxe.sh
@@ -55,6 +55,43 @@
 	git rev-parse --verify HEAD >expect_second
 '
 
+test_expect_success 'usage' '
+	test_expect_code 129 git log -S 2>err &&
+	test_i18ngrep "switch.*requires a value" err &&
+
+	test_expect_code 129 git log -G 2>err &&
+	test_i18ngrep "switch.*requires a value" err &&
+
+	test_expect_code 128 git log -Gregex -Sstring 2>err &&
+	grep "mutually exclusive" err &&
+
+	test_expect_code 128 git log -Gregex --find-object=HEAD 2>err &&
+	grep "mutually exclusive" err &&
+
+	test_expect_code 128 git log -Sstring --find-object=HEAD 2>err &&
+	grep "mutually exclusive" err &&
+
+	test_expect_code 128 git log --pickaxe-all --find-object=HEAD 2>err &&
+	grep "mutually exclusive" err
+'
+
+test_expect_success 'usage: --pickaxe-regex' '
+	test_expect_code 128 git log -Gregex --pickaxe-regex 2>err &&
+	grep "mutually exclusive" err
+'
+
+test_expect_success 'usage: --no-pickaxe-regex' '
+	cat >expect <<-\EOF &&
+	fatal: unrecognized argument: --no-pickaxe-regex
+	EOF
+
+	test_expect_code 128 git log -Sstring --no-pickaxe-regex 2>actual &&
+	test_cmp expect actual &&
+
+	test_expect_code 128 git log -Gstring --no-pickaxe-regex 2>err &&
+	test_cmp expect actual
+'
+
 test_log	expect_initial	--grep initial
 test_log	expect_nomatch	--grep InItial
 test_log_icase	expect_initial	--grep InItial
@@ -106,38 +143,83 @@
 	rm .gitattributes
 '
 
+test_expect_success 'setup log -[GS] plain & regex' '
+	test_create_repo GS-plain &&
+	test_commit -C GS-plain --append A data.txt "a" &&
+	test_commit -C GS-plain --append B data.txt "a a" &&
+	test_commit -C GS-plain --append C data.txt "b" &&
+	test_commit -C GS-plain --append D data.txt "[b]" &&
+	test_commit -C GS-plain E data.txt "" &&
+
+	# We also include E, the deletion commit
+	git -C GS-plain log --grep="[ABE]" >A-to-B-then-E-log &&
+	git -C GS-plain log --grep="[CDE]" >C-to-D-then-E-log &&
+	git -C GS-plain log --grep="[DE]" >D-then-E-log &&
+	git -C GS-plain log >full-log
+'
+
+test_expect_success 'log -G trims diff new/old [-+]' '
+	git -C GS-plain log -G"[+-]a" >log &&
+	test_must_be_empty log &&
+	git -C GS-plain log -G"^a" >log &&
+	test_cmp log A-to-B-then-E-log
+'
+
+test_expect_success 'log -S<pat> is not a regex, but -S<pat> --pickaxe-regex is' '
+	git -C GS-plain log -S"a" >log &&
+	test_cmp log A-to-B-then-E-log &&
+
+	git -C GS-plain log -S"[a]" >log &&
+	test_must_be_empty log &&
+
+	git -C GS-plain log -S"[a]" --pickaxe-regex >log &&
+	test_cmp log A-to-B-then-E-log &&
+
+	git -C GS-plain log -S"[b]" >log &&
+	test_cmp log D-then-E-log &&
+
+	git -C GS-plain log -S"[b]" --pickaxe-regex >log &&
+	test_cmp log C-to-D-then-E-log
+'
+
 test_expect_success 'setup log -[GS] binary & --text' '
-	git checkout --orphan GS-binary-and-text &&
-	git read-tree --empty &&
-	printf "a\na\0a\n" >data.bin &&
-	git add data.bin &&
-	git commit -m "create binary file" data.bin &&
-	printf "a\na\0a\n" >>data.bin &&
-	git commit -m "modify binary file" data.bin &&
-	git rm data.bin &&
-	git commit -m "delete binary file" data.bin &&
-	git log >full-log
+	test_create_repo GS-bin-txt &&
+	test_commit -C GS-bin-txt --printf A data.bin "a\na\0a\n" &&
+	test_commit -C GS-bin-txt --append --printf B data.bin "a\na\0a\n" &&
+	test_commit -C GS-bin-txt C data.bin "" &&
+	git -C GS-bin-txt log >full-log
 '
 
 test_expect_success 'log -G ignores binary files' '
-	git log -Ga >log &&
+	git -C GS-bin-txt log -Ga >log &&
 	test_must_be_empty log
 '
 
 test_expect_success 'log -G looks into binary files with -a' '
-	git log -a -Ga >log &&
+	git -C GS-bin-txt log -a -Ga >log &&
 	test_cmp log full-log
 '
 
 test_expect_success 'log -G looks into binary files with textconv filter' '
-	test_when_finished "rm .gitattributes" &&
-	echo "* diff=bin" >.gitattributes &&
-	git -c diff.bin.textconv=cat log -Ga >log &&
+	test_when_finished "rm GS-bin-txt/.gitattributes" &&
+	(
+		cd GS-bin-txt &&
+		echo "* diff=bin" >.gitattributes &&
+		git -c diff.bin.textconv=cat log -Ga >../log
+	) &&
 	test_cmp log full-log
 '
 
 test_expect_success 'log -S looks into binary files' '
-	git log -Sa >log &&
+	git -C GS-bin-txt log -Sa >log &&
+	test_cmp log full-log
+'
+
+test_expect_success 'log -S --pickaxe-regex looks into binary files' '
+	git -C GS-bin-txt log --pickaxe-regex -Sa >log &&
+	test_cmp log full-log &&
+
+	git -C GS-bin-txt log --pickaxe-regex -S"[a]" >log &&
 	test_cmp log full-log
 '
 
diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh
index 60bab29..9d67a5f 100755
--- a/t/t7816-grep-binary-pattern.sh
+++ b/t/t7816-grep-binary-pattern.sh
@@ -59,7 +59,7 @@
 	git commit -m.
 "
 
-# Simple fixed-string matching that can use kwset (no -i && non-ASCII)
+# Simple fixed-string matching
 nul_match P P P '-F' 'yQf'
 nul_match P P P '-F' 'yQx'
 nul_match P P P '-Fi' 'YQf'
@@ -78,7 +78,7 @@
 nul_match P P P '-F' 'æQ[ð]'
 nul_match P P P '-F' '[æ]Qð'
 
-# The -F kwset codepath can't handle -i && non-ASCII...
+# Matching pattern and subject case with -i
 nul_match P 1 1 '-i' '[æ]Qð'
 
 # ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 609615d..75b32ae 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -31,29 +31,36 @@
 	return 0;
 }
 
-static void consume_one(void *priv_, char *s, unsigned long size)
+static int consume_one(void *priv_, char *s, unsigned long size)
 {
 	struct xdiff_emit_state *priv = priv_;
 	char *ep;
 	while (size) {
 		unsigned long this_size;
+		int ret;
 		ep = memchr(s, '\n', size);
 		this_size = (ep == NULL) ? size : (ep - s + 1);
-		priv->line_fn(priv->consume_callback_data, s, this_size);
+		ret = priv->line_fn(priv->consume_callback_data, s, this_size);
+		if (ret)
+			return ret;
 		size -= this_size;
 		s += this_size;
 	}
+	return 0;
 }
 
 static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf)
 {
 	struct xdiff_emit_state *priv = priv_;
 	int i;
+	int stop = 0;
 
 	if (!priv->line_fn)
 		return 0;
 
 	for (i = 0; i < nbuf; i++) {
+		if (stop)
+			return 1;
 		if (mb[i].ptr[mb[i].size-1] != '\n') {
 			/* Incomplete line */
 			strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
@@ -62,17 +69,21 @@
 
 		/* we have a complete line */
 		if (!priv->remainder.len) {
-			consume_one(priv, mb[i].ptr, mb[i].size);
+			stop = consume_one(priv, mb[i].ptr, mb[i].size);
 			continue;
 		}
 		strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
-		consume_one(priv, priv->remainder.buf, priv->remainder.len);
+		stop = consume_one(priv, priv->remainder.buf, priv->remainder.len);
 		strbuf_reset(&priv->remainder);
 	}
+	if (stop)
+		return -1;
 	if (priv->remainder.len) {
-		consume_one(priv, priv->remainder.buf, priv->remainder.len);
+		stop = consume_one(priv, priv->remainder.buf, priv->remainder.len);
 		strbuf_reset(&priv->remainder);
 	}
+	if (stop)
+		return -1;
 	return 0;
 }
 
@@ -115,12 +126,6 @@
 	return xdl_diff(&a, &b, xpp, xecfg, xecb);
 }
 
-void discard_hunk_line(void *priv,
-		       long ob, long on, long nb, long nn,
-		       const char *func, long funclen)
-{
-}
-
 int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
 		  xdiff_emit_hunk_fn hunk_fn,
 		  xdiff_emit_line_fn line_fn,
diff --git a/xdiff-interface.h b/xdiff-interface.h
index 93df269..4301a7e 100644
--- a/xdiff-interface.h
+++ b/xdiff-interface.h
@@ -11,7 +11,28 @@
  */
 #define MAX_XDIFF_SIZE (1024UL * 1024 * 1023)
 
-typedef void (*xdiff_emit_line_fn)(void *, char *, unsigned long);
+/**
+ * The `xdiff_emit_line_fn` function can return 1 to abort early, or 0
+ * to continue processing. Note that doing so is an all-or-nothing
+ * affair, as returning 1 will return all the way to the top-level,
+ * e.g. the xdi_diff_outf() call to generate the diff.
+ *
+ * Thus returning 1 means you won't be getting any more diff lines. If
+ * you need something in-between those two options you'll to use
+ * `xdl_emit_hunk_consume_func_t` and implement your own version of
+ * xdl_emit_diff().
+ *
+ * We may extend the interface in the future to understand other more
+ * granular return values. While you should return 1 to exit early,
+ * doing so will currently make your early return indistinguishable
+ * from an error internal to xdiff, xdiff itself will see that
+ * non-zero return and translate it to -1.
+ *
+ * See "diff_grep" in diffcore-pickaxe.c for a trick to work around
+ * this, i.e. using the "consume_callback_data" to note the desired
+ * early return.
+ */
+typedef int (*xdiff_emit_line_fn)(void *, char *, unsigned long);
 typedef void (*xdiff_emit_hunk_fn)(void *data,
 				   long old_begin, long old_nr,
 				   long new_begin, long new_nr,
@@ -33,14 +54,6 @@
 extern int git_xmerge_style;
 
 /*
- * Can be used as a no-op hunk_fn for xdi_diff_outf(), since a NULL
- * one just sends the hunk line to the line_fn callback).
- */
-void discard_hunk_line(void *priv,
-		       long ob, long on, long nb, long nn,
-		       const char *func, long funclen);
-
-/*
  * Compare the strings l1 with l2 which are of size s1 and s2 respectively.
  * Returns 1 if the strings are deemed equal, 0 otherwise.
  * The `flags` given as XDF_WHITESPACE_FLAGS determine how white spaces
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 7a04605..b29deca 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -50,6 +50,7 @@
 
 /* xdemitconf_t.flags */
 #define XDL_EMIT_FUNCNAMES (1 << 0)
+#define XDL_EMIT_NO_HUNK_HDR (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
 
 /* merge simplification levels */
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 9d7d6c5..1cbf2b9 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -278,7 +278,8 @@
 				      s1 - 1, funclineprev);
 			funclineprev = s1 - 1;
 		}
-		if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
+		if (!(xecfg->flags & XDL_EMIT_NO_HUNK_HDR) &&
+		    xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
 				      func_line.buf, func_line.len, ecb) < 0)
 			return -1;