shortlog: allow multiple groups to be specified

Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.

This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:

  git shortlog -ns --group=author --group=trailer:co-authored-by

to get a shortlog that counts authors and co-authors equally.

The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.

The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).

There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt
index 3db0db2..fd93cd4 100644
--- a/Documentation/git-shortlog.txt
+++ b/Documentation/git-shortlog.txt
@@ -51,6 +51,7 @@
 	Group commits based on `<type>`. If no `--group` option is
 	specified, the default is `author`. `<type>` is one of:
 +
+--
  - `author`, commits are grouped by author
  - `committer`, commits are grouped by committer (the same as `-c`)
  - `trailer:<field>`, the `<field>` is interpreted as a case-insensitive
@@ -68,6 +69,12 @@
 identity. If successful, the mailmap is applied and the email is omitted
 unless the `--email` option is specified. If the value cannot be parsed
 as an identity, it will be taken literally and completely.
+--
++
+If `--group` is specified multiple times, commits are counted under each
+value (but again, only once per unique value in that commit). For
+example, `git shortlog --group=author --group=trailer:co-authored-by`
+counts both authors and co-authors.
 
 -c::
 --committer::
diff --git a/builtin/log.c b/builtin/log.c
index d104d5c..ad0c523 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -1197,6 +1197,7 @@
 	log.in1 = 2;
 	log.in2 = 4;
 	log.file = rev->diffopt.file;
+	log.groups = SHORTLOG_GROUP_AUTHOR;
 	for (i = 0; i < nr; i++)
 		shortlog_add_commit(&log, list[i]);
 
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 28133ae..0a5c496 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -130,7 +130,10 @@
 	static const char *committer_match[2] = { "Commit: ", "committer " };
 	const char **match;
 
-	switch (log->group) {
+	if (HAS_MULTI_BITS(log->groups))
+		die(_("using multiple --group options with stdin is not supported"));
+
+	switch (log->groups) {
 	case SHORTLOG_GROUP_AUTHOR:
 		match = author_match;
 		break;
@@ -221,13 +224,13 @@
 }
 
 static void insert_records_from_trailers(struct shortlog *log,
+					 struct strset *dups,
 					 struct commit *commit,
 					 struct pretty_print_context *ctx,
 					 const char *oneline)
 {
 	struct trailer_iterator iter;
 	const char *commit_buffer, *body;
-	struct strset dups = STRSET_INIT;
 	struct strbuf ident = STRBUF_INIT;
 
 	/*
@@ -243,21 +246,20 @@
 	while (trailer_iterator_advance(&iter)) {
 		const char *value = iter.val.buf;
 
-		if (strcasecmp(iter.key.buf, log->trailer))
+		if (!string_list_has_string(&log->trailers, iter.key.buf))
 			continue;
 
 		strbuf_reset(&ident);
 		if (!parse_ident(log, &ident, value))
 			value = ident.buf;
 
-		if (strset_check_and_add(&dups, value))
+		if (strset_check_and_add(dups, value))
 			continue;
 		insert_one_record(log, value, oneline);
 	}
 	trailer_iterator_release(&iter);
 
 	strbuf_release(&ident);
-	strset_clear(&dups);
 	unuse_commit_buffer(commit, commit_buffer);
 }
 
@@ -265,6 +267,7 @@
 {
 	struct strbuf ident = STRBUF_INIT;
 	struct strbuf oneline = STRBUF_INIT;
+	struct strset dups = STRSET_INIT;
 	struct pretty_print_context ctx = {0};
 	const char *oneline_str;
 
@@ -282,24 +285,29 @@
 	}
 	oneline_str = oneline.len ? oneline.buf : "<none>";
 
-	switch (log->group) {
-	case SHORTLOG_GROUP_AUTHOR:
+	if (log->groups & SHORTLOG_GROUP_AUTHOR) {
+		strbuf_reset(&ident);
 		format_commit_message(commit,
 				      log->email ? "%aN <%aE>" : "%aN",
 				      &ident, &ctx);
-		insert_one_record(log, ident.buf, oneline_str);
-		break;
-	case SHORTLOG_GROUP_COMMITTER:
+		if (!HAS_MULTI_BITS(log->groups) ||
+		    !strset_check_and_add(&dups, ident.buf))
+			insert_one_record(log, ident.buf, oneline_str);
+	}
+	if (log->groups & SHORTLOG_GROUP_COMMITTER) {
+		strbuf_reset(&ident);
 		format_commit_message(commit,
 				      log->email ? "%cN <%cE>" : "%cN",
 				      &ident, &ctx);
-		insert_one_record(log, ident.buf, oneline_str);
-		break;
-	case SHORTLOG_GROUP_TRAILER:
-		insert_records_from_trailers(log, commit, &ctx, oneline_str);
-		break;
+		if (!HAS_MULTI_BITS(log->groups) ||
+		    !strset_check_and_add(&dups, ident.buf))
+			insert_one_record(log, ident.buf, oneline_str);
+	}
+	if (log->groups & SHORTLOG_GROUP_TRAILER) {
+		insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
 	}
 
+	strset_clear(&dups);
 	strbuf_release(&ident);
 	strbuf_release(&oneline);
 }
@@ -366,14 +374,16 @@
 	struct shortlog *log = opt->value;
 	const char *field;
 
-	if (unset || !strcasecmp(arg, "author"))
-		log->group = SHORTLOG_GROUP_AUTHOR;
+	if (unset) {
+		log->groups = 0;
+		string_list_clear(&log->trailers, 0);
+	} else if (!strcasecmp(arg, "author"))
+		log->groups |= SHORTLOG_GROUP_AUTHOR;
 	else if (!strcasecmp(arg, "committer"))
-		log->group = SHORTLOG_GROUP_COMMITTER;
+		log->groups |= SHORTLOG_GROUP_COMMITTER;
 	else if (skip_prefix(arg, "trailer:", &field)) {
-		log->group = SHORTLOG_GROUP_TRAILER;
-		free(log->trailer);
-		log->trailer = xstrdup(field);
+		log->groups |= SHORTLOG_GROUP_TRAILER;
+		string_list_append(&log->trailers, field);
 	} else
 		return error(_("unknown group type: %s"), arg);
 
@@ -391,6 +401,8 @@
 	log->wrap = DEFAULT_WRAPLEN;
 	log->in1 = DEFAULT_INDENT1;
 	log->in2 = DEFAULT_INDENT2;
+	log->trailers.strdup_strings = 1;
+	log->trailers.cmp = strcasecmp;
 }
 
 int cmd_shortlog(int argc, const char **argv, const char *prefix)
@@ -400,9 +412,9 @@
 	int nongit = !startup_info->have_repository;
 
 	const struct option options[] = {
-		OPT_SET_INT('c', "committer", &log.group,
-			    N_("Group by committer rather than author"),
-			    SHORTLOG_GROUP_COMMITTER),
+		OPT_BIT('c', "committer", &log.groups,
+			N_("Group by committer rather than author"),
+			SHORTLOG_GROUP_COMMITTER),
 		OPT_BOOL('n', "numbered", &log.sort_by_number,
 			 N_("sort output according to the number of commits per author")),
 		OPT_BOOL('s', "summary", &log.summary,
@@ -454,6 +466,10 @@
 	log.abbrev = rev.abbrev;
 	log.file = rev.diffopt.file;
 
+	if (!log.groups)
+		log.groups = SHORTLOG_GROUP_AUTHOR;
+	string_list_sort(&log.trailers);
+
 	/* assume HEAD if from a tty */
 	if (!nongit && !rev.pending.nr && isatty(0))
 		add_head_to_pending(&rev);
diff --git a/shortlog.h b/shortlog.h
index 89c2dbc..64be879 100644
--- a/shortlog.h
+++ b/shortlog.h
@@ -17,11 +17,11 @@
 	int abbrev;
 
 	enum {
-		SHORTLOG_GROUP_AUTHOR = 0,
-		SHORTLOG_GROUP_COMMITTER,
-		SHORTLOG_GROUP_TRAILER,
-	} group;
-	char *trailer;
+		SHORTLOG_GROUP_AUTHOR = (1 << 0),
+		SHORTLOG_GROUP_COMMITTER = (1 << 1),
+		SHORTLOG_GROUP_TRAILER = (1 << 2),
+	} groups;
+	struct string_list trailers;
 
 	char *common_repo_prefix;
 	int email;
diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh
index a62ee9e..3d5c4a2 100755
--- a/t/t4201-shortlog.sh
+++ b/t/t4201-shortlog.sh
@@ -282,4 +282,78 @@
 	test_cmp expect actual
 '
 
+test_expect_success 'shortlog can match multiple groups' '
+	git commit --allow-empty -F - <<-\EOF &&
+	subject one
+
+	this has two trailers that are distinct from the author; it will count
+	3 times in the output
+
+	Some-trailer: User A <a@example.com>
+	Another-trailer: User B <b@example.com>
+	EOF
+
+	git commit --allow-empty -F - <<-\EOF &&
+	subject two
+
+	this one has two trailers, one of which is a duplicate with the author;
+	it will only be counted once for them
+
+	Another-trailer: A U Thor <author@example.com>
+	Some-trailer: User B <b@example.com>
+	EOF
+
+	cat >expect <<-\EOF &&
+	     2	A U Thor
+	     2	User B
+	     1	User A
+	EOF
+	git shortlog -ns \
+		--group=author \
+		--group=trailer:some-trailer \
+		--group=trailer:another-trailer \
+		-2 HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'set up option selection tests' '
+	git commit --allow-empty -F - <<-\EOF
+	subject
+
+	body
+
+	Trailer-one: value-one
+	Trailer-two: value-two
+	EOF
+'
+
+test_expect_success '--no-group resets group list to author' '
+	cat >expect <<-\EOF &&
+	     1	A U Thor
+	EOF
+	git shortlog -ns \
+		--group=committer \
+		--group=trailer:trailer-one \
+		--no-group \
+		-1 HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--no-group resets trailer list' '
+	cat >expect <<-\EOF &&
+	     1	value-two
+	EOF
+	git shortlog -ns \
+		--group=trailer:trailer-one \
+		--no-group \
+		--group=trailer:trailer-two \
+		-1 HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'stdin with multiple groups reports error' '
+	git log >log &&
+	test_must_fail git shortlog --group=author --group=committer <log
+'
+
 test_done