Merge branch 'jk/shortlog-group-by-trailer'

"git shortlog" has been taught to group commits by the contents of
the trailer lines, like "Reviewed-by:", "Coauthored-by:", etc.

* jk/shortlog-group-by-trailer:
  shortlog: allow multiple groups to be specified
  shortlog: parse trailer idents
  shortlog: rename parse_stdin_ident()
  shortlog: de-duplicate trailer values
  shortlog: match commit trailers with --group
  trailer: add interface for iterating over commit trailers
  shortlog: add grouping option
  shortlog: change "author" variables to "ident"
diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt
index a72ea7f..fd93cd4 100644
--- a/Documentation/git-shortlog.txt
+++ b/Documentation/git-shortlog.txt
@@ -47,9 +47,38 @@
 
 	Each pretty-printed commit will be rewrapped before it is shown.
 
+--group=<type>::
+	Group commits based on `<type>`. If no `--group` option is
+	specified, the default is `author`. `<type>` is one of:
++
+--
+ - `author`, commits are grouped by author
+ - `committer`, commits are grouped by committer (the same as `-c`)
+ - `trailer:<field>`, the `<field>` is interpreted as a case-insensitive
+   commit message trailer (see linkgit:git-interpret-trailers[1]). For
+   example, if your project uses `Reviewed-by` trailers, you might want
+   to see who has been reviewing with
+   `git shortlog -ns --group=trailer:reviewed-by`.
++
+Note that commits that do not include the trailer will not be counted.
+Likewise, commits with multiple trailers (e.g., multiple signoffs) may
+be counted more than once (but only once per unique trailer value in
+that commit).
++
+Shortlog will attempt to parse each trailer value as a `name <email>`
+identity. If successful, the mailmap is applied and the email is omitted
+unless the `--email` option is specified. If the value cannot be parsed
+as an identity, it will be taken literally and completely.
+--
++
+If `--group` is specified multiple times, commits are counted under each
+value (but again, only once per unique value in that commit). For
+example, `git shortlog --group=author --group=trailer:co-authored-by`
+counts both authors and co-authors.
+
 -c::
 --committer::
-	Collect and show committer identities instead of authors.
+	This is an alias for `--group=committer`.
 
 -w[<width>[,<indent1>[,<indent2>]]]::
 	Linewrap the output by wrapping each line at `width`.  The first
diff --git a/builtin/log.c b/builtin/log.c
index b8824d8..7f27e9e 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -1195,6 +1195,7 @@
 	log.in1 = 2;
 	log.in2 = 4;
 	log.file = rev->diffopt.file;
+	log.groups = SHORTLOG_GROUP_AUTHOR;
 	for (i = 0; i < nr; i++)
 		shortlog_add_commit(&log, list[i]);
 
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index c856c58..0a5c496 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -9,6 +9,7 @@
 #include "mailmap.h"
 #include "shortlog.h"
 #include "parse-options.h"
+#include "trailer.h"
 
 static char const * const shortlog_usage[] = {
 	N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"),
@@ -49,12 +50,12 @@
 }
 
 static void insert_one_record(struct shortlog *log,
-			      const char *author,
+			      const char *ident,
 			      const char *oneline)
 {
 	struct string_list_item *item;
 
-	item = string_list_insert(&log->list, author);
+	item = string_list_insert(&log->list, ident);
 
 	if (log->summary)
 		item->util = (void *)(UTIL_TO_INT(item) + 1);
@@ -97,8 +98,8 @@
 	}
 }
 
-static int parse_stdin_author(struct shortlog *log,
-			       struct strbuf *out, const char *in)
+static int parse_ident(struct shortlog *log,
+		       struct strbuf *out, const char *in)
 {
 	const char *mailbuf, *namebuf;
 	size_t namelen, maillen;
@@ -122,18 +123,33 @@
 
 static void read_from_stdin(struct shortlog *log)
 {
-	struct strbuf author = STRBUF_INIT;
-	struct strbuf mapped_author = STRBUF_INIT;
+	struct strbuf ident = STRBUF_INIT;
+	struct strbuf mapped_ident = STRBUF_INIT;
 	struct strbuf oneline = STRBUF_INIT;
 	static const char *author_match[2] = { "Author: ", "author " };
 	static const char *committer_match[2] = { "Commit: ", "committer " };
 	const char **match;
 
-	match = log->committer ? committer_match : author_match;
-	while (strbuf_getline_lf(&author, stdin) != EOF) {
+	if (HAS_MULTI_BITS(log->groups))
+		die(_("using multiple --group options with stdin is not supported"));
+
+	switch (log->groups) {
+	case SHORTLOG_GROUP_AUTHOR:
+		match = author_match;
+		break;
+	case SHORTLOG_GROUP_COMMITTER:
+		match = committer_match;
+		break;
+	case SHORTLOG_GROUP_TRAILER:
+		die(_("using --group=trailer with stdin is not supported"));
+	default:
+		BUG("unhandled shortlog group");
+	}
+
+	while (strbuf_getline_lf(&ident, stdin) != EOF) {
 		const char *v;
-		if (!skip_prefix(author.buf, match[0], &v) &&
-		    !skip_prefix(author.buf, match[1], &v))
+		if (!skip_prefix(ident.buf, match[0], &v) &&
+		    !skip_prefix(ident.buf, match[1], &v))
 			continue;
 		while (strbuf_getline_lf(&oneline, stdin) != EOF &&
 		       oneline.len)
@@ -142,23 +158,118 @@
 		       !oneline.len)
 			; /* discard blanks */
 
-		strbuf_reset(&mapped_author);
-		if (parse_stdin_author(log, &mapped_author, v) < 0)
+		strbuf_reset(&mapped_ident);
+		if (parse_ident(log, &mapped_ident, v) < 0)
 			continue;
 
-		insert_one_record(log, mapped_author.buf, oneline.buf);
+		insert_one_record(log, mapped_ident.buf, oneline.buf);
 	}
-	strbuf_release(&author);
-	strbuf_release(&mapped_author);
+	strbuf_release(&ident);
+	strbuf_release(&mapped_ident);
 	strbuf_release(&oneline);
 }
 
+struct strset_item {
+	struct hashmap_entry ent;
+	char value[FLEX_ARRAY];
+};
+
+struct strset {
+	struct hashmap map;
+};
+
+#define STRSET_INIT { { NULL } }
+
+static int strset_item_hashcmp(const void *hash_data,
+			       const struct hashmap_entry *entry,
+			       const struct hashmap_entry *entry_or_key,
+			       const void *keydata)
+{
+	const struct strset_item *a, *b;
+
+	a = container_of(entry, const struct strset_item, ent);
+	if (keydata)
+		return strcmp(a->value, keydata);
+
+	b = container_of(entry_or_key, const struct strset_item, ent);
+	return strcmp(a->value, b->value);
+}
+
+/*
+ * Adds "str" to the set if it was not already present; returns true if it was
+ * already there.
+ */
+static int strset_check_and_add(struct strset *ss, const char *str)
+{
+	unsigned int hash = strhash(str);
+	struct strset_item *item;
+
+	if (!ss->map.table)
+		hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0);
+
+	if (hashmap_get_from_hash(&ss->map, hash, str))
+		return 1;
+
+	FLEX_ALLOC_STR(item, value, str);
+	hashmap_entry_init(&item->ent, hash);
+	hashmap_add(&ss->map, &item->ent);
+	return 0;
+}
+
+static void strset_clear(struct strset *ss)
+{
+	if (!ss->map.table)
+		return;
+	hashmap_free_entries(&ss->map, struct strset_item, ent);
+}
+
+static void insert_records_from_trailers(struct shortlog *log,
+					 struct strset *dups,
+					 struct commit *commit,
+					 struct pretty_print_context *ctx,
+					 const char *oneline)
+{
+	struct trailer_iterator iter;
+	const char *commit_buffer, *body;
+	struct strbuf ident = STRBUF_INIT;
+
+	/*
+	 * Using format_commit_message("%B") would be simpler here, but
+	 * this saves us copying the message.
+	 */
+	commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding);
+	body = strstr(commit_buffer, "\n\n");
+	if (!body)
+		return;
+
+	trailer_iterator_init(&iter, body);
+	while (trailer_iterator_advance(&iter)) {
+		const char *value = iter.val.buf;
+
+		if (!string_list_has_string(&log->trailers, iter.key.buf))
+			continue;
+
+		strbuf_reset(&ident);
+		if (!parse_ident(log, &ident, value))
+			value = ident.buf;
+
+		if (strset_check_and_add(dups, value))
+			continue;
+		insert_one_record(log, value, oneline);
+	}
+	trailer_iterator_release(&iter);
+
+	strbuf_release(&ident);
+	unuse_commit_buffer(commit, commit_buffer);
+}
+
 void shortlog_add_commit(struct shortlog *log, struct commit *commit)
 {
-	struct strbuf author = STRBUF_INIT;
+	struct strbuf ident = STRBUF_INIT;
 	struct strbuf oneline = STRBUF_INIT;
+	struct strset dups = STRSET_INIT;
 	struct pretty_print_context ctx = {0};
-	const char *fmt;
+	const char *oneline_str;
 
 	ctx.fmt = CMIT_FMT_USERFORMAT;
 	ctx.abbrev = log->abbrev;
@@ -166,21 +277,38 @@
 	ctx.date_mode.type = DATE_NORMAL;
 	ctx.output_encoding = get_log_output_encoding();
 
-	fmt = log->committer ?
-		(log->email ? "%cN <%cE>" : "%cN") :
-		(log->email ? "%aN <%aE>" : "%aN");
-
-	format_commit_message(commit, fmt, &author, &ctx);
 	if (!log->summary) {
 		if (log->user_format)
 			pretty_print_commit(&ctx, commit, &oneline);
 		else
 			format_commit_message(commit, "%s", &oneline, &ctx);
 	}
+	oneline_str = oneline.len ? oneline.buf : "<none>";
 
-	insert_one_record(log, author.buf, oneline.len ? oneline.buf : "<none>");
+	if (log->groups & SHORTLOG_GROUP_AUTHOR) {
+		strbuf_reset(&ident);
+		format_commit_message(commit,
+				      log->email ? "%aN <%aE>" : "%aN",
+				      &ident, &ctx);
+		if (!HAS_MULTI_BITS(log->groups) ||
+		    !strset_check_and_add(&dups, ident.buf))
+			insert_one_record(log, ident.buf, oneline_str);
+	}
+	if (log->groups & SHORTLOG_GROUP_COMMITTER) {
+		strbuf_reset(&ident);
+		format_commit_message(commit,
+				      log->email ? "%cN <%cE>" : "%cN",
+				      &ident, &ctx);
+		if (!HAS_MULTI_BITS(log->groups) ||
+		    !strset_check_and_add(&dups, ident.buf))
+			insert_one_record(log, ident.buf, oneline_str);
+	}
+	if (log->groups & SHORTLOG_GROUP_TRAILER) {
+		insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
+	}
 
-	strbuf_release(&author);
+	strset_clear(&dups);
+	strbuf_release(&ident);
 	strbuf_release(&oneline);
 }
 
@@ -241,6 +369,28 @@
 	return 0;
 }
 
+static int parse_group_option(const struct option *opt, const char *arg, int unset)
+{
+	struct shortlog *log = opt->value;
+	const char *field;
+
+	if (unset) {
+		log->groups = 0;
+		string_list_clear(&log->trailers, 0);
+	} else if (!strcasecmp(arg, "author"))
+		log->groups |= SHORTLOG_GROUP_AUTHOR;
+	else if (!strcasecmp(arg, "committer"))
+		log->groups |= SHORTLOG_GROUP_COMMITTER;
+	else if (skip_prefix(arg, "trailer:", &field)) {
+		log->groups |= SHORTLOG_GROUP_TRAILER;
+		string_list_append(&log->trailers, field);
+	} else
+		return error(_("unknown group type: %s"), arg);
+
+	return 0;
+}
+
+
 void shortlog_init(struct shortlog *log)
 {
 	memset(log, 0, sizeof(*log));
@@ -251,6 +401,8 @@
 	log->wrap = DEFAULT_WRAPLEN;
 	log->in1 = DEFAULT_INDENT1;
 	log->in2 = DEFAULT_INDENT2;
+	log->trailers.strdup_strings = 1;
+	log->trailers.cmp = strcasecmp;
 }
 
 int cmd_shortlog(int argc, const char **argv, const char *prefix)
@@ -260,8 +412,9 @@
 	int nongit = !startup_info->have_repository;
 
 	const struct option options[] = {
-		OPT_BOOL('c', "committer", &log.committer,
-			 N_("Group by committer rather than author")),
+		OPT_BIT('c', "committer", &log.groups,
+			N_("Group by committer rather than author"),
+			SHORTLOG_GROUP_COMMITTER),
 		OPT_BOOL('n', "numbered", &log.sort_by_number,
 			 N_("sort output according to the number of commits per author")),
 		OPT_BOOL('s', "summary", &log.summary,
@@ -271,6 +424,8 @@
 		OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
 			N_("Linewrap output"), PARSE_OPT_OPTARG,
 			&parse_wrap_args),
+		OPT_CALLBACK(0, "group", &log, N_("field"),
+			N_("Group by field"), parse_group_option),
 		OPT_END(),
 	};
 
@@ -311,6 +466,10 @@
 	log.abbrev = rev.abbrev;
 	log.file = rev.diffopt.file;
 
+	if (!log.groups)
+		log.groups = SHORTLOG_GROUP_AUTHOR;
+	string_list_sort(&log.trailers);
+
 	/* assume HEAD if from a tty */
 	if (!nongit && !rev.pending.nr && isatty(0))
 		add_head_to_pending(&rev);
diff --git a/shortlog.h b/shortlog.h
index 2fa61c4..64be879 100644
--- a/shortlog.h
+++ b/shortlog.h
@@ -15,7 +15,13 @@
 	int in2;
 	int user_format;
 	int abbrev;
-	int committer;
+
+	enum {
+		SHORTLOG_GROUP_AUTHOR = (1 << 0),
+		SHORTLOG_GROUP_COMMITTER = (1 << 1),
+		SHORTLOG_GROUP_TRAILER = (1 << 2),
+	} groups;
+	struct string_list trailers;
 
 	char *common_repo_prefix;
 	int email;
diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh
index d3a7ce6..3d5c4a2 100755
--- a/t/t4201-shortlog.sh
+++ b/t/t4201-shortlog.sh
@@ -215,4 +215,145 @@
 	test_cmp expect actual
 '
 
+test_expect_success '--group=committer is the same as --committer' '
+	git shortlog -ns --group=committer HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'shortlog --group=trailer:signed-off-by' '
+	git commit --allow-empty -m foo -s &&
+	GIT_COMMITTER_NAME="SOB One" \
+	GIT_COMMITTER_EMAIL=sob@example.com \
+		git commit --allow-empty -m foo -s &&
+	git commit --allow-empty --amend --no-edit -s &&
+	cat >expect <<-\EOF &&
+	     2	C O Mitter <committer@example.com>
+	     1	SOB One <sob@example.com>
+	EOF
+	git shortlog -nse --group=trailer:signed-off-by HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'trailer idents are split' '
+	cat >expect <<-\EOF &&
+	     2	C O Mitter
+	     1	SOB One
+	EOF
+	git shortlog -ns --group=trailer:signed-off-by HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'trailer idents are mailmapped' '
+	cat >expect <<-\EOF &&
+	     2	C O Mitter
+	     1	Another Name
+	EOF
+	echo "Another Name <sob@example.com>" >mail.map &&
+	git -c mailmap.file=mail.map shortlog -ns \
+		--group=trailer:signed-off-by HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'shortlog de-duplicates trailers in a single commit' '
+	git commit --allow-empty -F - <<-\EOF &&
+	subject one
+
+	this message has two distinct values, plus a repeat
+
+	Repeated-trailer: Foo
+	Repeated-trailer: Bar
+	Repeated-trailer: Foo
+	EOF
+
+	git commit --allow-empty -F - <<-\EOF &&
+	subject two
+
+	similar to the previous, but without the second distinct value
+
+	Repeated-trailer: Foo
+	Repeated-trailer: Foo
+	EOF
+
+	cat >expect <<-\EOF &&
+	     2	Foo
+	     1	Bar
+	EOF
+	git shortlog -ns --group=trailer:repeated-trailer -2 HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'shortlog can match multiple groups' '
+	git commit --allow-empty -F - <<-\EOF &&
+	subject one
+
+	this has two trailers that are distinct from the author; it will count
+	3 times in the output
+
+	Some-trailer: User A <a@example.com>
+	Another-trailer: User B <b@example.com>
+	EOF
+
+	git commit --allow-empty -F - <<-\EOF &&
+	subject two
+
+	this one has two trailers, one of which is a duplicate with the author;
+	it will only be counted once for them
+
+	Another-trailer: A U Thor <author@example.com>
+	Some-trailer: User B <b@example.com>
+	EOF
+
+	cat >expect <<-\EOF &&
+	     2	A U Thor
+	     2	User B
+	     1	User A
+	EOF
+	git shortlog -ns \
+		--group=author \
+		--group=trailer:some-trailer \
+		--group=trailer:another-trailer \
+		-2 HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'set up option selection tests' '
+	git commit --allow-empty -F - <<-\EOF
+	subject
+
+	body
+
+	Trailer-one: value-one
+	Trailer-two: value-two
+	EOF
+'
+
+test_expect_success '--no-group resets group list to author' '
+	cat >expect <<-\EOF &&
+	     1	A U Thor
+	EOF
+	git shortlog -ns \
+		--group=committer \
+		--group=trailer:trailer-one \
+		--no-group \
+		-1 HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--no-group resets trailer list' '
+	cat >expect <<-\EOF &&
+	     1	value-two
+	EOF
+	git shortlog -ns \
+		--group=trailer:trailer-one \
+		--no-group \
+		--group=trailer:trailer-two \
+		-1 HEAD >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'stdin with multiple groups reports error' '
+	git log >log &&
+	test_must_fail git shortlog --group=author --group=committer <log
+'
+
 test_done
diff --git a/trailer.c b/trailer.c
index 68dabc2..3f7391d 100644
--- a/trailer.c
+++ b/trailer.c
@@ -1183,3 +1183,39 @@
 	format_trailer_info(out, &info, opts);
 	trailer_info_release(&info);
 }
+
+void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
+{
+	struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
+	strbuf_init(&iter->key, 0);
+	strbuf_init(&iter->val, 0);
+	opts.no_divider = 1;
+	trailer_info_get(&iter->info, msg, &opts);
+	iter->cur = 0;
+}
+
+int trailer_iterator_advance(struct trailer_iterator *iter)
+{
+	while (iter->cur < iter->info.trailer_nr) {
+		char *trailer = iter->info.trailers[iter->cur++];
+		int separator_pos = find_separator(trailer, separators);
+
+		if (separator_pos < 1)
+			continue; /* not a real trailer */
+
+		strbuf_reset(&iter->key);
+		strbuf_reset(&iter->val);
+		parse_trailer(&iter->key, &iter->val, NULL,
+			      trailer, separator_pos);
+		unfold_value(&iter->val);
+		return 1;
+	}
+	return 0;
+}
+
+void trailer_iterator_release(struct trailer_iterator *iter)
+{
+	trailer_info_release(&iter->info);
+	strbuf_release(&iter->val);
+	strbuf_release(&iter->key);
+}
diff --git a/trailer.h b/trailer.h
index 203acf4..cd93e7d 100644
--- a/trailer.h
+++ b/trailer.h
@@ -2,8 +2,7 @@
 #define TRAILER_H
 
 #include "list.h"
-
-struct strbuf;
+#include "strbuf.h"
 
 enum trailer_where {
 	WHERE_DEFAULT,
@@ -103,4 +102,46 @@
 void format_trailers_from_commit(struct strbuf *out, const char *msg,
 				 const struct process_trailer_options *opts);
 
+/*
+ * An interface for iterating over the trailers found in a particular commit
+ * message. Use like:
+ *
+ *   struct trailer_iterator iter;
+ *   trailer_iterator_init(&iter, msg);
+ *   while (trailer_iterator_advance(&iter))
+ *      ... do something with iter.key and iter.val ...
+ *   trailer_iterator_release(&iter);
+ */
+struct trailer_iterator {
+	struct strbuf key;
+	struct strbuf val;
+
+	/* private */
+	struct trailer_info info;
+	size_t cur;
+};
+
+/*
+ * Initialize "iter" in preparation for walking over the trailers in the commit
+ * message "msg". The "msg" pointer must remain valid until the iterator is
+ * released.
+ *
+ * After initializing, note that key/val will not yet point to any trailer.
+ * Call advance() to parse the first one (if any).
+ */
+void trailer_iterator_init(struct trailer_iterator *iter, const char *msg);
+
+/*
+ * Advance to the next trailer of the iterator. Returns 0 if there is no such
+ * trailer, and 1 otherwise. The key and value of the trailer can be
+ * fetched from the iter->key and iter->value fields (which are valid
+ * only until the next advance).
+ */
+int trailer_iterator_advance(struct trailer_iterator *iter);
+
+/*
+ * Release all resources associated with the trailer iteration.
+ */
+void trailer_iterator_release(struct trailer_iterator *iter);
+
 #endif /* TRAILER_H */