convert: permit passing additional metadata to filter processes

There are a variety of situations where a filter process can make use of
some additional metadata.  For example, some people find the ident
filter too limiting and would like to include the commit or the branch
in their smudged files.  This information isn't available during
checkout as HEAD hasn't been updated at that point, and it wouldn't be
available in archives either.

Let's add a way to pass this metadata down to the filter.  We pass the
blob we're operating on, the treeish (preferring the commit over the
tree if one exists), and the ref we're operating on.  Note that we won't
pass this information in all cases, such as when renormalizing or when
we're performing diffs, since it doesn't make sense in those cases.

The data we currently get from the filter process looks like the
following:

  command=smudge
  pathname=git.c
  0000

With this change, we'll get data more like this:

  command=smudge
  pathname=git.c
  refname=refs/tags/v2.25.1
  treeish=c522f061d551c9bb8684a7c3859b2ece4499b56b
  blob=7be7ad34bd053884ec48923706e70c81719a8660
  0000

There are a couple things to note about this approach.  For operations
like checkout, treeish will always be a commit, since we cannot check
out individual trees, but for other operations, like archive, we can end
up operating on only a particular tree, so we'll provide only a tree as
the treeish.  Similar comments apply for refname, since there are a
variety of cases in which we won't have a ref.

This commit wires up the code to print this information, but doesn't
pass any of it at this point.  In a future commit, we'll have various
code paths pass the actual useful data down.

Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/apply.c b/apply.c
index bdc008f..144c19a 100644
--- a/apply.c
+++ b/apply.c
@@ -4349,7 +4349,7 @@
 	if (fd < 0)
 		return 1;
 
-	if (convert_to_working_tree(state->repo->index, path, buf, size, &nbuf)) {
+	if (convert_to_working_tree(state->repo->index, path, buf, size, &nbuf, NULL)) {
 		size = nbuf.len;
 		buf  = nbuf.buf;
 	}
diff --git a/archive.c b/archive.c
index a8da0fc..d9e92cc 100644
--- a/archive.c
+++ b/archive.c
@@ -85,7 +85,7 @@
 		size_t size = 0;
 
 		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
-		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf);
+		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, NULL);
 		if (commit)
 			format_subst(commit, buf.buf, buf.len, &buf);
 		buffer = strbuf_detach(&buf, &size);
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 272f9fc..545fed4 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -42,7 +42,7 @@
 			     oid_to_hex(oid), path);
 	if ((type == OBJ_BLOB) && S_ISREG(mode)) {
 		struct strbuf strbuf = STRBUF_INIT;
-		if (convert_to_working_tree(&the_index, path, *buf, *size, &strbuf)) {
+		if (convert_to_working_tree(&the_index, path, *buf, *size, &strbuf, NULL)) {
 			free(*buf);
 			*size = strbuf.len;
 			*buf = strbuf_detach(&strbuf, NULL);
diff --git a/convert.c b/convert.c
index 5ead3ce..6261921 100644
--- a/convert.c
+++ b/convert.c
@@ -797,6 +797,7 @@
 static int apply_multi_file_filter(const char *path, const char *src, size_t len,
 				   int fd, struct strbuf *dst, const char *cmd,
 				   const unsigned int wanted_capability,
+				   const struct checkout_metadata *meta,
 				   struct delayed_checkout *dco)
 {
 	int err;
@@ -855,6 +856,24 @@
 	if (err)
 		goto done;
 
+	if (meta && meta->refname) {
+		err = packet_write_fmt_gently(process->in, "ref=%s\n", meta->refname);
+		if (err)
+			goto done;
+	}
+
+	if (meta && !is_null_oid(&meta->treeish)) {
+		err = packet_write_fmt_gently(process->in, "treeish=%s\n", oid_to_hex(&meta->treeish));
+		if (err)
+			goto done;
+	}
+
+	if (meta && !is_null_oid(&meta->blob)) {
+		err = packet_write_fmt_gently(process->in, "blob=%s\n", oid_to_hex(&meta->blob));
+		if (err)
+			goto done;
+	}
+
 	if ((entry->supported_capabilities & CAP_DELAY) &&
 	    dco && dco->state == CE_CAN_DELAY) {
 		can_delay = 1;
@@ -971,6 +990,7 @@
 static int apply_filter(const char *path, const char *src, size_t len,
 			int fd, struct strbuf *dst, struct convert_driver *drv,
 			const unsigned int wanted_capability,
+			const struct checkout_metadata *meta,
 			struct delayed_checkout *dco)
 {
 	const char *cmd = NULL;
@@ -990,7 +1010,7 @@
 		return apply_single_file_filter(path, src, len, fd, dst, cmd);
 	else if (drv->process && *drv->process)
 		return apply_multi_file_filter(path, src, len, fd, dst,
-			drv->process, wanted_capability, dco);
+			drv->process, wanted_capability, meta, dco);
 
 	return 0;
 }
@@ -1368,7 +1388,7 @@
 	if (!ca.drv->required)
 		return 0;
 
-	return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN, NULL);
+	return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN, NULL, NULL);
 }
 
 const char *get_convert_attr_ascii(const struct index_state *istate, const char *path)
@@ -1406,7 +1426,7 @@
 
 	convert_attrs(istate, &ca, path);
 
-	ret |= apply_filter(path, src, len, -1, dst, ca.drv, CAP_CLEAN, NULL);
+	ret |= apply_filter(path, src, len, -1, dst, ca.drv, CAP_CLEAN, NULL, NULL);
 	if (!ret && ca.drv && ca.drv->required)
 		die(_("%s: clean filter '%s' failed"), path, ca.drv->name);
 
@@ -1441,7 +1461,7 @@
 	assert(ca.drv);
 	assert(ca.drv->clean || ca.drv->process);
 
-	if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN, NULL))
+	if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN, NULL, NULL))
 		die(_("%s: clean filter '%s' failed"), path, ca.drv->name);
 
 	encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);
@@ -1452,7 +1472,9 @@
 static int convert_to_working_tree_internal(const struct index_state *istate,
 					    const char *path, const char *src,
 					    size_t len, struct strbuf *dst,
-					    int normalizing, struct delayed_checkout *dco)
+					    int normalizing,
+					    const struct checkout_metadata *meta,
+					    struct delayed_checkout *dco)
 {
 	int ret = 0, ret_filter = 0;
 	struct conv_attrs ca;
@@ -1484,7 +1506,7 @@
 	}
 
 	ret_filter = apply_filter(
-		path, src, len, -1, dst, ca.drv, CAP_SMUDGE, dco);
+		path, src, len, -1, dst, ca.drv, CAP_SMUDGE, meta, dco);
 	if (!ret_filter && ca.drv && ca.drv->required)
 		die(_("%s: smudge filter %s failed"), path, ca.drv->name);
 
@@ -1494,22 +1516,24 @@
 int async_convert_to_working_tree(const struct index_state *istate,
 				  const char *path, const char *src,
 				  size_t len, struct strbuf *dst,
+				  const struct checkout_metadata *meta,
 				  void *dco)
 {
-	return convert_to_working_tree_internal(istate, path, src, len, dst, 0, dco);
+	return convert_to_working_tree_internal(istate, path, src, len, dst, 0, meta, dco);
 }
 
 int convert_to_working_tree(const struct index_state *istate,
 			    const char *path, const char *src,
-			    size_t len, struct strbuf *dst)
+			    size_t len, struct strbuf *dst,
+			    const struct checkout_metadata *meta)
 {
-	return convert_to_working_tree_internal(istate, path, src, len, dst, 0, NULL);
+	return convert_to_working_tree_internal(istate, path, src, len, dst, 0, meta, NULL);
 }
 
 int renormalize_buffer(const struct index_state *istate, const char *path,
 		       const char *src, size_t len, struct strbuf *dst)
 {
-	int ret = convert_to_working_tree_internal(istate, path, src, len, dst, 1, NULL);
+	int ret = convert_to_working_tree_internal(istate, path, src, len, dst, 1, NULL, NULL);
 	if (ret) {
 		src = dst->buf;
 		len = dst->len;
diff --git a/convert.h b/convert.h
index 3710969..894e01c 100644
--- a/convert.h
+++ b/convert.h
@@ -4,10 +4,10 @@
 #ifndef CONVERT_H
 #define CONVERT_H
 
+#include "hash.h"
 #include "string-list.h"
 
 struct index_state;
-struct object_id;
 struct strbuf;
 
 #define CONV_EOL_RNDTRP_DIE   (1<<0) /* Die if CRLF to LF to CRLF is different */
@@ -57,6 +57,12 @@
 	struct string_list paths;
 };
 
+struct checkout_metadata {
+	const char *refname;
+	struct object_id treeish;
+	struct object_id blob;
+};
+
 extern enum eol core_eol;
 extern char *check_roundtrip_encoding;
 const char *get_cached_convert_stats_ascii(const struct index_state *istate,
@@ -71,10 +77,12 @@
 		   struct strbuf *dst, int conv_flags);
 int convert_to_working_tree(const struct index_state *istate,
 			    const char *path, const char *src,
-			    size_t len, struct strbuf *dst);
+			    size_t len, struct strbuf *dst,
+			    const struct checkout_metadata *meta);
 int async_convert_to_working_tree(const struct index_state *istate,
 				  const char *path, const char *src,
 				  size_t len, struct strbuf *dst,
+				  const struct checkout_metadata *meta,
 				  void *dco);
 int async_query_available_blobs(const char *cmd,
 				struct string_list *available_paths);
diff --git a/diff.c b/diff.c
index f2cfbf2..12761c8 100644
--- a/diff.c
+++ b/diff.c
@@ -4071,7 +4071,7 @@
 	if (!temp->tempfile)
 		die_errno("unable to create temp-file");
 	if (convert_to_working_tree(istate, path,
-			(const char *)blob, (size_t)size, &buf)) {
+			(const char *)blob, (size_t)size, &buf, NULL)) {
 		blob = buf.buf;
 		size = buf.len;
 	}
diff --git a/entry.c b/entry.c
index 53380bb..4b2d9b2 100644
--- a/entry.c
+++ b/entry.c
@@ -315,13 +315,13 @@
 		 */
 		if (dco && dco->state != CE_NO_DELAY) {
 			ret = async_convert_to_working_tree(state->istate, ce->name, new_blob,
-							    size, &buf, dco);
+							    size, &buf, NULL, dco);
 			if (ret && string_list_has_string(&dco->paths, ce->name)) {
 				free(new_blob);
 				goto delayed;
 			}
 		} else
-			ret = convert_to_working_tree(state->istate, ce->name, new_blob, size, &buf);
+			ret = convert_to_working_tree(state->istate, ce->name, new_blob, size, &buf, NULL);
 
 		if (ret) {
 			free(new_blob);
diff --git a/merge-recursive.c b/merge-recursive.c
index 7a4e6f2..d92e2ac 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -958,7 +958,7 @@
 		if (S_ISREG(contents->mode)) {
 			struct strbuf strbuf = STRBUF_INIT;
 			if (convert_to_working_tree(opt->repo->index,
-						    path, buf, size, &strbuf)) {
+						    path, buf, size, &strbuf, NULL)) {
 				free(buf);
 				size = strbuf.len;
 				buf = strbuf_detach(&strbuf, NULL);