Merge branch 'jk/tree-walk-overflow'

Codepaths to walk tree objects have been audited for integer
overflows and hardened.

* jk/tree-walk-overflow:
  tree-walk: harden make_traverse_path() length computations
  tree-walk: add a strbuf wrapper for make_traverse_path()
  tree-walk: accept a raw length for traverse_path_len()
  tree-walk: use size_t consistently
  tree-walk: drop oid from traverse_info
  setup_traverse_info(): stop copying oid
diff --git a/Documentation/technical/api-tree-walking.txt b/Documentation/technical/api-tree-walking.txt
index bde1862..7962e32 100644
--- a/Documentation/technical/api-tree-walking.txt
+++ b/Documentation/technical/api-tree-walking.txt
@@ -62,9 +62,7 @@
 `setup_traverse_info`::
 
 	Initialize a `traverse_info` given the pathname of the tree to start
-	traversing from. The `base` argument is assumed to be the `path`
-	member of the `name_entry` being recursed into unless the tree is a
-	top-level tree in which case the empty string ("") is used.
+	traversing from.
 
 Walking
 -------
@@ -140,6 +138,10 @@
 	This utilizes the memory structure of a tree entry to avoid the
 	overhead of using a generic strlen().
 
+`strbuf_make_traverse_path`::
+
+	Convenience wrapper to `make_traverse_path` into a strbuf.
+
 Authors
 -------
 
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index 97b54ca..e72714a 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -180,8 +180,9 @@
 
 static char *traverse_path(const struct traverse_info *info, const struct name_entry *n)
 {
-	char *path = xmallocz(traverse_path_len(info, n) + the_hash_algo->rawsz);
-	return make_traverse_path(path, info, n);
+	struct strbuf buf = STRBUF_INIT;
+	strbuf_make_traverse_path(&buf, info, n->path, n->pathlen);
+	return strbuf_detach(&buf, NULL);
 }
 
 static void resolve(const struct traverse_info *info, struct name_entry *ours, struct name_entry *result)
diff --git a/cache-tree.c b/cache-tree.c
index 706ffcf..c22161f 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -713,7 +713,7 @@
 	if (!info->prev)
 		return root;
 	our_parent = find_cache_tree_from_traversal(root, info->prev);
-	return cache_tree_find(our_parent, info->name.path);
+	return cache_tree_find(our_parent, info->name);
 }
 
 int cache_tree_matches_traversal(struct cache_tree *root,
diff --git a/tree-walk.c b/tree-walk.c
index c20b62f..bea819d 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -170,40 +170,61 @@
 
 void setup_traverse_info(struct traverse_info *info, const char *base)
 {
-	int pathlen = strlen(base);
+	size_t pathlen = strlen(base);
 	static struct traverse_info dummy;
 
 	memset(info, 0, sizeof(*info));
 	if (pathlen && base[pathlen-1] == '/')
 		pathlen--;
 	info->pathlen = pathlen ? pathlen + 1 : 0;
-	info->name.path = base;
-	info->name.pathlen = pathlen;
-	if (pathlen) {
-		hashcpy(info->name.oid.hash, (const unsigned char *)base + pathlen + 1);
+	info->name = base;
+	info->namelen = pathlen;
+	if (pathlen)
 		info->prev = &dummy;
-	}
 }
 
-char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
+char *make_traverse_path(char *path, size_t pathlen,
+			 const struct traverse_info *info,
+			 const char *name, size_t namelen)
 {
-	int len = tree_entry_len(n);
-	int pathlen = info->pathlen;
+	/* Always points to the end of the name we're about to add */
+	size_t pos = st_add(info->pathlen, namelen);
 
-	path[pathlen + len] = 0;
+	if (pos >= pathlen)
+		BUG("too small buffer passed to make_traverse_path");
+
+	path[pos] = 0;
 	for (;;) {
-		memcpy(path + pathlen, n->path, len);
-		if (!pathlen)
+		if (pos < namelen)
+			BUG("traverse_info pathlen does not match strings");
+		pos -= namelen;
+		memcpy(path + pos, name, namelen);
+
+		if (!pos)
 			break;
-		path[--pathlen] = '/';
-		n = &info->name;
-		len = tree_entry_len(n);
+		path[--pos] = '/';
+
+		if (!info)
+			BUG("traverse_info ran out of list items");
+		name = info->name;
+		namelen = info->namelen;
 		info = info->prev;
-		pathlen -= len;
 	}
 	return path;
 }
 
+void strbuf_make_traverse_path(struct strbuf *out,
+			       const struct traverse_info *info,
+			       const char *name, size_t namelen)
+{
+	size_t len = traverse_path_len(info, namelen);
+
+	strbuf_grow(out, len);
+	make_traverse_path(out->buf + out->len, out->alloc - out->len,
+			   info, name, namelen);
+	strbuf_setlen(out, out->len + len);
+}
+
 struct tree_desc_skip {
 	struct tree_desc_skip *prev;
 	const void *ptr;
@@ -400,13 +421,12 @@
 		tx[i].d = t[i];
 
 	if (info->prev) {
-		strbuf_grow(&base, info->pathlen);
-		make_traverse_path(base.buf, info->prev, &info->name);
-		base.buf[info->pathlen-1] = '/';
-		strbuf_setlen(&base, info->pathlen);
-		traverse_path = xstrndup(base.buf, info->pathlen);
+		strbuf_make_traverse_path(&base, info->prev,
+					  info->name, info->namelen);
+		strbuf_addch(&base, '/');
+		traverse_path = xstrndup(base.buf, base.len);
 	} else {
-		traverse_path = xstrndup(info->name.path, info->pathlen);
+		traverse_path = xstrndup(info->name, info->pathlen);
 	}
 	info->traverse_path = traverse_path;
 	for (;;) {
diff --git a/tree-walk.h b/tree-walk.h
index 2a5db29..abe2caf 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -58,8 +58,11 @@
 struct traverse_info {
 	const char *traverse_path;
 	struct traverse_info *prev;
-	struct name_entry name;
-	int pathlen;
+	const char *name;
+	size_t namelen;
+	unsigned mode;
+
+	size_t pathlen;
 	struct pathspec *pathspec;
 
 	unsigned long df_conflicts;
@@ -69,12 +72,17 @@
 };
 
 int get_tree_entry(struct repository *, const struct object_id *, const char *, struct object_id *, unsigned short *);
-char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n);
+char *make_traverse_path(char *path, size_t pathlen, const struct traverse_info *info,
+			 const char *name, size_t namelen);
+void strbuf_make_traverse_path(struct strbuf *out,
+			       const struct traverse_info *info,
+			       const char *name, size_t namelen);
 void setup_traverse_info(struct traverse_info *info, const char *base);
 
-static inline int traverse_path_len(const struct traverse_info *info, const struct name_entry *n)
+static inline size_t traverse_path_len(const struct traverse_info *info,
+				       size_t namelen)
 {
-	return info->pathlen + tree_entry_len(n);
+	return st_add(info->pathlen, namelen);
 }
 
 /* in general, positive means "kind of interesting" */
diff --git a/unpack-trees.c b/unpack-trees.c
index 62276d4..50f257b 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -632,7 +632,7 @@
 	return ret;
 }
 
-static int find_cache_pos(struct traverse_info *, const struct name_entry *);
+static int find_cache_pos(struct traverse_info *, const char *p, size_t len);
 
 static void restore_cache_bottom(struct traverse_info *info, int bottom)
 {
@@ -651,7 +651,7 @@
 	if (o->diff_index_cached)
 		return 0;
 	ret = o->cache_bottom;
-	pos = find_cache_pos(info->prev, &info->name);
+	pos = find_cache_pos(info->prev, info->name, info->namelen);
 
 	if (pos < -1)
 		o->cache_bottom = -2 - pos;
@@ -686,21 +686,19 @@
 				      struct traverse_info *info)
 {
 	struct unpack_trees_options *o = info->data;
-	int len = traverse_path_len(info, names);
-	char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */);
+	struct strbuf name = STRBUF_INIT;
 	int pos;
 
-	make_traverse_path(name, info, names);
-	name[len++] = '/';
-	name[len] = '\0';
-	pos = index_name_pos(o->src_index, name, len);
+	strbuf_make_traverse_path(&name, info, names->path, names->pathlen);
+	strbuf_addch(&name, '/');
+	pos = index_name_pos(o->src_index, name.buf, name.len);
 	if (pos >= 0)
 		BUG("This is a directory and should not exist in index");
 	pos = -pos - 1;
-	if (!starts_with(o->src_index->cache[pos]->name, name) ||
-	    (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name)))
+	if (!starts_with(o->src_index->cache[pos]->name, name.buf) ||
+	    (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name.buf)))
 		BUG("pos must point at the first entry in this directory");
-	free(name);
+	strbuf_release(&name);
 	return pos;
 }
 
@@ -811,8 +809,10 @@
 	newinfo = *info;
 	newinfo.prev = info;
 	newinfo.pathspec = info->pathspec;
-	newinfo.name = *p;
-	newinfo.pathlen += tree_entry_len(p) + 1;
+	newinfo.name = p->path;
+	newinfo.namelen = p->pathlen;
+	newinfo.mode = p->mode;
+	newinfo.pathlen = st_add3(newinfo.pathlen, tree_entry_len(p), 1);
 	newinfo.df_conflicts |= df_conflicts;
 
 	/*
@@ -863,14 +863,18 @@
  * itself - the caller needs to do the final check for the cache
  * entry having more data at the end!
  */
-static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
+static int do_compare_entry_piecewise(const struct cache_entry *ce,
+				      const struct traverse_info *info,
+				      const char *name, size_t namelen,
+				      unsigned mode)
 {
-	int len, pathlen, ce_len;
+	int pathlen, ce_len;
 	const char *ce_name;
 
 	if (info->prev) {
 		int cmp = do_compare_entry_piecewise(ce, info->prev,
-						     &info->name);
+						     info->name, info->namelen,
+						     info->mode);
 		if (cmp)
 			return cmp;
 	}
@@ -884,15 +888,15 @@
 	ce_len -= pathlen;
 	ce_name = ce->name + pathlen;
 
-	len = tree_entry_len(n);
-	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
+	return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode);
 }
 
 static int do_compare_entry(const struct cache_entry *ce,
 			    const struct traverse_info *info,
-			    const struct name_entry *n)
+			    const char *name, size_t namelen,
+			    unsigned mode)
 {
-	int len, pathlen, ce_len;
+	int pathlen, ce_len;
 	const char *ce_name;
 	int cmp;
 
@@ -902,7 +906,7 @@
 	 * it is quicker to use the precomputed version.
 	 */
 	if (!info->traverse_path)
-		return do_compare_entry_piecewise(ce, info, n);
+		return do_compare_entry_piecewise(ce, info, name, namelen, mode);
 
 	cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
 	if (cmp)
@@ -917,13 +921,12 @@
 	ce_len -= pathlen;
 	ce_name = ce->name + pathlen;
 
-	len = tree_entry_len(n);
-	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
+	return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode);
 }
 
 static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
-	int cmp = do_compare_entry(ce, info, n);
+	int cmp = do_compare_entry(ce, info, n->path, n->pathlen, n->mode);
 	if (cmp)
 		return cmp;
 
@@ -931,7 +934,7 @@
 	 * Even if the beginning compared identically, the ce should
 	 * compare as bigger than a directory leading up to it!
 	 */
-	return ce_namelen(ce) > traverse_path_len(info, n);
+	return ce_namelen(ce) > traverse_path_len(info, tree_entry_len(n));
 }
 
 static int ce_in_traverse_path(const struct cache_entry *ce,
@@ -939,7 +942,8 @@
 {
 	if (!info->prev)
 		return 1;
-	if (do_compare_entry(ce, info->prev, &info->name))
+	if (do_compare_entry(ce, info->prev,
+			     info->name, info->namelen, info->mode))
 		return 0;
 	/*
 	 * If ce (blob) is the same name as the path (which is a tree
@@ -954,7 +958,7 @@
 	struct index_state *istate,
 	int is_transient)
 {
-	int len = traverse_path_len(info, n);
+	size_t len = traverse_path_len(info, tree_entry_len(n));
 	struct cache_entry *ce =
 		is_transient ?
 		make_empty_transient_cache_entry(len) :
@@ -964,7 +968,8 @@
 	ce->ce_flags = create_ce_flags(stage);
 	ce->ce_namelen = len;
 	oidcpy(&ce->oid, &n->oid);
-	make_traverse_path(ce->name, info, n);
+	/* len+1 because the cache_entry allocates space for NUL */
+	make_traverse_path(ce->name, len + 1, info, n->path, n->pathlen);
 
 	return ce;
 }
@@ -1057,13 +1062,12 @@
  * the directory.
  */
 static int find_cache_pos(struct traverse_info *info,
-			  const struct name_entry *p)
+			  const char *p, size_t p_len)
 {
 	int pos;
 	struct unpack_trees_options *o = info->data;
 	struct index_state *index = o->src_index;
 	int pfxlen = info->pathlen;
-	int p_len = tree_entry_len(p);
 
 	for (pos = o->cache_bottom; pos < index->cache_nr; pos++) {
 		const struct cache_entry *ce = index->cache[pos];
@@ -1099,7 +1103,7 @@
 			ce_len = ce_slash - ce_name;
 		else
 			ce_len = ce_namelen(ce) - pfxlen;
-		cmp = name_compare(p->path, p_len, ce_name, ce_len);
+		cmp = name_compare(p, p_len, ce_name, ce_len);
 		/*
 		 * Exact match; if we have a directory we need to
 		 * delay returning it.
@@ -1114,7 +1118,7 @@
 		 * E.g.  ce_name == "t-i", and p->path == "t"; we may
 		 * have "t/a" in the index.
 		 */
-		if (p_len < ce_len && !memcmp(ce_name, p->path, p_len) &&
+		if (p_len < ce_len && !memcmp(ce_name, p, p_len) &&
 		    ce_name[p_len] < '/')
 			continue; /* keep looking */
 		break;
@@ -1125,7 +1129,7 @@
 static struct cache_entry *find_cache_entry(struct traverse_info *info,
 					    const struct name_entry *p)
 {
-	int pos = find_cache_pos(info, p);
+	int pos = find_cache_pos(info, p->path, p->pathlen);
 	struct unpack_trees_options *o = info->data;
 
 	if (0 <= pos)
@@ -1138,10 +1142,10 @@
 {
 	if (info->prev) {
 		debug_path(info->prev);
-		if (*info->prev->name.path)
+		if (*info->prev->name)
 			putchar('/');
 	}
-	printf("%s", info->name.path);
+	printf("%s", info->name);
 }
 
 static void debug_name_entry(int i, struct name_entry *n)