Merge branch 'rs/archive-add-file'

"git archive" learns the "--add-file" option to include untracked
files in a snapshot from a tree-ish.

* rs/archive-add-file:
  Makefile: use git-archive --add-file
  archive: add --add-file
  archive: read short blobs in archive.c::write_archive_entry()
diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
index cfa1e4e..9f81728 100644
--- a/Documentation/git-archive.txt
+++ b/Documentation/git-archive.txt
@@ -55,6 +55,12 @@
 --output=<file>::
 	Write the archive to <file> instead of stdout.
 
+--add-file=<file>::
+	Add a non-tracked file to the archive.  Can be repeated to add
+	multiple files.  The path of the file in the archive is built by
+	concatenating the value of the last `--prefix` option (if any)
+	before this `--add-file` and the basename of <file>.
+
 --worktree-attributes::
 	Look for attributes in .gitattributes files in the working tree
 	as well (see <<ATTRIBUTES>>).
diff --git a/Makefile b/Makefile
index fb521da..5311b1d 100644
--- a/Makefile
+++ b/Makefile
@@ -3057,32 +3057,29 @@
 # With GNU tar, "--mode=u+rwX,og+rX,og-w" would be a good idea, for example.
 TAR_DIST_EXTRA_OPTS =
 GIT_TARNAME = git-$(GIT_VERSION)
+GIT_ARCHIVE_EXTRA_FILES = \
+	--prefix=$(GIT_TARNAME)/ \
+	--add-file=configure \
+	--add-file=$(GIT_TARNAME)/version \
+	--prefix=$(GIT_TARNAME)/git-gui/ \
+	--add-file=$(GIT_TARNAME)/git-gui/version
+ifdef DC_SHA1_SUBMODULE
+GIT_ARCHIVE_EXTRA_FILES += \
+	--prefix=$(GIT_TARNAME)/sha1collisiondetection/ \
+	--add-file=sha1collisiondetection/LICENSE.txt \
+	--prefix=$(GIT_TARNAME)/sha1collisiondetection/lib/ \
+	--add-file=sha1collisiondetection/lib/sha1.c \
+	--add-file=sha1collisiondetection/lib/sha1.h \
+	--add-file=sha1collisiondetection/lib/ubc_check.c \
+	--add-file=sha1collisiondetection/lib/ubc_check.h
+endif
 dist: git-archive$(X) configure
-	./git-archive --format=tar \
-		--prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar
 	@mkdir -p $(GIT_TARNAME)
-	@cp configure $(GIT_TARNAME)
 	@echo $(GIT_VERSION) > $(GIT_TARNAME)/version
 	@$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version
-	$(TAR) rf $(GIT_TARNAME).tar $(TAR_DIST_EXTRA_OPTS) \
-		$(GIT_TARNAME)/configure \
-		$(GIT_TARNAME)/version \
-		$(GIT_TARNAME)/git-gui/version
-ifdef DC_SHA1_SUBMODULE
-	@mkdir -p $(GIT_TARNAME)/sha1collisiondetection/lib
-	@cp sha1collisiondetection/LICENSE.txt \
-		$(GIT_TARNAME)/sha1collisiondetection/
-	@cp sha1collisiondetection/LICENSE.txt \
-		$(GIT_TARNAME)/sha1collisiondetection/
-	@cp sha1collisiondetection/lib/sha1.[ch] \
-		$(GIT_TARNAME)/sha1collisiondetection/lib/
-	@cp sha1collisiondetection/lib/ubc_check.[ch] \
-		$(GIT_TARNAME)/sha1collisiondetection/lib/
-	$(TAR) rf $(GIT_TARNAME).tar $(TAR_DIST_EXTRA_OPTS) \
-		$(GIT_TARNAME)/sha1collisiondetection/LICENSE.txt \
-		$(GIT_TARNAME)/sha1collisiondetection/lib/sha1.[ch] \
-		$(GIT_TARNAME)/sha1collisiondetection/lib/ubc_check.[ch]
-endif
+	./git-archive --format=tar \
+		$(GIT_ARCHIVE_EXTRA_FILES) \
+		--prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar
 	@$(RM) -r $(GIT_TARNAME)
 	gzip -f -9 $(GIT_TARNAME).tar
 
diff --git a/archive-tar.c b/archive-tar.c
index 5ceec36..f1a1447 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -242,13 +242,12 @@
 static int write_tar_entry(struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
-			   unsigned int mode)
+			   unsigned int mode,
+			   void *buffer, unsigned long size)
 {
 	struct ustar_header header;
 	struct strbuf ext_header = STRBUF_INIT;
-	unsigned int old_mode = mode;
-	unsigned long size, size_in_header;
-	void *buffer;
+	unsigned long size_in_header;
 	int err = 0;
 
 	memset(&header, 0, sizeof(header));
@@ -282,20 +281,6 @@
 	} else
 		memcpy(header.name, path, pathlen);
 
-	if (S_ISREG(mode) && !args->convert &&
-	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
-	    size > big_file_threshold)
-		buffer = NULL;
-	else if (S_ISLNK(mode) || S_ISREG(mode)) {
-		enum object_type type;
-		buffer = object_file_to_archive(args, path, oid, old_mode, &type, &size);
-		if (!buffer)
-			return error(_("cannot read %s"), oid_to_hex(oid));
-	} else {
-		buffer = NULL;
-		size = 0;
-	}
-
 	if (S_ISLNK(mode)) {
 		if (size > sizeof(header.linkname)) {
 			xsnprintf(header.linkname, sizeof(header.linkname),
@@ -326,7 +311,6 @@
 		else
 			err = stream_blocked(args->repo, oid);
 	}
-	free(buffer);
 	return err;
 }
 
diff --git a/archive-zip.c b/archive-zip.c
index e9f4262..2961e01 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -285,7 +285,8 @@
 static int write_zip_entry(struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
-			   unsigned int mode)
+			   unsigned int mode,
+			   void *buffer, unsigned long size)
 {
 	struct zip_local_header header;
 	uintmax_t offset = zip_offset;
@@ -299,10 +300,8 @@
 	enum zip_method method;
 	unsigned char *out;
 	void *deflated = NULL;
-	void *buffer;
 	struct git_istream *stream = NULL;
 	unsigned long flags = 0;
-	unsigned long size;
 	int is_binary = -1;
 	const char *path_without_prefix = path + args->baselen;
 	unsigned int creator_version = 0;
@@ -328,13 +327,8 @@
 		method = ZIP_METHOD_STORE;
 		attr2 = 16;
 		out = NULL;
-		size = 0;
 		compressed_size = 0;
-		buffer = NULL;
 	} else if (S_ISREG(mode) || S_ISLNK(mode)) {
-		enum object_type type = oid_object_info(args->repo, oid,
-							&size);
-
 		method = ZIP_METHOD_STORE;
 		attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) :
 			(mode & 0111) ? ((mode) << 16) : 0;
@@ -343,21 +337,16 @@
 		if (S_ISREG(mode) && args->compression_level != 0 && size > 0)
 			method = ZIP_METHOD_DEFLATE;
 
-		if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert &&
-		    size > big_file_threshold) {
+		if (!buffer) {
+			enum object_type type;
 			stream = open_istream(args->repo, oid, &type, &size,
 					      NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
 					     oid_to_hex(oid));
 			flags |= ZIP_STREAM;
-			out = buffer = NULL;
+			out = NULL;
 		} else {
-			buffer = object_file_to_archive(args, path, oid, mode,
-							&type, &size);
-			if (!buffer)
-				return error(_("cannot read %s"),
-					     oid_to_hex(oid));
 			crc = crc32(crc, buffer, size);
 			is_binary = entry_is_binary(args->repo->index,
 						    path_without_prefix,
@@ -511,7 +500,6 @@
 	}
 
 	free(deflated);
-	free(buffer);
 
 	if (compressed_size > 0xffffffff || size > 0xffffffff ||
 	    offset > 0xffffffff) {
diff --git a/archive.c b/archive.c
index 0de6048..3c1541a 100644
--- a/archive.c
+++ b/archive.c
@@ -70,10 +70,12 @@
 	free(to_free);
 }
 
-void *object_file_to_archive(const struct archiver_args *args,
-			     const char *path, const struct object_id *oid,
-			     unsigned int mode, enum object_type *type,
-			     unsigned long *sizep)
+static void *object_file_to_archive(const struct archiver_args *args,
+				    const char *path,
+				    const struct object_id *oid,
+				    unsigned int mode,
+				    enum object_type *type,
+				    unsigned long *sizep)
 {
 	void *buffer;
 	const struct commit *commit = args->convert ? args->commit : NULL;
@@ -145,6 +147,9 @@
 	write_archive_entry_fn_t write_entry = c->write_entry;
 	int err;
 	const char *path_without_prefix;
+	unsigned long size;
+	void *buffer;
+	enum object_type type;
 
 	args->convert = 0;
 	strbuf_reset(&path);
@@ -167,7 +172,7 @@
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-		err = write_entry(args, oid, path.buf, path.len, mode);
+		err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
 		if (err)
 			return err;
 		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
@@ -175,7 +180,19 @@
 
 	if (args->verbose)
 		fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-	return write_entry(args, oid, path.buf, path.len, mode);
+
+	/* Stream it? */
+	if (S_ISREG(mode) && !args->convert &&
+	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
+	    size > big_file_threshold)
+		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
+
+	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
+	if (!buffer)
+		return error(_("cannot read %s"), oid_to_hex(oid));
+	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
+	free(buffer);
+	return err;
 }
 
 static void queue_directory(const unsigned char *sha1,
@@ -249,6 +266,11 @@
 				   stage, context);
 }
 
+struct extra_file_info {
+	char *base;		/* archive path prefix for this file, or NULL */
+	struct stat stat;	/* stat() result taken when --add-file was parsed */
+};
+
 int write_archive_entries(struct archiver_args *args,
 		write_archive_entry_fn_t write_entry)
 {
@@ -256,6 +278,10 @@
 	struct unpack_trees_options opts;
 	struct tree_desc t;
 	int err;
+	struct strbuf path_in_archive = STRBUF_INIT;
+	struct strbuf content = STRBUF_INIT;
+	struct object_id fake_oid = null_oid;
+	int i;
 
 	if (args->baselen > 0 && args->base[args->baselen - 1] == '/') {
 		size_t len = args->baselen;
@@ -265,7 +291,7 @@
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)len, args->base);
 		err = write_entry(args, &args->tree->object.oid, args->base,
-				  len, 040777);
+				  len, 040777, NULL, 0);
 		if (err)
 			return err;
 	}
@@ -301,6 +327,33 @@
 		free(context.bottom);
 		context.bottom = next;
 	}
+
+	for (i = 0; i < args->extra_files.nr; i++) {
+		struct string_list_item *item = args->extra_files.items + i;
+		char *path = item->string;
+		struct extra_file_info *info = item->util;
+
+		put_be64(fake_oid.hash, i + 1);
+
+		strbuf_reset(&path_in_archive);
+		if (info->base)
+			strbuf_addstr(&path_in_archive, info->base);
+		strbuf_addstr(&path_in_archive, basename(path));
+
+		strbuf_reset(&content);
+		if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
+			err = error_errno(_("could not read '%s'"), path);
+		else
+			err = write_entry(args, &fake_oid, path_in_archive.buf,
+					  path_in_archive.len,
+					  info->stat.st_mode,
+					  content.buf, content.len);
+		if (err)
+			break;
+	}
+	strbuf_release(&path_in_archive);
+	strbuf_release(&content);
+
 	return err;
 }
 
@@ -440,6 +493,42 @@
 	ar_args->time = archive_time;
 }
 
+static void extra_file_info_clear(void *util, const char *str)
+{
+	struct extra_file_info *info = util; /* item->util; str is unused */
+	free(info->base); /* private copy of the prefix; may be NULL */
+	free(info);
+}
+
+static int add_file_cb(const struct option *opt, const char *arg, int unset)
+{
+	struct archiver_args *args = opt->value;
+	const char **basep = (const char **)opt->defval; /* &base of --prefix */
+	const char *base = *basep; /* --prefix value seen so far; may be NULL */
+	char *path;
+	struct string_list_item *item;
+	struct extra_file_info *info;
+
+	if (unset) { /* option was negated: forget the files collected so far */
+		string_list_clear_func(&args->extra_files,
+				       extra_file_info_clear);
+		return 0;
+	}
+
+	if (!arg)
+		return -1;
+
+	path = prefix_filename(args->prefix, arg);
+	item = string_list_append_nodup(&args->extra_files, path);
+	item->util = info = xmalloc(sizeof(*info));
+	info->base = xstrdup_or_null(base); /* snapshot: later --prefix won't affect it */
+	if (stat(path, &info->stat)) /* any stat() failure is reported as "not found" */
+		die(_("File not found: %s"), path);
+	if (!S_ISREG(info->stat.st_mode))
+		die(_("Not a regular file: %s"), path);
+	return 0;
+}
+
 #define OPT__COMPR(s, v, h, p) \
 	OPT_SET_INT_F(s, NULL, v, h, p, PARSE_OPT_NONEG)
 #define OPT__COMPR_HIDDEN(s, v, p) \
@@ -464,6 +553,9 @@
 		OPT_STRING(0, "format", &format, N_("fmt"), N_("archive format")),
 		OPT_STRING(0, "prefix", &base, N_("prefix"),
 			N_("prepend prefix to each pathname in the archive")),
+		{ OPTION_CALLBACK, 0, "add-file", args, N_("file"),
+		  N_("add untracked file to archive"), 0, add_file_cb,
+		  (intptr_t)&base },
 		OPT_STRING('o', "output", &output, N_("file"),
 			N_("write the archive to this file")),
 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
@@ -498,6 +590,8 @@
 		die(_("Option --exec can only be used together with --remote"));
 	if (output)
 		die(_("Unexpected option --output"));
+	if (is_remote && args->extra_files.nr)
+		die(_("Options --add-file and --remote cannot be used together"));
 
 	if (!base)
 		base = "";
@@ -544,11 +638,14 @@
 {
 	const struct archiver *ar = NULL;
 	struct archiver_args args;
+	int rc;
 
 	git_config_get_bool("uploadarchive.allowunreachable", &remote_allow_unreachable);
 	git_config(git_default_config, NULL);
 
 	args.repo = repo;
+	args.prefix = prefix;
+	string_list_init(&args.extra_files, 1);
 	argc = parse_archive_args(argc, argv, &ar, &args, name_hint, remote);
 	if (!startup_info->have_repository) {
 		/*
@@ -562,7 +659,11 @@
 	parse_treeish_arg(argv, &args, prefix, remote);
 	parse_pathspec_arg(argv + 1, &args);
 
-	return ar->write_archive(ar, &args);
+	rc = ar->write_archive(ar, &args);
+
+	string_list_clear_func(&args.extra_files, extra_file_info_clear);
+
+	return rc;
 }
 
 static int match_extension(const char *filename, const char *ext)
diff --git a/archive.h b/archive.h
index 3bd96bf..82b2260 100644
--- a/archive.h
+++ b/archive.h
@@ -9,6 +9,7 @@
 struct archiver_args {
 	struct repository *repo;
 	const char *refname;
+	const char *prefix;
 	const char *base;
 	size_t baselen;
 	struct tree *tree;
@@ -20,6 +21,7 @@
 	unsigned int worktree_attributes : 1;
 	unsigned int convert : 1;
 	int compression_level;
+	struct string_list extra_files;
 };
 
 /* main api */
@@ -49,12 +51,9 @@
 typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
 					const struct object_id *oid,
 					const char *path, size_t pathlen,
-					unsigned int mode);
+					unsigned int mode,
+					void *buffer, unsigned long size);
 
 int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
-void *object_file_to_archive(const struct archiver_args *args,
-			     const char *path, const struct object_id *oid,
-			     unsigned int mode, enum object_type *type,
-			     unsigned long *sizep);
 
 #endif	/* ARCHIVE_H */
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 37655a2..3ebb0d3 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -94,6 +94,16 @@
 	'
 }
 
+check_added() {
+	dir=$1			# presumably where check_tar unpacked the archive -- TODO confirm
+	path_in_fs=$2		# file on disk that was given to --add-file
+	path_in_archive=$3	# path expected for that file below $dir
+
+	test_expect_success " validate extra file $path_in_archive" '
+		diff -r $path_in_fs $dir/$path_in_archive
+	'
+}
+
 test_expect_success 'setup' '
 	test_oid_cache <<-EOF
 	obj sha1:19f9c8273ec45a8938e6999cb59b3ff66739902a
@@ -164,6 +174,25 @@
 
 check_tar with_olde-prefix olde-
 
+test_expect_success 'git archive --add-file' '
+	echo untracked >untracked &&
+	git archive --add-file=untracked HEAD >with_untracked.tar
+'
+
+check_tar with_untracked
+check_added with_untracked untracked untracked
+
+test_expect_success 'git archive --add-file twice' '
+	echo untracked >untracked &&
+	git archive --prefix=one/ --add-file=untracked \
+		--prefix=two/ --add-file=untracked \
+		--prefix= HEAD >with_untracked2.tar
+'
+
+check_tar with_untracked2
+check_added with_untracked2 untracked one/untracked
+check_added with_untracked2 untracked two/untracked
+
 test_expect_success 'git archive on large files' '
     test_config core.bigfilethreshold 1 &&
     git archive HEAD >b3.tar &&
diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
index 3b76d2e..1e6d18b 100755
--- a/t/t5003-archive-zip.sh
+++ b/t/t5003-archive-zip.sh
@@ -72,6 +72,16 @@
 	"
 }
 
+check_added() {
+	dir=$1			# presumably where check_zip unpacked the archive -- TODO confirm
+	path_in_fs=$2		# file on disk that was given to --add-file
+	path_in_archive=$3	# path expected for that file below $dir
+
+	test_expect_success UNZIP " validate extra file $path_in_archive" '
+		diff -r $path_in_fs $dir/$path_in_archive
+	'
+}
+
 test_expect_success \
     'populate workdir' \
     'mkdir a &&
@@ -188,4 +198,22 @@
 
 check_zip large-compressed
 
+test_expect_success 'git archive --format=zip --add-file' '
+	echo untracked >untracked &&
+	git archive --format=zip --add-file=untracked HEAD >with_untracked.zip
+'
+
+check_zip with_untracked
+check_added with_untracked untracked untracked
+
+test_expect_success 'git archive --format=zip --add-file twice' '
+	echo untracked >untracked &&
+	git archive --format=zip --prefix=one/ --add-file=untracked \
+		--prefix=two/ --add-file=untracked \
+		--prefix= HEAD >with_untracked2.zip
+'
+check_zip with_untracked2
+check_added with_untracked2 untracked one/untracked
+check_added with_untracked2 untracked two/untracked
+
 test_done