Merge branch 'jk/xdiff-memory-limits' into maint-2.3
diff --git a/Documentation/git.txt b/Documentation/git.txt
index a62ed6f..41a09ca 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -1045,6 +1045,33 @@
 	an operation has touched every ref (e.g., because you are
 	cloning a repository to make a backup).
 
+`GIT_ALLOW_PROTOCOL`::
+	If set, provide a colon-separated list of protocols which are
+	allowed to be used with fetch/push/clone. This is useful to
+	restrict recursive submodule initialization from an untrusted
+	repository. Any protocol not mentioned will be disallowed (i.e.,
+	this is a whitelist, not a blacklist). If the variable is not
+	set at all, all protocols are enabled.  The protocol names
+	currently used by git are:
+
+	  - `file`: any local file-based path (including `file://` URLs,
+	    or local paths)
+
+	  - `git`: the anonymous git protocol over a direct TCP
+	    connection (or proxy, if configured)
+
+	  - `ssh`: git over ssh (including `host:path` syntax,
+	    `git+ssh://`, etc).
+
+	  - `rsync`: git over rsync
+
+	  - `http`: git over http, both "smart http" and "dumb http".
+	    Note that this does _not_ include `https`; if you want both,
+	    you should specify both as `http:https`.
+
+	  - any external helpers are named by their protocol (e.g., use
+	    `hg` to allow the `git-remote-hg` helper)
+
 
 Discussion[[Discussion]]
 ------------------------
diff --git a/connect.c b/connect.c
index 14c924b..bd4b50e 100644
--- a/connect.c
+++ b/connect.c
@@ -9,6 +9,7 @@
 #include "url.h"
 #include "string-list.h"
 #include "sha1-array.h"
+#include "transport.h"
 
 static char *server_capabilities;
 static const char *parse_feature_value(const char *, const char *, int *);
@@ -694,6 +695,8 @@
 		else
 			target_host = xstrdup(hostandport);
 
+		transport_check_allowed("git");
+
 		/* These underlying connection commands die() if they
 		 * cannot connect.
 		 */
@@ -727,6 +730,7 @@
 			int putty;
 			char *ssh_host = hostandport;
 			const char *port = NULL;
+			transport_check_allowed("ssh");
 			get_host_and_port(&ssh_host, &port);
 
 			if (!port)
@@ -768,6 +772,7 @@
 			/* remove repo-local variables from the environment */
 			conn->env = local_repo_env;
 			conn->use_shell = 1;
+			transport_check_allowed("file");
 		}
 		argv_array_push(&conn->args, cmd.buf);
 
diff --git a/git-submodule.sh b/git-submodule.sh
index 36797c3..78c2740 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -22,6 +22,15 @@
 wt_prefix=$(git rev-parse --show-prefix)
 cd_to_toplevel
 
+# Restrict ourselves to a vanilla subset of protocols; the URLs
+# we get are under control of a remote repository, and we do not
+# want them kicking off arbitrary git-remote-* programs.
+#
+# If the user has already specified a set of allowed protocols,
+# we assume they know what they're doing and use that instead.
+: ${GIT_ALLOW_PROTOCOL=file:git:http:https:ssh}
+export GIT_ALLOW_PROTOCOL
+
 command=
 branch=
 force=
diff --git a/http.c b/http.c
index 6798620..00e3fc8 100644
--- a/http.c
+++ b/http.c
@@ -8,6 +8,7 @@
 #include "credential.h"
 #include "version.h"
 #include "pkt-line.h"
+#include "transport.h"
 
 int active_requests;
 int http_is_verbose;
@@ -303,6 +304,7 @@
 static CURL *get_curl_handle(void)
 {
 	CURL *result = curl_easy_init();
+	long allowed_protocols = 0;
 
 	if (!result)
 		die("curl_easy_init failed");
@@ -350,11 +352,27 @@
 	}
 
 	curl_easy_setopt(result, CURLOPT_FOLLOWLOCATION, 1);
+	curl_easy_setopt(result, CURLOPT_MAXREDIRS, 20);
 #if LIBCURL_VERSION_NUM >= 0x071301
 	curl_easy_setopt(result, CURLOPT_POSTREDIR, CURL_REDIR_POST_ALL);
 #elif LIBCURL_VERSION_NUM >= 0x071101
 	curl_easy_setopt(result, CURLOPT_POST301, 1);
 #endif
+#if LIBCURL_VERSION_NUM >= 0x071304
+	if (is_transport_allowed("http"))
+		allowed_protocols |= CURLPROTO_HTTP;
+	if (is_transport_allowed("https"))
+		allowed_protocols |= CURLPROTO_HTTPS;
+	if (is_transport_allowed("ftp"))
+		allowed_protocols |= CURLPROTO_FTP;
+	if (is_transport_allowed("ftps"))
+		allowed_protocols |= CURLPROTO_FTPS;
+	curl_easy_setopt(result, CURLOPT_REDIR_PROTOCOLS, allowed_protocols);
+#else
+	if (transport_restrict_protocols())
+		warning("protocol restrictions not applied to curl redirects because\n"
+			"your curl version is too old (>= 7.19.4)");
+#endif
 
 	if (getenv("GIT_CURL_VERBOSE"))
 		curl_easy_setopt(result, CURLOPT_VERBOSE, 1);
diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf
index 0b81a00..7d15e6d 100644
--- a/t/lib-httpd/apache.conf
+++ b/t/lib-httpd/apache.conf
@@ -119,6 +119,10 @@
 RewriteRule ^/smart-redir-temp/(.*)$ /smart/$1 [R=302]
 RewriteRule ^/smart-redir-auth/(.*)$ /auth/smart/$1 [R=301]
 RewriteRule ^/smart-redir-limited/(.*)/info/refs$ /smart/$1/info/refs [R=301]
+RewriteRule ^/ftp-redir/(.*)$ ftp://localhost:1000/$1 [R=302]
+
+RewriteRule ^/loop-redir/x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-(.*) /$1 [R=302]
+RewriteRule ^/loop-redir/(.*)$ /loop-redir/x-$1 [R=302]
 
 <IfDefine SSL>
 LoadModule ssl_module modules/mod_ssl.so
diff --git a/t/lib-proto-disable.sh b/t/lib-proto-disable.sh
new file mode 100644
index 0000000..b0917d9
--- /dev/null
+++ b/t/lib-proto-disable.sh
@@ -0,0 +1,96 @@
+# Test routines for checking protocol disabling.
+
+# test cloning a particular protocol
+#   $1 - description of the protocol
+#   $2 - machine-readable name of the protocol
+#   $3 - the URL to try cloning
+test_proto () {
+	desc=$1
+	proto=$2
+	url=$3
+
+	test_expect_success "clone $1 (enabled)" '
+		rm -rf tmp.git &&
+		(
+			GIT_ALLOW_PROTOCOL=$proto &&
+			export GIT_ALLOW_PROTOCOL &&
+			git clone --bare "$url" tmp.git
+		)
+	'
+
+	test_expect_success "fetch $1 (enabled)" '
+		(
+			cd tmp.git &&
+			GIT_ALLOW_PROTOCOL=$proto &&
+			export GIT_ALLOW_PROTOCOL &&
+			git fetch
+		)
+	'
+
+	test_expect_success "push $1 (enabled)" '
+		(
+			cd tmp.git &&
+			GIT_ALLOW_PROTOCOL=$proto &&
+			export GIT_ALLOW_PROTOCOL &&
+			git push origin HEAD:pushed
+		)
+	'
+
+	test_expect_success "push $1 (disabled)" '
+		(
+			cd tmp.git &&
+			GIT_ALLOW_PROTOCOL=none &&
+			export GIT_ALLOW_PROTOCOL &&
+			test_must_fail git push origin HEAD:pushed
+		)
+	'
+
+	test_expect_success "fetch $1 (disabled)" '
+		(
+			cd tmp.git &&
+			GIT_ALLOW_PROTOCOL=none &&
+			export GIT_ALLOW_PROTOCOL &&
+			test_must_fail git fetch
+		)
+	'
+
+	test_expect_success "clone $1 (disabled)" '
+		rm -rf tmp.git &&
+		(
+			GIT_ALLOW_PROTOCOL=none &&
+			export GIT_ALLOW_PROTOCOL &&
+			test_must_fail git clone --bare "$url" tmp.git
+		)
+	'
+}
+
+# set up an ssh wrapper that will access $host/$repo in the
+# trash directory, and enable it for subsequent tests.
+setup_ssh_wrapper () {
+	test_expect_success 'setup ssh wrapper' '
+		write_script ssh-wrapper <<-\EOF &&
+		echo >&2 "ssh: $*"
+		host=$1; shift
+		cd "$TRASH_DIRECTORY/$host" &&
+		eval "$*"
+		EOF
+		GIT_SSH="$PWD/ssh-wrapper" &&
+		export GIT_SSH &&
+		export TRASH_DIRECTORY
+	'
+}
+
+# set up a wrapper that can be used with remote-ext to
+# access repositories in the "remote" directory of trash-dir,
+# like "ext::fake-remote %S repo.git"
+setup_ext_wrapper () {
+	test_expect_success 'setup ext wrapper' '
+		write_script fake-remote <<-\EOF &&
+		echo >&2 "fake-remote: $*"
+		cd "$TRASH_DIRECTORY/remote" &&
+		eval "$*"
+		EOF
+		PATH=$TRASH_DIRECTORY:$PATH &&
+		export TRASH_DIRECTORY
+	'
+}
diff --git a/t/t5810-proto-disable-local.sh b/t/t5810-proto-disable-local.sh
new file mode 100755
index 0000000..563592d
--- /dev/null
+++ b/t/t5810-proto-disable-local.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+test_description='test disabling of local paths in clone/fetch'
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-proto-disable.sh"
+
+test_expect_success 'setup repository to clone' '
+	test_commit one
+'
+
+test_proto "file://" file "file://$PWD"
+test_proto "path" file .
+
+test_done
diff --git a/t/t5811-proto-disable-git.sh b/t/t5811-proto-disable-git.sh
new file mode 100755
index 0000000..8ac6b2a
--- /dev/null
+++ b/t/t5811-proto-disable-git.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+test_description='test disabling of git-over-tcp in clone/fetch'
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-proto-disable.sh"
+. "$TEST_DIRECTORY/lib-git-daemon.sh"
+start_git_daemon
+
+test_expect_success 'create git-accessible repo' '
+	bare="$GIT_DAEMON_DOCUMENT_ROOT_PATH/repo.git" &&
+	test_commit one &&
+	git --bare init "$bare" &&
+	git push "$bare" HEAD &&
+	>"$bare/git-daemon-export-ok" &&
+	git -C "$bare" config daemon.receivepack true
+'
+
+test_proto "git://" git "$GIT_DAEMON_URL/repo.git"
+
+test_done
diff --git a/t/t5812-proto-disable-http.sh b/t/t5812-proto-disable-http.sh
new file mode 100755
index 0000000..0d105d5
--- /dev/null
+++ b/t/t5812-proto-disable-http.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+test_description='test disabling of git-over-http in clone/fetch'
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-proto-disable.sh"
+. "$TEST_DIRECTORY/lib-httpd.sh"
+start_httpd
+
+test_expect_success 'create git-accessible repo' '
+	bare="$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+	test_commit one &&
+	git --bare init "$bare" &&
+	git push "$bare" HEAD &&
+	git -C "$bare" config http.receivepack true
+'
+
+test_proto "smart http" http "$HTTPD_URL/smart/repo.git"
+
+test_expect_success 'curl redirects respect whitelist' '
+	test_must_fail env GIT_ALLOW_PROTOCOL=http:https \
+		git clone "$HTTPD_URL/ftp-redir/repo.git" 2>stderr &&
+	{
+		test_i18ngrep "ftp.*disabled" stderr ||
+		test_i18ngrep "your curl version is too old"
+	}
+'
+
+test_expect_success 'curl limits redirects' '
+	test_must_fail git clone "$HTTPD_URL/loop-redir/smart/repo.git"
+'
+
+stop_httpd
+test_done
diff --git a/t/t5813-proto-disable-ssh.sh b/t/t5813-proto-disable-ssh.sh
new file mode 100755
index 0000000..ad877d7
--- /dev/null
+++ b/t/t5813-proto-disable-ssh.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+test_description='test disabling of git-over-ssh in clone/fetch'
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-proto-disable.sh"
+
+setup_ssh_wrapper
+
+test_expect_success 'setup repository to clone' '
+	test_commit one &&
+	mkdir remote &&
+	git init --bare remote/repo.git &&
+	git push remote/repo.git HEAD
+'
+
+test_proto "host:path" ssh "remote:repo.git"
+test_proto "ssh://" ssh "ssh://remote/$PWD/remote/repo.git"
+test_proto "git+ssh://" ssh "git+ssh://remote/$PWD/remote/repo.git"
+
+test_done
diff --git a/t/t5814-proto-disable-ext.sh b/t/t5814-proto-disable-ext.sh
new file mode 100755
index 0000000..9d6f7df
--- /dev/null
+++ b/t/t5814-proto-disable-ext.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+test_description='test disabling of remote-helper paths in clone/fetch'
+. ./test-lib.sh
+. "$TEST_DIRECTORY/lib-proto-disable.sh"
+
+setup_ext_wrapper
+
+test_expect_success 'setup repository to clone' '
+	test_commit one &&
+	mkdir remote &&
+	git init --bare remote/repo.git &&
+	git push remote/repo.git HEAD
+'
+
+test_proto "remote-helper" ext "ext::fake-remote %S repo.git"
+
+test_done
diff --git a/t/t5815-submodule-protos.sh b/t/t5815-submodule-protos.sh
new file mode 100755
index 0000000..06f55a1
--- /dev/null
+++ b/t/t5815-submodule-protos.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+test_description='test protocol whitelisting with submodules'
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-proto-disable.sh
+
+setup_ext_wrapper
+setup_ssh_wrapper
+
+test_expect_success 'setup repository with submodules' '
+	mkdir remote &&
+	git init remote/repo.git &&
+	(cd remote/repo.git && test_commit one) &&
+	# submodule-add should probably trust what we feed it on the cmdline,
+	# but its implementation is overly conservative.
+	GIT_ALLOW_PROTOCOL=ssh git submodule add remote:repo.git ssh-module &&
+	GIT_ALLOW_PROTOCOL=ext git submodule add "ext::fake-remote %S repo.git" ext-module &&
+	git commit -m "add submodules"
+'
+
+test_expect_success 'clone with recurse-submodules fails' '
+	test_must_fail git clone --recurse-submodules . dst
+'
+
+test_expect_success 'setup individual updates' '
+	rm -rf dst &&
+	git clone . dst &&
+	git -C dst submodule init
+'
+
+test_expect_success 'update of ssh allowed' '
+	git -C dst submodule update ssh-module
+'
+
+test_expect_success 'update of ext not allowed' '
+	test_must_fail git -C dst submodule update ext-module
+'
+
+test_expect_success 'user can override whitelist' '
+	GIT_ALLOW_PROTOCOL=ext git -C dst submodule update ext-module
+'
+
+test_done
diff --git a/transport-helper.c b/transport-helper.c
index 7dc4a44..0b5362c 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -1038,6 +1038,8 @@
 	struct helper_data *data = xcalloc(1, sizeof(*data));
 	data->name = name;
 
+	transport_check_allowed(name);
+
 	if (getenv("GIT_TRANSPORT_HELPER_DEBUG"))
 		debug = 1;
 
diff --git a/transport.c b/transport.c
index 88bde1d..647d2c2 100644
--- a/transport.c
+++ b/transport.c
@@ -909,6 +909,42 @@
 	return strchr(url, ':') - url;
 }
 
+static const struct string_list *protocol_whitelist(void)
+{
+	static int enabled = -1;
+	static struct string_list allowed = STRING_LIST_INIT_DUP;
+
+	if (enabled < 0) {
+		const char *v = getenv("GIT_ALLOW_PROTOCOL");
+		if (v) {
+			string_list_split(&allowed, v, ':', -1);
+			string_list_sort(&allowed);
+			enabled = 1;
+		} else {
+			enabled = 0;
+		}
+	}
+
+	return enabled ? &allowed : NULL;
+}
+
+int is_transport_allowed(const char *type)
+{
+	const struct string_list *allowed = protocol_whitelist();
+	return !allowed || string_list_has_string(allowed, type);
+}
+
+void transport_check_allowed(const char *type)
+{
+	if (!is_transport_allowed(type))
+		die("transport '%s' not allowed", type);
+}
+
+int transport_restrict_protocols(void)
+{
+	return !!protocol_whitelist();
+}
+
 struct transport *transport_get(struct remote *remote, const char *url)
 {
 	const char *helper;
@@ -940,12 +976,14 @@
 	if (helper) {
 		transport_helper_init(ret, helper);
 	} else if (starts_with(url, "rsync:")) {
+		transport_check_allowed("rsync");
 		ret->get_refs_list = get_refs_via_rsync;
 		ret->fetch = fetch_objs_via_rsync;
 		ret->push = rsync_transport_push;
 		ret->smart_options = NULL;
 	} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
 		struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
+		transport_check_allowed("file");
 		ret->data = data;
 		ret->get_refs_list = get_refs_from_bundle;
 		ret->fetch = fetch_refs_from_bundle;
@@ -957,7 +995,10 @@
 		|| starts_with(url, "ssh://")
 		|| starts_with(url, "git+ssh://")
 		|| starts_with(url, "ssh+git://")) {
-		/* These are builtin smart transports. */
+		/*
+		 * These are builtin smart transports; "allowed" transports
+		 * will be checked individually in git_connect.
+		 */
 		struct git_transport_data *data = xcalloc(1, sizeof(*data));
 		ret->data = data;
 		ret->set_option = NULL;
diff --git a/transport.h b/transport.h
index 3e0091e..ed84da2 100644
--- a/transport.h
+++ b/transport.h
@@ -132,6 +132,24 @@
 /* Returns a transport suitable for the url */
 struct transport *transport_get(struct remote *, const char *);
 
+/*
+ * Check whether a transport is allowed by the environment. Type should
+ * generally be the URL scheme, as described in Documentation/git.txt
+ */
+int is_transport_allowed(const char *type);
+
+/*
+ * Check whether a transport is allowed by the environment,
+ * and die otherwise.
+ */
+void transport_check_allowed(const char *type);
+
+/*
+ * Returns true if the user has attempted to turn on protocol
+ * restrictions at all.
+ */
+int transport_restrict_protocols(void);
+
 /* Transport options which apply to git:// and scp-style URLs */
 
 /* The program to use on the remote side to send a pack */