Merge branch 'jk/submodule-fsck-loose' into maint

* jk/submodule-fsck-loose:
  fsck: complain when .gitmodules is a symlink
  index-pack: check .gitmodules files with --strict
  unpack-objects: call fsck_finish() after fscking objects
  fsck: call fsck_finish() after fscking objects
  fsck: check .gitmodules content
  fsck: handle promisor objects in .gitmodules check
  fsck: detect gitmodules files
  fsck: actually fsck blob data
  fsck: simplify ".git" check
  index-pack: make fsck error message more specific
diff --git a/builtin/fsck.c b/builtin/fsck.c
index ef78c6c..028aba5 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -337,7 +337,7 @@
 	}
 }
 
-static int fsck_obj(struct object *obj)
+static int fsck_obj(struct object *obj, void *buffer, unsigned long size)
 {
 	int err;
 
@@ -351,7 +351,7 @@
 
 	if (fsck_walk(obj, NULL, &fsck_obj_options))
 		objerror(obj, "broken links");
-	err = fsck_object(obj, NULL, 0, &fsck_obj_options);
+	err = fsck_object(obj, buffer, size, &fsck_obj_options);
 	if (err)
 		goto out;
 
@@ -396,7 +396,7 @@
 	}
 	obj->flags &= ~(REACHABLE | SEEN);
 	obj->flags |= HAS_OBJ;
-	return fsck_obj(obj);
+	return fsck_obj(obj, buffer, size);
 }
 
 static int default_refs;
@@ -504,44 +504,42 @@
 	}
 }
 
-static struct object *parse_loose_object(const struct object_id *oid,
-					 const char *path)
-{
-	struct object *obj;
-	void *contents;
-	enum object_type type;
-	unsigned long size;
-	int eaten;
-
-	if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0)
-		return NULL;
-
-	if (!contents && type != OBJ_BLOB)
-		die("BUG: read_loose_object streamed a non-blob");
-
-	obj = parse_object_buffer(oid, type, size, contents, &eaten);
-
-	if (!eaten)
-		free(contents);
-	return obj;
-}
-
 static int fsck_loose(const struct object_id *oid, const char *path, void *data)
 {
-	struct object *obj = parse_loose_object(oid, path);
+	struct object *obj;
+	enum object_type type;
+	unsigned long size;
+	void *contents;
+	int eaten;
 
-	if (!obj) {
+	if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) {
 		errors_found |= ERROR_OBJECT;
 		error("%s: object corrupt or missing: %s",
 		      oid_to_hex(oid), path);
 		return 0; /* keep checking other objects */
 	}
 
+	if (!contents && type != OBJ_BLOB)
+		BUG("read_loose_object streamed a non-blob");
+
+	obj = parse_object_buffer(oid, type, size, contents, &eaten);
+	if (!obj) {
+		errors_found |= ERROR_OBJECT;
+		error("%s: object could not be parsed: %s",
+		      oid_to_hex(oid), path);
+		if (!eaten)
+			free(contents);
+		return 0; /* keep checking other objects */
+	}
+
 	obj->flags &= ~(REACHABLE | SEEN);
 	obj->flags |= HAS_OBJ;
-	if (fsck_obj(obj))
+	if (fsck_obj(obj, contents, size))
 		errors_found |= ERROR_OBJECT;
-	return 0;
+
+	if (!eaten)
+		free(contents);
+	return 0; /* keep checking other objects, even if we saw an error */
 }
 
 static int fsck_cruft(const char *basename, const char *path, void *data)
@@ -750,6 +748,9 @@
 			}
 			stop_progress(&progress);
 		}
+
+		if (fsck_finish(&fsck_obj_options))
+			errors_found |= ERROR_OBJECT;
 	}
 
 	for (i = 0; i < argc; i++) {
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index bda84a9..7b2f7c0 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -836,6 +836,9 @@
 				blob->object.flags |= FLAG_CHECKED;
 			else
 				die(_("invalid blob object %s"), oid_to_hex(oid));
+			if (do_fsck_object &&
+			    fsck_object(&blob->object, (void *)data, size, &fsck_options))
+				die(_("fsck error in packed object"));
 		} else {
 			struct object *obj;
 			int eaten;
@@ -853,7 +856,7 @@
 				die(_("invalid %s"), type_name(type));
 			if (do_fsck_object &&
 			    fsck_object(obj, buf, size, &fsck_options))
-				die(_("Error in object"));
+				die(_("fsck error in packed object"));
 			if (strict && fsck_walk(obj, NULL, &fsck_options))
 				die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
 
@@ -1477,6 +1480,9 @@
 	} else
 		chmod(final_index_name, 0444);
 
+	if (do_fsck_object)
+		add_packed_git(final_index_name, strlen(final_index_name), 0);
+
 	if (!from_stdin) {
 		printf("%s\n", sha1_to_hex(hash));
 	} else {
@@ -1818,6 +1824,10 @@
 		      pack_hash);
 	else
 		close(input_fd);
+
+	if (do_fsck_object && fsck_finish(&fsck_options))
+		die(_("fsck error in pack objects"));
+
 	free(objects);
 	strbuf_release(&index_name_buf);
 	if (pack_name == NULL)
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 6620fee..c8f1406 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -210,7 +210,7 @@
 	if (!obj_buf)
 		die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
 	if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
-		die("Error in object");
+		die("fsck error in packed object");
 	fsck_options.walk = check_object;
 	if (fsck_walk(obj, NULL, &fsck_options))
 		die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
@@ -572,8 +572,11 @@
 	unpack_all();
 	the_hash_algo->update_fn(&ctx, buffer, offset);
 	the_hash_algo->final_fn(oid.hash, &ctx);
-	if (strict)
+	if (strict) {
 		write_rest();
+		if (fsck_finish(&fsck_options))
+			die(_("fsck error in pack objects"));
+	}
 	if (hashcmp(fill(the_hash_algo->rawsz), oid.hash))
 		die("final sha1 did not match");
 	use(the_hash_algo->rawsz);
diff --git a/fsck.c b/fsck.c
index 5c8c12d..9339f31 100644
--- a/fsck.c
+++ b/fsck.c
@@ -10,6 +10,13 @@
 #include "utf8.h"
 #include "sha1-array.h"
 #include "decorate.h"
+#include "oidset.h"
+#include "packfile.h"
+#include "submodule-config.h"
+#include "config.h"
+
+static struct oidset gitmodules_found = OIDSET_INIT;
+static struct oidset gitmodules_done = OIDSET_INIT;
 
 #define FSCK_FATAL -1
 #define FSCK_INFO -2
@@ -44,6 +51,7 @@
 	FUNC(MISSING_TAG_ENTRY, ERROR) \
 	FUNC(MISSING_TAG_OBJECT, ERROR) \
 	FUNC(MISSING_TREE, ERROR) \
+	FUNC(MISSING_TREE_OBJECT, ERROR) \
 	FUNC(MISSING_TYPE, ERROR) \
 	FUNC(MISSING_TYPE_ENTRY, ERROR) \
 	FUNC(MULTIPLE_AUTHORS, ERROR) \
@@ -51,6 +59,11 @@
 	FUNC(TREE_NOT_SORTED, ERROR) \
 	FUNC(UNKNOWN_TYPE, ERROR) \
 	FUNC(ZERO_PADDED_DATE, ERROR) \
+	FUNC(GITMODULES_MISSING, ERROR) \
+	FUNC(GITMODULES_BLOB, ERROR) \
+	FUNC(GITMODULES_PARSE, ERROR) \
+	FUNC(GITMODULES_NAME, ERROR) \
+	FUNC(GITMODULES_SYMLINK, ERROR) \
 	/* warnings */ \
 	FUNC(BAD_FILEMODE, WARN) \
 	FUNC(EMPTY_NAME, WARN) \
@@ -561,10 +574,18 @@
 		has_empty_name |= !*name;
 		has_dot |= !strcmp(name, ".");
 		has_dotdot |= !strcmp(name, "..");
-		has_dotgit |= (!strcmp(name, ".git") ||
-			       is_hfs_dotgit(name) ||
-			       is_ntfs_dotgit(name));
+		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
 		has_zero_pad |= *(char *)desc.buffer == '0';
+
+		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
+			if (!S_ISLNK(mode))
+				oidset_insert(&gitmodules_found, oid);
+			else
+				retval += report(options, &item->object,
+						 FSCK_MSG_GITMODULES_SYMLINK,
+						 ".gitmodules is a symbolic link");
+		}
+
 		if (update_tree_entry_gently(&desc)) {
 			retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
 			break;
@@ -901,6 +922,66 @@
 	return fsck_tag_buffer(tag, data, size, options);
 }
 
+struct fsck_gitmodules_data {
+	struct object *obj;
+	struct fsck_options *options;
+	int ret;
+};
+
+static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
+{
+	struct fsck_gitmodules_data *data = vdata;
+	const char *subsection, *key;
+	int subsection_len;
+	char *name;
+
+	if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
+	    !subsection)
+		return 0;
+
+	name = xmemdupz(subsection, subsection_len);
+	if (check_submodule_name(name) < 0)
+		data->ret |= report(data->options, data->obj,
+				    FSCK_MSG_GITMODULES_NAME,
+				    "disallowed submodule name: %s",
+				    name);
+	free(name);
+
+	return 0;
+}
+
+static int fsck_blob(struct blob *blob, const char *buf,
+		     unsigned long size, struct fsck_options *options)
+{
+	struct fsck_gitmodules_data data;
+
+	if (!oidset_contains(&gitmodules_found, &blob->object.oid))
+		return 0;
+	oidset_insert(&gitmodules_done, &blob->object.oid);
+
+	if (!buf) {
+		/*
+		 * A missing buffer here is a sign that the caller found the
+		 * blob too gigantic to load into memory. Let's just consider
+		 * that an error.
+		 */
+		return report(options, &blob->object,
+			      FSCK_MSG_GITMODULES_PARSE,
+			      ".gitmodules too large to parse");
+	}
+
+	data.obj = &blob->object;
+	data.options = options;
+	data.ret = 0;
+	if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
+				".gitmodules", buf, size, &data))
+		data.ret |= report(options, &blob->object,
+				   FSCK_MSG_GITMODULES_PARSE,
+				   "could not parse gitmodules blob");
+
+	return data.ret;
+}
+
 int fsck_object(struct object *obj, void *data, unsigned long size,
 	struct fsck_options *options)
 {
@@ -908,7 +989,7 @@
 		return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
 
 	if (obj->type == OBJ_BLOB)
-		return 0;
+		return fsck_blob((struct blob *)obj, data, size, options);
 	if (obj->type == OBJ_TREE)
 		return fsck_tree((struct tree *) obj, options);
 	if (obj->type == OBJ_COMMIT)
@@ -932,3 +1013,52 @@
 	error("object %s: %s", describe_object(o, obj), message);
 	return 1;
 }
+
+int fsck_finish(struct fsck_options *options)
+{
+	int ret = 0;
+	struct oidset_iter iter;
+	const struct object_id *oid;
+
+	oidset_iter_init(&gitmodules_found, &iter);
+	while ((oid = oidset_iter_next(&iter))) {
+		struct blob *blob;
+		enum object_type type;
+		unsigned long size;
+		char *buf;
+
+		if (oidset_contains(&gitmodules_done, oid))
+			continue;
+
+		blob = lookup_blob(oid);
+		if (!blob) {
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_BLOB,
+				      "non-blob found at .gitmodules");
+			continue;
+		}
+
+		buf = read_sha1_file(oid->hash, &type, &size);
+		if (!buf) {
+			if (is_promisor_object(&blob->object.oid))
+				continue;
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_MISSING,
+				      "unable to read .gitmodules blob");
+			continue;
+		}
+
+		if (type == OBJ_BLOB)
+			ret |= fsck_blob(blob, buf, size, options);
+		else
+			ret |= report(options, &blob->object,
+				      FSCK_MSG_GITMODULES_BLOB,
+				      "non-blob found at .gitmodules");
+		free(buf);
+	}
+
+
+	oidset_clear(&gitmodules_found);
+	oidset_clear(&gitmodules_done);
+	return ret;
+}
diff --git a/fsck.h b/fsck.h
index 4525510..c3cf5e0 100644
--- a/fsck.h
+++ b/fsck.h
@@ -53,4 +53,11 @@
 int fsck_object(struct object *obj, void *data, unsigned long size,
 	struct fsck_options *options);
 
+/*
+ * Some fsck checks are context-dependent, and may end up queued; run this
+ * after completing all fsck_object() calls in order to resolve any remaining
+ * checks.
+ */
+int fsck_finish(struct fsck_options *options);
+
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index cc0f43e..53f0a36 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2209,7 +2209,7 @@
 		goto out;
 	}
 
-	if (*type == OBJ_BLOB) {
+	if (*type == OBJ_BLOB && *size > big_file_threshold) {
 		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
 			goto out;
 	} else {
diff --git a/t/lib-pack.sh b/t/lib-pack.sh
index 7509846..4674899 100644
--- a/t/lib-pack.sh
+++ b/t/lib-pack.sh
@@ -79,6 +79,18 @@
 		;;
 	esac
 
+	# If it's not a delta, we can convince pack-objects to generate a pack
+	# with just our entry, and then strip off the header (12 bytes) and
+	# trailer (20 bytes).
+	if test -z "$2"
+	then
+		echo "$1" | git pack-objects --stdout >pack_obj.tmp &&
+		size=$(wc -c <pack_obj.tmp) &&
+		dd if=pack_obj.tmp bs=1 count=$((size - 20 - 12)) skip=12 &&
+		rm -f pack_obj.tmp
+		return
+	fi
+
 	echo >&2 "BUG: don't know how to print $1${2:+ (from $2)}"
 	return 1
 }
diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh
index 75fa071..a770d92 100755
--- a/t/t7415-submodule-names.sh
+++ b/t/t7415-submodule-names.sh
@@ -6,6 +6,7 @@
 real-world setup that confirms we catch this in practice.
 '
 . ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-pack.sh
 
 test_expect_success 'check names' '
 	cat >expect <<-\EOF &&
@@ -73,4 +74,81 @@
 	! grep "RUNNING POST CHECKOUT" output
 '
 
+test_expect_success 'fsck detects evil superproject' '
+	test_must_fail git fsck
+'
+
+test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	git -C dst.git config transfer.fsckObjects true &&
+	test_must_fail git push dst.git HEAD
+'
+
+test_expect_success 'transfer.fsckObjects detects evil superproject (index)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	git -C dst.git config transfer.fsckObjects true &&
+	git -C dst.git config transfer.unpackLimit 1 &&
+	test_must_fail git push dst.git HEAD
+'
+
+# Normally our packs contain commits followed by trees followed by blobs. This
+# reverses the order, which requires backtracking to find the context of a
+# blob. We'll start with a fresh gitmodules-only tree to make it simpler.
+test_expect_success 'create oddly ordered pack' '
+	git checkout --orphan odd &&
+	git rm -rf --cached . &&
+	git add .gitmodules &&
+	git commit -m odd &&
+	{
+		pack_header 3 &&
+		pack_obj $(git rev-parse HEAD:.gitmodules) &&
+		pack_obj $(git rev-parse HEAD^{tree}) &&
+		pack_obj $(git rev-parse HEAD)
+	} >odd.pack &&
+	pack_trailer odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	test_must_fail git -C dst.git unpack-objects --strict <odd.pack
+'
+
+test_expect_success 'transfer.fsckObjects handles odd pack (index)' '
+	rm -rf dst.git &&
+	git init --bare dst.git &&
+	test_must_fail git -C dst.git index-pack --strict --stdin <odd.pack
+'
+
+test_expect_success 'fsck detects symlinked .gitmodules file' '
+	git init symlink &&
+	(
+		cd symlink &&
+
+		# Make the tree directly to avoid index restrictions.
+		#
+		# Because symlinks store the target as a blob, choose
+		# a pathname that could be parsed as a .gitmodules file
+		# to trick naive non-symlink-aware checking.
+		tricky="[foo]bar=true" &&
+		content=$(git hash-object -w ../.gitmodules) &&
+		target=$(printf "$tricky" | git hash-object -w --stdin) &&
+		tree=$(
+			{
+				printf "100644 blob $content\t$tricky\n" &&
+				printf "120000 blob $target\t.gitmodules\n"
+			} | git mktree
+		) &&
+		commit=$(git commit-tree $tree) &&
+
+		# Check not only that we fail, but that it is due to the
+		# symlink detector; this grep string comes from the config
+		# variable name and will not be translated.
+		test_must_fail git fsck 2>output &&
+		grep gitmodulesSymlink output
+	)
+'
+
 test_done