Merge tag 'vfs-5.4-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull swap access updates from Darrick Wong:
"Prohibit writing to active swap files and swap partitions.
There's no non-malicious use case for allowing userspace to scribble
on storage that the kernel thinks it owns"
* tag 'vfs-5.4-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
vfs: don't allow writes to swap files
mm: set S_SWAPFILE on blockdev swap devices
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 677cb36..9c073db 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1972,6 +1972,9 @@
if (bdev_read_only(I_BDEV(bd_inode)))
return -EPERM;
+ if (IS_SWAPFILE(bd_inode))
+ return -ETXTBSY;
+
if (!iov_iter_count(from))
return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 104a727..ae66481 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3565,4 +3565,15 @@
fa->fsx_xflags = xflags;
}
+/*
+ * Flush file data before changing attributes. Caller must hold any locks
+ * required to prevent further writes to this file until we're done setting
+ * flags.
+ */
+static inline int inode_drain_writes(struct inode *inode)
+{
+ inode_dio_wait(inode);
+ return filemap_write_and_wait(inode->i_mapping);
+}
+
#endif /* _LINUX_FS_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index d0cf700..40667c2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2988,6 +2988,9 @@
loff_t count;
int ret;
+ if (IS_SWAPFILE(inode))
+ return -ETXTBSY;
+
if (!iov_iter_count(from))
return 0;
diff --git a/mm/memory.c b/mm/memory.c
index e2bb51b..b1dff75 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2196,6 +2196,10 @@
vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+ if (vmf->vma->vm_file &&
+ IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host))
+ return VM_FAULT_SIGBUS;
+
ret = vmf->vma->vm_ops->page_mkwrite(vmf);
/* Restore original flags so that caller is not surprised */
vmf->flags = old_flags;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7e8c3e8a..6bc21fca 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1483,8 +1483,12 @@
case MAP_SHARED_VALIDATE:
if (flags & ~flags_mask)
return -EOPNOTSUPP;
- if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
- return -EACCES;
+ if (prot & PROT_WRITE) {
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EACCES;
+ if (IS_SWAPFILE(file->f_mapping->host))
+ return -ETXTBSY;
+ }
/*
* Make sure we don't allow writing to an append-only
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0789a76..dab4352 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2368,9 +2368,8 @@
* requirements, they are simply tossed out - we will never use those blocks
* for swapping.
*
- * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon. This
- * prevents root from shooting her foot off by ftruncating an in-use swapfile,
- * which will scribble on the fs.
+ * For all swap devices we set S_SWAPFILE across the life of the swapon. This
+ * prevents users from writing to the swap device, which will corrupt memory.
*
* The amount of disk space which a single swap extent represents varies.
* Typically it is in the 1-4 megabyte range. So we can have hundreds of
@@ -2661,13 +2660,14 @@
inode = mapping->host;
if (S_ISBLK(inode->i_mode)) {
struct block_device *bdev = I_BDEV(inode);
+
set_blocksize(bdev, old_block_size);
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
- } else {
- inode_lock(inode);
- inode->i_flags &= ~S_SWAPFILE;
- inode_unlock(inode);
}
+
+ inode_lock(inode);
+ inode->i_flags &= ~S_SWAPFILE;
+ inode_unlock(inode);
filp_close(swap_file, NULL);
/*
@@ -2890,11 +2890,11 @@
p->flags |= SWP_BLKDEV;
} else if (S_ISREG(inode->i_mode)) {
p->bdev = inode->i_sb->s_bdev;
- inode_lock(inode);
- if (IS_SWAPFILE(inode))
- return -EBUSY;
- } else
- return -EINVAL;
+ }
+
+ inode_lock(inode);
+ if (IS_SWAPFILE(inode))
+ return -EBUSY;
return 0;
}
@@ -3275,6 +3275,17 @@
if (error)
goto bad_swap;
+ /*
+ * Flush any pending IO and dirty mappings before we start using this
+ * swap device.
+ */
+ inode->i_flags |= S_SWAPFILE;
+ error = inode_drain_writes(inode);
+ if (error) {
+ inode->i_flags &= ~S_SWAPFILE;
+ goto bad_swap;
+ }
+
mutex_lock(&swapon_mutex);
prio = -1;
if (swap_flags & SWAP_FLAG_PREFER)
@@ -3295,8 +3306,6 @@
atomic_inc(&proc_poll_event);
wake_up_interruptible(&proc_poll_wait);
- if (S_ISREG(inode->i_mode))
- inode->i_flags |= S_SWAPFILE;
error = 0;
goto out;
bad_swap:
@@ -3318,7 +3327,7 @@
if (inced_nr_rotate_swap)
atomic_dec(&nr_rotate_swap);
if (swap_file) {
- if (inode && S_ISREG(inode->i_mode)) {
+ if (inode) {
inode_unlock(inode);
inode = NULL;
}
@@ -3331,7 +3340,7 @@
}
if (name)
putname(name);
- if (inode && S_ISREG(inode->i_mode))
+ if (inode)
inode_unlock(inode);
if (!error)
enable_swap_slots_cache();