aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c553
1 files changed, 388 insertions, 165 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb7aa3e4ef0..3d4fb81bacd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -45,7 +45,7 @@
#include <linux/freezer.h>
#include "ext4.h"
-#include "ext4_extents.h"
+#include "ext4_extents.h" /* Needed for trace points definition */
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
@@ -74,7 +74,6 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
-static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
@@ -144,9 +143,10 @@ int ext4_superblock_csum_verify(struct super_block *sb,
return es->s_checksum == ext4_superblock_csum(sb, es);
}
-void ext4_superblock_csum_set(struct super_block *sb,
- struct ext4_super_block *es)
+void ext4_superblock_csum_set(struct super_block *sb)
{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
return;
@@ -327,38 +327,17 @@ static void ext4_put_nojournal(handle_t *handle)
/*
* Wrappers for jbd2_journal_start/end.
- *
- * The only special thing we need to do here is to make sure that all
- * journal_end calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- *
- * To avoid j_barrier hold in userspace when a user calls freeze(),
- * ext4 prevents a new handle from being started by s_frozen, which
- * is in an upper layer.
*/
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{
journal_t *journal;
- handle_t *handle;
trace_ext4_journal_start(sb, nblocks, _RET_IP_);
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
+ WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
- handle = ext4_journal_current_handle();
-
- /*
- * If a handle has been started, it should be allowed to
- * finish, otherwise deadlock could happen between freeze
- * and others(e.g. truncate) due to the restart of the
- * journal handle if the filesystem is forzen and active
- * handles are not stopped.
- */
- if (!handle)
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
-
if (!journal)
return ext4_get_nojournal();
/*
@@ -373,12 +352,6 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
return jbd2_journal_start(journal, nblocks);
}
-/*
- * The only special thing we need to do here is to make sure that all
- * jbd2_journal_stop calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- */
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
struct super_block *sb;
@@ -448,7 +421,7 @@ static void __save_error_info(struct super_block *sb, const char *func,
*/
if (!es->s_error_count)
mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
+ le32_add_cpu(&es->s_error_count, 1);
}
static void save_error_info(struct super_block *sb, const char *func,
@@ -878,7 +851,6 @@ static void ext4_put_super(struct super_block *sb)
flush_workqueue(sbi->dio_unwritten_wq);
destroy_workqueue(sbi->dio_unwritten_wq);
- lock_super(sb);
if (sbi->s_journal) {
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@@ -896,7 +868,7 @@ static void ext4_put_super(struct super_block *sb)
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
- if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
+ if (!(sb->s_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
if (sbi->s_proc) {
@@ -945,7 +917,6 @@ static void ext4_put_super(struct super_block *sb)
* Now that we are completely done shutting down the
* superblock, we need to actually destroy the kobject.
*/
- unlock_super(sb);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
if (sbi->s_chksum_driver)
@@ -968,14 +939,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
return NULL;
ei->vfs_inode.i_version = 1;
- ei->vfs_inode.i_data.writeback_index = 0;
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
+ ext4_es_init_tree(&ei->i_es_tree);
+ rwlock_init(&ei->i_es_lock);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
+ ei->i_da_metadata_calc_last_lblock = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
@@ -983,11 +956,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_completed_io_list);
spin_lock_init(&ei->i_completed_io_lock);
- ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
atomic_set(&ei->i_ioend_count, 0);
- atomic_set(&ei->i_aiodio_unwritten, 0);
+ atomic_set(&ei->i_unwritten, 0);
return &ei->vfs_inode;
}
@@ -1025,9 +997,7 @@ static void init_once(void *foo)
struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT4_FS_XATTR
init_rwsem(&ei->xattr_sem);
-#endif
init_rwsem(&ei->i_data_sem);
inode_init_once(&ei->vfs_inode);
}
@@ -1046,6 +1016,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ext4_inode_cachep);
}
@@ -1055,6 +1030,7 @@ void ext4_clear_inode(struct inode *inode)
clear_inode(inode);
dquot_drop(inode);
ext4_discard_preallocations(inode);
+ ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
@@ -1137,12 +1113,18 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
struct path *path);
+static int ext4_quota_on_sysfile(struct super_block *sb, int type,
+ int format_id);
static int ext4_quota_off(struct super_block *sb, int type);
+static int ext4_quota_off_sysfile(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
+static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
+ unsigned int flags);
+static int ext4_enable_quotas(struct super_block *sb);
static const struct dquot_operations ext4_quota_operations = {
.get_reserved_space = ext4_get_reserved_space,
@@ -1164,6 +1146,16 @@ static const struct quotactl_ops ext4_qctl_operations = {
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk
};
+
+static const struct quotactl_ops ext4_qctl_sysfile_operations = {
+ .quota_on_meta = ext4_quota_on_sysfile,
+ .quota_off = ext4_quota_off_sysfile,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
+};
#endif
static const struct super_operations ext4_sops = {
@@ -1194,7 +1186,6 @@ static const struct super_operations ext4_nojournal_sops = {
.dirty_inode = ext4_dirty_inode,
.drop_inode = ext4_drop_inode,
.evict_inode = ext4_evict_inode,
- .write_super = ext4_write_super,
.put_super = ext4_put_super,
.statfs = ext4_statfs,
.remount_fs = ext4_remount,
@@ -1231,6 +1222,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
+ Opt_max_dir_size_kb,
};
static const match_table_t tokens = {
@@ -1304,6 +1296,7 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable=%u"},
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
+ {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1454,13 +1447,8 @@ static const struct mount_opts {
{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
-#ifdef CONFIG_EXT4_FS_XATTR
{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
-#else
- {Opt_user_xattr, 0, MOPT_NOSUPPORT},
- {Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
-#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
@@ -1484,6 +1472,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
+ {Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_err, 0, 0}
};
@@ -1599,6 +1588,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
if (!args->from)
arg = EXT4_DEF_LI_WAIT_MULT;
sbi->s_li_wait_mult = arg;
+ } else if (token == Opt_max_dir_size_kb) {
+ sbi->s_max_dir_size_kb = arg;
} else if (token == Opt_stripe) {
sbi->s_stripe = arg;
} else if (m->flags & MOPT_DATAJ) {
@@ -1654,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb,
unsigned int *journal_ioprio,
int is_remount)
{
-#ifdef CONFIG_QUOTA
struct ext4_sb_info *sbi = EXT4_SB(sb);
-#endif
char *p;
substring_t args[MAX_OPT_ARGS];
int token;
@@ -1671,7 +1660,7 @@ static int parse_options(char *options, struct super_block *sb,
* Initialize args struct so we know whether arg was
* found; some options take optional arguments.
*/
- args[0].to = args[0].from = 0;
+ args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args);
if (handle_mount_opt(sb, p, token, args, journal_devnum,
journal_ioprio, is_remount) < 0)
@@ -1705,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb,
}
}
#endif
+ if (test_opt(sb, DIOREAD_NOLOCK)) {
+ int blocksize =
+ BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+ if (blocksize < PAGE_CACHE_SIZE) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "dioread_nolock if block size != PAGE_SIZE");
+ return 0;
+ }
+ }
return 1;
}
@@ -1747,7 +1746,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
static const char *token2str(int token)
{
- static const struct match_token *t;
+ const struct match_token *t;
for (t = tokens; t->token != Opt_err; t++)
if (t->token == token && !strchr(t->pattern, '='))
@@ -1830,6 +1829,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
+ if (nodefs || sbi->s_max_dir_size_kb)
+ SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
ext4_show_quota_options(seq, sb);
return 0;
@@ -1921,34 +1922,56 @@ done:
return res;
}
+int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct flex_groups *new_groups;
+ int size;
+
+ if (!sbi->s_log_groups_per_flex)
+ return 0;
+
+ size = ext4_flex_group(sbi, ngroup - 1) + 1;
+ if (size <= sbi->s_flex_groups_allocated)
+ return 0;
+
+ size = roundup_pow_of_two(size * sizeof(struct flex_groups));
+ new_groups = ext4_kvzalloc(size, GFP_KERNEL);
+ if (!new_groups) {
+ ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
+ size / (int) sizeof(struct flex_groups));
+ return -ENOMEM;
+ }
+
+ if (sbi->s_flex_groups) {
+ memcpy(new_groups, sbi->s_flex_groups,
+ (sbi->s_flex_groups_allocated *
+ sizeof(struct flex_groups)));
+ ext4_kvfree(sbi->s_flex_groups);
+ }
+ sbi->s_flex_groups = new_groups;
+ sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
+ return 0;
+}
+
static int ext4_fill_flex_info(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp = NULL;
- ext4_group_t flex_group_count;
ext4_group_t flex_group;
unsigned int groups_per_flex = 0;
- size_t size;
- int i;
+ int i, err;
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
sbi->s_log_groups_per_flex = 0;
return 1;
}
- groups_per_flex = 1 << sbi->s_log_groups_per_flex;
-
- /* We allocate both existing and potentially added groups */
- flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
- ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
- EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
- size = flex_group_count * sizeof(struct flex_groups);
- sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
- if (sbi->s_flex_groups == NULL) {
- ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
- flex_group_count);
+ groups_per_flex = 1U << sbi->s_log_groups_per_flex;
+
+ err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
+ if (err)
goto failed;
- }
for (i = 0; i < sbi->s_groups_count; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
@@ -2151,10 +2174,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
}
if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- if (es->s_last_orphan)
+ /* don't clear list on RO mount w/ errors */
+ if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
jbd_debug(1, "Errors on filesystem, "
"clearing orphan list.\n");
- es->s_last_orphan = 0;
+ es->s_last_orphan = 0;
+ }
jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
return;
}
@@ -2195,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
+ mutex_lock(&inode->i_mutex);
ext4_truncate(inode);
+ mutex_unlock(&inode->i_mutex);
nr_truncates++;
} else {
ext4_msg(sb, KERN_DEBUG,
@@ -2535,6 +2562,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
+EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
static struct attribute *ext4_attrs[] = {
@@ -2550,6 +2578,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc),
ATTR_LIST(max_writeback_mb_bump),
+ ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error),
NULL,
};
@@ -2557,10 +2586,12 @@ static struct attribute *ext4_attrs[] = {
/* Features this copy of ext4 supports */
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);
+EXT4_INFO_ATTR(meta_bg_resize);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
ATTR_LIST(batched_discard),
+ ATTR_LIST(meta_bg_resize),
NULL,
};
@@ -2661,6 +2692,16 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"extents feature\n");
return 0;
}
+
+#ifndef CONFIG_QUOTA
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
+ !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
+#endif /* CONFIG_QUOTA */
return 1;
}
@@ -2723,6 +2764,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
+ sb_start_write(sb);
for (group = elr->lr_next_group; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
@@ -2749,6 +2791,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
}
+ sb_end_write(sb);
return ret;
}
@@ -3085,6 +3128,121 @@ static int set_journal_csum_feature_set(struct super_block *sb)
return ret;
}
+/*
+ * Note: calculating the overhead so we can be compatible with
+ * historical BSD practice is quite difficult in the face of
+ * clusters/bigalloc. This is because multiple metadata blocks from
+ * different block group can end up in the same allocation cluster.
+ * Calculating the exact overhead in the face of clustered allocation
+ * requires either O(all block bitmaps) in memory or O(number of block
+ * groups**2) in time. We will still calculate the superblock for
+ * older file systems --- and if we come across with a bigalloc file
+ * system with zero in s_overhead_clusters the estimate will be close to
+ * correct especially for very large cluster sizes --- but for newer
+ * file systems, it's better to calculate this figure once at mkfs
+ * time, and store it in the superblock. If the superblock value is
+ * present (even for non-bigalloc file systems), we will use it.
+ */
+static int count_overhead(struct super_block *sb, ext4_group_t grp,
+ char *buf)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_desc *gdp;
+ ext4_fsblk_t first_block, last_block, b;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ int s, j, count = 0;
+
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
+ return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+ sbi->s_itb_per_group + 2);
+
+ first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
+ (grp * EXT4_BLOCKS_PER_GROUP(sb));
+ last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
+ for (i = 0; i < ngroups; i++) {
+ gdp = ext4_get_group_desc(sb, i, NULL);
+ b = ext4_block_bitmap(sb, gdp);
+ if (b >= first_block && b <= last_block) {
+ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+ count++;
+ }
+ b = ext4_inode_bitmap(sb, gdp);
+ if (b >= first_block && b <= last_block) {
+ ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+ count++;
+ }
+ b = ext4_inode_table(sb, gdp);
+ if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
+ for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
+ int c = EXT4_B2C(sbi, b - first_block);
+ ext4_set_bit(c, buf);
+ count++;
+ }
+ if (i != grp)
+ continue;
+ s = 0;
+ if (ext4_bg_has_super(sb, grp)) {
+ ext4_set_bit(s++, buf);
+ count++;
+ }
+ for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
+ ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+ count++;
+ }
+ }
+ if (!count)
+ return 0;
+ return EXT4_CLUSTERS_PER_GROUP(sb) -
+ ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
+}
+
+/*
+ * Compute the overhead and stash it in sbi->s_overhead
+ */
+int ext4_calculate_overhead(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ ext4_fsblk_t overhead = 0;
+ char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+
+ if (!buf)
+ return -ENOMEM;
+
+ /*
+ * Compute the overhead (FS structures). This is constant
+ * for a given filesystem unless the number of block groups
+ * changes so we cache the previous value until it does.
+ */
+
+ /*
+ * All of the blocks before first_data_block are overhead
+ */
+ overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
+
+ /*
+ * Add the overhead found in each block group
+ */
+ for (i = 0; i < ngroups; i++) {
+ int blks;
+
+ blks = count_overhead(sb, i, buf);
+ overhead += blks;
+ if (blks)
+ memset(buf, 0, PAGE_SIZE);
+ cond_resched();
+ }
+ /* Add the journal blocks as well */
+ if (sbi->s_journal)
+ overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
+
+ sbi->s_overhead = overhead;
+ smp_wmb();
+ free_page((unsigned long) buf);
+ return 0;
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
@@ -3106,7 +3264,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
unsigned int i;
int needs_recovery, has_huge_files, has_bigalloc;
__u64 blocks_count;
- int err;
+ int err = 0;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
ext4_group_t first_not_zeroed;
@@ -3122,9 +3280,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_fs_info = sbi;
sbi->s_sb = sb;
- sbi->s_mount_opt = 0;
- sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID);
- sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID);
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sb_block = sb_block;
if (sb->s_bdev->bd_part)
@@ -3135,6 +3290,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
for (cp = sb->s_id; (cp = strchr(cp, '/'));)
*cp = '!';
+ /* -EINVAL is default */
ret = -EINVAL;
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
if (!blocksize) {
@@ -3219,9 +3375,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (def_mount_opts & EXT4_DEFM_UID16)
set_opt(sb, NO_UID32);
/* xattr user namespace & acls are now defaulted on */
-#ifdef CONFIG_EXT4_FS_XATTR
set_opt(sb, XATTR_USER);
-#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
set_opt(sb, POSIX_ACL);
#endif
@@ -3257,7 +3411,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* enable delayed allocation by default
* Use -o nodelalloc to turn it off
*/
- if (!IS_EXT3_SB(sb) &&
+ if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sb, DELALLOC);
@@ -3296,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
clear_opt(sb, DELALLOC);
}
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- if (blocksize < PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
- goto failed_mount;
- }
- }
-
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3346,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
goto failed_mount;
+ blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR,
@@ -3512,7 +3658,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
- ret = err;
goto failed_mount;
}
@@ -3620,12 +3765,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
- ret = err;
goto failed_mount3;
}
sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_max_writeback_mb_bump = 128;
+ sbi->s_extent_max_zeroout_kb = 32;
/*
* set up enough so that it can read an inode
@@ -3640,12 +3785,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_QUOTA
sb->s_qcop = &ext4_qctl_operations;
sb->dq_op = &ext4_quota_operations;
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
+ /* Use qctl operations for hidden quota files. */
+ sb->s_qcop = &ext4_qctl_sysfile_operations;
+ }
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
- sbi->s_resize_flags = 0;
sb->s_root = NULL;
@@ -3735,6 +3884,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
no_journal:
/*
+ * Get the # of file system overhead blocks from the
+ * superblock if present.
+ */
+ if (es->s_overhead_clusters)
+ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+ else {
+ err = ext4_calculate_overhead(sb);
+ if (err)
+ goto failed_mount_wq;
+ }
+
+ /*
* The maximum number of concurrent works can be high and
* concurrency isn't really necessary. Limit it to 1.
*/
@@ -3742,6 +3903,7 @@ no_journal:
alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
+ ret = -ENOMEM;
goto failed_mount_wq;
}
@@ -3840,6 +4002,24 @@ no_journal:
} else
descr = "out journal";
+#ifdef CONFIG_QUOTA
+ /* Enable quota usage during mount. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
+ !(sb->s_flags & MS_RDONLY)) {
+ err = ext4_enable_quotas(sb);
+ if (err)
+ goto failed_mount7;
+ }
+#endif /* CONFIG_QUOTA */
+
+ if (test_opt(sb, DISCARD)) {
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ if (!blk_queue_discard(q))
+ ext4_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but "
+ "the device does not support discard");
+ }
+
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
@@ -3906,7 +4086,7 @@ out_fail:
kfree(sbi);
out_free_orig:
kfree(orig_data);
- return ret;
+ return err ? err : ret;
}
/*
@@ -4203,9 +4383,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
- sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
- ext4_superblock_csum_set(sb, es);
+ ext4_superblock_csum_set(sb);
mark_buffer_dirty(sbh);
if (sync) {
error = sync_dirty_buffer(sbh);
@@ -4286,6 +4465,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
ext4_commit_super(sb, 1);
jbd2_journal_clear_err(journal);
+ jbd2_journal_update_sb_errno(journal);
}
}
@@ -4302,21 +4482,12 @@ int ext4_force_commit(struct super_block *sb)
return 0;
journal = EXT4_SB(sb)->s_journal;
- if (journal) {
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
+ if (journal)
ret = ext4_journal_force_commit(journal);
- }
return ret;
}
-static void ext4_write_super(struct super_block *sb)
-{
- lock_super(sb);
- ext4_commit_super(sb, 1);
- unlock_super(sb);
-}
-
static int ext4_sync_fs(struct super_block *sb, int wait)
{
int ret = 0;
@@ -4325,6 +4496,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
trace_ext4_sync_fs(sb, wait);
flush_workqueue(sbi->dio_unwritten_wq);
+ /*
+ * Writeback quota in non-journalled quota case - journalled quota has
+ * no dirty dquots
+ */
+ dquot_writeback_dquots(sb, -1);
if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
if (wait)
jbd2_log_wait_commit(sbi->s_journal, target);
@@ -4337,9 +4513,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
* gives us a chance to flush the journal completely and mark the fs clean.
*
* Note that only this function cannot bring a filesystem to be in a clean
- * state independently, because ext4 prevents a new handle from being started
- * by @sb->s_frozen, which stays in an upper layer. It thus needs help from
- * the upper layer.
+ * state independently. It relies on upper layer to stop all data & metadata
+ * modifications.
*/
static int ext4_freeze(struct super_block *sb)
{
@@ -4366,7 +4541,7 @@ static int ext4_freeze(struct super_block *sb)
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
error = ext4_commit_super(sb, 1);
out:
- /* we rely on s_frozen to stop further updates */
+ /* we rely on upper layer to stop further updates */
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
return error;
}
@@ -4380,11 +4555,9 @@ static int ext4_unfreeze(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
- lock_super(sb);
/* Reset the needs_recovery flag before the fs is unlocked. */
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
ext4_commit_super(sb, 1);
- unlock_super(sb);
return 0;
}
@@ -4420,7 +4593,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
char *orig_data = kstrdup(data, GFP_KERNEL);
/* Store the original options */
- lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
old_opts.s_mount_opt2 = sbi->s_mount_opt2;
@@ -4559,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
ext4_setup_system_zone(sb);
- if (sbi->s_journal == NULL)
+ if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
#ifdef CONFIG_QUOTA
@@ -4568,10 +4740,17 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (old_opts.s_qf_names[i] &&
old_opts.s_qf_names[i] != sbi->s_qf_names[i])
kfree(old_opts.s_qf_names[i]);
+ if (enable_quota) {
+ if (sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
+ else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_QUOTA)) {
+ err = ext4_enable_quotas(sb);
+ if (err)
+ goto restore_opts;
+ }
+ }
#endif
- unlock_super(sb);
- if (enable_quota)
- dquot_resume(sb, -1);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
@@ -4595,72 +4774,25 @@ restore_opts:
sbi->s_qf_names[i] = old_opts.s_qf_names[i];
}
#endif
- unlock_super(sb);
kfree(orig_data);
return err;
}
-/*
- * Note: calculating the overhead so we can be compatible with
- * historical BSD practice is quite difficult in the face of
- * clusters/bigalloc. This is because multiple metadata blocks from
- * different block group can end up in the same allocation cluster.
- * Calculating the exact overhead in the face of clustered allocation
- * requires either O(all block bitmaps) in memory or O(number of block
- * groups**2) in time. We will still calculate the superblock for
- * older file systems --- and if we come across with a bigalloc file
- * system with zero in s_overhead_clusters the estimate will be close to
- * correct especially for very large cluster sizes --- but for newer
- * file systems, it's better to calculate this figure once at mkfs
- * time, and store it in the superblock. If the superblock value is
- * present (even for non-bigalloc file systems), we will use it.
- */
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- struct ext4_group_desc *gdp;
+ ext4_fsblk_t overhead = 0;
u64 fsid;
s64 bfree;
- if (test_opt(sb, MINIX_DF)) {
- sbi->s_overhead_last = 0;
- } else if (es->s_overhead_clusters) {
- sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
- } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
- ext4_group_t i, ngroups = ext4_get_groups_count(sb);
- ext4_fsblk_t overhead = 0;
-
- /*
- * Compute the overhead (FS structures). This is constant
- * for a given filesystem unless the number of block groups
- * changes so we cache the previous value until it does.
- */
-
- /*
- * All of the blocks before first_data_block are
- * overhead
- */
- overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
-
- /*
- * Add the overhead found in each block group
- */
- for (i = 0; i < ngroups; i++) {
- gdp = ext4_get_group_desc(sb, i, NULL);
- overhead += ext4_num_overhead_clusters(sb, i, gdp);
- cond_resched();
- }
- sbi->s_overhead_last = overhead;
- smp_wmb();
- sbi->s_blocks_last = ext4_blocks_count(es);
- }
+ if (!test_opt(sb, MINIX_DF))
+ overhead = sbi->s_overhead;
buf->f_type = EXT4_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
- buf->f_blocks = (ext4_blocks_count(es) -
- EXT4_C2B(sbi, sbi->s_overhead_last));
+ buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
/* prevent underflow in case that few free space is available */
@@ -4693,7 +4825,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
- return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+ return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
}
static int ext4_write_dquot(struct dquot *dquot)
@@ -4830,6 +4962,74 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return dquot_quota_on(sb, type, format_id, path);
}
+static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
+ unsigned int flags)
+{
+ int err;
+ struct inode *qf_inode;
+ unsigned long qf_inums[MAXQUOTAS] = {
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ };
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
+
+ if (!qf_inums[type])
+ return -EPERM;
+
+ qf_inode = ext4_iget(sb, qf_inums[type]);
+ if (IS_ERR(qf_inode)) {
+ ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
+ return PTR_ERR(qf_inode);
+ }
+
+ err = dquot_enable(qf_inode, type, format_id, flags);
+ iput(qf_inode);
+
+ return err;
+}
+
+/* Enable usage tracking for all quota types. */
+static int ext4_enable_quotas(struct super_block *sb)
+{
+ int type, err = 0;
+ unsigned long qf_inums[MAXQUOTAS] = {
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ };
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (qf_inums[type]) {
+ err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
+ DQUOT_USAGE_ENABLED);
+ if (err) {
+ ext4_warning(sb,
+ "Failed to enable quota (type=%d) "
+ "tracking. Please run e2fsck to fix.",
+ type);
+ return err;
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * quota_on function that is used when QUOTA feature is set.
+ */
+static int ext4_quota_on_sysfile(struct super_block *sb, int type,
+ int format_id)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
+ return -EINVAL;
+
+ /*
+ * USAGE was enabled at mount time. Only need to enable LIMITS now.
+ */
+ return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
+}
+
static int ext4_quota_off(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
@@ -4856,6 +5056,18 @@ out:
return dquot_quota_off(sb, type);
}
+/*
+ * quota_off function that is used when QUOTA feature is set.
+ */
+static int ext4_quota_off_sysfile(struct super_block *sb, int type)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
+ return -EINVAL;
+
+ /* Disable only the limits. */
+ return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
+}
+
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and no one else should touch the files)
@@ -5072,6 +5284,7 @@ static int __init ext4_init_fs(void)
ext4_li_info = NULL;
mutex_init(&ext4_li_mtx);
+ /* Build-time check for flags consistency */
ext4_check_flag_values();
for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
@@ -5079,15 +5292,22 @@ static int __init ext4_init_fs(void)
init_waitqueue_head(&ext4__ioend_wq[i]);
}
- err = ext4_init_pageio();
+ err = ext4_init_es();
if (err)
return err;
+
+ err = ext4_init_pageio();
+ if (err)
+ goto out7;
+
err = ext4_init_system_zone();
if (err)
goto out6;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
- if (!ext4_kset)
+ if (!ext4_kset) {
+ err = -ENOMEM;
goto out5;
+ }
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
err = ext4_init_feat_adverts();
@@ -5129,6 +5349,9 @@ out5:
ext4_exit_system_zone();
out6:
ext4_exit_pageio();
+out7:
+ ext4_exit_es();
+
return err;
}