aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c864
1 files changed, 395 insertions, 469 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 91341ec6e06..5ae1674ec12 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -41,6 +41,8 @@
#include <asm/uaccess.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
+#include "ext4_extents.h"
+#include "xattr.h"
#include <trace/events/ext4.h>
@@ -52,6 +54,9 @@
#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
+#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
+#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
+
static __le32 ext4_extent_block_csum(struct inode *inode,
struct ext4_extent_header *eh)
{
@@ -106,6 +111,9 @@ static int ext4_split_extent_at(handle_t *handle,
int split_flag,
int flags);
+static int ext4_find_delayed_extent(struct inode *inode,
+ struct ext4_ext_cache *newex);
+
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
@@ -1177,7 +1185,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
- neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
+ le16_add_cpu(&neh->eh_depth, 1);
ext4_mark_inode_dirty(handle, inode);
out:
brelse(bh);
@@ -1656,16 +1664,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
}
/*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ size_t s;
+ unsigned max_root = ext4_ext_space_root(inode, 0);
+ ext4_fsblk_t blk;
+
+ if ((path[0].p_depth != 1) ||
+ (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+ (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+ return;
+
+ /*
+ * We need to modify the block allocation bitmap and the block
+ * group descriptor to release the extent tree block. If we
+ * can't get the journal credits, give up.
+ */
+ if (ext4_journal_extend(handle, 2))
+ return;
+
+ /*
+ * Copy the extent data up to the inode
+ */
+ blk = ext4_idx_pblock(path[0].p_idx);
+ s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+ sizeof(struct ext4_extent_idx);
+ s += sizeof(struct ext4_extent_header);
+
+ memcpy(path[0].p_hdr, path[1].p_hdr, s);
+ path[0].p_depth = 0;
+ path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+ (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+ path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+ brelse(path[1].p_bh);
+ ext4_free_blocks(handle, inode, NULL, blk, 1,
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+}
+
+/*
* This function tries to merge the @ex extent to neighbours in the tree.
* return 1 if merge left else 0.
*/
-static int ext4_ext_try_to_merge(struct inode *inode,
+static void ext4_ext_try_to_merge(handle_t *handle,
+ struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex) {
struct ext4_extent_header *eh;
unsigned int depth;
int merge_done = 0;
- int ret = 0;
depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1727,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
if (!merge_done)
- ret = ext4_ext_try_to_merge_right(inode, path, ex);
+ (void) ext4_ext_try_to_merge_right(inode, path, ex);
- return ret;
+ ext4_ext_try_to_merge_up(handle, inode, path);
}
/*
@@ -1891,18 +1943,17 @@ has_space:
nearex->ee_len = newext->ee_len;
merge:
- /* try to merge extents to the right */
+ /* try to merge extents */
if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
- ext4_ext_try_to_merge(inode, path, nearex);
+ ext4_ext_try_to_merge(handle, inode, path, nearex);
- /* try to merge extents to the left */
/* time to correct all indexes above */
err = ext4_ext_correct_indexes(handle, inode, path);
if (err)
goto cleanup;
- err = ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
cleanup:
if (npath) {
@@ -1913,27 +1964,33 @@ cleanup:
return err;
}
-static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
- ext4_lblk_t num, ext_prepare_callback func,
- void *cbdata)
+static int ext4_fill_fiemap_extents(struct inode *inode,
+ ext4_lblk_t block, ext4_lblk_t num,
+ struct fiemap_extent_info *fieinfo)
{
struct ext4_ext_path *path = NULL;
- struct ext4_ext_cache cbex;
+ struct ext4_ext_cache newex;
struct ext4_extent *ex;
- ext4_lblk_t next, start = 0, end = 0;
+ ext4_lblk_t next, next_del, start = 0, end = 0;
ext4_lblk_t last = block + num;
- int depth, exists, err = 0;
-
- BUG_ON(func == NULL);
- BUG_ON(inode == NULL);
+ int exists, depth = 0, err = 0;
+ unsigned int flags = 0;
+ unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
while (block < last && block != EXT_MAX_BLOCKS) {
num = last - block;
/* find extent for this block */
down_read(&EXT4_I(inode)->i_data_sem);
+
+ if (path && ext_depth(inode) != depth) {
+ /* depth was changed. we have to realloc path */
+ kfree(path);
+ path = NULL;
+ }
+
path = ext4_ext_find_extent(inode, block, path);
- up_read(&EXT4_I(inode)->i_data_sem);
if (IS_ERR(path)) {
+ up_read(&EXT4_I(inode)->i_data_sem);
err = PTR_ERR(path);
path = NULL;
break;
@@ -1941,13 +1998,16 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
depth = ext_depth(inode);
if (unlikely(path[depth].p_hdr == NULL)) {
+ up_read(&EXT4_I(inode)->i_data_sem);
EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
err = -EIO;
break;
}
ex = path[depth].p_ext;
next = ext4_ext_next_allocated_block(path);
+ ext4_ext_drop_refs(path);
+ flags = 0;
exists = 0;
if (!ex) {
/* there is no extent yet, so try to allocate
@@ -1984,40 +2044,64 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
BUG_ON(end <= start);
if (!exists) {
- cbex.ec_block = start;
- cbex.ec_len = end - start;
- cbex.ec_start = 0;
+ newex.ec_block = start;
+ newex.ec_len = end - start;
+ newex.ec_start = 0;
} else {
- cbex.ec_block = le32_to_cpu(ex->ee_block);
- cbex.ec_len = ext4_ext_get_actual_len(ex);
- cbex.ec_start = ext4_ext_pblock(ex);
+ newex.ec_block = le32_to_cpu(ex->ee_block);
+ newex.ec_len = ext4_ext_get_actual_len(ex);
+ newex.ec_start = ext4_ext_pblock(ex);
+ if (ext4_ext_is_uninitialized(ex))
+ flags |= FIEMAP_EXTENT_UNWRITTEN;
}
- if (unlikely(cbex.ec_len == 0)) {
- EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
- err = -EIO;
- break;
+ /*
+ * Find delayed extent and update newex accordingly. We call
+ * it even in !exists case to find out whether newex is the
+ * last existing extent or not.
+ */
+ next_del = ext4_find_delayed_extent(inode, &newex);
+ if (!exists && next_del) {
+ exists = 1;
+ flags |= FIEMAP_EXTENT_DELALLOC;
}
- err = func(inode, next, &cbex, ex, cbdata);
- ext4_ext_drop_refs(path);
+ up_read(&EXT4_I(inode)->i_data_sem);
- if (err < 0)
+ if (unlikely(newex.ec_len == 0)) {
+ EXT4_ERROR_INODE(inode, "newex.ec_len == 0");
+ err = -EIO;
break;
+ }
- if (err == EXT_REPEAT)
- continue;
- else if (err == EXT_BREAK) {
- err = 0;
- break;
+ /* This is possible iff next == next_del == EXT_MAX_BLOCKS */
+ if (next == next_del) {
+ flags |= FIEMAP_EXTENT_LAST;
+ if (unlikely(next_del != EXT_MAX_BLOCKS ||
+ next != EXT_MAX_BLOCKS)) {
+ EXT4_ERROR_INODE(inode,
+ "next extent == %u, next "
+ "delalloc extent = %u",
+ next, next_del);
+ err = -EIO;
+ break;
+ }
}
- if (ext_depth(inode) != depth) {
- /* depth was changed. we have to realloc path */
- kfree(path);
- path = NULL;
+ if (exists) {
+ err = fiemap_fill_next_extent(fieinfo,
+ (__u64)newex.ec_block << blksize_bits,
+ (__u64)newex.ec_start << blksize_bits,
+ (__u64)newex.ec_len << blksize_bits,
+ flags);
+ if (err < 0)
+ break;
+ if (err == 1) {
+ err = 0;
+ break;
+ }
}
- block = cbex.ec_block + cbex.ec_len;
+ block = newex.ec_block + newex.ec_len;
}
if (path) {
@@ -2093,13 +2177,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
}
/*
- * ext4_ext_check_cache()
+ * ext4_ext_in_cache()
* Checks to see if the given block is in the cache.
* If it is, the cached extent is stored in the given
- * cache extent pointer. If the cached extent is a hole,
- * this routine should be used instead of
- * ext4_ext_in_cache if the calling function needs to
- * know the size of the hole.
+ * cache extent pointer.
*
* @inode: The files inode
* @block: The block to look for in the cache
@@ -2108,10 +2189,11 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
*
* Return 0 if cache is invalid; 1 if the cache is valid
*/
-static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
- struct ext4_ext_cache *ex){
+static int
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
+ struct ext4_extent *ex)
+{
struct ext4_ext_cache *cex;
- struct ext4_sb_info *sbi;
int ret = 0;
/*
@@ -2119,14 +2201,15 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
*/
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
cex = &EXT4_I(inode)->i_cached_extent;
- sbi = EXT4_SB(inode->i_sb);
/* has cache valid data? */
if (cex->ec_len == 0)
goto errout;
if (in_range(block, cex->ec_block, cex->ec_len)) {
- memcpy(ex, cex, sizeof(struct ext4_ext_cache));
+ ex->ee_block = cpu_to_le32(cex->ec_block);
+ ext4_ext_store_pblock(ex, cex->ec_start);
+ ex->ee_len = cpu_to_le16(cex->ec_len);
ext_debug("%u cached by %u:%u:%llu\n",
block,
cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2139,48 +2222,18 @@ errout:
}
/*
- * ext4_ext_in_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * extent pointer.
- *
- * @inode: The files inode
- * @block: The block to look for in the cache
- * @ex: Pointer where the cached extent will be stored
- * if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
- struct ext4_extent *ex)
-{
- struct ext4_ext_cache cex;
- int ret = 0;
-
- if (ext4_ext_check_cache(inode, block, &cex)) {
- ex->ee_block = cpu_to_le32(cex.ec_block);
- ext4_ext_store_pblock(ex, cex.ec_start);
- ex->ee_len = cpu_to_le16(cex.ec_len);
- ret = 1;
- }
-
- return ret;
-}
-
-
-/*
* ext4_ext_rm_idx:
* removes index from the index block.
*/
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
+ struct ext4_ext_path *path, int depth)
{
int err;
ext4_fsblk_t leaf;
/* free index block */
- path--;
+ depth--;
+ path = path + depth;
leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2205,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+ while (--depth >= 0) {
+ if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+ break;
+ path--;
+ err = ext4_ext_get_access(handle, inode, path);
+ if (err)
+ break;
+ path->p_idx->ei_block = (path+1)->p_idx->ei_block;
+ err = ext4_ext_dirty(handle, inode, path);
+ if (err)
+ break;
+ }
return err;
}
@@ -2257,7 +2323,13 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
int index;
- int depth = ext_depth(inode);
+ int depth;
+
+ /* If we are converting the inline data, only one is needed here. */
+ if (ext4_has_inline_data(inode))
+ return 1;
+
+ depth = ext_depth(inode);
if (chunk)
index = depth * 2;
@@ -2275,10 +2347,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned short ee_len = ext4_ext_get_actual_len(ex);
ext4_fsblk_t pblk;
- int flags = EXT4_FREE_BLOCKS_FORGET;
+ int flags = 0;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
+ flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+ else if (ext4_should_journal_data(inode))
+ flags |= EXT4_FREE_BLOCKS_FORGET;
+
/*
* For bigalloc file systems, we never free a partial cluster
* at the beginning of the extent. Instead, we make a note
@@ -2538,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* if this leaf is free, then we should
* remove it from index block above */
if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
- err = ext4_ext_rm_idx(handle, inode, path + depth);
+ err = ext4_ext_rm_idx(handle, inode, path, depth);
out:
return err;
@@ -2570,10 +2645,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
{
struct super_block *sb = inode->i_sb;
int depth = ext_depth(inode);
- struct ext4_ext_path *path;
+ struct ext4_ext_path *path = NULL;
ext4_fsblk_t partial_cluster = 0;
handle_t *handle;
- int i, err;
+ int i = 0, err = 0;
ext_debug("truncate since %u to %u\n", start, end);
@@ -2605,9 +2680,17 @@ again:
return PTR_ERR(path);
}
depth = ext_depth(inode);
+ /* Leaf not may not exist only if inode has no blocks at all */
ex = path[depth].p_ext;
- if (!ex)
- goto cont;
+ if (!ex) {
+ if (depth) {
+ EXT4_ERROR_INODE(inode,
+ "path[%d].p_hdr == NULL",
+ depth);
+ err = -EIO;
+ }
+ goto out;
+ }
ee_block = le32_to_cpu(ex->ee_block);
@@ -2637,29 +2720,34 @@ again:
if (err < 0)
goto out;
}
- ext4_ext_drop_refs(path);
- kfree(path);
}
-cont:
-
/*
* We start scanning from right side, freeing all the blocks
* after i_size and walking into the tree depth-wise.
*/
depth = ext_depth(inode);
- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
- if (path == NULL) {
- ext4_journal_stop(handle);
- return -ENOMEM;
- }
- path[0].p_depth = depth;
- path[0].p_hdr = ext_inode_hdr(inode);
+ if (path) {
+ int k = i = depth;
+ while (--k > 0)
+ path[k].p_block =
+ le16_to_cpu(path[k].p_hdr->eh_entries)+1;
+ } else {
+ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
+ GFP_NOFS);
+ if (path == NULL) {
+ ext4_journal_stop(handle);
+ return -ENOMEM;
+ }
+ path[0].p_depth = depth;
+ path[0].p_hdr = ext_inode_hdr(inode);
+ i = 0;
- if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
- err = -EIO;
- goto out;
+ if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
+ err = -EIO;
+ goto out;
+ }
}
- i = err = 0;
+ err = 0;
while (i >= 0 && err == 0) {
if (i == depth) {
@@ -2728,7 +2816,7 @@ cont:
/* index is empty, remove it;
* handle must be already prepared by the
* truncatei_leaf() */
- err = ext4_ext_rm_idx(handle, inode, path + i);
+ err = ext4_ext_rm_idx(handle, inode, path, i);
}
/* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh);
@@ -2773,8 +2861,10 @@ cont:
out:
ext4_ext_drop_refs(path);
kfree(path);
- if (err == -EAGAIN)
+ if (err == -EAGAIN) {
+ path = NULL;
goto again;
+ }
ext4_journal_stop(handle);
return err;
@@ -2883,6 +2973,9 @@ static int ext4_split_extent_at(handle_t *handle,
unsigned int ee_len, depth;
int err = 0;
+ BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
+ (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
+
ext_debug("ext4_split_extents_at: inode %lu, logical"
"block %llu\n", inode->i_ino, (unsigned long long)split);
@@ -2912,9 +3005,9 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_initialized(ex);
if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
- ext4_ext_try_to_merge(inode, path, ex);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
}
@@ -2941,13 +3034,20 @@ static int ext4_split_extent_at(handle_t *handle,
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- err = ext4_ext_zeroout(inode, &orig_ex);
+ if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
+ if (split_flag & EXT4_EXT_DATA_VALID1)
+ err = ext4_ext_zeroout(inode, ex2);
+ else
+ err = ext4_ext_zeroout(inode, ex);
+ } else
+ err = ext4_ext_zeroout(inode, &orig_ex);
+
if (err)
goto fix_extent_len;
/* update the extent length and mark as initialized */
ex->ee_len = cpu_to_le16(ee_len);
- ext4_ext_try_to_merge(inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
} else if (err)
goto fix_extent_len;
@@ -2994,12 +3094,13 @@ static int ext4_split_extent(handle_t *handle,
uninitialized = ext4_ext_is_uninitialized(ex);
if (map->m_lblk + map->m_len < ee_block + ee_len) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
EXT4_EXT_MARK_UNINIT2;
+ if (split_flag & EXT4_EXT_DATA_VALID2)
+ split_flag1 |= EXT4_EXT_DATA_VALID1;
err = ext4_split_extent_at(handle, inode, path,
map->m_lblk + map->m_len, split_flag1, flags1);
if (err)
@@ -3012,8 +3113,8 @@ static int ext4_split_extent(handle_t *handle,
return PTR_ERR(path);
if (map->m_lblk >= ee_block) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
+ EXT4_EXT_DATA_VALID2);
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1;
if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3029,7 +3130,6 @@ out:
return err ? err : map->m_len;
}
-#define EXT4_EXT_ZERO_LEN 7
/*
* This function is called by ext4_ext_map_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
@@ -3055,13 +3155,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_map_blocks *map,
struct ext4_ext_path *path)
{
+ struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
struct ext4_extent zero_ex;
struct ext4_extent *ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth;
- int allocated;
+ int allocated, max_zeroout = 0;
int err = 0;
int split_flag = 0;
@@ -3069,6 +3170,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
"block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)map->m_lblk, map->m_len);
+ sbi = EXT4_SB(inode->i_sb);
eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map->m_len)
@@ -3168,9 +3270,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
- /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
- if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
- (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ if (EXT4_EXT_MAY_ZEROOUT & split_flag)
+ max_zeroout = sbi->s_extent_max_zeroout_kb >>
+ inode->i_sb->s_blocksize_bits;
+
+ /* If extent is less than s_max_zeroout_kb, zeroout directly */
+ if (max_zeroout && (ee_len <= max_zeroout)) {
err = ext4_ext_zeroout(inode, ex);
if (err)
goto out;
@@ -3179,8 +3284,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (err)
goto out;
ext4_ext_mark_initialized(ex);
- ext4_ext_try_to_merge(inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + depth);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
}
@@ -3194,9 +3299,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
split_map.m_lblk = map->m_lblk;
split_map.m_len = map->m_len;
- if (allocated > map->m_len) {
- if (allocated <= EXT4_EXT_ZERO_LEN &&
- (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ if (max_zeroout && (allocated > map->m_len)) {
+ if (allocated <= max_zeroout) {
/* case 3 */
zero_ex.ee_block =
cpu_to_le32(map->m_lblk);
@@ -3208,9 +3312,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
split_map.m_lblk = map->m_lblk;
split_map.m_len = allocated;
- } else if ((map->m_lblk - ee_block + map->m_len <
- EXT4_EXT_ZERO_LEN) &&
- (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
/* case 2 */
if (map->m_lblk != ee_block) {
zero_ex.ee_block = ex->ee_block;
@@ -3230,7 +3332,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
}
allocated = ext4_split_extent(handle, inode, path,
- &split_map, split_flag, 0);
+ &split_map, split_flag, 0);
if (allocated < 0)
err = allocated;
@@ -3244,7 +3346,7 @@ out:
* to an uninitialized extent.
*
* Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple /initialized uninitialized extents (up to three)
+ * extent into multiple initialized/uninitialized extents (up to three)
* There are three possibilities:
* a> There is no split required: Entire extent should be uninitialized
* b> Splits in two extents: Write is happening at either end of the extent
@@ -3291,26 +3393,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
split_flag |= EXT4_EXT_MARK_UNINIT2;
-
+ if (flags & EXT4_GET_BLOCKS_CONVERT)
+ split_flag |= EXT4_EXT_DATA_VALID2;
flags |= EXT4_GET_BLOCKS_PRE_IO;
return ext4_split_extent(handle, inode, path, map, split_flag, flags);
}
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path)
+ struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path)
{
struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ unsigned int ee_len;
int depth;
int err = 0;
depth = ext_depth(inode);
ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
+ (unsigned long long)ee_block, ee_len);
+
+ /* If extent is larger than requested then split is required */
+ if (ee_block != map->m_lblk || ee_len > map->m_len) {
+ err = ext4_split_unwritten_extents(handle, inode, map, path,
+ EXT4_GET_BLOCKS_CONVERT);
+ if (err < 0)
+ goto out;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
+ }
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ }
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3321,10 +3444,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
/* note: ext4_ext_correct_indexes() isn't needed here because
* borders are not changed
*/
- ext4_ext_try_to_merge(inode, path, ex);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
/* Mark modified extent as dirty */
- err = ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
ext4_ext_show_leaf(inode, path);
return err;
@@ -3394,115 +3517,34 @@ out:
/**
* ext4_find_delalloc_range: find delayed allocated block in the given range.
*
- * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
- * whether there are any buffers marked for delayed allocation. It returns '1'
- * on the first delalloc'ed buffer head found. If no buffer head in the given
- * range is marked for delalloc, it returns 0.
- * lblk_start should always be <= lblk_end.
- * search_hint_reverse is to indicate that searching in reverse from lblk_end to
- * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
- * block sooner). This is useful when blocks are truncated sequentially from
- * lblk_start towards lblk_end.
+ * Return 1 if there is a delalloc block in the range, otherwise 0.
*/
static int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t lblk_start,
- ext4_lblk_t lblk_end,
- int search_hint_reverse)
+ ext4_lblk_t lblk_end)
{
- struct address_space *mapping = inode->i_mapping;
- struct buffer_head *head, *bh = NULL;
- struct page *page;
- ext4_lblk_t i, pg_lblk;
- pgoff_t index;
+ struct extent_status es;
- if (!test_opt(inode->i_sb, DELALLOC))
- return 0;
-
- /* reverse search wont work if fs block size is less than page size */
- if (inode->i_blkbits < PAGE_CACHE_SHIFT)
- search_hint_reverse = 0;
-
- if (search_hint_reverse)
- i = lblk_end;
+ es.start = lblk_start;
+ ext4_es_find_extent(inode, &es);
+ if (es.len == 0)
+ return 0; /* there is no delay extent in this tree */
+ else if (es.start <= lblk_start && lblk_start < es.start + es.len)
+ return 1;
+ else if (lblk_start <= es.start && es.start <= lblk_end)
+ return 1;
else
- i = lblk_start;
-
- index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- while ((i >= lblk_start) && (i <= lblk_end)) {
- page = find_get_page(mapping, index);
- if (!page)
- goto nextpage;
-
- if (!page_has_buffers(page))
- goto nextpage;
-
- head = page_buffers(page);
- if (!head)
- goto nextpage;
-
- bh = head;
- pg_lblk = index << (PAGE_CACHE_SHIFT -
- inode->i_blkbits);
- do {
- if (unlikely(pg_lblk < lblk_start)) {
- /*
- * This is possible when fs block size is less
- * than page size and our cluster starts/ends in
- * middle of the page. So we need to skip the
- * initial few blocks till we reach the 'lblk'
- */
- pg_lblk++;
- continue;
- }
-
- /* Check if the buffer is delayed allocated and that it
- * is not yet mapped. (when da-buffers are mapped during
- * their writeout, their da_mapped bit is set.)
- */
- if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
- page_cache_release(page);
- trace_ext4_find_delalloc_range(inode,
- lblk_start, lblk_end,
- search_hint_reverse,
- 1, i);
- return 1;
- }
- if (search_hint_reverse)
- i--;
- else
- i++;
- } while ((i >= lblk_start) && (i <= lblk_end) &&
- ((bh = bh->b_this_page) != head));
-nextpage:
- if (page)
- page_cache_release(page);
- /*
- * Move to next page. 'i' will be the first lblk in the next
- * page.
- */
- if (search_hint_reverse)
- index--;
- else
- index++;
- i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- }
-
- trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
- search_hint_reverse, 0, 0);
- return 0;
+ return 0;
}
-int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
- int search_hint_reverse)
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
- return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
- search_hint_reverse);
+ return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
}
/**
@@ -3563,7 +3605,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
lblk_to = lblk_from + c_offset - 1;
- if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+ if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
allocated_clusters--;
}
@@ -3573,7 +3615,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
- if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+ if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
allocated_clusters--;
}
@@ -3588,7 +3630,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
{
int ret = 0;
int err = 0;
- ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+ ext4_io_end_t *io = ext4_inode_aio(inode);
ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
"block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3596,13 +3638,15 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
flags, allocated);
ext4_ext_show_leaf(inode, path);
- trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
- newblock);
+ trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
+ allocated, newblock);
/* get_block() before submit the IO, split the extent */
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
ret = ext4_split_unwritten_extents(handle, inode, map,
path, flags);
+ if (ret <= 0)
+ goto out;
/*
* Flag the inode(non aio case) or end_io struct (aio case)
* that this IO needs to conversion to written when IO is
@@ -3618,7 +3662,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
}
/* IO end_io complete, convert the filled extent to written */
if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
- ret = ext4_convert_unwritten_extents_endio(handle, inode,
+ ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
path);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -3842,12 +3886,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent newex, *ex, *ex2;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_fsblk_t newblock = 0;
- int free_on_err = 0, err = 0, depth, ret;
+ int free_on_err = 0, err = 0, depth;
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
- ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+ ext4_io_end_t *io = ext4_inode_aio(inode);
ext4_lblk_t cluster_offset;
+ int set_unwritten = 0;
ext_debug("blocks %u/%u requested for inode %lu\n",
map->m_lblk, map->m_len, inode->i_ino);
@@ -3857,7 +3902,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
if (!newex.ee_start_lo && !newex.ee_start_hi) {
if ((sbi->s_cluster_ratio > 1) &&
- ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+ ext4_find_delalloc_cluster(inode, map->m_lblk))
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -3937,15 +3982,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ee_len, ee_start);
goto out;
}
- ret = ext4_ext_handle_uninitialized_extents(
+ allocated = ext4_ext_handle_uninitialized_extents(
handle, inode, map, path, flags,
allocated, newblock);
- return ret;
+ goto out3;
}
}
if ((sbi->s_cluster_ratio > 1) &&
- ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+ ext4_find_delalloc_cluster(inode, map->m_lblk))
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
/*
@@ -4070,13 +4115,8 @@ got_allocated_blocks:
* For non asycn direct IO case, flag the inode state
* that we need to perform conversion when IO is done.
*/
- if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
- if (io)
- ext4_set_io_unwritten_flag(inode, io);
- else
- ext4_set_inode_state(inode,
- EXT4_STATE_DIO_UNWRITTEN);
- }
+ if ((flags & EXT4_GET_BLOCKS_PRE_IO))
+ set_unwritten = 1;
if (ext4_should_dioread_nolock(inode))
map->m_flags |= EXT4_MAP_UNINIT;
}
@@ -4088,6 +4128,15 @@ got_allocated_blocks:
if (!err)
err = ext4_ext_insert_extent(handle, inode, path,
&newex, flags);
+
+ if (!err && set_unwritten) {
+ if (io)
+ ext4_set_io_unwritten_flag(inode, io);
+ else
+ ext4_set_inode_state(inode,
+ EXT4_STATE_DIO_UNWRITTEN);
+ }
+
if (err && free_on_err) {
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4210,8 +4259,8 @@ out2:
kfree(path);
}
- trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
- newblock, map->m_len, err ? err : allocated);
+out3:
+ trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated);
return err ? err : allocated;
}
@@ -4229,7 +4278,7 @@ void ext4_ext_truncate(struct inode *inode)
* finish any pending end_io work so we won't run the risk of
* converting any truncated blocks to initialized later
*/
- ext4_flush_completed_IO(inode);
+ ext4_flush_unwritten_io(inode);
/*
* probably first extent we're gonna free will be last in block
@@ -4270,6 +4319,8 @@ void ext4_ext_truncate(struct inode *inode)
last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
+ err = ext4_es_remove_extent(inode, last_block,
+ EXT_MAX_BLOCKS - last_block);
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
/* In a multi-transaction truncate, we only make the final
@@ -4360,6 +4411,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (mode & FALLOC_FL_PUNCH_HOLE)
return ext4_punch_hole(file, offset, len);
+ ret = ext4_convert_inline_data(inode);
+ if (ret)
+ return ret;
+
trace_ext4_fallocate_enter(inode, offset, len, mode);
map.m_lblk = offset >> blkbits;
/*
@@ -4389,6 +4444,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
*/
if (len <= EXT_UNINIT_MAX_LEN << blkbits)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
+
+ /* Prevent race condition between unwritten */
+ ext4_flush_unwritten_io(inode);
retry:
while (ret >= 0 && ret < max_blocks) {
map.m_lblk = map.m_lblk + ret;
@@ -4420,6 +4478,8 @@ retry:
ext4_falloc_update_inode(inode, mode, new_size,
(map.m_flags & EXT4_MAP_NEW));
ext4_mark_inode_dirty(handle, inode);
+ if ((file->f_flags & O_SYNC) && ret >= max_blocks)
+ ext4_handle_sync(handle);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
@@ -4493,206 +4553,43 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
}
/*
- * Callback function called for each extent to gather FIEMAP information.
+ * If newex is not existing extent (newex->ec_start equals zero) find
+ * delayed extent at start of newex and update newex accordingly and
+ * return start of the next delayed extent.
+ *
+ * If newex is existing extent (newex->ec_start is not equal zero)
+ * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
+ * extent found. Leave newex unmodified.
*/
-static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
- struct ext4_ext_cache *newex, struct ext4_extent *ex,
- void *data)
+static int ext4_find_delayed_extent(struct inode *inode,
+ struct ext4_ext_cache *newex)
{
- __u64 logical;
- __u64 physical;
- __u64 length;
- __u32 flags = 0;
- int ret = 0;
- struct fiemap_extent_info *fieinfo = data;
- unsigned char blksize_bits;
+ struct extent_status es;
+ ext4_lblk_t next_del;
- blksize_bits = inode->i_sb->s_blocksize_bits;
- logical = (__u64)newex->ec_block << blksize_bits;
+ es.start = newex->ec_block;
+ next_del = ext4_es_find_extent(inode, &es);
if (newex->ec_start == 0) {
/*
* No extent in extent-tree contains block @newex->ec_start,
* then the block may stay in 1)a hole or 2)delayed-extent.
- *
- * Holes or delayed-extents are processed as follows.
- * 1. lookup dirty pages with specified range in pagecache.
- * If no page is got, then there is no delayed-extent and
- * return with EXT_CONTINUE.
- * 2. find the 1st mapped buffer,
- * 3. check if the mapped buffer is both in the request range
- * and a delayed buffer. If not, there is no delayed-extent,
- * then return.
- * 4. a delayed-extent is found, the extent will be collected.
*/
- ext4_lblk_t end = 0;
- pgoff_t last_offset;
- pgoff_t offset;
- pgoff_t index;
- pgoff_t start_index = 0;
- struct page **pages = NULL;
- struct buffer_head *bh = NULL;
- struct buffer_head *head = NULL;
- unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
-
- pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (pages == NULL)
- return -ENOMEM;
-
- offset = logical >> PAGE_SHIFT;
-repeat:
- last_offset = offset;
- head = NULL;
- ret = find_get_pages_tag(inode->i_mapping, &offset,
- PAGECACHE_TAG_DIRTY, nr_pages, pages);
-
- if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
- /* First time, try to find a mapped buffer. */
- if (ret == 0) {
-out:
- for (index = 0; index < ret; index++)
- page_cache_release(pages[index]);
- /* just a hole. */
- kfree(pages);
- return EXT_CONTINUE;
- }
- index = 0;
-
-next_page:
- /* Try to find the 1st mapped buffer. */
- end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
- blksize_bits;
- if (!page_has_buffers(pages[index]))
- goto out;
- head = page_buffers(pages[index]);
- if (!head)
- goto out;
-
- index++;
- bh = head;
- do {
- if (end >= newex->ec_block +
- newex->ec_len)
- /* The buffer is out of
- * the request range.
- */
- goto out;
-
- if (buffer_mapped(bh) &&
- end >= newex->ec_block) {
- start_index = index - 1;
- /* get the 1st mapped buffer. */
- goto found_mapped_buffer;
- }
-
- bh = bh->b_this_page;
- end++;
- } while (bh != head);
-
- /* No mapped buffer in the range found in this page,
- * We need to look up next page.
- */
- if (index >= ret) {
- /* There is no page left, but we need to limit
- * newex->ec_len.
- */
- newex->ec_len = end - newex->ec_block;
- goto out;
- }
- goto next_page;
- } else {
- /*Find contiguous delayed buffers. */
- if (ret > 0 && pages[0]->index == last_offset)
- head = page_buffers(pages[0]);
- bh = head;
- index = 1;
- start_index = 0;
- }
-
-found_mapped_buffer:
- if (bh != NULL && buffer_delay(bh)) {
- /* 1st or contiguous delayed buffer found. */
- if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
- /*
- * 1st delayed buffer found, record
- * the start of extent.
- */
- flags |= FIEMAP_EXTENT_DELALLOC;
- newex->ec_block = end;
- logical = (__u64)end << blksize_bits;
- }
- /* Find contiguous delayed buffers. */
- do {
- if (!buffer_delay(bh))
- goto found_delayed_extent;
- bh = bh->b_this_page;
- end++;
- } while (bh != head);
-
- for (; index < ret; index++) {
- if (!page_has_buffers(pages[index])) {
- bh = NULL;
- break;
- }
- head = page_buffers(pages[index]);
- if (!head) {
- bh = NULL;
- break;
- }
-
- if (pages[index]->index !=
- pages[start_index]->index + index
- - start_index) {
- /* Blocks are not contiguous. */
- bh = NULL;
- break;
- }
- bh = head;
- do {
- if (!buffer_delay(bh))
- /* Delayed-extent ends. */
- goto found_delayed_extent;
- bh = bh->b_this_page;
- end++;
- } while (bh != head);
- }
- } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
- /* a hole found. */
- goto out;
+ if (es.len == 0)
+ /* A hole found. */
+ return 0;
-found_delayed_extent:
- newex->ec_len = min(end - newex->ec_block,
- (ext4_lblk_t)EXT_INIT_MAX_LEN);
- if (ret == nr_pages && bh != NULL &&
- newex->ec_len < EXT_INIT_MAX_LEN &&
- buffer_delay(bh)) {
- /* Have not collected an extent and continue. */
- for (index = 0; index < ret; index++)
- page_cache_release(pages[index]);
- goto repeat;
+ if (es.start > newex->ec_block) {
+ /* A hole found. */
+ newex->ec_len = min(es.start - newex->ec_block,
+ newex->ec_len);
+ return 0;
}
- for (index = 0; index < ret; index++)
- page_cache_release(pages[index]);
- kfree(pages);
+ newex->ec_len = es.start + es.len - newex->ec_block;
}
- physical = (__u64)newex->ec_start << blksize_bits;
- length = (__u64)newex->ec_len << blksize_bits;
-
- if (ex && ext4_ext_is_uninitialized(ex))
- flags |= FIEMAP_EXTENT_UNWRITTEN;
-
- if (next == EXT_MAX_BLOCKS)
- flags |= FIEMAP_EXTENT_LAST;
-
- ret = fiemap_fill_next_extent(fieinfo, logical, physical,
- length, flags);
- if (ret < 0)
- return ret;
- if (ret == 1)
- return EXT_BREAK;
- return EXT_CONTINUE;
+ return next_del;
}
/* fiemap flags we can handle specified here */
#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -4755,9 +4652,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
loff_t first_page_offset, last_page_offset;
int credits, err = 0;
+ /*
+ * Write out all dirty pages to avoid race conditions
+ * Then release them.
+ */
+ if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+ err = filemap_write_and_wait_range(mapping,
+ offset, offset + length - 1);
+
+ if (err)
+ return err;
+ }
+
+ mutex_lock(&inode->i_mutex);
+ /* It's not possible punch hole on append only file */
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+ err = -EPERM;
+ goto out_mutex;
+ }
+ if (IS_SWAPFILE(inode)) {
+ err = -ETXTBSY;
+ goto out_mutex;
+ }
+
/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
- return 0;
+ goto out_mutex;
/*
* If the hole extends beyond i_size, set the hole
@@ -4775,35 +4695,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
first_page_offset = first_page << PAGE_CACHE_SHIFT;
last_page_offset = last_page << PAGE_CACHE_SHIFT;
- /*
- * Write out all dirty pages to avoid race conditions
- * Then release them.
- */
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- err = filemap_write_and_wait_range(mapping,
- offset, offset + length - 1);
-
- if (err)
- return err;
- }
-
/* Now release the pages */
if (last_page_offset > first_page_offset) {
truncate_pagecache_range(inode, first_page_offset,
last_page_offset - 1);
}
- /* finish any pending end_io work */
- ext4_flush_completed_IO(inode);
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ err = ext4_flush_unwritten_io(inode);
+ if (err)
+ goto out_dio;
+ inode_dio_wait(inode);
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_dio;
+ }
- err = ext4_orphan_add(handle, inode);
- if (err)
- goto out;
/*
* Now we need to zero out the non-page-aligned data in the
@@ -4878,6 +4789,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
ext4_ext_invalidate_cache(inode);
ext4_discard_preallocations(inode);
+ err = ext4_es_remove_extent(inode, first_block,
+ stop_block - first_block);
err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
ext4_ext_invalidate_cache(inode);
@@ -4889,18 +4802,31 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
up_write(&EXT4_I(inode)->i_data_sem);
out:
- ext4_orphan_del(handle, inode);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
+out_dio:
+ ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+ mutex_unlock(&inode->i_mutex);
return err;
}
+
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
ext4_lblk_t start_blk;
int error = 0;
+ if (ext4_has_inline_data(inode)) {
+ int has_inline = 1;
+
+ error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline);
+
+ if (has_inline)
+ return error;
+ }
+
/* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return generic_block_fiemap(inode, fieinfo, start, len,
@@ -4922,11 +4848,11 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
/*
- * Walk the extent tree gathering extent information.
- * ext4_ext_fiemap_cb will push extents back to user.
+ * Walk the extent tree gathering extent information
+ * and pushing extents back to the user.
*/
- error = ext4_ext_walk_space(inode, start_blk, len_blks,
- ext4_ext_fiemap_cb, fieinfo);
+ error = ext4_fill_fiemap_extents(inode, start_blk,
+ len_blks, fieinfo);
}
return error;