Diffstat (limited to 'fs/ext4/extents.c')
 -rw-r--r--	fs/ext4/extents.c	| 864
 1 file changed, 395 insertions(+), 469 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 91341ec6e06..5ae1674ec12 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -41,6 +41,8 @@
 #include <asm/uaccess.h>
 #include <linux/fiemap.h>
 #include "ext4_jbd2.h"
+#include "ext4_extents.h"
+#include "xattr.h"
 
 #include <trace/events/ext4.h>
 
@@ -52,6 +54,9 @@
 #define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
 #define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
 
+#define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */
+#define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */
+
 static __le32 ext4_extent_block_csum(struct inode *inode,
 				     struct ext4_extent_header *eh)
 {
@@ -106,6 +111,9 @@ static int ext4_split_extent_at(handle_t *handle,
 			     int split_flag,
 			     int flags);
 
+static int ext4_find_delayed_extent(struct inode *inode,
+				    struct ext4_ext_cache *newex);
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
 					    struct inode *inode,
 					    int needed)
@@ -1177,7 +1185,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
 		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
-	neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
+	le16_add_cpu(&neh->eh_depth, 1);
 	ext4_mark_inode_dirty(handle, inode);
 out:
 	brelse(bh);
@@ -1656,16 +1664,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 }
 
 /*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+				     struct inode *inode,
+				     struct ext4_ext_path *path)
+{
+	size_t s;
+	unsigned max_root = ext4_ext_space_root(inode, 0);
+	ext4_fsblk_t blk;
+
+	if ((path[0].p_depth != 1) ||
+	    (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+	    (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+		return;
+
+	/*
+	 * We need to modify the block allocation bitmap and the block
+	 * group descriptor to release the extent tree block.  If we
+	 * can't get the journal credits, give up.
+	 */
+	if (ext4_journal_extend(handle, 2))
+		return;
+
+	/*
+	 * Copy the extent data up to the inode
+	 */
+	blk = ext4_idx_pblock(path[0].p_idx);
+	s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+		sizeof(struct ext4_extent_idx);
+	s += sizeof(struct ext4_extent_header);
+
+	memcpy(path[0].p_hdr, path[1].p_hdr, s);
+	path[0].p_depth = 0;
+	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+		(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+	path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+	brelse(path[1].p_bh);
+	ext4_free_blocks(handle, inode, NULL, blk, 1,
+			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+}
+
+/*
  * This function tries to merge the @ex extent to neighbours in the tree.
  * return 1 if merge left else 0.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static void ext4_ext_try_to_merge(handle_t *handle,
+				  struct inode *inode,
 				  struct ext4_ext_path *path,
 				  struct ext4_extent *ex) {
 	struct ext4_extent_header *eh;
 	unsigned int depth;
 	int merge_done = 0;
-	int ret = 0;
 
 	depth = ext_depth(inode);
 	BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1727,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
 		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
 
 	if (!merge_done)
-		ret = ext4_ext_try_to_merge_right(inode, path, ex);
+		(void) ext4_ext_try_to_merge_right(inode, path, ex);
 
-	return ret;
+	ext4_ext_try_to_merge_up(handle, inode, path);
 }
 
 /*
@@ -1891,18 +1943,17 @@ has_space:
 		nearex->ee_len = newext->ee_len;
 
 merge:
-	/* try to merge extents to the right */
+	/* try to merge extents */
 	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
-		ext4_ext_try_to_merge(inode, path, nearex);
+		ext4_ext_try_to_merge(handle, inode, path, nearex);
 
-	/* try to merge extents to the left */
 
 	/* time to correct all indexes above */
 	err = ext4_ext_correct_indexes(handle, inode, path);
 	if (err)
 		goto cleanup;
 
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
 	if (npath) {
@@ -1913,27 +1964,33 @@ cleanup:
 	return err;
 }
 
-static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
-			       ext4_lblk_t num, ext_prepare_callback func,
-			       void *cbdata)
+static int ext4_fill_fiemap_extents(struct inode *inode,
+				    ext4_lblk_t block, ext4_lblk_t num,
+				    struct fiemap_extent_info *fieinfo)
 {
 	struct ext4_ext_path *path = NULL;
-	struct ext4_ext_cache cbex;
+	struct ext4_ext_cache newex;
 	struct ext4_extent *ex;
-	ext4_lblk_t next, start = 0, end = 0;
+	ext4_lblk_t next, next_del, start = 0, end = 0;
 	ext4_lblk_t last = block + num;
-	int depth, exists, err = 0;
-
-	BUG_ON(func == NULL);
-	BUG_ON(inode == NULL);
+	int exists, depth = 0, err = 0;
+	unsigned int flags = 0;
+	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 
 	while (block < last && block != EXT_MAX_BLOCKS) {
 		num = last - block;
 		/* find extent for this block */
 		down_read(&EXT4_I(inode)->i_data_sem);
+
+		if (path && ext_depth(inode) != depth) {
+			/* depth was changed. we have to realloc path */
+			kfree(path);
+			path = NULL;
+		}
+
 		path = ext4_ext_find_extent(inode, block, path);
-		up_read(&EXT4_I(inode)->i_data_sem);
 		if (IS_ERR(path)) {
+			up_read(&EXT4_I(inode)->i_data_sem);
 			err = PTR_ERR(path);
 			path = NULL;
 			break;
@@ -1941,13 +1998,16 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 		depth = ext_depth(inode);
 		if (unlikely(path[depth].p_hdr == NULL)) {
+			up_read(&EXT4_I(inode)->i_data_sem);
 			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
 			err = -EIO;
 			break;
 		}
 		ex = path[depth].p_ext;
 		next = ext4_ext_next_allocated_block(path);
+		ext4_ext_drop_refs(path);
 
+		flags = 0;
 		exists = 0;
 		if (!ex) {
 			/* there is no extent yet, so try to allocate
@@ -1984,40 +2044,64 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 		BUG_ON(end <= start);
 
 		if (!exists) {
-			cbex.ec_block = start;
-			cbex.ec_len = end - start;
-			cbex.ec_start = 0;
+			newex.ec_block = start;
+			newex.ec_len = end - start;
+			newex.ec_start = 0;
 		} else {
-			cbex.ec_block = le32_to_cpu(ex->ee_block);
-			cbex.ec_len = ext4_ext_get_actual_len(ex);
-			cbex.ec_start = ext4_ext_pblock(ex);
+			newex.ec_block = le32_to_cpu(ex->ee_block);
+			newex.ec_len = ext4_ext_get_actual_len(ex);
+			newex.ec_start = ext4_ext_pblock(ex);
+			if (ext4_ext_is_uninitialized(ex))
+				flags |= FIEMAP_EXTENT_UNWRITTEN;
 		}
 
-		if (unlikely(cbex.ec_len == 0)) {
-			EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
-			err = -EIO;
-			break;
+		/*
+		 * Find delayed extent and update newex accordingly. We call
+		 * it even in !exists case to find out whether newex is the
+		 * last existing extent or not.
+		 */
+		next_del = ext4_find_delayed_extent(inode, &newex);
+		if (!exists && next_del) {
+			exists = 1;
+			flags |= FIEMAP_EXTENT_DELALLOC;
 		}
-		err = func(inode, next, &cbex, ex, cbdata);
-		ext4_ext_drop_refs(path);
+		up_read(&EXT4_I(inode)->i_data_sem);
 
-		if (err < 0)
+		if (unlikely(newex.ec_len == 0)) {
+			EXT4_ERROR_INODE(inode, "newex.ec_len == 0");
+			err = -EIO;
 			break;
+		}
 
-		if (err == EXT_REPEAT)
-			continue;
-		else if (err == EXT_BREAK) {
-			err = 0;
-			break;
+		/* This is possible iff next == next_del == EXT_MAX_BLOCKS */
+		if (next == next_del) {
+			flags |= FIEMAP_EXTENT_LAST;
+			if (unlikely(next_del != EXT_MAX_BLOCKS ||
+				     next != EXT_MAX_BLOCKS)) {
+				EXT4_ERROR_INODE(inode,
+						 "next extent == %u, next "
+						 "delalloc extent = %u",
+						 next, next_del);
+				err = -EIO;
+				break;
+			}
 		}
 
-		if (ext_depth(inode) != depth) {
-			/* depth was changed. we have to realloc path */
-			kfree(path);
-			path = NULL;
+		if (exists) {
+			err = fiemap_fill_next_extent(fieinfo,
+				(__u64)newex.ec_block << blksize_bits,
+				(__u64)newex.ec_start << blksize_bits,
+				(__u64)newex.ec_len << blksize_bits,
+				flags);
+			if (err < 0)
+				break;
+			if (err == 1) {
+				err = 0;
+				break;
+			}
 		}
 
-		block = cbex.ec_block + cbex.ec_len;
+		block = newex.ec_block + newex.ec_len;
 	}
 
 	if (path) {
@@ -2093,13 +2177,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /*
- * ext4_ext_check_cache()
+ * ext4_ext_in_cache()
  * Checks to see if the given block is in the cache.
 * If it is, the cached extent is stored in the given
- * cache extent pointer.  If the cached extent is a hole,
- * this routine should be used instead of
- * ext4_ext_in_cache if the calling function needs to
- * know the size of the hole.
+ * cache extent pointer.
 *
 * @inode: The files inode
 * @block: The block to look for in the cache
 * @ex:    Pointer where the cached extent will be stored
 *         if it contains block
 *
 * Return 0 if cache is invalid; 1 if the cache is valid
 */
-static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
-			struct ext4_ext_cache *ex){
+static int
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
+		  struct ext4_extent *ex)
+{
 	struct ext4_ext_cache *cex;
-	struct ext4_sb_info *sbi;
 	int ret = 0;
 
 	/*
@@ -2119,14 +2201,15 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
 	 */
 	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	cex = &EXT4_I(inode)->i_cached_extent;
-	sbi = EXT4_SB(inode->i_sb);
 
 	/* has cache valid data? */
 	if (cex->ec_len == 0)
 		goto errout;
 
 	if (in_range(block, cex->ec_block, cex->ec_len)) {
-		memcpy(ex, cex, sizeof(struct ext4_ext_cache));
+		ex->ee_block = cpu_to_le32(cex->ec_block);
+		ext4_ext_store_pblock(ex, cex->ec_start);
+		ex->ee_len = cpu_to_le16(cex->ec_len);
 		ext_debug("%u cached by %u:%u:%llu\n",
 			  block,
 			  cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2139,48 +2222,18 @@ errout:
 }
 
 /*
- * ext4_ext_in_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * extent pointer.
- *
- * @inode: The files inode
- * @block: The block to look for in the cache
- * @ex:    Pointer where the cached extent will be stored
- *         if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
-			struct ext4_extent *ex)
-{
-	struct ext4_ext_cache cex;
-	int ret = 0;
-
-	if (ext4_ext_check_cache(inode, block, &cex)) {
-		ex->ee_block = cpu_to_le32(cex.ec_block);
-		ext4_ext_store_pblock(ex, cex.ec_start);
-		ex->ee_len = cpu_to_le16(cex.ec_len);
-		ret = 1;
-	}
-
-	return ret;
-}
-
-
-/*
  * ext4_ext_rm_idx:
  * removes index from the index block.
  */
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
-			struct ext4_ext_path *path)
+			struct ext4_ext_path *path, int depth)
 {
 	int err;
 	ext4_fsblk_t leaf;
 
 	/* free index block */
-	path--;
+	depth--;
+	path = path + depth;
 	leaf = ext4_idx_pblock(path->p_idx);
 	if (unlikely(path->p_hdr->eh_entries == 0)) {
 		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2205,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	ext4_free_blocks(handle, inode, NULL, leaf, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+	while (--depth >= 0) {
+		if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+			break;
+		path--;
+		err = ext4_ext_get_access(handle, inode, path);
+		if (err)
+			break;
+		path->p_idx->ei_block = (path+1)->p_idx->ei_block;
+		err = ext4_ext_dirty(handle, inode, path);
+		if (err)
+			break;
+	}
 	return err;
 }
 
@@ -2257,7 +2323,13 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
 	int index;
-	int depth = ext_depth(inode);
+	int depth;
+
+	/* If we are converting the inline data, only one is needed here. */
+	if (ext4_has_inline_data(inode))
+		return 1;
+
+	depth = ext_depth(inode);
 
 	if (chunk)
 		index = depth * 2;
@@ -2275,10 +2347,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
 	ext4_fsblk_t pblk;
-	int flags = EXT4_FREE_BLOCKS_FORGET;
+	int flags = 0;
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		flags |= EXT4_FREE_BLOCKS_METADATA;
+		flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+	else if (ext4_should_journal_data(inode))
+		flags |= EXT4_FREE_BLOCKS_FORGET;
+
 	/*
 	 * For bigalloc file systems, we never free a partial cluster
 	 * at the beginning of the extent.  Instead, we make a note
@@ -2538,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	/* if this leaf is free, then we should
 	 * remove it from index block above */
 	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
-		err = ext4_ext_rm_idx(handle, inode, path + depth);
+		err = ext4_ext_rm_idx(handle, inode, path, depth);
 
 out:
 	return err;
@@ -2570,10 +2645,10 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 {
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
-	struct ext4_ext_path *path;
+	struct ext4_ext_path *path = NULL;
 	ext4_fsblk_t partial_cluster = 0;
 	handle_t *handle;
-	int i, err;
+	int i = 0, err = 0;
 
 	ext_debug("truncate since %u to %u\n", start, end);
 
@@ -2605,9 +2680,17 @@ again:
 		return PTR_ERR(path);
 	}
 	depth = ext_depth(inode);
+	/* Leaf not may not exist only if inode has no blocks at all */
 	ex = path[depth].p_ext;
-	if (!ex)
-		goto cont;
+	if (!ex) {
+		if (depth) {
+			EXT4_ERROR_INODE(inode,
+					 "path[%d].p_hdr == NULL",
+					 depth);
+			err = -EIO;
+		}
+		goto out;
+	}
 
 	ee_block = le32_to_cpu(ex->ee_block);
 
@@ -2637,29 +2720,34 @@ again:
 			if (err < 0)
 				goto out;
 		}
-		ext4_ext_drop_refs(path);
-		kfree(path);
 	}
-cont:
-
 	/*
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
 	 */
 	depth = ext_depth(inode);
-	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
-	if (path == NULL) {
-		ext4_journal_stop(handle);
-		return -ENOMEM;
-	}
-	path[0].p_depth = depth;
-	path[0].p_hdr = ext_inode_hdr(inode);
+	if (path) {
+		int k = i = depth;
+		while (--k > 0)
+			path[k].p_block =
+				le16_to_cpu(path[k].p_hdr->eh_entries)+1;
+	} else {
+		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
+			       GFP_NOFS);
+		if (path == NULL) {
+			ext4_journal_stop(handle);
+			return -ENOMEM;
+		}
+		path[0].p_depth = depth;
+		path[0].p_hdr = ext_inode_hdr(inode);
+		i = 0;
 
-	if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
-		err = -EIO;
-		goto out;
+		if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
+			err = -EIO;
+			goto out;
+		}
 	}
-	i = err = 0;
+	err = 0;
 
 	while (i >= 0 && err == 0) {
 		if (i == depth) {
@@ -2728,7 +2816,7 @@ cont:
 			/* index is empty, remove it;
 			 * handle must be already prepared by the
 			 * truncatei_leaf() */
-			err = ext4_ext_rm_idx(handle, inode, path + i);
+			err = ext4_ext_rm_idx(handle, inode, path, i);
 		}
 		/* root level has p_bh == NULL, brelse() eats this */
 		brelse(path[i].p_bh);
@@ -2773,8 +2861,10 @@ cont:
 out:
 	ext4_ext_drop_refs(path);
 	kfree(path);
-	if (err == -EAGAIN)
+	if (err == -EAGAIN) {
+		path = NULL;
 		goto again;
+	}
 	ext4_journal_stop(handle);
 
 	return err;
@@ -2883,6 +2973,9 @@ static int ext4_split_extent_at(handle_t *handle,
 	unsigned int ee_len, depth;
 	int err = 0;
 
+	BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
+	       (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
+
 	ext_debug("ext4_split_extents_at: inode %lu, logical"
 		"block %llu\n", inode->i_ino,
 		(unsigned long long)split);
@@ -2912,9 +3005,9 @@
 			ext4_ext_mark_initialized(ex);
 		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
-			ext4_ext_try_to_merge(inode, path, ex);
+			ext4_ext_try_to_merge(handle, inode, path, ex);
 
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	}
 
@@ -2941,13 +3034,20 @@
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
-		err = ext4_ext_zeroout(inode, &orig_ex);
+		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
+			if (split_flag & EXT4_EXT_DATA_VALID1)
+				err = ext4_ext_zeroout(inode, ex2);
+			else
+				err = ext4_ext_zeroout(inode, ex);
+		} else
+			err = ext4_ext_zeroout(inode, &orig_ex);
+
 		if (err)
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
 		ex->ee_len = cpu_to_le16(ee_len);
-		ext4_ext_try_to_merge(inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_try_to_merge(handle, inode, path, ex);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	} else if (err)
 		goto fix_extent_len;
@@ -2994,12 +3094,13 @@ static int ext4_split_extent(handle_t *handle,
 	uninitialized = ext4_ext_is_uninitialized(ex);
 
 	if (map->m_lblk + map->m_len < ee_block + ee_len) {
-		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
-			      EXT4_EXT_MAY_ZEROOUT : 0;
+		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
 		flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
 		if (uninitialized)
 			split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
 				       EXT4_EXT_MARK_UNINIT2;
+		if (split_flag & EXT4_EXT_DATA_VALID2)
+			split_flag1 |= EXT4_EXT_DATA_VALID1;
 		err = ext4_split_extent_at(handle, inode, path,
 				map->m_lblk + map->m_len, split_flag1, flags1);
 		if (err)
@@ -3012,8 +3113,8 @@ static int ext4_split_extent(handle_t *handle,
 		return PTR_ERR(path);
 
 	if (map->m_lblk >= ee_block) {
-		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
-			      EXT4_EXT_MAY_ZEROOUT : 0;
+		split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
+					    EXT4_EXT_DATA_VALID2);
 		if (uninitialized)
 			split_flag1 |= EXT4_EXT_MARK_UNINIT1;
 		if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3029,7 +3130,6 @@ out:
 	return err ? err : map->m_len;
 }
 
-#define EXT4_EXT_ZERO_LEN 7
 /*
  * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3055,13 +3155,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 					   struct ext4_map_blocks *map,
 					   struct ext4_ext_path *path)
 {
+	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
 	struct ext4_extent *ex;
 	ext4_lblk_t ee_block, eof_block;
 	unsigned int ee_len, depth;
-	int allocated;
+	int allocated, max_zeroout = 0;
 	int err = 0;
 	int split_flag = 0;
 
@@ -3069,6 +3170,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		"block %llu, max_blocks %u\n", inode->i_ino,
 		(unsigned long long)map->m_lblk, map->m_len);
 
+	sbi = EXT4_SB(inode->i_sb);
 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
 	if (eof_block < map->m_lblk + map->m_len)
@@ -3168,9 +3270,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 */
 	split_flag |= ee_block + ee_len <= eof_block ?
 		EXT4_EXT_MAY_ZEROOUT : 0;
 
-	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
-	    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+	if (EXT4_EXT_MAY_ZEROOUT & split_flag)
+		max_zeroout = sbi->s_extent_max_zeroout_kb >>
+			inode->i_sb->s_blocksize_bits;
+
+	/* If extent is less than s_max_zeroout_kb, zeroout directly */
+	if (max_zeroout && (ee_len <= max_zeroout)) {
 		err = ext4_ext_zeroout(inode, ex);
 		if (err)
 			goto out;
@@ -3179,8 +3284,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		if (err)
 			goto out;
 		ext4_ext_mark_initialized(ex);
-		ext4_ext_try_to_merge(inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_try_to_merge(handle, inode, path, ex);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	}
 
@@ -3194,9 +3299,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	split_map.m_lblk = map->m_lblk;
 	split_map.m_len = map->m_len;
 
-	if (allocated > map->m_len) {
-		if (allocated <= EXT4_EXT_ZERO_LEN &&
-		    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+	if (max_zeroout && (allocated > map->m_len)) {
+		if (allocated <= max_zeroout) {
 			/* case 3 */
 			zero_ex.ee_block =
 					 cpu_to_le32(map->m_lblk);
@@ -3208,9 +3312,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				goto out;
 			split_map.m_lblk = map->m_lblk;
 			split_map.m_len = allocated;
-		} else if ((map->m_lblk - ee_block + map->m_len <
-			   EXT4_EXT_ZERO_LEN) &&
-			   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+		} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
 			/* case 2 */
 			if (map->m_lblk != ee_block) {
 				zero_ex.ee_block = ex->ee_block;
@@ -3230,7 +3332,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	}
 
 	allocated = ext4_split_extent(handle, inode, path,
-				       &split_map, split_flag, 0);
+				      &split_map, split_flag, 0);
 	if (allocated < 0)
 		err = allocated;
 
@@ -3244,7 +3346,7 @@ out:
 * to an uninitialized extent.
 *
 * Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple /initialized uninitialized extents (up to three)
+ * extent into multiple initialized/uninitialized extents (up to three)
 * There are three possibilities:
 *   a> There is no split required: Entire extent should be uninitialized
 *   b> Splits in two extents: Write is happening at either end of the extent
@@ -3291,26 +3393,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	split_flag |= ee_block + ee_len <= eof_block ?
 		      EXT4_EXT_MAY_ZEROOUT : 0;
 	split_flag |= EXT4_EXT_MARK_UNINIT2;
-
+	if (flags & EXT4_GET_BLOCKS_CONVERT)
+		split_flag |= EXT4_EXT_DATA_VALID2;
 	flags |= EXT4_GET_BLOCKS_PRE_IO;
 	return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
 
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
-					      struct inode *inode,
-					      struct ext4_ext_path *path)
+						struct inode *inode,
+						struct ext4_map_blocks *map,
+						struct ext4_ext_path *path)
 {
 	struct ext4_extent *ex;
+	ext4_lblk_t ee_block;
+	unsigned int ee_len;
 	int depth;
 	int err = 0;
 
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
+	ee_block = le32_to_cpu(ex->ee_block);
+	ee_len = ext4_ext_get_actual_len(ex);
 
 	ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
-		(unsigned long long)le32_to_cpu(ex->ee_block),
-		ext4_ext_get_actual_len(ex));
+		(unsigned long long)ee_block, ee_len);
+
+	/* If extent is larger than requested then split is required */
+	if (ee_block != map->m_lblk || ee_len > map->m_len) {
+		err = ext4_split_unwritten_extents(handle, inode, map, path,
+						   EXT4_GET_BLOCKS_CONVERT);
+		if (err < 0)
+			goto out;
+		ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode, map->m_lblk, path);
+		if (IS_ERR(path)) {
+			err = PTR_ERR(path);
+			goto out;
+		}
+		depth = ext_depth(inode);
+		ex = path[depth].p_ext;
+	}
 
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
@@ -3321,10 +3444,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 	/* note: ext4_ext_correct_indexes() isn't needed here because
 	 * borders are not changed */
-	ext4_ext_try_to_merge(inode, path, ex);
+	ext4_ext_try_to_merge(handle, inode, path, ex);
 
 	/* Mark modified extent as dirty */
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
 	ext4_ext_show_leaf(inode, path);
 	return err;
@@ -3394,115 +3517,34 @@ out:
 
 /**
 * ext4_find_delalloc_range: find delayed allocated block in the given range.
 *
- * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
- * whether there are any buffers marked for delayed allocation. It returns '1'
- * on the first delalloc'ed buffer head found. If no buffer head in the given
- * range is marked for delalloc, it returns 0.
- * lblk_start should always be <= lblk_end.
- * search_hint_reverse is to indicate that searching in reverse from lblk_end to
- * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
- * block sooner). This is useful when blocks are truncated sequentially from
- * lblk_start towards lblk_end.
+ * Return 1 if there is a delalloc block in the range, otherwise 0.
 */
 static int ext4_find_delalloc_range(struct inode *inode,
 				    ext4_lblk_t lblk_start,
-				    ext4_lblk_t lblk_end,
-				    int search_hint_reverse)
+				    ext4_lblk_t lblk_end)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct buffer_head *head, *bh = NULL;
-	struct page *page;
-	ext4_lblk_t i, pg_lblk;
-	pgoff_t index;
+	struct extent_status es;
 
-	if (!test_opt(inode->i_sb, DELALLOC))
-		return 0;
-
-	/* reverse search wont work if fs block size is less than page size */
-	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
-		search_hint_reverse = 0;
-
-	if (search_hint_reverse)
-		i = lblk_end;
+	es.start = lblk_start;
+	ext4_es_find_extent(inode, &es);
+	if (es.len == 0)
+		return 0; /* there is no delay extent in this tree */
+	else if (es.start <= lblk_start && lblk_start < es.start + es.len)
+		return 1;
+	else if (lblk_start <= es.start && es.start <= lblk_end)
+		return 1;
 	else
-		i = lblk_start;
-
-	index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	while ((i >= lblk_start) && (i <= lblk_end)) {
-		page = find_get_page(mapping, index);
-		if (!page)
-			goto nextpage;
-
-		if (!page_has_buffers(page))
-			goto nextpage;
-
-		head = page_buffers(page);
-		if (!head)
-			goto nextpage;
-
-		bh = head;
-		pg_lblk = index << (PAGE_CACHE_SHIFT -
-						inode->i_blkbits);
-		do {
-			if (unlikely(pg_lblk < lblk_start)) {
-				/*
-				 * This is possible when fs block size is less
-				 * than page size and our cluster starts/ends in
-				 * middle of the page. So we need to skip the
-				 * initial few blocks till we reach the 'lblk'
-				 */
-				pg_lblk++;
-				continue;
-			}
-
-			/* Check if the buffer is delayed allocated and that it
-			 * is not yet mapped. (when da-buffers are mapped during
-			 * their writeout, their da_mapped bit is set.)
-			 */
-			if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
-				page_cache_release(page);
-				trace_ext4_find_delalloc_range(inode,
-						lblk_start, lblk_end,
-						search_hint_reverse,
-						1, i);
-				return 1;
-			}
-			if (search_hint_reverse)
-				i--;
-			else
-				i++;
-		} while ((i >= lblk_start) && (i <= lblk_end) &&
-				((bh = bh->b_this_page) != head));
-nextpage:
-		if (page)
-			page_cache_release(page);
-		/*
-		 * Move to next page. 'i' will be the first lblk in the next
-		 * page.
-		 */
-		if (search_hint_reverse)
-			index--;
-		else
-			index++;
-		i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	}
-
-	trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
-					search_hint_reverse, 0, 0);
-	return 0;
+		return 0;
 }
 
-int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
-			       int search_hint_reverse)
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_lblk_t lblk_start, lblk_end;
 
 	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
 	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
 
-	return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
-					search_hint_reverse);
+	return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
 }
 
 /**
@@ -3563,7 +3605,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
 		lblk_to = lblk_from + c_offset - 1;
 
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
 			allocated_clusters--;
 	}
 
@@ -3573,7 +3615,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 		lblk_from = lblk_start + num_blks;
 		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
 
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
 			allocated_clusters--;
 	}
 
@@ -3588,7 +3630,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 {
 	int ret = 0;
 	int err = 0;
-	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+	ext4_io_end_t *io = ext4_inode_aio(inode);
 
 	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
 		  "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3596,13 +3638,15 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		  flags, allocated);
 	ext4_ext_show_leaf(inode, path);
 
-	trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
-						    newblock);
+	trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
+						    allocated, newblock);
 
 	/* get_block() before submit the IO, split the extent */
 	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 		ret = ext4_split_unwritten_extents(handle, inode, map,
 						   path, flags);
+		if (ret <= 0)
+			goto out;
 		/*
 		 * Flag the inode(non aio case) or end_io struct (aio case)
 		 * that this IO needs to conversion to written when IO is
@@ -3618,7 +3662,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	}
 	/* IO end_io complete, convert the filled extent to written */
 	if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
-		ret = ext4_convert_unwritten_extents_endio(handle, inode,
+		ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
 							path);
 		if (ret >= 0) {
 			ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -3842,12 +3886,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	struct ext4_extent newex, *ex, *ex2;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_fsblk_t newblock = 0;
-	int free_on_err = 0, err = 0, depth, ret;
+	int free_on_err = 0, err = 0, depth;
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
 	struct ext4_allocation_request ar;
-	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+	ext4_io_end_t *io = ext4_inode_aio(inode);
 	ext4_lblk_t cluster_offset;
+	int set_unwritten = 0;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
 
@@ -3857,7 +3902,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
 			if ((sbi->s_cluster_ratio > 1) &&
-			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+			    ext4_find_delalloc_cluster(inode, map->m_lblk))
 				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -3937,15 +3982,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 				  ee_len, ee_start);
 				goto out;
 			}
-			ret = ext4_ext_handle_uninitialized_extents(
+			allocated = ext4_ext_handle_uninitialized_extents(
 				handle, inode, map, path, flags, allocated, newblock);
-			return ret;
+			goto out3;
 		}
 	}
 
 	if ((sbi->s_cluster_ratio > 1) &&
-	    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+	    ext4_find_delalloc_cluster(inode, map->m_lblk))
 		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 
 	/*
@@ -4070,13 +4115,8 @@ got_allocated_blocks:
 		 * For non asycn direct IO case, flag the inode state
 		 * that we need to perform conversion when IO is done.
 		 */
-		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-			if (io)
-				ext4_set_io_unwritten_flag(inode, io);
-			else
-				ext4_set_inode_state(inode,
-						     EXT4_STATE_DIO_UNWRITTEN);
-		}
+		if ((flags & EXT4_GET_BLOCKS_PRE_IO))
+			set_unwritten = 1;
 		if (ext4_should_dioread_nolock(inode))
 			map->m_flags |= EXT4_MAP_UNINIT;
 	}
@@ -4088,6 +4128,15 @@ got_allocated_blocks:
 	if (!err)
 		err = ext4_ext_insert_extent(handle, inode, path,
 					     &newex, flags);
+
+	if (!err && set_unwritten) {
+		if (io)
+			ext4_set_io_unwritten_flag(inode, io);
+		else
+			ext4_set_inode_state(inode,
+					     EXT4_STATE_DIO_UNWRITTEN);
+	}
+
 	if (err && free_on_err) {
 		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
 			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4210,8 +4259,8 @@ out2:
 		kfree(path);
 	}
 
-	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
-		newblock, map->m_len, err ? err : allocated);
+out3:
+	trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated);
 
 	return err ? err : allocated;
 }
 
@@ -4229,7 +4278,7 @@ void ext4_ext_truncate(struct inode *inode)
 	 * finish any pending end_io work so we won't run the risk of
 	 * converting any truncated blocks to initialized later
 	 */
-	ext4_flush_completed_IO(inode);
+	ext4_flush_unwritten_io(inode);
 
 	/*
 	 * probably first extent we're gonna free will be last in block
@@ -4270,6 +4319,8 @@ void ext4_ext_truncate(struct inode *inode)
 	last_block = (inode->i_size + sb->s_blocksize - 1)
 			>> EXT4_BLOCK_SIZE_BITS(sb);
+	err = ext4_es_remove_extent(inode, last_block,
+				    EXT_MAX_BLOCKS - last_block);
 	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
 
 	/* In a multi-transaction truncate, we only make the final
@@ -4360,6 +4411,10 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (mode & FALLOC_FL_PUNCH_HOLE)
 		return ext4_punch_hole(file, offset, len);
 
+	ret = ext4_convert_inline_data(inode);
+	if (ret)
+		return ret;
+
 	trace_ext4_fallocate_enter(inode, offset, len, mode);
 	map.m_lblk = offset >> blkbits;
 	/*
@@ -4389,6 +4444,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	 */
 	if (len <= EXT_UNINIT_MAX_LEN << blkbits)
 		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
+
+	/* Prevent race condition between unwritten */
+	ext4_flush_unwritten_io(inode);
 retry:
 	while (ret >= 0 && ret < max_blocks) {
 		map.m_lblk = map.m_lblk + ret;
@@ -4420,6 +4478,8 @@ retry:
 		ext4_falloc_update_inode(inode, mode, new_size,
 					 (map.m_flags & EXT4_MAP_NEW));
 		ext4_mark_inode_dirty(handle, inode);
+		if ((file->f_flags & O_SYNC) && ret >= max_blocks)
+			ext4_handle_sync(handle);
 		ret2 = ext4_journal_stop(handle);
 		if (ret2)
 			break;
@@ -4493,206 +4553,43 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 }
 
 /*
- * Callback function called for each extent to gather FIEMAP information.
+ * If newex is not existing extent (newex->ec_start equals zero) find
+ * delayed extent at start of newex and update newex accordingly and
+ * return start of the next delayed extent.
+ *
+ * If newex is existing extent (newex->ec_start is not equal zero)
+ * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
+ * extent found. Leave newex unmodified.
 */
-static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
-		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
-		       void *data)
+static int ext4_find_delayed_extent(struct inode *inode,
+				    struct ext4_ext_cache *newex)
 {
-	__u64	logical;
-	__u64	physical;
-	__u64	length;
-	__u32	flags = 0;
-	int		ret = 0;
-	struct fiemap_extent_info *fieinfo = data;
-	unsigned char blksize_bits;
+	struct extent_status es;
+	ext4_lblk_t next_del;
 
-	blksize_bits = inode->i_sb->s_blocksize_bits;
-	logical = (__u64)newex->ec_block << blksize_bits;
+	es.start = newex->ec_block;
+	next_del = ext4_es_find_extent(inode, &es);
 
 	if (newex->ec_start == 0) {
 		/*
 		 * No extent in extent-tree contains block @newex->ec_start,
 		 * then the block may stay in 1)a hole or 2)delayed-extent.
-		 *
-		 * Holes or delayed-extents are processed as follows.
-		 * 1. lookup dirty pages with specified range in pagecache.
-		 *    If no page is got, then there is no delayed-extent and
-		 *    return with EXT_CONTINUE.
-		 * 2. find the 1st mapped buffer,
-		 * 3. check if the mapped buffer is both in the request range
-		 *    and a delayed buffer. If not, there is no delayed-extent,
-		 *    then return.
-		 * 4. a delayed-extent is found, the extent will be collected.
 		 */
-		ext4_lblk_t	end = 0;
-		pgoff_t		last_offset;
-		pgoff_t		offset;
-		pgoff_t		index;
-		pgoff_t		start_index = 0;
-		struct page	**pages = NULL;
-		struct buffer_head *bh = NULL;
-		struct buffer_head *head = NULL;
-		unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
-
-		pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (pages == NULL)
-			return -ENOMEM;
-
-		offset = logical >> PAGE_SHIFT;
-repeat:
-		last_offset = offset;
-		head = NULL;
-		ret = find_get_pages_tag(inode->i_mapping, &offset,
-					PAGECACHE_TAG_DIRTY, nr_pages, pages);
-
-		if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
-			/* First time, try to find a mapped buffer. */
-			if (ret == 0) {
-out:
-				for (index = 0; index < ret; index++)
-					page_cache_release(pages[index]);
-				/* just a hole. */
-				kfree(pages);
-				return EXT_CONTINUE;
-			}
-			index = 0;
-
-next_page:
-			/* Try to find the 1st mapped buffer. */
-			end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
-				  blksize_bits;
-			if (!page_has_buffers(pages[index]))
-				goto out;
-			head = page_buffers(pages[index]);
-			if (!head)
-				goto out;
-
-			index++;
-			bh = head;
-			do {
-				if (end >= newex->ec_block +
-					newex->ec_len)
-					/* The buffer is out of
-					 * the request range.
-					 */
-					goto out;
-
-				if (buffer_mapped(bh) &&
-					end >= newex->ec_block) {
-					start_index = index - 1;
-					/* get the 1st mapped buffer. */
-					goto found_mapped_buffer;
-				}
-
-				bh = bh->b_this_page;
-				end++;
-			} while (bh != head);
-
-			/* No mapped buffer in the range found in this page,
-			 * We need to look up next page.
-			 */
-			if (index >= ret) {
-				/* There is no page left, but we need to limit
-				 * newex->ec_len.
-				 */
-				newex->ec_len = end - newex->ec_block;
-				goto out;
-			}
-			goto next_page;
-		} else {
-			/*Find contiguous delayed buffers. */
-			if (ret > 0 && pages[0]->index == last_offset)
-				head = page_buffers(pages[0]);
-			bh = head;
-			index = 1;
-			start_index = 0;
-		}
-
-found_mapped_buffer:
-		if (bh != NULL && buffer_delay(bh)) {
-			/* 1st or contiguous delayed buffer found. */
-			if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
-				/*
-				 * 1st delayed buffer found, record
-				 * the start of extent.
-				 */
-				flags |= FIEMAP_EXTENT_DELALLOC;
-				newex->ec_block = end;
-				logical = (__u64)end << blksize_bits;
-			}
-			/* Find contiguous delayed buffers. */
-			do {
-				if (!buffer_delay(bh))
-					goto found_delayed_extent;
-				bh = bh->b_this_page;
-				end++;
-			} while (bh != head);
-
-			for (; index < ret; index++) {
-				if (!page_has_buffers(pages[index])) {
-					bh = NULL;
-					break;
-				}
-				head = page_buffers(pages[index]);
-				if (!head) {
-					bh = NULL;
-					break;
-				}
-
-				if (pages[index]->index !=
-				    pages[start_index]->index + index
-				    - start_index) {
-					/* Blocks are not contiguous. */
-					bh = NULL;
-					break;
-				}
-				bh = head;
-				do {
-					if (!buffer_delay(bh))
-						/* Delayed-extent ends. */
-						goto found_delayed_extent;
-					bh = bh->b_this_page;
-					end++;
-				} while (bh != head);
-			}
-		} else if (!(flags & FIEMAP_EXTENT_DELALLOC))
-			/* a hole found. */
-			goto out;
+		if (es.len == 0)
+			/* A hole found. */
+			return 0;
 
-found_delayed_extent:
-		newex->ec_len = min(end - newex->ec_block,
-						  (ext4_lblk_t)EXT_INIT_MAX_LEN);
-		if (ret == nr_pages && bh != NULL &&
-			newex->ec_len < EXT_INIT_MAX_LEN &&
-			buffer_delay(bh)) {
-			/* Have not collected an extent and continue. */
-			for (index = 0; index < ret; index++)
-				page_cache_release(pages[index]);
-			goto repeat;
+		if (es.start > newex->ec_block) {
+			/* A hole found. */
+			newex->ec_len = min(es.start - newex->ec_block,
+					    newex->ec_len);
+			return 0;
 		}
 
-		for (index = 0; index < ret; index++)
-			page_cache_release(pages[index]);
-		kfree(pages);
+		newex->ec_len = es.start + es.len - newex->ec_block;
 	}
 
-	physical = (__u64)newex->ec_start << blksize_bits;
-	length =   (__u64)newex->ec_len << blksize_bits;
-
-	if (ex && ext4_ext_is_uninitialized(ex))
-		flags |= FIEMAP_EXTENT_UNWRITTEN;
-
-	if (next == EXT_MAX_BLOCKS)
-		flags |= FIEMAP_EXTENT_LAST;
-
-	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, flags);
-	if (ret < 0)
-		return ret;
-	if (ret == 1)
-		return EXT_BREAK;
-	return EXT_CONTINUE;
+	return next_del;
 }
 
 /* fiemap flags we can handle specified here */
 #define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
@@ -4755,9 +4652,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	loff_t first_page_offset, last_page_offset;
 	int credits, err = 0;
 
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		err = filemap_write_and_wait_range(mapping,
+			offset, offset + length - 1);
+
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	/* It's not possible punch hole on append only file */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		err = -EPERM;
+		goto out_mutex;
+	}
+	if (IS_SWAPFILE(inode)) {
+		err = -ETXTBSY;
+		goto out_mutex;
+	}
+
 	/* No need to punch hole beyond i_size */
 	if (offset >= inode->i_size)
-		return 0;
+		goto out_mutex;
 
 	/*
 	 * If the hole extends beyond i_size, set the hole
@@ -4775,35 +4695,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	first_page_offset = first_page << PAGE_CACHE_SHIFT;
 	last_page_offset = last_page << PAGE_CACHE_SHIFT;
 
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		err = filemap_write_and_wait_range(mapping,
-			offset, offset + length - 1);
-
-		if (err)
-			return err;
-	}
-
 	/* Now release the pages */
 	if (last_page_offset > first_page_offset) {
 		truncate_pagecache_range(inode, first_page_offset,
 					 last_page_offset - 1);
 	}
 
-	/* finish any pending end_io work */
-	ext4_flush_completed_IO(inode);
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	err = ext4_flush_unwritten_io(inode);
+	if (err)
+		goto out_dio;
+	inode_dio_wait(inode);
 
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, credits);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out_dio;
+	}
 
-	err = ext4_orphan_add(handle, inode);
-	if (err)
-		goto out;
 
 	/*
 	 * Now we need to zero out the non-page-aligned data in the
@@ -4878,6 +4789,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	ext4_ext_invalidate_cache(inode);
 	ext4_discard_preallocations(inode);
 
+	err = ext4_es_remove_extent(inode, first_block,
+				    stop_block - first_block);
 	err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
 
 	ext4_ext_invalidate_cache(inode);
@@ -4889,18 +4802,31 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	up_write(&EXT4_I(inode)->i_data_sem);
 
 out:
-	ext4_orphan_del(handle, inode);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
 	return err;
 }
+
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {
 	ext4_lblk_t start_blk;
 	int error = 0;
 
+	if (ext4_has_inline_data(inode)) {
+		int has_inline = 1;
+
+		error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline);
+
+		if (has_inline)
+			return error;
+	}
+
 	/* fallback to generic here if not in extents fmt */
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return generic_block_fiemap(inode, fieinfo, start, len,
@@ -4922,11 +4848,11 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
 
 		/*
-		 * Walk the extent tree gathering extent information.
-		 * ext4_ext_fiemap_cb will push extents back to user.
+		 * Walk the extent tree gathering extent information
+		 * and pushing extents back to the user.
 		 */
-		error = ext4_ext_walk_space(inode, start_blk, len_blks,
-					  ext4_ext_fiemap_cb, fieinfo);
+		error = ext4_fill_fiemap_extents(inode, start_blk,
+						 len_blks, fieinfo);
 	}
 
 	return error;
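Note on the delalloc lookup rewrite above: ext4_find_delalloc_range() no longer walks page-cache buffer heads; it asks the extent status tree (via ext4_es_find_extent()) for the first delayed extent at or after lblk_start and then applies two interval tests. The stand-alone C sketch below models only those two tests; struct extent_status and range_has_delalloc() here are simplified, hypothetical stand-ins for illustration, not the kernel's actual types or API.

/* Minimal user-space model of the overlap test used by the rewritten
 * ext4_find_delalloc_range().  'es' plays the role of the first delayed
 * extent found at or after lblk_start (es.len == 0 means none found). */
#include <stdio.h>

typedef unsigned int lblk_t;

struct extent_status {	/* hypothetical stand-in for the kernel struct */
	lblk_t start;	/* first logical block of the delayed extent */
	lblk_t len;	/* number of blocks; 0 means no extent found */
};

static int range_has_delalloc(const struct extent_status *es,
			      lblk_t lblk_start, lblk_t lblk_end)
{
	if (es->len == 0)
		return 0;	/* no delayed extent in the tree */
	if (es->start <= lblk_start && lblk_start < es->start + es->len)
		return 1;	/* delayed extent covers the range start */
	if (lblk_start <= es->start && es->start <= lblk_end)
		return 1;	/* delayed extent begins inside the range */
	return 0;		/* delayed extent lies beyond lblk_end */
}

int main(void)
{
	struct extent_status es = { 100, 8 };	/* delayed blocks [100,107] */

	printf("%d\n", range_has_delalloc(&es, 104, 200));	/* 1 */
	printf("%d\n", range_has_delalloc(&es, 90, 100));	/* 1 */
	printf("%d\n", range_has_delalloc(&es, 0, 99));		/* 0 */
	return 0;
}

Either test firing means some block in [lblk_start, lblk_end] is delayed-allocated, which is all that callers such as ext4_find_delalloc_cluster() need to know.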