From 4c05f9ad4d168098b7ce3ffa7098283f94811ed6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 2 Nov 2012 11:38:41 +1100 Subject: xfs: invalidate allocbt blocks moved to the free list When we free a block from the alloc btree tree, we move it to the freelist held in the AGFL and mark it busy in the busy extent tree. This typically happens when we merge btree blocks. Once the transaction is committed and checkpointed, the block can remain on the free list for an indefinite amount of time. Now, this isn't the end of the world at this point - if the free list is shortened, the buffer is invalidated in the transaction that moves it back to free space. If the buffer is allocated as metadata from the free list, then all the modifications getted logged, and we have no issues, either. And if it gets allocated as userdata direct from the freelist, it gets invalidated and so will never get written. However, during the time it sits on the free list, pressure on the log can cause the AIL to be pushed and the buffer that covers the block gets pushed for write. IOWs, we end up writing a freed metadata block to disk. Again, this isn't the end of the world because we know from the above we are only writing to free space. The problem, however, is for validation callbacks. If the block was on old btree root block, then the level of the block is going to be higher than the current tree root, and so will fail validation. There may be other inconsistencies in the block as well, and currently we don't care because the block is in free space. Shutting down the filesystem because a freed block doesn't pass write validation, OTOH, is rather unfriendly. So, make sure we always invalidate buffers as they move from the free space trees to the free list so that we guarantee they never get written to disk while on the free list. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Phil White Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc_btree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/xfs/xfs_alloc_btree.c') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index f1647caace8..f7876c6d616 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -121,6 +121,8 @@ xfs_allocbt_free_block( xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); + + xfs_trans_binval(cur->bc_tp, bp); return 0; } -- cgit v1.2.3 From 3d3e6f64e22c94115d47de670611bcd3ecda3796 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 12 Nov 2012 22:54:08 +1100 Subject: xfs: verify btree blocks as they are read from disk Add an btree block verify callback function and pass it into the buffer read functions. Because each different btree block type requires different verification, add a function to the ops structure that is called from the generic code. Also, propagate the verification callback functions through the readahead functions, and into the external bmap and bulkstat inode readahead code that uses the generic btree buffer read functions. Signed-off-by: Dave Chinner Reviewed-by: Phil White Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc_btree.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'fs/xfs/xfs_alloc_btree.c') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index f7876c6d616..46961e52e9b 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -272,6 +272,66 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } +void +xfs_allocbt_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_perag *pag = bp->b_pag; + unsigned int level; + int sblock_ok; /* block passes checks */ + + /* + * magic number and level verification + * + * During growfs operations, we can't verify the exact level as the + * perag is not fully initialised and hence not attached to the buffer. + * In this case, check against the maximum tree depth. + */ + level = be16_to_cpu(block->bb_level); + switch (block->bb_magic) { + case cpu_to_be32(XFS_ABTB_MAGIC): + if (pag) + sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi]; + else + sblock_ok = level < mp->m_ag_maxlevels; + break; + case cpu_to_be32(XFS_ABTC_MAGIC): + if (pag) + sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi]; + else + sblock_ok = level < mp->m_ag_maxlevels; + break; + default: + sblock_ok = 0; + break; + } + + /* numrecs verification */ + sblock_ok = sblock_ok && + be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0]; + + /* sibling pointer verification */ + sblock_ok = sblock_ok && + (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && + block->bb_u.s.bb_leftsib && + (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || + be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && + block->bb_u.s.bb_rightsib; + + if (!sblock_ok) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + XFS_CORRUPTION_ERROR("xfs_allocbt_read_verify", + XFS_ERRLEVEL_LOW, mp, block); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } + + bp->b_iodone = NULL; + xfs_buf_ioend(bp, 0); +} + #ifdef DEBUG STATIC int xfs_allocbt_keys_inorder( @@ -327,6 +387,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, + .read_verify = xfs_allocbt_read_verify, #ifdef DEBUG .keys_inorder = xfs_allocbt_keys_inorder, .recs_inorder = xfs_allocbt_recs_inorder, -- cgit v1.2.3 From 612cfbfe174a89d565363fff7f3961a2dda5fb71 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 14 Nov 2012 17:52:32 +1100 Subject: xfs: add pre-write metadata buffer verifier callbacks These verifiers are essentially the same code as the read verifiers, but do not require ioend processing. Hence factor the read verifier functions and add a new write verifier wrapper that is used as the callback. This is done as one large patch for all verifiers rather than one patch per verifier as the change is largely mechanical. This includes hooking up the write verifier via the read verifier function. Hooking up the write verifier for buffers obtained via xfs_trans_get_buf() will be done in a separate patch as that touches code in many different places rather than just the verifier functions. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc_btree.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_alloc_btree.c') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 46961e52e9b..6e98b22ebde 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -272,8 +272,8 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } -void -xfs_allocbt_read_verify( +static void +xfs_allocbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; @@ -323,11 +323,24 @@ xfs_allocbt_read_verify( if (!sblock_ok) { trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR("xfs_allocbt_read_verify", - XFS_ERRLEVEL_LOW, mp, block); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); xfs_buf_ioerror(bp, EFSCORRUPTED); } +} +static void +xfs_allocbt_write_verify( + struct xfs_buf *bp) +{ + xfs_allocbt_verify(bp); +} + +void +xfs_allocbt_read_verify( + struct xfs_buf *bp) +{ + xfs_allocbt_verify(bp); + bp->b_pre_io = xfs_allocbt_write_verify; bp->b_iodone = NULL; xfs_buf_ioend(bp, 0); } -- cgit v1.2.3 From b0f539de9fcc543a3ffa40bc22bf51aca6ea6183 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 14 Nov 2012 17:53:49 +1100 Subject: xfs: connect up write verifiers to new buffers Metadata buffers that are read from disk have write verifiers already attached to them, but newly allocated buffers do not. Add appropriate write verifiers to all new metadata buffers. Signed-off-by: Dave Chinner Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc_btree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs/xfs_alloc_btree.c') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 6e98b22ebde..b8339652491 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -401,6 +401,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, .read_verify = xfs_allocbt_read_verify, + .write_verify = xfs_allocbt_write_verify, #ifdef DEBUG .keys_inorder = xfs_allocbt_keys_inorder, .recs_inorder = xfs_allocbt_recs_inorder, -- cgit v1.2.3 From 1813dd64057490e7a0678a885c4fe6d02f78bdc1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 14 Nov 2012 17:54:40 +1100 Subject: xfs: convert buffer verifiers to an ops structure. To separate the verifiers from iodone functions and associate read and write verifiers at the same time, introduce a buffer verifier operations structure to the xfs_buf. This avoids the need for assigning the write verifier, clearing the iodone function and re-running ioend processing in the read verifier, and gets rid of the nasty "b_pre_io" name for the write verifier function pointer. If we ever need to, it will also be easier to add further content specific callbacks to a buffer with an ops structure in place. We also avoid needing to export verifier functions, instead we can simply export the ops structures for those that are needed outside the function they are defined in. This patch also fixes a directory block readahead verifier issue it exposed. This patch also adds ops callbacks to the inode/alloc btree blocks initialised by growfs. These will need more work before they will work with CRCs. Signed-off-by: Dave Chinner Reviewed-by: Phil White Signed-off-by: Ben Myers --- fs/xfs/xfs_alloc_btree.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/xfs/xfs_alloc_btree.c') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b8339652491..b1ddef6b268 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -329,22 +329,25 @@ xfs_allocbt_verify( } static void -xfs_allocbt_write_verify( +xfs_allocbt_read_verify( struct xfs_buf *bp) { xfs_allocbt_verify(bp); } -void -xfs_allocbt_read_verify( +static void +xfs_allocbt_write_verify( struct xfs_buf *bp) { xfs_allocbt_verify(bp); - bp->b_pre_io = xfs_allocbt_write_verify; - bp->b_iodone = NULL; - xfs_buf_ioend(bp, 0); } +const struct xfs_buf_ops xfs_allocbt_buf_ops = { + .verify_read = xfs_allocbt_read_verify, + .verify_write = xfs_allocbt_write_verify, +}; + + #ifdef DEBUG STATIC int xfs_allocbt_keys_inorder( @@ -400,8 +403,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, - .read_verify = xfs_allocbt_read_verify, - .write_verify = xfs_allocbt_write_verify, + .buf_ops = &xfs_allocbt_buf_ops, #ifdef DEBUG .keys_inorder = xfs_allocbt_keys_inorder, .recs_inorder = xfs_allocbt_recs_inorder, -- cgit v1.2.3