aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_buf_item.c
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2013-03-31 18:10:34 +0200
committerPatrick McHardy <kaber@trash.net>2013-03-31 18:10:34 +0200
commit70711d223510ba1773cfe1d7770a56141c815ff8 (patch)
tree4a71f38a3a554ddecaa31b7d8c6bc49b7d1705b4 /fs/xfs/xfs_buf_item.c
parentd53b4ed072d9779cdf53582c46436dec06d0961f (diff)
parent19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff)
Merge tag 'v3.8' of /home/kaber/src/repos/linux
Linux 3.8 Signed-off-by: Patrick McHardy <kaber@trash.net> Conflicts: include/linux/Kbuild include/linux/netlink.h
Diffstat (limited to 'fs/xfs/xfs_buf_item.c')
-rw-r--r--fs/xfs/xfs_buf_item.c408
1 files changed, 299 insertions, 109 deletions
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index d9e451115f9..3f9949fee39 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -71,7 +71,7 @@ xfs_buf_item_log_debug(
chunk_num = byte >> XFS_BLF_SHIFT;
word_num = chunk_num >> BIT_TO_WORD_SHIFT;
bit_num = chunk_num & (NBWORD - 1);
- wordp = &(bip->bli_format.blf_data_map[word_num]);
+ wordp = &(bip->__bli_format.blf_data_map[word_num]);
bit_set = *wordp & (1 << bit_num);
ASSERT(bit_set);
byte++;
@@ -153,33 +153,25 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
* If the XFS_BLI_STALE flag has been set, then log nothing.
*/
STATIC uint
-xfs_buf_item_size(
- struct xfs_log_item *lip)
+xfs_buf_item_size_segment(
+ struct xfs_buf_log_item *bip,
+ struct xfs_buf_log_format *blfp)
{
- struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
uint nvecs;
int next_bit;
int last_bit;
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
- if (bip->bli_flags & XFS_BLI_STALE) {
- /*
- * The buffer is stale, so all we need to log
- * is the buf log format structure with the
- * cancel flag in it.
- */
- trace_xfs_buf_item_size_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
- return 1;
- }
+ last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
+ if (last_bit == -1)
+ return 0;
+
+ /*
+ * initial count for a dirty buffer is 2 vectors - the format structure
+ * and the first dirty region.
+ */
+ nvecs = 2;
- ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
- nvecs = 1;
- last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size, 0);
- ASSERT(last_bit != -1);
- nvecs++;
while (last_bit != -1) {
/*
* This takes the bit number to start looking from and
@@ -187,16 +179,15 @@ xfs_buf_item_size(
* if there are no more bits set or the start bit is
* beyond the end of the bitmap.
*/
- next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size,
- last_bit + 1);
+ next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+ last_bit + 1);
/*
* If we run out of bits, leave the loop,
* else if we find a new set of bits bump the number of vecs,
* else keep scanning the current set of bits.
*/
if (next_bit == -1) {
- last_bit = -1;
+ break;
} else if (next_bit != last_bit + 1) {
last_bit = next_bit;
nvecs++;
@@ -210,22 +201,73 @@ xfs_buf_item_size(
}
}
- trace_xfs_buf_item_size(bip);
return nvecs;
}
/*
- * This is called to fill in the vector of log iovecs for the
- * given log buf item. It fills the first entry with a buf log
- * format structure, and the rest point to contiguous chunks
- * within the buffer.
+ * This returns the number of log iovecs needed to log the given buf log item.
+ *
+ * It calculates this as 1 iovec for the buf log format structure and 1 for each
+ * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
+ * in a single iovec.
+ *
+ * Discontiguous buffers need a format structure per region that that is being
+ * logged. This makes the changes in the buffer appear to log recovery as though
+ * they came from separate buffers, just like would occur if multiple buffers
+ * were used instead of a single discontiguous buffer. This enables
+ * discontiguous buffers to be in-memory constructs, completely transparent to
+ * what ends up on disk.
+ *
+ * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
+ * format structures.
*/
-STATIC void
-xfs_buf_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_iovec *vecp)
+STATIC uint
+xfs_buf_item_size(
+ struct xfs_log_item *lip)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ uint nvecs;
+ int i;
+
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ if (bip->bli_flags & XFS_BLI_STALE) {
+ /*
+ * The buffer is stale, so all we need to log
+ * is the buf log format structure with the
+ * cancel flag in it.
+ */
+ trace_xfs_buf_item_size_stale(bip);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
+ return bip->bli_format_count;
+ }
+
+ ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
+
+ /*
+ * the vector count is based on the number of buffer vectors we have
+ * dirty bits in. This will only be greater than one when we have a
+ * compound buffer with more than one segment dirty. Hence for compound
+ * buffers we need to track which segment the dirty bits correspond to,
+ * and when we move from one segment to the next increment the vector
+ * count for the extra buf log format structure that will need to be
+ * written.
+ */
+ nvecs = 0;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
+ }
+
+ trace_xfs_buf_item_size(bip);
+ return nvecs;
+}
+
+static struct xfs_log_iovec *
+xfs_buf_item_format_segment(
+ struct xfs_buf_log_item *bip,
+ struct xfs_log_iovec *vecp,
+ uint offset,
+ struct xfs_buf_log_format *blfp)
+{
struct xfs_buf *bp = bip->bli_buf;
uint base_size;
uint nvecs;
@@ -235,40 +277,33 @@ xfs_buf_item_format(
uint nbits;
uint buffer_offset;
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
- ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
- (bip->bli_flags & XFS_BLI_STALE));
+ /* copy the flags across from the base format item */
+ blfp->blf_flags = bip->__bli_format.blf_flags;
/*
- * The size of the base structure is the size of the
- * declared structure plus the space for the extra words
- * of the bitmap. We subtract one from the map size, because
- * the first element of the bitmap is accounted for in the
- * size of the base structure.
+ * Base size is the actual size of the ondisk structure - it reflects
+ * the actual size of the dirty bitmap rather than the size of the in
+ * memory structure.
*/
- base_size =
- (uint)(sizeof(xfs_buf_log_format_t) +
- ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
- vecp->i_addr = &bip->bli_format;
+ base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
+ (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+
+ nvecs = 0;
+ first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
+ if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
+ /*
+ * If the map is not be dirty in the transaction, mark
+ * the size as zero and do not advance the vector pointer.
+ */
+ goto out;
+ }
+
+ vecp->i_addr = blfp;
vecp->i_len = base_size;
vecp->i_type = XLOG_REG_TYPE_BFORMAT;
vecp++;
nvecs = 1;
- /*
- * If it is an inode buffer, transfer the in-memory state to the
- * format flags and clear the in-memory state. We do not transfer
- * this state if the inode buffer allocation has not yet been committed
- * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
- * correct replay of the inode allocation.
- */
- if (bip->bli_flags & XFS_BLI_INODE_BUF) {
- if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
- xfs_log_item_in_current_chkpt(lip)))
- bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
- bip->bli_flags &= ~XFS_BLI_INODE_BUF;
- }
-
if (bip->bli_flags & XFS_BLI_STALE) {
/*
* The buffer is stale, so all we need to log
@@ -276,17 +311,14 @@ xfs_buf_item_format(
* cancel flag in it.
*/
trace_xfs_buf_item_format_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
- bip->bli_format.blf_size = nvecs;
- return;
+ ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
+ goto out;
}
/*
* Fill in an iovec for each set of contiguous chunks.
*/
- first_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size, 0);
- ASSERT(first_bit != -1);
+
last_bit = first_bit;
nbits = 1;
for (;;) {
@@ -296,9 +328,8 @@ xfs_buf_item_format(
* if there are no more bits set or the start bit is
* beyond the end of the bitmap.
*/
- next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size,
- (uint)last_bit + 1);
+ next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+ (uint)last_bit + 1);
/*
* If we run out of bits fill in the last iovec and get
* out of the loop.
@@ -309,14 +340,14 @@ xfs_buf_item_format(
* keep counting and scanning.
*/
if (next_bit == -1) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
nvecs++;
break;
} else if (next_bit != last_bit + 1) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
@@ -325,14 +356,17 @@ xfs_buf_item_format(
first_bit = next_bit;
last_bit = next_bit;
nbits = 1;
- } else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) !=
- (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) +
+ } else if (xfs_buf_offset(bp, offset +
+ (next_bit << XFS_BLF_SHIFT)) !=
+ (xfs_buf_offset(bp, offset +
+ (last_bit << XFS_BLF_SHIFT)) +
XFS_BLF_CHUNK)) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/* You would think we need to bump the nvecs here too, but we do not
+/*
+ * You would think we need to bump the nvecs here too, but we do not
* this number is used by recovery, and it gets confused by the boundary
* split here
* nvecs++;
@@ -346,7 +380,50 @@ xfs_buf_item_format(
nbits++;
}
}
- bip->bli_format.blf_size = nvecs;
+out:
+ blfp->blf_size = nvecs;
+ return vecp;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given log buf item. It fills the first entry with a buf log
+ * format structure, and the rest point to contiguous chunks
+ * within the buffer.
+ */
+STATIC void
+xfs_buf_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *vecp)
+{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ uint offset = 0;
+ int i;
+
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
+ (bip->bli_flags & XFS_BLI_STALE));
+
+ /*
+ * If it is an inode buffer, transfer the in-memory state to the
+ * format flags and clear the in-memory state. We do not transfer
+ * this state if the inode buffer allocation has not yet been committed
+ * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
+ * correct replay of the inode allocation.
+ */
+ if (bip->bli_flags & XFS_BLI_INODE_BUF) {
+ if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
+ xfs_log_item_in_current_chkpt(lip)))
+ bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
+ bip->bli_flags &= ~XFS_BLI_INODE_BUF;
+ }
+
+ for (i = 0; i < bip->bli_format_count; i++) {
+ vecp = xfs_buf_item_format_segment(bip, vecp, offset,
+ &bip->bli_formats[i]);
+ offset += bp->b_maps[i].bm_len;
+ }
/*
* Check to make sure everything is consistent.
@@ -418,7 +495,7 @@ xfs_buf_item_unpin(
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(xfs_buf_islocked(bp));
ASSERT(XFS_BUF_ISSTALE(bp));
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
trace_xfs_buf_item_unpin_stale(bip);
@@ -459,7 +536,25 @@ xfs_buf_item_unpin(
}
xfs_buf_relse(bp);
} else if (freed && remove) {
+ /*
+ * There are currently two references to the buffer - the active
+ * LRU reference and the buf log item. What we are about to do
+ * here - simulate a failed IO completion - requires 3
+ * references.
+ *
+ * The LRU reference is removed by the xfs_buf_stale() call. The
+ * buf item reference is removed by the xfs_buf_iodone()
+ * callback that is run by xfs_buf_do_callbacks() during ioend
+ * processing (via the bp->b_iodone callback), and then finally
+ * the ioend processing will drop the IO reference if the buffer
+ * is marked XBF_ASYNC.
+ *
+ * Hence we need to take an additional reference here so that IO
+ * completion processing doesn't free the buffer prematurely.
+ */
xfs_buf_lock(bp);
+ xfs_buf_hold(bp);
+ bp->b_flags |= XBF_ASYNC;
xfs_buf_ioerror(bp, EIO);
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
@@ -516,7 +611,7 @@ xfs_buf_item_unlock(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- int aborted;
+ int aborted, clean, i;
uint hold;
/* Clear the buffer's association with this transaction. */
@@ -546,7 +641,7 @@ xfs_buf_item_unlock(
*/
if (bip->bli_flags & XFS_BLI_STALE) {
trace_xfs_buf_item_unlock_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
if (!aborted) {
atomic_dec(&bip->bli_refcount);
return;
@@ -557,12 +652,27 @@ xfs_buf_item_unlock(
/*
* If the buf item isn't tracking any data, free it, otherwise drop the
- * reference we hold to it.
+ * reference we hold to it. If we are aborting the transaction, this may
+ * be the only reference to the buf item, so we free it anyway
+ * regardless of whether it is dirty or not. A dirty abort implies a
+ * shutdown, anyway.
*/
- if (xfs_bitmap_empty(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size))
+ clean = 1;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
+ bip->bli_formats[i].blf_map_size)) {
+ clean = 0;
+ break;
+ }
+ }
+ if (clean)
xfs_buf_item_relse(bp);
- else
+ else if (aborted) {
+ if (atomic_dec_and_test(&bip->bli_refcount)) {
+ ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
+ xfs_buf_item_relse(bp);
+ }
+ } else
atomic_dec(&bip->bli_refcount);
if (!hold)
@@ -622,6 +732,35 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_committing = xfs_buf_item_committing
};
+STATIC int
+xfs_buf_item_get_format(
+ struct xfs_buf_log_item *bip,
+ int count)
+{
+ ASSERT(bip->bli_formats == NULL);
+ bip->bli_format_count = count;
+
+ if (count == 1) {
+ bip->bli_formats = &bip->__bli_format;
+ return 0;
+ }
+
+ bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
+ KM_SLEEP);
+ if (!bip->bli_formats)
+ return ENOMEM;
+ return 0;
+}
+
+STATIC void
+xfs_buf_item_free_format(
+ struct xfs_buf_log_item *bip)
+{
+ if (bip->bli_formats != &bip->__bli_format) {
+ kmem_free(bip->bli_formats);
+ bip->bli_formats = NULL;
+ }
+}
/*
* Allocate a new buf log item to go with the given buffer.
@@ -639,6 +778,8 @@ xfs_buf_item_init(
xfs_buf_log_item_t *bip;
int chunks;
int map_size;
+ int error;
+ int i;
/*
* Check to see if there is already a buf log item for
@@ -650,25 +791,33 @@ xfs_buf_item_init(
if (lip != NULL && lip->li_type == XFS_LI_BUF)
return;
- /*
- * chunks is the number of XFS_BLF_CHUNK size pieces
- * the buffer can be divided into. Make sure not to
- * truncate any pieces. map_size is the size of the
- * bitmap needed to describe the chunks of the buffer.
- */
- chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >>
- XFS_BLF_SHIFT);
- map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
-
- bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
- KM_SLEEP);
+ bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
bip->bli_buf = bp;
xfs_buf_hold(bp);
- bip->bli_format.blf_type = XFS_LI_BUF;
- bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
- bip->bli_format.blf_len = (ushort)bp->b_length;
- bip->bli_format.blf_map_size = map_size;
+
+ /*
+ * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
+ * can be divided into. Make sure not to truncate any pieces.
+ * map_size is the size of the bitmap needed to describe the
+ * chunks of the buffer.
+ *
+ * Discontiguous buffer support follows the layout of the underlying
+ * buffer. This makes the implementation as simple as possible.
+ */
+ error = xfs_buf_item_get_format(bip, bp->b_map_count);
+ ASSERT(error == 0);
+
+ for (i = 0; i < bip->bli_format_count; i++) {
+ chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
+ XFS_BLF_CHUNK);
+ map_size = DIV_ROUND_UP(chunks, NBWORD);
+
+ bip->bli_formats[i].blf_type = XFS_LI_BUF;
+ bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
+ bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
+ bip->bli_formats[i].blf_map_size = map_size;
+ }
#ifdef XFS_TRANS_DEBUG
/*
@@ -699,10 +848,11 @@ xfs_buf_item_init(
* item's bitmap.
*/
void
-xfs_buf_item_log(
- xfs_buf_log_item_t *bip,
+xfs_buf_item_log_segment(
+ struct xfs_buf_log_item *bip,
uint first,
- uint last)
+ uint last,
+ uint *map)
{
uint first_bit;
uint last_bit;
@@ -715,12 +865,6 @@ xfs_buf_item_log(
uint mask;
/*
- * Mark the item as having some dirty data for
- * quick reference in xfs_buf_item_dirty.
- */
- bip->bli_flags |= XFS_BLI_DIRTY;
-
- /*
* Convert byte offsets to bit numbers.
*/
first_bit = first >> XFS_BLF_SHIFT;
@@ -736,7 +880,7 @@ xfs_buf_item_log(
* to set a bit in.
*/
word_num = first_bit >> BIT_TO_WORD_SHIFT;
- wordp = &(bip->bli_format.blf_data_map[word_num]);
+ wordp = &map[word_num];
/*
* Calculate the starting bit in the first word.
@@ -783,6 +927,51 @@ xfs_buf_item_log(
xfs_buf_item_log_debug(bip, first, last);
}
+/*
+ * Mark bytes first through last inclusive as dirty in the buf
+ * item's bitmap.
+ */
+void
+xfs_buf_item_log(
+ xfs_buf_log_item_t *bip,
+ uint first,
+ uint last)
+{
+ int i;
+ uint start;
+ uint end;
+ struct xfs_buf *bp = bip->bli_buf;
+
+ /*
+ * Mark the item as having some dirty data for
+ * quick reference in xfs_buf_item_dirty.
+ */
+ bip->bli_flags |= XFS_BLI_DIRTY;
+
+ /*
+ * walk each buffer segment and mark them dirty appropriately.
+ */
+ start = 0;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ if (start > last)
+ break;
+ end = start + BBTOB(bp->b_maps[i].bm_len);
+ if (first > end) {
+ start += BBTOB(bp->b_maps[i].bm_len);
+ continue;
+ }
+ if (first < start)
+ first = start;
+ if (end > last)
+ end = last;
+
+ xfs_buf_item_log_segment(bip, first, end,
+ &bip->bli_formats[i].blf_data_map[0]);
+
+ start += bp->b_maps[i].bm_len;
+ }
+}
+
/*
* Return 1 if the buffer has some data that has been logged (at any
@@ -804,6 +993,7 @@ xfs_buf_item_free(
kmem_free(bip->bli_logged);
#endif /* XFS_TRANS_DEBUG */
+ xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip);
}