aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_buf.c
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2013-03-31 18:10:34 +0200
committerPatrick McHardy <kaber@trash.net>2013-03-31 18:10:34 +0200
commit70711d223510ba1773cfe1d7770a56141c815ff8 (patch)
tree4a71f38a3a554ddecaa31b7d8c6bc49b7d1705b4 /fs/xfs/xfs_buf.c
parentd53b4ed072d9779cdf53582c46436dec06d0961f (diff)
parent19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff)
Merge tag 'v3.8' of /home/kaber/src/repos/linux
Linux 3.8 Signed-off-by: Patrick McHardy <kaber@trash.net> Conflicts: include/linux/Kbuild include/linux/netlink.h
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r--fs/xfs/xfs_buf.c336
1 files changed, 254 insertions, 82 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 269b35c084d..fbbb9eb92e3 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -96,6 +96,7 @@ xfs_buf_lru_add(
atomic_inc(&bp->b_hold);
list_add_tail(&bp->b_lru, &btp->bt_lru);
btp->bt_lru_nr++;
+ bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
}
spin_unlock(&btp->bt_lru_lock);
}
@@ -154,7 +155,8 @@ xfs_buf_stale(
struct xfs_buftarg *btp = bp->b_target;
spin_lock(&btp->bt_lru_lock);
- if (!list_empty(&bp->b_lru)) {
+ if (!list_empty(&bp->b_lru) &&
+ !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
list_del_init(&bp->b_lru);
btp->bt_lru_nr--;
atomic_dec(&bp->b_hold);
@@ -164,14 +166,49 @@ xfs_buf_stale(
ASSERT(atomic_read(&bp->b_hold) >= 1);
}
+static int
+xfs_buf_get_maps(
+ struct xfs_buf *bp,
+ int map_count)
+{
+ ASSERT(bp->b_maps == NULL);
+ bp->b_map_count = map_count;
+
+ if (map_count == 1) {
+ bp->b_maps = &bp->__b_map;
+ return 0;
+ }
+
+ bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
+ KM_NOFS);
+ if (!bp->b_maps)
+ return ENOMEM;
+ return 0;
+}
+
+/*
+ * Frees b_pages if it was allocated.
+ */
+static void
+xfs_buf_free_maps(
+ struct xfs_buf *bp)
+{
+ if (bp->b_maps != &bp->__b_map) {
+ kmem_free(bp->b_maps);
+ bp->b_maps = NULL;
+ }
+}
+
struct xfs_buf *
-xfs_buf_alloc(
+_xfs_buf_alloc(
struct xfs_buftarg *target,
- xfs_daddr_t blkno,
- size_t numblks,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
+ int error;
+ int i;
bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
if (unlikely(!bp))
@@ -192,16 +229,28 @@ xfs_buf_alloc(
sema_init(&bp->b_sema, 0); /* held, no waiters */
XB_SET_OWNER(bp);
bp->b_target = target;
+ bp->b_flags = flags;
/*
* Set length and io_length to the same value initially.
* I/O routines should use io_length, which will be the same in
* most cases but may be reset (e.g. XFS recovery).
*/
- bp->b_length = numblks;
- bp->b_io_length = numblks;
- bp->b_flags = flags;
- bp->b_bn = blkno;
+ error = xfs_buf_get_maps(bp, nmaps);
+ if (error) {
+ kmem_zone_free(xfs_buf_zone, bp);
+ return NULL;
+ }
+
+ bp->b_bn = map[0].bm_bn;
+ bp->b_length = 0;
+ for (i = 0; i < nmaps; i++) {
+ bp->b_maps[i].bm_bn = map[i].bm_bn;
+ bp->b_maps[i].bm_len = map[i].bm_len;
+ bp->b_length += map[i].bm_len;
+ }
+ bp->b_io_length = bp->b_length;
+
atomic_set(&bp->b_pin_count, 0);
init_waitqueue_head(&bp->b_waiters);
@@ -280,6 +329,7 @@ xfs_buf_free(
} else if (bp->b_flags & _XBF_KMEM)
kmem_free(bp->b_addr);
_xfs_buf_free_pages(bp);
+ xfs_buf_free_maps(bp);
kmem_zone_free(xfs_buf_zone, bp);
}
@@ -327,8 +377,9 @@ xfs_buf_allocate_memory(
}
use_alloc_page:
- start = BBTOB(bp->b_bn) >> PAGE_SHIFT;
- end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
+ end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
page_count = end - start;
error = _xfs_buf_get_pages(bp, page_count, flags);
if (unlikely(error))
@@ -425,8 +476,8 @@ _xfs_buf_map_pages(
xfs_buf_t *
_xfs_buf_find(
struct xfs_buftarg *btp,
- xfs_daddr_t blkno,
- size_t numblks,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags,
xfs_buf_t *new_bp)
{
@@ -435,13 +486,36 @@ _xfs_buf_find(
struct rb_node **rbp;
struct rb_node *parent;
xfs_buf_t *bp;
+ xfs_daddr_t blkno = map[0].bm_bn;
+ xfs_daddr_t eofs;
+ int numblks = 0;
+ int i;
+ for (i = 0; i < nmaps; i++)
+ numblks += map[i].bm_len;
numbytes = BBTOB(numblks);
/* Check for IOs smaller than the sector size / not sector aligned */
ASSERT(!(numbytes < (1 << btp->bt_sshift)));
ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
+ /*
+ * Corrupted block numbers can get through to here, unfortunately, so we
+ * have to check that the buffer falls within the filesystem bounds.
+ */
+ eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
+ if (blkno >= eofs) {
+ /*
+ * XXX (dgc): we should really be returning EFSCORRUPTED here,
+ * but none of the higher level infrastructure supports
+ * returning a specific error on buffer lookup failures.
+ */
+ xfs_alert(btp->bt_mount,
+ "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
+ __func__, blkno, eofs);
+ return NULL;
+ }
+
/* get tree root */
pag = xfs_perag_get(btp->bt_mount,
xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -513,7 +587,9 @@ found:
*/
if (bp->b_flags & XBF_STALE) {
ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
+ ASSERT(bp->b_iodone == NULL);
bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
+ bp->b_ops = NULL;
}
trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -527,31 +603,31 @@ found:
* more hits than misses.
*/
struct xfs_buf *
-xfs_buf_get(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks,
+xfs_buf_get_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
struct xfs_buf *new_bp;
int error = 0;
- bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+ bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
if (likely(bp))
goto found;
- new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+ new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
if (unlikely(!new_bp))
return NULL;
error = xfs_buf_allocate_memory(new_bp, flags);
if (error) {
- kmem_zone_free(xfs_buf_zone, new_bp);
+ xfs_buf_free(new_bp);
return NULL;
}
- bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+ bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
if (!bp) {
xfs_buf_free(new_bp);
return NULL;
@@ -560,8 +636,6 @@ xfs_buf_get(
if (bp != new_bp)
xfs_buf_free(new_bp);
- bp->b_io_length = bp->b_length;
-
found:
if (!bp->b_addr) {
error = _xfs_buf_map_pages(bp, flags);
@@ -584,7 +658,7 @@ _xfs_buf_read(
xfs_buf_flags_t flags)
{
ASSERT(!(flags & XBF_WRITE));
- ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+ ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -596,22 +670,24 @@ _xfs_buf_read(
}
xfs_buf_t *
-xfs_buf_read(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks,
- xfs_buf_flags_t flags)
+xfs_buf_read_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
+ xfs_buf_flags_t flags,
+ const struct xfs_buf_ops *ops)
{
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
flags |= XBF_READ;
- bp = xfs_buf_get(target, blkno, numblks, flags);
+ bp = xfs_buf_get_map(target, map, nmaps, flags);
if (bp) {
trace_xfs_buf_read(bp, flags, _RET_IP_);
if (!XFS_BUF_ISDONE(bp)) {
XFS_STATS_INC(xb_get_read);
+ bp->b_ops = ops;
_xfs_buf_read(bp, flags);
} else if (flags & XBF_ASYNC) {
/*
@@ -634,16 +710,17 @@ xfs_buf_read(
* safe manner.
*/
void
-xfs_buf_readahead(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks)
+xfs_buf_readahead_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
+ const struct xfs_buf_ops *ops)
{
if (bdi_read_congested(target->bt_bdi))
return;
- xfs_buf_read(target, blkno, numblks,
- XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
+ xfs_buf_read_map(target, map, nmaps,
+ XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
}
/*
@@ -655,25 +732,24 @@ xfs_buf_read_uncached(
struct xfs_buftarg *target,
xfs_daddr_t daddr,
size_t numblks,
- int flags)
+ int flags,
+ const struct xfs_buf_ops *ops)
{
- xfs_buf_t *bp;
- int error;
+ struct xfs_buf *bp;
bp = xfs_buf_get_uncached(target, numblks, flags);
if (!bp)
return NULL;
/* set up the buffer for a read IO */
- XFS_BUF_SET_ADDR(bp, daddr);
- XFS_BUF_READ(bp);
+ ASSERT(bp->b_map_count == 1);
+ bp->b_bn = daddr;
+ bp->b_maps[0].bm_bn = daddr;
+ bp->b_flags |= XBF_READ;
+ bp->b_ops = ops;
xfsbdstrat(target->bt_mount, bp);
- error = xfs_buf_iowait(bp);
- if (error) {
- xfs_buf_relse(bp);
- return NULL;
- }
+ xfs_buf_iowait(bp);
return bp;
}
@@ -694,7 +770,11 @@ xfs_buf_set_empty(
bp->b_addr = NULL;
bp->b_length = numblks;
bp->b_io_length = numblks;
+
+ ASSERT(bp->b_map_count == 1);
bp->b_bn = XFS_BUF_DADDR_NULL;
+ bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
+ bp->b_maps[0].bm_len = bp->b_length;
}
static inline struct page *
@@ -758,9 +838,10 @@ xfs_buf_get_uncached(
{
unsigned long page_count;
int error, i;
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
+ DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
- bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
+ bp = _xfs_buf_alloc(target, &map, 1, 0);
if (unlikely(bp == NULL))
goto fail;
@@ -791,6 +872,7 @@ xfs_buf_get_uncached(
__free_page(bp->b_pages[i]);
_xfs_buf_free_pages(bp);
fail_free_buf:
+ xfs_buf_free_maps(bp);
kmem_zone_free(xfs_buf_zone, bp);
fail:
return NULL;
@@ -937,27 +1019,37 @@ STATIC void
xfs_buf_iodone_work(
struct work_struct *work)
{
- xfs_buf_t *bp =
+ struct xfs_buf *bp =
container_of(work, xfs_buf_t, b_iodone_work);
+ bool read = !!(bp->b_flags & XBF_READ);
+
+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
+ if (read && bp->b_ops)
+ bp->b_ops->verify_read(bp);
if (bp->b_iodone)
(*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
+ else {
+ ASSERT(read && bp->b_ops);
+ complete(&bp->b_iowait);
+ }
}
void
xfs_buf_ioend(
- xfs_buf_t *bp,
- int schedule)
+ struct xfs_buf *bp,
+ int schedule)
{
+ bool read = !!(bp->b_flags & XBF_READ);
+
trace_xfs_buf_iodone(bp, _RET_IP_);
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
if (bp->b_error == 0)
bp->b_flags |= XBF_DONE;
- if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
+ if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
if (schedule) {
INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
queue_work(xfslogd_workqueue, &bp->b_iodone_work);
@@ -965,6 +1057,7 @@ xfs_buf_ioend(
xfs_buf_iodone_work(&bp->b_iodone_work);
}
} else {
+ bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
complete(&bp->b_iowait);
}
}
@@ -1135,45 +1228,53 @@ xfs_buf_bio_end_io(
{
xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
- xfs_buf_ioerror(bp, -error);
+ /*
+ * don't overwrite existing errors - otherwise we can lose errors on
+ * buffers that require multiple bios to complete.
+ */
+ if (!bp->b_error)
+ xfs_buf_ioerror(bp, -error);
- if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+ if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
_xfs_buf_ioend(bp, 1);
bio_put(bio);
}
-STATIC void
-_xfs_buf_ioapply(
- xfs_buf_t *bp)
+static void
+xfs_buf_ioapply_map(
+ struct xfs_buf *bp,
+ int map,
+ int *buf_offset,
+ int *count,
+ int rw)
{
- int rw, map_i, total_nr_pages, nr_pages;
- struct bio *bio;
- int offset = bp->b_offset;
- int size = BBTOB(bp->b_io_length);
- sector_t sector = bp->b_bn;
+ int page_index;
+ int total_nr_pages = bp->b_page_count;
+ int nr_pages;
+ struct bio *bio;
+ sector_t sector = bp->b_maps[map].bm_bn;
+ int size;
+ int offset;
total_nr_pages = bp->b_page_count;
- map_i = 0;
- if (bp->b_flags & XBF_WRITE) {
- if (bp->b_flags & XBF_SYNCIO)
- rw = WRITE_SYNC;
- else
- rw = WRITE;
- if (bp->b_flags & XBF_FUA)
- rw |= REQ_FUA;
- if (bp->b_flags & XBF_FLUSH)
- rw |= REQ_FLUSH;
- } else if (bp->b_flags & XBF_READ_AHEAD) {
- rw = READA;
- } else {
- rw = READ;
+ /* skip the pages in the buffer before the start offset */
+ page_index = 0;
+ offset = *buf_offset;
+ while (offset >= PAGE_SIZE) {
+ page_index++;
+ offset -= PAGE_SIZE;
}
- /* we only use the buffer cache for meta-data */
- rw |= REQ_META;
+ /*
+ * Limit the IO size to the length of the current vector, and update the
+ * remaining IO count for the next time around.
+ */
+ size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
+ *count -= size;
+ *buf_offset += size;
next_chunk:
atomic_inc(&bp->b_io_remaining);
@@ -1188,13 +1289,14 @@ next_chunk:
bio->bi_private = bp;
- for (; size && nr_pages; nr_pages--, map_i++) {
+ for (; size && nr_pages; nr_pages--, page_index++) {
int rbytes, nbytes = PAGE_SIZE - offset;
if (nbytes > size)
nbytes = size;
- rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+ rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
+ offset);
if (rbytes < nbytes)
break;
@@ -1213,9 +1315,76 @@ next_chunk:
if (size)
goto next_chunk;
} else {
+ /*
+ * This is guaranteed not to be the last io reference count
+ * because the caller (xfs_buf_iorequest) holds a count itself.
+ */
+ atomic_dec(&bp->b_io_remaining);
xfs_buf_ioerror(bp, EIO);
bio_put(bio);
}
+
+}
+
+STATIC void
+_xfs_buf_ioapply(
+ struct xfs_buf *bp)
+{
+ struct blk_plug plug;
+ int rw;
+ int offset;
+ int size;
+ int i;
+
+ if (bp->b_flags & XBF_WRITE) {
+ if (bp->b_flags & XBF_SYNCIO)
+ rw = WRITE_SYNC;
+ else
+ rw = WRITE;
+ if (bp->b_flags & XBF_FUA)
+ rw |= REQ_FUA;
+ if (bp->b_flags & XBF_FLUSH)
+ rw |= REQ_FLUSH;
+
+ /*
+ * Run the write verifier callback function if it exists. If
+ * this function fails it will mark the buffer with an error and
+ * the IO should not be dispatched.
+ */
+ if (bp->b_ops) {
+ bp->b_ops->verify_write(bp);
+ if (bp->b_error) {
+ xfs_force_shutdown(bp->b_target->bt_mount,
+ SHUTDOWN_CORRUPT_INCORE);
+ return;
+ }
+ }
+ } else if (bp->b_flags & XBF_READ_AHEAD) {
+ rw = READA;
+ } else {
+ rw = READ;
+ }
+
+ /* we only use the buffer cache for meta-data */
+ rw |= REQ_META;
+
+ /*
+ * Walk all the vectors issuing IO on them. Set up the initial offset
+ * into the buffer and the desired IO size before we start -
+ * _xfs_buf_ioapply_vec() will modify them appropriately for each
+ * subsequent call.
+ */
+ offset = bp->b_offset;
+ size = BBTOB(bp->b_io_length);
+ blk_start_plug(&plug);
+ for (i = 0; i < bp->b_map_count; i++) {
+ xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+ if (bp->b_error)
+ break;
+ if (size <= 0)
+ break; /* all done */
+ }
+ blk_finish_plug(&plug);
}
void
@@ -1336,6 +1505,8 @@ restart:
while (!list_empty(&btp->bt_lru)) {
bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
if (atomic_read(&bp->b_hold) > 1) {
+ trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
+ list_move_tail(&bp->b_lru, &btp->bt_lru);
spin_unlock(&btp->bt_lru_lock);
delay(100);
goto restart;
@@ -1389,6 +1560,7 @@ xfs_buftarg_shrink(
*/
list_move(&bp->b_lru, &dispose);
btp->bt_lru_nr--;
+ bp->b_lru_flags |= _XBF_LRU_DISPOSE;
}
spin_unlock(&btp->bt_lru_lock);
@@ -1557,7 +1729,7 @@ xfs_buf_cmp(
struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
xfs_daddr_t diff;
- diff = ap->b_bn - bp->b_bn;
+ diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
if (diff < 0)
return -1;
if (diff > 0)