diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index aa473fa640a..daf4066c24b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -728,16 +728,17 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
 	ret = generic_file_buffered_write(iocb, iovp, nr_segs,
-			pos, &iocb->ki_pos, count, ret);
+			pos, &iocb->ki_pos, count, 0);
+
 	/*
-	 * if we just got an ENOSPC, flush the inode now we aren't holding any
-	 * page locks and retry *once*
+	 * If we just got an ENOSPC, try to write back all dirty inodes to
+	 * convert delalloc space to free up some of the excess reserved
+	 * metadata space.
 	 */
 	if (ret == -ENOSPC && !enospc) {
 		enospc = 1;
-		ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
-		if (!ret)
-			goto write_retry;
+		xfs_flush_inodes(ip->i_mount);
+		goto write_retry;
 	}
 
 	current->backing_dev_info = NULL;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6ad93..f858b903678 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -373,7 +373,7 @@ xfs_iomap_write_delay(
 	xfs_extlen_t	extsz;
 	int		nimaps;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
-	int		prealloc, flushed = 0;
+	int		prealloc;
 	int		error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -434,26 +434,17 @@ retry:
 	}
 
 	/*
-	 * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
-	 * ENOSPC, * flush all other inodes with delalloc blocks to free up
-	 * some of the excess reserved metadata space. For both cases, retry
+	 * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. Retry
 	 * without EOF preallocation.
 	 */
 	if (nimaps == 0) {
 		trace_xfs_delalloc_enospc(ip, offset, count);
-		if (flushed)
-			return XFS_ERROR(error ? error : ENOSPC);
-
-		if (error == ENOSPC) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-			xfs_flush_inodes(ip);
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
+		if (prealloc) {
+			prealloc = 0;
+			error = 0;
+			goto retry;
 		}
-
-		flushed = 1;
-		error = 0;
-		prealloc = 0;
-		goto retry;
+		return XFS_ERROR(error ? error : ENOSPC);
 	}
 
 	if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 26e46aeaa3f..a54b5aa498d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -198,7 +198,6 @@ typedef struct xfs_mount {
 #endif
 	struct xfs_mru_cache	*m_filestream;	/* per-mount filestream data */
 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
-	struct work_struct	m_flush_work;	/* background inode flush */
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
 	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 37c39a155a5..9468c687846 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -882,6 +882,24 @@ xfs_destroy_mount_workqueues(
 	destroy_workqueue(mp->m_unwritten_workqueue);
 }
 
+/*
+ * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
+ * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
+ * for IO to complete so that we effectively throttle multiple callers to the
+ * rate at which IO is completing.
+ */
+void
+xfs_flush_inodes(
+	struct xfs_mount	*mp)
+{
+	struct super_block	*sb = mp->m_super;
+
+	if (down_read_trylock(&sb->s_umount)) {
+		sync_inodes_sb(sb);
+		up_read(&sb->s_umount);
+	}
+}
+
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -1005,8 +1023,6 @@ xfs_fs_put_super(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
-	cancel_work_sync(&mp->m_flush_work);
-
 	xfs_filestream_unmount(mp);
 	xfs_unmountfs(mp);
 
@@ -1324,7 +1340,6 @@ xfs_fs_fill_super(
 	spin_lock_init(&mp->m_sb_lock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
-	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
 
 	mp->m_super = sb;
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 9de4a920ba0..bbe3d15a790 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -74,6 +74,7 @@ struct block_device;
 
 extern __uint64_t xfs_max_file_offset(unsigned int);
 
+extern void xfs_flush_inodes(struct xfs_mount *mp);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
 extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 2174555aebb..6a2ada37916 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -216,51 +216,6 @@ xfs_inode_ag_iterator(
 	return XFS_ERROR(last_error);
 }
 
-STATIC int
-xfs_sync_inode_data(
-	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
-	int			flags)
-{
-	struct inode		*inode = VFS_I(ip);
-	struct address_space *mapping = inode->i_mapping;
-	int			error = 0;
-
-	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-		return 0;
-
-	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
-		if (flags & SYNC_TRYLOCK)
-			return 0;
-		xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	}
-
-	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
-				0 : XBF_ASYNC, FI_NONE);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-	return error;
-}
-
-/*
- * Write out pagecache data for the whole filesystem.
- */
-STATIC int
-xfs_sync_data(
-	struct xfs_mount	*mp,
-	int			flags)
-{
-	int			error;
-
-	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
-
-	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
-	if (error)
-		return XFS_ERROR(error);
-
-	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-	return 0;
-}
-
 STATIC int
 xfs_sync_fsdata(
 	struct xfs_mount	*mp)
@@ -415,39 +370,6 @@ xfs_reclaim_worker(
 	xfs_syncd_queue_reclaim(mp);
 }
 
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room.
- *
- * Queue a new data flush if there isn't one already in progress and
- * wait for completion of the flush. This means that we only ever have one
- * inode flush in progress no matter how many ENOSPC events are occurring and
- * so will prevent the system from bogging down due to every concurrent
- * ENOSPC event scanning all the active inodes in the system for writeback.
- */
-void
-xfs_flush_inodes(
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-
-	queue_work(xfs_syncd_wq, &mp->m_flush_work);
-	flush_work(&mp->m_flush_work);
-}
-
-void
-xfs_flush_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(work,
-					struct xfs_mount, m_flush_work);
-
-	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-}
-
 void
 __xfs_inode_set_reclaim_tag(
 	struct xfs_perag	*pag,
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 8d58fab72a1..0018e846f0d 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -26,14 +26,11 @@ struct xfs_perag;
 
 extern struct workqueue_struct *xfs_syncd_wq;	/* sync workqueue */
 
-void xfs_flush_worker(struct work_struct *work);
 void xfs_reclaim_worker(struct work_struct *work);
 
 int xfs_quiesce_data(struct xfs_mount *mp);
 void xfs_quiesce_attr(struct xfs_mount *mp);
 
-void xfs_flush_inodes(struct xfs_inode *ip);
-
 int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 int xfs_reclaim_inodes_count(struct xfs_mount *mp);
 void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 2a5c637344b..14928564f10 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -777,7 +777,7 @@ xfs_create(
 			XFS_TRANS_PERM_LOG_RES, log_count);
 	if (error == ENOSPC) {
 		/* flush outstanding delalloc blocks and retry */
-		xfs_flush_inodes(dp);
+		xfs_flush_inodes(mp);
 		error = xfs_trans_reserve(tp, resblks, log_res, 0,
 			XFS_TRANS_PERM_LOG_RES, log_count);
 	}
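
Note on the resulting control flow: after this patch both ENOSPC paths converge on a "flush all dirty data once, then retry exactly once" shape, with xfs_flush_inodes() blocking on writeback so that concurrent ENOSPC hitters are throttled to the rate at which IO completes. The standalone C sketch below is not part of the patch; it uses hypothetical helpers (try_write, flush_all_dirty_data) purely to illustrate that retry pattern.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the allocation path; fails once with ENOSPC. */
static int try_write(void)
{
	static int attempts;

	return (attempts++ == 0) ? -ENOSPC : 0;
}

/*
 * Hypothetical stand-in for xfs_flush_inodes(mp): in XFS this blocks until
 * writeback completes, converting delalloc reservations and throttling
 * concurrent callers to IO completion speed.
 */
static void flush_all_dirty_data(void)
{
	puts("flushing dirty data to free reserved metadata space");
}

static int buffered_write_with_retry(void)
{
	bool enospc = false;
	int ret;

retry:
	ret = try_write();
	if (ret == -ENOSPC && !enospc) {
		enospc = true;		/* retry at most once */
		flush_all_dirty_data();
		goto retry;
	}
	return ret;
}

int main(void)
{
	printf("result: %d\n", buffered_write_with_retry());
	return 0;
}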