From a76cccbeef9e91b5f799e8853acac1ed1fc833cb Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 31 Jul 2012 14:55:51 +1000
Subject: xfs: fix uninitialised variable in xfs_rtbuf_get()

Results in this assert failure in generic/090:

XFS: Assertion failed: *nmap >= 1, file: fs/xfs/xfs_bmap.c, line: 4363
.....
Call Trace:
 [<ffffffff814680db>] xfs_bmapi_read+0x6b/0x370
 [<ffffffff814b64b2>] xfs_rtbuf_get+0x42/0x130
 [<ffffffff814b6f09>] xfs_rtget_summary+0x89/0x120
 [<ffffffff814b7bfe>] xfs_rtallocate_extent_size+0xce/0x340
 [<ffffffff814b89f0>] xfs_rtallocate_extent+0x240/0x290
 [<ffffffff81462c1a>] xfs_bmap_rtalloc+0x1ba/0x340
 [<ffffffff81463a65>] xfs_bmap_alloc+0x35/0x40
 [<ffffffff8146f111>] xfs_bmapi_allocate+0xf1/0x350
 [<ffffffff8146f9de>] xfs_bmapi_write+0x66e/0xa60
 [<ffffffff8144538a>] xfs_iomap_write_direct+0x22a/0x3f0
 [<ffffffff8143707b>] __xfs_get_blocks+0x38b/0x5d0
 [<ffffffff814372d4>] xfs_get_blocks_direct+0x14/0x20
 [<ffffffff811b0081>] do_blockdev_direct_IO+0xf71/0x1eb0
 [<ffffffff811b1015>] __blockdev_direct_IO+0x55/0x60
 [<ffffffff814355ca>] xfs_vm_direct_IO+0x11a/0x1e0
 [<ffffffff8112d617>] generic_file_direct_write+0xd7/0x1b0
 [<ffffffff8143e16c>] xfs_file_dio_aio_write+0x13c/0x320
 [<ffffffff8143e6f2>] xfs_file_aio_write+0x1c2/0x1d0
 [<ffffffff81174a07>] do_sync_write+0xa7/0xe0
 [<ffffffff81175288>] vfs_write+0xa8/0x160
 [<ffffffff81175702>] sys_pwrite64+0x92/0xb0
 [<ffffffff81b68f69>] system_call_fastpath+0x16/0x1b

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_rtalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/xfs')
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 92d4331cd4f..ca28a4ba4b5 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -857,7 +857,7 @@ xfs_rtbuf_get(
 	xfs_buf_t	*bp;		/* block buffer, result */
 	xfs_inode_t	*ip;		/* bitmap or summary inode */
 	xfs_bmbt_irec_t	map;
-	int		nmap;
+	int		nmap = 1;
 	int		error;		/* error value */
 
 	ip = issum ? mp->m_rsumip : mp->m_rbmip;
-- 
cgit v1.2.3


From 1ed845df60f3f02d4b7cd9fcad79ccb69c289f5c Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 1 Aug 2012 09:56:49 -0500
Subject: xfs: kill struct declarations in xfs_mount.h

I noticed that "struct xfs_mount_args" was still declared in
"fs/xfs/xfs_mount.h".  That struct doesn't even exist any more (and
is obviously not referenced elsewhere in that header file).  While
in there, delete four other unneeded struct declarations in that
file.

Doing so highlights that "fs/xfs/xfs_trace.h" was relying indirectly
on "xfs_mount.h" to be #included in order to declare "struct
xfs_bmbt_irec", so add that declaration to resolve that issue.

Signed-off-by: Alex Elder <elder@inktank.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_mount.h | 5 -----
 fs/xfs/xfs_trace.h | 1 +
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 05a05a7b611..deee09e534d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations {
 #include "xfs_sync.h"
 
 struct xlog;
-struct xfs_mount_args;
 struct xfs_inode;
-struct xfs_bmbt_irec;
-struct xfs_bmap_free;
-struct xfs_extdelta;
-struct xfs_swapext;
 struct xfs_mru_cache;
 struct xfs_nameops;
 struct xfs_ail;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e5795dd6013..7d36ccf57f9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -37,6 +37,7 @@ struct xlog_recover;
 struct xlog_recover_item;
 struct xfs_buf_log_format;
 struct xfs_inode_log_format;
+struct xfs_bmbt_irec;
 
 DECLARE_EVENT_CLASS(xfs_attr_list_class,
 	TP_PROTO(struct xfs_attr_list_context *ctx),
-- 
cgit v1.2.3


From c4982110ae93d7575503feb81d15e93c0c5f393c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 7 Aug 2012 02:02:02 -0400
Subject: xfs: unlock the AGI buffer when looping in xfs_dialloc

Also update some comments in the area to make the code easier to read.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_ialloc.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 21e37b55f7e..5aceb3f8ecd 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -962,23 +962,22 @@ xfs_dialloc(
 		if (!pag->pagi_freecount && !okalloc)
 			goto nextag;
 
+		/*
+		 * Then read in the AGI buffer and recheck with the AGI buffer
+		 * lock held.
+		 */
 		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
 		if (error)
 			goto out_error;
 
-		/*
-		 * Once the AGI has been read in we have to recheck
-		 * pagi_freecount with the AGI buffer lock held.
-		 */
 		if (pag->pagi_freecount) {
 			xfs_perag_put(pag);
 			goto out_alloc;
 		}
 
-		if (!okalloc) {
-			xfs_trans_brelse(tp, agbp);
-			goto nextag;
-		}
+		if (!okalloc)
+			goto nextag_relse_buffer;
+
 
 		error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
 		if (error) {
@@ -1007,6 +1006,8 @@ xfs_dialloc(
 			return 0;
 		}
 
+nextag_relse_buffer:
+		xfs_trans_brelse(tp, agbp);
 nextag:
 		xfs_perag_put(pag);
 		if (++agno == mp->m_sb.sb_agcount)
-- 
cgit v1.2.3


From 643bfc061c47e9c7661324a09fb0a0bc6601e5d6 Mon Sep 17 00:00:00 2001
From: Tomas Racek <tracek@redhat.com>
Date: Tue, 14 Aug 2012 10:35:04 +0200
Subject: xfs: check for possible overflow in xfs_ioc_trim

If range.start or range.minlen is bigger than filesystem size, return
invalid value error. This fixes possible overflow in BTOBB macro when
passed value was nearly ULLONG_MAX.

Signed-off-by: Tomas Racek <tracek@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_discard.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index f9c3fe304a1..69cf4fcde03 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,12 +179,14 @@ xfs_ioc_trim(
 	 * used by the fstrim application.  In the end it really doesn't
 	 * matter as trimming blocks is an advisory interface.
 	 */
+	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
+	    range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
+		return -XFS_ERROR(EINVAL);
+
 	start = BTOBB(range.start);
 	end = start + BTOBBT(range.len) - 1;
 	minlen = BTOBB(max_t(u64, granularity, range.minlen));
 
-	if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
-		return -XFS_ERROR(EINVAL);
 	if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
 		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
 
-- 
cgit v1.2.3


From e599b3253c5e49f7a2a579eef2fc2aa066989ef5 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Fri, 10 Aug 2012 15:01:51 -0300
Subject: xfs: fix race while discarding buffers [V4]

While xfs_buftarg_shrink() is freeing buffers from the dispose list (filled with
buffers from the lru list), xfs_buf_stale() can race with it and remove buffers
from the dispose list before xfs_buftarg_shrink() does.

This happens because xfs_buftarg_shrink() handles the dispose list without
locking and the test condition in xfs_buf_stale() checks for the buffer being in
*any* list:

if (!list_empty(&bp->b_lru))

If the buffer happens to be on dispose list, this causes the buffer counter of
lru list (btp->bt_lru_nr) to be decremented twice (once in xfs_buftarg_shrink()
and another in xfs_buf_stale()) causing a wrong account usage of the lru list.

This may cause xfs_buftarg_shrink() to return a wrong value to the memory
shrinker shrink_slab(), and such account error may also cause an underflowed
value to be returned; since the counter is lower than the current number of
items in the lru list, a decrement may happen when the counter is 0, causing
an underflow on the counter.

The fix uses a new flag field (and a new buffer flag) to serialize buffer
handling during the shrink process. The new flag field has been designed to use
btp->bt_lru_lock/unlock instead of xfs_buf_lock/unlock mechanism.

dchinner, sandeen, aquini and aris also deserve credits for this.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_buf.c |  5 ++++-
 fs/xfs/xfs_buf.h | 41 ++++++++++++++++++++++++-----------------
 2 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d7a9dd735e1..933b7930b86 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -96,6 +96,7 @@ xfs_buf_lru_add(
 		atomic_inc(&bp->b_hold);
 		list_add_tail(&bp->b_lru, &btp->bt_lru);
 		btp->bt_lru_nr++;
+		bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
 	}
 	spin_unlock(&btp->bt_lru_lock);
 }
@@ -154,7 +155,8 @@ xfs_buf_stale(
 		struct xfs_buftarg *btp = bp->b_target;
 
 		spin_lock(&btp->bt_lru_lock);
-		if (!list_empty(&bp->b_lru)) {
+		if (!list_empty(&bp->b_lru) &&
+		    !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
 			list_del_init(&bp->b_lru);
 			btp->bt_lru_nr--;
 			atomic_dec(&bp->b_hold);
@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink(
 		 */
 		list_move(&bp->b_lru, &dispose);
 		btp->bt_lru_nr--;
+		bp->b_lru_flags |= _XBF_LRU_DISPOSE;
 	}
 	spin_unlock(&btp->bt_lru_lock);
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d03b73b9604..7c0b6a0a155 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -38,27 +38,28 @@ typedef enum {
 	XBRW_ZERO = 3,			/* Zero target memory */
 } xfs_buf_rw_t;
 
-#define XBF_READ	(1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE	(1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD	(1 << 2) /* asynchronous read-ahead */
-#define XBF_ASYNC	(1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE	(1 << 5) /* all pages in the buffer uptodate */
-#define XBF_STALE	(1 << 6) /* buffer has been staled, do not find it */
+#define XBF_READ	 (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE	 (1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD	 (1 << 2) /* asynchronous read-ahead */
+#define XBF_ASYNC	 (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE	 (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_STALE	 (1 << 6) /* buffer has been staled, do not find it */
 
 /* I/O hints for the BIO layer */
-#define XBF_SYNCIO	(1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA		(1 << 11)/* force cache write through mode */
-#define XBF_FLUSH	(1 << 12)/* flush the disk cache before a write */
+#define XBF_SYNCIO	 (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA		 (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH	 (1 << 12)/* flush the disk cache before a write */
 
 /* flags used only as arguments to access routines */
-#define XBF_TRYLOCK	(1 << 16)/* lock requested, but do not wait */
-#define XBF_UNMAPPED	(1 << 17)/* do not map the buffer */
+#define XBF_TRYLOCK	 (1 << 16)/* lock requested, but do not wait */
+#define XBF_UNMAPPED	 (1 << 17)/* do not map the buffer */
 
 /* flags used only internally */
-#define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM	(1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q	(1 << 22)/* buffer on a delwri queue */
-#define _XBF_COMPOUND	(1 << 23)/* compound buffer */
+#define _XBF_PAGES	 (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
+#define _XBF_COMPOUND	 (1 << 23)/* compound buffer */
+#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t;
 	{ XBF_SYNCIO,		"SYNCIO" }, \
 	{ XBF_FUA,		"FUA" }, \
 	{ XBF_FLUSH,		"FLUSH" }, \
-	{ XBF_TRYLOCK,		"TRYLOCK" }, 	/* should never be set */\
+	{ XBF_TRYLOCK,		"TRYLOCK" },	/* should never be set */\
 	{ XBF_UNMAPPED,		"UNMAPPED" },	/* ditto */\
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
-	{ _XBF_COMPOUND,	"COMPOUND" }
+	{ _XBF_COMPOUND,	"COMPOUND" }, \
+	{ _XBF_LRU_DISPOSE,	"LRU_DISPOSE" }
 
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
@@ -124,7 +126,12 @@ typedef struct xfs_buf {
 	xfs_buf_flags_t		b_flags;	/* status flags */
 	struct semaphore	b_sema;		/* semaphore for lockables */
 
+	/*
+	 * concurrent access to b_lru and b_lru_flags are protected by
+	 * bt_lru_lock and not by b_sema
+	 */
 	struct list_head	b_lru;		/* lru list */
+	xfs_buf_flags_t		b_lru_flags;	/* internal lru status flags */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;		/* contains rbtree root */
-- 
cgit v1.2.3


From 834ab12228fad777a11007a24cb6286b02c9a41c Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:11:45 +0800
Subject: xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole()

The type is already indicated by the function naming explicitly, so this argument
can be omitted from those calls.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 56afcdb2377..92ba18f841f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -962,8 +962,7 @@ xfs_vm_page_mkwrite(
 STATIC loff_t
 xfs_seek_data(
 	struct file		*file,
-	loff_t			start,
-	u32			type)
+	loff_t			start)
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
@@ -1029,8 +1028,7 @@ out_unlock:
 STATIC loff_t
 xfs_seek_hole(
 	struct file		*file,
-	loff_t			start,
-	u32			type)
+	loff_t			start)
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
@@ -1092,9 +1090,9 @@ xfs_file_llseek(
 	case SEEK_SET:
 		return generic_file_llseek(file, offset, origin);
 	case SEEK_DATA:
-		return xfs_seek_data(file, offset, origin);
+		return xfs_seek_data(file, offset);
 	case SEEK_HOLE:
-		return xfs_seek_hole(file, offset, origin);
+		return xfs_seek_hole(file, offset);
 	default:
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From d126d43f631f996daeee5006714fed914be32368 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:11:57 +0800
Subject: xfs: Introduce a helper routine to probe data or hole offset from
 page cache

Introduce helpers to probe data or hole offset from page cache.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 92ba18f841f..d78a746b6c7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -36,6 +36,7 @@
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
+#include <linux/pagevec.h>
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
@@ -959,6 +960,224 @@ xfs_vm_page_mkwrite(
 	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
 }
 
+/*
+ * This type is designed to indicate the type of offset we would like
+ * to search from page cache for either xfs_seek_data() or xfs_seek_hole().
+ */
+enum {
+	HOLE_OFF = 0,
+	DATA_OFF,
+};
+
+/*
+ * Lookup the desired type of offset from the given page.
+ *
+ * On success, return true and the offset argument will point to the
+ * start of the region that was found.  Otherwise this function will
+ * return false and keep the offset argument unchanged.
+ */
+STATIC bool
+xfs_lookup_buffer_offset(
+	struct page		*page,
+	loff_t			*offset,
+	unsigned int		type)
+{
+	loff_t			lastoff = page_offset(page);
+	bool			found = false;
+	struct buffer_head	*bh, *head;
+
+	bh = head = page_buffers(page);
+	do {
+		/*
+		 * Unwritten extents that have data in the page
+		 * cache covering them can be identified by the
+		 * BH_Unwritten state flag.  Pages with multiple
+		 * buffers might have a mix of holes, data and
+		 * unwritten extents - any buffer with valid
+		 * data in it should have BH_Uptodate flag set
+		 * on it.
+		 */
+		if (buffer_unwritten(bh) ||
+		    buffer_uptodate(bh)) {
+			if (type == DATA_OFF)
+				found = true;
+		} else {
+			if (type == HOLE_OFF)
+				found = true;
+		}
+
+		if (found) {
+			*offset = lastoff;
+			break;
+		}
+		lastoff += bh->b_size;
+	} while ((bh = bh->b_this_page) != head);
+
+	return found;
+}
+
+/*
+ * This routine is called to find out and return a data or hole offset
+ * from the page cache for unwritten extents according to the desired
+ * type for xfs_seek_data() or xfs_seek_hole().
+ *
+ * The argument offset is used to tell where we start to search from the
+ * page cache.  Map is used to figure out the end points of the range to
+ * lookup pages.
+ *
+ * Return true if the desired type of offset was found, and the argument
+ * offset is filled with that address.  Otherwise, return false and keep
+ * offset unchanged.
+ */
+STATIC bool
+xfs_find_get_desired_pgoff(
+	struct inode		*inode,
+	struct xfs_bmbt_irec	*map,
+	unsigned int		type,
+	loff_t			*offset)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct pagevec		pvec;
+	pgoff_t			index;
+	pgoff_t			end;
+	loff_t			endoff;
+	loff_t			startoff = *offset;
+	loff_t			lastoff = startoff;
+	bool			found = false;
+
+	pagevec_init(&pvec, 0);
+
+	index = startoff >> PAGE_CACHE_SHIFT;
+	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
+	end = endoff >> PAGE_CACHE_SHIFT;
+	do {
+		int		want;
+		unsigned	nr_pages;
+		unsigned int	i;
+
+		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
+					  want);
+		/*
+		 * No page mapped into given range.  If we are searching holes
+		 * and if this is the first time we got into the loop, it means
+		 * that the given offset is landed in a hole, return it.
+		 *
+		 * If we have already stepped through some block buffers to find
+		 * holes but they all contains data.  In this case, the last
+		 * offset is already updated and pointed to the end of the last
+		 * mapped page, if it does not reach the endpoint to search,
+		 * that means there should be a hole between them.
+		 */
+		if (nr_pages == 0) {
+			/* Data search found nothing */
+			if (type == DATA_OFF)
+				break;
+
+			ASSERT(type == HOLE_OFF);
+			if (lastoff == startoff || lastoff < endoff) {
+				found = true;
+				*offset = lastoff;
+			}
+			break;
+		}
+
+		/*
+		 * At lease we found one page.  If this is the first time we
+		 * step into the loop, and if the first page index offset is
+		 * greater than the given search offset, a hole was found.
+		 */
+		if (type == HOLE_OFF && lastoff == startoff &&
+		    lastoff < page_offset(pvec.pages[0])) {
+			found = true;
+			break;
+		}
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page	*page = pvec.pages[i];
+			loff_t		b_offset;
+
+			/*
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL),
+			 * or even swizzled back from swapper_space to tmpfs
+			 * file mapping. However, page->index will not change
+			 * because we have a reference on the page.
+			 *
+			 * Searching done if the page index is out of range.
+			 * If the current offset is not reaches the end of
+			 * the specified search range, there should be a hole
+			 * between them.
+			 */
+			if (page->index > end) {
+				if (type == HOLE_OFF && lastoff < endoff) {
+					*offset = lastoff;
+					found = true;
+				}
+				goto out;
+			}
+
+			lock_page(page);
+			/*
+			 * Page truncated or invalidated(page->mapping == NULL).
+			 * We can freely skip it and proceed to check the next
+			 * page.
+			 */
+			if (unlikely(page->mapping != inode->i_mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!page_has_buffers(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			found = xfs_lookup_buffer_offset(page, &b_offset, type);
+			if (found) {
+				/*
+				 * The found offset may be less than the start
+				 * point to search if this is the first time to
+				 * come here.
+				 */
+				*offset = max_t(loff_t, startoff, b_offset);
+				unlock_page(page);
+				goto out;
+			}
+
+			/*
+			 * We either searching data but nothing was found, or
+			 * searching hole but found a data buffer.  In either
+			 * case, probably the next page contains the desired
+			 * things, update the last offset to it so.
+			 */
+			lastoff = page_offset(page) + PAGE_SIZE;
+			unlock_page(page);
+		}
+
+		/*
+		 * The number of returned pages less than our desired, search
+		 * done.  In this case, nothing was found for searching data,
+		 * but we found a hole behind the last offset.
+		 */
+		if (nr_pages < want) {
+			if (type == HOLE_OFF) {
+				*offset = lastoff;
+				found = true;
+			}
+			break;
+		}
+
+		index = pvec.pages[i - 1]->index + 1;
+		pagevec_release(&pvec);
+	} while (index <= end);
+
+out:
+	pagevec_release(&pvec);
+	return found;
+}
+
 STATIC loff_t
 xfs_seek_data(
 	struct file		*file,
-- 
cgit v1.2.3


From 52f1acc8b56a333fbc7218711c3fa2fb3bf78b92 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:12:07 +0800
Subject: xfs: xfs_seek_data() refinement with unwritten extents check up from
 page cache

xfs_seek_data() refinement with unwritten extents check up from page cache.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 72 +++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 54 insertions(+), 18 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d78a746b6c7..3f9107431df 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1186,8 +1186,6 @@ xfs_seek_data(
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_bmbt_irec	map[2];
-	int			nmap = 2;
 	loff_t			uninitialized_var(offset);
 	xfs_fsize_t		isize;
 	xfs_fileoff_t		fsbno;
@@ -1203,36 +1201,74 @@ xfs_seek_data(
 		goto out_unlock;
 	}
 
-	fsbno = XFS_B_TO_FSBT(mp, start);
-
 	/*
 	 * Try to read extents from the first block indicated
 	 * by fsbno to the end block of the file.
 	 */
+	fsbno = XFS_B_TO_FSBT(mp, start);
 	end = XFS_B_TO_FSB(mp, isize);
+	for (;;) {
+		struct xfs_bmbt_irec	map[2];
+		int			nmap = 2;
+		unsigned int		i;
 
-	error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
-			       XFS_BMAPI_ENTIRE);
-	if (error)
-		goto out_unlock;
+		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+				       XFS_BMAPI_ENTIRE);
+		if (error)
+			goto out_unlock;
 
-	/*
-	 * Treat unwritten extent as data extent since it might
-	 * contains dirty data in page cache.
-	 */
-	if (map[0].br_startblock != HOLESTARTBLOCK) {
-		offset = max_t(loff_t, start,
-			       XFS_FSB_TO_B(mp, map[0].br_startoff));
-	} else {
+		/* No extents at given offset, must be beyond EOF */
+		if (nmap == 0) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		for (i = 0; i < nmap; i++) {
+			offset = max_t(loff_t, start,
+				       XFS_FSB_TO_B(mp, map[i].br_startoff));
+
+			/* Landed in a data extent */
+			if (map[i].br_startblock == DELAYSTARTBLOCK ||
+			    (map[i].br_state == XFS_EXT_NORM &&
+			     !isnullstartblock(map[i].br_startblock)))
+				goto out;
+
+			/*
+			 * Landed in an unwritten extent, try to search data
+			 * from page cache.
+			 */
+			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
+				if (xfs_find_get_desired_pgoff(inode, &map[i],
+							DATA_OFF, &offset))
+					goto out;
+			}
+		}
+
+		/*
+		 * map[0] is hole or its an unwritten extent but
+		 * without data in page cache.  Probably means that
+		 * we are reading after EOF if nothing in map[1].
+		 */
 		if (nmap == 1) {
 			error = ENXIO;
 			goto out_unlock;
 		}
 
-		offset = max_t(loff_t, start,
-			       XFS_FSB_TO_B(mp, map[1].br_startoff));
+		ASSERT(i > 1);
+
+		/*
+		 * Nothing was found, proceed to the next round of search
+		 * if reading offset not beyond or hit EOF.
+		 */
+		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
+		start = XFS_FSB_TO_B(mp, fsbno);
+		if (start >= isize) {
+			error = ENXIO;
+			goto out_unlock;
+		}
 	}
 
+out:
 	if (offset != file->f_pos)
 		file->f_pos = offset;
 
-- 
cgit v1.2.3


From b686d1f79acb65c6a34473c15fcfa2ee54aed8e2 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:12:18 +0800
Subject: xfs: xfs_seek_hole() refinement with hole searching from page cache
 for unwritten extents

xfs_seek_hole() refinement with hole searching from page cache for unwritten extent.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 67 insertions(+), 11 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3f9107431df..1eaeb8be3aa 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1289,9 +1289,9 @@ xfs_seek_hole(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	loff_t			uninitialized_var(offset);
-	loff_t			holeoff;
 	xfs_fsize_t		isize;
 	xfs_fileoff_t		fsbno;
+	xfs_filblks_t		end;
 	uint			lock;
 	int			error;
 
@@ -1307,21 +1307,77 @@ xfs_seek_hole(
 	}
 
 	fsbno = XFS_B_TO_FSBT(mp, start);
-	error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
-	if (error)
-		goto out_unlock;
+	end = XFS_B_TO_FSB(mp, isize);
+
+	for (;;) {
+		struct xfs_bmbt_irec	map[2];
+		int			nmap = 2;
+		unsigned int		i;
+
+		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+				       XFS_BMAPI_ENTIRE);
+		if (error)
+			goto out_unlock;
+
+		/* No extents at given offset, must be beyond EOF */
+		if (nmap == 0) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		for (i = 0; i < nmap; i++) {
+			offset = max_t(loff_t, start,
+				       XFS_FSB_TO_B(mp, map[i].br_startoff));
+
+			/* Landed in a hole */
+			if (map[i].br_startblock == HOLESTARTBLOCK)
+				goto out;
+
+			/*
+			 * Landed in an unwritten extent, try to search hole
+			 * from page cache.
+			 */
+			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
+				if (xfs_find_get_desired_pgoff(inode, &map[i],
+							HOLE_OFF, &offset))
+					goto out;
+			}
+		}
 
-	holeoff = XFS_FSB_TO_B(mp, fsbno);
-	if (holeoff <= start)
-		offset = start;
-	else {
 		/*
-		 * xfs_bmap_first_unused() could return a value bigger than
-		 * isize if there are no more holes past the supplied offset.
+		 * map[0] contains data or its unwritten but contains
+		 * data in page cache, probably means that we are
+		 * reading after EOF.  We should fix offset to point
+		 * to the end of the file(i.e., there is an implicit
+		 * hole at the end of any file).
 		 */
-		offset = min_t(loff_t, holeoff, isize);
+		if (nmap == 1) {
+			offset = isize;
+			break;
+		}
+
+		ASSERT(i > 1);
+
+		/*
+		 * Both mappings contains data, proceed to the next round of
+		 * search if the current reading offset not beyond or hit EOF.
+		 */
+		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
+		start = XFS_FSB_TO_B(mp, fsbno);
+		if (start >= isize) {
+			offset = isize;
+			break;
+		}
 	}
 
+out:
+	/*
+	 * At this point, we must have found a hole.  However, the returned
+	 * offset may be bigger than the file size as it may be aligned to
+	 * page boundary for unwritten extents, we need to deal with this
+	 * situation in particular.
+	 */
+	offset = min_t(loff_t, offset, isize);
 	if (offset != file->f_pos)
 		file->f_pos = offset;
 
-- 
cgit v1.2.3


From 4026c9fde9c67266932afd209e25bfef4474a1be Mon Sep 17 00:00:00 2001
From: Ben Myers <bpm@sgi.com>
Date: Thu, 13 Sep 2012 16:18:47 -0500
Subject: xfs: stop the sync worker before xfs_unmountfs

Cancel work of the xfs_sync_worker before teardown of the log in
xfs_unmountfs.  This prevents occasional crashes on unmount like so:

PID: 21602  TASK: ee9df060  CPU: 0   COMMAND: "kworker/0:3"
 #0 [c5377d28] crash_kexec at c0292c94
 #1 [c5377d80] oops_end at c07090c2
 #2 [c5377d98] no_context at c06f614e
 #3 [c5377dbc] __bad_area_nosemaphore at c06f6281
 #4 [c5377df4] bad_area_nosemaphore at c06f629b
 #5 [c5377e00] do_page_fault at c070b0cb
 #6 [c5377e7c] error_code (via page_fault) at c070892c
    EAX: f300c6a8  EBX: f300c6a8  ECX: 000000c0  EDX: 000000c0  EBP: c5377ed0
    DS:  007b      ESI: 00000000  ES:  007b      EDI: 00000001  GS:  ffffad20
    CS:  0060      EIP: c0481ad0  ERR: ffffffff  EFLAGS: 00010246
 #7 [c5377eb0] atomic64_read_cx8 at c0481ad0
 #8 [c5377ebc] xlog_assign_tail_lsn_locked at f7cc7c6e [xfs]
 #9 [c5377ed4] xfs_trans_ail_delete_bulk at f7ccd520 [xfs]
#10 [c5377f0c] xfs_buf_iodone at f7ccb602 [xfs]
#11 [c5377f24] xfs_buf_do_callbacks at f7cca524 [xfs]
#12 [c5377f30] xfs_buf_iodone_callbacks at f7cca5da [xfs]
#13 [c5377f4c] xfs_buf_iodone_work at f7c718d0 [xfs]
#14 [c5377f58] process_one_work at c024ee4c
#15 [c5377f98] worker_thread at c024f43d
#16 [c5377fbc] kthread at c025326b
#17 [c5377fe8] kernel_thread_helper at c070e834

PID: 26653  TASK: e79143b0  CPU: 3   COMMAND: "umount"
 #0 [cde0fda0] __schedule at c0706595
 #1 [cde0fe28] schedule at c0706b89
 #2 [cde0fe30] schedule_timeout at c0705600
 #3 [cde0fe94] __down_common at c0706098
 #4 [cde0fec8] __down at c0706122
 #5 [cde0fed0] down at c025936f
 #6 [cde0fee0] xfs_buf_lock at f7c7131d [xfs]
 #7 [cde0ff00] xfs_freesb at f7cc2236 [xfs]
 #8 [cde0ff10] xfs_fs_put_super at f7c80f21 [xfs]
 #9 [cde0ff1c] generic_shutdown_super at c0333d7a
#10 [cde0ff38] kill_block_super at c0333e0f
#11 [cde0ff48] deactivate_locked_super at c0334218
#12 [cde0ff58] deactivate_super at c033495d
#13 [cde0ff68] mntput_no_expire at c034bc13
#14 [cde0ff7c] sys_umount at c034cc69
#15 [cde0ffa0] sys_oldumount at c034ccd4
#16 [cde0ffb0] system_call at c0707e66

commit 11159a05 added this to xfs_log_unmount and needs to be cleaned up
at a later date.

Signed-off-by: Ben Myers <bpm@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
---
 fs/xfs/xfs_super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bdaf4cb9f4a..19e2380fb86 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -919,6 +919,7 @@ xfs_fs_put_super(
 	struct xfs_mount	*mp = XFS_M(sb);
 
 	xfs_filestream_unmount(mp);
+	cancel_delayed_work_sync(&mp->m_sync_work);
 	xfs_unmountfs(mp);
 	xfs_syncd_stop(mp);
 	xfs_freesb(mp);
-- 
cgit v1.2.3


From c3a58fecdd1934a8538ada9073107625f5151687 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Fri, 17 Aug 2012 18:19:38 -0300
Subject: Make inode64 a remountable option

There is no reason why a user must unmount and mount an XFS filesystem
to enable the 'inode64' option. So, this patch makes it a remountable
option.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 19e2380fb86..c416a01fcb1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -120,12 +120,13 @@ mempool_t *xfs_ioend_pool;
  * in the future, too.
  */
 enum {
-	Opt_barrier, Opt_nobarrier, Opt_err
+	Opt_barrier, Opt_nobarrier, Opt_inode64, Opt_err
 };
 
 static const match_table_t tokens = {
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
+	{Opt_inode64, "inode64"},
 	{Opt_err, NULL}
 };
 
@@ -1031,6 +1032,30 @@ xfs_restore_resvblks(struct xfs_mount *mp)
 	xfs_reserve_blocks(mp, &resblks, NULL);
 }
 
+STATIC void
+xfs_set_inode64(struct xfs_mount *mp)
+{
+	int i = 0;
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, i);
+		pag->pagi_inodeok = 1;
+		pag->pagf_metadata = 0;
+		xfs_perag_put(pag);
+	}
+
+	/* There is no need for lock protection on m_flags,
+	 * the rw_semaphore of the VFS superblock is locked
+	 * during mount/umount/remount operations, so this is
+	 * enough to avoid concurency on the m_flags field
+	 */
+	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
+			 XFS_MOUNT_SMALL_INUMS);
+	mp->m_maxagi = i;
+}
+
 STATIC int
 xfs_fs_remount(
 	struct super_block	*sb,
@@ -1056,6 +1081,9 @@ xfs_fs_remount(
 		case Opt_nobarrier:
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 			break;
+		case Opt_inode64:
+			xfs_set_inode64(mp);
+			break;
 		default:
 			/*
 			 * Logically we would return an error here to prevent
-- 
cgit v1.2.3


From 8aea3ff411b2ce8fe7b46644298ed243a920eb24 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:36 -0300
Subject: xfs: Fix m_agirotor reset during AG selection

xfs_ialloc_next_ag() currently resets m_agirotor when it is equal to
m_maxagi:

         if (++mp->m_agirotor == mp->m_maxagi)
	         mp->m_agirotor = 0;

But, if for some reason mp->m_maxagi changes to a lower value than
current m_agirotor, this condition will never be true, causing
m_agirotor to exceed the maximum allowed value (m_maxagi).

This mainly affects lookups of xfs_perag structs in the radix tree,
since the agno value used for the lookup is based on m_agirotor. An
out-of-range m_agirotor may cause the lookup to fail, in which case it
returns NULL.

For example, the value of m_maxagi is decreased during the
inode64->inode32 remount process, which is where I found this problem.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_ialloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5aceb3f8ecd..445bf1aef31 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -431,7 +431,7 @@ xfs_ialloc_next_ag(
 
 	spin_lock(&mp->m_agirotor_lock);
 	agno = mp->m_agirotor;
-	if (++mp->m_agirotor == mp->m_maxagi)
+	if (++mp->m_agirotor >= mp->m_maxagi)
 		mp->m_agirotor = 0;
 	spin_unlock(&mp->m_agirotor_lock);
 
-- 
cgit v1.2.3


From 08bf540412ed82a15cb9068249ad49b410a7b082 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:37 -0300
Subject: xfs: make inode64 as the default allocation mode

Since 64-bit inodes can be accessed while using inode32, and these can
also be used on 32-bit kernels, there is no reason to still keep inode32
as the default mount option.  If the filesystem cannot handle 64-bit
inode numbers (i.e. CONFIG_LBDAF is not enabled and BITS_PER_LONG == 32),
XFS_MOUNT_SMALL_INUMS will still be set by default, so inode64 is not an
unconditional default value.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c416a01fcb1..996257d36fd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool;
 					 * unwritten extent conversion */
 #define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
 #define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
+#define MNTOPT_32BITINODE   "inode32"	/* inode allocation limited to
+					 * XFS_MAXINUMBER_32 */
 #define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
 #define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
 #define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
@@ -198,7 +200,9 @@ xfs_parseargs(
 	 */
 	mp->m_flags |= XFS_MOUNT_BARRIER;
 	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+#if !XFS_BIG_INUMS
 	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+#endif
 
 	/*
 	 * These can be overridden by the mount option parsing.
@@ -295,6 +299,8 @@ xfs_parseargs(
 				return EINVAL;
 			}
 			dswidth = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
+			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
 			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 #if !XFS_BIG_INUMS
@@ -493,6 +499,7 @@ xfs_showargs(
 		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
 		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
 		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
+		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_32BITINODE },
 		{ 0, NULL }
 	};
 	static struct proc_xfs_info xfs_info_unset[] = {
-- 
cgit v1.2.3


From 2d2194f61fddab3a9731b6e7a7ae3a4a19dd810c Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:38 -0300
Subject: xfs: reduce code duplication handling inode32/64 options

Add xfs_set_inode32() to be used to enable inode32 allocation mode. This
will reduce the amount of duplicated code needed to mount/remount a
filesystem with the inode32 option.  This patch also changes
xfs_set_inode64() to return the maximum AG number in which inodes can be
allocated instead of setting mp->m_maxagi itself, so that the behaviour
is the same as xfs_set_inode32().  This simplifies code that calls these
functions and needs to know the maximum AG in which inodes can be
allocated.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_mount.c | 43 +++-----------------------
 fs/xfs/xfs_super.c | 89 +++++++++++++++++++++++++++++++++++++++---------------
 fs/xfs/xfs_super.h |  2 ++
 3 files changed, 72 insertions(+), 62 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 29c2f83d414..b2bd3a0e637 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -440,7 +440,7 @@ xfs_initialize_perag(
 	xfs_agnumber_t	agcount,
 	xfs_agnumber_t	*maxagi)
 {
-	xfs_agnumber_t	index, max_metadata;
+	xfs_agnumber_t	index;
 	xfs_agnumber_t	first_initialised = 0;
 	xfs_perag_t	*pag;
 	xfs_agino_t	agino;
@@ -500,43 +500,10 @@ xfs_initialize_perag(
 	else
 		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 
-	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-		/*
-		 * Calculate how much should be reserved for inodes to meet
-		 * the max inode percentage.
-		 */
-		if (mp->m_maxicount) {
-			__uint64_t	icount;
-
-			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-			do_div(icount, 100);
-			icount += sbp->sb_agblocks - 1;
-			do_div(icount, sbp->sb_agblocks);
-			max_metadata = icount;
-		} else {
-			max_metadata = agcount;
-		}
-
-		for (index = 0; index < agcount; index++) {
-			ino = XFS_AGINO_TO_INO(mp, index, agino);
-			if (ino > XFS_MAXINUMBER_32) {
-				index++;
-				break;
-			}
-
-			pag = xfs_perag_get(mp, index);
-			pag->pagi_inodeok = 1;
-			if (index < max_metadata)
-				pag->pagf_metadata = 1;
-			xfs_perag_put(pag);
-		}
-	} else {
-		for (index = 0; index < agcount; index++) {
-			pag = xfs_perag_get(mp, index);
-			pag->pagi_inodeok = 1;
-			xfs_perag_put(pag);
-		}
-	}
+	if (mp->m_flags & XFS_MOUNT_32BITINODES)
+		index = xfs_set_inode32(mp);
+	else
+		index = xfs_set_inode64(mp);
 
 	if (maxagi)
 		*maxagi = index;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 996257d36fd..d6619d68553 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -599,6 +599,71 @@ xfs_max_file_offset(
 	return (((__uint64_t)pagefactor) << bitshift) - 1;
 }
 
+xfs_agnumber_t
+xfs_set_inode32(struct xfs_mount *mp)
+{
+	xfs_agnumber_t	index = 0;
+	xfs_sb_t	*sbp = &mp->m_sb;
+	xfs_agnumber_t	max_metadata;
+	xfs_agino_t	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
+	xfs_ino_t	ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
+	xfs_perag_t	*pag;
+
+	/* Calculate how much should be reserved for inodes to meet
+	 * the max inode percentage.
+	 */
+	if (mp->m_maxicount) {
+		__uint64_t	icount;
+
+		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+		do_div(icount, 100);
+		icount += sbp->sb_agblocks - 1;
+		do_div(icount, sbp->sb_agblocks);
+		max_metadata = icount;
+	} else {
+		max_metadata = sbp->sb_agcount;
+	}
+
+	for (index = 0; index < sbp->sb_agcount; index++) {
+		ino = XFS_AGINO_TO_INO(mp, index, agino);
+		if (ino > XFS_MAXINUMBER_32) {
+			index++;
+			break;
+		}
+
+		pag = xfs_perag_get(mp, index);
+		pag->pagi_inodeok = 1;
+		if (index < max_metadata)
+			pag->pagf_metadata = 1;
+		xfs_perag_put(pag);
+	}
+	return index;
+}
+
+xfs_agnumber_t
+xfs_set_inode64(struct xfs_mount *mp)
+{
+	xfs_agnumber_t index = 0;
+
+	for (index = 0; index < mp->m_sb.sb_agcount; index++) {
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, index);
+		pag->pagi_inodeok = 1;
+		pag->pagf_metadata = 0;
+		xfs_perag_put(pag);
+	}
+
+	/* There is no need for lock protection on m_flags,
+	 * the rw_semaphore of the VFS superblock is locked
+	 * during mount/umount/remount operations, so this is
+	 * enough to avoid concurency on the m_flags field
+	 */
+	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
+			 XFS_MOUNT_SMALL_INUMS);
+	return index;
+}
+
 STATIC int
 xfs_blkdev_get(
 	xfs_mount_t		*mp,
@@ -1039,30 +1104,6 @@ xfs_restore_resvblks(struct xfs_mount *mp)
 	xfs_reserve_blocks(mp, &resblks, NULL);
 }
 
-STATIC void
-xfs_set_inode64(struct xfs_mount *mp)
-{
-	int i = 0;
-
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		struct xfs_perag	*pag;
-
-		pag = xfs_perag_get(mp, i);
-		pag->pagi_inodeok = 1;
-		pag->pagf_metadata = 0;
-		xfs_perag_put(pag);
-	}
-
-	/* There is no need for lock protection on m_flags,
-	 * the rw_semaphore of the VFS superblock is locked
-	 * during mount/umount/remount operations, so this is
-	 * enough to avoid concurency on the m_flags field
-	 */
-	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
-			 XFS_MOUNT_SMALL_INUMS);
-	mp->m_maxagi = i;
-}
-
 STATIC int
 xfs_fs_remount(
 	struct super_block	*sb,
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 09b0c26b224..9de4a920ba0 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -75,6 +75,8 @@ struct block_device;
 extern __uint64_t xfs_max_file_offset(unsigned int);
 
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
+extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
+extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
 
 extern const struct export_operations xfs_export_operations;
 extern const struct xattr_handler *xfs_xattr_handlers[];
-- 
cgit v1.2.3


From 4c0837224c677db35cd85b04a77504c496cadb66 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:39 -0300
Subject: xfs: Fix mp->m_maxagi update during inode64 remount

With the changes made to xfs_set_inode64() to make it behave like
xfs_set_inode32() (the caller is now responsible for updating
mp->m_maxagi), use the return value of xfs_set_inode64() to update
mp->m_maxagi during remount.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d6619d68553..aeb03f9a896 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1130,7 +1130,7 @@ xfs_fs_remount(
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 			break;
 		case Opt_inode64:
-			xfs_set_inode64(mp);
+			mp->m_maxagi = xfs_set_inode64(mp);
 			break;
 		default:
 			/*
-- 
cgit v1.2.3


From 4056c1d08d2a7c50ae7414db7c1783ba45b4835d Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:40 -0300
Subject: xfs: add inode64->inode32 transition into xfs_set_inode32()

To make inode32 a remountable option, xfs_set_inode32() should be able
to make the transition from the inode64 option, disabling inode
allocation on higher AGs.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index aeb03f9a896..168d4984ce8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -603,6 +603,7 @@ xfs_agnumber_t
 xfs_set_inode32(struct xfs_mount *mp)
 {
 	xfs_agnumber_t	index = 0;
+	xfs_agnumber_t	maxagi = 0;
 	xfs_sb_t	*sbp = &mp->m_sb;
 	xfs_agnumber_t	max_metadata;
 	xfs_agino_t	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
@@ -626,18 +627,26 @@ xfs_set_inode32(struct xfs_mount *mp)
 
 	for (index = 0; index < sbp->sb_agcount; index++) {
 		ino = XFS_AGINO_TO_INO(mp, index, agino);
+
 		if (ino > XFS_MAXINUMBER_32) {
-			index++;
-			break;
+			pag = xfs_perag_get(mp, index);
+			pag->pagi_inodeok = 0;
+			pag->pagf_metadata = 0;
+			xfs_perag_put(pag);
+			continue;
 		}
 
 		pag = xfs_perag_get(mp, index);
 		pag->pagi_inodeok = 1;
+		maxagi++;
 		if (index < max_metadata)
 			pag->pagf_metadata = 1;
 		xfs_perag_put(pag);
 	}
-	return index;
+	mp->m_flags |= (XFS_MOUNT_32BITINODES |
+			XFS_MOUNT_SMALL_INUMS);
+
+	return maxagi;
 }
 
 xfs_agnumber_t
-- 
cgit v1.2.3


From 2ea0392983a82f7dc3055568ae0f2558724d119b Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:41 -0300
Subject: xfs: Make inode32 a remountable option

As inode64 is the default option now, and was also made remountable
previously, inode32 can also be remounted on-the-fly when it is needed.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs/xfs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 168d4984ce8..d93f2c7364c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -122,13 +122,18 @@ mempool_t *xfs_ioend_pool;
  * in the future, too.
  */
 enum {
-	Opt_barrier, Opt_nobarrier, Opt_inode64, Opt_err
+	Opt_barrier,
+	Opt_nobarrier,
+	Opt_inode64,
+	Opt_inode32,
+	Opt_err
 };
 
 static const match_table_t tokens = {
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
 	{Opt_inode64, "inode64"},
+	{Opt_inode32, "inode32"},
 	{Opt_err, NULL}
 };
 
@@ -1141,6 +1146,9 @@ xfs_fs_remount(
 		case Opt_inode64:
 			mp->m_maxagi = xfs_set_inode64(mp);
 			break;
+		case Opt_inode32:
+			mp->m_maxagi = xfs_set_inode32(mp);
+			break;
 		default:
 			/*
 			 * Logically we would return an error here to prevent
-- 
cgit v1.2.3