From 5407e24229408d7586ee451a384fc13e4a2332be Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 18 May 2012 09:28:23 -0400 Subject: GFS2: Fold quota data into the reservations struct This patch moves the ancillary quota data structures into the block reservations structure. This saves GFS2 some time and effort in allocating and deallocating the qadata structure. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs/gfs2/bmap.c') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index dab54099dd9..6d957a86482 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1045,12 +1045,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; find_metapath(sdp, lblock, &mp, ip->i_height); - if (!gfs2_qadata_get(ip)) - return -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) + return error; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out; + return error; while (height--) { struct strip_mine sm; @@ -1064,8 +1065,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) gfs2_quota_unhold(ip); -out: - gfs2_qadata_put(ip); return error; } @@ -1167,19 +1166,14 @@ static int do_grow(struct inode *inode, u64 size) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; - struct gfs2_qadata *qa = NULL; int error; int unstuff = 0; if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - qa = gfs2_qadata_get(ip); - if (qa == NULL) - return -ENOMEM; - error = gfs2_quota_lock_check(ip); if (error) - goto do_grow_alloc_put; + return error; error = gfs2_inplace_reserve(ip, 1); if (error) @@ -1214,8 +1208,6 @@ do_grow_release: gfs2_inplace_release(ip); do_grow_qunlock: gfs2_quota_unlock(ip); -do_grow_alloc_put: - gfs2_qadata_put(ip); } return error; } -- cgit v1.2.3 From 8e2e00473598dd5379d8408cb974dade000acafc Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 19 Jul 2012 08:12:40 -0400 Subject: GFS2: Reduce file fragmentation This patch reduces GFS2 file fragmentation by pre-reserving blocks. The resulting improved on disk layout greatly speeds up operations in cases which would have resulted in interlaced allocation of blocks previously. A typical example of this is 10 parallel dd processes, each writing to a file in a common dirctory. The implementation uses an rbtree of reservations attached to each resource group (and each inode). Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/gfs2/bmap.c') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6d957a86482..49cd7dd4a9f 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (error) goto out_rlist; + if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ + gfs2_rs_deltree(ip->i_res); + error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, revokes); -- cgit v1.2.3 From 4a993fb1503d11496974bd86c0b7123f63d9c8a2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Jul 2012 15:21:20 +0100 Subject: GFS2: Add structure to contain rgrp, bitmap, offset tuple This patch introduces a new structure, gfs2_rbm, which is a tuple of a resource group, a bitmap within the resource group and an offset within that bitmap. This is designed to make manipulating these sets of variables easier. There is also a new helper function which converts this representation back to a disk block address. In addition, the rbtree nodes which are used for the reservations were not being correctly initialised, which is now fixed. Also, the tracing was not passing through the inode where it should have been. That is mostly fixed aside from one corner case. This needs to be revisited since there can also be a NULL rgrp in some cases which results in the device being incorrect in the trace. This is intended to be the first step towards cleaning up some of the allocation code, and some further bug fixes. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2/bmap.c') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49cd7dd4a9f..1fd3ae237bd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, goto out_rlist; if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, -- cgit v1.2.3 From 9dbe9610b9df4efe0946299804ed46bb8f91dec2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 31 Oct 2012 10:37:10 +0000 Subject: GFS2: Add Orlov allocator Just like ext3, this works on the root directory and any directory with the +T flag set. Also, just like ext3, any subdirectory created in one of the just mentioned cases will be allocated to a random resource group (GFS2 equivalent of a block group). If you are creating a set of directories, each of which will contain a job running on a different node, then by setting +T on the parent directory before creating the subdirectories, each will land up in a different resource group, and thus resource group contention between nodes will be kept to a minimum. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2/bmap.c') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1fd3ae237bd..de70e52caf3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1178,7 +1178,7 @@ static int do_grow(struct inode *inode, u64 size) if (error) return error; - error = gfs2_inplace_reserve(ip, 1); + error = gfs2_inplace_reserve(ip, 1, 0); if (error) goto do_grow_qunlock; unstuff = 1; -- cgit v1.2.3 From fa731fc4e045a801814547188a63c2cd49a4cfe6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 13 Nov 2012 09:50:28 +0000 Subject: GFS2: Fix truncation of journaled data files This patch fixes an issue relating to not having enough revokes available when truncating journaled data files. In order to ensure that we do no run out, the truncation is broken into separate pieces if it is large enough. Tested using fsx on a journaled data file. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) (limited to 'fs/gfs2/bmap.c') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index de70e52caf3..a68e91bcef3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -991,6 +991,41 @@ unlock: return err; } +/** + * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files + * @inode: The inode being truncated + * @oldsize: The original (larger) size + * @newsize: The new smaller size + * + * With jdata files, we have to journal a revoke for each block which is + * truncated. As a result, we need to split this into separate transactions + * if the number of pages being truncated gets too large. + */ + +#define GFS2_JTRUNC_REVOKES 8192 + +static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) +{ + struct gfs2_sbd *sdp = GFS2_SB(inode); + u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; + u64 chunk; + int error; + + while (oldsize != newsize) { + chunk = oldsize - newsize; + if (chunk > max_chunk) + chunk = max_chunk; + truncate_pagecache(inode, oldsize, oldsize - chunk); + oldsize -= chunk; + gfs2_trans_end(sdp); + error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); + if (error) + return error; + } + + return 0; +} + static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1000,8 +1035,10 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) int journaled = gfs2_is_jdata(ip); int error; - error = gfs2_trans_begin(sdp, - RES_DINODE + (journaled ? RES_JDATA : 0), 0); + if (journaled) + error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); + else + error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) return error; @@ -1026,7 +1063,16 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(ip, dibh->b_data); - truncate_pagecache(inode, oldsize, newsize); + if (journaled) + error = gfs2_journaled_truncate(inode, oldsize, newsize); + else + truncate_pagecache(inode, oldsize, newsize); + + if (error) { + brelse(dibh); + return error; + } + out_brelse: brelse(dibh); out: -- cgit v1.2.3