From 0a305e496059a113f93bdd3ad27a5aaa917fe34d Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 6 Jun 2012 11:17:59 +0100 Subject: GFS2: Extend the life of the reservations This patch lengthens the lifespan of the reservations structure for inodes. Before, they were allocated and deallocated for every write operation. With this patch, they are allocated when the first write occurs, and deallocated when the last process closes the file. It's more efficient to do it this way because it saves GFS2 a lot of unnecessary allocates and frees. It also gives us more flexibility for the future: (1) we can now fold the qadata structure back into the structure and save those alloc/frees, (2) we can use this for multi-block reservations. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 68 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 31 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9bd197..e944fefbc9a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -417,6 +417,39 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ + int error = 0; + + down_write(&ip->i_rw_mutex); + if (!ip->i_res) { + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) + error = -ENOMEM; + } + up_write(&ip->i_rw_mutex); + return error; +} + +/** + * gfs2_rs_delete - delete a reservation + * @ip: The inode for this reservation + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_res) { + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + up_write(&ip->i_rw_mutex); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -992,22 +1025,6 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) return ip->i_qadata; } -/** - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode - * @ip: the incore GFS2 inode structure - * - * Returns: the struct gfs2_qadata - */ - -static int gfs2_blkrsv_get(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res != NULL); - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - return -ENOMEM; - return 0; -} - /** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data @@ -1162,13 +1179,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) return -ENOSPC; } -static void gfs2_blkrsv_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res == NULL); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; -} - /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1181,14 +1191,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_blkreserv *rs; - int error; + int error = 0; u64 last_unlinked = NO_BLOCK; int tries = 0; - error = gfs2_blkrsv_get(ip); - if (error) - return error; - rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { @@ -1213,7 +1219,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) out: if (error) - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; return error; } @@ -1230,7 +1236,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; } /** @@ -1496,7 +1502,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (ip->i_res == NULL) + if (ip->i_res->rs_requested == 0) return -ECANCELED; rgd = ip->i_rgd; -- cgit v1.2.3 From 5407e24229408d7586ee451a384fc13e4a2332be Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 18 May 2012 09:28:23 -0400 Subject: GFS2: Fold quota data into the reservations struct This patch moves the ancillary quota data structures into the block reservations structure. This saves GFS2 some time and effort in allocating and deallocating the qadata structure. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e944fefbc9a..9eca6a9cff8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1006,25 +1006,6 @@ out: return ret; } -/** - * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode - * @ip: the incore GFS2 inode structure - * - * Returns: the struct gfs2_qadata - */ - -struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - int error; - BUG_ON(ip->i_qadata != NULL); - ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS); - error = gfs2_rindex_update(sdp); - if (error) - fs_warn(sdp, "rindex update returns %d\n", error); - return ip->i_qadata; -} - /** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data -- cgit v1.2.3 From 90306c41dc3d8e5f12ecd0193dae99e0e7f6e896 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 29 May 2012 23:01:09 -0500 Subject: GFS2: Use lvbs for storing rgrp information with mount option Instead of reading in the resource groups when gfs2 is checking for free space to allocate from, gfs2 can store the necessary infromation in the resource group's lvb. Also, instead of searching for unlinked inodes in every resource group that's checked for free space, gfs2 can store the number of unlinked but inodes in the lvb, and only check for unlinked inodes if it will find some. The first time a resource group is locked, the lvb must initialized. Since this involves counting the unlinked inodes in the resource group, this takes a little extra time. But after that, if the resource group is locked with GL_SKIP, the buffer head won't be read in unless it's actually needed. Enabling the resource groups lvbs is done via the rgrplvb mount option. If this option isn't set, the lvbs will still be set and updated, but they won't be verfied or used by the filesystem. To safely turn on this option, all of the nodes mounting the filesystem must be running code with this patch, and the filesystem must have been completely unmounted since they were updated. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 138 insertions(+), 9 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9eca6a9cff8..3c6f7ed16a3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -660,6 +660,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; @@ -769,9 +770,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); } +static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + + if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || + rgl->rl_dinodes != str->rg_dinodes || + rgl->rl_igeneration != str->rg_igeneration) + return 0; + return 1; +} + +static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) +{ + const struct gfs2_rgrp *str = buf; + + rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); + rgl->rl_flags = str->rg_flags; + rgl->rl_free = str->rg_free; + rgl->rl_dinodes = str->rg_dinodes; + rgl->rl_igeneration = str->rg_igeneration; + rgl->__pad = 0UL; +} + +static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; + rgl->rl_unlinked = cpu_to_be32(unlinked); +} + +static u32 count_unlinked(struct gfs2_rgrpd *rgd) +{ + struct gfs2_bitmap *bi; + const u32 length = rgd->rd_length; + const u8 *buffer = NULL; + u32 i, goal, count = 0; + + for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { + goal = 0; + buffer = bi->bi_bh->b_data + bi->bi_offset; + WARN_ON(!buffer_uptodate(bi->bi_bh)); + while (goal < bi->bi_len * GFS2_NBBY) { + goal = gfs2_bitfit(buffer, bi->bi_len, goal, + GFS2_BLKST_UNLINKED); + if (goal == BFITNOENT) + break; + count++; + goal++; + } + } + + return count; +} + + /** - * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps - * @gh: The glock holder for the resource group + * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps + * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. @@ -779,9 +836,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) * Returns: errno */ -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) { - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; @@ -789,6 +845,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) unsigned int x, y; int error; + if (rgd->rd_bits[0].bi_bh != NULL) + return 0; + for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); @@ -815,7 +874,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, + rgd->rd_bits[0].bi_bh->b_data); + } + else if (sdp->sd_args.ar_rgrplvb) { + if (!gfs2_rgrp_lvb_valid(rgd)){ + gfs2_consist_rgrpd(rgd); + error = -EIO; + goto fail; + } + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= ~GFS2_RDF_CHECK; + } return 0; fail: @@ -829,6 +901,39 @@ fail: return error; } +int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +{ + u32 rl_flags; + + if (rgd->rd_flags & GFS2_RDF_UPTODATE) + return 0; + + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + return gfs2_rgrp_bh_get(rgd); + + rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); + rl_flags &= ~GFS2_RDF_MASK; + rgd->rd_flags &= GFS2_RDF_MASK; + rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= ~GFS2_RDF_CHECK; + rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgd->rd_free_clone = rgd->rd_free; + rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); + rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); + return 0; +} + +int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +{ + struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; + struct gfs2_sbd *sdp = rgd->rd_sbd; + + if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) + return 0; + return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); +} + /** * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @gh: The glock holder for the resource group @@ -842,8 +947,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) for (x = 0; x < length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; - brelse(bi->bi_bh); - bi->bi_bh = NULL; + if (bi->bi_bh) { + brelse(bi->bi_bh); + bi->bi_bh = NULL; + } } } @@ -987,6 +1094,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, bh, 1); gfs2_rgrp_out(rgd, bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); gfs2_trans_end(sdp); } } @@ -1116,6 +1224,9 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) int error, rg_locked, flags = LM_FLAG_TRY; int loops = 0; + if (sdp->sd_args.ar_rgrplvb) + flags |= GL_SKIP; + if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) rgd = begin = ip->i_rgd; else @@ -1133,22 +1244,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) } else { error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); + if (!error && sdp->sd_args.ar_rgrplvb) { + error = update_rgrp_lvb(rgd); + if (error) { + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + return error; + } + } } switch (error) { case 0: if (try_rgrp_fit(rgd, ip)) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rgd); ip->i_rgd = rgd; return 0; } - if (rgd->rd_flags & GFS2_RDF_CHECK) + if (rgd->rd_flags & GFS2_RDF_CHECK) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rgd); try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); + } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: rgd = gfs2_rgrpd_get_next(rgd); if (rgd == begin) { - flags = 0; + flags &= ~LM_FLAG_TRY; loops++; } break; @@ -1529,6 +1652,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -1575,6 +1699,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth) @@ -1611,6 +1736,8 @@ void gfs2_unlink_di(struct inode *inode) trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, 1); } static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) @@ -1630,6 +1757,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, -1); gfs2_statfs_change(sdp, 0, +1, -1); } -- cgit v1.2.3 From 666d1d8ad201803862514317c17695925e61316b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 13 Jun 2012 23:03:56 -0400 Subject: GFS2: Combine functions get_local_rgrp and gfs2_inplace_reserve This function combines rgrp functions get_local_rgrp and gfs2_inplace_reserve so that the double retry loop is gone. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 82 +++++++++++++++++++++------------------------------------- 1 file changed, 29 insertions(+), 53 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3c6f7ed16a3..e53d0a1c234 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1207,25 +1207,30 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip } /** - * get_local_rgrp - Choose and lock a rgrp for allocation + * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for - * @last_unlinked: the last unlinked block - * - * Try to acquire rgrp in way which avoids contending with others. + * @requested: the number of blocks to be reserved * * Returns: errno */ -static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd, *begin = NULL; struct gfs2_blkreserv *rs = ip->i_res; - int error, rg_locked, flags = LM_FLAG_TRY; + int error = 0, rg_locked, flags = LM_FLAG_TRY; + u64 last_unlinked = NO_BLOCK; int loops = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; + rs = ip->i_res; + rs->rs_requested = requested; + if (gfs2_assert_warn(sdp, requested)) { + error = -EINVAL; + goto out; + } if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) rgd = begin = ip->i_rgd; @@ -1263,63 +1268,34 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) if (rgd->rd_flags & GFS2_RDF_CHECK) { if (sdp->sd_args.ar_rgrplvb) gfs2_rgrp_bh_get(rgd); - try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); + try_rgrp_unlink(rgd, &last_unlinked, + ip->i_no_addr); } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: rgd = gfs2_rgrpd_get_next(rgd); - if (rgd == begin) { - flags &= ~LM_FLAG_TRY; - loops++; - } + if (rgd != begin) /* If we didn't wrap */ + break; + + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && + !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + goto out; + } else if (loops == 2) + /* Flushing the log may release space */ + gfs2_log_flush(sdp, NULL); break; default: - return error; + goto out; } } - - return -ENOSPC; -} - -/** - * gfs2_inplace_reserve - Reserve space in the filesystem - * @ip: the inode to reserve space for - * @requested: the number of blocks to be reserved - * - * Returns: errno - */ - -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_blkreserv *rs; - int error = 0; - u64 last_unlinked = NO_BLOCK; - int tries = 0; - - rs = ip->i_res; - rs->rs_requested = requested; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } - - do { - error = get_local_rgrp(ip, &last_unlinked); - if (error != -ENOSPC) - break; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - break; - continue; - } - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - } while (tries++ < 3); + error = -ENOSPC; out: if (error) -- cgit v1.2.3 From 294f2ad5a545eb71d397623743ddd8201131bdad Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Wed, 18 Jul 2012 11:56:59 -0400 Subject: GFS2: kernel panic with small gfs2 filesystems - 1 RG In the unlikely setup where there's only one resource group in the gfs2 filesystem, gfs2_rgrpd_get_next() returns a NULL rgd that is not dealt with properly, causing a kernel NULL ptr dereference. This patch fixes this issue. Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e53d0a1c234..fb7079263ea 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1276,6 +1276,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) /* fall through */ case GLR_TRYFAILED: rgd = gfs2_rgrpd_get_next(rgd); + rgd = rgd ? : begin; /* if NULL, wrap */ if (rgd != begin) /* If we didn't wrap */ break; -- cgit v1.2.3 From 8e2e00473598dd5379d8408cb974dade000acafc Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 19 Jul 2012 08:12:40 -0400 Subject: GFS2: Reduce file fragmentation This patch reduces GFS2 file fragmentation by pre-reserving blocks. The resulting improved on disk layout greatly speeds up operations in cases which would have resulted in interlaced allocation of blocks previously. A typical example of this is 10 parallel dd processes, each writing to a file in a common dirctory. The implementation uses an rbtree of reservations attached to each resource group (and each inode). Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 578 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 523 insertions(+), 55 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index fb7079263ea..4d34887a601 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,6 +35,9 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +#define RSRV_CONTENTION_FACTOR 4 +#define RGRP_RSRV_MAX_CONTENDERS 2 + #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -177,6 +180,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) return tmp; } +/** + * rs_cmp - multi-block reservation range compare + * @blk: absolute file system block number of the new reservation + * @len: number of blocks in the new reservation + * @rs: existing reservation to compare against + * + * returns: 1 if the block range is beyond the reach of the reservation + * -1 if the block range is before the start of the reservation + * 0 if the block range overlaps with the reservation + */ +static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +{ + u64 startblk = gfs2_rs_startblk(rs); + + if (blk >= startblk + rs->rs_free) + return 1; + if (blk + len - 1 < startblk) + return -1; + return 0; +} + +/** + * rs_find - Find a rgrp multi-block reservation that contains a given block + * @rgd: The rgrp + * @rgblk: The block we're looking for, relative to the rgrp + */ +static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) +{ + struct rb_node **newn; + int rc; + u64 fsblk = rgblk + rgd->rd_data0; + + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + while (*newn) { + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(fsblk, 1, cur); + if (rc < 0) + newn = &((*newn)->rb_left); + else if (rc > 0) + newn = &((*newn)->rb_right); + else { + spin_unlock(&rgd->rd_rsspin); + return cur; + } + } + spin_unlock(&rgd->rd_rsspin); + return NULL; +} + /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. @@ -424,19 +478,93 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) int gfs2_rs_alloc(struct gfs2_inode *ip) { int error = 0; + struct gfs2_blkreserv *res; + + if (ip->i_res) + return 0; + + res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!res) + error = -ENOMEM; down_write(&ip->i_rw_mutex); - if (!ip->i_res) { - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - error = -ENOMEM; - } + if (ip->i_res) + kmem_cache_free(gfs2_rsrv_cachep, res); + else + ip->i_res = res; up_write(&ip->i_rw_mutex); return error; } +static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +{ + gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", + rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, + rs->rs_free); +} + /** - * gfs2_rs_delete - delete a reservation + * __rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +static void __rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + /* We can't do this: The reason is that when the rgrp is invalidated, + it's in the "middle" of acquiring the glock, but the HOLDER bit + isn't set yet: + BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ + trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); + + if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) + rb_erase(&rs->rs_node, &rgd->rd_rstree); + BUG_ON(!rgd->rd_rs_cnt); + rgd->rd_rs_cnt--; + + if (rs->rs_free) { + /* return reserved blocks to the rgrp and the ip */ + BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); + rs->rs_rgd->rd_reserved -= rs->rs_free; + rs->rs_free = 0; + clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + smp_mb__after_clear_bit(); + } + /* We can't change any of the step 1 or step 2 components of the rs. + E.g. We can't set rs_rgd to NULL because the rgd glock is held and + dequeued through this pointer. + Can't: atomic_set(&rs->rs_sizehint, 0); + Can't: rs->rs_requested = 0; + Can't: rs->rs_rgd = NULL;*/ + rs->rs_bi = NULL; + rs->rs_biblk = 0; +} + +/** + * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + __rs_deltree(rs); + spin_unlock(&rgd->rd_rsspin); +} + +/** + * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation * */ @@ -444,12 +572,36 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { + gfs2_rs_deltree(ip->i_res); + trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); + BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; } up_write(&ip->i_rw_mutex); } +/** + * return_all_reservations - return all reserved blocks back to the rgrp. + * @rgd: the rgrp that needs its space back + * + * We previously reserved a bunch of blocks for allocation. Now we need to + * give them back. This leave the reservation structures in tact, but removes + * all of their corresponding "no-fly zones". + */ +static void return_all_reservations(struct gfs2_rgrpd *rgd) +{ + struct rb_node *n; + struct gfs2_blkreserv *rs; + + spin_lock(&rgd->rd_rsspin); + while ((n = rb_first(&rgd->rd_rstree))) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + __rs_deltree(rs); + } + spin_unlock(&rgd->rd_rsspin); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -472,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) gfs2_free_clones(rgd); kfree(rgd->rd_bits); + return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } @@ -649,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data0 = be64_to_cpu(buf.ri_data0); rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); + spin_lock_init(&rgd->rd_rsspin); error = compute_bitstructs(rgd); if (error) @@ -1114,30 +1268,213 @@ out: return ret; } +/** + * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree + * @bi: the bitmap with the blocks + * @ip: the inode structure + * @biblk: the 32-bit block number relative to the start of the bitmap + * @amount: the number of blocks to reserve + * + * Returns: NULL - reservation was already taken, so not inserted + * pointer to the inserted reservation + */ +static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, + struct gfs2_inode *ip, u32 biblk, + int amount) +{ + struct rb_node **newn, *parent = NULL; + int rc; + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + BUG_ON(!ip->i_res); + BUG_ON(gfs2_rs_active(rs)); + /* Figure out where to put new node */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + while (*newn) { + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + + parent = *newn; + rc = rs_cmp(fsblock, amount, cur); + if (rc > 0) + newn = &((*newn)->rb_right); + else if (rc < 0) + newn = &((*newn)->rb_left); + else { + spin_unlock(&rgd->rd_rsspin); + return NULL; /* reservation already in use */ + } + } + + /* Do our reservation work */ + rs = ip->i_res; + rs->rs_free = amount; + rs->rs_biblk = biblk; + rs->rs_bi = bi; + rb_link_node(&rs->rs_node, parent, newn); + rb_insert_color(&rs->rs_node, &rgd->rd_rstree); + + /* Do our inode accounting for the reservation */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + + /* Do our rgrp accounting for the reservation */ + rgd->rd_reserved += amount; /* blocks reserved */ + rgd->rd_rs_cnt++; /* number of in-tree reservations */ + spin_unlock(&rgd->rd_rsspin); + trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); + return rs; +} + +/** + * unclaimed_blocks - return number of blocks that aren't spoken for + */ +static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) +{ + return rgd->rd_free_clone - rgd->rd_reserved; +} + +/** + * rg_mblk_search - find a group of multiple free blocks + * @rgd: the resource group descriptor + * @rs: the block reservation + * @ip: pointer to the inode for which we're reserving blocks + * + * This is very similar to rgblk_search, except we're looking for whole + * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing + * on aligned dwords for speed's sake. + * + * Returns: 0 if successful or BFITNOENT if there isn't enough free space + */ + +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +{ + struct gfs2_bitmap *bi = rgd->rd_bits; + const u32 length = rgd->rd_length; + u32 blk; + unsigned int buf, x, search_bytes; + u8 *buffer = NULL; + u8 *ptr, *end, *nonzero; + u32 goal, rsv_bytes; + struct gfs2_blkreserv *rs; + u32 best_rs_bytes, unclaimed; + int best_rs_blocks; + + /* Find bitmap block that contains bits for goal block */ + if (rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + /* Convert scope of "goal" from rgrp-wide to within + found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } + } + buf = 0; + goal = 0; + +do_search: + best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), + (RGRP_RSRV_MINBLKS * rgd->rd_length)); + best_rs_bytes = (best_rs_blocks * + (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / + GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ + best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); + unclaimed = unclaimed_blocks(rgd); + if (best_rs_bytes * GFS2_NBBY > unclaimed) + best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; + + for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags)) + goto skip; + + WARN_ON(!buffer_uptodate(bi->bi_bh)); + if (bi->bi_clone) + buffer = bi->bi_clone + bi->bi_offset; + else + buffer = bi->bi_bh->b_data + bi->bi_offset; + + /* We have to keep the reservations aligned on u64 boundaries + otherwise we could get situations where a byte can't be + used because it's after a reservation, but a free bit still + is within the reservation's area. */ + ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); + end = (buffer + bi->bi_len); + while (ptr < end) { + rsv_bytes = 0; + if ((ptr + best_rs_bytes) <= end) + search_bytes = best_rs_bytes; + else + search_bytes = end - ptr; + BUG_ON(!search_bytes); + nonzero = memchr_inv(ptr, 0, search_bytes); + /* If the lot is all zeroes, reserve the whole size. If + there's enough zeroes to satisfy the request, use + what we can. If there's not enough, keep looking. */ + if (nonzero == NULL) + rsv_bytes = search_bytes; + else if ((nonzero - ptr) * GFS2_NBBY >= + ip->i_res->rs_requested) + rsv_bytes = (nonzero - ptr); + + if (rsv_bytes) { + blk = ((ptr - buffer) * GFS2_NBBY); + BUG_ON(blk >= bi->bi_len * GFS2_NBBY); + rs = rs_insert(bi, ip, blk, + rsv_bytes * GFS2_NBBY); + if (IS_ERR(rs)) + return PTR_ERR(rs); + if (rs) + return 0; + } + ptr += ALIGN(search_bytes, sizeof(u64)); + } +skip: + /* Try next bitmap block (wrap back to rgrp header + if at end) */ + buf++; + buf %= length; + goal = 0; + } + + return BFITNOENT; +} + /** * try_rgrp_fit - See if a given reservation will fit in a given RG * @rgd: the RG data * @ip: the inode * * If there's room for the requested blocks to be allocated from the RG: + * This will try to get a multi-block reservation first, and if that doesn't + * fit, it will take what it can. * * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) */ -static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { - const struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = ip->i_res; if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; - if (rgd->rd_free_clone >= rs->rs_requested) + /* Look for a multi-block reservation. */ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && + rg_mblk_search(rgd, ip) != BFITNOENT) + return 1; + if (unclaimed_blocks(rgd) >= rs->rs_requested) return 1; - return 0; -} -static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; + return 0; } /** @@ -1217,7 +1554,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd, *begin = NULL; + struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = ip->i_res; int error = 0, rg_locked, flags = LM_FLAG_TRY; u64 last_unlinked = NO_BLOCK; @@ -1225,32 +1562,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - rs = ip->i_res; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { error = -EINVAL; goto out; } - - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) - rgd = begin = ip->i_rgd; - else - rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); - - if (rgd == NULL) + if (gfs2_rs_active(rs)) { + begin = rs->rs_rgd; + flags = 0; /* Yoda: Do or do not. There is no try */ + } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { + rs->rs_rgd = begin = ip->i_rgd; + } else { + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + } + if (rs->rs_rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 0; - if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { + if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { rg_locked = 1; error = 0; + } else if (!loops && !gfs2_rs_active(rs) && + rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { + /* If the rgrp already is maxed out for contenders, + we can eliminate it as a "first pass" without even + requesting the rgrp glock. */ + error = GLR_TRYFAILED; } else { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - flags, &rs->rs_rgd_gh); + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + LM_ST_EXCLUSIVE, flags, + &rs->rs_rgd_gh); if (!error && sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rgd); + error = update_rgrp_lvb(rs->rs_rgd); if (error) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; @@ -1259,25 +1604,37 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) } switch (error) { case 0: - if (try_rgrp_fit(rgd, ip)) { + if (gfs2_rs_active(rs)) { + if (unclaimed_blocks(rs->rs_rgd) + + rs->rs_free >= rs->rs_requested) { + ip->i_rgd = rs->rs_rgd; + return 0; + } + /* We have a multi-block reservation, but the + rgrp doesn't have enough free blocks to + satisfy the request. Free the reservation + and look for a suitable rgrp. */ + gfs2_rs_deltree(rs); + } + if (try_rgrp_fit(rs->rs_rgd, ip)) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rgd); - ip->i_rgd = rgd; + gfs2_rgrp_bh_get(rs->rs_rgd); + ip->i_rgd = rs->rs_rgd; return 0; } - if (rgd->rd_flags & GFS2_RDF_CHECK) { + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rgd); - try_rgrp_unlink(rgd, &last_unlinked, + gfs2_rgrp_bh_get(rs->rs_rgd); + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: - rgd = gfs2_rgrpd_get_next(rgd); - rgd = rgd ? : begin; /* if NULL, wrap */ - if (rgd != begin) /* If we didn't wrap */ + rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); + rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ + if (rs->rs_rgd != begin) /* If we didn't wrap */ break; flags &= ~LM_FLAG_TRY; @@ -1315,6 +1672,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; + if (!rs) + return; + + if (!rs->rs_free) + gfs2_rs_deltree(rs); + if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); rs->rs_requested = 0; @@ -1413,7 +1776,27 @@ do_search: if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + while (1) { + struct gfs2_blkreserv *rs; + u32 rgblk; + + biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + if (biblk == BFITNOENT) + break; + /* Check if this block is reserved() */ + rgblk = gfs2_bi2rgd_blk(bi, biblk); + rs = rs_find(rgd, rgblk); + if (rs == NULL) + break; + + BUG_ON(rs->rs_bi != bi); + biblk = BFITNOENT; + /* This should jump to the first block after the + reservation. */ + goal = rs->rs_biblk + rs->rs_free; + if (goal >= bi->bi_len * GFS2_NBBY) + break; + } if (biblk != BFITNOENT) break; @@ -1449,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, u32 blk, bool dinode, unsigned int *n) { const unsigned int elen = *n; - u32 goal; + u32 goal, rgblk; const u8 *buffer = NULL; + struct gfs2_blkreserv *rs; *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1463,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, goal++; if (goal >= (bi->bi_len * GFS2_NBBY)) break; + rgblk = gfs2_bi2rgd_blk(bi, goal); + rs = rs_find(rgd, rgblk); + if (rs) /* Oops, we bumped into someone's reservation */ + break; if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; @@ -1538,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) { - const struct gfs2_rgrpd *rgd = gl->gl_object; + struct gfs2_rgrpd *rgd = gl->gl_object; + struct gfs2_blkreserv *trs; + const struct rb_node *n; + if (rgd == NULL) return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_flags, - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, + rgd->rd_reserved); + spin_lock(&rgd->rd_rsspin); + for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { + trs = rb_entry(n, struct gfs2_blkreserv, rs_node); + dump_rs(seq, trs); + } + spin_unlock(&rgd->rd_rsspin); return 0; } @@ -1557,11 +1955,64 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) rgd->rd_flags |= GFS2_RDF_ERROR; } +/** + * claim_reserved_blks - Claim previously reserved blocks + * @ip: the inode that's claiming the reservation + * @dinode: 1 if this block is a dinode block, otherwise data block + * @nblocks: desired extent length + * + * Lay claim to previously allocated block reservation blocks. + * Returns: Starting block number of the blocks claimed. + * Sets *nblocks to the actual extent length allocated. + */ +static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, + unsigned int *nblocks) +{ + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_bitmap *bi; + u64 start_block = gfs2_rs_startblk(rs); + const unsigned int elen = *nblocks; + + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + gfs2_assert_withdraw(sdp, rgd); + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + bi = rs->rs_bi; + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + + for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { + /* Make sure the bitmap hasn't changed */ + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, + dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + rs->rs_biblk++; + rs->rs_free--; + + BUG_ON(!rgd->rd_reserved); + rgd->rd_reserved--; + dinode = false; + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + } + + if (!rs->rs_free) { + struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; + + gfs2_rs_deltree(rs); + /* -nblocks because we haven't returned to do the math yet. + I'm doing the math backwards to prevent negative numbers, + but think of it as: + if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) + rg_mblk_search(rgd, ip); + } + return start_block; +} + /** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number - * @ndata: requested number of blocks/extent length (value/result) + * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 * @generation: the generation number of the inode * @@ -1586,20 +2037,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, if (ip->i_res->rs_requested == 0) return -ECANCELED; - rgd = ip->i_rgd; - - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; - else - goal = rgd->rd_last_alloc; - - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + /* Check if we have a multi-block reservation, and if so, claim the + next free block from it. */ + if (gfs2_rs_active(ip->i_res)) { + BUG_ON(!ip->i_res->rs_free); + rgd = ip->i_res->rs_rgd; + block = claim_reserved_blks(ip, dinode, nblocks); + } else { + rgd = ip->i_rgd; - /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) - goto rgrp_error; + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + + /* Since all blocks are reserved in advance, this shouldn't + happen */ + if (blk == BFITNOENT) { + printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", + *nblocks); + printk(KERN_WARNING "FULL=%d\n", + test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + goto rgrp_error; + } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + } ndata = *nblocks; if (dinode) ndata--; @@ -1616,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rgd->rd_free < *nblocks) + if (rgd->rd_free < *nblocks) { + printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; + } rgd->rd_free -= *nblocks; if (dinode) { @@ -1877,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) for (x = 0; x < rlist->rl_rgrps; x++) gfs2_holder_uninit(&rlist->rl_ghs[x]); kfree(rlist->rl_ghs); + rlist->rl_ghs = NULL; } } -- cgit v1.2.3 From 62e252eeefda62eb8cae9f4286270317ab8d5a42 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 30 Jul 2012 11:06:08 +0100 Subject: GFS2: Take account of blockages when using reserved blocks The claim_reserved_blks() function was not taking account of the possibility of "blockages" while performing allocation. This can be caused by another node allocating something in the same extent which has been reserved locally. This patch tests for this condition and then skips the remainder of the reservation in this case. This is a relatively rare event, so that it should not affect the general performance improvement which the block reservations provide. The claim_reserved_blks() function also appears not to be able to deal with reservations which cross bitmap boundaries, but that can be dealt with in a future patch since we don't generate boundary crossing reservations currently. Signed-off-by: Steven Whitehouse Reported-by: David Teigland Cc: Bob Peterson --- fs/gfs2/rgrp.c | 66 +++++++++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 38 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d34887a601..c9ed814eeb6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1961,7 +1961,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) * @dinode: 1 if this block is a dinode block, otherwise data block * @nblocks: desired extent length * - * Lay claim to previously allocated block reservation blocks. + * Lay claim to previously reserved blocks. * Returns: Starting block number of the blocks claimed. * Sets *nblocks to the actual extent length allocated. */ @@ -1970,19 +1970,17 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, { struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_rgrpd *rgd = rs->rs_rgd; - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_bitmap *bi; u64 start_block = gfs2_rs_startblk(rs); const unsigned int elen = *nblocks; - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - gfs2_assert_withdraw(sdp, rgd); - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ bi = rs->rs_bi; gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { - /* Make sure the bitmap hasn't changed */ + if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE) + break; gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); rs->rs_biblk++; @@ -1991,20 +1989,12 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, BUG_ON(!rgd->rd_reserved); rgd->rd_reserved--; dinode = false; - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); } - if (!rs->rs_free) { - struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; - + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + if (!rs->rs_free || *nblocks != elen) gfs2_rs_deltree(rs); - /* -nblocks because we haven't returned to do the math yet. - I'm doing the math backwards to prevent negative numbers, - but think of it as: - if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) - rg_mblk_search(rgd, ip); - } + return start_block; } @@ -2037,34 +2027,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, if (ip->i_res->rs_requested == 0) return -ECANCELED; - /* Check if we have a multi-block reservation, and if so, claim the - next free block from it. */ + /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); rgd = ip->i_res->rs_rgd; block = claim_reserved_blks(ip, dinode, nblocks); - } else { - rgd = ip->i_rgd; + if (*nblocks) + goto found_blocks; + } - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; - else - goal = rgd->rd_last_alloc; - - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); - - /* Since all blocks are reserved in advance, this shouldn't - happen */ - if (blk == BFITNOENT) { - printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", - *nblocks); - printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); - goto rgrp_error; - } + rgd = ip->i_rgd; - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + + /* Since all blocks are reserved in advance, this shouldn't happen */ + if (blk == BFITNOENT) { + printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); + printk(KERN_WARNING "FULL=%d\n", + test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + goto rgrp_error; } + + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); +found_blocks: ndata = *nblocks; if (dinode) ndata--; -- cgit v1.2.3 From 71f890f7f758f340215d48fed5223f9cce05b652 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 30 Jul 2012 14:53:19 +0100 Subject: GFS2: Remove rs_requested field from reservations The rs_requested field is left over from the original allocation code, however this should have been a parameter passed to the various functions from gfs2_inplace_reserve() and not a member of the reservation structure as the value is not required after the initial allocation. This also helps simplify the code since we no longer need to set the rs_requested to zero. Also the gfs2_inplace_release() function can also be simplified since the reservation structure will always be defined when it is called, and the only remaining task is to unlock the rgrp if required. It can also now be called unconditionally too, resulting in a further simplification. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c9ed814eeb6..a2b43bb8349 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -539,7 +539,6 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) E.g. We can't set rs_rgd to NULL because the rgd glock is held and dequeued through this pointer. Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_requested = 0; Can't: rs->rs_rgd = NULL;*/ rs->rs_bi = NULL; rs->rs_biblk = 0; @@ -1350,7 +1349,7 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) { struct gfs2_bitmap *bi = rgd->rd_bits; const u32 length = rgd->rd_length; @@ -1422,8 +1421,7 @@ do_search: what we can. If there's not enough, keep looking. */ if (nonzero == NULL) rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= - ip->i_res->rs_requested) + else if ((nonzero - ptr) * GFS2_NBBY >= requested) rsv_bytes = (nonzero - ptr); if (rsv_bytes) { @@ -1461,17 +1459,16 @@ skip: * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) */ -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_blkreserv *rs = ip->i_res; - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; /* Look for a multi-block reservation. */ if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip) != BFITNOENT) + rg_mblk_search(rgd, ip, requested) != BFITNOENT) return 1; - if (unclaimed_blocks(rgd) >= rs->rs_requested) + if (unclaimed_blocks(rgd) >= requested) return 1; return 0; @@ -1562,7 +1559,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { error = -EINVAL; goto out; @@ -1606,7 +1602,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) case 0: if (gfs2_rs_active(rs)) { if (unclaimed_blocks(rs->rs_rgd) + - rs->rs_free >= rs->rs_requested) { + rs->rs_free >= requested) { ip->i_rgd = rs->rs_rgd; return 0; } @@ -1616,7 +1612,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) and look for a suitable rgrp. */ gfs2_rs_deltree(rs); } - if (try_rgrp_fit(rs->rs_rgd, ip)) { + if (try_rgrp_fit(rs->rs_rgd, ip, requested)) { if (sdp->sd_args.ar_rgrplvb) gfs2_rgrp_bh_get(rs->rs_rgd); ip->i_rgd = rs->rs_rgd; @@ -1656,8 +1652,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) error = -ENOSPC; out: - if (error) - rs->rs_requested = 0; return error; } @@ -1672,15 +1666,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; - if (!rs) - return; - - if (!rs->rs_free) - gfs2_rs_deltree(rs); - if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - rs->rs_requested = 0; } /** @@ -2021,12 +2008,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, int error; struct gfs2_bitmap *bi; - /* Only happens if there is a bug in gfs2, return something distinctive - * to ensure that it is noticed. - */ - if (ip->i_res->rs_requested == 0) - return -ECANCELED; - /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); -- cgit v1.2.3 From 4a993fb1503d11496974bd86c0b7123f63d9c8a2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Jul 2012 15:21:20 +0100 Subject: GFS2: Add structure to contain rgrp, bitmap, offset tuple This patch introduces a new structure, gfs2_rbm, which is a tuple of a resource group, a bitmap within the resource group and an offset within that bitmap. This is designed to make manipulating these sets of variables easier. There is also a new helper function which converts this representation back to a disk block address. In addition, the rbtree nodes which are used for the reservations were not being correctly initialised, which is now fixed. Also, the tracing was not passing through the inode where it should have been. That is mostly fixed aside from one corner case. This needs to be revisited since there can also be a NULL rgrp in some cases which results in the device being incorrect in the trace. This is intended to be the first step towards cleaning up some of the allocation code, and some further bug fixes. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 175 ++++++++++++++++++++++++++------------------------------- 1 file changed, 81 insertions(+), 94 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index a2b43bb8349..eaa41885a00 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -192,7 +192,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rs_startblk(rs); + u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); if (blk >= startblk + rs->rs_free) return 1; @@ -487,6 +487,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; + rb_init_node(&res->rs_node); + down_write(&ip->i_rw_mutex); if (ip->i_res) kmem_cache_free(gfs2_rsrv_cachep, res); @@ -499,8 +501,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) { gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, - rs->rs_free); + rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.offset, rs->rs_free); } /** @@ -508,40 +510,28 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -static void __rs_deltree(struct gfs2_blkreserv *rs) +static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rgd; - /* We can't do this: The reason is that when the rgrp is invalidated, - it's in the "middle" of acquiring the glock, but the HOLDER bit - isn't set yet: - BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ - trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); - - if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) - rb_erase(&rs->rs_node, &rgd->rd_rstree); + rgd = rs->rs_rbm.rgd; + trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); + rb_erase(&rs->rs_node, &rgd->rd_rstree); + rb_init_node(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); rgd->rd_rs_cnt--; if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); + rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); smp_mb__after_clear_bit(); } - /* We can't change any of the step 1 or step 2 components of the rs. - E.g. We can't set rs_rgd to NULL because the rgd glock is held and - dequeued through this pointer. - Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_rgd = NULL;*/ - rs->rs_bi = NULL; - rs->rs_biblk = 0; } /** @@ -549,17 +539,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - if (!gfs2_rs_active(rs)) - return; - - rgd = rs->rs_rgd; - spin_lock(&rgd->rd_rsspin); - __rs_deltree(rs); - spin_unlock(&rgd->rd_rsspin); + rgd = rs->rs_rbm.rgd; + if (rgd) { + spin_lock(&rgd->rd_rsspin); + __rs_deltree(ip, rs); + spin_unlock(&rgd->rd_rsspin); + } } /** @@ -571,7 +560,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); @@ -596,7 +585,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) spin_lock(&rgd->rd_rsspin); while ((n = rb_first(&rgd->rd_rstree))) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - __rs_deltree(rs); + __rs_deltree(NULL, rs); } spin_unlock(&rgd->rd_rsspin); } @@ -1284,7 +1273,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; spin_lock(&rgd->rd_rsspin); @@ -1312,8 +1301,8 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, /* Do our reservation work */ rs = ip->i_res; rs->rs_free = amount; - rs->rs_biblk = biblk; - rs->rs_bi = bi; + rs->rs_rbm.offset = biblk; + rs->rs_rbm.bi = bi; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); @@ -1564,34 +1553,34 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) goto out; } if (gfs2_rs_active(rs)) { - begin = rs->rs_rgd; + begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { - rs->rs_rgd = begin = ip->i_rgd; + rs->rs_rbm.rgd = begin = ip->i_rgd; } else { - rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (rs->rs_rgd == NULL) + if (rs->rs_rbm.rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 0; - if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { + if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { rg_locked = 1; error = 0; } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { + rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { /* If the rgrp already is maxed out for contenders, we can eliminate it as a "first pass" without even requesting the rgrp glock. */ error = GLR_TRYFAILED; } else { - error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); if (!error && sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); + error = update_rgrp_lvb(rs->rs_rbm.rgd); if (error) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; @@ -1601,36 +1590,36 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) switch (error) { case 0: if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rgd) + + if (unclaimed_blocks(rs->rs_rbm.rgd) + rs->rs_free >= requested) { - ip->i_rgd = rs->rs_rgd; + ip->i_rgd = rs->rs_rbm.rgd; return 0; } /* We have a multi-block reservation, but the rgrp doesn't have enough free blocks to satisfy the request. Free the reservation and look for a suitable rgrp. */ - gfs2_rs_deltree(rs); + gfs2_rs_deltree(ip, rs); } - if (try_rgrp_fit(rs->rs_rgd, ip, requested)) { + if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - ip->i_rgd = rs->rs_rgd; + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + ip->i_rgd = rs->rs_rbm.rgd; return 0; } - if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) { if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); - try_rgrp_unlink(rs->rs_rgd, &last_unlinked, + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, ip->i_no_addr); } if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); /* fall through */ case GLR_TRYFAILED: - rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); - rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rgd != begin) /* If we didn't wrap */ + rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd); + rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */ + if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */ break; flags &= ~LM_FLAG_TRY; @@ -1776,11 +1765,11 @@ do_search: if (rs == NULL) break; - BUG_ON(rs->rs_bi != bi); + BUG_ON(rs->rs_rbm.bi != bi); biblk = BFITNOENT; /* This should jump to the first block after the reservation. */ - goal = rs->rs_biblk + rs->rs_free; + goal = rs->rs_rbm.offset + rs->rs_free; if (goal >= bi->bi_len * GFS2_NBBY) break; } @@ -1805,9 +1794,7 @@ skip: /** * gfs2_alloc_extent - allocate an extent from a given bitmap - * @rgd: the resource group descriptor - * @bi: the bitmap within the rgrp - * @blk: the block within the bitmap + * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode * @n: The extent length * @@ -1815,9 +1802,12 @@ skip: * Set the found bits to @new_state to change block's allocation state. * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, - u32 blk, bool dinode, unsigned int *n) +static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, + unsigned int *n) { + struct gfs2_rgrpd *rgd = rbm->rgd; + struct gfs2_bitmap *bi = rbm->bi; + u32 blk = rbm->offset; const unsigned int elen = *n; u32 goal, rgblk; const u8 *buffer = NULL; @@ -1956,21 +1946,21 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, unsigned int *nblocks) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; struct gfs2_bitmap *bi; - u64 start_block = gfs2_rs_startblk(rs); + u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm); const unsigned int elen = *nblocks; - bi = rs->rs_bi; + bi = rs->rs_rbm.bi; gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE) + bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_biblk++; + rs->rs_rbm.offset++; rs->rs_free--; BUG_ON(!rgd->rd_reserved); @@ -1980,7 +1970,7 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); if (!rs->rs_free || *nblocks != elen) - gfs2_rs_deltree(rs); + gfs2_rs_deltree(ip, rs); return start_block; } @@ -2001,40 +1991,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rgrpd *rgd; + struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal, blk; /* block, within the rgrp scope */ + u32 goal; /* block, within the rgrp scope */ u64 block; /* block, within the file system scope */ int error; - struct gfs2_bitmap *bi; /* If we have a reservation, claim blocks from it. */ if (gfs2_rs_active(ip->i_res)) { BUG_ON(!ip->i_res->rs_free); - rgd = ip->i_res->rs_rgd; + rbm.rgd = ip->i_res->rs_rbm.rgd; block = claim_reserved_blks(ip, dinode, nblocks); if (*nblocks) goto found_blocks; } - rgd = ip->i_rgd; - - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal - rbm.rgd->rd_data0; else - goal = rgd->rd_last_alloc; + goal = rbm.rgd->rd_last_alloc; - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi); /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) { + if (rbm.offset == BFITNOENT) { printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + block = gfs2_alloc_extent(&rbm, dinode, nblocks); found_blocks: ndata = *nblocks; if (dinode) @@ -2052,22 +2039,22 @@ found_blocks: brelse(dibh); } } - if (rgd->rd_free < *nblocks) { + if (rbm.rgd->rd_free < *nblocks) { printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; } - rgd->rd_free -= *nblocks; + rbm.rgd->rd_free -= *nblocks; if (dinode) { - rgd->rd_dinodes++; - *generation = rgd->rd_igeneration++; + rbm.rgd->rd_dinodes++; + *generation = rbm.rgd->rd_igeneration++; if (*generation == 0) - *generation = rgd->rd_igeneration++; + *generation = rbm.rgd->rd_igeneration++; } - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); - gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); + gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2081,14 +2068,14 @@ found_blocks: gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, ip->i_inode.i_gid); - rgd->rd_free_clone -= *nblocks; - trace_gfs2_block_alloc(ip, rgd, block, *nblocks, + rbm.rgd->rd_free_clone -= *nblocks; + trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; rgrp_error: - gfs2_rgrp_error(rgd); + gfs2_rgrp_error(rbm.rgd); return -EIO; } -- cgit v1.2.3 From 5b924ae2dcb1cc5e78445a0cedb5a3673bb5ad8a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Aug 2012 20:35:05 +0100 Subject: GFS2: Replace rgblk_search with gfs2_rbm_find This is part of a series of patches which are introducing the gfs2_rbm structure throughout the block allocation code. The main aim of this part is to create a search function which can deal directly with struct gfs2_rbm. In this case it specifies the initial position at which to start the search and also the point at which the search terminates. The net result of this is to clean up the search code and make it rather more readable, and the various possible exceptions which may occur during the search are partitioned into their own functions. There are some bug fixes too. We should not be checking the reservations while allocating extents - the time for that is when we are searching for where to put the extent, not when we've already made that decision. Also, rgblk_search had two uses, and in only one of those cases did it make sense to check for reservations. This is fixed in the new gfs2_rbm_find function, which has a cleaner interface. The reservation checking has been improved by always checking for contiguous reservations, and returning the first free block after all contiguous reservations. This is done under the spin lock to ensure consistancy of the tree. The allocation of extents is now in all cases done by the existing allocation code, and if there is an active reservation, that is updated after the fact. Again this is done under the spin lock, since it entails changing the lookup key for the reservation in question. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 460 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 250 insertions(+), 210 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index eaa41885a00..bd3b926949d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -67,10 +67,6 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, - struct gfs2_bitmap **rbi); - /** * gfs2_setbit - Set a bit in the bitmaps * @rgd: the resource group descriptor @@ -201,36 +197,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) return 0; } -/** - * rs_find - Find a rgrp multi-block reservation that contains a given block - * @rgd: The rgrp - * @rgblk: The block we're looking for, relative to the rgrp - */ -static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) -{ - struct rb_node **newn; - int rc; - u64 fsblk = rgblk + rgd->rd_data0; - - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - while (*newn) { - struct gfs2_blkreserv *cur = - rb_entry(*newn, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(fsblk, 1, cur); - if (rc < 0) - newn = &((*newn)->rb_left); - else if (rc > 0) - newn = &((*newn)->rb_right); - else { - spin_unlock(&rgd->rd_rsspin); - return cur; - } - } - spin_unlock(&rgd->rd_rsspin); - return NULL; -} - /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. @@ -1306,9 +1272,6 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); - /* Do our inode accounting for the reservation */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - /* Do our rgrp accounting for the reservation */ rgd->rd_reserved += amount; /* blocks reserved */ rgd->rd_rs_cnt++; /* number of in-tree reservations */ @@ -1463,6 +1426,199 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, return 0; } +/** + * gfs2_next_unreserved_block - Return next block that is not reserved + * @rgd: The resource group + * @block: The starting block + * @ip: Ignore any reservations for this inode + * + * If the block does not appear in any reservation, then return the + * block number unchanged. If it does appear in the reservation, then + * keep looking through the tree of reservations in order to find the + * first block number which is not reserved. + */ + +static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + const struct gfs2_inode *ip) +{ + struct gfs2_blkreserv *rs; + struct rb_node *n; + int rc; + + spin_lock(&rgd->rd_rsspin); + n = rb_first(&rgd->rd_rstree); + while (n) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(block, 1, rs); + if (rc < 0) + n = n->rb_left; + else if (rc > 0) + n = n->rb_right; + else + break; + } + + if (n) { + while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) { + block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + n = rb_next(&rs->rs_node); + if (n == NULL) + break; + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + } + } + + spin_unlock(&rgd->rd_rsspin); + return block; +} + +/** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + return 0; + } + } + + return -E2BIG; +} + +/** + * gfs2_reservation_check_and_update - Check for reservations during block alloc + * @rbm: The current position in the resource group + * + * This checks the current position in the rgrp to see whether there is + * a reservation covering this block. If not then this function is a + * no-op. If there is, then the position is moved to the end of the + * contiguous reservation(s) so that we are pointing at the first + * non-reserved block. + * + * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error + */ + +static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip) +{ + u64 block = gfs2_rbm_to_block(rbm); + u64 nblock; + int ret; + + nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip); + if (nblock == block) + return 0; + ret = gfs2_rbm_from_block(rbm, nblock); + if (ret < 0) + return ret; + return 1; +} + +/** + * gfs2_rbm_find - Look for blocks of a particular state + * @rbm: Value/result starting position and final position + * @state: The state which we want to find + * @ip: If set, check for reservations + * @nowrap: Stop looking at the end of the rgrp, rather than wrapping + * around until we've reached the starting point. + * + * Side effects: + * - If looking for free blocks, we set GBF_FULL on each bitmap which + * has no free blocks in it. + * + * Returns: 0 on success, -ENOSPC if there is no block of the requested state + */ + +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, + const struct gfs2_inode *ip, bool nowrap) +{ + struct buffer_head *bh; + struct gfs2_bitmap *initial_bi; + u32 initial_offset; + u32 offset; + u8 *buffer; + int index; + int n = 0; + int iters = rbm->rgd->rd_length; + int ret; + + /* If we are not starting at the beginning of a bitmap, then we + * need to add one to the bitmap count to ensure that we search + * the starting bitmap twice. + */ + if (rbm->offset != 0) + iters++; + + while(1) { + if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + (state == GFS2_BLKST_FREE)) + goto next_bitmap; + + bh = rbm->bi->bi_bh; + buffer = bh->b_data + rbm->bi->bi_offset; + WARN_ON(!buffer_uptodate(bh)); + if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) + buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; +find_next: + initial_offset = rbm->offset; + offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + if (offset == BFITNOENT) + goto bitmap_full; + rbm->offset = offset; + if (ip == NULL) + return 0; + + initial_bi = rbm->bi; + ret = gfs2_reservation_check_and_update(rbm, ip); + if (ret == 0) + return 0; + if (ret > 0) { + n += (rbm->bi - initial_bi); + goto find_next; + } + return ret; + +bitmap_full: /* Mark bitmap as full and fall through */ + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) + set_bit(GBF_FULL, &rbm->bi->bi_flags); + +next_bitmap: /* Find next bitmap in the rgrp */ + rbm->offset = 0; + index = rbm->bi - rbm->rgd->rd_bits; + index++; + if (index == rbm->rgd->rd_length) + index = 0; + rbm->bi = &rbm->rgd->rd_bits[index]; + if ((index == 0) && nowrap) + break; + n++; + if (n >= iters) + break; + } + + return -ENOSPC; +} + /** * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes * @rgd: The rgrp @@ -1475,34 +1631,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) { - u32 goal = 0, block; - u64 no_addr; + u64 block; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl; struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_bitmap *bi; + struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; - while (goal < rgd->rd_data) { + while (1) { down_write(&sdp->sd_log_flush_lock); - block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true); up_write(&sdp->sd_log_flush_lock); - if (block == BFITNOENT) + if (error == -ENOSPC) + break; + if (WARN_ON_ONCE(error)) break; - block = gfs2_bi2rgd_blk(bi, block); - /* rgblk_search can return a block < goal, so we need to - keep it marching forward. */ - no_addr = block + rgd->rd_data0; - goal = max(block + 1, goal + 1); - if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) + block = gfs2_rbm_to_block(&rbm); + if (gfs2_rbm_from_block(&rbm, block + 1)) + break; + if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) continue; - if (no_addr == skip) + if (block == skip) continue; - *last_unlinked = no_addr; + *last_unlinked = block; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); + error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); if (error) continue; @@ -1692,105 +1847,6 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) return type; } -/** - * rgblk_search - find a block in @state - * @rgd: the resource group descriptor - * @goal: the goal block within the RG (start here to search for avail block) - * @state: GFS2_BLKST_XXX the before-allocation state to find - * @rbi: address of the pointer to the bitmap containing the block found - * - * Walk rgrp's bitmap to find bits that represent a block in @state. - * - * This function never fails, because we wouldn't call it unless we - * know (from reservation results, etc.) that a block is available. - * - * Scope of @goal is just within rgrp, not the whole filesystem. - * Scope of @returned block is just within bitmap, not the whole filesystem. - * - * Returns: the block number found relative to the bitmap rbi - */ - -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, - struct gfs2_bitmap **rbi) -{ - struct gfs2_bitmap *bi = NULL; - const u32 length = rgd->rd_length; - u32 biblk = BFITNOENT; - unsigned int buf, x; - const u8 *buffer = NULL; - - *rbi = NULL; - /* Find bitmap block that contains bits for goal block */ - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; - -do_search: - /* Search (up to entire) bitmap in this rgrp for allocatable block. - "x <= length", instead of "x < length", because we typically start - the search in the middle of a bit block, but if we can't find an - allocatable block anywhere else, we want to be able wrap around and - search in the first part of our first-searched bit block. */ - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags) && - (state == GFS2_BLKST_FREE)) - goto skip; - - /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone - bitmaps, so we must search the originals for that. */ - buffer = bi->bi_bh->b_data + bi->bi_offset; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - - while (1) { - struct gfs2_blkreserv *rs; - u32 rgblk; - - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); - if (biblk == BFITNOENT) - break; - /* Check if this block is reserved() */ - rgblk = gfs2_bi2rgd_blk(bi, biblk); - rs = rs_find(rgd, rgblk); - if (rs == NULL) - break; - - BUG_ON(rs->rs_rbm.bi != bi); - biblk = BFITNOENT; - /* This should jump to the first block after the - reservation. */ - goal = rs->rs_rbm.offset + rs->rs_free; - if (goal >= bi->bi_len * GFS2_NBBY) - break; - } - if (biblk != BFITNOENT) - break; - - if ((goal == 0) && (state == GFS2_BLKST_FREE)) - set_bit(GBF_FULL, &bi->bi_flags); - - /* Try next bitmap block (wrap back to rgrp header if at end) */ -skip: - buf++; - buf %= length; - goal = 0; - } - - if (biblk != BFITNOENT) - *rbi = bi; - - return biblk; -} /** * gfs2_alloc_extent - allocate an extent from a given bitmap @@ -1809,9 +1865,8 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, struct gfs2_bitmap *bi = rbm->bi; u32 blk = rbm->offset; const unsigned int elen = *n; - u32 goal, rgblk; + u32 goal; const u8 *buffer = NULL; - struct gfs2_blkreserv *rs; *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1824,10 +1879,6 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, goal++; if (goal >= (bi->bi_len * GFS2_NBBY)) break; - rgblk = gfs2_bi2rgd_blk(bi, goal); - rs = rs_find(rgd, rgblk); - if (rs) /* Oops, we bumped into someone's reservation */ - break; if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; @@ -1933,46 +1984,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) } /** - * claim_reserved_blks - Claim previously reserved blocks - * @ip: the inode that's claiming the reservation - * @dinode: 1 if this block is a dinode block, otherwise data block - * @nblocks: desired extent length + * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation + * @ip: The inode we have just allocated blocks for + * @rbm: The start of the allocated blocks + * @len: The extent length * - * Lay claim to previously reserved blocks. - * Returns: Starting block number of the blocks claimed. - * Sets *nblocks to the actual extent length allocated. + * Adjusts a reservation after an allocation has taken place. If the + * reservation does not match the allocation, or if it is now empty + * then it is removed. */ -static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, - unsigned int *nblocks) + +static void gfs2_adjust_reservation(struct gfs2_inode *ip, + const struct gfs2_rbm *rbm, unsigned len) { struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - struct gfs2_bitmap *bi; - u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm); - const unsigned int elen = *nblocks; - - bi = rs->rs_rbm.bi; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + struct gfs2_rgrpd *rgd = rbm->rgd; + unsigned rlen; + u64 block; + int ret; - for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { - if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - rs->rs_rbm.offset++; - rs->rs_free--; - - BUG_ON(!rgd->rd_reserved); - rgd->rd_reserved--; - dinode = false; + spin_lock(&rgd->rd_rsspin); + if (gfs2_rs_active(rs)) { + if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { + block = gfs2_rbm_to_block(rbm); + ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); + rlen = min(rs->rs_free, len); + rs->rs_free -= rlen; + rgd->rd_reserved -= rlen; + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + if (rs->rs_free && !ret) + goto out; + } + __rs_deltree(ip, rs); } - - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); - if (!rs->rs_free || *nblocks != elen) - gfs2_rs_deltree(ip, rs); - - return start_block; +out: + spin_unlock(&rgd->rd_rsspin); } /** @@ -1993,36 +2039,30 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u32 goal; /* block, within the rgrp scope */ + u64 goal; u64 block; /* block, within the file system scope */ int error; - /* If we have a reservation, claim blocks from it. */ - if (gfs2_rs_active(ip->i_res)) { - BUG_ON(!ip->i_res->rs_free); - rbm.rgd = ip->i_res->rs_rbm.rgd; - block = claim_reserved_blks(ip, dinode, nblocks); - if (*nblocks) - goto found_blocks; - } - - if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) - goal = ip->i_goal - rbm.rgd->rd_data0; + if (gfs2_rs_active(ip->i_res)) + goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); + else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal; else - goal = rbm.rgd->rd_last_alloc; + goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi); + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); /* Since all blocks are reserved in advance, this shouldn't happen */ - if (rbm.offset == BFITNOENT) { - printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks); - printk(KERN_WARNING "FULL=%d\n", - test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); + if (error) { + fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } block = gfs2_alloc_extent(&rbm, dinode, nblocks); -found_blocks: + if (gfs2_rs_active(ip->i_res)) + gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; if (dinode) ndata--; -- cgit v1.2.3 From 3983903a712e74548fa08ef25d68e55b8e4349c6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 3 Aug 2012 11:10:30 +0100 Subject: GFS2: Update gfs2_get_block_type() to use rbm Use the new gfs2_rbm_from_block() function to replace an open coded version of the same code. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bd3b926949d..0c1be38f837 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1824,27 +1824,14 @@ void gfs2_inplace_release(struct gfs2_inode *ip) static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) { - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_block, buf_block; - unsigned int buf; - unsigned char type; - - length = rgd->rd_length; - rgrp_block = block - rgd->rd_data0; - - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; - } - - gfs2_assert(rgd->rd_sbd, buf < length); - buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; + struct gfs2_rbm rbm = { .rgd = rgd, }; + int ret; - type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len, buf_block); + ret = gfs2_rbm_from_block(&rbm, block); + WARN_ON_ONCE(ret != 0); - return type; + return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len, rbm.offset); } -- cgit v1.2.3 From 3b1d0b9d0b6f4b76293c8f30cc95aa946bd34150 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 3 Aug 2012 11:23:28 +0100 Subject: GFS2: Update rgblk_free() to use rbm Replace open coded version with a call to gfs2_rbm_from_block() Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0c1be38f837..06476b34a1b 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1890,46 +1890,30 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { - struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi = NULL; - u32 length, rgrp_blk, buf_blk; - unsigned int buf; + struct gfs2_rbm rbm; - rgd = gfs2_blk2rgrpd(sdp, bstart, 1); - if (!rgd) { + rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); + if (!rbm.rgd) { if (gfs2_consist(sdp)) fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); return NULL; } - length = rgd->rd_length; - - rgrp_blk = bstart - rgd->rd_data0; - while (blen--) { - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + gfs2_rbm_from_block(&rbm, bstart); + bstart++; + if (!rbm.bi->bi_clone) { + rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, + rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len); } - - gfs2_assert(rgd->rd_sbd, buf < length); - - buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; - rgrp_blk++; - - if (!bi->bi_clone) { - bi->bi_clone = kmalloc(bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(bi->bi_clone + bi->bi_offset, - bi->bi_bh->b_data + bi->bi_offset, - bi->bi_len); - } - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); + gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); + gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state); } - return rgd; + return rbm.rgd; } /** -- cgit v1.2.3 From 24d634e8f3b43fe2eb7c7d66567de7aba8edc308 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Sun, 5 Aug 2012 22:04:08 -0700 Subject: GFS2: Use RB_CLEAR_NODE() rather than rb_init_node() gfs2 calls RB_EMPTY_NODE() to check if nodes are not on an rbtree. The corresponding initialization function is RB_CLEAR_NODE(). rb_init_node() was never clearly defined and is going away. Signed-off-by: Michel Lespinasse Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 06476b34a1b..7ce22d8c489 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -453,7 +453,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; - rb_init_node(&res->rs_node); + RB_CLEAR_NODE(&res->rs_node); down_write(&ip->i_rw_mutex); if (ip->i_res) @@ -486,7 +486,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) rgd = rs->rs_rbm.rgd; trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); - rb_init_node(&rs->rs_node); + RB_CLEAR_NODE(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); rgd->rd_rs_cnt--; -- cgit v1.2.3 From 5d50d5324612d28c47b9361e5424f13a19c888cd Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Aug 2012 13:47:12 +0100 Subject: GFS2: Fix case where reservation finished at end of rgrp One corner case which the original patch failed to take into account was when there is a reservation which ended such that the following block was one beyond the end of the rgrp in question. This extra test fixes that case. Signed-off-by: Steven Whitehouse Reported-by: Bob Peterson Tested-by: Bob Peterson --- fs/gfs2/rgrp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7ce22d8c489..c17029a92b8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1596,6 +1596,12 @@ find_next: n += (rbm->bi - initial_bi); goto find_next; } + if (ret == -E2BIG) { + index = 0; + rbm->offset = 0; + n += (rbm->bi - initial_bi); + goto res_covered_end_of_rgrp; + } return ret; bitmap_full: /* Mark bitmap as full and fall through */ @@ -1608,6 +1614,7 @@ next_bitmap: /* Find next bitmap in the rgrp */ index++; if (index == rbm->rgd->rd_length) index = 0; +res_covered_end_of_rgrp: rbm->bi = &rbm->rgd->rd_bits[index]; if ((index == 0) && nowrap) break; -- cgit v1.2.3 From 8d8b752a0f55282498b918f6a28f87ee2bcf19c3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 7 Aug 2012 13:28:17 -0400 Subject: GFS2: rbm code cleanup This patch fixes a few small rbm related things. First, it fixes a corner case where the rbm needs to switch bitmaps and wasn't adjusting its buffer pointer. Second, there's a white space issue fixed. Third, the logic in function gfs2_rbm_from_block was optimized a bit. Lastly, a check for goal block overflows was added to function gfs2_alloc_blocks. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c17029a92b8..c26711882a6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -467,7 +467,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) { gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -1493,16 +1493,18 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; for (x = 0; x < rbm->rgd->rd_length; x++) { rbm->bi = rbm->rgd->rd_bits + x; if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - return 0; + break; } } - return -E2BIG; + return 0; } /** @@ -1579,7 +1581,6 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, WARN_ON(!buffer_uptodate(bh)); if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; -find_next: initial_offset = rbm->offset; offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); if (offset == BFITNOENT) @@ -1594,7 +1595,7 @@ find_next: return 0; if (ret > 0) { n += (rbm->bi - initial_bi); - goto find_next; + goto next_iter; } if (ret == -E2BIG) { index = 0; @@ -1619,6 +1620,7 @@ res_covered_end_of_rgrp: if ((index == 0) && nowrap) break; n++; +next_iter: if (n >= iters) break; } @@ -2028,6 +2030,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, else goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; + if ((goal < rbm.rgd->rd_data0) || + (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data)) + rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1); + gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); -- cgit v1.2.3 From 29c05b205d4d30248982d5167bb142146db89bd3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 9 Aug 2012 12:48:47 -0500 Subject: GFS2: Eliminate unnecessary check for state > 3 in bitfit Function gfs2_bitfit was checking for state > 3, but that's impossible since it is only called from rgblk_search, which receives only GFS2_BLKST_ constants. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c26711882a6..47d2346575a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -228,8 +228,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, u64 mask = 0x5555555555555555ULL; u32 bit; - BUG_ON(state > 3); - /* Mask off bits we don't care about at the start of the search */ mask <<= spoint; tmp = gfs2_bit_search(ptr, mask, state); -- cgit v1.2.3 From c04a2ef3a8b51cf58d8ff18bb4d6af17252f0fb6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Aug 2012 11:14:57 +0100 Subject: GFS2: Use rbm for gfs2_testbit() Change the arguments to gfs2_testbit() so that it now just takes an rbm specifying the position of the two bit entry to return. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 72 +++++++++++++++++++++++----------------------------------- 1 file changed, 28 insertions(+), 44 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 47d2346575a..3a288cec5af 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -117,30 +117,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, /** * gfs2_testbit - test a bit in the bitmaps - * @rgd: the resource group descriptor - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to read + * @rbm: The bit to test * + * Returns: The two bit block state of the requested bit */ -static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, - const unsigned char *buffer, - unsigned int buflen, u32 block) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const unsigned char *byte, *end; - unsigned char cur_state; + const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + const u8 *byte; unsigned int bit; - byte = buffer + (block / GFS2_NBBY); - bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; - - gfs2_assert(rgd->rd_sbd, byte < end); - - cur_state = (*byte >> bit) & GFS2_BIT_MASK; + byte = buffer + (rbm->offset / GFS2_NBBY); + bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - return cur_state; + return (*byte >> bit) & GFS2_BIT_MASK; } /** @@ -1837,8 +1828,7 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) ret = gfs2_rbm_from_block(&rbm, block); WARN_ON_ONCE(ret != 0); - return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, - rbm.bi->bi_len, rbm.offset); + return gfs2_testbit(&rbm); } @@ -1846,42 +1836,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information * @dinode: TRUE if the first block we allocate is for a dinode - * @n: The extent length + * @n: The extent length (value/result) * - * Add the found bitmap buffer to the transaction. + * Add the bitmap buffer to the transaction. * Set the found bits to @new_state to change block's allocation state. - * Returns: starting block number of the extent (fs scope) */ -static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, +static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, unsigned int *n) { - struct gfs2_rgrpd *rgd = rbm->rgd; - struct gfs2_bitmap *bi = rbm->bi; - u32 blk = rbm->offset; + struct gfs2_rbm pos = { .rgd = rbm->rgd, }; const unsigned int elen = *n; - u32 goal; - const u8 *buffer = NULL; + u64 block; + int ret; - *n = 0; - buffer = bi->bi_bh->b_data + bi->bi_offset; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); - gfs2_setbit(rgd, bi->bi_clone, bi, blk, + *n = 1; + block = gfs2_rbm_to_block(rbm); + gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); + gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); - (*n)++; - goal = blk; + block++; while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) - break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) + ret = gfs2_rbm_from_block(&pos, block); + WARN_ON(ret); + if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); + gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); + gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED); (*n)++; + block++; } - blk = gfs2_bi2rgd_blk(bi, blk); - rgd->rd_last_alloc = blk + *n - 1; - return rgd->rd_data0 + blk; } /** @@ -2042,7 +2025,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, goto rgrp_error; } - block = gfs2_alloc_extent(&rbm, dinode, nblocks); + gfs2_alloc_extent(&rbm, dinode, nblocks); + block = gfs2_rbm_to_block(&rbm); if (gfs2_rs_active(ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; -- cgit v1.2.3 From 3e6339dd2850353781513dec42a8d56690e9e1db Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 13 Aug 2012 11:37:51 +0100 Subject: GFS2: Use rbm for gfs2_setbit() Use the rbm structure for gfs2_setbit() in order to simplify the arguments to the function. We have to add a bool to control whether the clone bitmap should be updated (if it exists) but otherwise it is a more or less direct substitution. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3a288cec5af..55a2651666c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -69,47 +69,42 @@ static const char valid_change[16] = { /** * gfs2_setbit - Set a bit in the bitmaps - * @rgd: the resource group descriptor - * @buf2: the clone buffer that holds the bitmaps - * @bi: the bitmap structure - * @block: the block to set + * @rbm: The position of the bit to set + * @do_clone: Also set the clone bitmap, if it exists * @new_state: the new state of the block * */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, - struct gfs2_bitmap *bi, u32 block, +static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = bi->bi_len; - const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + unsigned int buflen = rbm->bi->bi_len; + const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); - end = bi->bi_bh->b_data + bi->bi_offset + buflen; + byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " - "new_state=%d\n", - (unsigned long long)block, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", - (unsigned long long)rgd->rd_addr, - (unsigned long)bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", - (unsigned long)bi->bi_offset, - (unsigned long)bi->bi_len); + printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " + "new_state=%d\n", rbm->offset, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, + rbm->bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", + rbm->bi->bi_offset, rbm->bi->bi_len); dump_stack(); - gfs2_consist_rgrpd(rgd); + gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (buf2) { - byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); + if (do_clone && rbm->bi->bi_clone) { + byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -1852,8 +1847,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, *n = 1; block = gfs2_rbm_to_block(rbm); gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); - gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset, - dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); @@ -1861,7 +1855,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); - gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED); + gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; block++; } @@ -1900,7 +1894,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, rbm.bi->bi_len); } gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); - gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state); + gfs2_setbit(&rbm, false, new_state); } return rbm.rgd; -- cgit v1.2.3 From 137834a696fd51ef8c710a0ad854b585027c7df0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Aug 2012 13:43:40 +0100 Subject: GFS2: Fall back to ignoring reservations, if there are no other blocks left When we get to the stage of allocating blocks, we know that the resource group in question must contain enough free blocks, otherwise gfs2_inplace_reserve() would have failed. So if we are left with only free blocks which are reserved, then we must use those. This can happen if another node has sneeked in and use some blocks reserved on this node, for example. Generally this will happen very rarely and only when the resouce group is nearly full. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 55a2651666c..30c864e7029 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2012,6 +2012,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); + if (error == -ENOSPC) { + gfs2_rbm_from_block(&rbm, goal); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false); + } + /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, -- cgit v1.2.3 From 9e733d3923fb0e4eeae7b827019332d246576a22 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Aug 2012 15:37:59 +0100 Subject: GFS2: Improve block reservation tracing This patch improves the tracing of block reservations by removing some corner cases and also providing more useful detail in the traces. A new field is added to the reservation structure to contain the inode number. This is used since in certain contexts it is not possible to access the inode itself to obtain this information. As a result we can then display the inode number for all tracepoints and also in case we dump the resource group. The "del" tracepoint operation has been removed. This could be called with the reservation rgrp set to NULL. That resulted in not printing the device number, and thus making the information largely useless anyway. Also, the conditional on the rgrp being NULL can then be removed from the tracepoint. After this change, all the block reservation tracepoint calls will be called with the rgrp information. The existing ins,clm and tdel calls to the block reservation tracepoint are sufficient to track the entire life of the block reservation. In gfs2_block_alloc() the error detection is updated to print out the inode number of the problematic inode. This can then be compared against the information in the glock dump,tracepoints, etc. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 30c864e7029..87ee0b70f81 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -448,10 +448,11 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) return error; } -static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { - gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + (unsigned long long)rs->rs_inum, + (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); } @@ -468,7 +469,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) return; rgd = rs->rs_rbm.rgd; - trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL); + trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); BUG_ON(!rgd->rd_rs_cnt); @@ -511,7 +512,6 @@ void gfs2_rs_delete(struct gfs2_inode *ip) down_write(&ip->i_rw_mutex); if (ip->i_res) { gfs2_rs_deltree(ip, ip->i_res); - trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; @@ -1253,6 +1253,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rs->rs_free = amount; rs->rs_rbm.offset = biblk; rs->rs_rbm.bi = bi; + rs->rs_inum = ip->i_no_addr; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); @@ -1260,7 +1261,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, rgd->rd_reserved += amount; /* blocks reserved */ rgd->rd_rs_cnt++; /* number of in-tree reservations */ spin_unlock(&rgd->rd_rsspin); - trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); + trace_gfs2_rs(rs, TRACE_RS_INSERT); return rs; } @@ -1966,7 +1967,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, rlen = min(rs->rs_free, len); rs->rs_free -= rlen; rgd->rd_reserved -= rlen; - trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_free && !ret) goto out; } @@ -2005,10 +2006,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, else goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - if ((goal < rbm.rgd->rd_data0) || - (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data)) - rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1); - gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); @@ -2019,7 +2016,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { - fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks, + fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", + (unsigned long long)ip->i_no_addr, error, *nblocks, test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); goto rgrp_error; } -- cgit v1.2.3 From c743ffd09fa7d3464c6f74767a3ae2ca5dc3ebf7 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Sat, 25 Aug 2012 18:21:47 +0100 Subject: GFS2: Fix unclaimed_blocks() wrapping bug and clean up When rgd->rd_free_clone is less than rgd->rd_reserved, the unclaimed_blocks() calculation would wrap and produce incorrect results. This patch checks for this condition when this function is called from gfs2_mblk_search() In addition, the use of this particular function in other places in the code has been dropped by means of a general clean up of gfs2_inplace_reserve(). This function is now much easier to follow. Also the setting of the rgd->rd_last_alloc field is corrected. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 193 ++++++++++++++++++++++++++------------------------------- 1 file changed, 88 insertions(+), 105 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 87ee0b70f81..88695412670 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1231,7 +1231,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, BUG_ON(!ip->i_res); BUG_ON(gfs2_rs_active(rs)); /* Figure out where to put new node */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); @@ -1276,17 +1276,16 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) /** * rg_mblk_search - find a group of multiple free blocks * @rgd: the resource group descriptor - * @rs: the block reservation * @ip: pointer to the inode for which we're reserving blocks + * @requested: number of blocks required for this allocation * * This is very similar to rgblk_search, except we're looking for whole * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing * on aligned dwords for speed's sake. * - * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) { struct gfs2_bitmap *bi = rgd->rd_bits; const u32 length = rgd->rd_length; @@ -1299,11 +1298,16 @@ static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigne u32 best_rs_bytes, unclaimed; int best_rs_blocks; + if ((rgd->rd_free_clone < rgd->rd_reserved) || + (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS))) + return; + /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) goal = ip->i_goal - rgd->rd_data0; else goal = rgd->rd_last_alloc; + for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; /* Convert scope of "goal" from rgrp-wide to within @@ -1366,10 +1370,8 @@ do_search: BUG_ON(blk >= bi->bi_len * GFS2_NBBY); rs = rs_insert(bi, ip, blk, rsv_bytes * GFS2_NBBY); - if (IS_ERR(rs)) - return PTR_ERR(rs); if (rs) - return 0; + return; } ptr += ALIGN(search_bytes, sizeof(u64)); } @@ -1380,35 +1382,6 @@ skip: buf %= length; goal = 0; } - - return BFITNOENT; -} - -/** - * try_rgrp_fit - See if a given reservation will fit in a given RG - * @rgd: the RG data - * @ip: the inode - * - * If there's room for the requested blocks to be allocated from the RG: - * This will try to get a multi-block reservation first, and if that doesn't - * fit, it will take what it can. - * - * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) - */ - -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, - unsigned requested) -{ - if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) - return 0; - /* Look for a multi-block reservation. */ - if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && - rg_mblk_search(rgd, ip, requested) != BFITNOENT) - return 1; - if (unclaimed_blocks(rgd) >= requested) - return 1; - - return 0; } /** @@ -1678,6 +1651,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } +static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) +{ + struct gfs2_rgrpd *rgd = *pos; + + rgd = gfs2_rgrpd_get_next(rgd); + if (rgd == NULL) + rgd = gfs2_rgrpd_get_next(NULL); + *pos = rgd; + if (rgd != begin) /* If we didn't wrap */ + return true; + return false; +} + /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1697,10 +1683,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, requested)) { - error = -EINVAL; - goto out; - } + if (gfs2_assert_warn(sdp, requested)) + return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; flags = 0; /* Yoda: Do or do not. There is no try */ @@ -1713,84 +1697,82 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) return -EBADSLT; while (loops < 3) { - rg_locked = 0; - - if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { - rg_locked = 1; - error = 0; - } else if (!loops && !gfs2_rs_active(rs) && - rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { - /* If the rgrp already is maxed out for contenders, - we can eliminate it as a "first pass" without even - requesting the rgrp glock. */ - error = GLR_TRYFAILED; - } else { + rg_locked = 1; + + if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { + rg_locked = 0; error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); - if (!error && sdp->sd_args.ar_rgrplvb) { + if (error == GLR_TRYFAILED) + goto next_rgrp; + if (unlikely(error)) + return error; + if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rbm.rgd); - if (error) { + if (unlikely(error)) { gfs2_glock_dq_uninit(&rs->rs_rgd_gh); return error; } } } - switch (error) { - case 0: - if (gfs2_rs_active(rs)) { - if (unclaimed_blocks(rs->rs_rbm.rgd) + - rs->rs_free >= requested) { - ip->i_rgd = rs->rs_rbm.rgd; - return 0; - } - /* We have a multi-block reservation, but the - rgrp doesn't have enough free blocks to - satisfy the request. Free the reservation - and look for a suitable rgrp. */ - gfs2_rs_deltree(ip, rs); - } - if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); - ip->i_rgd = rs->rs_rbm.rgd; - return 0; - } - if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) { - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); - try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, - ip->i_no_addr); - } - if (!rg_locked) - gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - /* fall through */ - case GLR_TRYFAILED: - rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd); - rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */ - if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */ - break; - flags &= ~LM_FLAG_TRY; - loops++; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && - !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - goto out; - } else if (loops == 2) - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - break; - default: - goto out; + /* Skip unuseable resource groups */ + if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + goto skip_rgrp; + + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + + /* Get a reservation if we don't already have one */ + if (!gfs2_rs_active(rs)) + rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + + /* Skip rgrps when we can't get a reservation on first pass */ + if (!gfs2_rs_active(rs) && (loops < 1)) + goto check_rgrp; + + /* If rgrp has enough free space, use it */ + if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + ip->i_rgd = rs->rs_rbm.rgd; + return 0; } + + /* Drop reservation, if we couldn't use reserved rgrp */ + if (gfs2_rs_active(rs)) + gfs2_rs_deltree(ip, rs); +check_rgrp: + /* Check for unlinked inodes which can be reclaimed */ + if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + ip->i_no_addr); +skip_rgrp: + /* Unlock rgrp if required */ + if (!rg_locked) + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); +next_rgrp: + /* Find the next rgrp, and continue looking */ + if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + continue; + + /* If we've scanned all the rgrps, but found no free blocks + * then this checks for some less likely conditions before + * trying again. + */ + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + return error; + } + /* Flushing the log may release space */ + if (loops == 2) + gfs2_log_flush(sdp, NULL); } - error = -ENOSPC; -out: - return error; + return -ENOSPC; } /** @@ -2024,6 +2006,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); + rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; if (gfs2_rs_active(ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); ndata = *nblocks; -- cgit v1.2.3 From 0688a5ecea61a36ba12d17a18ab9f8712145cfa2 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 28 Aug 2012 08:45:56 -0400 Subject: GFS2: Stop block extents at the end of bitmaps This patch stops multiple block allocations if a nonzero return code is received from gfs2_rbm_from_block. Without this patch, if enough pressure is put on the file system, you get a kernel warning quickly followed by: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] gfs2_alloc_blocks+0x2c8/0x880 [gfs2] With this patch, things run normally. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 88695412670..defb8265ce5 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1834,8 +1834,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); - WARN_ON(ret); - if (gfs2_testbit(&pos) != GFS2_BLKST_FREE) + if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); gfs2_setbit(&pos, true, GFS2_BLKST_USED); -- cgit v1.2.3 From ff7f4cb461163967a9dbb8c569e2447b7520654f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 10 Sep 2012 10:03:50 +0100 Subject: GFS2: Consolidate free block searching functions With the recently added block reservation code, an additional function was added to search for free blocks. This had a restriction of only being able to search for aligned extents of free blocks. As a result the allocation patterns when reserving blocks were suboptimal when the existing allocation of blocks for an inode was not aligned to the same boundary. This patch resolves that problem by adding the ability for gfs2_rbm_find to search for extents of a particular minimum size. We can then use gfs2_rbm_find for both looking for reservations, and also looking for free blocks on an individual basis when we actually come to do the allocation later on. As a result we only need a single set of code to deal with both situations. The function gfs2_rbm_from_block() is moved up rgrp.c so that it occurs before all of its callers. Many thanks are due to Bob for helping track down the final issue in this patch. That fix to the rb_tree traversal and to not share block reservations from a dirctory to its children is included here. Signed-off-by: Steven Whitehouse Signed-off-by: Bob Peterson --- fs/gfs2/rgrp.c | 367 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 192 insertions(+), 175 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index defb8265ce5..b933cdcda7f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,9 +35,6 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -#define RSRV_CONTENTION_FACTOR 4 -#define RGRP_RSRV_MAX_CONTENDERS 2 - #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -67,6 +64,10 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap); + + /** * gfs2_setbit - Set a bit in the bitmaps * @rbm: The position of the bit to set @@ -234,6 +235,130 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; } +/** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + break; + } + } + + return 0; +} + +/** + * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ + +static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) +{ + u64 block; + u32 n; + u8 res; + + for (n = 0; n < n_unaligned; n++) { + res = gfs2_testbit(rbm); + if (res != GFS2_BLKST_FREE) + return true; + (*len)--; + if (*len == 0) + return true; + block = gfs2_rbm_to_block(rbm); + if (gfs2_rbm_from_block(rbm, block + 1)) + return true; + } + + return false; +} + +/** + * gfs2_free_extlen - Return extent length of free blocks + * @rbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using memchr_inv when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ + +static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) +{ + struct gfs2_rbm rbm = *rrbm; + u32 n_unaligned = rbm.offset & 3; + u32 size = len; + u32 bytes; + u32 chunk_size; + u8 *ptr, *start, *end; + u64 block; + + if (n_unaligned && + gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) + goto out; + + /* Start is now byte aligned */ + while (len > 3) { + start = rbm.bi->bi_bh->b_data; + if (rbm.bi->bi_clone) + start = rbm.bi->bi_clone; + end = start + rbm.bi->bi_bh->b_size; + start += rbm.bi->bi_offset; + BUG_ON(rbm.offset & 3); + start += (rbm.offset / GFS2_NBBY); + bytes = min_t(u32, len / GFS2_NBBY, (end - start)); + ptr = memchr_inv(start, 0, bytes); + chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); + chunk_size *= GFS2_NBBY; + BUG_ON(len < chunk_size); + len -= chunk_size; + block = gfs2_rbm_to_block(&rbm); + gfs2_rbm_from_block(&rbm, block + chunk_size); + n_unaligned = 3; + if (ptr) + break; + n_unaligned = len & 3; + } + + /* Deal with any bits left over at the end */ + if (n_unaligned) + gfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: + return size - len; +} + /** * gfs2_bitcount - count the number of bits in a certain state * @rgd: the resource group descriptor @@ -472,8 +597,6 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); - BUG_ON(!rgd->rd_rs_cnt); - rgd->rd_rs_cnt--; if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ @@ -1208,179 +1331,85 @@ out: /** * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree - * @bi: the bitmap with the blocks * @ip: the inode structure - * @biblk: the 32-bit block number relative to the start of the bitmap - * @amount: the number of blocks to reserve * - * Returns: NULL - reservation was already taken, so not inserted - * pointer to the inserted reservation */ -static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, - struct gfs2_inode *ip, u32 biblk, - int amount) +static void rs_insert(struct gfs2_inode *ip) { struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - BUG_ON(!ip->i_res); BUG_ON(gfs2_rs_active(rs)); - /* Figure out where to put new node */ + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, amount, cur); + rc = rs_cmp(fsblock, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) newn = &((*newn)->rb_left); else { spin_unlock(&rgd->rd_rsspin); - return NULL; /* reservation already in use */ + WARN_ON(1); + return; } } - /* Do our reservation work */ - rs = ip->i_res; - rs->rs_free = amount; - rs->rs_rbm.offset = biblk; - rs->rs_rbm.bi = bi; - rs->rs_inum = ip->i_no_addr; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += amount; /* blocks reserved */ - rgd->rd_rs_cnt++; /* number of in-tree reservations */ + rgd->rd_reserved += rs->rs_free; /* blocks reserved */ spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); - return rs; } /** - * unclaimed_blocks - return number of blocks that aren't spoken for - */ -static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) -{ - return rgd->rd_free_clone - rgd->rd_reserved; -} - -/** - * rg_mblk_search - find a group of multiple free blocks + * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks * @requested: number of blocks required for this allocation * - * This is very similar to rgblk_search, except we're looking for whole - * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing - * on aligned dwords for speed's sake. - * */ -static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, + unsigned requested) { - struct gfs2_bitmap *bi = rgd->rd_bits; - const u32 length = rgd->rd_length; - u32 blk; - unsigned int buf, x, search_bytes; - u8 *buffer = NULL; - u8 *ptr, *end, *nonzero; - u32 goal, rsv_bytes; - struct gfs2_blkreserv *rs; - u32 best_rs_bytes, unclaimed; - int best_rs_blocks; + struct gfs2_rbm rbm = { .rgd = rgd, }; + u64 goal; + struct gfs2_blkreserv *rs = ip->i_res; + u32 extlen; + u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; + int ret; - if ((rgd->rd_free_clone < rgd->rd_reserved) || - (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS))) + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); + extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); + if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) return; /* Find bitmap block that contains bits for goal block */ if (rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; + goal = ip->i_goal; else - goal = rgd->rd_last_alloc; + goal = rgd->rd_last_alloc + rgd->rd_data0; - for (buf = 0; buf < length; buf++) { - bi = rgd->rd_bits + buf; - /* Convert scope of "goal" from rgrp-wide to within - found bit block */ - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { - goal -= bi->bi_start * GFS2_NBBY; - goto do_search; - } - } - buf = 0; - goal = 0; - -do_search: - best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), - (RGRP_RSRV_MINBLKS * rgd->rd_length)); - best_rs_bytes = (best_rs_blocks * - (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / - GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ - best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); - unclaimed = unclaimed_blocks(rgd); - if (best_rs_bytes * GFS2_NBBY > unclaimed) - best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; - - for (x = 0; x <= length; x++) { - bi = rgd->rd_bits + buf; - - if (test_bit(GBF_FULL, &bi->bi_flags)) - goto skip; + if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) + return; - WARN_ON(!buffer_uptodate(bi->bi_bh)); - if (bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; - else - buffer = bi->bi_bh->b_data + bi->bi_offset; - - /* We have to keep the reservations aligned on u64 boundaries - otherwise we could get situations where a byte can't be - used because it's after a reservation, but a free bit still - is within the reservation's area. */ - ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); - end = (buffer + bi->bi_len); - while (ptr < end) { - rsv_bytes = 0; - if ((ptr + best_rs_bytes) <= end) - search_bytes = best_rs_bytes; - else - search_bytes = end - ptr; - BUG_ON(!search_bytes); - nonzero = memchr_inv(ptr, 0, search_bytes); - /* If the lot is all zeroes, reserve the whole size. If - there's enough zeroes to satisfy the request, use - what we can. If there's not enough, keep looking. */ - if (nonzero == NULL) - rsv_bytes = search_bytes; - else if ((nonzero - ptr) * GFS2_NBBY >= requested) - rsv_bytes = (nonzero - ptr); - - if (rsv_bytes) { - blk = ((ptr - buffer) * GFS2_NBBY); - BUG_ON(blk >= bi->bi_len * GFS2_NBBY); - rs = rs_insert(bi, ip, blk, - rsv_bytes * GFS2_NBBY); - if (rs) - return; - } - ptr += ALIGN(search_bytes, sizeof(u64)); - } -skip: - /* Try next bitmap block (wrap back to rgrp header - if at end) */ - buf++; - buf %= length; - goal = 0; + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); + if (ret == 0) { + rs->rs_rbm = rbm; + rs->rs_free = extlen; + rs->rs_inum = ip->i_no_addr; + rs_insert(ip); } } @@ -1388,6 +1417,7 @@ skip: * gfs2_next_unreserved_block - Return next block that is not reserved * @rgd: The resource group * @block: The starting block + * @length: The required length * @ip: Ignore any reservations for this inode * * If the block does not appear in any reservation, then return the @@ -1397,6 +1427,7 @@ skip: */ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, + u32 length, const struct gfs2_inode *ip) { struct gfs2_blkreserv *rs; @@ -1404,10 +1435,10 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, int rc; spin_lock(&rgd->rd_rsspin); - n = rb_first(&rgd->rd_rstree); + n = rgd->rd_rstree.rb_node; while (n) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(block, 1, rs); + rc = rs_cmp(block, length, rs); if (rc < 0) n = n->rb_left; else if (rc > 0) @@ -1417,9 +1448,9 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) { + while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; - n = rb_next(&rs->rs_node); + n = n->rb_right; if (n == NULL) break; rs = rb_entry(n, struct gfs2_blkreserv, rs_node); @@ -1430,44 +1461,11 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, return block; } -/** - * gfs2_rbm_from_block - Set the rbm based upon rgd and block number - * @rbm: The rbm with rgd already set correctly - * @block: The block number (filesystem relative) - * - * This sets the bi and offset members of an rbm based on a - * resource group and a filesystem relative block number. The - * resource group must be set in the rbm on entry, the bi and - * offset members will be set by this function. - * - * Returns: 0 on success, or an error code - */ - -static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) -{ - u64 rblock = block - rbm->rgd->rd_data0; - u32 goal = (u32)rblock; - int x; - - if (WARN_ON_ONCE(rblock > UINT_MAX)) - return -EINVAL; - if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) - return -E2BIG; - - for (x = 0; x < rbm->rgd->rd_length; x++) { - rbm->bi = rbm->rgd->rd_bits + x; - if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { - rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - break; - } - } - - return 0; -} - /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group + * @ip: The inode for which we are searching for blocks + * @minext: The minimum extent length * * This checks the current position in the rgrp to see whether there is * a reservation covering this block. If not then this function is a @@ -1479,15 +1477,33 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip) + const struct gfs2_inode *ip, + u32 minext) { u64 block = gfs2_rbm_to_block(rbm); + u32 extlen = 1; u64 nblock; int ret; - nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip); + /* + * If we have a minimum extent length, then skip over any extent + * which is less than the min extent length in size. + */ + if (minext) { + extlen = gfs2_free_extlen(rbm, minext); + nblock = block + extlen; + if (extlen < minext) + goto fail; + } + + /* + * Check the extent which has been found against the reservations + * and skip if parts of it are already reserved + */ + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); if (nblock == block) return 0; +fail: ret = gfs2_rbm_from_block(rbm, nblock); if (ret < 0) return ret; @@ -1498,6 +1514,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find + * @minext: The requested extent length (0 for a single block) * @ip: If set, check for reservations * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. @@ -1509,7 +1526,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * Returns: 0 on success, -ENOSPC if there is no block of the requested state */ -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, const struct gfs2_inode *ip, bool nowrap) { struct buffer_head *bh; @@ -1548,7 +1565,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, return 0; initial_bi = rbm->bi; - ret = gfs2_reservation_check_and_update(rbm, ip); + ret = gfs2_reservation_check_and_update(rbm, ip, minext); if (ret == 0) return 0; if (ret > 0) { @@ -1608,7 +1625,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip while (1) { down_write(&sdp->sd_log_flush_lock); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; @@ -1988,11 +2005,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); if (error == -ENOSPC) { gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); } /* Since all blocks are reserved in advance, this shouldn't happen */ -- cgit v1.2.3 From 3701530aed9711fcfe9b29d3de0f7a27c8dbc8ae Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 12 Sep 2012 09:40:31 -0400 Subject: GFS2: Fix infinite loop in rbm_find This patch fixes an infinite loop in gfs2_rbm_find that was introduced by the previous patch. The problem occurred when the length was less than 3 but the rbm block was byte-aligned, causing it to improperly return a extent length of zero, which caused it to spin. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse Tested-by: Bob Peterson Tested-by: Barry Marson --- fs/gfs2/rgrp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b933cdcda7f..3cc402ce6fe 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -329,6 +329,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) goto out; + n_unaligned = len & 3; /* Start is now byte aligned */ while (len > 3) { start = rbm.bi->bi_bh->b_data; -- cgit v1.2.3 From cd0ed19fb614cb1315c0a510ec6c163d8324fd82 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Fri, 12 Oct 2012 16:45:09 +0100 Subject: GFS2: Fix possible null pointer deref in gfs2_rs_alloc Despite the return value from kmem_cache_zalloc() being checked, the error wasn't being returned until after a possible null pointer dereference. This patch returns the error immediately, allowing the removal of the error variable. Signed-off-by: Andrew Price Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3cc402ce6fe..43d1a20bdbe 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) */ int gfs2_rs_alloc(struct gfs2_inode *ip) { - int error = 0; struct gfs2_blkreserv *res; if (ip->i_res) @@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); if (!res) - error = -ENOMEM; + return -ENOMEM; RB_CLEAR_NODE(&res->rs_node); @@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) else ip->i_res = res; up_write(&ip->i_rw_mutex); - return error; + return 0; } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) -- cgit v1.2.3 From 3a238adefb8c5b8cb8cde0ce689d513306176ff4 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 16 Oct 2012 11:39:07 +0200 Subject: GFS2: Require user to provide argument for FITRIM When the fstrim_range argument is not provided by user in FITRIM ioctl we should just return EFAULT and not promoting bad behaviour by filling the structure in kernel. Let the user deal with it. Signed-off-by: Lukas Czerner Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 43d1a20bdbe..b6bbf718d6c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1270,11 +1270,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!blk_queue_discard(q)) return -EOPNOTSUPP; - if (argp == NULL) { - r.start = 0; - r.len = ULLONG_MAX; - r.minlen = 0; - } else if (copy_from_user(&r, argp, sizeof(r))) + if (copy_from_user(&r, argp, sizeof(r))) return -EFAULT; ret = gfs2_rindex_update(sdp); @@ -1323,7 +1319,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) out: r.len = trimmed << 9; - if (argp && copy_to_user(argp, &r, sizeof(r))) + if (copy_to_user(argp, &r, sizeof(r))) return -EFAULT; return ret; -- cgit v1.2.3 From 076f0faa764ab3a5a32fc726ae05e2de0e66151d Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 16 Oct 2012 11:39:08 +0200 Subject: GFS2: Fix FITRIM argument handling Currently implementation in gfs2 uses FITRIM arguments as it were in file system blocks units which is wrong. The FITRIM arguments (fstrim_range.start, fstrim_range.len and fstrim_range.minlen) are actually in bytes. Moreover, check for start argument beyond the end of file system, len argument being smaller than file system block and minlen argument being bigger than biggest resource group were missing. This commit converts the code to convert FITRIM argument to file system blocks and also adds appropriate checks mentioned above. All the problems were recognised by xfstests 251 and 260. Signed-off-by: Lukas Czerner Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b6bbf718d6c..38fe18f2f05 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1262,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) int ret = 0; u64 amt; u64 trimmed = 0; + u64 start, end, minlen; unsigned int x; + unsigned bs_shift = sdp->sd_sb.sb_bsize_shift; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1277,8 +1279,18 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (ret) return ret; - rgd = gfs2_blk2rgrpd(sdp, r.start, 0); - rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); + start = r.start >> bs_shift; + end = start + (r.len >> bs_shift); + minlen = max_t(u64, r.minlen, + q->limits.discard_granularity) >> bs_shift; + + rgd = gfs2_blk2rgrpd(sdp, start, 0); + rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); + + if (end <= start || + minlen > sdp->sd_max_rg_data || + start > rgd_end->rd_data0 + rgd_end->rd_data) + return -EINVAL; while (1) { @@ -1290,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; - ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); + ret = gfs2_rgrp_send_discards(sdp, + rgd->rd_data0, NULL, bi, minlen, + &amt); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; -- cgit v1.2.3 From a68a0a352a0209467268dfddffe02db08b97ddb4 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 19 Oct 2012 08:32:51 -0400 Subject: GFS2: Speed up gfs2_rbm_from_block This patch is a rewrite of function gfs2_rbm_from_block. Rather than looping to find the right bitmap, the code now does a few simple math calculations. I compared the performance of both algorithms side by side and the new algorithm is noticeably faster. Sample instrumentation output from a "fast" machine: 5 million calls: millisec spent: Orig: 166 New: 113 5 million calls: millisec spent: Orig: 189 New: 114 In addition, I ran postmark (on a somewhat slowr CPU) before the after the new algorithm was put in place and postmark showed a decent improvement: Before the new algorithm: ------------------------- Time: 645 seconds total 584 seconds of transactions (171 per second) Files: 150087 created (232 per second) Creation alone: 100000 files (2083 per second) Mixed with transactions: 50087 files (85 per second) 49995 read (85 per second) 49991 appended (85 per second) 150087 deleted (232 per second) Deletion alone: 100174 files (7705 per second) Mixed with transactions: 49913 files (85 per second) Data: 273.42 megabytes read (434.08 kilobytes per second) 852.13 megabytes written (1.32 megabytes per second) With the new algorithm: ----------------------- Time: 599 seconds total 530 seconds of transactions (188 per second) Files: 150087 created (250 per second) Creation alone: 100000 files (1886 per second) Mixed with transactions: 50087 files (94 per second) 49995 read (94 per second) 49991 appended (94 per second) 150087 deleted (250 per second) Deletion alone: 100174 files (6260 per second) Mixed with transactions: 49913 files (94 per second) Data: 273.42 megabytes read (467.42 kilobytes per second) 852.13 megabytes written (1.42 megabytes per second) Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 38fe18f2f05..669b89b95cc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -251,22 +251,25 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) { u64 rblock = block - rbm->rgd->rd_data0; - u32 goal = (u32)rblock; - int x; + u32 x; if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) return -E2BIG; - for (x = 0; x < rbm->rgd->rd_length; x++) { - rbm->bi = rbm->rgd->rd_bits + x; - if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { - rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - break; - } - } + rbm->bi = rbm->rgd->rd_bits; + rbm->offset = (u32)(rblock); + /* Check if the block is within the first block */ + if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) + return 0; + /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ + rbm->offset += (sizeof(struct gfs2_rgrp) - + sizeof(struct gfs2_meta_header)) * GFS2_NBBY; + x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->bi += x; return 0; } -- cgit v1.2.3 From bcd97c06308cbfa8b46e11762ea116300cdce772 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 31 Oct 2012 09:58:42 +0000 Subject: GFS2: Add test for resource group congestion status This patch uses information gathered by the recent glock statistics patch in order to derrive a boolean verdict on the congestion status of a resource group. This is then used when making decisions on which resource group to choose during block allocation. The aim is to avoid resource groups which are heavily contended by other nodes, while still ensuring locality of access wherever possible. Once a reservation has been made in a particular resource group we continue to use that resource group until a new reservation is required. This should help to ensure that we do not change resource groups too often. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 4 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 669b89b95cc..bdf3e644baa 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1681,6 +1681,88 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } +/** + * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested + * @rgd: The rgrp in question + * @loops: An indication of how picky we can be (0=very, 1=less so) + * + * This function uses the recently added glock statistics in order to + * figure out whether a parciular resource group is suffering from + * contention from multiple nodes. This is done purely on the basis + * of timings, since this is the only data we have to work with and + * our aim here is to reject a resource group which is highly contended + * but (very important) not to do this too often in order to ensure that + * we do not land up introducing fragmentation by changing resource + * groups when not actually required. + * + * The calculation is fairly simple, we want to know whether the SRTTB + * (i.e. smoothed round trip time for blocking operations) to acquire + * the lock for this rgrp's glock is significantly greater than the + * time taken for resource groups on average. We introduce a margin in + * the form of the variable @var which is computed as the sum of the two + * respective variences, and multiplied by a factor depending on @loops + * and whether we have a lot of data to base the decision on. This is + * then tested against the square difference of the means in order to + * decide whether the result is statistically significant or not. + * + * Returns: A boolean verdict on the congestion status + */ + +static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) +{ + const struct gfs2_glock *gl = rgd->rd_gl; + const struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_lkstats *st; + s64 r_dcount, l_dcount; + s64 r_srttb, l_srttb; + s64 srttb_diff; + s64 sqr_diff; + s64 var; + + preempt_disable(); + st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; + r_srttb = st->stats[GFS2_LKS_SRTTB]; + r_dcount = st->stats[GFS2_LKS_DCOUNT]; + var = st->stats[GFS2_LKS_SRTTVARB] + + gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; + preempt_enable(); + + l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; + l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; + + if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) + return false; + + srttb_diff = r_srttb - l_srttb; + sqr_diff = srttb_diff * srttb_diff; + + var *= 2; + if (l_dcount < 8 || r_dcount < 8) + var *= 2; + if (loops == 1) + var *= 2; + + return ((srttb_diff < 0) && (sqr_diff > var)); +} + +/** + * gfs2_rgrp_used_recently + * @rs: The block reservation with the rgrp to test + * @msecs: The time limit in milliseconds + * + * Returns: True if the rgrp glock has been used within the time limit + */ +static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, + u64 msecs) +{ + u64 tdiff; + + tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), + rs->rs_rbm.rgd->rd_gl->gl_dstamp)); + + return tdiff > (msecs * 1000 * 1000); +} + static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) { struct gfs2_rgrpd *rgd = *pos; @@ -1707,7 +1789,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = ip->i_res; - int error = 0, rg_locked, flags = LM_FLAG_TRY; + int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; int loops = 0; @@ -1731,13 +1813,18 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { rg_locked = 0; + if (!gfs2_rs_active(rs) && (loops < 2) && + gfs2_rgrp_used_recently(rs, 1000) && + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + goto next_rgrp; error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); - if (error == GLR_TRYFAILED) - goto next_rgrp; if (unlikely(error)) return error; + if (!gfs2_rs_active(rs) && (loops < 2) && + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rbm.rgd); if (unlikely(error)) { @@ -1789,7 +1876,6 @@ next_rgrp: * then this checks for some less likely conditions before * trying again. */ - flags &= ~LM_FLAG_TRY; loops++; /* Check that fs hasn't grown if writing to rindex */ if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { -- cgit v1.2.3 From 9dbe9610b9df4efe0946299804ed46bb8f91dec2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 31 Oct 2012 10:37:10 +0000 Subject: GFS2: Add Orlov allocator Just like ext3, this works on the root directory and any directory with the +T flag set. Also, just like ext3, any subdirectory created in one of the just mentioned cases will be allocated to a random resource group (GFS2 equivalent of a block group). If you are creating a set of directories, each of which will contain a job running on a different node, then by setting +T on the parent directory before creating the subdirectories, each will land up in a different resource group, and thus resource group contention between nodes will be kept to a minimum. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bdf3e644baa..99a619788c6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1763,6 +1764,15 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, return tdiff > (msecs * 1000 * 1000); } +static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + u32 skip; + + get_random_bytes(&skip, sizeof(skip)); + return skip % sdp->sd_rgrps; +} + static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) { struct gfs2_rgrpd *rgd = *pos; @@ -1784,7 +1794,7 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b * Returns: errno */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -1792,6 +1802,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; int loops = 0; + u32 skip = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -1805,6 +1816,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) } else { rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } + if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) + skip = gfs2_orlov_skip(ip); if (rs->rs_rbm.rgd == NULL) return -EBADSLT; @@ -1813,6 +1826,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { rg_locked = 0; + if (skip && skip--) + goto next_rgrp; if (!gfs2_rs_active(rs) && (loops < 2) && gfs2_rgrp_used_recently(rs, 1000) && gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) @@ -1871,6 +1886,8 @@ next_rgrp: /* Find the next rgrp, and continue looking */ if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) continue; + if (skip) + continue; /* If we've scanned all the rgrps, but found no free blocks * then this checks for some less likely conditions before -- cgit v1.2.3 From aa8920c96897dd82f0520f9e7db7311b42547ce6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 13 Nov 2012 14:50:35 +0000 Subject: GFS2: Fix one RG corner case For filesystems with only a single resource group, we need to be careful that the allocation loop will not land up with a NULL resource group. This fixes a bug in a previous patch where the gfs2_rgrpd_get_next() function was being used instead of gfs2_rgrpd_get_first() Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 99a619788c6..5625e93bf61 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1776,10 +1776,11 @@ static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) { struct gfs2_rgrpd *rgd = *pos; + struct gfs2_sbd *sdp = rgd->rd_sbd; rgd = gfs2_rgrpd_get_next(rgd); if (rgd == NULL) - rgd = gfs2_rgrpd_get_next(NULL); + rgd = gfs2_rgrpd_get_first(sdp); *pos = rgd; if (rgd != begin) /* If we didn't wrap */ return true; -- cgit v1.2.3 From 4e2f8849def738092ad6c0fc2b34737381bc9d26 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 14 Nov 2012 13:47:37 -0500 Subject: GFS2: remove redundant lvb pointer The lksb struct already contains a pointer to the lvb, so another directly from the glock struct is not needed. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5625e93bf61..37ee061d899 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -879,7 +879,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; - rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; -- cgit v1.2.3 From f1213cacc7ffc7d4cdef3692f22b28a2df3216f5 Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Wed, 19 Dec 2012 10:48:01 -0500 Subject: GFS2: Fix race in gfs2_rs_alloc QE aio tests uncovered a race condition in gfs2_rs_alloc where it's possible to come out of the function with a valid ip->i_res allocation but it gets freed before use resulting in a NULL ptr dereference. This patch envelopes the initial short-circuit check for non-NULL ip->i_res into the mutex lock. With this patch, I was able to successfully run the reproducer test multiple times. Resolves: rhbz#878476 Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 37ee061d899..738b3888adc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -557,22 +557,20 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) */ int gfs2_rs_alloc(struct gfs2_inode *ip) { - struct gfs2_blkreserv *res; + int error = 0; + down_write(&ip->i_rw_mutex); if (ip->i_res) - return 0; - - res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!res) - return -ENOMEM; + goto out; - RB_CLEAR_NODE(&res->rs_node); + ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!ip->i_res) { + error = -ENOMEM; + goto out; + } - down_write(&ip->i_rw_mutex); - if (ip->i_res) - kmem_cache_free(gfs2_rsrv_cachep, res); - else - ip->i_res = res; + RB_CLEAR_NODE(&ip->i_res->rs_node); +out: up_write(&ip->i_rw_mutex); return 0; } -- cgit v1.2.3 From 15bd50ad82a6d3421af1abe82e2554898abc4141 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 20 Dec 2012 13:21:07 -0500 Subject: GFS2: Stop looking for free blocks at end of rgrp This patch adds a return code check after calling function gfs2_rbm_from_block while determining the free extent size. That way, when the end of an rgrp is reached, it won't try to process unaligned blocks after the end. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 738b3888adc..712dd4fd864 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -350,10 +350,14 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) BUG_ON(len < chunk_size); len -= chunk_size; block = gfs2_rbm_to_block(&rbm); - gfs2_rbm_from_block(&rbm, block + chunk_size); - n_unaligned = 3; - if (ptr) + if (gfs2_rbm_from_block(&rbm, block + chunk_size)) { + n_unaligned = 0; break; + } + if (ptr) { + n_unaligned = 3; + break; + } n_unaligned = len & 3; } -- cgit v1.2.3 From 13d2eb012927b03ac1b80202af5aa9abc4003bd5 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 20 Dec 2012 13:23:04 -0500 Subject: GFS2: Reset rd_last_alloc when it reaches the end of the rgrp In function rg_mblk_search, it's searching for multiple blocks in a given state (e.g. "free"). If there's an active block reservation its goal is the next free block of that. If the resource group contains the dinode's goal block, that's used for the search. But if neither is the case, it uses the rgrp's last allocated block. That way, consecutive allocations appear after one another on media. The problem comes in when you hit the end of the rgrp; it would never start over and search from the beginning. This became a problem, since if you deleted all the files and data from the rgrp, it would never start over and find free blocks. So it had to keep searching further out on the media to allocate blocks. This patch resets the rd_last_alloc after it does an unsuccessful search at the end of the rgrp. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 712dd4fd864..b7eff078fe9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1426,6 +1426,9 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, rs->rs_free = extlen; rs->rs_inum = ip->i_no_addr; rs_insert(ip); + } else { + if (goal == rgd->rd_last_alloc + rgd->rd_data0) + rgd->rd_last_alloc = 0; } } -- cgit v1.2.3