diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 01c4975da4b..30de4f2a2ea 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -643,7 +643,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, goto out_unlock; requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, requested); + error = gfs2_inplace_reserve(ip, requested, 0); if (error) goto out_qunlock; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1fd3ae237bd..a68e91bcef3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -991,6 +991,41 @@ unlock: return err; } +/** + * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files + * @inode: The inode being truncated + * @oldsize: The original (larger) size + * @newsize: The new smaller size + * + * With jdata files, we have to journal a revoke for each block which is + * truncated. As a result, we need to split this into separate transactions + * if the number of pages being truncated gets too large. + */ + +#define GFS2_JTRUNC_REVOKES 8192 + +static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) +{ + struct gfs2_sbd *sdp = GFS2_SB(inode); + u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; + u64 chunk; + int error; + + while (oldsize != newsize) { + chunk = oldsize - newsize; + if (chunk > max_chunk) + chunk = max_chunk; + truncate_pagecache(inode, oldsize, oldsize - chunk); + oldsize -= chunk; + gfs2_trans_end(sdp); + error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); + if (error) + return error; + } + + return 0; +} + static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1000,8 +1035,10 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) int journaled = gfs2_is_jdata(ip); int error; - error = gfs2_trans_begin(sdp, - RES_DINODE + (journaled ? 
RES_JDATA : 0), 0); + if (journaled) + error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); + else + error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) return error; @@ -1026,7 +1063,16 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; gfs2_dinode_out(ip, dibh->b_data); - truncate_pagecache(inode, oldsize, newsize); + if (journaled) + error = gfs2_journaled_truncate(inode, oldsize, newsize); + else + truncate_pagecache(inode, oldsize, newsize); + + if (error) { + brelse(dibh); + return error; + } + out_brelse: brelse(dibh); out: @@ -1178,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size) if (error) return error; - error = gfs2_inplace_reserve(ip, 1); + error = gfs2_inplace_reserve(ip, 1, 0); if (error) goto do_grow_qunlock; unstuff = 1; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 259b088cfc4..9a35670fdc3 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1676,16 +1676,11 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, be16_add_cpu(&leaf->lf_entries, 1); } brelse(bh); - error = gfs2_meta_inode_buffer(ip, &bh); - if (error) - break; - gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_entries++; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; if (S_ISDIR(nip->i_inode.i_mode)) inc_nlink(&ip->i_inode); - gfs2_dinode_out(ip, bh->b_data); - brelse(bh); + mark_inode_dirty(inode); error = 0; break; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e056b4ce487..dfe2d8cb9b2 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -432,7 +432,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); + ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); if (ret) goto out_quota_unlock; @@ -825,7 +825,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 0f22d09f358..992c5c0cb50 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -55,8 +55,6 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); -static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); -#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static struct dentry *gfs2_root; @@ -107,10 +105,12 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) { struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); - if (gl->gl_ops->go_flags & GLOF_ASPACE) + if (gl->gl_ops->go_flags & GLOF_ASPACE) { kmem_cache_free(gfs2_glock_aspace_cachep, gl); - else + } else { + kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(gfs2_glock_cachep, gl); + } } void gfs2_glock_free(struct gfs2_glock *gl) @@ -537,8 +537,8 @@ __acquires(&gl->gl_spin) (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); spin_unlock(&gl->gl_spin); - if (glops->go_xmote_th) - glops->go_xmote_th(gl); + if (glops->go_sync) + glops->go_sync(gl); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, 
&gl->gl_flags)) glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); @@ -547,7 +547,10 @@ __acquires(&gl->gl_spin) if (sdp->sd_lockstruct.ls_ops->lm_lock) { /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); - GLOCK_BUG_ON(gl, ret); + if (ret) { + printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); + GLOCK_BUG_ON(gl, 1); + } } else { /* lock_nolock */ finish_xmote(gl, target); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) @@ -736,6 +739,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (!gl) return -ENOMEM; + memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); + + if (glops->go_flags & GLOF_LVB) { + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); + if (!gl->gl_lksb.sb_lvbptr) { + kmem_cache_free(cachep, gl); + return -ENOMEM; + } + } + atomic_inc(&sdp->sd_glock_disposal); gl->gl_sbd = sdp; gl->gl_flags = 0; @@ -753,9 +766,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, preempt_enable(); gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; - memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); - memset(gl->gl_lvb, 0, 32 * sizeof(char)); - gl->gl_lksb.sb_lvbptr = gl->gl_lvb; gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_hold_time = GL_GLOCK_DFT_HOLD; @@ -777,6 +787,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, tmp = search_bucket(hash, sdp, &name); if (tmp) { spin_unlock_bucket(hash); + kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(cachep, gl); atomic_dec(&sdp->sd_glock_disposal); gl = tmp; @@ -1013,7 +1024,7 @@ trap_recursive: printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); printk(KERN_ERR "lock type: %d req lock state : %d\n", gh->gh_gl->gl_name.ln_type, gh->gh_state); - __dump_glock(NULL, gl); + gfs2_dump_glock(NULL, gl); BUG(); } @@ -1508,7 +1519,7 @@ static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) { int ret; spin_lock(&gl->gl_spin); - ret = __dump_glock(seq, gl); + ret = gfs2_dump_glock(seq, gl); spin_unlock(&gl->gl_spin); return ret; } @@ -1528,6 +1539,7 @@ static void dump_glock_func(struct gfs2_glock *gl) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { + set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); @@ -1655,7 +1667,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) } /** - * __dump_glock - print information about a glock + * gfs2_dump_glock - print information about a glock * @seq: The seq_file struct * @gl: the glock * @@ -1672,7 +1684,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) +int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) { const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned long long dtime; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 307ac31df78..fd580b7861d 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -178,33 +178,33 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) return NULL; } -int gfs2_glock_get(struct gfs2_sbd *sdp, - u64 number, const struct gfs2_glock_operations *glops, - int create, struct gfs2_glock **glp); -void gfs2_glock_hold(struct gfs2_glock *gl); -void gfs2_glock_put_nolock(struct gfs2_glock *gl); -void gfs2_glock_put(struct 
gfs2_glock *gl); -void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, - struct gfs2_holder *gh); -void gfs2_holder_reinit(unsigned int state, unsigned flags, - struct gfs2_holder *gh); -void gfs2_holder_uninit(struct gfs2_holder *gh); -int gfs2_glock_nq(struct gfs2_holder *gh); -int gfs2_glock_poll(struct gfs2_holder *gh); -int gfs2_glock_wait(struct gfs2_holder *gh); -void gfs2_glock_dq(struct gfs2_holder *gh); -void gfs2_glock_dq_wait(struct gfs2_holder *gh); - -void gfs2_glock_dq_uninit(struct gfs2_holder *gh); -int gfs2_glock_nq_num(struct gfs2_sbd *sdp, - u64 number, const struct gfs2_glock_operations *glops, - unsigned int state, int flags, struct gfs2_holder *gh); - -int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); -void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); - -__printf(2, 3) +extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + int create, struct gfs2_glock **glp); +extern void gfs2_glock_hold(struct gfs2_glock *gl); +extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); +extern void gfs2_glock_put(struct gfs2_glock *gl); +extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + unsigned flags, struct gfs2_holder *gh); +extern void gfs2_holder_reinit(unsigned int state, unsigned flags, + struct gfs2_holder *gh); +extern void gfs2_holder_uninit(struct gfs2_holder *gh); +extern int gfs2_glock_nq(struct gfs2_holder *gh); +extern int gfs2_glock_poll(struct gfs2_holder *gh); +extern int gfs2_glock_wait(struct gfs2_holder *gh); +extern void gfs2_glock_dq(struct gfs2_holder *gh); +extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); +extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); +extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, + const struct gfs2_glock_operations *glops, + unsigned int state, int flags, + struct gfs2_holder *gh); +extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); +extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); +extern void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); +extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); +#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) +extern __printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); /** diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 32cc4fde975..78d4184ffc7 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -74,7 +74,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) gfs2_trans_add_revoke(sdp, bd); } - BUG_ON(!fsync && atomic_read(&gl->gl_ail_count)); + GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); } @@ -96,7 +96,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) tr.tr_ip = (unsigned long)__builtin_return_address(0); sb_start_intwrite(sdp->sd_vfs); gfs2_log_reserve(sdp, tr.tr_reserved); - BUG_ON(current->journal_info); + WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; __gfs2_ail_flush(gl, 0); @@ -139,7 +139,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl) if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; - BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); + GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); gfs2_log_flush(gl->gl_sbd, gl); filemap_fdatawrite(metamapping); @@ -168,7 
+168,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { struct address_space *mapping = gfs2_glock2aspace(gl); - BUG_ON(!(flags & DIO_METADATA)); + WARN_ON_ONCE(!(flags & DIO_METADATA)); gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); truncate_inode_pages(mapping, 0); @@ -197,7 +197,7 @@ static void inode_go_sync(struct gfs2_glock *gl) if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; - BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); + GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); gfs2_log_flush(gl->gl_sbd, gl); filemap_fdatawrite(metamapping); @@ -536,7 +536,7 @@ const struct gfs2_glock_operations gfs2_meta_glops = { }; const struct gfs2_glock_operations gfs2_inode_glops = { - .go_xmote_th = inode_go_sync, + .go_sync = inode_go_sync, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_lock = inode_go_lock, @@ -546,17 +546,17 @@ const struct gfs2_glock_operations gfs2_inode_glops = { }; const struct gfs2_glock_operations gfs2_rgrp_glops = { - .go_xmote_th = rgrp_go_sync, + .go_sync = rgrp_go_sync, .go_inval = rgrp_go_inval, .go_lock = gfs2_rgrp_go_lock, .go_unlock = gfs2_rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, - .go_flags = GLOF_ASPACE, + .go_flags = GLOF_ASPACE | GLOF_LVB, }; const struct gfs2_glock_operations gfs2_trans_glops = { - .go_xmote_th = trans_go_sync, + .go_sync = trans_go_sync, .go_xmote_bh = trans_go_xmote_bh, .go_demote_ok = trans_go_demote_ok, .go_type = LM_TYPE_NONDISK, @@ -577,6 +577,7 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = { const struct gfs2_glock_operations gfs2_quota_glops = { .go_type = LM_TYPE_QUOTA, + .go_flags = GLOF_LVB, }; const struct gfs2_glock_operations gfs2_journal_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 3d469d37345..c373a24fedd 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -205,7 +205,7 @@ struct lm_lockname { struct gfs2_glock_operations { - void (*go_xmote_th) (struct gfs2_glock *gl); + void (*go_sync) (struct gfs2_glock *gl); int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); @@ -216,6 +216,7 @@ struct gfs2_glock_operations { const int go_type; const unsigned long go_flags; #define GLOF_ASPACE 1 +#define GLOF_LVB 2 }; enum { @@ -321,7 +322,6 @@ struct gfs2_glock { ktime_t gl_dstamp; struct gfs2_lkstats gl_stats; struct dlm_lksb gl_lksb; - char gl_lvb[32]; unsigned long gl_tchange; void *gl_object; @@ -539,6 +539,7 @@ enum { SDF_DEMOTE = 5, SDF_NOJOURNALID = 6, SDF_RORECOVERY = 7, /* read only recovery */ + SDF_SKIP_DLM_UNLOCK = 8, }; #define GFS2_FSNAME_LEN 256 @@ -621,6 +622,7 @@ struct gfs2_sbd { u32 sd_hash_bsize_shift; u32 sd_hash_ptrs; /* Number of pointers in a hash block */ u32 sd_qc_per_block; + u32 sd_blocks_per_bitmap; u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ u32 sd_max_height; /* Max height of a file's metadata tree */ u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 381893ceefa..2b6f5698ef1 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -364,34 +364,34 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, return 0; } -static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode, - unsigned int *uid, unsigned int *gid) +static void munge_mode_uid_gid(const struct gfs2_inode *dip, + struct inode *inode) { if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && (dip->i_inode.i_mode & 
S_ISUID) && dip->i_inode.i_uid) { - if (S_ISDIR(*mode)) - *mode |= S_ISUID; + if (S_ISDIR(inode->i_mode)) + inode->i_mode |= S_ISUID; else if (dip->i_inode.i_uid != current_fsuid()) - *mode &= ~07111; - *uid = dip->i_inode.i_uid; + inode->i_mode &= ~07111; + inode->i_uid = dip->i_inode.i_uid; } else - *uid = current_fsuid(); + inode->i_uid = current_fsuid(); if (dip->i_inode.i_mode & S_ISGID) { - if (S_ISDIR(*mode)) - *mode |= S_ISGID; - *gid = dip->i_inode.i_gid; + if (S_ISDIR(inode->i_mode)) + inode->i_mode |= S_ISGID; + inode->i_gid = dip->i_inode.i_gid; } else - *gid = current_fsgid(); + inode->i_gid = current_fsgid(); } -static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) +static int alloc_dinode(struct gfs2_inode *ip, u32 flags) { - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int error; int dblocks = 1; - error = gfs2_inplace_reserve(dip, RES_DINODE); + error = gfs2_inplace_reserve(ip, RES_DINODE, flags); if (error) goto out; @@ -399,12 +399,15 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) if (error) goto out_ipreserv; - error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation); + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); + ip->i_no_formal_ino = ip->i_generation; + ip->i_inode.i_ino = ip->i_no_addr; + ip->i_goal = ip->i_no_addr; gfs2_trans_end(sdp); out_ipreserv: - gfs2_inplace_release(dip); + gfs2_inplace_release(ip); out: return error; } @@ -429,52 +432,42 @@ static void gfs2_init_dir(struct buffer_head *dibh, /** * init_dinode - Fill in a new dinode structure * @dip: The directory this inode is being created in - * @gl: The glock covering the new inode - * @inum: The inode number - * @mode: The file permissions - * @uid: The uid of the new inode - * @gid: The gid of the new inode - * @generation: The generation number of the new inode - * @dev: The device number (if a device node) + * @ip: The inode * @symname: The symlink destination (if a symlink) - * @size: The inode size (ignored for directories) * @bhp: The buffer head (returned to caller) * */ -static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - const struct gfs2_inum_host *inum, umode_t mode, - unsigned int uid, unsigned int gid, - const u64 *generation, dev_t dev, const char *symname, - unsigned size, struct buffer_head **bhp) +static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, + const char *symname, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; struct buffer_head *dibh; struct timespec tv = CURRENT_TIME; - dibh = gfs2_meta_new(gl, inum->no_addr); - gfs2_trans_add_bh(gl, dibh, 1); + dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); + gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); di = (struct gfs2_dinode *)dibh->b_data; - di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); - di->di_num.no_addr = cpu_to_be64(inum->no_addr); - di->di_mode = cpu_to_be32(mode); - di->di_uid = cpu_to_be32(uid); - di->di_gid = cpu_to_be32(gid); + di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); + di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); + di->di_mode = cpu_to_be32(ip->i_inode.i_mode); + di->di_uid = cpu_to_be32(ip->i_inode.i_uid); + di->di_gid = cpu_to_be32(ip->i_inode.i_gid); di->di_nlink = 0; - di->di_size = cpu_to_be64(size); + di->di_size = 
cpu_to_be64(ip->i_inode.i_size); di->di_blocks = cpu_to_be64(1); di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); - di->di_major = cpu_to_be32(MAJOR(dev)); - di->di_minor = cpu_to_be32(MINOR(dev)); - di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); - di->di_generation = cpu_to_be64(*generation); + di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev)); + di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev)); + di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr); + di->di_generation = cpu_to_be64(ip->i_generation); di->di_flags = 0; di->__pad1 = 0; - di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); + di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0); di->di_height = 0; di->__pad2 = 0; di->__pad3 = 0; @@ -487,7 +480,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); memset(&di->di_reserved, 0, sizeof(di->di_reserved)); - switch(mode & S_IFMT) { + switch(ip->i_inode.i_mode & S_IFMT) { case S_IFREG: if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || gfs2_tune_get(sdp, gt_new_files_jdata)) @@ -502,7 +495,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, gfs2_init_dir(dibh, dip); break; case S_IFLNK: - memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); + memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, ip->i_inode.i_size); break; } @@ -511,25 +504,22 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, *bhp = dibh; } -static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - umode_t mode, const struct gfs2_inum_host *inum, - const u64 *generation, dev_t dev, const char *symname, - unsigned int size, struct buffer_head **bhp) +static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, + const char *symname, struct buffer_head **bhp) { + struct inode *inode = &ip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - unsigned int uid, gid; int error; - munge_mode_uid_gid(dip, &mode, &uid, &gid); error = gfs2_rindex_update(sdp); if (error) return error; - error = gfs2_quota_lock(dip, uid, gid); + error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid); if (error) return error; - error = gfs2_quota_check(dip, uid, gid); + error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid); if (error) goto out_quota; @@ -537,8 +527,8 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, if (error) goto out_quota; - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp); - gfs2_quota_change(dip, +1, uid, gid); + init_dinode(dip, ip, symname, bhp); + gfs2_quota_change(dip, +1, inode->i_uid, inode->i_gid); gfs2_trans_end(sdp); out_quota: @@ -570,7 +560,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, if (error) goto fail_quota_locks; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); if (error) goto fail_quota_locks; @@ -657,19 +647,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; + struct gfs2_glock *io_gl; int error; - u64 generation; struct buffer_head *bh = NULL; + u32 aflags = 0; if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; - /* We need a reservation to allocate 
the new dinode block. The - directory ip temporarily points to the reservation, but this is - being done to get a set of contiguous blocks for the new dinode. - Since this is a create, we don't have a sizehint yet, so it will - have to use the minimum reservation size. */ error = gfs2_rs_alloc(dip); if (error) return error; @@ -688,45 +673,72 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock; - error = alloc_dinode(dip, &inum.no_addr, &generation); - if (error) - goto fail_gunlock; - inum.no_formal_ino = generation; - - error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, - LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); - if (error) - goto fail_gunlock; - - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh); - if (error) - goto fail_gunlock2; - - inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, - inum.no_formal_ino, 0); - if (IS_ERR(inode)) - goto fail_gunlock2; - + inode = new_inode(sdp->sd_vfs); + if (!inode) { + gfs2_glock_dq_uninit(ghs); + return -ENOMEM; + } ip = GFS2_I(inode); - error = gfs2_inode_refresh(ip); - if (error) - goto fail_gunlock2; - error = gfs2_rs_alloc(ip); + if (error) + goto fail_free_inode; + + set_bit(GIF_INVALID, &ip->i_flags); + inode->i_mode = mode; + inode->i_rdev = dev; + inode->i_size = size; + munge_mode_uid_gid(dip, inode); + ip->i_goal = dip->i_goal; + + if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) || + (dip->i_diskflags & GFS2_DIF_TOPDIR)) + aflags |= GFS2_AF_ORLOV; + + error = alloc_dinode(ip, aflags); + if (error) + goto fail_free_inode; + + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); + if (error) + goto fail_free_inode; + + ip->i_gl->gl_object = ip; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_free_inode; + + error = make_dinode(dip, ip, symname, &bh); if (error) goto fail_gunlock2; + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); + if (error) + goto fail_gunlock2; + + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); + if (error) + goto fail_gunlock2; + + ip->i_iopen_gh.gh_gl->gl_object = ip; + gfs2_glock_put(io_gl); + gfs2_set_iop(inode); + insert_inode_hash(inode); + + error = gfs2_inode_refresh(ip); + if (error) + goto fail_gunlock3; + error = gfs2_acl_create(dip, inode); if (error) - goto fail_gunlock2; + goto fail_gunlock3; error = gfs2_security_init(dip, ip, name); if (error) - goto fail_gunlock2; + goto fail_gunlock3; error = link_dinode(dip, name, ip); if (error) - goto fail_gunlock2; + goto fail_gunlock3; if (bh) brelse(bh); @@ -739,8 +751,20 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); return 0; +fail_gunlock3: + gfs2_glock_dq_uninit(ghs + 1); + if (ip->i_gl) + gfs2_glock_put(ip->i_gl); + goto fail_gunlock; + fail_gunlock2: gfs2_glock_dq_uninit(ghs + 1); +fail_free_inode: + if (ip->i_gl) + gfs2_glock_put(ip->i_gl); + gfs2_rs_delete(ip); + free_inode_nonrcu(inode); + inode = NULL; fail_gunlock: gfs2_glock_dq_uninit(ghs); if (inode && !IS_ERR(inode)) { @@ -748,7 +772,6 @@ fail_gunlock: iput(inode); } fail: - gfs2_rs_delete(dip); if (bh) brelse(bh); return error; @@ -884,7 +907,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_gunlock; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); if (error) goto out_gunlock_q; @@ 
-977,7 +1000,6 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it * @dip: The parent directory * @name: The name of the entry in the parent directory - * @bh: The inode buffer for the inode to be removed * @inode: The inode to be removed * * Called with all the locks and in a transaction. This will only be @@ -987,8 +1009,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, */ static int gfs2_unlink_inode(struct gfs2_inode *dip, - const struct dentry *dentry, - struct buffer_head *bh) + const struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); @@ -1028,7 +1049,6 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct gfs2_sbd *sdp = GFS2_SB(dir); struct inode *inode = dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *bh; struct gfs2_holder ghs[3]; struct gfs2_rgrpd *rgd; int error; @@ -1076,15 +1096,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) goto out_gunlock; error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); - if (error) - goto out_gunlock; - - error = gfs2_meta_inode_buffer(ip, &bh); if (error) goto out_end_trans; - error = gfs2_unlink_inode(dip, dentry, bh); - brelse(bh); + error = gfs2_unlink_inode(dip, dentry); out_end_trans: gfs2_trans_end(sdp); @@ -1365,7 +1380,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_gunlock; - error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); + error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); if (error) goto out_gunlock_q; @@ -1384,14 +1399,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Remove the target file, if it exists */ - if (nip) { - struct buffer_head *bh; - error = gfs2_meta_inode_buffer(nip, &bh); - if (error) - goto out_end_trans; - error = gfs2_unlink_inode(ndip, ndentry, bh); - brelse(bh); - } + if (nip) + error = gfs2_unlink_inode(ndip, ndentry); if (dir_rename) { error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0fb6539b0c8..8dad6b09371 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -120,8 +120,8 @@ static void gdlm_ast(void *arg) gfs2_update_reply_times(gl); BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); - if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) - memset(gl->gl_lvb, 0, GDLM_LVB_SIZE); + if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr) + memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ @@ -203,8 +203,10 @@ static int make_mode(const unsigned int lmstate) static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, const int req) { - u32 lkf = DLM_LKF_VALBLK; - u32 lkid = gl->gl_lksb.sb_lkid; + u32 lkf = 0; + + if (gl->gl_lksb.sb_lvbptr) + lkf |= DLM_LKF_VALBLK; if (gfs_flags & LM_FLAG_TRY) lkf |= DLM_LKF_NOQUEUE; @@ -228,7 +230,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, BUG(); } - if (lkid != 0) { + if (gl->gl_lksb.sb_lkid != 0) { lkf |= DLM_LKF_CONVERT; if (test_bit(GLF_BLOCKING, &gl->gl_flags)) lkf |= DLM_LKF_QUECVT; @@ -289,6 +291,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); + + /* don't want to skip dlm_unlock 
writing the lvb when lock is ex */ + if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && + gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { + gfs2_glock_free(gl); + return; + } + error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); if (error) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e443966c810..0e3554edb8f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -278,6 +278,9 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(struct gfs2_quota_change); + sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) + * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */ /* Compute maximum reservation required to add a entry to a directory */ diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c5af8e18f27..ae55e248c3b 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -816,7 +816,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; reserved = 1 + (nalloc * (data_blocks + ind_blocks)); - error = gfs2_inplace_reserve(ip, reserved); + error = gfs2_inplace_reserve(ip, reserved, 0); if (error) goto out_alloc; @@ -869,7 +869,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) if (error < 0) return error; - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); qlvb->__pad = 0; qlvb->qb_limit = q.qu_limit; @@ -893,7 +893,7 @@ restart: if (error) return error; - qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { gfs2_glock_dq_uninit(q_gh); @@ -1506,7 +1506,7 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, if (error) goto out; - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; fdq->d_version = FS_DQUOT_VERSION; fdq->d_flags = (type == QUOTA_USER) ? 
FS_USER_QUOTA : FS_GROUP_QUOTA; fdq->d_id = from_kqid(&init_user_ns, qid); @@ -1605,7 +1605,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, blocks); + error = gfs2_inplace_reserve(ip, blocks, 0); if (error) goto out_i; blocks += gfs2_rg_blocks(ip, blocks); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 38fe18f2f05..37ee061d899 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -251,22 +252,25 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) { u64 rblock = block - rbm->rgd->rd_data0; - u32 goal = (u32)rblock; - int x; + u32 x; if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) return -E2BIG; - for (x = 0; x < rbm->rgd->rd_length; x++) { - rbm->bi = rbm->rgd->rd_bits + x; - if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { - rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); - break; - } - } + rbm->bi = rbm->rgd->rd_bits; + rbm->offset = (u32)(rblock); + /* Check if the block is within the first block */ + if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) + return 0; + /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ + rbm->offset += (sizeof(struct gfs2_rgrp) - + sizeof(struct gfs2_meta_header)) * GFS2_NBBY; + x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->bi += x; return 0; } @@ -875,7 +879,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; - rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; @@ -1678,13 +1682,105 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } +/** + * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested + * @rgd: The rgrp in question + * @loops: An indication of how picky we can be (0=very, 1=less so) + * + * This function uses the recently added glock statistics in order to + * figure out whether a parciular resource group is suffering from + * contention from multiple nodes. This is done purely on the basis + * of timings, since this is the only data we have to work with and + * our aim here is to reject a resource group which is highly contended + * but (very important) not to do this too often in order to ensure that + * we do not land up introducing fragmentation by changing resource + * groups when not actually required. + * + * The calculation is fairly simple, we want to know whether the SRTTB + * (i.e. smoothed round trip time for blocking operations) to acquire + * the lock for this rgrp's glock is significantly greater than the + * time taken for resource groups on average. We introduce a margin in + * the form of the variable @var which is computed as the sum of the two + * respective variences, and multiplied by a factor depending on @loops + * and whether we have a lot of data to base the decision on. 
This is + * then tested against the square difference of the means in order to + * decide whether the result is statistically significant or not. + * + * Returns: A boolean verdict on the congestion status + */ + +static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) +{ + const struct gfs2_glock *gl = rgd->rd_gl; + const struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_lkstats *st; + s64 r_dcount, l_dcount; + s64 r_srttb, l_srttb; + s64 srttb_diff; + s64 sqr_diff; + s64 var; + + preempt_disable(); + st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; + r_srttb = st->stats[GFS2_LKS_SRTTB]; + r_dcount = st->stats[GFS2_LKS_DCOUNT]; + var = st->stats[GFS2_LKS_SRTTVARB] + + gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; + preempt_enable(); + + l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; + l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; + + if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) + return false; + + srttb_diff = r_srttb - l_srttb; + sqr_diff = srttb_diff * srttb_diff; + + var *= 2; + if (l_dcount < 8 || r_dcount < 8) + var *= 2; + if (loops == 1) + var *= 2; + + return ((srttb_diff < 0) && (sqr_diff > var)); +} + +/** + * gfs2_rgrp_used_recently + * @rs: The block reservation with the rgrp to test + * @msecs: The time limit in milliseconds + * + * Returns: True if the rgrp glock has been used within the time limit + */ +static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, + u64 msecs) +{ + u64 tdiff; + + tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), + rs->rs_rbm.rgd->rd_gl->gl_dstamp)); + + return tdiff > (msecs * 1000 * 1000); +} + +static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + u32 skip; + + get_random_bytes(&skip, sizeof(skip)); + return skip % sdp->sd_rgrps; +} + static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) { struct gfs2_rgrpd *rgd = *pos; + struct gfs2_sbd *sdp = rgd->rd_sbd; rgd = gfs2_rgrpd_get_next(rgd); if (rgd == NULL) - rgd = gfs2_rgrpd_get_next(NULL); + rgd = gfs2_rgrpd_get_first(sdp); *pos = rgd; if (rgd != begin) /* If we didn't wrap */ return true; @@ -1699,14 +1795,15 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b * Returns: errno */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = ip->i_res; - int error = 0, rg_locked, flags = LM_FLAG_TRY; + int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; int loops = 0; + u32 skip = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -1720,6 +1817,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) } else { rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } + if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) + skip = gfs2_orlov_skip(ip); if (rs->rs_rbm.rgd == NULL) return -EBADSLT; @@ -1728,13 +1827,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { rg_locked = 0; + if (skip && skip--) + goto next_rgrp; + if (!gfs2_rs_active(rs) && (loops < 2) && + gfs2_rgrp_used_recently(rs, 1000) && + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + goto next_rgrp; error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); - if (error == GLR_TRYFAILED) - goto next_rgrp; if (unlikely(error)) return error; + 
if (!gfs2_rs_active(rs) && (loops < 2) && + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rbm.rgd); if (unlikely(error)) { @@ -1781,12 +1887,13 @@ next_rgrp: /* Find the next rgrp, and continue looking */ if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) continue; + if (skip) + continue; /* If we've scanned all the rgrps, but found no free blocks * then this checks for some less likely conditions before * trying again. */ - flags &= ~LM_FLAG_TRY; loops++; /* Check that fs hasn't grown if writing to rindex */ if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 24077958dcf..842185853f6 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -39,7 +39,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); +#define GFS2_AF_ORLOV 1 +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index bbdc78af60c..2ee13e841e9 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -486,7 +486,7 @@ TRACE_EVENT(gfs2_block_alloc, ), TP_fast_assign( - __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; + __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev; __entry->start = block; __entry->inum = ip->i_no_addr; __entry->len = len; diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index db330e5518c..76c144b3c9b 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_inplace_reserve(ip, blks); + error = gfs2_inplace_reserve(ip, blks, 0); if (error) goto out_gunlock_q;
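
The gfs2_journaled_truncate() helper added above keeps any single transaction from having to journal more than GFS2_JTRUNC_REVOKES (8192) revokes by truncating the page cache in chunks and restarting the transaction between chunks. The following standalone C sketch models only that chunking loop; begin_trans(), end_trans() and drop_pages() are made-up stand-ins for gfs2_trans_begin(), gfs2_trans_end() and truncate_pagecache(), not GFS2 APIs.

#include <stdint.h>
#include <stdio.h>

#define JTRUNC_REVOKES 8192UL   /* max revokes journalled per transaction */

/* Stubs standing in for gfs2_trans_begin/gfs2_trans_end/truncate_pagecache. */
static int begin_trans(void)            { return 0; }
static void end_trans(void)             { }
static void drop_pages(uint64_t from, uint64_t to)
{
    printf("truncate page cache from %llu down to %llu\n",
           (unsigned long long)from, (unsigned long long)to);
}

/*
 * Walk from oldsize down to newsize in chunks of at most
 * JTRUNC_REVOKES * blocksize bytes, ending the journal transaction after
 * each chunk so no single transaction has to revoke more than
 * JTRUNC_REVOKES blocks: the same loop shape as gfs2_journaled_truncate().
 */
static int chunked_truncate(uint64_t oldsize, uint64_t newsize,
                            unsigned int blocksize)
{
    uint64_t max_chunk = JTRUNC_REVOKES * (uint64_t)blocksize;

    while (oldsize != newsize) {
        uint64_t chunk = oldsize - newsize;

        if (chunk > max_chunk)
            chunk = max_chunk;
        drop_pages(oldsize, oldsize - chunk);
        oldsize -= chunk;

        end_trans();
        if (begin_trans())      /* re-reserve journal space per chunk */
            return -1;
    }
    return 0;
}

int main(void)
{
    /* e.g. shrink a 256 MiB jdata file down to 4 KiB with 4 KiB blocks */
    return chunked_truncate(1ULL << 28, 4096, 4096);
}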
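
The glock changes drop the fixed 32-byte gl_lvb[] array: a lock value block is now kzalloc()ed only for glock types carrying the new GLOF_LVB flag (rgrp and quota), DLM_LKF_VALBLK is requested only when gl_lksb.sb_lvbptr is set, and the buffer is kfree()d when the glock is torn down. Below is a hedged userspace sketch of that conditional-allocation pattern; the struct names and the GOF_LVB and LKF_VALBLK constants are invented for illustration and are not the real GFS2/DLM definitions.

#include <stdlib.h>
#include <string.h>

#define MIN_LVB_SIZE 32         /* mirrors GFS2_MIN_LVB_SIZE */
#define GOF_LVB      0x2        /* stand-in for GLOF_LVB */
#define LKF_VALBLK   0x1        /* stand-in for DLM_LKF_VALBLK */

struct lock_ops { unsigned long flags; };
struct lock     { const struct lock_ops *ops; char *lvbptr; };

/* Allocate a value block only for lock types that actually publish one. */
static int lock_init(struct lock *lk, const struct lock_ops *ops)
{
    memset(lk, 0, sizeof(*lk));
    lk->ops = ops;
    if (ops->flags & GOF_LVB) {
        lk->lvbptr = calloc(1, MIN_LVB_SIZE);
        if (!lk->lvbptr)
            return -1;          /* -ENOMEM in the kernel version */
    }
    return 0;
}

/* Ask the lock manager for a value block only if we own a buffer for it. */
static unsigned int lock_request_flags(const struct lock *lk)
{
    return lk->lvbptr ? LKF_VALBLK : 0;
}

static void lock_free(struct lock *lk)
{
    free(lk->lvbptr);           /* like kfree(), free(NULL) is a no-op */
    lk->lvbptr = NULL;
}

int main(void)
{
    static const struct lock_ops rgrp_ops  = { .flags = GOF_LVB };
    static const struct lock_ops inode_ops = { .flags = 0 };
    struct lock a, b;

    if (lock_init(&a, &rgrp_ops) || lock_init(&b, &inode_ops))
        return 1;
    int ra = lock_request_flags(&a);    /* LKF_VALBLK */
    int rb = lock_request_flags(&b);    /* 0 */
    lock_free(&a);
    lock_free(&b);
    return (ra == LKF_VALBLK && rb == 0) ? 0 : 1;
}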
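
gfs2_rbm_from_block() now converts a block offset within a resource group into a (bitmap, offset) pair by division, using the new sd_blocks_per_bitmap value, instead of scanning every bitmap descriptor. The sketch below reproduces that arithmetic in plain C under the assumption that the rgrp header is 128 bytes and the metadata header 24 bytes (stand-ins for sizeof(struct gfs2_rgrp) and sizeof(struct gfs2_meta_header)); GFS2_NBBY is 4 because each block takes two bits in a bitmap byte.

#include <stdint.h>
#include <stdio.h>

#define NBBY_BLOCKS      4u      /* blocks described per bitmap byte */
#define RGRP_HDR_SIZE  128u      /* illustrative sizeof(struct gfs2_rgrp) */
#define META_HDR_SIZE   24u      /* illustrative sizeof(struct gfs2_meta_header) */

/*
 * Map a block offset within a resource group to a bitmap buffer index and
 * an offset inside that bitmap.  The first bitmap sits behind the larger
 * rgrp header, so later bitmaps are shifted by the size difference; the
 * patch folds that into the offset before dividing by blocks-per-bitmap.
 */
static void rbm_from_block(uint32_t rblock, uint32_t blocksize,
                           uint32_t *bi, uint32_t *offset)
{
    uint32_t first_bi_len      = blocksize - RGRP_HDR_SIZE;          /* bytes */
    uint32_t blocks_per_bitmap = (blocksize - META_HDR_SIZE) * NBBY_BLOCKS;

    if (rblock < first_bi_len * NBBY_BLOCKS) {      /* fits in bitmap 0 */
        *bi = 0;
        *offset = rblock;
        return;
    }
    /* pretend bitmap 0 were full-sized, then divide */
    rblock += (RGRP_HDR_SIZE - META_HDR_SIZE) * NBBY_BLOCKS;
    *bi = rblock / blocks_per_bitmap;
    *offset = rblock % blocks_per_bitmap;
}

int main(void)
{
    uint32_t bi, off;

    rbm_from_block(100000, 4096, &bi, &off);
    printf("block 100000 -> bitmap %u, offset %u\n", bi, off);
    return 0;
}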
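
gfs2_rgrp_congested() decides whether to pass over a resource group by comparing the glock's own smoothed round-trip time for blocking requests against the per-CPU filesystem-wide average for rgrp glocks, and only calls it congested when the squared difference of the means clears a margin built from the two variances (widened when samples are scarce or on the retry pass). A minimal userspace rendering of that test, taking plain integers instead of the glock statistics structures:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Congested means: the local smoothed blocking RTT (l_srttb) exceeds the
 * filesystem-wide mean (r_srttb) and the squared difference clears a
 * margin built from the two variances.  The margin is doubled when either
 * sample count is small and again on the second pass (loops == 1).
 */
bool rgrp_congested(int64_t l_srttb, int64_t l_var, int64_t l_dcount,
                    int64_t r_srttb, int64_t r_var, int64_t r_dcount,
                    int loops)
{
    int64_t var = l_var + r_var;
    int64_t diff, sqr_diff;

    if (l_dcount < 1 || r_dcount < 1 || r_srttb == 0)
        return false;                   /* not enough data to judge */

    diff = r_srttb - l_srttb;
    sqr_diff = diff * diff;

    var *= 2;
    if (l_dcount < 8 || r_dcount < 8)   /* few samples: demand a bigger gap */
        var *= 2;
    if (loops == 1)                     /* be less picky on the retry pass */
        var *= 2;

    return diff < 0 && sqr_diff > var;
}

int main(void)
{
    /* local lock far slower than the average, well outside the margin */
    printf("congested: %d\n",
           rgrp_congested(5000, 100, 20, 1000, 100, 50, 0));
    return 0;
}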
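
When the new GFS2_AF_ORLOV flag is passed (the parent is the root directory or is marked TOPDIR), gfs2_inplace_reserve() starts its resource group search at a pseudo-random index, skipping that many candidate rgrps, so that new directories spread out across the filesystem in the spirit of ext3's Orlov allocator. A sketch of just the selection step, using rand() in place of the kernel's get_random_bytes(); the function name is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Pick how many resource groups to skip before considering candidates. */
static uint32_t orlov_skip(uint32_t nr_rgrps)
{
    uint32_t skip = (uint32_t)rand();   /* kernel uses get_random_bytes() */
    return skip % nr_rgrps;
}

int main(void)
{
    srand((unsigned)time(NULL));

    /* e.g. a filesystem with 64 resource groups: each new top-level
     * directory gets a different starting point for its allocation */
    for (int i = 0; i < 4; i++)
        printf("new directory %d starts searching at rgrp %u\n",
               i, orlov_skip(64));
    return 0;
}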
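
Several plain BUG_ON() calls in glops.c become GLOCK_BUG_ON(), and the macro now lives in glock.h so it can dump the offending glock via the newly exported gfs2_dump_glock() before bringing the machine down. The pattern, dump whatever context you have and then abort, looks roughly like the userspace sketch below (GL_BUG_ON, dump_gl and struct gl are invented names, not the GFS2 ones):

#include <stdio.h>
#include <stdlib.h>

struct gl { int state; unsigned long flags; };

/* Stand-in for gfs2_dump_glock(): print whatever context is available. */
static void dump_gl(const struct gl *g)
{
    fprintf(stderr, "gl: state=%d flags=%#lx\n", g->state, g->flags);
}

/* Dump the lock before aborting, so the crash log carries the context. */
#define GL_BUG_ON(g, x)                                 \
    do {                                                \
        if (x) {                                        \
            dump_gl(g);                                 \
            abort();    /* BUG() in the kernel */       \
        }                                               \
    } while (0)

int main(void)
{
    struct gl g = { .state = 3, .flags = 0x10 };

    GL_BUG_ON(&g, g.state < 0);     /* condition is false: no dump, no abort */
    return 0;
}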