From c04fecb4d9f7753e0cbff7edd03ec68f8721cdce Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Thu, 10 May 2012 10:18:07 -0500
Subject: dlm: use rsbtbl as resource directory

Remove the dir hash table (dirtbl), and use the rsb hash table (rsbtbl)
as the resource directory.  It has always been an unnecessary duplication
of information.

This improves efficiency by using a single rsbtbl lookup in many cases
where both rsbtbl and dirtbl lookups were needed previously.

This eliminates the need to handle cases of rsbtbl and dirtbl being out
of sync.

In many cases there will be memory savings because the dir hash table no
longer exists.

Signed-off-by: David Teigland
---
 fs/dlm/dlm_internal.h | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

(limited to 'fs/dlm/dlm_internal.h')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index bc342f7ac3a..3093207a768 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -55,8 +55,6 @@ struct dlm_lkb;
 struct dlm_rsb;
 struct dlm_member;
 struct dlm_rsbtable;
-struct dlm_dirtable;
-struct dlm_direntry;
 struct dlm_recover;
 struct dlm_header;
 struct dlm_message;
@@ -98,18 +96,6 @@ do { \
 }
 
 
-struct dlm_direntry {
-	struct list_head	list;
-	uint32_t		master_nodeid;
-	uint16_t		length;
-	char			name[1];
-};
-
-struct dlm_dirtable {
-	struct list_head	list;
-	spinlock_t		lock;
-};
-
 struct dlm_rsbtable {
 	struct rb_root		keep;
 	struct rb_root		toss;
@@ -283,6 +269,15 @@ struct dlm_lkb {
 	};
 };
 
+/*
+ * res_master_nodeid is "normal": 0 is unset/invalid, non-zero is the real
+ * nodeid, even when nodeid is our_nodeid.
+ *
+ * res_nodeid is "odd": -1 is unset/invalid, zero means our_nodeid,
+ * greater than zero when another nodeid.
+ *
+ * (TODO: remove res_nodeid and only use res_master_nodeid)
+ */
 struct dlm_rsb {
 	struct dlm_ls		*res_ls;	/* the lockspace */
@@ -291,6 +286,8 @@ struct dlm_rsb {
 	unsigned long		res_flags;
 	int			res_length;	/* length of rsb name */
 	int			res_nodeid;
+	int			res_master_nodeid;
+	int			res_dir_nodeid;
 	uint32_t		res_lvbseq;
 	uint32_t		res_hash;
 	uint32_t		res_bucket;	/* rsbtbl */
@@ -313,10 +310,21 @@ struct dlm_rsb {
 	char			res_name[DLM_RESNAME_MAXLEN+1];
 };
 
+/* dlm_master_lookup() flags */
+
+#define DLM_LU_RECOVER_DIR	1
+#define DLM_LU_RECOVER_MASTER	2
+
+/* dlm_master_lookup() results */
+
+#define DLM_LU_MATCH		1
+#define DLM_LU_ADD		2
+
 /* find_rsb() flags */
 
-#define R_MASTER		1	/* only return rsb if it's a master */
-#define R_CREATE		2	/* create/add rsb if not found */
+#define R_REQUEST		0x00000001
+#define R_RECEIVE_REQUEST	0x00000002
+#define R_RECEIVE_RECOVER	0x00000004
 
 /* rsb_flags */
@@ -509,9 +517,6 @@ struct dlm_ls {
 	struct dlm_rsbtable	*ls_rsbtbl;
 	uint32_t		ls_rsbtbl_size;
 
-	struct dlm_dirtable	*ls_dirtbl;
-	uint32_t		ls_dirtbl_size;
-
 	struct mutex		ls_waiters_mutex;
 	struct list_head	ls_waiters;	/* lkbs needing a reply */
@@ -545,6 +550,7 @@ struct dlm_ls {
 	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
 	struct dentry		*ls_debug_locks_dentry; /* debugfs */
 	struct dentry		*ls_debug_all_dentry; /* debugfs */
+	struct dentry		*ls_debug_toss_dentry; /* debugfs */
 
 	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
 	int			ls_uevent_result;
@@ -573,6 +579,8 @@ struct dlm_ls {
 	struct mutex		ls_requestqueue_mutex;
 	struct dlm_rcom		*ls_recover_buf;
 	int			ls_recover_nodeid;	/* for debugging */
+	unsigned int		ls_recover_dir_sent_res; /* for log info */
+	unsigned int		ls_recover_dir_sent_msg; /* for log info */
 	unsigned int		ls_recover_locks_in;	/* for log info */
 	uint64_t		ls_rcom_seq;
 	spinlock_t		ls_rcom_spin;
--
cgit v1.2.3
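The change collapses what used to be two hash lookups (dirtbl for "which node
is the directory entry for this name", then rsbtbl for the rsb itself) into
one.  As a rough illustration only, the sketch below shows how a lookup could
be answered straight from the rsb tree.  dlm_search_rsb_tree() and the
per-bucket lock field are assumptions about helpers/fields not shown in this
header; the real logic (dlm_master_lookup() in fs/dlm/lock.c) also handles
the toss tree, rsb creation and the DLM_LU_RECOVER_* cases.

#include <linux/jhash.h>
#include "dlm_internal.h"

/*
 * Sketch only, not the kernel implementation: answer "which node masters
 * this resource name?" with a single rsbtbl lookup.  The rsb found in the
 * keep tree doubles as the directory entry.
 */
static int master_lookup_sketch(struct dlm_ls *ls, int from_nodeid,
				char *name, int len, int *r_nodeid)
{
	uint32_t hash = jhash(name, len, 0);
	uint32_t bucket = hash & (ls->ls_rsbtbl_size - 1);
	struct dlm_rsb *r;
	int error;

	spin_lock(&ls->ls_rsbtbl[bucket].lock);	/* assumed per-bucket lock */
	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
	if (!error) {
		/* an rsb already exists, so it already records the master */
		*r_nodeid = r->res_master_nodeid;
		spin_unlock(&ls->ls_rsbtbl[bucket].lock);
		return DLM_LU_MATCH;
	}
	spin_unlock(&ls->ls_rsbtbl[bucket].lock);

	/*
	 * No rsb yet: the requesting node becomes the master, and a new rsb
	 * would be inserted here so later lookups find it.
	 */
	*r_nodeid = from_nodeid;
	return DLM_LU_ADD;
}
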
From 1d7c484eeb167fc374294e38ae402de4097c8611 Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Tue, 15 May 2012 16:07:49 -0500
Subject: dlm: use idr instead of list for recovered rsbs

When a large number of resources are being recovered, a linear search of
the recover_list takes a long time.  Use an idr in place of a list.

Signed-off-by: David Teigland
---
 fs/dlm/dlm_internal.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/dlm/dlm_internal.h')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 3093207a768..a5f82d5b394 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -288,6 +288,7 @@ struct dlm_rsb {
 	int			res_nodeid;
 	int			res_master_nodeid;
 	int			res_dir_nodeid;
+	int			res_id;		/* for ls_recover_idr */
 	uint32_t		res_lvbseq;
 	uint32_t		res_hash;
 	uint32_t		res_bucket;	/* rsbtbl */
@@ -587,6 +588,8 @@ struct dlm_ls {
 	struct list_head	ls_recover_list;
 	spinlock_t		ls_recover_list_lock;
 	int			ls_recover_list_count;
+	struct idr		ls_recover_idr;
+	spinlock_t		ls_recover_idr_lock;
 	wait_queue_head_t	ls_wait_general;
 	struct mutex		ls_clear_proc_locks;
--
cgit v1.2.3
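With an idr, each rsb queued for recovery gets a small integer id and can be
found again directly by that id instead of by walking a list.  The sketch
below is a minimal illustration of that bookkeeping, not the actual
fs/dlm/recover.c code: it uses the later idr_alloc() interface for brevity
(the 2012-era kernel exposed an older idr allocation API), and it assumes
res_id is what the recovery (rcom) replies carry to identify the rsb.

#include <linux/idr.h>
#include "dlm_internal.h"

/* sketch: register an rsb for recovery under an id */
static int recover_idr_add_sketch(struct dlm_ls *ls, struct dlm_rsb *r)
{
	int id;

	spin_lock(&ls->ls_recover_idr_lock);
	id = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT);
	if (id >= 0)
		r->res_id = id;		/* assumed to be sent in rcom messages */
	spin_unlock(&ls->ls_recover_idr_lock);

	return id < 0 ? id : 0;
}

/* sketch: a reply locates the rsb by id, no list walk needed */
static struct dlm_rsb *recover_idr_find_sketch(struct dlm_ls *ls, int id)
{
	struct dlm_rsb *r;

	spin_lock(&ls->ls_recover_idr_lock);
	r = idr_find(&ls->ls_recover_idr, id);
	spin_unlock(&ls->ls_recover_idr_lock);
	return r;
}
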
From 05c32f47bfae74dabff05208957768078b53cc49 Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Thu, 14 Jun 2012 12:17:32 -0500
Subject: dlm: fix race between remove and lookup

It was possible for a remove message on an old rsb to be sent after a
lookup message on a new rsb, where the rsbs were for the same resource
name.  This could lead to a missing directory entry for the new rsb.

It is fixed by keeping a copy of the resource name being removed until
after the remove has been sent.  A lookup checks if this in-progress
remove matches the name it is looking up.

Signed-off-by: David Teigland
---
 fs/dlm/dlm_internal.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'fs/dlm/dlm_internal.h')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index a5f82d5b394..9d3e485f88c 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -498,6 +498,13 @@ struct rcom_lock {
 	char			rl_lvb[0];
 };
 
+/*
+ * The max number of resources per rsbtbl bucket that shrink will attempt
+ * to remove in each iteration.
+ */
+
+#define DLM_REMOVE_NAMES_MAX 8
+
 struct dlm_ls {
 	struct list_head	ls_list;	/* list of lockspaces */
 	dlm_lockspace_t		*ls_local_handle;
@@ -531,6 +538,12 @@ struct dlm_ls {
 	int			ls_new_rsb_count;
 	struct list_head	ls_new_rsb;	/* new rsb structs */
 
+	spinlock_t		ls_remove_spin;
+	char			ls_remove_name[DLM_RESNAME_MAXLEN+1];
+	char			*ls_remove_names[DLM_REMOVE_NAMES_MAX];
+	int			ls_remove_len;
+	int			ls_remove_lens[DLM_REMOVE_NAMES_MAX];
+
 	struct list_head	ls_nodes;	/* current nodes in ls */
 	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
 	int			ls_num_nodes;	/* number of nodes in ls */
--
cgit v1.2.3
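The new ls_remove_* fields give a lookup a way to notice that a remove for
the same resource name is still in flight: the sending side copies the name
into ls_remove_name under ls_remove_spin before sending the remove and clears
it afterwards.  A sketch of the check a lookup could then make, with the
retry/backoff details assumed rather than taken from fs/dlm/lock.c:

#include <linux/delay.h>
#include <linux/string.h>
#include "dlm_internal.h"

/*
 * Sketch: before creating a new rsb/directory entry for "name", wait for
 * any in-progress remove of the same name to finish, so the remove cannot
 * delete the directory entry the lookup is about to rely on.
 */
static void wait_pending_remove_sketch(struct dlm_ls *ls, char *name, int len)
{
 retry:
	spin_lock(&ls->ls_remove_spin);
	if (ls->ls_remove_len == len &&
	    !memcmp(ls->ls_remove_name, name, len)) {
		spin_unlock(&ls->ls_remove_spin);
		msleep(1);	/* let the in-flight remove complete */
		goto retry;
	}
	spin_unlock(&ls->ls_remove_spin);
}
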
From 475f230c6072fb2186f48b23943afcd0ee3a8343 Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Thu, 2 Aug 2012 11:08:21 -0500
Subject: dlm: fix unlock balance warnings

The in_recovery rw_semaphore has always been acquired and released by
different threads by design.  To work around the "BUG: bad unlock balance
detected!" messages, adjust things so the dlm_recoverd thread always does
both down_write and up_write.

Signed-off-by: David Teigland
---
 fs/dlm/dlm_internal.h | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

(limited to 'fs/dlm/dlm_internal.h')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 9d3e485f88c..871c1abf602 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -604,6 +604,7 @@ struct dlm_ls {
 	struct idr		ls_recover_idr;
 	spinlock_t		ls_recover_idr_lock;
 	wait_queue_head_t	ls_wait_general;
+	wait_queue_head_t	ls_recover_lock_wait;
 	struct mutex		ls_clear_proc_locks;
 
 	struct list_head	ls_root_list;	/* root resources */
@@ -616,15 +617,40 @@ struct dlm_ls {
 	char			ls_name[1];
 };
 
-#define LSFL_WORK		0
-#define LSFL_RUNNING		1
-#define LSFL_RECOVERY_STOP	2
-#define LSFL_RCOM_READY		3
-#define LSFL_RCOM_WAIT		4
-#define LSFL_UEVENT_WAIT	5
-#define LSFL_TIMEWARN		6
-#define LSFL_CB_DELAY		7
-#define LSFL_NODIR		8
+/*
+ * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
+ * that they should abort what they're doing so new recovery can be started.
+ *
+ * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
+ * should do down_write() on the in_recovery rw_semaphore. (doing down_write
+ * within dlm_ls_stop causes complaints about the lock acquired/released
+ * in different contexts.)
+ *
+ * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
+ * It sets this after it is done with down_write() on the in_recovery
+ * rw_semaphore and clears it after it has released the rw_semaphore.
+ *
+ * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
+ * should begin recovery of the lockspace.
+ *
+ * LSFL_RUNNING - set when normal locking activity is enabled.
+ * dlm_ls_stop() clears this to tell dlm locking routines that they should
+ * quit what they are doing so recovery can run.  dlm_recoverd sets
+ * this after recovery is finished.
+ */
+
+#define LSFL_RECOVER_STOP	0
+#define LSFL_RECOVER_DOWN	1
+#define LSFL_RECOVER_LOCK	2
+#define LSFL_RECOVER_WORK	3
+#define LSFL_RUNNING		4
+
+#define LSFL_RCOM_READY		5
+#define LSFL_RCOM_WAIT		6
+#define LSFL_UEVENT_WAIT	7
+#define LSFL_TIMEWARN		8
+#define LSFL_CB_DELAY		9
+#define LSFL_NODIR		10
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)
 
 static inline int dlm_recovery_stopped(struct dlm_ls *ls)
 {
-	return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 }
 
 static inline int dlm_no_directory(struct dlm_ls *ls)
--
cgit v1.2.3

From da8c66638ae684c99abcb30e89d2803402e7ca20 Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Thu, 15 Nov 2012 15:01:51 -0600
Subject: dlm: fix lvb invalidation conditions

When a node is removed that held a PW/EX lock, the existing master node
should invalidate the lvb on the resource due to the purged lock.

Previously, the existing master node was invalidating the lvb if it found
only NL/CR locks on the resource during recovery for the removed node.
This could lead to cases where it invalidated the lvb and shouldn't have,
or cases where it should have invalidated and didn't.

When recovery selects a *new* master node for a resource, and that new
master finds only NL/CR locks on the resource after lock recovery, it
should invalidate the lvb.  This case was handled correctly (but was
incorrectly applied to the existing master case also.)

When a process exits while holding a PW/EX lock, the lvb on the resource
should be invalidated.  This was not happening.

The lvb contents and VALNOTVALID flag should be recovered before granting
locks in recovery so that the recovered lvb state is provided in the
callback.  The lvb was being recovered after the lock was granted.

Signed-off-by: David Teigland
---
 fs/dlm/dlm_internal.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/dlm/dlm_internal.h')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 871c1abf602..77c0f70f8fe 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -337,6 +337,7 @@ enum rsb_flags {
 	RSB_NEW_MASTER2,
 	RSB_RECOVER_CONVERT,
 	RSB_RECOVER_GRANT,
+	RSB_RECOVER_LVB_INVAL,
 };
 
 static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
--
cgit v1.2.3
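The RSB_RECOVER_LVB_INVAL flag lets the purge path record "a PW/EX lock was
removed here", so the existing master invalidates the lvb for that reason,
independently of the new-master "only NL/CR locks survived" rule.  The sketch
below is a simplified statement of that rule; the helper names are
hypothetical, and the real checks are split across fs/dlm/lock.c and
fs/dlm/recover.c.

#include "dlm_internal.h"

/*
 * Sketch of the invalidation rule (assumes the usual rsb_flag()/
 * rsb_clear_flag() helpers and the RSB_VALNOTVALID rsb flag):
 *  - while purging the locks of a removed node, the existing master marks
 *    the rsb if a purged lock could have written the lvb (PW or EX);
 *  - during recovery the lvb is then invalidated before any lock is
 *    granted, so callbacks see the recovered lvb state.
 */
static void purge_mark_lvb_sketch(struct dlm_rsb *r, int purged_mode)
{
	if (purged_mode == DLM_LOCK_PW || purged_mode == DLM_LOCK_EX)
		rsb_set_flag(r, RSB_RECOVER_LVB_INVAL);
}

static void recover_lvb_inval_sketch(struct dlm_rsb *r)
{
	if (rsb_flag(r, RSB_RECOVER_LVB_INVAL)) {
		rsb_set_flag(r, RSB_VALNOTVALID);	/* lvb contents are stale */
		rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL);
	}
}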