From d13603ef6e14a12cd65a6975e8117c0fea7c7ddf Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 11:40:09 +0200 Subject: Btrfs: check the root passed to btrfs_end_transaction This patch only add a consistancy check to validate that the same root is passed to start_transaction and end_transaction. Subvolume quota depends on this. Signed-off-by: Arne Jansen --- fs/btrfs/transaction.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fe27379e368..010729446e1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -57,6 +57,12 @@ struct btrfs_trans_handle { struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; int aborted; + /* + * this root is only needed to validate that the root passed to + * start_transaction is the same as the one passed to end_transaction. + * Subvolume quota depends on this + */ + struct btrfs_root *root; }; struct btrfs_pending_snapshot { -- cgit v1.2.3 From bed92eae26ccf280d1a2168b7509447b56675a27 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Thu, 28 Jun 2012 18:03:02 +0200 Subject: Btrfs: qgroup implementation and prototypes Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/transaction.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 010729446e1..16ba00842c3 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -20,6 +20,7 @@ #define __BTRFS_TRANSACTION__ #include "btrfs_inode.h" #include "delayed-ref.h" +#include "ctree.h" struct btrfs_transaction { u64 transid; @@ -63,6 +64,8 @@ struct btrfs_trans_handle { * Subvolume quota depends on this */ struct btrfs_root *root; + struct seq_list delayed_ref_elem; + struct list_head qgroup_ref_list; }; struct btrfs_pending_snapshot { -- cgit v1.2.3 From c556723794b3487a79de1ecd6354975b1389f5ff Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 14 Sep 2011 15:44:05 +0200 Subject: Btrfs: hooks to reserve qgroup space Like block reserves, reserve a small piece of space on each transaction start and for delalloc. These are the hooks that can actually return EDQUOT to the user. The amount of space reserved is tracked in the transaction handle. Signed-off-by: Arne Jansen --- fs/btrfs/transaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 16ba00842c3..2759e0572c5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -50,6 +50,7 @@ struct btrfs_transaction { struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; + u64 qgroup_reserved; unsigned long use_count; unsigned long blocks_reserved; unsigned long blocks_used; -- cgit v1.2.3 From 6f72c7e20dbaea55f04546de69586c84a3654503 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Wed, 14 Sep 2011 15:58:21 +0200 Subject: Btrfs: add qgroup inheritance When creating a subvolume or snapshot, it is necessary to initialize the qgroup account with a copy of some other (tracking) qgroup. This patch adds parameters to the ioctls to pass the information from which qgroup to inherit. Signed-off-by: Arne Jansen --- fs/btrfs/transaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 2759e0572c5..cca315dcdfc 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,6 +73,7 @@ struct btrfs_pending_snapshot { struct dentry *dentry; struct btrfs_root *root; struct btrfs_root *snap; + struct btrfs_qgroup_inherit *inherit; /* block reservation for the operation */ struct btrfs_block_rsv block_rsv; /* extra metadata reseration for relocation */ -- cgit v1.2.3 From 0e721106923be82f651dd0ee504742a8a3eb089f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Jun 2012 16:13:18 -0400 Subject: Btrfs: change how we indicate we're adding csums There is weird logic I had to put in place to make sure that when we were adding csums that we'd used the delalloc block rsv instead of the global block rsv. Part of this meant that we had to free up our transaction reservation before we ran the delayed refs since csum deletion happens during the delayed ref work. The problem with this is that when we release a reservation we will add it to the global reserve if it is not full in order to keep us going along longer before we have to force a transaction commit. By releasing our reservation before we run delayed refs we don't get the opportunity to drain down the global reserve for the work we did, so we won't refill it as often. This isn't a problem per-se, it just results in us possibly committing transactions more and more often, and in rare cases could cause those WARN_ON()'s to pop in use_block_rsv because we ran out of space in our block rsv. This also helps us by holding onto space while the delayed refs run so we don't end up with as many people trying to do things at the same time, which again will help us not force commits or hit the use_block_rsv warnings. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/transaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fe27379e368..d314a74b496 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -57,6 +57,7 @@ struct btrfs_trans_handle { struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; int aborted; + int adding_csums; }; struct btrfs_pending_snapshot { -- cgit v1.2.3 From 46d8bc34248f3a94dea910137d1ddf5fb1e3a1cc Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 29 Aug 2012 01:07:55 -0600 Subject: Btrfs: fix a bug in checking whether a inode is already in log This is based on Josef's "Btrfs: turbo charge fsync". The current btrfs checks if an inode is in log by comparing root's last_log_commit to inode's last_sub_trans[2]. But the problem is that this root->last_log_commit is shared among inodes. Say we have N inodes to be logged, after the first inode, root's last_log_commit is updated and the N-1 remained files will be skipped. This fixes the bug by keeping a local copy of root's last_log_commit inside each inode and this local copy will be maintained itself. [1]: we regard each log transaction as a subset of btrfs's transaction, i.e. sub_trans Signed-off-by: Liu Bo --- fs/btrfs/transaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e8b8416c688..1a138bfb8f1 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -88,6 +88,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, { BTRFS_I(inode)->last_trans = trans->transaction->transid; BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; + BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; } int btrfs_end_transaction(struct btrfs_trans_handle *trans, -- cgit v1.2.3 From 8407aa464331556e4f6784f974030b83fc7585ed Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 7 Sep 2012 01:43:32 -0600 Subject: Btrfs: fix corrupted metadata in the snapshot When we delete a inode, we will remove all the delayed items including delayed inode update, and then truncate all the relative metadata. If there is lots of metadata, we will end the current transaction, and start a new transaction to truncate the left metadata. In this way, we will leave a inode item that its link counter is > 0, and also may leave some directory index items in fs/file tree after the current transaction ends. In other words, the metadata in this fs/file tree is inconsistent. If we create a snapshot for this tree now, we will find a inode with corrupted metadata in the new snapshot, and we won't continue to drop the left metadata, because its link counter is not 0. We fix this problem by updating the inode item before the current transaction ends. Signed-off-by: Miao Xie --- fs/btrfs/transaction.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 1a138bfb8f1..1b054800eca 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -97,6 +97,8 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_items); +struct btrfs_trans_handle *btrfs_start_transaction_noflush( + struct btrfs_root *root, int num_items); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); -- cgit v1.2.3 From ea658badc47e614e38ab4d98510488474c7e6d4b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 11 Sep 2012 16:57:25 -0400 Subject: Btrfs: delay block group item insertion So we have lots of places where we try to preallocate chunks in order to make sure we have enough space as we make our allocations. This has historically meant that we're constantly tweaking when we should allocate a new chunk, and historically we have gotten this horribly wrong so we way over allocate either metadata or data. To try and keep this from happening we are going to make it so that the block group item insertion is done out of band at the end of a transaction. This will allow us to create chunks even if we are trying to make an allocation for the extent tree. With this patch my enospc tests run faster (didn't expect this) and more efficiently use the disk space (this is what I wanted). Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/transaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 1b054800eca..b6463e12804 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -68,6 +68,7 @@ struct btrfs_trans_handle { struct btrfs_root *root; struct seq_list delayed_ref_elem; struct list_head qgroup_ref_list; + struct list_head new_bgs; }; struct btrfs_pending_snapshot { -- cgit v1.2.3 From 60376ce4a8396bc5cd777be05b6a9bf044520f42 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 14 Sep 2012 10:34:40 -0400 Subject: Btrfs: fix race in sync and freeze again I screwed this up, there is a race between checking if there is a running transaction and actually starting a transaction in sync where we could race with a freezer and get ourselves into trouble. To fix this we need to make a new join type to only do the try lock on the freeze stuff. If it fails we'll return EPERM and just return from sync. This fixes a hang Liu Bo reported when running xfstest 68 in a loop. Thanks, Reported-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/transaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b6463e12804..fbf8313b9d6 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -102,6 +102,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_noflush( struct btrfs_root *root, int num_items); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_join_transaction_freeze(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, -- cgit v1.2.3 From a698d0755adb6f27289d1e6610b2240595d27e8c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 20 Sep 2012 01:51:59 -0600 Subject: Btrfs: add a type field for the transaction handle This patch add a type field into the transaction handle structure, in this way, we needn't implement various end-transaction functions and can make the code more simple and readable. Signed-off-by: Miao Xie --- fs/btrfs/transaction.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fbf8313b9d6..0630bd19396 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -47,6 +47,14 @@ struct btrfs_transaction { int aborted; }; +enum btrfs_trans_type { + TRANS_START, + TRANS_JOIN, + TRANS_USERSPACE, + TRANS_JOIN_NOLOCK, + TRANS_JOIN_FREEZE, +}; + struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; @@ -58,8 +66,9 @@ struct btrfs_trans_handle { struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; - int aborted; - int adding_csums; + short aborted; + short adding_csums; + enum btrfs_trans_type type; /* * this root is only needed to validate that the root passed to * start_transaction is the same as the one passed to end_transaction. @@ -94,8 +103,6 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, - struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_items); struct btrfs_trans_handle *btrfs_start_transaction_noflush( -- cgit v1.2.3 From 354aa0fb6d5b97b262e056f7ad7bfc88d7ce0004 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 20 Sep 2012 01:54:00 -0600 Subject: Btrfs: fix orphan transaction on the freezed filesystem With the following debug patch: static int btrfs_freeze(struct super_block *sb) { + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + struct btrfs_transaction *trans; + + spin_lock(&fs_info->trans_lock); + trans = fs_info->running_transaction; + if (trans) { + printk("Transid %llu, use_count %d, num_writer %d\n", + trans->transid, atomic_read(&trans->use_count), + atomic_read(&trans->num_writers)); + } + spin_unlock(&fs_info->trans_lock); return 0; } I found there was a orphan transaction after the freeze operation was done. It is because the transaction may not be committed when the transaction handle end even though it is the last handle of the current transaction. This design avoid committing the transaction frequently, but also introduce the above problem. So I add btrfs_attach_transaction() which can catch the current transaction and commit it. If there is no transaction, it will return ENOENT, and do not anything. This function also can be used to instead of btrfs_join_transaction_freeze() because it don't increase the writer counter and don't start a new transaction, so it also can fix the deadlock between sync and freeze. Besides that, it is used to instead of btrfs_join_transaction() in transaction_kthread(), because if there is no transaction, the transaction kthread needn't anything. Signed-off-by: Miao Xie --- fs/btrfs/transaction.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 0630bd19396..80961947a6b 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -52,7 +52,7 @@ enum btrfs_trans_type { TRANS_JOIN, TRANS_USERSPACE, TRANS_JOIN_NOLOCK, - TRANS_JOIN_FREEZE, + TRANS_ATTACH, }; struct btrfs_trans_handle { @@ -109,7 +109,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_noflush( struct btrfs_root *root, int num_items); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); -struct btrfs_trans_handle *btrfs_join_transaction_freeze(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, -- cgit v1.2.3 From 08e007d2e57744472a9424735a368ffe6d625597 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 16 Oct 2012 11:33:38 +0000 Subject: Btrfs: improve the noflush reservation In some places(such as: evicting inode), we just can not flush the reserved space of delalloc, flushing the delayed directory index and delayed inode is OK, but we don't try to flush those things and just go back when there is no enough space to be reserved. This patch fixes this problem. We defined 3 types of the flush operations: NO_FLUSH, FLUSH_LIMIT and FLUSH_ALL. If we can in the transaction, we should not flush anything, or the deadlock would happen, so use NO_FLUSH. If we flushing the reserved space of delalloc would cause deadlock, use FLUSH_LIMIT. In the other cases, FLUSH_ALL is used, and we will flush all things. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/transaction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/transaction.h') diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 80961947a6b..0e8aa1e6c28 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -105,7 +105,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_items); -struct btrfs_trans_handle *btrfs_start_transaction_noflush( +struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_root *root, int num_items); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); -- cgit v1.2.3