From b1375d64c539c5b76794be759b62d3f178e67c32 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: Btrfs: fix uninit warning in backref.c Added initialization with the declaration of ret. It isn't set later on the switch-default branch (which should never be taken). Signed-off-by: Jan Schmidt Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index b9a843226de..633c701a287 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -297,7 +297,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct rb_node *n = &head->node.rb_node; int sgn; - int ret; + int ret = 0; if (extent_op && extent_op->update_key) btrfs_disk_key_to_cpu(info_key, &extent_op->key); @@ -392,7 +392,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, struct btrfs_key *info_key, int *info_level, struct list_head *prefs) { - int ret; + int ret = 0; int slot; struct extent_buffer *leaf; struct btrfs_key key; -- cgit v1.2.3 From 357b9784b79924a31ccded5d9a0c688f48cc28f2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: Btrfs: make sure a bitmap has enough bytes We have only been checking for min_bytes available in bitmap entries, but we won't successfully setup a bitmap cluster unless it has at least bytes in the bitmap, so in the common case min_bytes is 4k and we want something like 2MB, so if there are a bunch of bitmap entries with less than 2mb's in them, we'll search all them anyway, which is suboptimal. Fix this check. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index efe20032e4a..6e740693234 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2475,7 +2475,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, } list_for_each_entry(entry, bitmaps, list) { - if (entry->bytes < min_bytes) + if (entry->bytes < bytes) continue; ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, bytes, cont1_bytes, min_bytes); -- cgit v1.2.3 From 6dd70ce4eb7429c2ba6dd9fa46f78a0a2a254038 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: btrfs: Fix busyloops in transaction waiting code wait_log_commit() and wait_for_writer() were using slightly different conditions for deciding whether they should call schedule() and whether they should continue in the wait loop. Thus it could happen that we busylooped when the first condition was not true while the second one was. That is burning CPU cycles needlessly and is deadly on UP machines... Signed-off-by: Jan Kara Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cb877e0886a..966cc74f5d6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1957,7 +1957,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, finish_wait(&root->log_commit_wait[index], &wait); mutex_lock(&root->log_mutex); - } while (root->log_transid < transid + 2 && + } while (root->fs_info->last_trans_log_full_commit != + trans->transid && root->log_transid < transid + 2 && atomic_read(&root->log_commit[index])); return 0; } @@ -1966,7 +1967,8 @@ static int wait_for_writer(struct btrfs_trans_handle *trans, struct btrfs_root *root) { DEFINE_WAIT(wait); - while (atomic_read(&root->log_writers)) { + while (root->fs_info->last_trans_log_full_commit != + trans->transid && atomic_read(&root->log_writers)) { prepare_to_wait(&root->log_writer_wait, &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&root->log_mutex); -- cgit v1.2.3 From 8bedd51b6121c4607784d75f852828d25d119c52 Mon Sep 17 00:00:00 2001 From: Mitch Harder Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: Btrfs: Check for NULL page in extent_range_uptodate A user has encountered a NULL pointer kernel oops in btrfs when encountering media errors. The problem has been identified as an unhandled NULL pointer returned from find_get_page(). This modification simply checks for a NULL page, and returns with an error if found (the extent_range_uptodate() function returns 1 on errors). After testing this patch, the user reported that the error with the NULL pointer oops was solved. However, there is still a remaining problem with a thread becoming stuck in wait_on_page_locked(page) in the read_extent_buffer_pages(...) function in extent_io.c for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) ret = -EIO; } This patch leaves the issue with the locked page yet to be resolved. Signed-off-by: Mitch Harder Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9d09a4f8187..fcf77e1ded4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3909,6 +3909,8 @@ int extent_range_uptodate(struct extent_io_tree *tree, while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); + if (!page) + return 1; uptodate = PageUptodate(page); page_cache_release(page); if (!uptodate) { -- cgit v1.2.3 From 0b4a9d248f88e6773312f262e8185f23863d984a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: Btrfs: use cluster->window_start when allocating from a cluster bitmap We specifically set window_start in the cluster struct to indicate where the cluster starts in a bitmap, but we've been using min_start to indicate where we're searching from. This is usually the start of the blockgroup, so essentially means we're constantly searching from the start of any bitmap we find, which completely negates all the trouble we go to in order to setup a cluster. So start using window_start to make sure we actually use the area we found. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6e740693234..61447a51f64 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2242,7 +2242,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, if (entry->bitmap) { ret = btrfs_alloc_from_bitmap(block_group, cluster, entry, bytes, - min_start); + cluster->window_start); if (ret == 0) { node = rb_next(&entry->offset_index); if (!node) -- cgit v1.2.3 From 0b485143d835c019cddc45f46e4b3873dcc9aa4e Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: Btrfs: fix warning for 32-bit build of fs/btrfs/check-integrity.c There have been 4 warnings on 32-bit build, they are herewith fixed. Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/check-integrity.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ad0b3ba735b..b669a7d8e49 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1662,7 +1662,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, &state->block_hashtable); if (NULL != block) { - u64 bytenr; + u64 bytenr = 0; struct list_head *elem_ref_to; struct list_head *tmp_ref_to; @@ -2777,9 +2777,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) printk(KERN_INFO "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," " size=%lu, data=%p, bdev=%p)\n", - rw, bh->b_blocknr, - (unsigned long long)dev_bytenr, bh->b_size, - bh->b_data, bh->b_bdev); + rw, (unsigned long)bh->b_blocknr, + (unsigned long long)dev_bytenr, + (unsigned long)bh->b_size, bh->b_data, + bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, bh->b_data, bh->b_size, NULL, NULL, bh, rw); @@ -2844,7 +2845,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) printk(KERN_INFO "submit_bio(rw=0x%x, bi_vcnt=%u," " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", - rw, bio->bi_vcnt, bio->bi_sector, + rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, (unsigned long long)dev_bytenr, bio->bi_bdev); -- cgit v1.2.3 From 7ec31b548a17f773ab6289e795ed3a6820e8b56e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 26 Jan 2012 15:01:12 -0500 Subject: Btrfs: do not defrag a file partially xfstests 218 complains that btrfs defrags a file partially: After: 1 Write backwards sync, but contiguous - should defrag to 1 extent Before: 10 -After: 1 +After: 2 To fix this, we need to set max_to_defrag count properly. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6834be4c870..0b06a5ca8af 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1066,7 +1066,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = range->start >> PAGE_CACHE_SHIFT; } if (!max_to_defrag) - max_to_defrag = last_index; + max_to_defrag = last_index + 1; /* * make writeback starts from i, so the defrag range can be -- cgit v1.2.3 From 9e622d6bea0202e9fe267955362c01918562c09b Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 26 Jan 2012 15:01:12 -0500 Subject: Btrfs: fix enospc error caused by wrong checks of the chunk When we did sysbench test for inline files, enospc error happened easily though there was lots of free disk space which could be allocated for new chunks. Reproduce steps: # mkfs.btrfs -b $((2 * 1024 * 1024 * 1024)) # mount /mnt # ulimit -n 102400 # cd /mnt # sysbench --num-threads=1 --test=fileio --file-num=81920 \ > --file-total-size=80M --file-block-size=1K --file-io-mode=sync \ > --file-test-mode=seqwr prepare # sysbench --num-threads=1 --test=fileio --file-num=81920 \ > --file-total-size=80M --file-block-size=1K --file-io-mode=sync \ > --file-test-mode=seqwr run The reason of this bug is: Now, we can reserve space which is larger than the free space in the chunks if we have enough free disk space which can be used for new chunks. By this way, the space allocator should allocate a new chunk by force if there is no free space in the free space cache. But there are two wrong checks which break this operation. One is if (ret == -ENOSPC && num_bytes > min_alloc_size) in btrfs_reserve_extent(), it is wrong, we should try to allocate a new chunk even we fail to allocate free space by minimum allocable size. The other is if (space_info->force_alloc) force = space_info->force_alloc; in do_chunk_alloc(). It makes the allocator ignore CHUNK_ALLOC_FORCE If someone sets ->force_alloc to CHUNK_ALLOC_LIMITED, and makes the enospc error happen. Fix these two wrong checks. Especially the second one, we fix it by changing the value of CHUNK_ALLOC_LIMITED and CHUNK_ALLOC_FORCE, and make CHUNK_ALLOC_FORCE greater than CHUNK_ALLOC_LIMITED since CHUNK_ALLOC_FORCE has higher priority. And if the value which is passed in by the caller is greater than ->force_alloc, use the passed value. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 700879ed64c..283af7a676a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -34,23 +34,24 @@ #include "locking.h" #include "free-space-cache.h" -/* control flags for do_chunk_alloc's force field +/* + * control flags for do_chunk_alloc's force field * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk * if we really need one. * - * CHUNK_ALLOC_FORCE means it must try to allocate one - * * CHUNK_ALLOC_LIMITED means to only try and allocate one * if we have very few chunks already allocated. This is * used as part of the clustering code to help make sure * we have a good pool of storage to cluster in, without * filling the FS with empty chunks * + * CHUNK_ALLOC_FORCE means it must try to allocate one + * */ enum { CHUNK_ALLOC_NO_FORCE = 0, - CHUNK_ALLOC_FORCE = 1, - CHUNK_ALLOC_LIMITED = 2, + CHUNK_ALLOC_LIMITED = 1, + CHUNK_ALLOC_FORCE = 2, }; /* @@ -3414,7 +3415,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, again: spin_lock(&space_info->lock); - if (space_info->force_alloc) + if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); @@ -5794,6 +5795,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data) { + bool final_tried = false; int ret; u64 search_start = 0; @@ -5813,22 +5815,25 @@ again: search_start, search_end, hint_byte, ins, data); - if (ret == -ENOSPC && num_bytes > min_alloc_size) { - num_bytes = num_bytes >> 1; - num_bytes = num_bytes & ~(root->sectorsize - 1); - num_bytes = max(num_bytes, min_alloc_size); - do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data, CHUNK_ALLOC_FORCE); - goto again; - } - if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { - struct btrfs_space_info *sinfo; - - sinfo = __find_space_info(root->fs_info, data); - printk(KERN_ERR "btrfs allocation failed flags %llu, " - "wanted %llu\n", (unsigned long long)data, - (unsigned long long)num_bytes); - dump_space_info(sinfo, num_bytes, 1); + if (ret == -ENOSPC) { + if (!final_tried) { + num_bytes = num_bytes >> 1; + num_bytes = num_bytes & ~(root->sectorsize - 1); + num_bytes = max(num_bytes, min_alloc_size); + do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data, CHUNK_ALLOC_FORCE); + if (num_bytes == min_alloc_size) + final_tried = true; + goto again; + } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + struct btrfs_space_info *sinfo; + + sinfo = __find_space_info(root->fs_info, data); + printk(KERN_ERR "btrfs allocation failed flags %llu, " + "wanted %llu\n", (unsigned long long)data, + (unsigned long long)num_bytes); + dump_space_info(sinfo, num_bytes, 1); + } } trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); -- cgit v1.2.3 From 0c4e538bccc106872d31b1514570b4dac95fb7f2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 26 Jan 2012 15:01:12 -0500 Subject: btrfs: mask out gfp flags in releasepage btree_releasepage is a callback and can be passed unknown gfp flags and then they may end up in kmem_cache_alloc called from alloc_extent_state, slab allocator will BUG_ON when there is HIGHMEM or DMA32 flag set. This may happen when btrfs is mounted from a loop device, which masks out __GFP_IO flag. The check in try_release_extent_state 3399 if ((mask & GFP_NOFS) == GFP_NOFS) 3400 mask = GFP_NOFS; will not work and passes unfiltered flags further resulting in crash at mm/slab.c:2963 [<000000000024ae4c>] cache_alloc_refill+0x3b4/0x5c8 [<000000000024c810>] kmem_cache_alloc+0x204/0x294 [<00000000001fd3c2>] mempool_alloc+0x52/0x170 [<000003c000ced0b0>] alloc_extent_state+0x40/0xd4 [btrfs] [<000003c000cee5ae>] __clear_extent_bit+0x38a/0x4cc [btrfs] [<000003c000cee78c>] try_release_extent_state+0x9c/0xd4 [btrfs] [<000003c000cc4c66>] btree_releasepage+0x7e/0xd0 [btrfs] [<0000000000210d84>] shrink_page_list+0x6a0/0x724 [<0000000000211394>] shrink_inactive_list+0x230/0x578 [<0000000000211bb8>] shrink_list+0x6c/0x120 [<0000000000211e4e>] shrink_zone+0x1e2/0x228 [<0000000000211f24>] shrink_zones+0x90/0x254 [<0000000000213410>] do_try_to_free_pages+0xac/0x420 [<0000000000213ae0>] try_to_free_pages+0x13c/0x1b0 [<0000000000204e6c>] __alloc_pages_nodemask+0x5b4/0x9a8 [<00000000001fb04a>] grab_cache_page_write_begin+0x7e/0xe8 Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index da4457f84d7..4c867112b4c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -961,6 +961,13 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; + /* + * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing + * slab allocation from alloc_extent_state down the callchain where + * it'd hit a BUG_ON as those flags are not allowed. + */ + gfp_flags &= ~GFP_SLAB_BUG_MASK; + ret = try_release_extent_state(map, tree, page, gfp_flags); if (!ret) return 0; -- cgit v1.2.3 From 9b23062840e7c685ef0a0b561285d6e3a3b6811b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 26 Jan 2012 15:01:12 -0500 Subject: Btrfs: advance window_start if we're using a bitmap If we span a long area in a bitmap we could end up taking a lot of time searching to the next free area if we're searching from the original window_start, so advance window_start in order to make sure we don't do any superficial searching. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 61447a51f64..5802b1473c3 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2251,6 +2251,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, offset_index); continue; } + cluster->window_start += bytes; } else { ret = entry->offset; -- cgit v1.2.3 From 9998eb703490589c3e8f1bf09b15203156776edb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jan 2012 13:47:40 -0500 Subject: Btrfs: fix reservations in btrfs_page_mkwrite Josef fixed btrfs_page_mkwrite to properly release reserved extents if there was an error. But if we fail to get a reservation and we fail to dirty the inode (for ENOSPC reasons), we'll end up trying to release a reservation we never had. This makes sure we only release if we were able to reserve. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5977987abdb..7405753ec5d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6401,18 +6401,23 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long zero_start; loff_t size; int ret; + int reserved = 0; u64 page_start; u64 page_end; ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); - if (!ret) + if (!ret) { ret = btrfs_update_time(vma->vm_file); + reserved = 1; + } if (ret) { if (ret == -ENOMEM) ret = VM_FAULT_OOM; else /* -ENOSPC, -EIO, etc */ ret = VM_FAULT_SIGBUS; - goto out; + if (reserved) + goto out; + goto out_noreserve; } ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ @@ -6495,6 +6500,7 @@ out_unlock: unlock_page(page); out: btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); +out_noreserve: return ret; } -- cgit v1.2.3