From a555f810af6d63ea5960abaed88e150ad95c3011 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 28 Jan 2010 16:18:15 -0500
Subject: Btrfs: Add mount -o compress-force

The default btrfs mount -o compress mode will quickly back off
compressing a file if it notices that compression does not reduce the
size of the data being written.  This can save considerable CPU because
all future writes to the file go through uncompressed.

But some files are both very large and have mixed data stored in
them.  In that case, we want to add the ability to always try
compressing data before writing it.

This commit adds mount -o compress-force.  A later commit will add
a new inode flag that does the same thing.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.h | 1 +
 fs/btrfs/inode.c | 3 ++-
 fs/btrfs/super.c | 9 ++++++++-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9f806dd04c2..2aa8ec6a098 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1161,6 +1161,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
 #define BTRFS_MOUNT_NOSSD		(1 << 9)
 #define BTRFS_MOUNT_DISCARD		(1 << 10)
+#define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b330e27c2d8..f46c5727684 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -483,7 +483,8 @@ again:
 		nr_pages_ret = 0;
 
 		/* flag the file so we don't compress in the future */
-		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+		if (!btrfs_test_opt(root, FORCE_COMPRESS))
+			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 	}
 	if (will_compress) {
 		*num_added += 1;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3f9b45704fc..8a1ea6e6457 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,8 @@ enum {
 	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
 	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
 	Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
-	Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
+	Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
+	Opt_flushoncommit,
 	Opt_discard, Opt_err,
 };
 
@@ -82,6 +83,7 @@ static match_table_t tokens = {
 	{Opt_alloc_start, "alloc_start=%s"},
 	{Opt_thread_pool, "thread_pool=%d"},
 	{Opt_compress, "compress"},
+	{Opt_compress_force, "compress-force"},
 	{Opt_ssd, "ssd"},
 	{Opt_ssd_spread, "ssd_spread"},
 	{Opt_nossd, "nossd"},
@@ -173,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			printk(KERN_INFO "btrfs: use compression\n");
 			btrfs_set_opt(info->mount_opt, COMPRESS);
 			break;
+		case Opt_compress_force:
+			printk(KERN_INFO "btrfs: forcing compression\n");
+			btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+			btrfs_set_opt(info->mount_opt, COMPRESS);
+			break;
 		case Opt_ssd:
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
 			btrfs_set_opt(info->mount_opt, SSD);
-- 
cgit v1.2.3


From b8d9bfeb18f9af794020d96e9bee984d18a8d737 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 06:54:17 +0000
Subject: Btrfs: remove tree_search() in extent_map.c

This patch removes tree_search() in extent_map.c because it is not called by
anything.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent_map.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index ccbdcb54ec5..5a4f73b79b7 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
 	return NULL;
 }
 
-/*
- * look for an offset in the tree, and if it can't be found, return
- * the first offset we can find smaller than 'offset'.
- */
-static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
-{
-	struct rb_node *prev;
-	struct rb_node *ret;
-	ret = __tree_search(root, offset, &prev, NULL);
-	if (!ret)
-		return prev;
-	return ret;
-}
-
 /* check to see if two extent_map structs are adjacent and safe to merge */
 static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 {
-- 
cgit v1.2.3


From d1ea6a61454e7d7ff0873d0ad1ae27d5807da0d3 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 20 Jan 2010 07:28:54 +0000
Subject: Btrfs: Use correct values when updating inode i_size on fallocate

commit f2bc9dd07e3424c4ec5f3949961fe053d47bc825
Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Date:   Wed Jan 20 12:57:53 2010 +0530

    Btrfs: Use correct values when updating inode i_size on fallocate

    Even though we allocate more, we should be updating inode i_size
    as per the arguments passed

    Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f46c5727684..5606361b5f0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5800,7 +5800,7 @@ out_fail:
 }
 
 static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
-			       u64 alloc_hint, int mode)
+			u64 alloc_hint, int mode, loff_t actual_len)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5809,6 +5809,7 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 	u64 cur_offset = start;
 	u64 num_bytes = end - start;
 	int ret = 0;
+	u64 i_size;
 
 	while (num_bytes > 0) {
 		alloc_size = min(num_bytes, root->fs_info->max_extent);
@@ -5847,8 +5848,12 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
 		    cur_offset > inode->i_size) {
-			i_size_write(inode, cur_offset);
-			btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+			if (cur_offset > actual_len)
+				i_size  = actual_len;
+			else
+				i_size = cur_offset;
+			i_size_write(inode, i_size);
+			btrfs_ordered_update_i_size(inode, i_size, NULL);
 		}
 
 		ret = btrfs_update_inode(trans, root, inode);
@@ -5941,7 +5946,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 			ret = prealloc_file_range(inode,
 						  cur_offset, last_byte,
-						  alloc_hint, mode);
+						alloc_hint, mode, offset+len);
 			if (ret < 0) {
 				free_extent_map(em);
 				break;
-- 
cgit v1.2.3


From f858153c367a397235d3e81136741e40e44faf1d Mon Sep 17 00:00:00 2001
From: Yang Hongyang <yanghy@cn.fujitsu.com>
Date: Tue, 26 Jan 2010 00:48:23 +0000
Subject: Btrfs: fix a memory leak in btrfs_init_acl

In btrfs_init_acl() cloned acl is not released

Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/acl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index fa44e92e9b8..da3133c6983 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -269,6 +269,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 						    ACL_TYPE_ACCESS);
 			}
 		}
+		posix_acl_release(clone);
 	}
 failed:
 	posix_acl_release(acl);
-- 
cgit v1.2.3


From e3acc2a6850efff647f1c5458524eb3a8bcba20a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 26 Jan 2010 14:30:53 +0000
Subject: Btrfs: run orphan cleanup on default fs root

This patch revert's commit

6c090a11e1c403b727a6a8eff0b97d5fb9e95cb5

Since it introduces this problem where we can run orphan cleanup on a
volume that can have orphan entries re-added.  Instead of my original
fix, Yan Zheng pointed out that we can just revert my original fix and
then run the orphan cleanup in open_ctree after we look up the fs_root.
I have tested this with all the tests that gave me problems and this
patch fixes both problems.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/disk-io.c | 6 ++++++
 fs/btrfs/inode.c   | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 009e3bd18f2..87b25543d7d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1993,6 +1993,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (!fs_info->fs_root)
 		goto fail_trans_kthread;
 
+	if (!(sb->s_flags & MS_RDONLY)) {
+		down_read(&fs_info->cleanup_work_sem);
+		btrfs_orphan_cleanup(fs_info->fs_root);
+		up_read(&fs_info->cleanup_work_sem);
+	}
+
 	return tree_root;
 
 fail_trans_kthread:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5606361b5f0..8cd109972fa 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3797,12 +3797,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 
 	if (location.type == BTRFS_INODE_ITEM_KEY) {
 		inode = btrfs_iget(dir->i_sb, &location, root);
-		if (unlikely(root->clean_orphans) &&
-		    !(inode->i_sb->s_flags & MS_RDONLY)) {
-			down_read(&root->fs_info->cleanup_work_sem);
-			btrfs_orphan_cleanup(root);
-			up_read(&root->fs_info->cleanup_work_sem);
-		}
 		return inode;
 	}
 
-- 
cgit v1.2.3


From f48b90756bd834dda852ff514f2690d3175b1f44 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 27 Jan 2010 02:07:59 +0000
Subject: Btrfs: do not mark the chunk as readonly if in degraded mode

If a RAID setup has chunks that span multiple disks, and one of those
disks has failed, btrfs_chunk_readonly will return 1 since one of the
disks in that chunk's stripes is dead and therefore not writeable.  So
instead if we are in degraded mode, return 0 so we can go ahead and
allocate stuff.  Without this patch all of the block groups in a RAID1
setup will end up read-only, which will mean we can't add new disks to
the array since we won't be able to make allocations.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 220dad5db01..66122bdf8bb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
 	if (!em)
 		return 1;
 
+	if (btrfs_test_opt(root, DEGRADED)) {
+		free_extent_map(em);
+		return 0;
+	}
+
 	map = (struct map_lookup *)em->bdev;
 	for (i = 0; i < map->num_stripes; i++) {
 		if (!map->stripes[i].dev->writeable) {
-- 
cgit v1.2.3


From 7f59203abeaf18bf3497b308891f95a4489810ad Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 27 Jan 2010 02:09:00 +0000
Subject: Btrfs: check return value of open_bdev_exclusive properly

Hit this problem while testing RAID1 failure stuff.  open_bdev_exclusive
returns ERR_PTR(), not NULL.  So change the return value properly.  This
is important if you accidently specify a device that doesn't exist when
trying to add a new device to an array, you will panic the box
dereferencing bdev.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 66122bdf8bb..5eb7459e378 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 		return -EINVAL;
 
 	bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
-	if (!bdev)
-		return -EIO;
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
 
 	if (root->fs_info->fs_devices->seeding) {
 		seeding_dev = 1;
-- 
cgit v1.2.3


From 035fe03a7ad56982b30ab3a522b7b08d58feccd0 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 27 Jan 2010 02:09:38 +0000
Subject: Btrfs: check total number of devices when removing missing

If you have a disk failure in RAID1 and then add a new disk to the
array, and then try to remove the missing volume, it will fail.  The
reason is the sanity check only looks at the total number of rw devices,
which is just 2 because we have 2 good disks and 1 bad one.  Instead
check the total number of devices in the array to make sure we can
actually remove the device.  Tested this with a failed disk setup and
with this test we can now run

btrfs-vol -r missing /mount/point

and it works fine.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5eb7459e378..41ecbb2347f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		root->fs_info->avail_metadata_alloc_bits;
 
 	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
-	    root->fs_info->fs_devices->rw_devices <= 4) {
+	    root->fs_info->fs_devices->num_devices <= 4) {
 		printk(KERN_ERR "btrfs: unable to go below four devices "
 		       "on raid10\n");
 		ret = -EINVAL;
@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	}
 
 	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
-	    root->fs_info->fs_devices->rw_devices <= 2) {
+	    root->fs_info->fs_devices->num_devices <= 2) {
 		printk(KERN_ERR "btrfs: unable to go below two "
 		       "devices on raid1\n");
 		ret = -EINVAL;
-- 
cgit v1.2.3