aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2012-08-08 21:03:47 +0200
committerPatrick McHardy <kaber@trash.net>2012-08-08 21:03:47 +0200
commitd53b4ed072d9779cdf53582c46436dec06d0961f (patch)
treeac95ecab33e31cd79aae69c475e8348adac51230 /fs
parent5d4dff7f1011a81a693a9c7b1f6a0b9c842eb60c (diff)
parent28a33cbc24e4256c143dce96c7d93bf423229f92 (diff)
Merge tag 'v3.5' of 192.168.0.154:/repos/git/linux-2.6
Conflicts: drivers/Kconfig Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/cache.c64
-rw-r--r--fs/9p/fid.c8
-rw-r--r--fs/9p/v9fs.c73
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c13
-rw-r--r--fs/9p/vfs_dentry.c12
-rw-r--r--fs/9p/vfs_dir.c13
-rw-r--r--fs/9p/vfs_file.c34
-rw-r--r--fs/9p/vfs_inode.c205
-rw-r--r--fs/9p/vfs_inode_dotl.c161
-rw-r--r--fs/9p/vfs_super.c19
-rw-r--r--fs/9p/xattr.c16
-rw-r--r--fs/Kconfig15
-rw-r--r--fs/Kconfig.binfmt5
-rw-r--r--fs/Makefile4
-rw-r--r--fs/adfs/super.c7
-rw-r--r--fs/affs/affs.h14
-rw-r--r--fs/affs/amigaffs.c6
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/affs/namei.c8
-rw-r--r--fs/affs/super.c8
-rw-r--r--fs/afs/dir.c12
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/fsclient.c8
-rw-r--r--fs/afs/inode.c2
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/mntpt.c8
-rw-r--r--fs/afs/rxrpc.c3
-rw-r--r--fs/afs/super.c8
-rw-r--r--fs/aio.c214
-rw-r--r--fs/anon_inodes.c109
-rw-r--r--fs/attr.c19
-rw-r--r--fs/autofs4/autofs_i.h14
-rw-r--r--fs/autofs4/dev-ioctl.c14
-rw-r--r--fs/autofs4/expire.c2
-rw-r--r--fs/autofs4/init.c6
-rw-r--r--fs/autofs4/inode.c23
-rw-r--r--fs/autofs4/root.c4
-rw-r--r--fs/autofs4/waitq.c40
-rw-r--r--fs/bad_inode.c9
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/inode.c6
-rw-r--r--fs/binfmt_aout.c51
-rw-r--r--fs/binfmt_elf.c107
-rw-r--r--fs/binfmt_elf_fdpic.c37
-rw-r--r--fs/binfmt_em86.c3
-rw-r--r--fs/binfmt_flat.c27
-rw-r--r--fs/binfmt_misc.c18
-rw-r--r--fs/binfmt_script.c3
-rw-r--r--fs/binfmt_som.c16
-rw-r--r--fs/bio-integrity.c10
-rw-r--r--fs/bio.c72
-rw-r--r--fs/block_dev.c78
-rw-r--r--fs/btrfs/Kconfig19
-rw-r--r--fs/btrfs/Makefile3
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/async-thread.c17
-rw-r--r--fs/btrfs/async-thread.h4
-rw-r--r--fs/btrfs/backref.c1447
-rw-r--r--fs/btrfs/backref.h11
-rw-r--r--fs/btrfs/btrfs_inode.h54
-rw-r--r--fs/btrfs/check-integrity.c3358
-rw-r--r--fs/btrfs/check-integrity.h36
-rw-r--r--fs/btrfs/compression.c54
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c1311
-rw-r--r--fs/btrfs/ctree.h449
-rw-r--r--fs/btrfs/delayed-inode.c104
-rw-r--r--fs/btrfs/delayed-inode.h3
-rw-r--r--fs/btrfs/delayed-ref.c176
-rw-r--r--fs/btrfs/delayed-ref.h80
-rw-r--r--fs/btrfs/dir-item.c10
-rw-r--r--fs/btrfs/disk-io.c902
-rw-r--r--fs/btrfs/disk-io.h18
-rw-r--r--fs/btrfs/export.c19
-rw-r--r--fs/btrfs/extent-tree.c1124
-rw-r--r--fs/btrfs/extent_io.c1353
-rw-r--r--fs/btrfs/extent_io.h75
-rw-r--r--fs/btrfs/extent_map.h4
-rw-r--r--fs/btrfs/file-item.c61
-rw-r--r--fs/btrfs/file.c197
-rw-r--r--fs/btrfs/free-space-cache.c646
-rw-r--r--fs/btrfs/inode-item.c6
-rw-r--r--fs/btrfs/inode-map.c23
-rw-r--r--fs/btrfs/inode.c1075
-rw-r--r--fs/btrfs/ioctl.c628
-rw-r--r--fs/btrfs/ioctl.h93
-rw-r--r--fs/btrfs/locking.c59
-rw-r--r--fs/btrfs/locking.h4
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/ordered-data.c225
-rw-r--r--fs/btrfs/ordered-data.h37
-rw-r--r--fs/btrfs/orphan.c2
-rw-r--r--fs/btrfs/print-tree.c3
-rw-r--r--fs/btrfs/rcu-string.h56
-rw-r--r--fs/btrfs/reada.c65
-rw-r--r--fs/btrfs/relocation.c152
-rw-r--r--fs/btrfs/root-tree.c25
-rw-r--r--fs/btrfs/scrub.c1486
-rw-r--r--fs/btrfs/struct-funcs.c53
-rw-r--r--fs/btrfs/super.c544
-rw-r--r--fs/btrfs/transaction.c309
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-log.c147
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/ulist.c222
-rw-r--r--fs/btrfs/ulist.h77
-rw-r--r--fs/btrfs/volumes.c1614
-rw-r--r--fs/btrfs/volumes.h113
-rw-r--r--fs/btrfs/xattr.c3
-rw-r--r--fs/btrfs/zlib.c4
-rw-r--r--fs/buffer.c96
-rw-r--r--fs/cachefiles/interface.c1
-rw-r--r--fs/cachefiles/namei.c3
-rw-r--r--fs/ceph/addr.c21
-rw-r--r--fs/ceph/caps.c8
-rw-r--r--fs/ceph/dir.c92
-rw-r--r--fs/ceph/export.c34
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/ceph/inode.c15
-rw-r--r--fs/ceph/ioctl.c102
-rw-r--r--fs/ceph/ioctl.h2
-rw-r--r--fs/ceph/mds_client.c75
-rw-r--r--fs/ceph/mds_client.h12
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.c55
-rw-r--r--fs/ceph/super.h7
-rw-r--r--fs/ceph/xattr.c219
-rw-r--r--fs/char_dev.c6
-rw-r--r--fs/cifs/Kconfig27
-rw-r--r--fs/cifs/Makefile4
-rw-r--r--fs/cifs/README11
-rw-r--r--fs/cifs/cifs_debug.c118
-rw-r--r--fs/cifs/cifs_debug.h6
-rw-r--r--fs/cifs/cifs_fs_sb.h4
-rw-r--r--fs/cifs/cifs_spnego.c10
-rw-r--r--fs/cifs/cifs_unicode.c41
-rw-r--r--fs/cifs/cifs_unicode.h20
-rw-r--r--fs/cifs/cifsacl.c3
-rw-r--r--fs/cifs/cifsencrypt.c21
-rw-r--r--fs/cifs/cifsfs.c96
-rw-r--r--fs/cifs/cifsfs.h8
-rw-r--r--fs/cifs/cifsglob.h176
-rw-r--r--fs/cifs/cifsproto.h41
-rw-r--r--fs/cifs/cifssmb.c485
-rw-r--r--fs/cifs/connect.c1966
-rw-r--r--fs/cifs/dir.c47
-rw-r--r--fs/cifs/file.c1139
-rw-r--r--fs/cifs/inode.c32
-rw-r--r--fs/cifs/ioctl.c8
-rw-r--r--fs/cifs/misc.c142
-rw-r--r--fs/cifs/netmisc.c6
-rw-r--r--fs/cifs/readdir.c31
-rw-r--r--fs/cifs/sess.c45
-rw-r--r--fs/cifs/smb1ops.c243
-rw-r--r--fs/cifs/smb2ops.c27
-rw-r--r--fs/cifs/smbencrypt.c2
-rw-r--r--fs/cifs/transport.c361
-rw-r--r--fs/cifs/xattr.c6
-rw-r--r--fs/coda/cnode.c38
-rw-r--r--fs/coda/coda_fs_i.h4
-rw-r--r--fs/coda/dir.c37
-rw-r--r--fs/coda/inode.c18
-rw-r--r--fs/coda/psdev.c1
-rw-r--r--fs/coda/upcall.c1
-rw-r--r--fs/compat.c139
-rw-r--r--fs/compat_ioctl.c42
-rw-r--r--fs/configfs/configfs_internal.h9
-rw-r--r--fs/configfs/dir.c74
-rw-r--r--fs/configfs/inode.c66
-rw-r--r--fs/configfs/mount.c16
-rw-r--r--fs/configfs/symlink.c12
-rw-r--r--fs/cramfs/inode.c15
-rw-r--r--fs/dcache.c398
-rw-r--r--fs/dcookies.c2
-rw-r--r--fs/debugfs/file.c259
-rw-r--r--fs/debugfs/inode.c165
-rw-r--r--fs/devpts/inode.c120
-rw-r--r--fs/direct-io.c101
-rw-r--r--fs/dlm/ast.c3
-rw-r--r--fs/dlm/config.c130
-rw-r--r--fs/dlm/config.h17
-rw-r--r--fs/dlm/debug_fs.c37
-rw-r--r--fs/dlm/dir.c18
-rw-r--r--fs/dlm/dlm_internal.h76
-rw-r--r--fs/dlm/lock.c642
-rw-r--r--fs/dlm/lock.h10
-rw-r--r--fs/dlm/lockspace.c91
-rw-r--r--fs/dlm/lowcomms.c54
-rw-r--r--fs/dlm/member.c486
-rw-r--r--fs/dlm/member.h10
-rw-r--r--fs/dlm/memory.c8
-rw-r--r--fs/dlm/rcom.c160
-rw-r--r--fs/dlm/rcom.h2
-rw-r--r--fs/dlm/recover.c160
-rw-r--r--fs/dlm/recoverd.c62
-rw-r--r--fs/dlm/requestqueue.c43
-rw-r--r--fs/dlm/user.c5
-rw-r--r--fs/ecryptfs/crypto.c122
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h11
-rw-r--r--fs/ecryptfs/file.c9
-rw-r--r--fs/ecryptfs/inode.c127
-rw-r--r--fs/ecryptfs/keystore.c14
-rw-r--r--fs/ecryptfs/kthread.c2
-rw-r--r--fs/ecryptfs/main.c19
-rw-r--r--fs/ecryptfs/messaging.c2
-rw-r--r--fs/ecryptfs/miscdev.c188
-rw-r--r--fs/ecryptfs/mmap.c12
-rw-r--r--fs/ecryptfs/read_write.c100
-rw-r--r--fs/ecryptfs/super.c22
-rw-r--r--fs/efs/super.c4
-rw-r--r--fs/eventfd.c14
-rw-r--r--fs/eventpoll.c406
-rw-r--r--fs/exec.c137
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/Kconfig11
-rw-r--r--fs/exofs/Kconfig.ore12
-rw-r--r--fs/exofs/dir.c6
-rw-r--r--fs/exofs/exofs.h16
-rw-r--r--fs/exofs/inode.c6
-rw-r--r--fs/exofs/namei.c19
-rw-r--r--fs/exofs/ore.c16
-rw-r--r--fs/exofs/ore_raid.c169
-rw-r--r--fs/exofs/super.c30
-rw-r--r--fs/exofs/sys.c200
-rw-r--r--fs/exportfs/expfs.c33
-rw-r--r--fs/ext2/balloc.c9
-rw-r--r--fs/ext2/dir.c6
-rw-r--r--fs/ext2/ext2.h637
-rw-r--r--fs/ext2/ialloc.c11
-rw-r--r--fs/ext2/inode.c27
-rw-r--r--fs/ext2/ioctl.c34
-rw-r--r--fs/ext2/namei.c21
-rw-r--r--fs/ext2/super.c61
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext2/xattr_security.c6
-rw-r--r--fs/ext2/xattr_trusted.c6
-rw-r--r--fs/ext2/xattr_user.c1
-rw-r--r--fs/ext2/xip.c2
-rw-r--r--fs/ext3/acl.c8
-rw-r--r--fs/ext3/balloc.c99
-rw-r--r--fs/ext3/bitmap.c4
-rw-r--r--fs/ext3/dir.c174
-rw-r--r--fs/ext3/ext3.h1326
-rw-r--r--fs/ext3/ext3_jbd.c2
-rw-r--r--fs/ext3/file.c6
-rw-r--r--fs/ext3/fsync.c8
-rw-r--r--fs/ext3/hash.c8
-rw-r--r--fs/ext3/ialloc.c43
-rw-r--r--fs/ext3/inode.c104
-rw-r--r--fs/ext3/ioctl.c33
-rw-r--r--fs/ext3/namei.c33
-rw-r--r--fs/ext3/resize.c5
-rw-r--r--fs/ext3/super.c84
-rw-r--r--fs/ext3/symlink.c4
-rw-r--r--fs/ext3/xattr.c7
-rw-r--r--fs/ext3/xattr_security.c7
-rw-r--r--fs/ext3/xattr_trusted.c7
-rw-r--r--fs/ext3/xattr_user.c6
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/balloc.c118
-rw-r--r--fs/ext4/bitmap.c83
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/dir.c239
-rw-r--r--fs/ext4/ext4.h206
-rw-r--r--fs/ext4/ext4_extents.h28
-rw-r--r--fs/ext4/ext4_jbd2.c9
-rw-r--r--fs/ext4/ext4_jbd2.h135
-rw-r--r--fs/ext4/extents.c438
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/hash.c4
-rw-r--r--fs/ext4/ialloc.c355
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inode.c394
-rw-r--r--fs/ext4/ioctl.c137
-rw-r--r--fs/ext4/mballoc.c374
-rw-r--r--fs/ext4/mballoc.h20
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/mmp.c48
-rw-r--r--fs/ext4/namei.c460
-rw-r--r--fs/ext4/page-io.c12
-rw-r--r--fs/ext4/resize.c1241
-rw-r--r--fs/ext4/super.c1430
-rw-r--r--fs/ext4/xattr.c107
-rw-r--r--fs/ext4/xattr.h4
-rw-r--r--fs/ext4/xattr_security.c6
-rw-r--r--fs/ext4/xattr_trusted.c1
-rw-r--r--fs/ext4/xattr_user.c1
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/fat.h12
-rw-r--r--fs/fat/fatent.c21
-rw-r--r--fs/fat/file.c8
-rw-r--r--fs/fat/inode.c117
-rw-r--r--fs/fat/namei_msdos.c4
-rw-r--r--fs/fat/namei_vfat.c90
-rw-r--r--fs/fcntl.c66
-rw-r--r--fs/fhandle.c8
-rw-r--r--fs/fifo.c9
-rw-r--r--fs/file.c54
-rw-r--r--fs/file_table.c43
-rw-r--r--fs/filesystems.c1
-rw-r--r--fs/freevxfs/vxfs_inode.c7
-rw-r--r--fs/freevxfs/vxfs_super.c3
-rw-r--r--fs/fs-writeback.c400
-rw-r--r--fs/fs_struct.c31
-rw-r--r--fs/fuse/control.c10
-rw-r--r--fs/fuse/dev.c61
-rw-r--r--fs/fuse/dir.c106
-rw-r--r--fs/fuse/file.c235
-rw-r--r--fs/fuse/fuse_i.h18
-rw-r--r--fs/fuse/inode.c57
-rw-r--r--fs/gfs2/Kconfig7
-rw-r--r--fs/gfs2/acl.c26
-rw-r--r--fs/gfs2/aops.c52
-rw-r--r--fs/gfs2/bmap.c42
-rw-r--r--fs/gfs2/dir.c70
-rw-r--r--fs/gfs2/dir.h2
-rw-r--r--fs/gfs2/export.c20
-rw-r--r--fs/gfs2/file.c66
-rw-r--r--fs/gfs2/glock.c226
-rw-r--r--fs/gfs2/glock.h7
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/incore.h157
-rw-r--r--fs/gfs2/inode.c104
-rw-r--r--fs/gfs2/inode.h3
-rw-r--r--fs/gfs2/lock_dlm.c1124
-rw-r--r--fs/gfs2/log.c287
-rw-r--r--fs/gfs2/log.h5
-rw-r--r--fs/gfs2/lops.c503
-rw-r--r--fs/gfs2/lops.h14
-rw-r--r--fs/gfs2/main.c31
-rw-r--r--fs/gfs2/meta_io.c32
-rw-r--r--fs/gfs2/meta_io.h4
-rw-r--r--fs/gfs2/ops_fstype.c58
-rw-r--r--fs/gfs2/quota.c103
-rw-r--r--fs/gfs2/recovery.c11
-rw-r--r--fs/gfs2/rgrp.c569
-rw-r--r--fs/gfs2/rgrp.h26
-rw-r--r--fs/gfs2/super.c28
-rw-r--r--fs/gfs2/sys.c41
-rw-r--r--fs/gfs2/sys.h2
-rw-r--r--fs/gfs2/trace_gfs2.h76
-rw-r--r--fs/gfs2/trans.c44
-rw-r--r--fs/gfs2/trans.h6
-rw-r--r--fs/gfs2/util.c2
-rw-r--r--fs/gfs2/util.h4
-rw-r--r--fs/gfs2/xattr.c64
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c11
-rw-r--r--fs/hfsplus/catalog.c4
-rw-r--r--fs/hfsplus/dir.c17
-rw-r--r--fs/hfsplus/hfsplus_fs.h9
-rw-r--r--fs/hfsplus/hfsplus_raw.h2
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/ioctl.c43
-rw-r--r--fs/hfsplus/options.c4
-rw-r--r--fs/hfsplus/super.c31
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hostfs/hostfs.h5
-rw-r--r--fs/hostfs/hostfs_kern.c22
-rw-r--r--fs/hostfs/hostfs_user.c4
-rw-r--r--fs/hpfs/alloc.c14
-rw-r--r--fs/hpfs/anode.c43
-rw-r--r--fs/hpfs/buffer.c1
-rw-r--r--fs/hpfs/dir.c2
-rw-r--r--fs/hpfs/dnode.c10
-rw-r--r--fs/hpfs/ea.c60
-rw-r--r--fs/hpfs/hpfs.h289
-rw-r--r--fs/hpfs/hpfs_fn.h23
-rw-r--r--fs/hpfs/inode.c4
-rw-r--r--fs/hpfs/map.c20
-rw-r--r--fs/hpfs/namei.c8
-rw-r--r--fs/hpfs/super.c11
-rw-r--r--fs/hppfs/hppfs.c14
-rw-r--r--fs/hugetlbfs/inode.c220
-rw-r--r--fs/inode.c329
-rw-r--r--fs/internal.h33
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/ioprio.c40
-rw-r--r--fs/isofs/export.c13
-rw-r--r--fs/isofs/inode.c13
-rw-r--r--fs/isofs/isofs.h6
-rw-r--r--fs/jbd/checkpoint.c30
-rw-r--r--fs/jbd/commit.c27
-rw-r--r--fs/jbd/journal.c223
-rw-r--r--fs/jbd/recovery.c4
-rw-r--r--fs/jbd/revoke.c34
-rw-r--r--fs/jbd/transaction.c44
-rw-r--r--fs/jbd2/Kconfig2
-rw-r--r--fs/jbd2/checkpoint.c142
-rw-r--r--fs/jbd2/commit.c132
-rw-r--r--fs/jbd2/journal.c510
-rw-r--r--fs/jbd2/recovery.c131
-rw-r--r--fs/jbd2/revoke.c73
-rw-r--r--fs/jbd2/transaction.c57
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/background.c29
-rw-r--r--fs/jffs2/build.c6
-rw-r--r--fs/jffs2/compr.c32
-rw-r--r--fs/jffs2/compr_lzo.c1
-rw-r--r--fs/jffs2/compr_rubin.c2
-rw-r--r--fs/jffs2/compr_zlib.c45
-rw-r--r--fs/jffs2/debug.c22
-rw-r--r--fs/jffs2/debug.h50
-rw-r--r--fs/jffs2/dir.c55
-rw-r--r--fs/jffs2/erase.c91
-rw-r--r--fs/jffs2/file.c33
-rw-r--r--fs/jffs2/fs.c76
-rw-r--r--fs/jffs2/gc.c324
-rw-r--r--fs/jffs2/jffs2_fs_sb.h11
-rw-r--r--fs/jffs2/malloc.c2
-rw-r--r--fs/jffs2/nodelist.c30
-rw-r--r--fs/jffs2/nodemgmt.c256
-rw-r--r--fs/jffs2/os-linux.h11
-rw-r--r--fs/jffs2/read.c70
-rw-r--r--fs/jffs2/readinode.c43
-rw-r--r--fs/jffs2/scan.c233
-rw-r--r--fs/jffs2/security.c4
-rw-r--r--fs/jffs2/summary.c16
-rw-r--r--fs/jffs2/super.c75
-rw-r--r--fs/jffs2/symlink.c7
-rw-r--r--fs/jffs2/wbuf.c241
-rw-r--r--fs/jffs2/write.c113
-rw-r--r--fs/jffs2/writev.c32
-rw-r--r--fs/jffs2/xattr.c25
-rw-r--r--fs/jffs2/xattr.h2
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/jfs/ioctl.c4
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c4
-rw-r--r--fs/jfs/namei.c19
-rw-r--r--fs/jfs/super.c17
-rw-r--r--fs/libfs.c25
-rw-r--r--fs/lockd/clnt4xdr.c4
-rw-r--r--fs/lockd/clntlock.c16
-rw-r--r--fs/lockd/clntxdr.c10
-rw-r--r--fs/lockd/host.c42
-rw-r--r--fs/lockd/mon.c23
-rw-r--r--fs/lockd/netns.h12
-rw-r--r--fs/lockd/svc.c221
-rw-r--r--fs/lockd/svclock.c59
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/locks.c12
-rw-r--r--fs/logfs/dev_mtd.c80
-rw-r--r--fs/logfs/dir.c29
-rw-r--r--fs/logfs/file.c2
-rw-r--r--fs/logfs/gc.c2
-rw-r--r--fs/logfs/inode.c7
-rw-r--r--fs/logfs/journal.c1
-rw-r--r--fs/logfs/logfs.h7
-rw-r--r--fs/logfs/readwrite.c91
-rw-r--r--fs/logfs/segment.c55
-rw-r--r--fs/logfs/super.c15
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/dir.c4
-rw-r--r--fs/minix/inode.c41
-rw-r--r--fs/minix/minix.h3
-rw-r--r--fs/minix/namei.c20
-rw-r--r--fs/mount.h76
-rw-r--r--fs/mpage.c6
-rw-r--r--fs/namei.c723
-rw-r--r--fs/namespace.c953
-rw-r--r--fs/ncpfs/dir.c18
-rw-r--r--fs/ncpfs/file.c7
-rw-r--r--fs/ncpfs/inode.c16
-rw-r--r--fs/ncpfs/ioctl.c2
-rw-r--r--fs/ncpfs/mmap.c1
-rw-r--r--fs/ncpfs/ncp_fs_sb.h10
-rw-r--r--fs/ncpfs/ncplib_kernel.h2
-rw-r--r--fs/ncpfs/symlink.c2
-rw-r--r--fs/nfs/Kconfig40
-rw-r--r--fs/nfs/Makefile5
-rw-r--r--fs/nfs/blocklayout/blocklayout.c435
-rw-r--r--fs/nfs/blocklayout/blocklayout.h23
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c46
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c33
-rw-r--r--fs/nfs/blocklayout/extents.c178
-rw-r--r--fs/nfs/cache_lib.c61
-rw-r--r--fs/nfs/cache_lib.h10
-rw-r--r--fs/nfs/callback.c25
-rw-r--r--fs/nfs/callback.h5
-rw-r--r--fs/nfs/callback_proc.c101
-rw-r--r--fs/nfs/callback_xdr.c33
-rw-r--r--fs/nfs/client.c500
-rw-r--r--fs/nfs/delegation.c84
-rw-r--r--fs/nfs/delegation.h5
-rw-r--r--fs/nfs/dir.c144
-rw-r--r--fs/nfs/direct.c763
-rw-r--r--fs/nfs/dns_resolve.c130
-rw-r--r--fs/nfs/dns_resolve.h14
-rw-r--r--fs/nfs/file.c92
-rw-r--r--fs/nfs/fscache.c17
-rw-r--r--fs/nfs/fscache.h10
-rw-r--r--fs/nfs/getroot.c92
-rw-r--r--fs/nfs/idmap.c836
-rw-r--r--fs/nfs/inode.c291
-rw-r--r--fs/nfs/internal.h156
-rw-r--r--fs/nfs/mount_clnt.c16
-rw-r--r--fs/nfs/namespace.c145
-rw-r--r--fs/nfs/netns.h32
-rw-r--r--fs/nfs/nfs2xdr.c7
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c57
-rw-r--r--fs/nfs/nfs3xdr.c116
-rw-r--r--fs/nfs/nfs4_fs.h91
-rw-r--r--fs/nfs/nfs4filelayout.c778
-rw-r--r--fs/nfs/nfs4filelayout.h58
-rw-r--r--fs/nfs/nfs4filelayoutdev.c186
-rw-r--r--fs/nfs/nfs4namespace.c147
-rw-r--r--fs/nfs/nfs4proc.c1550
-rw-r--r--fs/nfs/nfs4renewd.c2
-rw-r--r--fs/nfs/nfs4state.c711
-rw-r--r--fs/nfs/nfs4xdr.c1256
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c98
-rw-r--r--fs/nfs/objlayout/objlayout.c163
-rw-r--r--fs/nfs/objlayout/objlayout.h2
-rw-r--r--fs/nfs/pagelist.c143
-rw-r--r--fs/nfs/pnfs.c419
-rw-r--r--fs/nfs/pnfs.h180
-rw-r--r--fs/nfs/pnfs_dev.c4
-rw-r--r--fs/nfs/proc.c53
-rw-r--r--fs/nfs/read.c450
-rw-r--r--fs/nfs/super.c1019
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/unlink.c45
-rw-r--r--fs/nfs/write.c953
-rw-r--r--fs/nfsd/Kconfig10
-rw-r--r--fs/nfsd/Makefile1
-rw-r--r--fs/nfsd/auth.c7
-rw-r--r--fs/nfsd/current_stateid.h28
-rw-r--r--fs/nfsd/export.c183
-rw-r--r--fs/nfsd/fault_inject.c92
-rw-r--r--fs/nfsd/fault_inject.h28
-rw-r--r--fs/nfsd/idmap.h8
-rw-r--r--fs/nfsd/netns.h40
-rw-r--r--fs/nfsd/nfs3xdr.c22
-rw-r--r--fs/nfsd/nfs4callback.c34
-rw-r--r--fs/nfsd/nfs4idmap.c163
-rw-r--r--fs/nfsd/nfs4proc.c140
-rw-r--r--fs/nfsd/nfs4recover.c675
-rw-r--r--fs/nfsd/nfs4state.c1246
-rw-r--r--fs/nfsd/nfs4xdr.c197
-rw-r--r--fs/nfsd/nfsctl.c103
-rw-r--r--fs/nfsd/nfsd.h27
-rw-r--r--fs/nfsd/nfsfh.c6
-rw-r--r--fs/nfsd/nfsfh.h2
-rw-r--r--fs/nfsd/nfssvc.c77
-rw-r--r--fs/nfsd/state.h49
-rw-r--r--fs/nfsd/stats.c5
-rw-r--r--fs/nfsd/vfs.c103
-rw-r--r--fs/nfsd/vfs.h14
-rw-r--r--fs/nfsd/xdr4.h40
-rw-r--r--fs/nilfs2/cpfile.c94
-rw-r--r--fs/nilfs2/dat.c38
-rw-r--r--fs/nilfs2/dir.c6
-rw-r--r--fs/nilfs2/file.c24
-rw-r--r--fs/nilfs2/gcinode.c2
-rw-r--r--fs/nilfs2/ifile.c4
-rw-r--r--fs/nilfs2/inode.c6
-rw-r--r--fs/nilfs2/ioctl.c32
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c41
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/page.c8
-rw-r--r--fs/nilfs2/recovery.c4
-rw-r--r--fs/nilfs2/segbuf.c4
-rw-r--r--fs/nilfs2/segment.c4
-rw-r--r--fs/nilfs2/sufile.c68
-rw-r--r--fs/nilfs2/super.c12
-rw-r--r--fs/nilfs2/the_nilfs.c7
-rw-r--r--fs/nls/Kconfig157
-rw-r--r--fs/nls/Makefile11
-rw-r--r--fs/nls/mac-celtic.c602
-rw-r--r--fs/nls/mac-centeuro.c532
-rw-r--r--fs/nls/mac-croatian.c602
-rw-r--r--fs/nls/mac-cyrillic.c497
-rw-r--r--fs/nls/mac-gaelic.c567
-rw-r--r--fs/nls/mac-greek.c497
-rw-r--r--fs/nls/mac-iceland.c602
-rw-r--r--fs/nls/mac-inuit.c532
-rw-r--r--fs/nls/mac-roman.c637
-rw-r--r--fs/nls/mac-romanian.c602
-rw-r--r--fs/nls/mac-turkish.c602
-rw-r--r--fs/nls/nls_base.c73
-rw-r--r--fs/notify/fanotify/fanotify_user.c6
-rw-r--r--fs/notify/fsnotify.c17
-rw-r--r--fs/notify/mark.c8
-rw-r--r--fs/notify/notification.c3
-rw-r--r--fs/notify/vfsmount_mark.c19
-rw-r--r--fs/ntfs/aops.c20
-rw-r--r--fs/ntfs/attrib.c26
-rw-r--r--fs/ntfs/file.c20
-rw-r--r--fs/ntfs/inode.c11
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/ntfs/layout.h4
-rw-r--r--fs/ntfs/mft.c6
-rw-r--r--fs/ntfs/super.c29
-rw-r--r--fs/ntfs/volume.h4
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/blockcheck.c42
-rw-r--r--fs/ocfs2/cluster/netdebug.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/dlm/dlmast.c2
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h6
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c40
-rw-r--r--fs/ocfs2/dlmglue.c33
-rw-r--r--fs/ocfs2/export.c19
-rw-r--r--fs/ocfs2/extent_map.c2
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/ocfs2/inode.c15
-rw-r--r--fs/ocfs2/ioctl.c37
-rw-r--r--fs/ocfs2/move_extents.c10
-rw-r--r--fs/ocfs2/namei.c15
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/stack_user.c4
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/super.c61
-rw-r--r--fs/ocfs2/symlink.c115
-rw-r--r--fs/ocfs2/symlink.h2
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/omfs/dir.c6
-rw-r--r--fs/omfs/inode.c10
-rw-r--r--fs/omfs/omfs.h2
-rw-r--r--fs/open.c126
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/partitions/Kconfig251
-rw-r--r--fs/partitions/Makefile20
-rw-r--r--fs/partitions/acorn.c556
-rw-r--r--fs/partitions/acorn.h14
-rw-r--r--fs/partitions/amiga.c139
-rw-r--r--fs/partitions/amiga.h6
-rw-r--r--fs/partitions/atari.c149
-rw-r--r--fs/partitions/atari.h34
-rw-r--r--fs/partitions/check.c687
-rw-r--r--fs/partitions/check.h49
-rw-r--r--fs/partitions/efi.c675
-rw-r--r--fs/partitions/efi.h134
-rw-r--r--fs/partitions/ibm.c275
-rw-r--r--fs/partitions/ibm.h1
-rw-r--r--fs/partitions/karma.c57
-rw-r--r--fs/partitions/karma.h8
-rw-r--r--fs/partitions/ldm.c1570
-rw-r--r--fs/partitions/ldm.h215
-rw-r--r--fs/partitions/mac.c134
-rw-r--r--fs/partitions/mac.h44
-rw-r--r--fs/partitions/msdos.c552
-rw-r--r--fs/partitions/msdos.h8
-rw-r--r--fs/partitions/osf.c86
-rw-r--r--fs/partitions/osf.h7
-rw-r--r--fs/partitions/sgi.c82
-rw-r--r--fs/partitions/sgi.h8
-rw-r--r--fs/partitions/sun.c122
-rw-r--r--fs/partitions/sun.h8
-rw-r--r--fs/partitions/sysv68.c95
-rw-r--r--fs/partitions/sysv68.h1
-rw-r--r--fs/partitions/ultrix.c48
-rw-r--r--fs/partitions/ultrix.h5
-rw-r--r--fs/pipe.c58
-rw-r--r--fs/pnode.c124
-rw-r--r--fs/pnode.h36
-rw-r--r--fs/posix_acl.c2
-rw-r--r--fs/proc/array.c280
-rw-r--r--fs/proc/base.c997
-rw-r--r--fs/proc/generic.c8
-rw-r--r--fs/proc/inode.c38
-rw-r--r--fs/proc/internal.h16
-rw-r--r--fs/proc/kcore.c8
-rw-r--r--fs/proc/namespaces.c9
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--fs/proc/proc_sysctl.c1276
-rw-r--r--fs/proc/root.c71
-rw-r--r--fs/proc/stat.c151
-rw-r--r--fs/proc/task_mmu.c429
-rw-r--r--fs/proc/task_nommu.c71
-rw-r--r--fs/proc/uptime.c11
-rw-r--r--fs/proc/vmcore.c23
-rw-r--r--fs/proc_namespace.c333
-rw-r--r--fs/pstore/Kconfig17
-rw-r--r--fs/pstore/Makefile3
-rw-r--r--fs/pstore/inode.c64
-rw-r--r--fs/pstore/platform.c98
-rw-r--r--fs/pstore/ram.c386
-rw-r--r--fs/pstore/ram_core.c539
-rw-r--r--fs/qnx4/inode.c150
-rw-r--r--fs/qnx4/namei.c9
-rw-r--r--fs/qnx4/qnx4.h2
-rw-r--r--fs/qnx6/Kconfig26
-rw-r--r--fs/qnx6/Makefile7
-rw-r--r--fs/qnx6/README8
-rw-r--r--fs/qnx6/dir.c291
-rw-r--r--fs/qnx6/inode.c698
-rw-r--r--fs/qnx6/namei.c42
-rw-r--r--fs/qnx6/qnx6.h135
-rw-r--r--fs/qnx6/super_mmi.c150
-rw-r--r--fs/quota/dquot.c233
-rw-r--r--fs/quota/quota.c28
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/ramfs/inode.c38
-rw-r--r--fs/read_write.c9
-rw-r--r--fs/readdir.c35
-rw-r--r--fs/reiserfs/acl.h76
-rw-r--r--fs/reiserfs/bitmap.c98
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c2
-rw-r--r--fs/reiserfs/file.c6
-rw-r--r--fs/reiserfs/fix_node.c2
-rw-r--r--fs/reiserfs/hashes.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c42
-rw-r--r--fs/reiserfs/ioctl.c10
-rw-r--r--fs/reiserfs/item_ops.c2
-rw-r--r--fs/reiserfs/journal.c82
-rw-r--r--fs/reiserfs/lbalance.c4
-rw-r--r--fs/reiserfs/lock.c2
-rw-r--r--fs/reiserfs/namei.c14
-rw-r--r--fs/reiserfs/objectid.c3
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c3
-rw-r--r--fs/reiserfs/reiserfs.h2929
-rw-r--r--fs/reiserfs/resize.c4
-rw-r--r--fs/reiserfs/stree.c6
-rw-r--r--fs/reiserfs/super.c286
-rw-r--r--fs/reiserfs/tail_conversion.c6
-rw-r--r--fs/reiserfs/xattr.c8
-rw-r--r--fs/reiserfs/xattr.h122
-rw-r--r--fs/reiserfs/xattr_acl.c6
-rw-r--r--fs/reiserfs/xattr_security.c4
-rw-r--r--fs/reiserfs/xattr_trusted.c4
-rw-r--r--fs/reiserfs/xattr_user.c4
-rw-r--r--fs/romfs/mmap-nommu.c28
-rw-r--r--fs/romfs/storage.c2
-rw-r--r--fs/romfs/super.c7
-rw-r--r--fs/select.c50
-rw-r--r--fs/seq_file.c124
-rw-r--r--fs/signalfd.c22
-rw-r--r--fs/splice.c60
-rw-r--r--fs/squashfs/block.c3
-rw-r--r--fs/squashfs/cache.c30
-rw-r--r--fs/squashfs/dir.c7
-rw-r--r--fs/squashfs/file.c8
-rw-r--r--fs/squashfs/inode.c4
-rw-r--r--fs/squashfs/namei.c5
-rw-r--r--fs/squashfs/squashfs_fs.h19
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c11
-rw-r--r--fs/squashfs/symlink.c4
-rw-r--r--fs/stack.c2
-rw-r--r--fs/stat.c65
-rw-r--r--fs/statfs.c28
-rw-r--r--fs/super.c99
-rw-r--r--fs/sync.c8
-rw-r--r--fs/sysfs/dir.c264
-rw-r--r--fs/sysfs/file.c10
-rw-r--r--fs/sysfs/group.c8
-rw-r--r--fs/sysfs/inode.c24
-rw-r--r--fs/sysfs/mount.c5
-rw-r--r--fs/sysfs/sysfs.h21
-rw-r--r--fs/sysv/ialloc.c2
-rw-r--r--fs/sysv/inode.c3
-rw-r--r--fs/sysv/itree.c2
-rw-r--r--fs/sysv/namei.c18
-rw-r--r--fs/sysv/super.c27
-rw-r--r--fs/sysv/sysv.h3
-rw-r--r--fs/ubifs/Kconfig23
-rw-r--r--fs/ubifs/Makefile5
-rw-r--r--fs/ubifs/commit.c14
-rw-r--r--fs/ubifs/debug.c646
-rw-r--r--fs/ubifs/debug.h249
-rw-r--r--fs/ubifs/dir.c53
-rw-r--r--fs/ubifs/file.c8
-rw-r--r--fs/ubifs/find.c4
-rw-r--r--fs/ubifs/gc.c2
-rw-r--r--fs/ubifs/io.c74
-rw-r--r--fs/ubifs/ioctl.c4
-rw-r--r--fs/ubifs/journal.c17
-rw-r--r--fs/ubifs/log.c18
-rw-r--r--fs/ubifs/lprops.c18
-rw-r--r--fs/ubifs/lpt.c84
-rw-r--r--fs/ubifs/lpt_commit.c86
-rw-r--r--fs/ubifs/master.c8
-rw-r--r--fs/ubifs/orphan.c25
-rw-r--r--fs/ubifs/recovery.c46
-rw-r--r--fs/ubifs/replay.c35
-rw-r--r--fs/ubifs/sb.c53
-rw-r--r--fs/ubifs/scan.c14
-rw-r--r--fs/ubifs/super.c44
-rw-r--r--fs/ubifs/tnc.c86
-rw-r--r--fs/ubifs/tnc_commit.c28
-rw-r--r--fs/ubifs/tnc_misc.c46
-rw-r--r--fs/ubifs/ubifs.h39
-rw-r--r--fs/ubifs/xattr.c16
-rw-r--r--fs/udf/balloc.c84
-rw-r--r--fs/udf/file.c12
-rw-r--r--fs/udf/ialloc.c3
-rw-r--r--fs/udf/inode.c85
-rw-r--r--fs/udf/namei.c35
-rw-r--r--fs/udf/super.c138
-rw-r--r--fs/udf/symlink.c14
-rw-r--r--fs/udf/udf_i.h1
-rw-r--r--fs/udf/udf_sb.h8
-rw-r--r--fs/udf/udfdecl.h2
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c7
-rw-r--r--fs/ufs/namei.c20
-rw-r--r--fs/ufs/super.c18
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/utimes.c5
-rw-r--r--fs/xattr.c66
-rw-r--r--fs/xattr_acl.c2
-rw-r--r--fs/xfs/Makefile5
-rw-r--r--fs/xfs/kmem.c10
-rw-r--r--fs/xfs/kmem.h27
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_ag.h18
-rw-r--r--fs/xfs/xfs_alloc.c633
-rw-r--r--fs/xfs/xfs_alloc.h40
-rw-r--r--fs/xfs/xfs_alloc_btree.c9
-rw-r--r--fs/xfs/xfs_aops.c429
-rw-r--r--fs/xfs/xfs_aops.h4
-rw-r--r--fs/xfs/xfs_attr.c45
-rw-r--r--fs/xfs/xfs_attr_leaf.c52
-rw-r--r--fs/xfs/xfs_bmap.c170
-rw-r--r--fs/xfs/xfs_bmap.h3
-rw-r--r--fs/xfs/xfs_bmap_btree.c1
-rw-r--r--fs/xfs/xfs_btree.c1
-rw-r--r--fs/xfs/xfs_buf.c661
-rw-r--r--fs/xfs/xfs_buf.h99
-rw-r--r--fs/xfs/xfs_buf_item.c123
-rw-r--r--fs/xfs/xfs_da_btree.c49
-rw-r--r--fs/xfs/xfs_dfrag.c69
-rw-r--r--fs/xfs/xfs_dir2.c1
-rw-r--r--fs/xfs/xfs_dir2_block.c2
-rw-r--r--fs/xfs/xfs_dir2_data.c1
-rw-r--r--fs/xfs/xfs_dir2_leaf.c1
-rw-r--r--fs/xfs/xfs_dir2_node.c1
-rw-r--r--fs/xfs/xfs_dir2_sf.c1
-rw-r--r--fs/xfs/xfs_discard.c71
-rw-r--r--fs/xfs/xfs_dquot.c930
-rw-r--r--fs/xfs/xfs_dquot.h83
-rw-r--r--fs/xfs/xfs_dquot_item.c165
-rw-r--r--fs/xfs/xfs_error.c1
-rw-r--r--fs/xfs/xfs_export.c24
-rw-r--r--fs/xfs/xfs_extent_busy.c603
-rw-r--r--fs/xfs/xfs_extent_busy.h69
-rw-r--r--fs/xfs/xfs_extfree_item.c59
-rw-r--r--fs/xfs/xfs_file.c580
-rw-r--r--fs/xfs/xfs_fs_subr.c2
-rw-r--r--fs/xfs/xfs_fsops.c82
-rw-r--r--fs/xfs/xfs_ialloc.c14
-rw-r--r--fs/xfs/xfs_ialloc.h11
-rw-r--r--fs/xfs/xfs_ialloc_btree.c1
-rw-r--r--fs/xfs/xfs_iget.c96
-rw-r--r--fs/xfs/xfs_inode.c421
-rw-r--r--fs/xfs/xfs_inode.h146
-rw-r--r--fs/xfs/xfs_inode_item.c480
-rw-r--r--fs/xfs/xfs_inode_item.h18
-rw-r--r--fs/xfs/xfs_inum.h5
-rw-r--r--fs/xfs/xfs_ioctl.c38
-rw-r--r--fs/xfs/xfs_ioctl32.c12
-rw-r--r--fs/xfs/xfs_iomap.c124
-rw-r--r--fs/xfs/xfs_iops.c140
-rw-r--r--fs/xfs/xfs_itable.c25
-rw-r--r--fs/xfs/xfs_log.c800
-rw-r--r--fs/xfs/xfs_log.h25
-rw-r--r--fs/xfs/xfs_log_cil.c361
-rw-r--r--fs/xfs/xfs_log_priv.h74
-rw-r--r--fs/xfs/xfs_log_recover.c186
-rw-r--r--fs/xfs/xfs_message.c1
-rw-r--r--fs/xfs/xfs_mount.c85
-rw-r--r--fs/xfs/xfs_mount.h12
-rw-r--r--fs/xfs/xfs_qm.c1283
-rw-r--r--fs/xfs/xfs_qm.h69
-rw-r--r--fs/xfs/xfs_qm_bhv.c44
-rw-r--r--fs/xfs/xfs_qm_stats.c105
-rw-r--r--fs/xfs/xfs_qm_stats.h53
-rw-r--r--fs/xfs/xfs_qm_syscalls.c143
-rw-r--r--fs/xfs/xfs_quota.h14
-rw-r--r--fs/xfs/xfs_quota_priv.h11
-rw-r--r--fs/xfs/xfs_quotaops.c1
-rw-r--r--fs/xfs/xfs_rename.c12
-rw-r--r--fs/xfs/xfs_rtalloc.c19
-rw-r--r--fs/xfs/xfs_rw.c156
-rw-r--r--fs/xfs/xfs_rw.h47
-rw-r--r--fs/xfs/xfs_sb.h1
-rw-r--r--fs/xfs/xfs_stats.c99
-rw-r--r--fs/xfs/xfs_stats.h10
-rw-r--r--fs/xfs/xfs_super.c301
-rw-r--r--fs/xfs/xfs_super.h8
-rw-r--r--fs/xfs/xfs_sync.c322
-rw-r--r--fs/xfs/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_trace.c2
-rw-r--r--fs/xfs/xfs_trace.h213
-rw-r--r--fs/xfs/xfs_trans.c519
-rw-r--r--fs/xfs/xfs_trans.h23
-rw-r--r--fs/xfs/xfs_trans_ail.c258
-rw-r--r--fs/xfs/xfs_trans_buf.c151
-rw-r--r--fs/xfs/xfs_trans_dquot.c21
-rw-r--r--fs/xfs/xfs_trans_extfree.c1
-rw-r--r--fs/xfs/xfs_trans_inode.c10
-rw-r--r--fs/xfs/xfs_trans_priv.h15
-rw-r--r--fs/xfs/xfs_types.h5
-rw-r--r--fs/xfs/xfs_utils.c6
-rw-r--r--fs/xfs/xfs_utils.h2
-rw-r--r--fs/xfs/xfs_vnode.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c98
-rw-r--r--fs/xfs/xfs_vnodeops.h7
916 files changed, 69471 insertions, 38924 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 945aa5f02f9..a9ea73d6dcf 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -62,8 +62,8 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
uint16_t klen = 0;
v9ses = (struct v9fs_session_info *)cookie_netfs_data;
- P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses,
- buffer, bufmax);
+ p9_debug(P9_DEBUG_FSC, "session %p buf %p size %u\n",
+ v9ses, buffer, bufmax);
if (v9ses->cachetag)
klen = strlen(v9ses->cachetag);
@@ -72,7 +72,7 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
return 0;
memcpy(buffer, v9ses->cachetag, klen);
- P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag);
+ p9_debug(P9_DEBUG_FSC, "cache session tag %s\n", v9ses->cachetag);
return klen;
}
@@ -91,14 +91,14 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
&v9fs_cache_session_index_def,
v9ses);
- P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses,
- v9ses->fscache);
+ p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n",
+ v9ses, v9ses->fscache);
}
void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
{
- P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses,
- v9ses->fscache);
+ p9_debug(P9_DEBUG_FSC, "session %p put cookie %p\n",
+ v9ses, v9ses->fscache);
fscache_relinquish_cookie(v9ses->fscache, 0);
v9ses->fscache = NULL;
}
@@ -109,8 +109,8 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
- v9inode->qid.path);
+ p9_debug(P9_DEBUG_FSC, "inode %p get key %llu\n",
+ &v9inode->vfs_inode, v9inode->qid.path);
return sizeof(v9inode->qid.path);
}
@@ -120,8 +120,8 @@ static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
const struct v9fs_inode *v9inode = cookie_netfs_data;
*size = i_size_read(&v9inode->vfs_inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
- *size);
+ p9_debug(P9_DEBUG_FSC, "inode %p get attr %llu\n",
+ &v9inode->vfs_inode, *size);
}
static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
@@ -129,8 +129,8 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
{
const struct v9fs_inode *v9inode = cookie_netfs_data;
memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
- v9inode->qid.version);
+ p9_debug(P9_DEBUG_FSC, "inode %p get aux %u\n",
+ &v9inode->vfs_inode, v9inode->qid.version);
return sizeof(v9inode->qid.version);
}
@@ -206,8 +206,8 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
&v9fs_cache_inode_index_def,
v9inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
- v9inode->fscache);
+ p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
+ inode, v9inode->fscache);
}
void v9fs_cache_inode_put_cookie(struct inode *inode)
@@ -216,8 +216,8 @@ void v9fs_cache_inode_put_cookie(struct inode *inode)
if (!v9inode->fscache)
return;
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
- v9inode->fscache);
+ p9_debug(P9_DEBUG_FSC, "inode %p put cookie %p\n",
+ inode, v9inode->fscache);
fscache_relinquish_cookie(v9inode->fscache, 0);
v9inode->fscache = NULL;
@@ -229,8 +229,8 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode)
if (!v9inode->fscache)
return;
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
- v9inode->fscache);
+ p9_debug(P9_DEBUG_FSC, "inode %p flush cookie %p\n",
+ inode, v9inode->fscache);
fscache_relinquish_cookie(v9inode->fscache, 1);
v9inode->fscache = NULL;
@@ -272,8 +272,8 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
v9inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
- inode, old, v9inode->fscache);
+ p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
+ inode, old, v9inode->fscache);
spin_unlock(&v9inode->fscache_lock);
}
@@ -323,7 +323,7 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
int ret;
const struct v9fs_inode *v9inode = V9FS_I(inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
if (!v9inode->fscache)
return -ENOBUFS;
@@ -335,13 +335,13 @@ int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
switch (ret) {
case -ENOBUFS:
case -ENODATA:
- P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret);
+ p9_debug(P9_DEBUG_FSC, "page/inode not in cache %d\n", ret);
return 1;
case 0:
- P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+ p9_debug(P9_DEBUG_FSC, "BIO submitted\n");
return ret;
default:
- P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret);
+ p9_debug(P9_DEBUG_FSC, "ret %d\n", ret);
return ret;
}
}
@@ -361,7 +361,7 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
int ret;
const struct v9fs_inode *v9inode = V9FS_I(inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
+ p9_debug(P9_DEBUG_FSC, "inode %p pages %u\n", inode, *nr_pages);
if (!v9inode->fscache)
return -ENOBUFS;
@@ -373,15 +373,15 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
switch (ret) {
case -ENOBUFS:
case -ENODATA:
- P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret);
+ p9_debug(P9_DEBUG_FSC, "pages/inodes not in cache %d\n", ret);
return 1;
case 0:
BUG_ON(!list_empty(pages));
BUG_ON(*nr_pages != 0);
- P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted");
+ p9_debug(P9_DEBUG_FSC, "BIO submitted\n");
return ret;
default:
- P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret);
+ p9_debug(P9_DEBUG_FSC, "ret %d\n", ret);
return ret;
}
}
@@ -396,9 +396,9 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
int ret;
const struct v9fs_inode *v9inode = V9FS_I(inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
- P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
+ p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret);
if (ret != 0)
v9fs_uncache_page(inode, page);
}
@@ -409,7 +409,7 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
{
const struct v9fs_inode *v9inode = V9FS_I(inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
if (PageFsCache(page))
fscache_wait_on_page_write(v9inode->fscache, page);
}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 85b67ffa2a4..da8eefbe830 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -45,8 +45,8 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
{
struct v9fs_dentry *dent;
- P9_DPRINTK(P9_DEBUG_VFS, "fid %d dentry %s\n",
- fid->fid, dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "fid %d dentry %s\n",
+ fid->fid, dentry->d_name.name);
dent = dentry->d_fsdata;
if (!dent) {
@@ -79,8 +79,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any)
struct v9fs_dentry *dent;
struct p9_fid *fid, *ret;
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
- dentry->d_name.name, dentry, uid, any);
+ p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
+ dentry->d_name.name, dentry, uid, any);
dent = (struct v9fs_dentry *) dentry->d_fsdata;
ret = NULL;
if (dent) {
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2b78014a124..b85efa77394 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -23,6 +23,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -85,15 +87,15 @@ static int get_cache_mode(char *s)
if (!strcmp(s, "loose")) {
version = CACHE_LOOSE;
- P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n");
+ p9_debug(P9_DEBUG_9P, "Cache mode: loose\n");
} else if (!strcmp(s, "fscache")) {
version = CACHE_FSCACHE;
- P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n");
+ p9_debug(P9_DEBUG_9P, "Cache mode: fscache\n");
} else if (!strcmp(s, "none")) {
version = CACHE_NONE;
- P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n");
+ p9_debug(P9_DEBUG_9P, "Cache mode: none\n");
} else
- printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s);
+ pr_info("Unknown Cache mode %s\n", s);
return version;
}
@@ -140,8 +142,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_debug:
r = match_int(&args[0], &option);
if (r < 0) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "integer field, but no integer?\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
ret = r;
continue;
}
@@ -154,8 +156,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_dfltuid:
r = match_int(&args[0], &option);
if (r < 0) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "integer field, but no integer?\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
ret = r;
continue;
}
@@ -164,8 +166,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_dfltgid:
r = match_int(&args[0], &option);
if (r < 0) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "integer field, but no integer?\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
ret = r;
continue;
}
@@ -174,8 +176,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_afid:
r = match_int(&args[0], &option);
if (r < 0) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "integer field, but no integer?\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
ret = r;
continue;
}
@@ -205,8 +207,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
s = match_strdup(&args[0]);
if (!s) {
ret = -ENOMEM;
- P9_DPRINTK(P9_DEBUG_ERROR,
- "problem allocating copy of cache arg\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "problem allocating copy of cache arg\n");
goto free_and_return;
}
ret = get_cache_mode(s);
@@ -223,8 +225,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
s = match_strdup(&args[0]);
if (!s) {
ret = -ENOMEM;
- P9_DPRINTK(P9_DEBUG_ERROR,
- "problem allocating copy of access arg\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "problem allocating copy of access arg\n");
goto free_and_return;
}
@@ -240,8 +242,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
v9ses->uid = simple_strtoul(s, &e, 10);
if (*e != '\0') {
ret = -EINVAL;
- printk(KERN_INFO "9p: Unknown access "
- "argument %s.\n", s);
+ pr_info("Unknown access argument %s\n",
+ s);
kfree(s);
goto free_and_return;
}
@@ -254,9 +256,8 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
#ifdef CONFIG_9P_FS_POSIX_ACL
v9ses->flags |= V9FS_POSIX_ACL;
#else
- P9_DPRINTK(P9_DEBUG_ERROR,
- "Not defined CONFIG_9P_FS_POSIX_ACL. "
- "Ignoring posixacl option\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "Not defined CONFIG_9P_FS_POSIX_ACL. Ignoring posixacl option\n");
#endif
break;
@@ -318,7 +319,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
if (IS_ERR(v9ses->clnt)) {
retval = PTR_ERR(v9ses->clnt);
v9ses->clnt = NULL;
- P9_DPRINTK(P9_DEBUG_ERROR, "problem initializing 9p client\n");
+ p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n");
goto error;
}
@@ -371,7 +372,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
if (IS_ERR(fid)) {
retval = PTR_ERR(fid);
fid = NULL;
- P9_DPRINTK(P9_DEBUG_ERROR, "cannot attach\n");
+ p9_debug(P9_DEBUG_ERROR, "cannot attach\n");
goto error;
}
@@ -429,7 +430,7 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
*/
void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
- P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
+ p9_debug(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
p9_client_disconnect(v9ses->clnt);
}
@@ -442,7 +443,7 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses)
{
- P9_DPRINTK(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses);
+ p9_debug(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses);
p9_client_begin_disconnect(v9ses->clnt);
}
@@ -591,23 +592,23 @@ static void v9fs_cache_unregister(void)
static int __init init_v9fs(void)
{
int err;
- printk(KERN_INFO "Installing v9fs 9p2000 file system support\n");
+ pr_info("Installing v9fs 9p2000 file system support\n");
/* TODO: Setup list of registered trasnport modules */
- err = register_filesystem(&v9fs_fs_type);
- if (err < 0) {
- printk(KERN_ERR "Failed to register filesystem\n");
- return err;
- }
err = v9fs_cache_register();
if (err < 0) {
- printk(KERN_ERR "Failed to register v9fs for caching\n");
- goto out_fs_unreg;
+ pr_err("Failed to register v9fs for caching\n");
+ return err;
}
err = v9fs_sysfs_init();
if (err < 0) {
- printk(KERN_ERR "Failed to register with sysfs\n");
+ pr_err("Failed to register with sysfs\n");
+ goto out_cache;
+ }
+ err = register_filesystem(&v9fs_fs_type);
+ if (err < 0) {
+ pr_err("Failed to register filesystem\n");
goto out_sysfs_cleanup;
}
@@ -616,8 +617,8 @@ static int __init init_v9fs(void)
out_sysfs_cleanup:
v9fs_sysfs_cleanup();
-out_fs_unreg:
- unregister_filesystem(&v9fs_fs_type);
+out_cache:
+ v9fs_cache_unregister();
return err;
}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 410ffd6ceb5..dc95a252523 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
struct inode *v9fs_alloc_inode(struct super_block *sb);
void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
+struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t);
int v9fs_init_inode(struct v9fs_session_info *v9ses,
- struct inode *inode, int mode, dev_t);
+ struct inode *inode, umode_t mode, dev_t);
void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 2524e4cbb8e..0ad61c6a65a 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -56,7 +56,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
struct inode *inode;
inode = page->mapping->host;
- P9_DPRINTK(P9_DEBUG_VFS, "\n");
+ p9_debug(P9_DEBUG_VFS, "\n");
BUG_ON(!PageLocked(page));
@@ -116,14 +116,14 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
struct inode *inode;
inode = mapping->host;
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp);
+ p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp);
ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages);
if (ret == 0)
return ret;
ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
- P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret);
+ p9_debug(P9_DEBUG_VFS, " = %d\n", ret);
return ret;
}
@@ -263,10 +263,9 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* Now that we do caching with cache mode enabled, We need
* to support direct IO
*/
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
- "off/no(%lld/%lu) EINVAL\n",
- iocb->ki_filp->f_path.dentry->d_name.name,
- (long long) pos, nr_segs);
+ p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
+ iocb->ki_filp->f_path.dentry->d_name.name,
+ (long long)pos, nr_segs);
return -EINVAL;
}
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index e022890c6f4..d529437ff44 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -53,8 +53,8 @@
static int v9fs_dentry_delete(const struct dentry *dentry)
{
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
- dentry);
+ p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
+ dentry->d_name.name, dentry);
return 1;
}
@@ -66,8 +66,8 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
*/
static int v9fs_cached_dentry_delete(const struct dentry *dentry)
{
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
- dentry->d_name.name, dentry);
+ p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
+ dentry->d_name.name, dentry);
/* Don't cache negative dentries */
if (!dentry->d_inode)
@@ -86,8 +86,8 @@ static void v9fs_dentry_release(struct dentry *dentry)
struct v9fs_dentry *dent;
struct p9_fid *temp, *current_fid;
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
- dentry);
+ p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
+ dentry->d_name.name, dentry);
dent = dentry->d_fsdata;
if (dent) {
list_for_each_entry_safe(current_fid, temp, &dent->fidlist,
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 598fff1a54e..ff911e77965 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -140,7 +140,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
int reclen = 0;
struct p9_rdir *rdir;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
fid = filp->private_data;
buflen = fid->clnt->msize - P9_IOHDRSZ;
@@ -168,7 +168,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
rdir->tail - rdir->head, &st);
if (err) {
- P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
err = -EIO;
p9stat_free(&st);
goto unlock_and_exit;
@@ -213,7 +213,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
struct p9_dirent curdirent;
u64 oldoffset = 0;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
fid = filp->private_data;
buflen = fid->clnt->msize - P9_READDIRHDRSZ;
@@ -244,7 +244,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
rdir->tail - rdir->head,
&curdirent);
if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
err = -EIO;
goto unlock_and_exit;
}
@@ -290,9 +290,8 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
struct p9_fid *fid;
fid = filp->private_data;
- P9_DPRINTK(P9_DEBUG_VFS,
- "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
- inode, filp, fid ? fid->fid : -1);
+ p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
+ inode, filp, fid ? fid->fid : -1);
if (fid)
p9_client_clunk(fid);
return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 62857a810a7..fc06fd27065 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -61,7 +61,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
struct p9_fid *fid;
int omode;
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
+ p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
v9inode = V9FS_I(inode);
v9ses = v9fs_inode2v9ses(inode);
if (v9fs_proto_dotl(v9ses))
@@ -135,7 +135,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
int res = 0;
struct inode *inode = filp->f_path.dentry->d_inode;
- P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
+ p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
/* No mandatory locks */
if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
@@ -204,7 +204,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
break;
if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
break;
- schedule_timeout_interruptible(P9_LOCK_TIMEOUT);
+ if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0)
+ break;
}
/* map 9p status to VFS status */
@@ -304,8 +305,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
struct inode *inode = filp->f_path.dentry->d_inode;
int ret = -ENOLCK;
- P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
- cmd, fl, filp->f_path.dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
+ filp, cmd, fl, filp->f_path.dentry->d_name.name);
/* No mandatory locks */
if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
@@ -340,8 +341,8 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd,
struct inode *inode = filp->f_path.dentry->d_inode;
int ret = -ENOLCK;
- P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
- cmd, fl, filp->f_path.dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
+ filp, cmd, fl, filp->f_path.dentry->d_name.name);
/* No mandatory locks */
if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
@@ -384,8 +385,8 @@ v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
{
int n, total, size;
- P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
- (long long unsigned) offset, count);
+ p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n",
+ fid->fid, (long long unsigned)offset, count);
n = 0;
total = 0;
size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -443,7 +444,7 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
struct p9_fid *fid;
size_t size;
- P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
+ p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
fid = filp->private_data;
size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -470,8 +471,8 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
loff_t origin = *offset;
unsigned long pg_start, pg_end;
- P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
- (int)count, (int)*offset);
+ p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n",
+ data, (int)count, (int)*offset);
clnt = fid->clnt;
do {
@@ -552,7 +553,7 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
return retval;
mutex_lock(&inode->i_mutex);
- P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
+ p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
@@ -575,8 +576,7 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
return retval;
mutex_lock(&inode->i_mutex);
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
- filp, datasync);
+ p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
@@ -607,8 +607,8 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = filp->f_path.dentry->d_inode;
- P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
- page, (unsigned long)filp->private_data);
+ p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
+ page, (unsigned long)filp->private_data);
v9inode = V9FS_I(inode);
/* make sure the cache has finished storing the page */
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 879ed885173..57ccb7537da 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -23,6 +23,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -59,15 +61,13 @@ static const struct inode_operations v9fs_symlink_inode_operations;
*
*/
-static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
+static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode)
{
int res;
res = mode & 0777;
if (S_ISDIR(mode))
res |= P9_DMDIR;
if (v9fs_proto_dotu(v9ses)) {
- if (S_ISLNK(mode))
- res |= P9_DMSYMLINK;
if (v9ses->nodev == 0) {
if (S_ISSOCK(mode))
res |= P9_DMSOCKET;
@@ -85,10 +85,33 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
res |= P9_DMSETGID;
if ((mode & S_ISVTX) == S_ISVTX)
res |= P9_DMSETVTX;
- if ((mode & P9_DMLINK))
- res |= P9_DMLINK;
}
+ return res;
+}
+/**
+ * p9mode2perm- convert plan9 mode bits to unix permission bits
+ * @v9ses: v9fs session information
+ * @stat: p9_wstat from which mode need to be derived
+ *
+ */
+static int p9mode2perm(struct v9fs_session_info *v9ses,
+ struct p9_wstat *stat)
+{
+ int res;
+ int mode = stat->mode;
+
+ res = mode & S_IALLUGO;
+ if (v9fs_proto_dotu(v9ses)) {
+ if ((mode & P9_DMSETUID) == P9_DMSETUID)
+ res |= S_ISUID;
+
+ if ((mode & P9_DMSETGID) == P9_DMSETGID)
+ res |= S_ISGID;
+
+ if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
+ res |= S_ISVTX;
+ }
return res;
}
@@ -99,14 +122,14 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
* @rdev: major number, minor number in case of device files.
*
*/
-static int p9mode2unixmode(struct v9fs_session_info *v9ses,
- struct p9_wstat *stat, dev_t *rdev)
+static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses,
+ struct p9_wstat *stat, dev_t *rdev)
{
int res;
- int mode = stat->mode;
+ u32 mode = stat->mode;
- res = mode & S_IALLUGO;
*rdev = 0;
+ res = p9mode2perm(v9ses, stat);
if ((mode & P9_DMDIR) == P9_DMDIR)
res |= S_IFDIR;
@@ -133,24 +156,13 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses,
res |= S_IFBLK;
break;
default:
- P9_DPRINTK(P9_DEBUG_ERROR,
- "Unknown special type %c %s\n", type,
- stat->extension);
+ p9_debug(P9_DEBUG_ERROR, "Unknown special type %c %s\n",
+ type, stat->extension);
};
*rdev = MKDEV(major, minor);
} else
res |= S_IFREG;
- if (v9fs_proto_dotu(v9ses)) {
- if ((mode & P9_DMSETUID) == P9_DMSETUID)
- res |= S_ISUID;
-
- if ((mode & P9_DMSETGID) == P9_DMSETGID)
- res |= S_ISGID;
-
- if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
- res |= S_ISVTX;
- }
return res;
}
@@ -251,7 +263,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
static void v9fs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
}
@@ -261,7 +272,7 @@ void v9fs_destroy_inode(struct inode *inode)
}
int v9fs_init_inode(struct v9fs_session_info *v9ses,
- struct inode *inode, int mode, dev_t rdev)
+ struct inode *inode, umode_t mode, dev_t rdev)
{
int err = 0;
@@ -281,8 +292,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
} else if (v9fs_proto_dotu(v9ses)) {
inode->i_op = &v9fs_file_inode_operations;
} else {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "special files without extended mode\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "special files without extended mode\n");
err = -EINVAL;
goto error;
}
@@ -307,8 +318,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
break;
case S_IFLNK:
if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
- P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
- "legacy protocol.\n");
+ p9_debug(P9_DEBUG_ERROR,
+ "extended modes used with legacy protocol\n");
err = -EINVAL;
goto error;
}
@@ -335,8 +346,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
break;
default:
- P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
- mode, mode & S_IFMT);
+ p9_debug(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n",
+ mode, mode & S_IFMT);
err = -EINVAL;
goto error;
}
@@ -352,17 +363,18 @@ error:
*
*/
-struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
+struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
int err;
struct inode *inode;
struct v9fs_session_info *v9ses = sb->s_fs_info;
- P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+ p9_debug(P9_DEBUG_VFS, "super block: %p mode: %ho\n", sb, mode);
inode = new_inode(sb);
if (!inode) {
- P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
+ pr_warn("%s (%d): Problem allocating inode\n",
+ __func__, task_pid_nr(current));
return ERR_PTR(-ENOMEM);
}
err = v9fs_init_inode(v9ses, inode, mode, rdev);
@@ -436,7 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
struct v9fs_inode *v9inode = V9FS_I(inode);
truncate_inode_pages(inode->i_mapping, 0);
- end_writeback(inode);
+ clear_inode(inode);
filemap_fdatawrite(inode->i_mapping);
#ifdef CONFIG_9P_FSCACHE
@@ -492,7 +504,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
int new)
{
dev_t rdev;
- int retval, umode;
+ int retval;
+ umode_t umode;
unsigned long i_ino;
struct inode *inode;
struct v9fs_session_info *v9ses = sb->s_fs_info;
@@ -578,15 +591,15 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
struct p9_fid *v9fid, *dfid;
struct v9fs_session_info *v9ses;
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n",
- dir, dentry, flags);
+ p9_debug(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n",
+ dir, dentry, flags);
v9ses = v9fs_inode2v9ses(dir);
inode = dentry->d_inode;
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid)) {
retval = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", retval);
+ p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", retval);
return retval;
}
if (v9fs_proto_dotl(v9ses))
@@ -635,7 +648,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
struct p9_fid *dfid, *ofid, *fid;
struct inode *inode;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
err = 0;
ofid = NULL;
@@ -644,7 +657,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
return ERR_PTR(err);
}
@@ -652,36 +665,41 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
ofid = p9_client_walk(dfid, 0, NULL, 1);
if (IS_ERR(ofid)) {
err = PTR_ERR(ofid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
return ERR_PTR(err);
}
err = p9_client_fcreate(ofid, name, perm, mode, extension);
if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
goto error;
}
- /* now walk from the parent so we can get unopened fid */
- fid = p9_client_walk(dfid, 1, &name, 1);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
- fid = NULL;
- goto error;
- }
-
- /* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
- goto error;
+ if (!(perm & P9_DMLINK)) {
+ /* now walk from the parent so we can get unopened fid */
+ fid = p9_client_walk(dfid, 1, &name, 1);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ p9_debug(P9_DEBUG_VFS,
+ "p9_client_walk failed %d\n", err);
+ fid = NULL;
+ goto error;
+ }
+ /*
+ * instantiate inode and assign the unopened fid to the dentry
+ */
+ inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ p9_debug(P9_DEBUG_VFS,
+ "inode creation failed %d\n", err);
+ goto error;
+ }
+ err = v9fs_fid_add(dentry, fid);
+ if (err < 0)
+ goto error;
+ d_instantiate(dentry, inode);
}
- err = v9fs_fid_add(dentry, fid);
- if (err < 0)
- goto error;
- d_instantiate(dentry, inode);
return ofid;
error:
if (ofid)
@@ -703,7 +721,7 @@ error:
*/
static int
-v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd)
{
int err;
@@ -786,14 +804,14 @@ error:
*
*/
-static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int err;
u32 perm;
struct p9_fid *fid;
struct v9fs_session_info *v9ses;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
err = 0;
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode | S_IFDIR);
@@ -831,8 +849,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
char *name;
int result = 0;
- P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
- dir, dentry->d_name.name, dentry, nameidata);
+ p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+ dir, dentry->d_name.name, dentry, nameidata);
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
@@ -938,7 +956,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct p9_fid *newdirfid;
struct p9_wstat wstat;
- P9_DPRINTK(P9_DEBUG_VFS, "\n");
+ p9_debug(P9_DEBUG_VFS, "\n");
retval = 0;
old_inode = old_dentry->d_inode;
new_inode = new_dentry->d_inode;
@@ -974,8 +992,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* 9P .u can only handle file rename in the same directory
*/
- P9_DPRINTK(P9_DEBUG_ERROR,
- "old dir and new dir are different\n");
+ p9_debug(P9_DEBUG_ERROR, "old dir and new dir are different\n");
retval = -EXDEV;
goto clunk_newdir;
}
@@ -1031,7 +1048,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct p9_fid *fid;
struct p9_wstat *st;
- P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+ p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
@@ -1068,7 +1085,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
struct p9_fid *fid;
struct p9_wstat wstat;
- P9_DPRINTK(P9_DEBUG_VFS, "\n");
+ p9_debug(P9_DEBUG_VFS, "\n");
retval = inode_change_ok(dentry->d_inode, iattr);
if (retval)
return retval;
@@ -1131,7 +1148,7 @@ void
v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
struct super_block *sb)
{
- mode_t mode;
+ umode_t mode;
char ext[32];
char tag_name[14];
unsigned int i_nlink;
@@ -1167,7 +1184,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
set_nlink(inode, i_nlink);
}
}
- mode = stat->mode & S_IALLUGO;
+ mode = p9mode2perm(v9ses, stat);
mode |= inode->i_mode & ~S_IALLUGO;
inode->i_mode = mode;
i_size_write(inode, stat->length);
@@ -1213,7 +1230,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
struct p9_fid *fid;
struct p9_wstat *st;
- P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
retval = -EPERM;
v9ses = v9fs_dentry2v9ses(dentry);
fid = v9fs_fid_lookup(dentry);
@@ -1235,8 +1252,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
/* copy extension buffer into buffer */
strncpy(buffer, st->extension, buflen);
- P9_DPRINTK(P9_DEBUG_VFS,
- "%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer);
+ p9_debug(P9_DEBUG_VFS, "%s -> %s (%s)\n",
+ dentry->d_name.name, st->extension, buffer);
retval = strnlen(buffer, buflen);
done:
@@ -1257,7 +1274,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
int len = 0;
char *link = __getname();
- P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
if (!link)
link = ERR_PTR(-ENOMEM);
@@ -1288,8 +1305,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
char *s = nd_get_link(nd);
- P9_DPRINTK(P9_DEBUG_VFS, " %s %s\n", dentry->d_name.name,
- IS_ERR(s) ? "<error>" : s);
+ p9_debug(P9_DEBUG_VFS, " %s %s\n",
+ dentry->d_name.name, IS_ERR(s) ? "<error>" : s);
if (!IS_ERR(s))
__putname(s);
}
@@ -1304,19 +1321,17 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
*/
static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
- int mode, const char *extension)
+ u32 perm, const char *extension)
{
- u32 perm;
struct p9_fid *fid;
struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(dir);
if (!v9fs_proto_dotu(v9ses)) {
- P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n");
+ p9_debug(P9_DEBUG_ERROR, "not extended\n");
return -EPERM;
}
- perm = unixmode2p9mode(v9ses, mode);
fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
P9_OREAD);
if (IS_ERR(fid))
@@ -1340,10 +1355,10 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
static int
v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
- P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino,
- dentry->d_name.name, symname);
+ p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n",
+ dir->i_ino, dentry->d_name.name, symname);
- return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
+ return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
}
/**
@@ -1362,9 +1377,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
char *name;
struct p9_fid *oldfid;
- P9_DPRINTK(P9_DEBUG_VFS,
- " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
- old_dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n",
+ dir->i_ino, dentry->d_name.name, old_dentry->d_name.name);
oldfid = v9fs_fid_clone(old_dentry);
if (IS_ERR(oldfid))
@@ -1398,14 +1412,16 @@ clunk_fid:
*/
static int
-v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
{
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
int retval;
char *name;
+ u32 perm;
- P9_DPRINTK(P9_DEBUG_VFS,
- " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
- dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+ p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n",
+ dir->i_ino, dentry->d_name.name, mode,
+ MAJOR(rdev), MINOR(rdev));
if (!new_valid_dev(rdev))
return -EINVAL;
@@ -1427,7 +1443,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return -EINVAL;
}
- retval = v9fs_vfs_mkspecial(dir, dentry, mode, name);
+ perm = unixmode2p9mode(v9ses, mode);
+ retval = v9fs_vfs_mkspecial(dir, dentry, perm, name);
__putname(name);
return retval;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0b5745e2194..e3dd2a1e2bf 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -48,7 +48,7 @@
#include "acl.h"
static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
dev_t rdev);
/**
@@ -68,24 +68,6 @@ static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
return current_fsgid();
}
-/**
- * v9fs_dentry_from_dir_inode - helper function to get the dentry from
- * dir inode.
- *
- */
-
-static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
-{
- struct dentry *dentry;
-
- spin_lock(&inode->i_lock);
- /* Directory should have only one entry. */
- BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
- dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- spin_unlock(&inode->i_lock);
- return dentry;
-}
-
static int v9fs_test_inode_dotl(struct inode *inode, void *data)
{
struct v9fs_inode *v9inode = V9FS_I(inode);
@@ -253,7 +235,7 @@ int v9fs_open_to_dotl_flags(int flags)
*/
static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
struct nameidata *nd)
{
int err = 0;
@@ -283,13 +265,13 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
}
name = (char *) dentry->d_name.name;
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
- "mode:0x%x\n", name, flags, omode);
+ p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
+ name, flags, omode);
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
return err;
}
@@ -297,7 +279,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
ofid = p9_client_walk(dfid, 0, NULL, 1);
if (IS_ERR(ofid)) {
err = PTR_ERR(ofid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
return err;
}
@@ -307,16 +289,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
/* Update mode based on ACL value */
err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
if (err) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "Failed to get acl values in creat %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
+ err);
goto error;
}
err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
mode, gid, &qid);
if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "p9_client_open_dotl failed in creat %d\n",
- err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
+ err);
goto error;
}
v9fs_invalidate_inode_attr(dir);
@@ -325,14 +306,14 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
fid = p9_client_walk(dfid, 1, &name, 1);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
fid = NULL;
goto error;
}
inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
goto error;
}
err = v9fs_fid_add(dentry, fid);
@@ -395,7 +376,7 @@ err_clunk_old_fid:
*/
static int v9fs_vfs_mkdir_dotl(struct inode *dir,
- struct dentry *dentry, int omode)
+ struct dentry *dentry, umode_t omode)
{
int err;
struct v9fs_session_info *v9ses;
@@ -408,7 +389,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
struct dentry *dir_dentry;
struct posix_acl *dacl = NULL, *pacl = NULL;
- P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
err = 0;
v9ses = v9fs_inode2v9ses(dir);
@@ -416,11 +397,11 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
if (dir->i_mode & S_ISGID)
omode |= S_ISGID;
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dir_dentry = dentry->d_parent;
dfid = v9fs_fid_lookup(dir_dentry);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
dfid = NULL;
goto error;
}
@@ -430,8 +411,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
/* Update mode based on ACL value */
err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
if (err) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "Failed to get acl values in mkdir %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mkdir %d\n",
+ err);
goto error;
}
name = (char *) dentry->d_name.name;
@@ -444,8 +425,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
fid = p9_client_walk(dfid, 1, &name, 1);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
fid = NULL;
goto error;
}
@@ -453,8 +434,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
+ p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
goto error;
}
err = v9fs_fid_add(dentry, fid);
@@ -495,7 +476,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
struct p9_fid *fid;
struct p9_stat_dotl *st;
- P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
+ p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
@@ -523,6 +504,46 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
+/*
+ * Attribute flags.
+ */
+#define P9_ATTR_MODE (1 << 0)
+#define P9_ATTR_UID (1 << 1)
+#define P9_ATTR_GID (1 << 2)
+#define P9_ATTR_SIZE (1 << 3)
+#define P9_ATTR_ATIME (1 << 4)
+#define P9_ATTR_MTIME (1 << 5)
+#define P9_ATTR_CTIME (1 << 6)
+#define P9_ATTR_ATIME_SET (1 << 7)
+#define P9_ATTR_MTIME_SET (1 << 8)
+
+struct dotl_iattr_map {
+ int iattr_valid;
+ int p9_iattr_valid;
+};
+
+static int v9fs_mapped_iattr_valid(int iattr_valid)
+{
+ int i;
+ int p9_iattr_valid = 0;
+ struct dotl_iattr_map dotl_iattr_map[] = {
+ { ATTR_MODE, P9_ATTR_MODE },
+ { ATTR_UID, P9_ATTR_UID },
+ { ATTR_GID, P9_ATTR_GID },
+ { ATTR_SIZE, P9_ATTR_SIZE },
+ { ATTR_ATIME, P9_ATTR_ATIME },
+ { ATTR_MTIME, P9_ATTR_MTIME },
+ { ATTR_CTIME, P9_ATTR_CTIME },
+ { ATTR_ATIME_SET, P9_ATTR_ATIME_SET },
+ { ATTR_MTIME_SET, P9_ATTR_MTIME_SET },
+ };
+ for (i = 0; i < ARRAY_SIZE(dotl_iattr_map); i++) {
+ if (iattr_valid & dotl_iattr_map[i].iattr_valid)
+ p9_iattr_valid |= dotl_iattr_map[i].p9_iattr_valid;
+ }
+ return p9_iattr_valid;
+}
+
/**
* v9fs_vfs_setattr_dotl - set file metadata
* @dentry: file whose metadata to set
@@ -537,13 +558,13 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
struct p9_fid *fid;
struct p9_iattr_dotl p9attr;
- P9_DPRINTK(P9_DEBUG_VFS, "\n");
+ p9_debug(P9_DEBUG_VFS, "\n");
retval = inode_change_ok(dentry->d_inode, iattr);
if (retval)
return retval;
- p9attr.valid = iattr->ia_valid;
+ p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid);
p9attr.mode = iattr->ia_mode;
p9attr.uid = iattr->ia_uid;
p9attr.gid = iattr->ia_gid;
@@ -594,7 +615,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
void
v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
{
- mode_t mode;
+ umode_t mode;
struct v9fs_inode *v9inode = V9FS_I(inode);
if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -670,14 +691,13 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
struct v9fs_session_info *v9ses;
name = (char *) dentry->d_name.name;
- P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
- dir->i_ino, name, symname);
+ p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname);
v9ses = v9fs_inode2v9ses(dir);
dfid = v9fs_fid_lookup(dentry->d_parent);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
return err;
}
@@ -687,7 +707,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err);
goto error;
}
@@ -697,8 +717,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
fid = p9_client_walk(dfid, 1, &name, 1);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
fid = NULL;
goto error;
}
@@ -707,8 +727,8 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
+ p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
goto error;
}
err = v9fs_fid_add(dentry, fid);
@@ -751,12 +771,11 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
struct p9_fid *dfid, *oldfid;
struct v9fs_session_info *v9ses;
- P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
- dir->i_ino, old_dentry->d_name.name,
- dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
+ dir->i_ino, old_dentry->d_name.name, dentry->d_name.name);
v9ses = v9fs_inode2v9ses(dir);
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dir_dentry = dentry->d_parent;
dfid = v9fs_fid_lookup(dir_dentry);
if (IS_ERR(dfid))
return PTR_ERR(dfid);
@@ -770,7 +789,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name);
if (err < 0) {
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
return err;
}
@@ -799,7 +818,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
*
*/
static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
dev_t rdev)
{
int err;
@@ -813,19 +832,19 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
struct dentry *dir_dentry;
struct posix_acl *dacl = NULL, *pacl = NULL;
- P9_DPRINTK(P9_DEBUG_VFS,
- " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
- dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
+ p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n",
+ dir->i_ino, dentry->d_name.name, omode,
+ MAJOR(rdev), MINOR(rdev));
if (!new_valid_dev(rdev))
return -EINVAL;
v9ses = v9fs_inode2v9ses(dir);
- dir_dentry = v9fs_dentry_from_dir_inode(dir);
+ dir_dentry = dentry->d_parent;
dfid = v9fs_fid_lookup(dir_dentry);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
- P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
dfid = NULL;
goto error;
}
@@ -835,8 +854,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
/* Update mode based on ACL value */
err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
if (err) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "Failed to get acl values in mknod %d\n", err);
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in mknod %d\n",
+ err);
goto error;
}
name = (char *) dentry->d_name.name;
@@ -851,8 +870,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
fid = p9_client_walk(dfid, 1, &name, 1);
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
- P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
- err);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+ err);
fid = NULL;
goto error;
}
@@ -860,8 +879,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
- P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
- err);
+ p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
+ err);
goto error;
}
err = v9fs_fid_add(dentry, fid);
@@ -905,7 +924,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
char *link = __getname();
char *target;
- P9_DPRINTK(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
+ p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name);
if (!link) {
link = ERR_PTR(-ENOMEM);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index c70251d47ed..8c92a9ba833 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -117,11 +117,11 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
struct inode *inode = NULL;
struct dentry *root = NULL;
struct v9fs_session_info *v9ses = NULL;
- int mode = S_IRWXUGO | S_ISVTX;
+ umode_t mode = S_IRWXUGO | S_ISVTX;
struct p9_fid *fid;
int retval = 0;
- P9_DPRINTK(P9_DEBUG_VFS, " \n");
+ p9_debug(P9_DEBUG_VFS, "\n");
v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
if (!v9ses)
@@ -155,9 +155,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
goto release_sb;
}
- root = d_alloc_root(inode);
+ root = d_make_root(inode);
if (!root) {
- iput(inode);
retval = -ENOMEM;
goto release_sb;
}
@@ -191,7 +190,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
goto release_sb;
v9fs_fid_add(root, fid);
- P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
+ p9_debug(P9_DEBUG_VFS, " simple set mount, return 0\n");
return dget(sb->s_root);
clunk_fid:
@@ -223,7 +222,7 @@ static void v9fs_kill_super(struct super_block *s)
{
struct v9fs_session_info *v9ses = s->s_fs_info;
- P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
+ p9_debug(P9_DEBUG_VFS, " %p\n", s);
kill_anon_super(s);
@@ -231,7 +230,7 @@ static void v9fs_kill_super(struct super_block *s)
v9fs_session_close(v9ses);
kfree(v9ses);
s->s_fs_info = NULL;
- P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n");
+ p9_debug(P9_DEBUG_VFS, "exiting kill_super\n");
}
static void
@@ -260,7 +259,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (v9fs_proto_dotl(v9ses)) {
res = p9_client_statfs(fid, &rs);
if (res == 0) {
- buf->f_type = V9FS_MAGIC;
+ buf->f_type = rs.type;
buf->f_bsize = rs.bsize;
buf->f_blocks = rs.blocks;
buf->f_bfree = rs.bfree;
@@ -303,7 +302,7 @@ static int v9fs_write_inode(struct inode *inode,
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
- P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+ p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
v9inode = V9FS_I(inode);
if (!v9inode->writeback_fid)
return 0;
@@ -326,7 +325,7 @@ static int v9fs_write_inode_dotl(struct inode *inode,
* send an fsync request to server irrespective of
* wbc->sync_mode.
*/
- P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+ p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
v9inode = V9FS_I(inode);
if (!v9inode->writeback_fid)
return 0;
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index d288773871b..29653b70a9c 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -32,8 +32,8 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
if (IS_ERR(attr_fid)) {
retval = PTR_ERR(attr_fid);
- P9_DPRINTK(P9_DEBUG_VFS,
- "p9_client_attrwalk failed %zd\n", retval);
+ p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n",
+ retval);
attr_fid = NULL;
goto error;
}
@@ -87,8 +87,8 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
{
struct p9_fid *fid;
- P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
- __func__, name, buffer_size);
+ p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n",
+ name, buffer_size);
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -115,8 +115,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
int retval, msize, write_count;
struct p9_fid *fid = NULL;
- P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu flags = %d\n",
- __func__, name, value_len, flags);
+ p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
+ name, value_len, flags);
fid = v9fs_fid_clone(dentry);
if (IS_ERR(fid)) {
@@ -129,8 +129,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
*/
retval = p9_client_xattrcreate(fid, name, value_len, flags);
if (retval < 0) {
- P9_DPRINTK(P9_DEBUG_VFS,
- "p9_client_xattrcreate failed %d\n", retval);
+ p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
+ retval);
goto error;
}
msize = fid->clnt->msize;
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f4c45d4aa1..f95ae3a027f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
menu "File systems"
+# Use unaligned word dcache accesses
+config DCACHE_WORD_ACCESS
+ bool
+
if BLOCK
source "fs/ext2/Kconfig"
@@ -210,6 +214,7 @@ source "fs/minix/Kconfig"
source "fs/omfs/Kconfig"
source "fs/hpfs/Kconfig"
source "fs/qnx4/Kconfig"
+source "fs/qnx6/Kconfig"
source "fs/romfs/Kconfig"
source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
@@ -218,6 +223,8 @@ source "fs/exofs/Kconfig"
endif # MISC_FILESYSTEMS
+source "fs/exofs/Kconfig.ore"
+
menuconfig NETWORK_FILESYSTEMS
bool "Network File Systems"
default y
@@ -266,14 +273,6 @@ source "fs/9p/Kconfig"
endif # NETWORK_FILESYSTEMS
-if BLOCK
-menu "Partition Types"
-
-source "fs/partitions/Kconfig"
-
-endmenu
-endif
-
source "fs/nls/Kconfig"
source "fs/dlm/Kconfig"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 79e2ca7973b..02257420274 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,10 +27,13 @@ config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
+config ARCH_BINFMT_ELF_RANDOMIZE_PIE
+ bool
+
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
- depends on (FRV || BLACKFIN || (SUPERH32 && !MMU))
+ depends on (FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
help
ELF FDPIC binaries are based on ELF, but allow the individual load
segments of a binary to be located in memory independently of each
diff --git a/fs/Makefile b/fs/Makefile
index d2c3353d547..2fb97793467 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,8 @@ else
obj-y += no-block.o
endif
+obj-$(CONFIG_PROC_FS) += proc_namespace.o
+
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
@@ -52,7 +54,6 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
-obj-y += partitions/
obj-$(CONFIG_SYSFS) += sysfs/
obj-$(CONFIG_CONFIGFS_FS) += configfs/
obj-y += devpts/
@@ -101,6 +102,7 @@ obj-$(CONFIG_UBIFS_FS) += ubifs/
obj-$(CONFIG_AFFS_FS) += affs/
obj-$(CONFIG_ROMFS_FS) += romfs/
obj-$(CONFIG_QNX4FS_FS) += qnx4/
+obj-$(CONFIG_QNX6FS_FS) += qnx6/
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index c8bf36a1996..06fdcc9382c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -126,9 +126,9 @@ static void adfs_put_super(struct super_block *sb)
sb->s_fs_info = NULL;
}
-static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int adfs_show_options(struct seq_file *seq, struct dentry *root)
{
- struct adfs_sb_info *asb = ADFS_SB(mnt->mnt_sb);
+ struct adfs_sb_info *asb = ADFS_SB(root->d_sb);
if (asb->s_uid != 0)
seq_printf(seq, ",uid=%u", asb->s_uid);
@@ -483,10 +483,9 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_d_op = &adfs_dentry_operations;
root = adfs_iget(sb, &root_obj);
- sb->s_root = d_alloc_root(root);
+ sb->s_root = d_make_root(root);
if (!sb->s_root) {
int i;
- iput(root);
for (i = 0; i < asb->s_map_size; i++)
brelse(asb->s_map[i].dm_bh);
kfree(asb->s_map);
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c2b9c79eb64..1fceb320d2f 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -18,14 +18,6 @@
#define AFFS_GET_HASHENTRY(data,hashkey) be32_to_cpu(((struct dir_front *)data)->hashtable[hashkey])
#define AFFS_BLOCK(sb, bh, blk) (AFFS_HEAD(bh)->table[AFFS_SB(sb)->s_hashsize-1-(blk)])
-#ifdef __LITTLE_ENDIAN
-#define BO_EXBITS 0x18UL
-#elif defined(__BIG_ENDIAN)
-#define BO_EXBITS 0x00UL
-#else
-#error Endianness must be known for affs to work.
-#endif
-
#define AFFS_HEAD(bh) ((struct affs_head *)(bh)->b_data)
#define AFFS_TAIL(sb, bh) ((struct affs_tail *)((bh)->b_data+(sb)->s_blocksize-sizeof(struct affs_tail)))
#define AFFS_ROOT_HEAD(bh) ((struct affs_root_head *)(bh)->b_data)
@@ -136,7 +128,7 @@ extern int affs_remove_header(struct dentry *dentry);
extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
-extern mode_t prot_to_mode(u32 prot);
+extern umode_t prot_to_mode(u32 prot);
extern void mode_to_prot(struct inode *inode);
extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...);
extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...);
@@ -156,8 +148,8 @@ extern void affs_free_bitmap(struct super_block *sb);
extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
extern int affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *);
-extern int affs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
+extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
extern int affs_rmdir(struct inode *dir, struct dentry *dentry);
extern int affs_link(struct dentry *olddentry, struct inode *dir,
struct dentry *dentry);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index de37ec84234..52a6407682e 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -390,10 +390,10 @@ secs_to_datestamp(time_t secs, struct affs_date *ds)
ds->ticks = cpu_to_be32(secs * 50);
}
-mode_t
+umode_t
prot_to_mode(u32 prot)
{
- int mode = 0;
+ umode_t mode = 0;
if (!(prot & FIBF_NOWRITE))
mode |= S_IWUSR;
@@ -421,7 +421,7 @@ void
mode_to_prot(struct inode *inode)
{
u32 prot = AFFS_I(inode)->i_protect;
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
if (!(mode & S_IXUSR))
prot |= FIBF_NOEXECUTE;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 88a4b0b5005..8bc4a59f4e7 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -264,7 +264,7 @@ affs_evict_inode(struct inode *inode)
}
invalidate_inode_buffers(inode);
- end_writeback(inode);
+ clear_inode(inode);
affs_free_prealloc(inode);
cache_page = (unsigned long)AFFS_I(inode)->i_lc;
if (cache_page) {
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 780a11dc631..47806940aac 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -255,13 +255,13 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
}
int
-affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
int error;
- pr_debug("AFFS: create(%lu,\"%.*s\",0%o)\n",dir->i_ino,(int)dentry->d_name.len,
+ pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len,
dentry->d_name.name,mode);
inode = affs_new_inode(dir);
@@ -285,12 +285,12 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
}
int
-affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct inode *inode;
int error;
- pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%o)\n",dir->i_ino,
+ pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino,
(int)dentry->d_name.len,dentry->d_name.name,mode);
inode = affs_new_inode(dir);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index b31507d0f9b..0782653a05a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -98,7 +98,6 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
static void affs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
}
@@ -474,7 +473,7 @@ got_root:
root_inode = affs_iget(sb, root_block);
if (IS_ERR(root_inode)) {
ret = PTR_ERR(root_inode);
- goto out_error_noinode;
+ goto out_error;
}
if (AFFS_SB(sb)->s_flags & SF_INTL)
@@ -482,7 +481,7 @@ got_root:
else
sb->s_d_op = &affs_dentry_operations;
- sb->s_root = d_alloc_root(root_inode);
+ sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
printk(KERN_ERR "AFFS: Get root inode failed\n");
goto out_error;
@@ -495,9 +494,6 @@ got_root:
* Begin the cascaded cleanup ...
*/
out_error:
- if (root_inode)
- iput(root_inode);
-out_error_noinode:
kfree(sbi->s_bitmap);
affs_brelse(root_bh);
kfree(sbi->s_prefix);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1b0b1955001..e22dc4b4a50 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -28,9 +28,9 @@ static int afs_d_delete(const struct dentry *dentry);
static void afs_d_release(struct dentry *dentry);
static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd);
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static int afs_rmdir(struct inode *dir, struct dentry *dentry);
static int afs_unlink(struct inode *dir, struct dentry *dentry);
static int afs_link(struct dentry *from, struct inode *dir,
@@ -764,7 +764,7 @@ static void afs_d_release(struct dentry *dentry)
/*
* create a directory on an AFS filesystem
*/
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct afs_file_status status;
struct afs_callback cb;
@@ -777,7 +777,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%s},%o",
+ _enter("{%x:%u},{%s},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
ret = -ENAMETOOLONG;
@@ -948,7 +948,7 @@ error:
/*
* create a regular file on an AFS filesystem
*/
-static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd)
{
struct afs_file_status status;
@@ -962,7 +962,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
dvnode = AFS_FS_I(dir);
- _enter("{%x:%u},{%s},%o,",
+ _enter("{%x:%u},{%s},%ho,",
dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
ret = -ENAMETOOLONG;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 14d89fa58fe..8f6e9234d56 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
ASSERT(key != NULL);
vnode = AFS_FS_I(mapping->host);
- if (vnode->flags & AFS_VNODE_DELETED) {
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
_leave(" = -ESTALE");
return -ESTALE;
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f213d109c2..b960ff05ea0 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -365,10 +365,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
_debug("extract data");
if (call->count > 0) {
page = call->reply3;
- buffer = kmap_atomic(page, KM_USER0);
+ buffer = kmap_atomic(page);
ret = afs_extract_data(call, skb, last, buffer,
call->count);
- kunmap_atomic(buffer, KM_USER0);
+ kunmap_atomic(buffer);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
@@ -411,9 +411,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
if (call->count < PAGE_SIZE) {
_debug("clear");
page = call->reply3;
- buffer = kmap_atomic(page, KM_USER0);
+ buffer = kmap_atomic(page);
memset(buffer + call->count, 0, PAGE_SIZE - call->count);
- kunmap_atomic(buffer, KM_USER0);
+ kunmap_atomic(buffer);
}
_leave(" = 0 [done]");
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d890ae3b2ce..95cffd38239 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -423,7 +423,7 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
+ clear_inode(inode);
afs_give_up_callback(vnode);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d2b0888126d..a306bb6d88d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,7 +109,7 @@ struct afs_call {
unsigned reply_size; /* current size of reply */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
- unsigned short offset; /* offset into received data store */
+ unsigned offset; /* offset into received data store */
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index aa59184151d..298cf8919ec 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -200,9 +200,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
if (PageError(page))
goto error;
- buf = kmap_atomic(page, KM_USER0);
+ buf = kmap_atomic(page);
memcpy(devname, buf, size);
- kunmap_atomic(buf, KM_USER0);
+ kunmap_atomic(buf);
page_cache_release(page);
page = NULL;
}
@@ -242,7 +242,7 @@ struct vfsmount *afs_d_automount(struct path *path)
{
struct vfsmount *newmnt;
- _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name);
+ _enter("{%s}", path->dentry->d_name.name);
newmnt = afs_mntpt_do_automount(path->dentry);
if (IS_ERR(newmnt))
@@ -252,7 +252,7 @@ struct vfsmount *afs_d_automount(struct path *path)
mnt_set_expiry(newmnt, &afs_vfsmounts);
queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
afs_mntpt_expiry_timeout * HZ);
- _leave(" = %p {%s}", newmnt, newmnt->mnt_devname);
+ _leave(" = %p", newmnt);
return newmnt;
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e45a323aebb..8ad8c2a0703 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct msghdr msg;
struct kvec iov[1];
int ret;
+ struct sk_buff *skb;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
error_do_abort:
rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+ while ((skb = skb_dequeue(&call->rx_queue)))
+ afs_free_skb(skb);
rxrpc_kernel_end_call(rxcall);
call->rxcall = NULL;
error_kill_call:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 356dcf0929e..f02b31e7e64 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -301,7 +301,6 @@ static int afs_fill_super(struct super_block *sb,
{
struct afs_super_info *as = sb->s_fs_info;
struct afs_fid fid;
- struct dentry *root = NULL;
struct inode *inode = NULL;
int ret;
@@ -327,18 +326,16 @@ static int afs_fill_super(struct super_block *sb,
set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
ret = -ENOMEM;
- root = d_alloc_root(inode);
- if (!root)
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root)
goto error;
sb->s_d_op = &afs_fs_dentry_operations;
- sb->s_root = root;
_leave(" = 0");
return 0;
error:
- iput(inode);
_leave(" = %d", ret);
return ret;
}
@@ -495,7 +492,6 @@ static void afs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
struct afs_vnode *vnode = AFS_FS_I(inode);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(afs_inode_cachep, vnode);
}
diff --git a/fs/aio.c b/fs/aio.c
index 78c514cfd21..55c4c765605 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -13,7 +13,7 @@
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/aio_abi.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/backing-dev.h>
#include <linux/uio.h>
@@ -93,9 +93,8 @@ static void aio_free_ring(struct kioctx *ctx)
put_page(info->ring_pages[i]);
if (info->mmap_size) {
- down_write(&ctx->mm->mmap_sem);
- do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
- up_write(&ctx->mm->mmap_sem);
+ BUG_ON(ctx->mm != current->mm);
+ vm_munmap(info->mmap_base, info->mmap_size);
}
if (info->ring_pages && info->ring_pages != info->internal_pages)
@@ -135,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx)
info->mmap_size = nr_pages * PAGE_SIZE;
dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
down_write(&ctx->mm->mmap_sem);
- info->mmap_base = do_mmap(NULL, 0, info->mmap_size,
- PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
- 0);
+ info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0);
if (IS_ERR((void *)info->mmap_base)) {
up_write(&ctx->mm->mmap_sem);
info->mmap_size = 0;
@@ -160,7 +159,7 @@ static int aio_setup_ring(struct kioctx *ctx)
info->nr = nr_events; /* trusted copy */
- ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+ ring = kmap_atomic(info->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ctx->user_id;
ring->head = ring->tail = 0;
@@ -168,47 +167,38 @@ static int aio_setup_ring(struct kioctx *ctx)
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
- kunmap_atomic(ring, KM_USER0);
+ kunmap_atomic(ring);
return 0;
}
/* aio_ring_event: returns a pointer to the event at the given index from
- * kmap_atomic(, km). Release the pointer with put_aio_ring_event();
+ * kmap_atomic(). Release the pointer with put_aio_ring_event();
*/
#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-#define aio_ring_event(info, nr, km) ({ \
+#define aio_ring_event(info, nr) ({ \
unsigned pos = (nr) + AIO_EVENTS_OFFSET; \
struct io_event *__event; \
__event = kmap_atomic( \
- (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \
+ (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
__event += pos % AIO_EVENTS_PER_PAGE; \
__event; \
})
-#define put_aio_ring_event(event, km) do { \
+#define put_aio_ring_event(event) do { \
struct io_event *__event = (event); \
(void)__event; \
- kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
+ kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
} while(0)
static void ctx_rcu_free(struct rcu_head *head)
{
struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
- unsigned nr_events = ctx->max_reqs;
-
kmem_cache_free(kioctx_cachep, ctx);
-
- if (nr_events) {
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - nr_events > aio_nr);
- aio_nr -= nr_events;
- spin_unlock(&aio_nr_lock);
- }
}
/* __put_ioctx
@@ -217,23 +207,23 @@ static void ctx_rcu_free(struct rcu_head *head)
*/
static void __put_ioctx(struct kioctx *ctx)
{
+ unsigned nr_events = ctx->max_reqs;
BUG_ON(ctx->reqs_active);
- cancel_delayed_work(&ctx->wq);
- cancel_work_sync(&ctx->wq.work);
+ cancel_delayed_work_sync(&ctx->wq);
aio_free_ring(ctx);
mmdrop(ctx->mm);
ctx->mm = NULL;
+ if (nr_events) {
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - nr_events > aio_nr);
+ aio_nr -= nr_events;
+ spin_unlock(&aio_nr_lock);
+ }
pr_debug("__put_ioctx: freeing %p\n", ctx);
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
-static inline void get_ioctx(struct kioctx *kioctx)
-{
- BUG_ON(atomic_read(&kioctx->users) <= 0);
- atomic_inc(&kioctx->users);
-}
-
static inline int try_get_ioctx(struct kioctx *kioctx)
{
return atomic_inc_not_zero(&kioctx->users);
@@ -253,7 +243,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
{
struct mm_struct *mm;
struct kioctx *ctx;
- int did_sync = 0;
+ int err = -ENOMEM;
/* Prevent overflows */
if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -262,7 +252,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-EINVAL);
}
- if ((unsigned long)nr_events > aio_max_nr)
+ if (!nr_events || (unsigned long)nr_events > aio_max_nr)
return ERR_PTR(-EAGAIN);
ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -273,7 +263,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
mm = ctx->mm = current->mm;
atomic_inc(&mm->mm_count);
- atomic_set(&ctx->users, 1);
+ atomic_set(&ctx->users, 2);
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->ring_info.ring_lock);
init_waitqueue_head(&ctx->wait);
@@ -286,25 +276,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
goto out_freectx;
/* limit the number of system wide aios */
- do {
- spin_lock_bh(&aio_nr_lock);
- if (aio_nr + nr_events > aio_max_nr ||
- aio_nr + nr_events < aio_nr)
- ctx->max_reqs = 0;
- else
- aio_nr += ctx->max_reqs;
- spin_unlock_bh(&aio_nr_lock);
- if (ctx->max_reqs || did_sync)
- break;
-
- /* wait for rcu callbacks to have completed before giving up */
- synchronize_rcu();
- did_sync = 1;
- ctx->max_reqs = nr_events;
- } while (1);
-
- if (ctx->max_reqs == 0)
+ spin_lock(&aio_nr_lock);
+ if (aio_nr + nr_events > aio_max_nr ||
+ aio_nr + nr_events < aio_nr) {
+ spin_unlock(&aio_nr_lock);
goto out_cleanup;
+ }
+ aio_nr += ctx->max_reqs;
+ spin_unlock(&aio_nr_lock);
/* now link into global list. */
spin_lock(&mm->ioctx_lock);
@@ -316,27 +295,27 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ctx;
out_cleanup:
- __put_ioctx(ctx);
- return ERR_PTR(-EAGAIN);
-
+ err = -EAGAIN;
+ aio_free_ring(ctx);
out_freectx:
mmdrop(mm);
kmem_cache_free(kioctx_cachep, ctx);
- ctx = ERR_PTR(-ENOMEM);
-
- dprintk("aio: error allocating ioctx %p\n", ctx);
- return ctx;
+ dprintk("aio: error allocating ioctx %d\n", err);
+ return ERR_PTR(err);
}
-/* aio_cancel_all
+/* kill_ctx
* Cancels all outstanding aio requests on an aio context. Used
* when the processes owning a context have all exited to encourage
* the rapid destruction of the kioctx.
*/
-static void aio_cancel_all(struct kioctx *ctx)
+static void kill_ctx(struct kioctx *ctx)
{
int (*cancel)(struct kiocb *, struct io_event *);
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
struct io_event res;
+
spin_lock_irq(&ctx->ctx_lock);
ctx->dead = 1;
while (!list_empty(&ctx->active_reqs)) {
@@ -352,15 +331,7 @@ static void aio_cancel_all(struct kioctx *ctx)
spin_lock_irq(&ctx->ctx_lock);
}
}
- spin_unlock_irq(&ctx->ctx_lock);
-}
-static void wait_for_all_aios(struct kioctx *ctx)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- spin_lock_irq(&ctx->ctx_lock);
if (!ctx->reqs_active)
goto out;
@@ -410,19 +381,24 @@ void exit_aio(struct mm_struct *mm)
ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
hlist_del_rcu(&ctx->list);
- aio_cancel_all(ctx);
-
- wait_for_all_aios(ctx);
- /*
- * Ensure we don't leave the ctx on the aio_wq
- */
- cancel_work_sync(&ctx->wq.work);
+ kill_ctx(ctx);
if (1 != atomic_read(&ctx->users))
printk(KERN_DEBUG
"exit_aio:ioctx still alive: %d %d %d\n",
atomic_read(&ctx->users), ctx->dead,
ctx->reqs_active);
+ /*
+ * We don't need to bother with munmap() here -
+ * exit_mmap(mm) is coming and it'll unmap everything.
+ * Since aio_free_ring() uses non-zero ->mmap_size
+ * as indicator that it needs to unmap the area,
+ * just set it to 0; aio_free_ring() is the only
+ * place that uses ->mmap_size, so it's safe.
+ * That way we get all munmap done to current->mm -
+ * all other callers have ctx->mm == current->mm.
+ */
+ ctx->ring_info.mmap_size = 0;
put_ioctx(ctx);
}
}
@@ -476,14 +452,23 @@ static void kiocb_batch_init(struct kiocb_batch *batch, long total)
batch->count = total;
}
-static void kiocb_batch_free(struct kiocb_batch *batch)
+static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
{
struct kiocb *req, *n;
+ if (list_empty(&batch->head))
+ return;
+
+ spin_lock_irq(&ctx->ctx_lock);
list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
list_del(&req->ki_batch);
+ list_del(&req->ki_list);
kmem_cache_free(kiocb_cachep, req);
+ ctx->reqs_active--;
}
+ if (unlikely(!ctx->reqs_active && ctx->dead))
+ wake_up_all(&ctx->wait);
+ spin_unlock_irq(&ctx->ctx_lock);
}
/*
@@ -600,11 +585,16 @@ static void aio_fput_routine(struct work_struct *data)
fput(req->ki_filp);
/* Link the iocb into the context's free list */
+ rcu_read_lock();
spin_lock_irq(&ctx->ctx_lock);
really_put_req(ctx, req);
+ /*
+ * at that point ctx might've been killed, but actual
+ * freeing is RCU'd
+ */
spin_unlock_irq(&ctx->ctx_lock);
+ rcu_read_unlock();
- put_ioctx(ctx);
spin_lock_irq(&fput_lock);
}
spin_unlock_irq(&fput_lock);
@@ -635,7 +625,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
* this function will be executed w/out any aio kthread wakeup.
*/
if (unlikely(!fput_atomic(req->ki_filp))) {
- get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
@@ -913,7 +902,7 @@ static void aio_kick_handler(struct work_struct *work)
unuse_mm(mm);
set_fs(oldfs);
/*
- * we're in a worker thread already, don't use queue_delayed_work,
+ * we're in a worker thread already; no point using non-zero delay
*/
if (requeue)
queue_delayed_work(aio_wq, &ctx->wq, 0);
@@ -1012,10 +1001,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
if (kiocbIsCancelled(iocb))
goto put_rq;
- ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+ ring = kmap_atomic(info->ring_pages[0]);
tail = info->tail;
- event = aio_ring_event(info, tail, KM_IRQ0);
+ event = aio_ring_event(info, tail);
if (++tail >= info->nr)
tail = 0;
@@ -1036,8 +1025,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
info->tail = tail;
ring->tail = tail;
- put_aio_ring_event(event, KM_IRQ0);
- kunmap_atomic(ring, KM_IRQ1);
+ put_aio_ring_event(event);
+ kunmap_atomic(ring);
pr_debug("added to ring %p at [%lu]\n", iocb, tail);
@@ -1082,7 +1071,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
unsigned long head;
int ret = 0;
- ring = kmap_atomic(info->ring_pages[0], KM_USER0);
+ ring = kmap_atomic(info->ring_pages[0]);
dprintk("in aio_read_evt h%lu t%lu m%lu\n",
(unsigned long)ring->head, (unsigned long)ring->tail,
(unsigned long)ring->nr);
@@ -1094,18 +1083,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
head = ring->head % info->nr;
if (head != ring->tail) {
- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+ struct io_event *evp = aio_ring_event(info, head);
*ent = *evp;
head = (head + 1) % info->nr;
smp_mb(); /* finish reading the event before updatng the head */
ring->head = head;
ret = 1;
- put_aio_ring_event(evp, KM_USER1);
+ put_aio_ring_event(evp);
}
spin_unlock(&info->ring_lock);
out:
- kunmap_atomic(ring, KM_USER0);
+ kunmap_atomic(ring);
dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
(unsigned long)ring->head, (unsigned long)ring->tail);
return ret;
@@ -1283,8 +1272,7 @@ static void io_destroy(struct kioctx *ioctx)
if (likely(!was_dead))
put_ioctx(ioctx); /* twice for the list */
- aio_cancel_all(ioctx);
- wait_for_all_aios(ioctx);
+ kill_ctx(ioctx);
/*
* Wake up any waiters. The setting of ctx->dead must be seen
@@ -1292,7 +1280,6 @@ static void io_destroy(struct kioctx *ioctx)
* locking done by the above calls to ensure this consistency.
*/
wake_up_all(&ioctx->wait);
- put_ioctx(ioctx); /* once for the lookup */
}
/* sys_io_setup:
@@ -1329,11 +1316,9 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
- if (!ret)
- return 0;
-
- get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
- io_destroy(ioctx);
+ if (ret)
+ io_destroy(ioctx);
+ put_ioctx(ioctx);
}
out:
@@ -1351,6 +1336,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
io_destroy(ioctx);
+ put_ioctx(ioctx);
return 0;
}
pr_debug("EINVAL: io_destroy: invalid context id\n");
@@ -1460,13 +1446,17 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
ret = compat_rw_copy_check_uvector(type,
(struct compat_iovec __user *)kiocb->ki_buf,
kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
- &kiocb->ki_iovec, 1);
+ &kiocb->ki_iovec);
else
#endif
ret = rw_copy_check_uvector(type,
(struct iovec __user *)kiocb->ki_buf,
kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
- &kiocb->ki_iovec, 1);
+ &kiocb->ki_iovec);
+ if (ret < 0)
+ goto out;
+
+ ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
if (ret < 0)
goto out;
@@ -1481,11 +1471,17 @@ out:
return ret;
}
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
{
+ int bytes;
+
+ bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
+ if (bytes < 0)
+ return bytes;
+
kiocb->ki_iovec = &kiocb->ki_inline_vec;
kiocb->ki_iovec->iov_base = kiocb->ki_buf;
- kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_iovec->iov_len = bytes;
kiocb->ki_nr_segs = 1;
kiocb->ki_cur_seg = 0;
return 0;
@@ -1510,10 +1506,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(READ, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1528,10 +1521,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(WRITE, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1542,9 +1532,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(READ, kiocb, compat);
if (ret)
break;
@@ -1556,9 +1543,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
if (ret)
break;
@@ -1742,7 +1726,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
}
blk_finish_plug(&plug);
- kiocb_batch_free(&batch);
+ kiocb_batch_free(ctx, &batch);
put_ioctx(ctx);
return i ? i : ret;
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f11e43ed907..28d39fb84ae 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -39,19 +39,6 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_dname = anon_inodefs_dname,
};
-static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
-{
- return mount_pseudo(fs_type, "anon_inode:", NULL,
- &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
-}
-
-static struct file_system_type anon_inode_fs_type = {
- .name = "anon_inodefs",
- .mount = anon_inodefs_mount,
- .kill_sb = kill_anon_super,
-};
-
/*
* nop .set_page_dirty method so that people can use .page_mkwrite on
* anon inodes.
@@ -65,6 +52,62 @@ static const struct address_space_operations anon_aops = {
.set_page_dirty = anon_set_page_dirty,
};
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no associated per-instance data, so we need
+ * only allocate one of them.
+ */
+static struct inode *anon_inode_mkinode(struct super_block *s)
+{
+ struct inode *inode = new_inode_pseudo(s);
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ inode->i_ino = get_next_ino();
+ inode->i_fop = &anon_inode_fops;
+
+ inode->i_mapping->a_ops = &anon_aops;
+
+ /*
+ * Mark the inode dirty from the very beginning,
+ * that way it will never be moved to the dirty
+ * list because mark_inode_dirty() will think
+ * that it already _is_ on the dirty list.
+ */
+ inode->i_state = I_DIRTY;
+ inode->i_mode = S_IRUSR | S_IWUSR;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_flags |= S_PRIVATE;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ return inode;
+}
+
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ struct dentry *root;
+ root = mount_pseudo(fs_type, "anon_inode:", NULL,
+ &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
+ if (!IS_ERR(root)) {
+ struct super_block *s = root->d_sb;
+ anon_inode_inode = anon_inode_mkinode(s);
+ if (IS_ERR(anon_inode_inode)) {
+ dput(root);
+ deactivate_locked_super(s);
+ root = ERR_CAST(anon_inode_inode);
+ }
+ }
+ return root;
+}
+
+static struct file_system_type anon_inode_fs_type = {
+ .name = "anon_inodefs",
+ .mount = anon_inodefs_mount,
+ .kill_sb = kill_anon_super,
+};
+
/**
* anon_inode_getfile - creates a new file instance by hooking it up to an
* anonymous inode, and a dentry that describe the "class"
@@ -180,38 +223,6 @@ err_put_unused_fd:
}
EXPORT_SYMBOL_GPL(anon_inode_getfd);
-/*
- * A single inode exists for all anon_inode files. Contrary to pipes,
- * anon_inode inodes have no associated per-instance data, so we need
- * only allocate one of them.
- */
-static struct inode *anon_inode_mkinode(void)
-{
- struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb);
-
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- inode->i_ino = get_next_ino();
- inode->i_fop = &anon_inode_fops;
-
- inode->i_mapping->a_ops = &anon_aops;
-
- /*
- * Mark the inode dirty from the very beginning,
- * that way it will never be moved to the dirty
- * list because mark_inode_dirty() will think
- * that it already _is_ on the dirty list.
- */
- inode->i_state = I_DIRTY;
- inode->i_mode = S_IRUSR | S_IWUSR;
- inode->i_uid = current_fsuid();
- inode->i_gid = current_fsgid();
- inode->i_flags |= S_PRIVATE;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- return inode;
-}
-
static int __init anon_inode_init(void)
{
int error;
@@ -224,16 +235,8 @@ static int __init anon_inode_init(void)
error = PTR_ERR(anon_inode_mnt);
goto err_unregister_filesystem;
}
- anon_inode_inode = anon_inode_mkinode();
- if (IS_ERR(anon_inode_inode)) {
- error = PTR_ERR(anon_inode_inode);
- goto err_mntput;
- }
-
return 0;
-err_mntput:
- kern_unmount(anon_inode_mnt);
err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type);
err_exit:
diff --git a/fs/attr.c b/fs/attr.c
index 7ee7ba48831..0da90951d27 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -5,7 +5,7 @@
* changes by Thomas Schoebel-Theuer
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/string.h>
@@ -47,14 +47,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* Make sure a caller can chown. */
if ((ia_valid & ATTR_UID) &&
- (current_fsuid() != inode->i_uid ||
- attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
+ (!uid_eq(current_fsuid(), inode->i_uid) ||
+ !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
- (current_fsuid() != inode->i_uid ||
- (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
+ (!uid_eq(current_fsuid(), inode->i_uid) ||
+ (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
!capable(CAP_CHOWN))
return -EPERM;
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(setattr_copy);
int notify_change(struct dentry * dentry, struct iattr * attr)
{
struct inode *inode = dentry->d_inode;
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
int error;
struct timespec now;
unsigned int ia_valid = attr->ia_valid;
@@ -176,8 +176,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
return -EPERM;
}
+ if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) {
+ if (attr->ia_size != inode->i_size)
+ inode_inc_iversion(inode);
+ }
+
if ((ia_valid & ATTR_MODE)) {
- mode_t amode = attr->ia_mode;
+ umode_t amode = attr->ia_mode;
/* Flag setting protected by i_mutex */
if (is_sxid(amode))
inode->i_flags &= ~S_NOSEC;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 326dc08d3e3..908e1845541 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -116,6 +116,7 @@ struct autofs_sb_info {
int needs_reghost;
struct super_block *sb;
struct mutex wq_mutex;
+ struct mutex pipe_mutex;
spinlock_t fs_lock;
struct autofs_wait_queue *queues; /* Wait queue pointer */
spinlock_t lookup_lock;
@@ -155,7 +156,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
return 0;
}
-struct inode *autofs4_get_inode(struct super_block *, mode_t);
+struct inode *autofs4_get_inode(struct super_block *, umode_t);
void autofs4_free_ino(struct autofs_info *);
/* Expiration */
@@ -268,6 +269,17 @@ int autofs4_fill_super(struct super_block *, void *, int);
struct autofs_info *autofs4_new_ino(struct autofs_sb_info *);
void autofs4_clean_ino(struct autofs_info *);
+static inline int autofs_prepare_pipe(struct file *pipe)
+{
+ if (!pipe->f_op || !pipe->f_op->write)
+ return -EINVAL;
+ if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode))
+ return -EINVAL;
+ /* We want a packet pipe */
+ pipe->f_flags |= O_DIRECT;
+ return 0;
+}
+
/* Queue management functions */
int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 509fe1eb66a..aa9103f8f01 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -194,7 +194,7 @@ static int find_autofs_mount(const char *pathname,
return err;
err = -ENOENT;
while (path.dentry == path.mnt->mnt_root) {
- if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) {
+ if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) {
if (test(&path, data)) {
path_get(&path);
if (!err) /* already found some */
@@ -212,7 +212,7 @@ static int find_autofs_mount(const char *pathname,
static int test_by_dev(struct path *path, void *p)
{
- return path->mnt->mnt_sb->s_dev == *(dev_t *)p;
+ return path->dentry->d_sb->s_dev == *(dev_t *)p;
}
static int test_by_type(struct path *path, void *p)
@@ -230,7 +230,7 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
fdt = files_fdtable(files);
BUG_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
- FD_SET(fd, fdt->close_on_exec);
+ __set_close_on_exec(fd, fdt);
spin_unlock(&files->file_lock);
}
@@ -376,7 +376,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
err = -EBADF;
goto out;
}
- if (!pipe->f_op || !pipe->f_op->write) {
+ if (autofs_prepare_pipe(pipe) < 0) {
err = -EPIPE;
fput(pipe);
goto out;
@@ -538,11 +538,11 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
err = find_autofs_mount(name, &path, test_by_type, &type);
if (err)
goto out;
- devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
+ devid = new_encode_dev(path.dentry->d_sb->s_dev);
err = 0;
if (path.mnt->mnt_root == path.dentry) {
err = 1;
- magic = path.mnt->mnt_sb->s_magic;
+ magic = path.dentry->d_sb->s_magic;
}
} else {
dev_t dev = sbi->sb->s_dev;
@@ -556,7 +556,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
err = have_submounts(path.dentry);
if (follow_down_one(&path))
- magic = path.mnt->mnt_sb->s_magic;
+ magic = path.dentry->d_sb->s_magic;
}
param->ismountpoint.out.devid = devid;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 450f529a4ea..1feb68ecef9 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -124,6 +124,7 @@ start:
/* Negative dentry - try next */
if (!simple_positive(q)) {
spin_unlock(&p->d_lock);
+ lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_);
p = q;
goto again;
}
@@ -186,6 +187,7 @@ again:
/* Negative dentry - try next */
if (!simple_positive(ret)) {
spin_unlock(&p->d_lock);
+ lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_);
p = ret;
goto again;
}
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index c038727b405..cddc74b9cdb 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -31,11 +31,11 @@ static int __init init_autofs4_fs(void)
{
int err;
+ autofs_dev_ioctl_init();
+
err = register_filesystem(&autofs_fs_type);
if (err)
- return err;
-
- autofs_dev_ioctl_init();
+ autofs_dev_ioctl_exit();
return err;
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8179f1ab817..8a4fed8ead3 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -70,10 +70,10 @@ out_kill_sb:
kill_litter_super(sb);
}
-static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int autofs4_show_options(struct seq_file *m, struct dentry *root)
{
- struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb);
- struct inode *root_inode = mnt->mnt_sb->s_root->d_inode;
+ struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
+ struct inode *root_inode = root->d_sb->s_root->d_inode;
if (!sbi)
return 0;
@@ -100,7 +100,7 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
static void autofs4_evict_inode(struct inode *inode)
{
- end_writeback(inode);
+ clear_inode(inode);
kfree(inode->i_private);
}
@@ -225,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi->min_proto = 0;
sbi->max_proto = 0;
mutex_init(&sbi->wq_mutex);
+ mutex_init(&sbi->pipe_mutex);
spin_lock_init(&sbi->fs_lock);
sbi->queues = NULL;
spin_lock_init(&sbi->lookup_lock);
@@ -244,12 +245,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
if (!ino)
goto fail_free;
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
- if (!root_inode)
- goto fail_ino;
-
- root = d_alloc_root(root_inode);
+ root = d_make_root(root_inode);
if (!root)
- goto fail_iput;
+ goto fail_ino;
pipe = NULL;
root->d_fsdata = ino;
@@ -292,7 +290,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
printk("autofs: could not open pipe file descriptor\n");
goto fail_dput;
}
- if (!pipe->f_op || !pipe->f_op->write)
+ if (autofs_prepare_pipe(pipe) < 0)
goto fail_fput;
sbi->pipe = pipe;
sbi->pipefd = pipefd;
@@ -314,9 +312,6 @@ fail_fput:
fail_dput:
dput(root);
goto fail_free;
-fail_iput:
- printk("autofs: get root dentry failed\n");
- iput(root_inode);
fail_ino:
kfree(ino);
fail_free:
@@ -326,7 +321,7 @@ fail_unlock:
return -EINVAL;
}
-struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
+struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
{
struct inode *inode = new_inode(sb);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index f55ae23b137..75e5f1c8e02 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -26,7 +26,7 @@
static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
static int autofs4_dir_unlink(struct inode *,struct dentry *);
static int autofs4_dir_rmdir(struct inode *,struct dentry *);
-static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
+static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t);
static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
#ifdef CONFIG_COMPAT
static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
@@ -699,7 +699,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
return 0;
}
-static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index e1fbdeef85d..da8876d38a7 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -56,26 +56,27 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
mutex_unlock(&sbi->wq_mutex);
}
-static int autofs4_write(struct file *file, const void *addr, int bytes)
+static int autofs4_write(struct autofs_sb_info *sbi,
+ struct file *file, const void *addr, int bytes)
{
unsigned long sigpipe, flags;
mm_segment_t fs;
const char *data = (const char *)addr;
ssize_t wr = 0;
- /** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/
-
sigpipe = sigismember(&current->pending.signal, SIGPIPE);
/* Save pointer to user space and point back to kernel space */
fs = get_fs();
set_fs(KERNEL_DS);
+ mutex_lock(&sbi->pipe_mutex);
while (bytes &&
(wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
data += wr;
bytes -= wr;
}
+ mutex_unlock(&sbi->pipe_mutex);
set_fs(fs);
@@ -110,6 +111,13 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
pkt.hdr.proto_version = sbi->version;
pkt.hdr.type = type;
+ mutex_lock(&sbi->wq_mutex);
+
+ /* Check if we have become catatonic */
+ if (sbi->catatonic) {
+ mutex_unlock(&sbi->wq_mutex);
+ return;
+ }
switch (type) {
/* Kernel protocol v4 missing and expire packets */
case autofs_ptype_missing:
@@ -163,22 +171,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
}
default:
printk("autofs4_notify_daemon: bad type %d!\n", type);
+ mutex_unlock(&sbi->wq_mutex);
return;
}
- /* Check if we have become catatonic */
- mutex_lock(&sbi->wq_mutex);
- if (!sbi->catatonic) {
- pipe = sbi->pipe;
- get_file(pipe);
- }
+ pipe = sbi->pipe;
+ get_file(pipe);
+
mutex_unlock(&sbi->wq_mutex);
- if (pipe) {
- if (autofs4_write(pipe, &pkt, pktsz))
- autofs4_catatonic_mode(sbi);
- fput(pipe);
- }
+ if (autofs4_write(sbi, pipe, &pkt, pktsz))
+ autofs4_catatonic_mode(sbi);
+ fput(pipe);
}
static int autofs4_getpath(struct autofs_sb_info *sbi,
@@ -257,6 +261,9 @@ static int validate_request(struct autofs_wait_queue **wait,
struct autofs_wait_queue *wq;
struct autofs_info *ino;
+ if (sbi->catatonic)
+ return -ENOENT;
+
/* Wait in progress, continue; */
wq = autofs4_find_wait(sbi, qstr);
if (wq) {
@@ -289,6 +296,9 @@ static int validate_request(struct autofs_wait_queue **wait,
if (mutex_lock_interruptible(&sbi->wq_mutex))
return -EINTR;
+ if (sbi->catatonic)
+ return -ENOENT;
+
wq = autofs4_find_wait(sbi, qstr);
if (wq) {
*wait = wq;
@@ -389,7 +399,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
ret = validate_request(&wq, sbi, &qstr, dentry, notify);
if (ret <= 0) {
- if (ret == 0)
+ if (ret != -EINTR)
mutex_unlock(&sbi->wq_mutex);
kfree(qstr.name);
return ret;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9205cf25f1c..1b35d6bd06b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -9,7 +9,7 @@
*/
#include <linux/fs.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/namei.h>
@@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops =
};
static int bad_inode_create (struct inode *dir, struct dentry *dentry,
- int mode, struct nameidata *nd)
+ umode_t mode, struct nameidata *nd)
{
return -EIO;
}
@@ -202,7 +202,7 @@ static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
}
static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
- int mode)
+ umode_t mode)
{
return -EIO;
}
@@ -213,7 +213,7 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
}
static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
- int mode, dev_t rdev)
+ umode_t mode, dev_t rdev)
{
return -EIO;
}
@@ -292,7 +292,6 @@ static const struct inode_operations bad_inode_ops =
.getxattr = bad_inode_getxattr,
.listxattr = bad_inode_listxattr,
.removexattr = bad_inode_removexattr,
- /* truncate_range returns void */
};
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8342ca67abc..e18da23d42b 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -286,7 +286,6 @@ befs_alloc_inode(struct super_block *sb)
static void befs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
}
@@ -853,9 +852,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
ret = PTR_ERR(root);
goto unacquire_priv_sbp;
}
- sb->s_root = d_alloc_root(root);
+ sb->s_root = d_make_root(root);
if (!sb->s_root) {
- iput(root);
befs_error(sb, "get root inode failed");
goto unacquire_priv_sbp;
}
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 9cc07401947..d12c7966db2 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -84,7 +84,7 @@ const struct file_operations bfs_dir_operations = {
extern void dump_imap(const char *, struct super_block *);
-static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd)
{
int err;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 697af5bf70b..9870417c26e 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -174,7 +174,7 @@ static void bfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
invalidate_inode_buffers(inode);
- end_writeback(inode);
+ clear_inode(inode);
if (inode->i_nlink)
return;
@@ -251,7 +251,6 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
static void bfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
}
@@ -368,9 +367,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
ret = PTR_ERR(inode);
goto out2;
}
- s->s_root = d_alloc_root(inode);
+ s->s_root = d_make_root(inode);
if (!s->s_root) {
- iput(inode);
ret = -ENOMEM;
goto out2;
}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a6395bdb26a..d146e181d10 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -26,7 +26,6 @@
#include <linux/coredump.h>
#include <linux/slab.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/a.out-core.h>
@@ -51,9 +50,7 @@ static int set_brk(unsigned long start, unsigned long end)
end = PAGE_ALIGN(end);
if (end > start) {
unsigned long addr;
- down_write(&current->mm->mmap_sem);
- addr = do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
+ addr = vm_brk(start, end - start);
if (BAD_ADDR(addr))
return addr;
}
@@ -259,8 +256,14 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = 0;
+ retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
+ if (retval < 0) {
+ /* Someone check-me: is this error path enough? */
+ send_sig(SIGKILL, current, 0);
+ return retval;
+ }
+
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
if (N_MAGIC(ex) == OMAGIC) {
unsigned long text_addr, map_size;
@@ -275,9 +278,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
pos = 32;
map_size = ex.a_text+ex.a_data;
#endif
- down_write(&current->mm->mmap_sem);
- error = do_brk(text_addr & PAGE_MASK, map_size);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(text_addr & PAGE_MASK, map_size);
if (error != (text_addr & PAGE_MASK)) {
send_sig(SIGKILL, current, 0);
return error;
@@ -308,9 +309,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
loff_t pos = fd_offset;
- down_write(&current->mm->mmap_sem);
- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- up_write(&current->mm->mmap_sem);
+ vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
bprm->file->f_op->read(bprm->file,
(char __user *)N_TXTADDR(ex),
ex.a_text+ex.a_data, &pos);
@@ -320,24 +319,20 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
goto beyond_if;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
+ error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset);
- up_write(&current->mm->mmap_sem);
if (error != N_TXTADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+ error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset + ex.a_text);
- up_write(&current->mm->mmap_sem);
if (error != N_DATADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
@@ -352,13 +347,6 @@ beyond_if:
return retval;
}
- retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0) {
- /* Someone check-me: is this error path enough? */
- send_sig(SIGKILL, current, 0);
- return retval;
- }
-
current->mm->start_stack =
(unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
#ifdef __alpha__
@@ -414,9 +402,7 @@ static int load_aout_library(struct file *file)
"N_TXTOFF is not page aligned. Please convert library: %s\n",
file->f_path.dentry->d_name.name);
}
- down_write(&current->mm->mmap_sem);
- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- up_write(&current->mm->mmap_sem);
+ vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
file->f_op->read(file, (char __user *)start_addr,
ex.a_text + ex.a_data, &pos);
@@ -427,12 +413,10 @@ static int load_aout_library(struct file *file)
goto out;
}
/* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+ error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
N_TXTOFF(ex));
- up_write(&current->mm->mmap_sem);
retval = error;
if (error != start_addr)
goto out;
@@ -440,9 +424,7 @@ static int load_aout_library(struct file *file)
len = PAGE_ALIGN(ex.a_text + ex.a_data);
bss = ex.a_text + ex.a_data + ex.a_bss;
if (bss > len) {
- down_write(&current->mm->mmap_sem);
- error = do_brk(start_addr + len, bss - len);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(start_addr + len, bss - len);
retval = error;
if (error != start_addr + len)
goto out;
@@ -454,7 +436,8 @@ out:
static int __init init_aout_binfmt(void)
{
- return register_binfmt(&aout_format);
+ register_binfmt(&aout_format);
+ return 0;
}
static void __exit exit_aout_binfmt(void)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 21ac5ee4b43..1b52956afe3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -35,6 +35,7 @@
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
+#include <asm/exec.h>
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
@@ -81,9 +82,7 @@ static int set_brk(unsigned long start, unsigned long end)
end = ELF_PAGEALIGN(end);
if (end > start) {
unsigned long addr;
- down_write(&current->mm->mmap_sem);
- addr = do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
+ addr = vm_brk(start, end - start);
if (BAD_ADDR(addr))
return addr;
}
@@ -227,10 +226,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
NEW_AUX_ENT(AT_BASE, interp_load_addr);
NEW_AUX_ENT(AT_FLAGS, 0);
NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
- NEW_AUX_ENT(AT_UID, cred->uid);
- NEW_AUX_ENT(AT_EUID, cred->euid);
- NEW_AUX_ENT(AT_GID, cred->gid);
- NEW_AUX_ENT(AT_EGID, cred->egid);
+ NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
+ NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
+ NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
+ NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
NEW_AUX_ENT(AT_EXECFN, bprm->exec);
@@ -330,7 +329,6 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
if (!size)
return addr;
- down_write(&current->mm->mmap_sem);
/*
* total_size is the size of the ELF (interpreter) image.
* The _first_ mmap needs to know the full size, otherwise
@@ -341,13 +339,12 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
*/
if (total_size) {
total_size = ELF_PAGEALIGN(total_size);
- map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+ map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
if (!BAD_ADDR(map_addr))
- do_munmap(current->mm, map_addr+size, total_size-size);
+ vm_munmap(map_addr+size, total_size-size);
} else
- map_addr = do_mmap(filep, addr, size, prot, type, off);
+ map_addr = vm_mmap(filep, addr, size, prot, type, off);
- up_write(&current->mm->mmap_sem);
return(map_addr);
}
@@ -513,9 +510,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
/* Map the last of the bss segment */
- down_write(&current->mm->mmap_sem);
- error = do_brk(elf_bss, last_bss - elf_bss);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(elf_bss, last_bss - elf_bss);
if (BAD_ADDR(error))
goto out_close;
}
@@ -712,7 +707,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
goto out_free_dentry;
/* OK, This is the point of no return */
- current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags;
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
@@ -794,7 +788,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
-#if defined(CONFIG_X86) || defined(CONFIG_ARM)
+#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
/* Memory randomization might have been switched off
* in runtime via sysctl.
* If that is the case, retain the original non-zero
@@ -934,7 +928,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < 0) {
@@ -963,10 +956,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
+ error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
}
#ifdef ELF_PLAT_INIT
@@ -1051,8 +1042,7 @@ static int load_elf_library(struct file *file)
eppnt++;
/* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file,
+ error = vm_mmap(file,
ELF_PAGESTART(eppnt->p_vaddr),
(eppnt->p_filesz +
ELF_PAGEOFFSET(eppnt->p_vaddr)),
@@ -1060,7 +1050,6 @@ static int load_elf_library(struct file *file)
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
(eppnt->p_offset -
ELF_PAGEOFFSET(eppnt->p_vaddr)));
- up_write(&current->mm->mmap_sem);
if (error != ELF_PAGESTART(eppnt->p_vaddr))
goto out_free_ph;
@@ -1073,11 +1062,8 @@ static int load_elf_library(struct file *file)
len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
ELF_MIN_ALIGN - 1);
bss = eppnt->p_memsz + eppnt->p_vaddr;
- if (bss > len) {
- down_write(&current->mm->mmap_sem);
- do_brk(len, bss - len);
- up_write(&current->mm->mmap_sem);
- }
+ if (bss > len)
+ vm_brk(len, bss - len);
error = 0;
out_free_ph:
@@ -1095,6 +1081,29 @@ out:
*/
/*
+ * The purpose of always_dump_vma() is to make sure that special kernel mappings
+ * that are useful for post-mortem analysis are included in every core dump.
+ * In that way we ensure that the core dump is fully interpretable later
+ * without matching up the same kernel and hardware config to see what PC values
+ * meant. These special mappings include - vDSO, vsyscall, and other
+ * architecture specific mappings
+ */
+static bool always_dump_vma(struct vm_area_struct *vma)
+{
+ /* Any vsyscall mappings? */
+ if (vma == get_gate_vma(vma->vm_mm))
+ return true;
+ /*
+ * arch_vma_name() returns non-NULL for special architecture mappings,
+ * such as vDSO sections.
+ */
+ if (arch_vma_name(vma))
+ return true;
+
+ return false;
+}
+
+/*
* Decide what to dump of a segment, part, all or none.
*/
static unsigned long vma_dump_size(struct vm_area_struct *vma,
@@ -1102,10 +1111,13 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
{
#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
- /* The vma can be set up to tell us the answer directly. */
- if (vma->vm_flags & VM_ALWAYSDUMP)
+ /* always dump the vdso and vsyscall sections */
+ if (always_dump_vma(vma))
goto whole;
+ if (vma->vm_flags & VM_NODUMP)
+ return 0;
+
/* Hugetlb memory check */
if (vma->vm_flags & VM_HUGETLB) {
if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
@@ -1342,8 +1354,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
psinfo->pr_flag = p->flags;
rcu_read_lock();
cred = __task_cred(p);
- SET_UID(psinfo->pr_uid, cred->uid);
- SET_GID(psinfo->pr_gid, cred->gid);
+ SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
+ SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
@@ -1390,6 +1402,22 @@ static void do_thread_regset_writeback(struct task_struct *task,
regset->writeback(task, regset, 1);
}
+#ifndef PR_REG_SIZE
+#define PR_REG_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PRSTATUS_SIZE
+#define PRSTATUS_SIZE(S) sizeof(S)
+#endif
+
+#ifndef PR_REG_PTR
+#define PR_REG_PTR(S) (&((S)->pr_reg))
+#endif
+
+#ifndef SET_PR_FPVALID
+#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#endif
+
static int fill_thread_core_info(struct elf_thread_core_info *t,
const struct user_regset_view *view,
long signr, size_t *total)
@@ -1404,11 +1432,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
*/
fill_prstatus(&t->prstatus, t->task, signr);
(void) view->regsets[0].get(t->task, &view->regsets[0],
- 0, sizeof(t->prstatus.pr_reg),
- &t->prstatus.pr_reg, NULL);
+ 0, PR_REG_SIZE(t->prstatus.pr_reg),
+ PR_REG_PTR(&t->prstatus), NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- sizeof(t->prstatus), &t->prstatus);
+ PRSTATUS_SIZE(t->prstatus), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1421,7 +1449,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
for (i = 1; i < view->n; ++i) {
const struct user_regset *regset = &view->regsets[i];
do_thread_regset_writeback(t->task, regset);
- if (regset->core_note_type &&
+ if (regset->core_note_type && regset->get &&
(!regset->active || regset->active(t->task, regset))) {
int ret;
size_t size = regset->n * regset->size;
@@ -1438,7 +1466,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
regset->core_note_type,
size, data);
else {
- t->prstatus.pr_fpvalid = 1;
+ SET_PR_FPVALID(&t->prstatus, 1);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
@@ -2077,7 +2105,8 @@ out:
static int __init init_elf_binfmt(void)
{
- return register_binfmt(&elf_format);
+ register_binfmt(&elf_format);
+ return 0;
}
static void __exit exit_elf_binfmt(void)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 30745f459fa..3d77cf81ba3 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -39,6 +39,7 @@
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/pgalloc.h>
+#include <asm/exec.h>
typedef char *elf_caddr_t;
@@ -91,7 +92,8 @@ static struct linux_binfmt elf_fdpic_format = {
static int __init init_elf_fdpic_binfmt(void)
{
- return register_binfmt(&elf_fdpic_format);
+ register_binfmt(&elf_fdpic_format);
+ return 0;
}
static void __exit exit_elf_fdpic_binfmt(void)
@@ -334,8 +336,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
current->mm->context.exec_fdpic_loadmap = 0;
current->mm->context.interp_fdpic_loadmap = 0;
- current->flags &= ~PF_FORKNOEXEC;
-
#ifdef CONFIG_MMU
elf_fdpic_arch_lay_out_mm(&exec_params,
&interp_params,
@@ -390,21 +390,17 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
(executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
stack_prot |= PROT_EXEC;
- down_write(&current->mm->mmap_sem);
- current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
+ current->mm->start_brk = vm_mmap(NULL, 0, stack_size, stack_prot,
MAP_PRIVATE | MAP_ANONYMOUS |
MAP_UNINITIALIZED | MAP_GROWSDOWN,
0);
if (IS_ERR_VALUE(current->mm->start_brk)) {
- up_write(&current->mm->mmap_sem);
retval = current->mm->start_brk;
current->mm->start_brk = 0;
goto error_kill;
}
- up_write(&current->mm->mmap_sem);
-
current->mm->brk = current->mm->start_brk;
current->mm->context.end_brk = current->mm->start_brk;
current->mm->context.end_brk +=
@@ -413,7 +409,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
#endif
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
if (create_elf_fdpic_tables(bprm, current->mm,
&exec_params, &interp_params) < 0)
goto error_kill;
@@ -632,10 +627,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
NEW_AUX_ENT(AT_FLAGS, 0);
NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
- NEW_AUX_ENT(AT_UID, (elf_addr_t) cred->uid);
- NEW_AUX_ENT(AT_EUID, (elf_addr_t) cred->euid);
- NEW_AUX_ENT(AT_GID, (elf_addr_t) cred->gid);
- NEW_AUX_ENT(AT_EGID, (elf_addr_t) cred->egid);
+ NEW_AUX_ENT(AT_UID, (elf_addr_t) from_kuid_munged(cred->user_ns, cred->uid));
+ NEW_AUX_ENT(AT_EUID, (elf_addr_t) from_kuid_munged(cred->user_ns, cred->euid));
+ NEW_AUX_ENT(AT_GID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->gid));
+ NEW_AUX_ENT(AT_EGID, (elf_addr_t) from_kgid_munged(cred->user_ns, cred->egid));
NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
NEW_AUX_ENT(AT_EXECFN, bprm->exec);
@@ -956,10 +951,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
mflags |= MAP_EXECUTABLE;
- down_write(&mm->mmap_sem);
- maddr = do_mmap(NULL, load_addr, top - base,
+ maddr = vm_mmap(NULL, load_addr, top - base,
PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
- up_write(&mm->mmap_sem);
if (IS_ERR_VALUE(maddr))
return (int) maddr;
@@ -1097,10 +1090,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
/* create the mapping */
disp = phdr->p_vaddr & ~PAGE_MASK;
- down_write(&mm->mmap_sem);
- maddr = do_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
+ maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
phdr->p_offset - disp);
- up_write(&mm->mmap_sem);
kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
loop, phdr->p_memsz + disp, prot, flags,
@@ -1144,10 +1135,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
unsigned long xmaddr;
flags |= MAP_FIXED | MAP_ANONYMOUS;
- down_write(&mm->mmap_sem);
- xmaddr = do_mmap(NULL, xaddr, excess - excess1,
+ xmaddr = vm_mmap(NULL, xaddr, excess - excess1,
prot, flags, 0);
- up_write(&mm->mmap_sem);
kdebug("mmap[%d] <anon>"
" ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
@@ -1432,8 +1421,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
psinfo->pr_flag = p->flags;
rcu_read_lock();
cred = __task_cred(p);
- SET_UID(psinfo->pr_uid, cred->uid);
- SET_GID(psinfo->pr_gid, cred->gid);
+ SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
+ SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index b8e8b0acf9b..2790c7e1912 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = {
static int __init init_em86_binfmt(void)
{
- return register_binfmt(&em86_format);
+ register_binfmt(&em86_format);
+ return 0;
}
static void __exit exit_em86_binfmt(void)
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 1bffbe0ed77..178cb70acc2 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -15,7 +15,7 @@
* JAN/99 -- coded full program relocation (gerg@snapgear.com)
*/
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -37,7 +37,6 @@
#include <linux/syscalls.h>
#include <asm/byteorder.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cacheflush.h>
@@ -543,10 +542,8 @@ static int load_flat_file(struct linux_binprm * bprm,
*/
DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
- down_write(&current->mm->mmap_sem);
- textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
+ textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_EXECUTABLE, 0);
- up_write(&current->mm->mmap_sem);
if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
@@ -557,17 +554,15 @@ static int load_flat_file(struct linux_binprm * bprm,
len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- down_write(&current->mm->mmap_sem);
- realdatastart = do_mmap(0, 0, len,
+ realdatastart = vm_mmap(0, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
if (!realdatastart)
realdatastart = (unsigned long) -ENOMEM;
printk("Unable to allocate RAM for process data, errno %d\n",
(int)-realdatastart);
- do_munmap(current->mm, textpos, text_len);
+ vm_munmap(textpos, text_len);
ret = realdatastart;
goto err;
}
@@ -591,8 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm,
}
if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
- do_munmap(current->mm, textpos, text_len);
- do_munmap(current->mm, realdatastart, len);
+ vm_munmap(textpos, text_len);
+ vm_munmap(realdatastart, len);
ret = result;
goto err;
}
@@ -604,10 +599,8 @@ static int load_flat_file(struct linux_binprm * bprm,
len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- down_write(&current->mm->mmap_sem);
- textpos = do_mmap(0, 0, len,
+ textpos = vm_mmap(0, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
@@ -661,7 +654,7 @@ static int load_flat_file(struct linux_binprm * bprm,
}
if (IS_ERR_VALUE(result)) {
printk("Unable to read code+data+bss, errno %d\n",(int)-result);
- do_munmap(current->mm, textpos, text_len + data_len + extra +
+ vm_munmap(textpos, text_len + data_len + extra +
MAX_SHARED_LIBS * sizeof(unsigned long));
ret = result;
goto err;
@@ -902,7 +895,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
libinfo.lib_list[j].start_data:UNLOADED_LIB;
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
set_binfmt(&flat_format);
@@ -950,7 +942,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
static int __init init_flat_binfmt(void)
{
- return register_binfmt(&flat_format);
+ register_binfmt(&flat_format);
+ return 0;
}
/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1e9edbdeda7..790b3cddca6 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/magic.h>
#include <linux/binfmts.h>
#include <linux/slab.h>
#include <linux/ctype.h>
@@ -504,7 +505,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
static void bm_evict_inode(struct inode *inode)
{
- end_writeback(inode);
+ clear_inode(inode);
kfree(inode->i_private);
}
@@ -560,7 +561,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
break;
case 2: set_bit(Enabled, &e->flags);
break;
- case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+ case 3: root = dget(file->f_path.dentry->d_sb->s_root);
mutex_lock(&root->d_inode->i_mutex);
kill_node(e);
@@ -587,7 +588,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
Node *e;
struct inode *inode;
struct dentry *root, *dentry;
- struct super_block *sb = file->f_path.mnt->mnt_sb;
+ struct super_block *sb = file->f_path.dentry->d_sb;
int err = 0;
e = create_entry(buffer, count);
@@ -666,7 +667,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
switch (res) {
case 1: enabled = 0; break;
case 2: enabled = 1; break;
- case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+ case 3: root = dget(file->f_path.dentry->d_sb->s_root);
mutex_lock(&root->d_inode->i_mutex);
while (!list_empty(&entries))
@@ -699,7 +700,7 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
[3] = {"register", &bm_register_operations, S_IWUSR},
/* last one */ {""}
};
- int err = simple_fill_super(sb, 0x42494e4d, bm_files);
+ int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
if (!err)
sb->s_op = &s_ops;
return err;
@@ -726,11 +727,8 @@ static struct file_system_type bm_fs_type = {
static int __init init_misc_binfmt(void)
{
int err = register_filesystem(&bm_fs_type);
- if (!err) {
- err = insert_binfmt(&misc_format);
- if (err)
- unregister_filesystem(&bm_fs_type);
- }
+ if (!err)
+ insert_binfmt(&misc_format);
return err;
}
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 396a9884591..d3b8c1f6315 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -105,7 +105,8 @@ static struct linux_binfmt script_format = {
static int __init init_script_binfmt(void)
{
- return register_binfmt(&script_format);
+ register_binfmt(&script_format);
+ return 0;
}
static void __exit exit_script_binfmt(void)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index cc8560f6c9b..4517aaff61b 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -147,10 +147,8 @@ static int map_som_binary(struct file *file,
code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize);
current->mm->start_code = code_start;
current->mm->end_code = code_start + code_size;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(file, code_start, code_size, prot,
+ retval = vm_mmap(file, code_start, code_size, prot,
flags, SOM_PAGESTART(hpuxhdr->exec_tfile));
- up_write(&current->mm->mmap_sem);
if (retval < 0 && retval > -1024)
goto out;
@@ -158,20 +156,16 @@ static int map_som_binary(struct file *file,
data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize);
current->mm->start_data = data_start;
current->mm->end_data = bss_start = data_start + data_size;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(file, data_start, data_size,
+ retval = vm_mmap(file, data_start, data_size,
prot | PROT_WRITE, flags,
SOM_PAGESTART(hpuxhdr->exec_dfile));
- up_write(&current->mm->mmap_sem);
if (retval < 0 && retval > -1024)
goto out;
som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize);
current->mm->start_brk = current->mm->brk = som_brk;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(NULL, bss_start, som_brk - bss_start,
+ retval = vm_mmap(NULL, bss_start, som_brk - bss_start,
prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (retval > 0 || retval < -1024)
retval = 0;
out:
@@ -225,7 +219,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
goto out_free;
/* OK, This is the point of no return */
- current->flags &= ~PF_FORKNOEXEC;
current->personality = PER_HPUX;
setup_new_exec(bprm);
@@ -289,7 +282,8 @@ static int load_som_library(struct file *f)
static int __init init_som_binfmt(void)
{
- return register_binfmt(&som_format);
+ register_binfmt(&som_format);
+ return 0;
}
static void __exit exit_som_binfmt(void)
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c2183f3917c..e85c04b9f61 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio)
bix.sector_size = bi->sector_size;
bio_for_each_segment(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
bix.prot_buf = prot_buf;
@@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio)
total += sectors * bi->tuple_size;
BUG_ON(total > bio->bi_integrity->bip_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
}
}
@@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio)
bix.sector_size = bi->sector_size;
bio_for_each_segment(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
bix.prot_buf = prot_buf;
@@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio)
ret = bi->verify_fn(&bix);
if (ret) {
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
return ret;
}
@@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio)
total += sectors * bi->tuple_size;
BUG_ON(total > bio->bi_integrity->bip_size);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
}
return ret;
diff --git a/fs/bio.c b/fs/bio.c
index b1fe82cf88c..73922abba83 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -19,12 +19,14 @@
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
+#include <linux/cgroup.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
@@ -418,6 +420,7 @@ void bio_put(struct bio *bio)
* last put frees it
*/
if (atomic_dec_and_test(&bio->bi_cnt)) {
+ bio_disassociate_task(bio);
bio->bi_next = NULL;
bio->bi_destructor(bio);
}
@@ -507,11 +510,12 @@ int bio_get_nr_vecs(struct block_device *bdev)
struct request_queue *q = bdev_get_queue(bdev);
int nr_pages;
- nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (nr_pages > queue_max_segments(q))
- nr_pages = queue_max_segments(q);
+ nr_pages = min_t(unsigned,
+ queue_max_segments(q),
+ queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
+
+ return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
- return nr_pages;
}
EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1645,6 +1649,64 @@ bad:
}
EXPORT_SYMBOL(bioset_create);
+#ifdef CONFIG_BLK_CGROUP
+/**
+ * bio_associate_current - associate a bio with %current
+ * @bio: target bio
+ *
+ * Associate @bio with %current if it hasn't been associated yet. Block
+ * layer will treat @bio as if it were issued by %current no matter which
+ * task actually issues it.
+ *
+ * This function takes an extra reference of @task's io_context and blkcg
+ * which will be put when @bio is released. The caller must own @bio,
+ * ensure %current->io_context exists, and is responsible for synchronizing
+ * calls to this function.
+ */
+int bio_associate_current(struct bio *bio)
+{
+ struct io_context *ioc;
+ struct cgroup_subsys_state *css;
+
+ if (bio->bi_ioc)
+ return -EBUSY;
+
+ ioc = current->io_context;
+ if (!ioc)
+ return -ENOENT;
+
+ /* acquire active ref on @ioc and associate */
+ get_io_context_active(ioc);
+ bio->bi_ioc = ioc;
+
+ /* associate blkcg if exists */
+ rcu_read_lock();
+ css = task_subsys_state(current, blkio_subsys_id);
+ if (css && css_tryget(css))
+ bio->bi_css = css;
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * bio_disassociate_task - undo bio_associate_current()
+ * @bio: target bio
+ */
+void bio_disassociate_task(struct bio *bio)
+{
+ if (bio->bi_ioc) {
+ put_io_context(bio->bi_ioc);
+ bio->bi_ioc = NULL;
+ }
+ if (bio->bi_css) {
+ css_put(bio->bi_css);
+ bio->bi_css = NULL;
+ }
+}
+
+#endif /* CONFIG_BLK_CGROUP */
+
static void __init biovec_init_slabs(void)
{
int i;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b07f1da1de4..c2bbe1fb132 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -16,7 +16,9 @@
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
+#include <linux/magic.h>
#include <linux/buffer_head.h>
+#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
@@ -24,7 +26,7 @@
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
-#include <linux/kmemleak.h>
+#include <linux/cleancache.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -68,7 +70,7 @@ static void bdev_inode_switch_bdi(struct inode *inode,
spin_unlock(&dst->wb.list_lock);
}
-static sector_t max_block(struct block_device *bdev)
+sector_t blkdev_max_block(struct block_device *bdev)
{
sector_t retval = ~((sector_t)0);
loff_t sz = i_size_read(bdev->bd_inode);
@@ -82,13 +84,35 @@ static sector_t max_block(struct block_device *bdev)
}
/* Kill _all_ buffers and pagecache , dirty or not.. */
-static void kill_bdev(struct block_device *bdev)
+void kill_bdev(struct block_device *bdev)
{
- if (bdev->bd_inode->i_mapping->nrpages == 0)
+ struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+ if (mapping->nrpages == 0)
return;
+
invalidate_bh_lrus();
- truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+ truncate_inode_pages(mapping, 0);
}
+EXPORT_SYMBOL(kill_bdev);
+
+/* Invalidate clean unused buffers and pagecache. */
+void invalidate_bdev(struct block_device *bdev)
+{
+ struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+ if (mapping->nrpages == 0)
+ return;
+
+ invalidate_bh_lrus();
+ lru_add_drain_all(); /* make sure all lru add caches are flushed */
+ invalidate_mapping_pages(mapping, 0, -1);
+ /* 99% of the time, we don't need to flush the cleancache on the bdev.
+ * But, for the strange corners, lets be cautious
+ */
+ cleancache_invalidate_inode(mapping);
+}
+EXPORT_SYMBOL(invalidate_bdev);
int set_blocksize(struct block_device *bdev, int size)
{
@@ -139,7 +163,7 @@ static int
blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- if (iblock >= max_block(I_BDEV(inode))) {
+ if (iblock >= blkdev_max_block(I_BDEV(inode))) {
if (create)
return -EIO;
@@ -161,7 +185,7 @@ static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- sector_t end_block = max_block(I_BDEV(inode));
+ sector_t end_block = blkdev_max_block(I_BDEV(inode));
unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
if ((iblock + max_blocks) > end_block) {
@@ -425,7 +449,6 @@ static void bdev_i_callback(struct rcu_head *head)
struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
@@ -464,7 +487,7 @@ static void bdev_evict_inode(struct inode *inode)
struct list_head *p;
truncate_inode_pages(&inode->i_data, 0);
invalidate_inode_buffers(inode); /* is it needed here? */
- end_writeback(inode);
+ clear_inode(inode);
spin_lock(&bdev_lock);
while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
__bd_forget(list_entry(p, struct inode, i_devices));
@@ -484,7 +507,7 @@ static const struct super_operations bdev_sops = {
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
}
static struct file_system_type bd_type = {
@@ -493,12 +516,12 @@ static struct file_system_type bd_type = {
.kill_sb = kill_anon_super,
};
-struct super_block *blockdev_superblock __read_mostly;
+static struct super_block *blockdev_superblock __read_mostly;
void __init bdev_cache_init(void)
{
int err;
- struct vfsmount *bd_mnt;
+ static struct vfsmount *bd_mnt;
bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -510,12 +533,7 @@ void __init bdev_cache_init(void)
bd_mnt = kern_mount(&bd_type);
if (IS_ERR(bd_mnt))
panic("Cannot create bdev pseudo-fs");
- /*
- * This vfsmount structure is only used to obtain the
- * blockdev_superblock, so tell kmemleak not to report it.
- */
- kmemleak_not_leak(bd_mnt);
- blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
+ blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
}
/*
@@ -639,6 +657,11 @@ static struct block_device *bd_acquire(struct inode *inode)
return bdev;
}
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+ return sb == blockdev_superblock;
+}
+
/* Call when you free inode */
void bd_forget(struct inode *inode)
@@ -1117,6 +1140,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
bdev->bd_disk = disk;
+ bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
if (!partno) {
struct backing_dev_info *bdi;
@@ -1137,6 +1161,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
disk_put_part(bdev->bd_part);
bdev->bd_part = NULL;
bdev->bd_disk = NULL;
+ bdev->bd_queue = NULL;
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
put_disk(disk);
@@ -1159,8 +1184,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
- rescan_partitions(disk, bdev);
+ if (bdev->bd_invalidated) {
+ if (!ret)
+ rescan_partitions(disk, bdev);
+ else if (ret == -ENOMEDIUM)
+ invalidate_partitions(disk, bdev);
+ }
if (ret)
goto out_clear;
} else {
@@ -1190,8 +1219,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
- rescan_partitions(bdev->bd_disk, bdev);
+ if (bdev->bd_invalidated) {
+ if (!ret)
+ rescan_partitions(bdev->bd_disk, bdev);
+ else if (ret == -ENOMEDIUM)
+ invalidate_partitions(bdev->bd_disk, bdev);
+ }
if (ret)
goto out_unlock_bdev;
}
@@ -1210,6 +1243,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
disk_put_part(bdev->bd_part);
bdev->bd_disk = NULL;
bdev->bd_part = NULL;
+ bdev->bd_queue = NULL;
bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
if (bdev != bdev->bd_contains)
__blkdev_put(bdev->bd_contains, mode, 1);
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index ecb9fd3be14..d33f01c08b6 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -31,3 +31,22 @@ config BTRFS_FS_POSIX_ACL
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N
+
+config BTRFS_FS_CHECK_INTEGRITY
+ bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
+ depends on BTRFS_FS
+ help
+ Adds code that examines all block write requests (including
+ writes of the super block). The goal is to verify that the
+ state of the filesystem on disk is always consistent, i.e.,
+ after a power-loss or kernel panic event the filesystem is
+ in a consistent state.
+
+ If the integrity check tool is included and activated in
+ the mount options, plenty of kernel memory is used, and
+ plenty of additional CPU cycles are spent. Enabling this
+ functionality is not intended for normal use.
+
+ In most cases, unless you are a btrfs developer who needs
+ to verify the integrity of (super)-block write requests
+ during the run of a regression test, say N
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index c0ddfd29c5e..0c4fa2befae 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,6 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
- reada.o backref.o
+ reada.o backref.o ulist.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
+btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 89b156d85d6..761e2cd8fed 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
+ } else {
+ cache_no_acl(inode);
}
+ } else {
+ cache_no_acl(inode);
}
failed:
posix_acl_release(acl);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 0b394580d86..42704149b72 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -171,11 +171,11 @@ out:
spin_unlock_irqrestore(&workers->lock, flags);
}
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
+static noinline void run_ordered_completions(struct btrfs_workers *workers,
struct btrfs_work *work)
{
if (!workers->ordered)
- return 0;
+ return;
set_bit(WORK_DONE_BIT, &work->flags);
@@ -213,7 +213,6 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
}
spin_unlock(&workers->order_lock);
- return 0;
}
static void put_worker(struct btrfs_worker_thread *worker)
@@ -334,7 +333,7 @@ again:
if (freezing(current)) {
worker->working = 0;
spin_unlock_irq(&worker->lock);
- refrigerator();
+ try_to_freeze();
} else {
spin_unlock_irq(&worker->lock);
if (!kthread_should_stop()) {
@@ -399,7 +398,7 @@ again:
/*
* this will wait for all the worker threads to shutdown
*/
-int btrfs_stop_workers(struct btrfs_workers *workers)
+void btrfs_stop_workers(struct btrfs_workers *workers)
{
struct list_head *cur;
struct btrfs_worker_thread *worker;
@@ -427,7 +426,6 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
put_worker(worker);
}
spin_unlock_irq(&workers->lock);
- return 0;
}
/*
@@ -615,14 +613,14 @@ found:
* it was taken from. It is intended for use with long running work functions
* that make some progress and want to give the cpu up for others.
*/
-int btrfs_requeue_work(struct btrfs_work *work)
+void btrfs_requeue_work(struct btrfs_work *work)
{
struct btrfs_worker_thread *worker = work->worker;
unsigned long flags;
int wake = 0;
if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
- goto out;
+ return;
spin_lock_irqsave(&worker->lock, flags);
if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
@@ -649,9 +647,6 @@ int btrfs_requeue_work(struct btrfs_work *work)
if (wake)
wake_up_process(worker->task);
spin_unlock_irqrestore(&worker->lock, flags);
-out:
-
- return 0;
}
void btrfs_set_work_high_prio(struct btrfs_work *work)
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index f34cc31fa3c..063698b90ce 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -111,9 +111,9 @@ struct btrfs_workers {
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
int btrfs_start_workers(struct btrfs_workers *workers);
-int btrfs_stop_workers(struct btrfs_workers *workers);
+void btrfs_stop_workers(struct btrfs_workers *workers);
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
struct btrfs_workers *async_starter);
-int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_requeue_work(struct btrfs_work *work);
void btrfs_set_work_high_prio(struct btrfs_work *work);
#endif
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 22c64fff1bd..a383c18e74e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -19,19 +19,1048 @@
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
+#include "ulist.h"
+#include "transaction.h"
+#include "delayed-ref.h"
+#include "locking.h"
-struct __data_ref {
- struct list_head list;
+struct extent_inode_elem {
u64 inum;
- u64 root;
- u64 extent_data_item_offset;
+ u64 offset;
+ struct extent_inode_elem *next;
};
-struct __shared_ref {
- struct list_head list;
+static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
+ struct btrfs_file_extent_item *fi,
+ u64 extent_item_pos,
+ struct extent_inode_elem **eie)
+{
+ u64 data_offset;
+ u64 data_len;
+ struct extent_inode_elem *e;
+
+ data_offset = btrfs_file_extent_offset(eb, fi);
+ data_len = btrfs_file_extent_num_bytes(eb, fi);
+
+ if (extent_item_pos < data_offset ||
+ extent_item_pos >= data_offset + data_len)
+ return 1;
+
+ e = kmalloc(sizeof(*e), GFP_NOFS);
+ if (!e)
+ return -ENOMEM;
+
+ e->next = *eie;
+ e->inum = key->objectid;
+ e->offset = key->offset + (extent_item_pos - data_offset);
+ *eie = e;
+
+ return 0;
+}
+
+static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
+ u64 extent_item_pos,
+ struct extent_inode_elem **eie)
+{
u64 disk_byte;
+ struct btrfs_key key;
+ struct btrfs_file_extent_item *fi;
+ int slot;
+ int nritems;
+ int extent_type;
+ int ret;
+
+ /*
+ * from the shared data ref, we only have the leaf but we need
+ * the key. thus, we must look into all items and see that we
+ * find one (some) with a reference to our extent item.
+ */
+ nritems = btrfs_header_nritems(eb);
+ for (slot = 0; slot < nritems; ++slot) {
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(eb, fi);
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+ continue;
+ /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+ if (disk_byte != wanted_disk_byte)
+ continue;
+
+ ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * this structure records all encountered refs on the way up to the root
+ */
+struct __prelim_ref {
+ struct list_head list;
+ u64 root_id;
+ struct btrfs_key key_for_search;
+ int level;
+ int count;
+ struct extent_inode_elem *inode_list;
+ u64 parent;
+ u64 wanted_disk_byte;
};
+/*
+ * the rules for all callers of this function are:
+ * - obtaining the parent is the goal
+ * - if you add a key, you must know that it is a correct key
+ * - if you cannot add the parent or a correct key, then we will look into the
+ * block later to set a correct key
+ *
+ * delayed refs
+ * ============
+ * backref type | shared | indirect | shared | indirect
+ * information | tree | tree | data | data
+ * --------------------+--------+----------+--------+----------
+ * parent logical | y | - | - | -
+ * key to resolve | - | y | y | y
+ * tree block logical | - | - | - | -
+ * root for resolving | y | y | y | y
+ *
+ * - column 1: we've the parent -> done
+ * - column 2, 3, 4: we use the key to find the parent
+ *
+ * on disk refs (inline or keyed)
+ * ==============================
+ * backref type | shared | indirect | shared | indirect
+ * information | tree | tree | data | data
+ * --------------------+--------+----------+--------+----------
+ * parent logical | y | - | y | -
+ * key to resolve | - | - | - | y
+ * tree block logical | y | y | y | y
+ * root for resolving | - | y | y | y
+ *
+ * - column 1, 3: we've the parent -> done
+ * - column 2: we take the first key from the block to find the parent
+ * (see __add_missing_keys)
+ * - column 4: we use the key to find the parent
+ *
+ * additional information that's available but not required to find the parent
+ * block might help in merging entries to gain some speed.
+ */
+
+static int __add_prelim_ref(struct list_head *head, u64 root_id,
+ struct btrfs_key *key, int level,
+ u64 parent, u64 wanted_disk_byte, int count)
+{
+ struct __prelim_ref *ref;
+
+ /* in case we're adding delayed refs, we're holding the refs spinlock */
+ ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
+ if (!ref)
+ return -ENOMEM;
+
+ ref->root_id = root_id;
+ if (key)
+ ref->key_for_search = *key;
+ else
+ memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
+
+ ref->inode_list = NULL;
+ ref->level = level;
+ ref->count = count;
+ ref->parent = parent;
+ ref->wanted_disk_byte = wanted_disk_byte;
+ list_add_tail(&ref->list, head);
+
+ return 0;
+}
+
+static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
+ struct ulist *parents, int level,
+ struct btrfs_key *key_for_search, u64 time_seq,
+ u64 wanted_disk_byte,
+ const u64 *extent_item_pos)
+{
+ int ret = 0;
+ int slot;
+ struct extent_buffer *eb;
+ struct btrfs_key key;
+ struct btrfs_file_extent_item *fi;
+ struct extent_inode_elem *eie = NULL;
+ u64 disk_byte;
+
+ if (level != 0) {
+ eb = path->nodes[level];
+ ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
+ if (ret < 0)
+ return ret;
+ return 0;
+ }
+
+ /*
+ * We normally enter this function with the path already pointing to
+ * the first item to check. But sometimes, we may enter it with
+ * slot==nritems. In that case, go to the next leaf before we continue.
+ */
+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
+ ret = btrfs_next_old_leaf(root, path, time_seq);
+
+ while (!ret) {
+ eb = path->nodes[0];
+ slot = path->slots[0];
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+
+ if (key.objectid != key_for_search->objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+
+ fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+
+ if (disk_byte == wanted_disk_byte) {
+ eie = NULL;
+ if (extent_item_pos) {
+ ret = check_extent_in_eb(&key, eb, fi,
+ *extent_item_pos,
+ &eie);
+ if (ret < 0)
+ break;
+ }
+ if (!ret) {
+ ret = ulist_add(parents, eb->start,
+ (unsigned long)eie, GFP_NOFS);
+ if (ret < 0)
+ break;
+ if (!extent_item_pos) {
+ ret = btrfs_next_old_leaf(root, path,
+ time_seq);
+ continue;
+ }
+ }
+ }
+ ret = btrfs_next_old_item(root, path, time_seq);
+ }
+
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
+
+/*
+ * resolve an indirect backref in the form (root_id, key, level)
+ * to a logical address
+ */
+static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
+ int search_commit_root,
+ u64 time_seq,
+ struct __prelim_ref *ref,
+ struct ulist *parents,
+ const u64 *extent_item_pos)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root;
+ struct btrfs_key root_key;
+ struct extent_buffer *eb;
+ int ret = 0;
+ int root_level;
+ int level = ref->level;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->search_commit_root = !!search_commit_root;
+
+ root_key.objectid = ref->root_id;
+ root_key.type = BTRFS_ROOT_ITEM_KEY;
+ root_key.offset = (u64)-1;
+ root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+
+ rcu_read_lock();
+ root_level = btrfs_header_level(root->node);
+ rcu_read_unlock();
+
+ if (root_level + 1 == level)
+ goto out;
+
+ path->lowest_level = level;
+ ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+ pr_debug("search slot in root %llu (level %d, ref count %d) returned "
+ "%d for key (%llu %u %llu)\n",
+ (unsigned long long)ref->root_id, level, ref->count, ret,
+ (unsigned long long)ref->key_for_search.objectid,
+ ref->key_for_search.type,
+ (unsigned long long)ref->key_for_search.offset);
+ if (ret < 0)
+ goto out;
+
+ eb = path->nodes[level];
+ while (!eb) {
+ if (!level) {
+ WARN_ON(1);
+ ret = 1;
+ goto out;
+ }
+ level--;
+ eb = path->nodes[level];
+ }
+
+ ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
+ time_seq, ref->wanted_disk_byte,
+ extent_item_pos);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * resolve all indirect backrefs from the list
+ */
+static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+ int search_commit_root, u64 time_seq,
+ struct list_head *head,
+ const u64 *extent_item_pos)
+{
+ int err;
+ int ret = 0;
+ struct __prelim_ref *ref;
+ struct __prelim_ref *ref_safe;
+ struct __prelim_ref *new_ref;
+ struct ulist *parents;
+ struct ulist_node *node;
+ struct ulist_iterator uiter;
+
+ parents = ulist_alloc(GFP_NOFS);
+ if (!parents)
+ return -ENOMEM;
+
+ /*
+ * _safe allows us to insert directly after the current item without
+ * iterating over the newly inserted items.
+ * we're also allowed to re-assign ref during iteration.
+ */
+ list_for_each_entry_safe(ref, ref_safe, head, list) {
+ if (ref->parent) /* already direct */
+ continue;
+ if (ref->count == 0)
+ continue;
+ err = __resolve_indirect_ref(fs_info, search_commit_root,
+ time_seq, ref, parents,
+ extent_item_pos);
+ if (err) {
+ if (ret == 0)
+ ret = err;
+ continue;
+ }
+
+ /* we put the first parent into the ref at hand */
+ ULIST_ITER_INIT(&uiter);
+ node = ulist_next(parents, &uiter);
+ ref->parent = node ? node->val : 0;
+ ref->inode_list =
+ node ? (struct extent_inode_elem *)node->aux : 0;
+
+ /* additional parents require new refs being added here */
+ while ((node = ulist_next(parents, &uiter))) {
+ new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+ if (!new_ref) {
+ ret = -ENOMEM;
+ break;
+ }
+ memcpy(new_ref, ref, sizeof(*ref));
+ new_ref->parent = node->val;
+ new_ref->inode_list =
+ (struct extent_inode_elem *)node->aux;
+ list_add(&new_ref->list, &ref->list);
+ }
+ ulist_reinit(parents);
+ }
+
+ ulist_free(parents);
+ return ret;
+}
+
+static inline int ref_for_same_block(struct __prelim_ref *ref1,
+ struct __prelim_ref *ref2)
+{
+ if (ref1->level != ref2->level)
+ return 0;
+ if (ref1->root_id != ref2->root_id)
+ return 0;
+ if (ref1->key_for_search.type != ref2->key_for_search.type)
+ return 0;
+ if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
+ return 0;
+ if (ref1->key_for_search.offset != ref2->key_for_search.offset)
+ return 0;
+ if (ref1->parent != ref2->parent)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * read tree blocks and add keys where required.
+ */
+static int __add_missing_keys(struct btrfs_fs_info *fs_info,
+ struct list_head *head)
+{
+ struct list_head *pos;
+ struct extent_buffer *eb;
+
+ list_for_each(pos, head) {
+ struct __prelim_ref *ref;
+ ref = list_entry(pos, struct __prelim_ref, list);
+
+ if (ref->parent)
+ continue;
+ if (ref->key_for_search.type)
+ continue;
+ BUG_ON(!ref->wanted_disk_byte);
+ eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
+ fs_info->tree_root->leafsize, 0);
+ BUG_ON(!eb);
+ btrfs_tree_read_lock(eb);
+ if (btrfs_header_level(eb) == 0)
+ btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
+ else
+ btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
+ btrfs_tree_read_unlock(eb);
+ free_extent_buffer(eb);
+ }
+ return 0;
+}
+
+/*
+ * merge two lists of backrefs and adjust counts accordingly
+ *
+ * mode = 1: merge identical keys, if key is set
+ * FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
+ * additionally, we could even add a key range for the blocks we
+ * looked into to merge even more (-> replace unresolved refs by those
+ * having a parent).
+ * mode = 2: merge identical parents
+ */
+static int __merge_refs(struct list_head *head, int mode)
+{
+ struct list_head *pos1;
+
+ list_for_each(pos1, head) {
+ struct list_head *n2;
+ struct list_head *pos2;
+ struct __prelim_ref *ref1;
+
+ ref1 = list_entry(pos1, struct __prelim_ref, list);
+
+ for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
+ pos2 = n2, n2 = pos2->next) {
+ struct __prelim_ref *ref2;
+ struct __prelim_ref *xchg;
+
+ ref2 = list_entry(pos2, struct __prelim_ref, list);
+
+ if (mode == 1) {
+ if (!ref_for_same_block(ref1, ref2))
+ continue;
+ if (!ref1->parent && ref2->parent) {
+ xchg = ref1;
+ ref1 = ref2;
+ ref2 = xchg;
+ }
+ ref1->count += ref2->count;
+ } else {
+ if (ref1->parent != ref2->parent)
+ continue;
+ ref1->count += ref2->count;
+ }
+ list_del(&ref2->list);
+ kfree(ref2);
+ }
+
+ }
+ return 0;
+}
+
+/*
+ * add all currently queued delayed refs from this head whose seq nr is
+ * smaller or equal that seq to the list
+ */
+static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
+ struct list_head *prefs)
+{
+ struct btrfs_delayed_extent_op *extent_op = head->extent_op;
+ struct rb_node *n = &head->node.rb_node;
+ struct btrfs_key key;
+ struct btrfs_key op_key = {0};
+ int sgn;
+ int ret = 0;
+
+ if (extent_op && extent_op->update_key)
+ btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
+
+ while ((n = rb_prev(n))) {
+ struct btrfs_delayed_ref_node *node;
+ node = rb_entry(n, struct btrfs_delayed_ref_node,
+ rb_node);
+ if (node->bytenr != head->node.bytenr)
+ break;
+ WARN_ON(node->is_head);
+
+ if (node->seq > seq)
+ continue;
+
+ switch (node->action) {
+ case BTRFS_ADD_DELAYED_EXTENT:
+ case BTRFS_UPDATE_DELAYED_HEAD:
+ WARN_ON(1);
+ continue;
+ case BTRFS_ADD_DELAYED_REF:
+ sgn = 1;
+ break;
+ case BTRFS_DROP_DELAYED_REF:
+ sgn = -1;
+ break;
+ default:
+ BUG_ON(1);
+ }
+ switch (node->type) {
+ case BTRFS_TREE_BLOCK_REF_KEY: {
+ struct btrfs_delayed_tree_ref *ref;
+
+ ref = btrfs_delayed_node_to_tree_ref(node);
+ ret = __add_prelim_ref(prefs, ref->root, &op_key,
+ ref->level + 1, 0, node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ case BTRFS_SHARED_BLOCK_REF_KEY: {
+ struct btrfs_delayed_tree_ref *ref;
+
+ ref = btrfs_delayed_node_to_tree_ref(node);
+ ret = __add_prelim_ref(prefs, ref->root, NULL,
+ ref->level + 1, ref->parent,
+ node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_delayed_data_ref *ref;
+ ref = btrfs_delayed_node_to_data_ref(node);
+
+ key.objectid = ref->objectid;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = ref->offset;
+ ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
+ node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_delayed_data_ref *ref;
+
+ ref = btrfs_delayed_node_to_data_ref(node);
+
+ key.objectid = ref->objectid;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = ref->offset;
+ ret = __add_prelim_ref(prefs, ref->root, &key, 0,
+ ref->parent, node->bytenr,
+ node->ref_mod * sgn);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ BUG_ON(ret);
+ }
+
+ return 0;
+}
+
+/*
+ * add all inline backrefs for bytenr to the list
+ */
+static int __add_inline_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ int *info_level, struct list_head *prefs)
+{
+ int ret = 0;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ unsigned long ptr;
+ unsigned long end;
+ struct btrfs_extent_item *ei;
+ u64 flags;
+ u64 item_size;
+
+ /*
+ * enumerate all inline refs
+ */
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ item_size = btrfs_item_size_nr(leaf, slot);
+ BUG_ON(item_size < sizeof(*ei));
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(leaf, ei);
+
+ ptr = (unsigned long)(ei + 1);
+ end = (unsigned long)ei + item_size;
+
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ struct btrfs_tree_block_info *info;
+
+ info = (struct btrfs_tree_block_info *)ptr;
+ *info_level = btrfs_tree_block_level(leaf, info);
+ ptr += sizeof(struct btrfs_tree_block_info);
+ BUG_ON(ptr > end);
+ } else {
+ BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
+ }
+
+ while (ptr < end) {
+ struct btrfs_extent_inline_ref *iref;
+ u64 offset;
+ int type;
+
+ iref = (struct btrfs_extent_inline_ref *)ptr;
+ type = btrfs_extent_inline_ref_type(leaf, iref);
+ offset = btrfs_extent_inline_ref_offset(leaf, iref);
+
+ switch (type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, NULL,
+ *info_level + 1, offset,
+ bytenr, 1);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ sdref = (struct btrfs_shared_data_ref *)(iref + 1);
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
+ bytenr, count);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, offset, NULL,
+ *info_level + 1, 0,
+ bytenr, 1);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+ bytenr, count);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ BUG_ON(ret);
+ ptr += btrfs_extent_inline_ref_size(type);
+ }
+
+ return 0;
+}
+
+/*
+ * add all non-inline backrefs for bytenr to the list
+ */
+static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, u64 bytenr,
+ int info_level, struct list_head *prefs)
+{
+ struct btrfs_root *extent_root = fs_info->extent_root;
+ int ret;
+ int slot;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+
+ while (1) {
+ ret = btrfs_next_item(extent_root, path);
+ if (ret < 0)
+ break;
+ if (ret) {
+ ret = 0;
+ break;
+ }
+
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid != bytenr)
+ break;
+ if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
+ continue;
+ if (key.type > BTRFS_SHARED_DATA_REF_KEY)
+ break;
+
+ switch (key.type) {
+ case BTRFS_SHARED_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, 0, NULL,
+ info_level + 1, key.offset,
+ bytenr, 1);
+ break;
+ case BTRFS_SHARED_DATA_REF_KEY: {
+ struct btrfs_shared_data_ref *sdref;
+ int count;
+
+ sdref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_shared_data_ref);
+ count = btrfs_shared_data_ref_count(leaf, sdref);
+ ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
+ bytenr, count);
+ break;
+ }
+ case BTRFS_TREE_BLOCK_REF_KEY:
+ ret = __add_prelim_ref(prefs, key.offset, NULL,
+ info_level + 1, 0,
+ bytenr, 1);
+ break;
+ case BTRFS_EXTENT_DATA_REF_KEY: {
+ struct btrfs_extent_data_ref *dref;
+ int count;
+ u64 root;
+
+ dref = btrfs_item_ptr(leaf, slot,
+ struct btrfs_extent_data_ref);
+ count = btrfs_extent_data_ref_count(leaf, dref);
+ key.objectid = btrfs_extent_data_ref_objectid(leaf,
+ dref);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+ root = btrfs_extent_data_ref_root(leaf, dref);
+ ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+ bytenr, count);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ }
+ BUG_ON(ret);
+ }
+
+ return ret;
+}
+
+/*
+ * this adds all existing backrefs (inline backrefs, backrefs and delayed
+ * refs) for the given bytenr to the refs list, merges duplicates and resolves
+ * indirect refs to their parent bytenr.
+ * When roots are found, they're added to the roots list
+ *
+ * FIXME some caching might speed things up
+ */
+static int find_parent_nodes(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 delayed_ref_seq, u64 time_seq,
+ struct ulist *refs, struct ulist *roots,
+ const u64 *extent_item_pos)
+{
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct btrfs_delayed_ref_root *delayed_refs = NULL;
+ struct btrfs_delayed_ref_head *head;
+ int info_level = 0;
+ int ret;
+ int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT);
+ struct list_head prefs_delayed;
+ struct list_head prefs;
+ struct __prelim_ref *ref;
+
+ INIT_LIST_HEAD(&prefs);
+ INIT_LIST_HEAD(&prefs_delayed);
+
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->search_commit_root = !!search_commit_root;
+
+ /*
+ * grab both a lock on the path and a lock on the delayed ref head.
+ * We need both to get a consistent picture of how the refs look
+ * at a specified point in time
+ */
+again:
+ head = NULL;
+
+ ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret == 0);
+
+ if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) {
+ /*
+ * look if there are updates for this ref queued and lock the
+ * head
+ */
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ head = btrfs_find_delayed_ref_head(trans, bytenr);
+ if (head) {
+ if (!mutex_trylock(&head->mutex)) {
+ atomic_inc(&head->node.refs);
+ spin_unlock(&delayed_refs->lock);
+
+ btrfs_release_path(path);
+
+ /*
+ * Mutex was contended, block until it's
+ * released and try again
+ */
+ mutex_lock(&head->mutex);
+ mutex_unlock(&head->mutex);
+ btrfs_put_delayed_ref(&head->node);
+ goto again;
+ }
+ ret = __add_delayed_refs(head, delayed_ref_seq,
+ &prefs_delayed);
+ mutex_unlock(&head->mutex);
+ if (ret) {
+ spin_unlock(&delayed_refs->lock);
+ goto out;
+ }
+ }
+ spin_unlock(&delayed_refs->lock);
+ }
+
+ if (path->slots[0]) {
+ struct extent_buffer *leaf;
+ int slot;
+
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (key.objectid == bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY) {
+ ret = __add_inline_refs(fs_info, path, bytenr,
+ &info_level, &prefs);
+ if (ret)
+ goto out;
+ ret = __add_keyed_refs(fs_info, path, bytenr,
+ info_level, &prefs);
+ if (ret)
+ goto out;
+ }
+ }
+ btrfs_release_path(path);
+
+ list_splice_init(&prefs_delayed, &prefs);
+
+ ret = __add_missing_keys(fs_info, &prefs);
+ if (ret)
+ goto out;
+
+ ret = __merge_refs(&prefs, 1);
+ if (ret)
+ goto out;
+
+ ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
+ &prefs, extent_item_pos);
+ if (ret)
+ goto out;
+
+ ret = __merge_refs(&prefs, 2);
+ if (ret)
+ goto out;
+
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ list_del(&ref->list);
+ if (ref->count < 0)
+ WARN_ON(1);
+ if (ref->count && ref->root_id && ref->parent == 0) {
+ /* no parent == root of tree */
+ ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
+ BUG_ON(ret < 0);
+ }
+ if (ref->count && ref->parent) {
+ struct extent_inode_elem *eie = NULL;
+ if (extent_item_pos && !ref->inode_list) {
+ u32 bsz;
+ struct extent_buffer *eb;
+ bsz = btrfs_level_size(fs_info->extent_root,
+ info_level);
+ eb = read_tree_block(fs_info->extent_root,
+ ref->parent, bsz, 0);
+ BUG_ON(!eb);
+ ret = find_extent_in_eb(eb, bytenr,
+ *extent_item_pos, &eie);
+ ref->inode_list = eie;
+ free_extent_buffer(eb);
+ }
+ ret = ulist_add_merge(refs, ref->parent,
+ (unsigned long)ref->inode_list,
+ (unsigned long *)&eie, GFP_NOFS);
+ if (!ret && extent_item_pos) {
+ /*
+ * we've recorded that parent, so we must extend
+ * its inode list here
+ */
+ BUG_ON(!eie);
+ while (eie->next)
+ eie = eie->next;
+ eie->next = ref->inode_list;
+ }
+ BUG_ON(ret < 0);
+ }
+ kfree(ref);
+ }
+
+out:
+ btrfs_free_path(path);
+ while (!list_empty(&prefs)) {
+ ref = list_first_entry(&prefs, struct __prelim_ref, list);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+ while (!list_empty(&prefs_delayed)) {
+ ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
+ list);
+ list_del(&ref->list);
+ kfree(ref);
+ }
+
+ return ret;
+}
+
+static void free_leaf_list(struct ulist *blocks)
+{
+ struct ulist_node *node = NULL;
+ struct extent_inode_elem *eie;
+ struct extent_inode_elem *eie_next;
+ struct ulist_iterator uiter;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((node = ulist_next(blocks, &uiter))) {
+ if (!node->aux)
+ continue;
+ eie = (struct extent_inode_elem *)node->aux;
+ for (; eie; eie = eie_next) {
+ eie_next = eie->next;
+ kfree(eie);
+ }
+ node->aux = 0;
+ }
+
+ ulist_free(blocks);
+}
+
+/*
+ * Finds all leafs with a reference to the specified combination of bytenr and
+ * offset. key_list_head will point to a list of corresponding keys (caller must
+ * free each list element). The leafs will be stored in the leafs ulist, which
+ * must be freed with ulist_free.
+ *
+ * returns 0 on success, <0 on error
+ */
+static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 delayed_ref_seq, u64 time_seq,
+ struct ulist **leafs,
+ const u64 *extent_item_pos)
+{
+ struct ulist *tmp;
+ int ret;
+
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+ *leafs = ulist_alloc(GFP_NOFS);
+ if (!*leafs) {
+ ulist_free(tmp);
+ return -ENOMEM;
+ }
+
+ ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+ time_seq, *leafs, tmp, extent_item_pos);
+ ulist_free(tmp);
+
+ if (ret < 0 && ret != -ENOENT) {
+ free_leaf_list(*leafs);
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * walk all backrefs for a given extent to find all roots that reference this
+ * extent. Walking a backref means finding all extents that reference this
+ * extent and in turn walk the backrefs of those, too. Naturally this is a
+ * recursive process, but here it is implemented in an iterative fashion: We
+ * find all referencing extents for the extent in question and put them on a
+ * list. In turn, we find all referencing extents for those, further appending
+ * to the list. The way we iterate the list allows adding more elements after
+ * the current while iterating. The process stops when we reach the end of the
+ * list. Found roots are added to the roots list.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 delayed_ref_seq, u64 time_seq,
+ struct ulist **roots)
+{
+ struct ulist *tmp;
+ struct ulist_node *node = NULL;
+ struct ulist_iterator uiter;
+ int ret;
+
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ return -ENOMEM;
+ *roots = ulist_alloc(GFP_NOFS);
+ if (!*roots) {
+ ulist_free(tmp);
+ return -ENOMEM;
+ }
+
+ ULIST_ITER_INIT(&uiter);
+ while (1) {
+ ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
+ time_seq, tmp, *roots, NULL);
+ if (ret < 0 && ret != -ENOENT) {
+ ulist_free(tmp);
+ ulist_free(*roots);
+ return ret;
+ }
+ node = ulist_next(tmp, &uiter);
+ if (!node)
+ break;
+ bytenr = node->val;
+ }
+
+ ulist_free(tmp);
+ return 0;
+}
+
+
static int __inode_info(u64 inum, u64 ioff, u8 key_type,
struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_key *found_key)
@@ -108,19 +1137,25 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
s64 bytes_left = size - 1;
struct extent_buffer *eb = eb_in;
struct btrfs_key found_key;
+ int leave_spinning = path->leave_spinning;
if (bytes_left >= 0)
dest[bytes_left] = '\0';
+ path->leave_spinning = 1;
while (1) {
len = btrfs_inode_ref_name_len(eb, iref);
bytes_left -= len;
if (bytes_left >= 0)
read_extent_buffer(eb, dest + bytes_left,
(unsigned long)(iref + 1), len);
- if (eb != eb_in)
+ if (eb != eb_in) {
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ }
ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+ if (ret > 0)
+ ret = -ENOENT;
if (ret)
break;
next_inum = found_key.offset;
@@ -132,8 +1167,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
slot = path->slots[0];
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
- if (eb != eb_in)
+ if (eb != eb_in) {
atomic_inc(&eb->refs);
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ }
btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
@@ -144,6 +1182,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
}
btrfs_release_path(path);
+ path->leave_spinning = leave_spinning;
if (ret)
return ERR_PTR(ret);
@@ -181,8 +1220,11 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
if (found_key->type != BTRFS_EXTENT_ITEM_KEY ||
found_key->objectid > logical ||
- found_key->objectid + found_key->offset <= logical)
+ found_key->objectid + found_key->offset <= logical) {
+ pr_debug("logical %llu is not within any extent\n",
+ (unsigned long long)logical);
return -ENOENT;
+ }
eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, path->slots[0]);
@@ -191,6 +1233,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
flags = btrfs_extent_flags(eb, ei);
+ pr_debug("logical %llu is at position %llu within the extent (%llu "
+ "EXTENT_ITEM %llu) flags %#llx size %u\n",
+ (unsigned long long)logical,
+ (unsigned long long)(logical - found_key->objectid),
+ (unsigned long long)found_key->objectid,
+ (unsigned long long)found_key->offset,
+ (unsigned long long)flags, item_size);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
return BTRFS_EXTENT_FLAG_TREE_BLOCK;
if (flags & BTRFS_EXTENT_FLAG_DATA)
@@ -287,295 +1336,103 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 0;
}
-static int __data_list_add(struct list_head *head, u64 inum,
- u64 extent_data_item_offset, u64 root)
-{
- struct __data_ref *ref;
-
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
-
- ref->inum = inum;
- ref->extent_data_item_offset = extent_data_item_offset;
- ref->root = root;
- list_add_tail(&ref->list, head);
-
- return 0;
-}
-
-static int __data_list_add_eb(struct list_head *head, struct extent_buffer *eb,
- struct btrfs_extent_data_ref *dref)
-{
- return __data_list_add(head, btrfs_extent_data_ref_objectid(eb, dref),
- btrfs_extent_data_ref_offset(eb, dref),
- btrfs_extent_data_ref_root(eb, dref));
-}
-
-static int __shared_list_add(struct list_head *head, u64 disk_byte)
-{
- struct __shared_ref *ref;
-
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
-
- ref->disk_byte = disk_byte;
- list_add_tail(&ref->list, head);
-
- return 0;
-}
-
-static int __iter_shared_inline_ref_inodes(struct btrfs_fs_info *fs_info,
- u64 logical, u64 inum,
- u64 extent_data_item_offset,
- u64 extent_offset,
- struct btrfs_path *path,
- struct list_head *data_refs,
- iterate_extent_inodes_t *iterate,
- void *ctx)
-{
- u64 ref_root;
- u32 item_size;
- struct btrfs_key key;
- struct extent_buffer *eb;
- struct btrfs_extent_item *ei;
- struct btrfs_extent_inline_ref *eiref;
- struct __data_ref *ref;
- int ret;
- int type;
- int last;
- unsigned long ptr = 0;
-
- WARN_ON(!list_empty(data_refs));
- ret = extent_from_logical(fs_info, logical, path, &key);
- if (ret & BTRFS_EXTENT_FLAG_DATA)
- ret = -EIO;
- if (ret < 0)
- goto out;
-
- eb = path->nodes[0];
- ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
-
- ret = 0;
- ref_root = 0;
- /*
- * as done in iterate_extent_inodes, we first build a list of refs to
- * iterate, then free the path and then iterate them to avoid deadlocks.
- */
- do {
- last = __get_extent_inline_ref(&ptr, eb, ei, item_size,
- &eiref, &type);
- if (last < 0) {
- ret = last;
- goto out;
- }
- if (type == BTRFS_TREE_BLOCK_REF_KEY ||
- type == BTRFS_SHARED_BLOCK_REF_KEY) {
- ref_root = btrfs_extent_inline_ref_offset(eb, eiref);
- ret = __data_list_add(data_refs, inum,
- extent_data_item_offset,
- ref_root);
- }
- } while (!ret && !last);
-
- btrfs_release_path(path);
-
- if (ref_root == 0) {
- printk(KERN_ERR "btrfs: failed to find tree block ref "
- "for shared data backref %llu\n", logical);
- WARN_ON(1);
- ret = -EIO;
- }
-
-out:
- while (!list_empty(data_refs)) {
- ref = list_first_entry(data_refs, struct __data_ref, list);
- list_del(&ref->list);
- if (!ret)
- ret = iterate(ref->inum, extent_offset +
- ref->extent_data_item_offset,
- ref->root, ctx);
- kfree(ref);
- }
-
- return ret;
-}
-
-static int __iter_shared_inline_ref(struct btrfs_fs_info *fs_info,
- u64 logical, u64 orig_extent_item_objectid,
- u64 extent_offset, struct btrfs_path *path,
- struct list_head *data_refs,
- iterate_extent_inodes_t *iterate,
- void *ctx)
+static int iterate_leaf_refs(struct extent_inode_elem *inode_list,
+ u64 root, u64 extent_item_objectid,
+ iterate_extent_inodes_t *iterate, void *ctx)
{
- u64 disk_byte;
- struct btrfs_key key;
- struct btrfs_file_extent_item *fi;
- struct extent_buffer *eb;
- int slot;
- int nritems;
- int ret;
- int found = 0;
-
- eb = read_tree_block(fs_info->tree_root, logical,
- fs_info->tree_root->leafsize, 0);
- if (!eb)
- return -EIO;
-
- /*
- * from the shared data ref, we only have the leaf but we need
- * the key. thus, we must look into all items and see that we
- * find one (some) with a reference to our extent item.
- */
- nritems = btrfs_header_nritems(eb);
- for (slot = 0; slot < nritems; ++slot) {
- btrfs_item_key_to_cpu(eb, &key, slot);
- if (key.type != BTRFS_EXTENT_DATA_KEY)
- continue;
- fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
- if (!fi) {
- free_extent_buffer(eb);
- return -EIO;
- }
- disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
- if (disk_byte != orig_extent_item_objectid) {
- if (found)
- break;
- else
- continue;
- }
- ++found;
- ret = __iter_shared_inline_ref_inodes(fs_info, logical,
- key.objectid,
- key.offset,
- extent_offset, path,
- data_refs,
- iterate, ctx);
- if (ret)
+ struct extent_inode_elem *eie;
+ int ret = 0;
+
+ for (eie = inode_list; eie; eie = eie->next) {
+ pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), "
+ "root %llu\n", extent_item_objectid,
+ eie->inum, eie->offset, root);
+ ret = iterate(eie->inum, eie->offset, root, ctx);
+ if (ret) {
+ pr_debug("stopping iteration for %llu due to ret=%d\n",
+ extent_item_objectid, ret);
break;
+ }
}
- if (!found) {
- printk(KERN_ERR "btrfs: failed to follow shared data backref "
- "to parent %llu\n", logical);
- WARN_ON(1);
- ret = -EIO;
- }
-
- free_extent_buffer(eb);
return ret;
}
/*
* calls iterate() for every inode that references the extent identified by
- * the given parameters. will use the path given as a parameter and return it
- * released.
+ * the given parameters.
* when the iterator function returns a non-zero value, iteration stops.
*/
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
- u64 extent_item_objectid,
- u64 extent_offset,
+ u64 extent_item_objectid, u64 extent_item_pos,
+ int search_commit_root,
iterate_extent_inodes_t *iterate, void *ctx)
{
- unsigned long ptr = 0;
- int last;
int ret;
- int type;
- u64 logical;
- u32 item_size;
- struct btrfs_extent_inline_ref *eiref;
- struct btrfs_extent_data_ref *dref;
- struct extent_buffer *eb;
- struct btrfs_extent_item *ei;
- struct btrfs_key key;
struct list_head data_refs = LIST_HEAD_INIT(data_refs);
struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
- struct __data_ref *ref_d;
- struct __shared_ref *ref_s;
-
- eb = path->nodes[0];
- ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
-
- /* first we iterate the inline refs, ... */
- do {
- last = __get_extent_inline_ref(&ptr, eb, ei, item_size,
- &eiref, &type);
- if (last == -ENOENT) {
- ret = 0;
- break;
- }
- if (last < 0) {
- ret = last;
- break;
- }
+ struct btrfs_trans_handle *trans;
+ struct ulist *refs = NULL;
+ struct ulist *roots = NULL;
+ struct ulist_node *ref_node = NULL;
+ struct ulist_node *root_node = NULL;
+ struct seq_list seq_elem = {};
+ struct seq_list tree_mod_seq_elem = {};
+ struct ulist_iterator ref_uiter;
+ struct ulist_iterator root_uiter;
+ struct btrfs_delayed_ref_root *delayed_refs = NULL;
+
+ pr_debug("resolving all inodes for extent %llu\n",
+ extent_item_objectid);
+
+ if (search_commit_root) {
+ trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT;
+ } else {
+ trans = btrfs_join_transaction(fs_info->extent_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ btrfs_get_delayed_seq(delayed_refs, &seq_elem);
+ spin_unlock(&delayed_refs->lock);
+ btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ }
- if (type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = (struct btrfs_extent_data_ref *)(&eiref->offset);
- ret = __data_list_add_eb(&data_refs, eb, dref);
- } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
- logical = btrfs_extent_inline_ref_offset(eb, eiref);
- ret = __shared_list_add(&shared_refs, logical);
- }
- } while (!ret && !last);
+ ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
+ seq_elem.seq, tree_mod_seq_elem.seq, &refs,
+ &extent_item_pos);
+ if (ret)
+ goto out;
- /* ... then we proceed to in-tree references and ... */
- while (!ret) {
- ++path->slots[0];
- if (path->slots[0] > btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(fs_info->extent_root, path);
- if (ret) {
- if (ret == 1)
- ret = 0; /* we're done */
- break;
- }
- eb = path->nodes[0];
- }
- btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
- if (key.objectid != extent_item_objectid)
+ ULIST_ITER_INIT(&ref_uiter);
+ while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
+ ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
+ seq_elem.seq,
+ tree_mod_seq_elem.seq, &roots);
+ if (ret)
break;
- if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = btrfs_item_ptr(eb, path->slots[0],
- struct btrfs_extent_data_ref);
- ret = __data_list_add_eb(&data_refs, eb, dref);
- } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
- ret = __shared_list_add(&shared_refs, key.offset);
+ ULIST_ITER_INIT(&root_uiter);
+ while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
+ pr_debug("root %llu references leaf %llu, data list "
+ "%#lx\n", root_node->val, ref_node->val,
+ ref_node->aux);
+ ret = iterate_leaf_refs(
+ (struct extent_inode_elem *)ref_node->aux,
+ root_node->val, extent_item_objectid,
+ iterate, ctx);
}
+ ulist_free(roots);
+ roots = NULL;
}
- btrfs_release_path(path);
-
- /*
- * ... only at the very end we can process the refs we found. this is
- * because the iterator function we call is allowed to make tree lookups
- * and we have to avoid deadlocks. additionally, we need more tree
- * lookups ourselves for shared data refs.
- */
- while (!list_empty(&data_refs)) {
- ref_d = list_first_entry(&data_refs, struct __data_ref, list);
- list_del(&ref_d->list);
- if (!ret)
- ret = iterate(ref_d->inum, extent_offset +
- ref_d->extent_data_item_offset,
- ref_d->root, ctx);
- kfree(ref_d);
- }
-
- while (!list_empty(&shared_refs)) {
- ref_s = list_first_entry(&shared_refs, struct __shared_ref,
- list);
- list_del(&ref_s->list);
- if (!ret)
- ret = __iter_shared_inline_ref(fs_info,
- ref_s->disk_byte,
- extent_item_objectid,
- extent_offset, path,
- &data_refs,
- iterate, ctx);
- kfree(ref_s);
+ free_leaf_list(refs);
+ ulist_free(roots);
+out:
+ if (!search_commit_root) {
+ btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ btrfs_put_delayed_seq(delayed_refs, &seq_elem);
+ btrfs_end_transaction(trans, fs_info->extent_root);
}
return ret;
@@ -586,19 +1443,22 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
iterate_extent_inodes_t *iterate, void *ctx)
{
int ret;
- u64 offset;
+ u64 extent_item_pos;
struct btrfs_key found_key;
+ int search_commit_root = path->search_commit_root;
ret = extent_from_logical(fs_info, logical, path,
&found_key);
+ btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
ret = -EINVAL;
if (ret < 0)
return ret;
- offset = logical - found_key.objectid;
- ret = iterate_extent_inodes(fs_info, path, found_key.objectid,
- offset, iterate, ctx);
+ extent_item_pos = logical - found_key.objectid;
+ ret = iterate_extent_inodes(fs_info, found_key.objectid,
+ extent_item_pos, search_commit_root,
+ iterate, ctx);
return ret;
}
@@ -607,7 +1467,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path,
iterate_irefs_t *iterate, void *ctx)
{
- int ret;
+ int ret = 0;
int slot;
u32 cur;
u32 len;
@@ -619,7 +1479,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_inode_ref *iref;
struct btrfs_key found_key;
- while (1) {
+ while (!ret) {
+ path->leave_spinning = 1;
ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
&found_key);
if (ret < 0)
@@ -635,6 +1496,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
atomic_inc(&eb->refs);
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
item = btrfs_item_nr(eb, slot);
@@ -643,14 +1506,17 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
name_len = btrfs_inode_ref_name_len(eb, iref);
/* path must be released before calling iterate()! */
+ pr_debug("following ref at offset %u for inode %llu in "
+ "tree %llu\n", cur,
+ (unsigned long long)found_key.objectid,
+ (unsigned long long)fs_root->objectid);
ret = iterate(parent, iref, eb, ctx);
- if (ret) {
- free_extent_buffer(eb);
+ if (ret)
break;
- }
len = sizeof(*iref) + name_len;
iref = (struct btrfs_inode_ref *)((char *)iref + len);
}
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
@@ -683,10 +1549,14 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
return PTR_ERR(fspath);
if (fspath > fspath_min) {
+ pr_debug("path resolved: %s\n", fspath);
ipath->fspath->val[i] = (u64)(unsigned long)fspath;
++ipath->fspath->elem_cnt;
ipath->fspath->bytes_left = fspath - fspath_min;
} else {
+ pr_debug("missed path, not enough space. missing bytes: %lu, "
+ "constructed so far: %s\n",
+ (unsigned long)(fspath_min - fspath), fspath_min);
++ipath->fspath->elem_missed;
ipath->fspath->bytes_missing += fspath_min - fspath;
ipath->fspath->bytes_left = 0;
@@ -711,12 +1581,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
inode_to_path, ipath);
}
-/*
- * allocates space to return multiple file system paths for an inode.
- * total_bytes to allocate are passed, note that space usable for actual path
- * information will be total_bytes - sizeof(struct inode_fs_paths).
- * the returned pointer must be freed with free_ipath() in the end.
- */
struct btrfs_data_container *init_data_container(u32 total_bytes)
{
struct btrfs_data_container *data;
@@ -772,5 +1636,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
void free_ipath(struct inode_fs_paths *ipath)
{
+ if (!ipath)
+ return;
+ kfree(ipath->fspath);
kfree(ipath);
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 92618837cb8..c18d8ac7b79 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -20,6 +20,9 @@
#define __BTRFS_BACKREF__
#include "ioctl.h"
+#include "ulist.h"
+
+#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
struct inode_fs_paths {
struct btrfs_path *btrfs_path;
@@ -43,9 +46,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
u64 *out_root, u8 *out_level);
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
u64 extent_item_objectid,
- u64 extent_offset,
+ u64 extent_offset, int search_commit_root,
iterate_extent_inodes_t *iterate, void *ctx);
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
@@ -54,6 +56,11 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 delayed_ref_seq, u64 time_seq,
+ struct ulist **roots);
+
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
struct btrfs_path *path);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 634608d2a6d..12394a90d60 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -24,6 +24,21 @@
#include "ordered-data.h"
#include "delayed-inode.h"
+/*
+ * ordered_data_close is set by truncate when a file that used
+ * to have good data has been truncated to zero. When it is set
+ * the btrfs file release call will add this inode to the
+ * ordered operations list so that we make sure to flush out any
+ * new data the application may have written before commit.
+ */
+#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
+#define BTRFS_INODE_ORPHAN_META_RESERVED 1
+#define BTRFS_INODE_DUMMY 2
+#define BTRFS_INODE_IN_DEFRAG 3
+#define BTRFS_INODE_DELALLOC_META_RESERVED 4
+#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
+#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
+
/* in memory btrfs inode */
struct btrfs_inode {
/* which subvolume this inode belongs to */
@@ -51,12 +66,12 @@ struct btrfs_inode {
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
+ /* held while doing delalloc reservations */
+ struct mutex delalloc_mutex;
+
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
- /* for keeping track of orphaned inodes */
- struct list_head i_orphan;
-
/* list of all the delalloc inodes in the FS. There are times we need
* to write all the delalloc pages to disk, and this list is used
* to walk them all.
@@ -75,14 +90,13 @@ struct btrfs_inode {
/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;
+ unsigned long runtime_flags;
+
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
u64 generation;
- /* sequence number for NFS changes */
- u64 sequence;
-
/*
* transid of the trans_handle that last modified this inode
*/
@@ -142,22 +156,9 @@ struct btrfs_inode {
unsigned reserved_extents;
/*
- * ordered_data_close is set by truncate when a file that used
- * to have good data has been truncated to zero. When it is set
- * the btrfs file release call will add this inode to the
- * ordered operations list so that we make sure to flush out any
- * new data the application may have written before commit.
- */
- unsigned ordered_data_close:1;
- unsigned orphan_meta_reserved:1;
- unsigned dummy_inode:1;
- unsigned in_defrag:1;
- unsigned delalloc_meta_reserved:1;
-
- /*
* always compress this one file
*/
- unsigned force_compress:4;
+ unsigned force_compress;
struct btrfs_delayed_node *delayed_node;
@@ -199,4 +200,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
return false;
}
+static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret = 0;
+
+ mutex_lock(&root->log_mutex);
+ if (BTRFS_I(inode)->logged_trans == generation &&
+ BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
+ ret = 1;
+ mutex_unlock(&root->log_mutex);
+ return ret;
+}
+
#endif
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
new file mode 100644
index 00000000000..da6e9364a5e
--- /dev/null
+++ b/fs/btrfs/check-integrity.c
@@ -0,0 +1,3358 @@
+/*
+ * Copyright (C) STRATO AG 2011. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+/*
+ * This module can be used to catch cases when the btrfs kernel
+ * code executes write requests to the disk that bring the file
+ * system in an inconsistent state. In such a state, a power-loss
+ * or kernel panic event would cause that the data on disk is
+ * lost or at least damaged.
+ *
+ * Code is added that examines all block write requests during
+ * runtime (including writes of the super block). Three rules
+ * are verified and an error is printed on violation of the
+ * rules:
+ * 1. It is not allowed to write a disk block which is
+ * currently referenced by the super block (either directly
+ * or indirectly).
+ * 2. When a super block is written, it is verified that all
+ * referenced (directly or indirectly) blocks fulfill the
+ * following requirements:
+ * 2a. All referenced blocks have either been present when
+ * the file system was mounted, (i.e., they have been
+ * referenced by the super block) or they have been
+ * written since then and the write completion callback
+ * was called and a FLUSH request to the device where
+ * these blocks are located was received and completed.
+ * 2b. All referenced blocks need to have a generation
+ * number which is equal to the parent's number.
+ *
+ * One issue that was found using this module was that the log
+ * tree on disk became temporarily corrupted because disk blocks
+ * that had been in use for the log tree had been freed and
+ * reused too early, while being referenced by the written super
+ * block.
+ *
+ * The search term in the kernel log that can be used to filter
+ * on the existence of detected integrity issues is
+ * "btrfs: attempt".
+ *
+ * The integrity check is enabled via mount options. These
+ * mount options are only supported if the integrity check
+ * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
+ *
+ * Example #1, apply integrity checks to all metadata:
+ * mount /dev/sdb1 /mnt -o check_int
+ *
+ * Example #2, apply integrity checks to all metadata and
+ * to data extents:
+ * mount /dev/sdb1 /mnt -o check_int_data
+ *
+ * Example #3, apply integrity checks to all metadata and dump
+ * the tree that the super block references to kernel messages
+ * each time after a super block was written:
+ * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
+ *
+ * If the integrity check tool is included and activated in
+ * the mount options, plenty of kernel memory is used, and
+ * plenty of additional CPU cycles are spent. Enabling this
+ * functionality is not intended for normal use. In most
+ * cases, unless you are a btrfs developer who needs to verify
+ * the integrity of (super)-block write requests, do not
+ * enable the config option BTRFS_FS_CHECK_INTEGRITY to
+ * include and compile the integrity check tool.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/mutex.h>
+#include <linux/crc32c.h>
+#include <linux/genhd.h>
+#include <linux/blkdev.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "extent_io.h"
+#include "volumes.h"
+#include "print-tree.h"
+#include "locking.h"
+#include "check-integrity.h"
+#include "rcu-string.h"
+
+#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
+#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
+#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
+#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
+#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
+#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
+#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
+#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
+ * excluding " [...]" */
+#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
+
+/*
+ * The definition of the bitmask fields for the print_mask.
+ * They are specified with the mount option check_integrity_print_mask.
+ */
+#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
+#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
+#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
+#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
+#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
+#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
+#define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
+#define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
+#define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
+#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
+#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
+#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
+#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
+
+struct btrfsic_dev_state;
+struct btrfsic_state;
+
+struct btrfsic_block {
+ u32 magic_num; /* only used for debug purposes */
+ unsigned int is_metadata:1; /* if it is meta-data, not data-data */
+ unsigned int is_superblock:1; /* if it is one of the superblocks */
+ unsigned int is_iodone:1; /* if is done by lower subsystem */
+ unsigned int iodone_w_error:1; /* error was indicated to endio */
+ unsigned int never_written:1; /* block was added because it was
+ * referenced, not because it was
+ * written */
+ unsigned int mirror_num:2; /* large enough to hold
+ * BTRFS_SUPER_MIRROR_MAX */
+ struct btrfsic_dev_state *dev_state;
+ u64 dev_bytenr; /* key, physical byte num on disk */
+ u64 logical_bytenr; /* logical byte num on disk */
+ u64 generation;
+ struct btrfs_disk_key disk_key; /* extra info to print in case of
+ * issues, will not always be correct */
+ struct list_head collision_resolving_node; /* list node */
+ struct list_head all_blocks_node; /* list node */
+
+ /* the following two lists contain block_link items */
+ struct list_head ref_to_list; /* list */
+ struct list_head ref_from_list; /* list */
+ struct btrfsic_block *next_in_same_bio;
+ void *orig_bio_bh_private;
+ union {
+ bio_end_io_t *bio;
+ bh_end_io_t *bh;
+ } orig_bio_bh_end_io;
+ int submit_bio_bh_rw;
+ u64 flush_gen; /* only valid if !never_written */
+};
+
+/*
+ * Elements of this type are allocated dynamically and required because
+ * each block object can refer to and can be ref from multiple blocks.
+ * The key to lookup them in the hashtable is the dev_bytenr of
+ * the block ref to plus the one from the block refered from.
+ * The fact that they are searchable via a hashtable and that a
+ * ref_cnt is maintained is not required for the btrfs integrity
+ * check algorithm itself, it is only used to make the output more
+ * beautiful in case that an error is detected (an error is defined
+ * as a write operation to a block while that block is still referenced).
+ */
+struct btrfsic_block_link {
+ u32 magic_num; /* only used for debug purposes */
+ u32 ref_cnt;
+ struct list_head node_ref_to; /* list node */
+ struct list_head node_ref_from; /* list node */
+ struct list_head collision_resolving_node; /* list node */
+ struct btrfsic_block *block_ref_to;
+ struct btrfsic_block *block_ref_from;
+ u64 parent_generation;
+};
+
+struct btrfsic_dev_state {
+ u32 magic_num; /* only used for debug purposes */
+ struct block_device *bdev;
+ struct btrfsic_state *state;
+ struct list_head collision_resolving_node; /* list node */
+ struct btrfsic_block dummy_block_for_bio_bh_flush;
+ u64 last_flush_gen;
+ char name[BDEVNAME_SIZE];
+};
+
+struct btrfsic_block_hashtable {
+ struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
+};
+
+struct btrfsic_block_link_hashtable {
+ struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
+};
+
+struct btrfsic_dev_state_hashtable {
+ struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
+};
+
+struct btrfsic_block_data_ctx {
+ u64 start; /* virtual bytenr */
+ u64 dev_bytenr; /* physical bytenr on device */
+ u32 len;
+ struct btrfsic_dev_state *dev;
+ char **datav;
+ struct page **pagev;
+ void *mem_to_free;
+};
+
+/* This structure is used to implement recursion without occupying
+ * any stack space, refer to btrfsic_process_metablock() */
+struct btrfsic_stack_frame {
+ u32 magic;
+ u32 nr;
+ int error;
+ int i;
+ int limit_nesting;
+ int num_copies;
+ int mirror_num;
+ struct btrfsic_block *block;
+ struct btrfsic_block_data_ctx *block_ctx;
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx next_block_ctx;
+ struct btrfs_header *hdr;
+ struct btrfsic_stack_frame *prev;
+};
+
+/* Some state per mounted filesystem */
+struct btrfsic_state {
+ u32 print_mask;
+ int include_extent_data;
+ int csum_size;
+ struct list_head all_blocks_list;
+ struct btrfsic_block_hashtable block_hashtable;
+ struct btrfsic_block_link_hashtable block_link_hashtable;
+ struct btrfs_root *root;
+ u64 max_superblock_generation;
+ struct btrfsic_block *latest_superblock;
+ u32 metablock_size;
+ u32 datablock_size;
+};
+
+static void btrfsic_block_init(struct btrfsic_block *b);
+static struct btrfsic_block *btrfsic_block_alloc(void);
+static void btrfsic_block_free(struct btrfsic_block *b);
+static void btrfsic_block_link_init(struct btrfsic_block_link *n);
+static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
+static void btrfsic_block_link_free(struct btrfsic_block_link *n);
+static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
+static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
+static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
+static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
+static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
+ struct btrfsic_block_hashtable *h);
+static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
+static struct btrfsic_block *btrfsic_block_hashtable_lookup(
+ struct block_device *bdev,
+ u64 dev_bytenr,
+ struct btrfsic_block_hashtable *h);
+static void btrfsic_block_link_hashtable_init(
+ struct btrfsic_block_link_hashtable *h);
+static void btrfsic_block_link_hashtable_add(
+ struct btrfsic_block_link *l,
+ struct btrfsic_block_link_hashtable *h);
+static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
+static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
+ struct block_device *bdev_ref_to,
+ u64 dev_bytenr_ref_to,
+ struct block_device *bdev_ref_from,
+ u64 dev_bytenr_ref_from,
+ struct btrfsic_block_link_hashtable *h);
+static void btrfsic_dev_state_hashtable_init(
+ struct btrfsic_dev_state_hashtable *h);
+static void btrfsic_dev_state_hashtable_add(
+ struct btrfsic_dev_state *ds,
+ struct btrfsic_dev_state_hashtable *h);
+static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
+ struct block_device *bdev,
+ struct btrfsic_dev_state_hashtable *h);
+static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
+static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
+static int btrfsic_process_superblock(struct btrfsic_state *state,
+ struct btrfs_fs_devices *fs_devices);
+static int btrfsic_process_metablock(struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ int limit_nesting, int force_iodone_flag);
+static void btrfsic_read_from_block_data(
+ struct btrfsic_block_data_ctx *block_ctx,
+ void *dst, u32 offset, size_t len);
+static int btrfsic_create_link_to_next_block(
+ struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx
+ *block_ctx, u64 next_bytenr,
+ int limit_nesting,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block **next_blockp,
+ int force_iodone_flag,
+ int *num_copiesp, int *mirror_nump,
+ struct btrfs_disk_key *disk_key,
+ u64 parent_generation);
+static int btrfsic_handle_extent_data(struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ u32 item_offset, int force_iodone_flag);
+static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
+ struct btrfsic_block_data_ctx *block_ctx_out,
+ int mirror_num);
+static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
+ u32 len, struct block_device *bdev,
+ struct btrfsic_block_data_ctx *block_ctx_out);
+static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
+static int btrfsic_read_block(struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx);
+static void btrfsic_dump_database(struct btrfsic_state *state);
+static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
+static int btrfsic_test_for_metadata(struct btrfsic_state *state,
+ char **datav, unsigned int num_pages);
+static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr, char **mapped_datav,
+ unsigned int num_pages,
+ struct bio *bio, int *bio_is_patched,
+ struct buffer_head *bh,
+ int submit_bio_bh_rw);
+static int btrfsic_process_written_superblock(
+ struct btrfsic_state *state,
+ struct btrfsic_block *const block,
+ struct btrfs_super_block *const super_hdr);
+static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
+static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
+static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int recursion_level);
+static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
+ struct btrfsic_block *const block,
+ int recursion_level);
+static void btrfsic_print_add_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l);
+static void btrfsic_print_rem_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l);
+static char btrfsic_get_block_type(const struct btrfsic_state *state,
+ const struct btrfsic_block *block);
+static void btrfsic_dump_tree(const struct btrfsic_state *state);
+static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int indent_level);
+static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block *next_block,
+ struct btrfsic_block *from_block,
+ u64 parent_generation);
+static struct btrfsic_block *btrfsic_block_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx,
+ const char *additional_string,
+ int is_metadata,
+ int is_iodone,
+ int never_written,
+ int mirror_num,
+ int *was_created);
+static int btrfsic_process_superblock_dev_mirror(
+ struct btrfsic_state *state,
+ struct btrfsic_dev_state *dev_state,
+ struct btrfs_device *device,
+ int superblock_mirror_num,
+ struct btrfsic_dev_state **selected_dev_state,
+ struct btrfs_super_block *selected_super);
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
+ struct block_device *bdev);
+static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
+ u64 bytenr,
+ struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr);
+
+static struct mutex btrfsic_mutex;
+static int btrfsic_is_initialized;
+static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
+
+
+static void btrfsic_block_init(struct btrfsic_block *b)
+{
+ b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
+ b->dev_state = NULL;
+ b->dev_bytenr = 0;
+ b->logical_bytenr = 0;
+ b->generation = BTRFSIC_GENERATION_UNKNOWN;
+ b->disk_key.objectid = 0;
+ b->disk_key.type = 0;
+ b->disk_key.offset = 0;
+ b->is_metadata = 0;
+ b->is_superblock = 0;
+ b->is_iodone = 0;
+ b->iodone_w_error = 0;
+ b->never_written = 0;
+ b->mirror_num = 0;
+ b->next_in_same_bio = NULL;
+ b->orig_bio_bh_private = NULL;
+ b->orig_bio_bh_end_io.bio = NULL;
+ INIT_LIST_HEAD(&b->collision_resolving_node);
+ INIT_LIST_HEAD(&b->all_blocks_node);
+ INIT_LIST_HEAD(&b->ref_to_list);
+ INIT_LIST_HEAD(&b->ref_from_list);
+ b->submit_bio_bh_rw = 0;
+ b->flush_gen = 0;
+}
+
+static struct btrfsic_block *btrfsic_block_alloc(void)
+{
+ struct btrfsic_block *b;
+
+ b = kzalloc(sizeof(*b), GFP_NOFS);
+ if (NULL != b)
+ btrfsic_block_init(b);
+
+ return b;
+}
+
+static void btrfsic_block_free(struct btrfsic_block *b)
+{
+ BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
+ kfree(b);
+}
+
+static void btrfsic_block_link_init(struct btrfsic_block_link *l)
+{
+ l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
+ l->ref_cnt = 1;
+ INIT_LIST_HEAD(&l->node_ref_to);
+ INIT_LIST_HEAD(&l->node_ref_from);
+ INIT_LIST_HEAD(&l->collision_resolving_node);
+ l->block_ref_to = NULL;
+ l->block_ref_from = NULL;
+}
+
+static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
+{
+ struct btrfsic_block_link *l;
+
+ l = kzalloc(sizeof(*l), GFP_NOFS);
+ if (NULL != l)
+ btrfsic_block_link_init(l);
+
+ return l;
+}
+
+static void btrfsic_block_link_free(struct btrfsic_block_link *l)
+{
+ BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
+ kfree(l);
+}
+
+static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
+{
+ ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
+ ds->bdev = NULL;
+ ds->state = NULL;
+ ds->name[0] = '\0';
+ INIT_LIST_HEAD(&ds->collision_resolving_node);
+ ds->last_flush_gen = 0;
+ btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
+ ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
+ ds->dummy_block_for_bio_bh_flush.dev_state = ds;
+}
+
+static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
+{
+ struct btrfsic_dev_state *ds;
+
+ ds = kzalloc(sizeof(*ds), GFP_NOFS);
+ if (NULL != ds)
+ btrfsic_dev_state_init(ds);
+
+ return ds;
+}
+
+static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
+{
+ BUG_ON(!(NULL == ds ||
+ BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
+ kfree(ds);
+}
+
+static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
+{
+ int i;
+
+ for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
+ INIT_LIST_HEAD(h->table + i);
+}
+
+static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
+ struct btrfsic_block_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(b->dev_bytenr >> 16)) ^
+ ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
+ (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
+
+ list_add(&b->collision_resolving_node, h->table + hashval);
+}
+
+static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
+{
+ list_del(&b->collision_resolving_node);
+}
+
+static struct btrfsic_block *btrfsic_block_hashtable_lookup(
+ struct block_device *bdev,
+ u64 dev_bytenr,
+ struct btrfsic_block_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(dev_bytenr >> 16)) ^
+ ((unsigned int)((uintptr_t)bdev))) &
+ (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
+ struct list_head *elem;
+
+ list_for_each(elem, h->table + hashval) {
+ struct btrfsic_block *const b =
+ list_entry(elem, struct btrfsic_block,
+ collision_resolving_node);
+
+ if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
+ return b;
+ }
+
+ return NULL;
+}
+
+static void btrfsic_block_link_hashtable_init(
+ struct btrfsic_block_link_hashtable *h)
+{
+ int i;
+
+ for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
+ INIT_LIST_HEAD(h->table + i);
+}
+
+static void btrfsic_block_link_hashtable_add(
+ struct btrfsic_block_link *l,
+ struct btrfsic_block_link_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
+ ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
+ ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
+ ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
+ & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
+
+ BUG_ON(NULL == l->block_ref_to);
+ BUG_ON(NULL == l->block_ref_from);
+ list_add(&l->collision_resolving_node, h->table + hashval);
+}
+
+static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
+{
+ list_del(&l->collision_resolving_node);
+}
+
+static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
+ struct block_device *bdev_ref_to,
+ u64 dev_bytenr_ref_to,
+ struct block_device *bdev_ref_from,
+ u64 dev_bytenr_ref_from,
+ struct btrfsic_block_link_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
+ ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
+ ((unsigned int)((uintptr_t)bdev_ref_to)) ^
+ ((unsigned int)((uintptr_t)bdev_ref_from))) &
+ (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
+ struct list_head *elem;
+
+ list_for_each(elem, h->table + hashval) {
+ struct btrfsic_block_link *const l =
+ list_entry(elem, struct btrfsic_block_link,
+ collision_resolving_node);
+
+ BUG_ON(NULL == l->block_ref_to);
+ BUG_ON(NULL == l->block_ref_from);
+ if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
+ l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
+ l->block_ref_from->dev_state->bdev == bdev_ref_from &&
+ l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
+ return l;
+ }
+
+ return NULL;
+}
+
+static void btrfsic_dev_state_hashtable_init(
+ struct btrfsic_dev_state_hashtable *h)
+{
+ int i;
+
+ for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
+ INIT_LIST_HEAD(h->table + i);
+}
+
+static void btrfsic_dev_state_hashtable_add(
+ struct btrfsic_dev_state *ds,
+ struct btrfsic_dev_state_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)((uintptr_t)ds->bdev)) &
+ (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
+
+ list_add(&ds->collision_resolving_node, h->table + hashval);
+}
+
+static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
+{
+ list_del(&ds->collision_resolving_node);
+}
+
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
+ struct block_device *bdev,
+ struct btrfsic_dev_state_hashtable *h)
+{
+ const unsigned int hashval =
+ (((unsigned int)((uintptr_t)bdev)) &
+ (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
+ struct list_head *elem;
+
+ list_for_each(elem, h->table + hashval) {
+ struct btrfsic_dev_state *const ds =
+ list_entry(elem, struct btrfsic_dev_state,
+ collision_resolving_node);
+
+ if (ds->bdev == bdev)
+ return ds;
+ }
+
+ return NULL;
+}
+
+static int btrfsic_process_superblock(struct btrfsic_state *state,
+ struct btrfs_fs_devices *fs_devices)
+{
+ int ret = 0;
+ struct btrfs_super_block *selected_super;
+ struct list_head *dev_head = &fs_devices->devices;
+ struct btrfs_device *device;
+ struct btrfsic_dev_state *selected_dev_state = NULL;
+ int pass;
+
+ BUG_ON(NULL == state);
+ selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
+ if (NULL == selected_super) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ return -1;
+ }
+
+ list_for_each_entry(device, dev_head, dev_list) {
+ int i;
+ struct btrfsic_dev_state *dev_state;
+
+ if (!device->bdev || !device->name)
+ continue;
+
+ dev_state = btrfsic_dev_state_lookup(device->bdev);
+ BUG_ON(NULL == dev_state);
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ ret = btrfsic_process_superblock_dev_mirror(
+ state, dev_state, device, i,
+ &selected_dev_state, selected_super);
+ if (0 != ret && 0 == i) {
+ kfree(selected_super);
+ return ret;
+ }
+ }
+ }
+
+ if (NULL == state->latest_superblock) {
+ printk(KERN_INFO "btrfsic: no superblock found!\n");
+ kfree(selected_super);
+ return -1;
+ }
+
+ state->csum_size = btrfs_super_csum_size(selected_super);
+
+ for (pass = 0; pass < 3; pass++) {
+ int num_copies;
+ int mirror_num;
+ u64 next_bytenr;
+
+ switch (pass) {
+ case 0:
+ next_bytenr = btrfs_super_root(selected_super);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "root@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 1:
+ next_bytenr = btrfs_super_chunk_root(selected_super);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "chunk@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 2:
+ next_bytenr = btrfs_super_log_root(selected_super);
+ if (0 == next_bytenr)
+ continue;
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "log@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ }
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, state->metablock_size);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx tmp_next_block_ctx;
+ struct btrfsic_block_link *l;
+
+ ret = btrfsic_map_block(state, next_bytenr,
+ state->metablock_size,
+ &tmp_next_block_ctx,
+ mirror_num);
+ if (ret) {
+ printk(KERN_INFO "btrfsic:"
+ " btrfsic_map_block(root @%llu,"
+ " mirror %d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ kfree(selected_super);
+ return -1;
+ }
+
+ next_block = btrfsic_block_hashtable_lookup(
+ tmp_next_block_ctx.dev->bdev,
+ tmp_next_block_ctx.dev_bytenr,
+ &state->block_hashtable);
+ BUG_ON(NULL == next_block);
+
+ l = btrfsic_block_link_hashtable_lookup(
+ tmp_next_block_ctx.dev->bdev,
+ tmp_next_block_ctx.dev_bytenr,
+ state->latest_superblock->dev_state->
+ bdev,
+ state->latest_superblock->dev_bytenr,
+ &state->block_link_hashtable);
+ BUG_ON(NULL == l);
+
+ ret = btrfsic_read_block(state, &tmp_next_block_ctx);
+ if (ret < (int)PAGE_CACHE_SIZE) {
+ printk(KERN_INFO
+ "btrfsic: read @logical %llu failed!\n",
+ (unsigned long long)
+ tmp_next_block_ctx.start);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ kfree(selected_super);
+ return -1;
+ }
+
+ ret = btrfsic_process_metablock(state,
+ next_block,
+ &tmp_next_block_ctx,
+ BTRFS_MAX_LEVEL + 3, 1);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ }
+ }
+
+ kfree(selected_super);
+ return ret;
+}
+
+static int btrfsic_process_superblock_dev_mirror(
+ struct btrfsic_state *state,
+ struct btrfsic_dev_state *dev_state,
+ struct btrfs_device *device,
+ int superblock_mirror_num,
+ struct btrfsic_dev_state **selected_dev_state,
+ struct btrfs_super_block *selected_super)
+{
+ struct btrfs_super_block *super_tmp;
+ u64 dev_bytenr;
+ struct buffer_head *bh;
+ struct btrfsic_block *superblock_tmp;
+ int pass;
+ struct block_device *const superblock_bdev = device->bdev;
+
+ /* super block bytenr is always the unmapped device bytenr */
+ dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
+ if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
+ return -1;
+ bh = __bread(superblock_bdev, dev_bytenr / 4096,
+ BTRFS_SUPER_INFO_SIZE);
+ if (NULL == bh)
+ return -1;
+ super_tmp = (struct btrfs_super_block *)
+ (bh->b_data + (dev_bytenr & 4095));
+
+ if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
+ strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
+ sizeof(super_tmp->magic)) ||
+ memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
+ btrfs_super_nodesize(super_tmp) != state->metablock_size ||
+ btrfs_super_leafsize(super_tmp) != state->metablock_size ||
+ btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
+ brelse(bh);
+ return 0;
+ }
+
+ superblock_tmp =
+ btrfsic_block_hashtable_lookup(superblock_bdev,
+ dev_bytenr,
+ &state->block_hashtable);
+ if (NULL == superblock_tmp) {
+ superblock_tmp = btrfsic_block_alloc();
+ if (NULL == superblock_tmp) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ brelse(bh);
+ return -1;
+ }
+ /* for superblock, only the dev_bytenr makes sense */
+ superblock_tmp->dev_bytenr = dev_bytenr;
+ superblock_tmp->dev_state = dev_state;
+ superblock_tmp->logical_bytenr = dev_bytenr;
+ superblock_tmp->generation = btrfs_super_generation(super_tmp);
+ superblock_tmp->is_metadata = 1;
+ superblock_tmp->is_superblock = 1;
+ superblock_tmp->is_iodone = 1;
+ superblock_tmp->never_written = 0;
+ superblock_tmp->mirror_num = 1 + superblock_mirror_num;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
+ printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
+ " @%llu (%s/%llu/%d)\n",
+ superblock_bdev,
+ rcu_str_deref(device->name),
+ (unsigned long long)dev_bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ superblock_mirror_num);
+ list_add(&superblock_tmp->all_blocks_node,
+ &state->all_blocks_list);
+ btrfsic_block_hashtable_add(superblock_tmp,
+ &state->block_hashtable);
+ }
+
+ /* select the one with the highest generation field */
+ if (btrfs_super_generation(super_tmp) >
+ state->max_superblock_generation ||
+ 0 == state->max_superblock_generation) {
+ memcpy(selected_super, super_tmp, sizeof(*selected_super));
+ *selected_dev_state = dev_state;
+ state->max_superblock_generation =
+ btrfs_super_generation(super_tmp);
+ state->latest_superblock = superblock_tmp;
+ }
+
+ for (pass = 0; pass < 3; pass++) {
+ u64 next_bytenr;
+ int num_copies;
+ int mirror_num;
+ const char *additional_string = NULL;
+ struct btrfs_disk_key tmp_disk_key;
+
+ tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
+ tmp_disk_key.offset = 0;
+ switch (pass) {
+ case 0:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+ additional_string = "initial root ";
+ next_bytenr = btrfs_super_root(super_tmp);
+ break;
+ case 1:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+ additional_string = "initial chunk ";
+ next_bytenr = btrfs_super_chunk_root(super_tmp);
+ break;
+ case 2:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+ additional_string = "initial log ";
+ next_bytenr = btrfs_super_log_root(super_tmp);
+ if (0 == next_bytenr)
+ continue;
+ break;
+ }
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, state->metablock_size);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx tmp_next_block_ctx;
+ struct btrfsic_block_link *l;
+
+ if (btrfsic_map_block(state, next_bytenr,
+ state->metablock_size,
+ &tmp_next_block_ctx,
+ mirror_num)) {
+ printk(KERN_INFO "btrfsic: btrfsic_map_block("
+ "bytenr @%llu, mirror %d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ brelse(bh);
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(
+ state, &tmp_next_block_ctx,
+ additional_string, 1, 1, 0,
+ mirror_num, NULL);
+ if (NULL == next_block) {
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ brelse(bh);
+ return -1;
+ }
+
+ next_block->disk_key = tmp_disk_key;
+ next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
+ l = btrfsic_block_link_lookup_or_add(
+ state, &tmp_next_block_ctx,
+ next_block, superblock_tmp,
+ BTRFSIC_GENERATION_UNKNOWN);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ if (NULL == l) {
+ brelse(bh);
+ return -1;
+ }
+ }
+ }
+ if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
+ btrfsic_dump_tree_sub(state, superblock_tmp, 0);
+
+ brelse(bh);
+ return 0;
+}
+
+static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
+{
+ struct btrfsic_stack_frame *sf;
+
+ sf = kzalloc(sizeof(*sf), GFP_NOFS);
+ if (NULL == sf)
+ printk(KERN_INFO "btrfsic: alloc memory failed!\n");
+ else
+ sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
+ return sf;
+}
+
+static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
+{
+ BUG_ON(!(NULL == sf ||
+ BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
+ kfree(sf);
+}
+
+static int btrfsic_process_metablock(
+ struct btrfsic_state *state,
+ struct btrfsic_block *const first_block,
+ struct btrfsic_block_data_ctx *const first_block_ctx,
+ int first_limit_nesting, int force_iodone_flag)
+{
+ struct btrfsic_stack_frame initial_stack_frame = { 0 };
+ struct btrfsic_stack_frame *sf;
+ struct btrfsic_stack_frame *next_stack;
+ struct btrfs_header *const first_hdr =
+ (struct btrfs_header *)first_block_ctx->datav[0];
+
+ BUG_ON(!first_hdr);
+ sf = &initial_stack_frame;
+ sf->error = 0;
+ sf->i = -1;
+ sf->limit_nesting = first_limit_nesting;
+ sf->block = first_block;
+ sf->block_ctx = first_block_ctx;
+ sf->next_block = NULL;
+ sf->hdr = first_hdr;
+ sf->prev = NULL;
+
+continue_with_new_stack_frame:
+ sf->block->generation = le64_to_cpu(sf->hdr->generation);
+ if (0 == sf->hdr->level) {
+ struct btrfs_leaf *const leafhdr =
+ (struct btrfs_leaf *)sf->hdr;
+
+ if (-1 == sf->i) {
+ sf->nr = le32_to_cpu(leafhdr->header.nritems);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "leaf %llu items %d generation %llu"
+ " owner %llu\n",
+ (unsigned long long)
+ sf->block_ctx->start,
+ sf->nr,
+ (unsigned long long)
+ le64_to_cpu(leafhdr->header.generation),
+ (unsigned long long)
+ le64_to_cpu(leafhdr->header.owner));
+ }
+
+continue_with_current_leaf_stack_frame:
+ if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
+ sf->i++;
+ sf->num_copies = 0;
+ }
+
+ if (sf->i < sf->nr) {
+ struct btrfs_item disk_item;
+ u32 disk_item_offset =
+ (uintptr_t)(leafhdr->items + sf->i) -
+ (uintptr_t)leafhdr;
+ struct btrfs_disk_key *disk_key;
+ u8 type;
+ u32 item_offset;
+
+ if (disk_item_offset + sizeof(struct btrfs_item) >
+ sf->block_ctx->len) {
+leaf_item_out_of_bounce_error:
+ printk(KERN_INFO
+ "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
+ sf->block_ctx->start,
+ sf->block_ctx->dev->name);
+ goto one_stack_frame_backwards;
+ }
+ btrfsic_read_from_block_data(sf->block_ctx,
+ &disk_item,
+ disk_item_offset,
+ sizeof(struct btrfs_item));
+ item_offset = le32_to_cpu(disk_item.offset);
+ disk_key = &disk_item.key;
+ type = disk_key->type;
+
+ if (BTRFS_ROOT_ITEM_KEY == type) {
+ struct btrfs_root_item root_item;
+ u32 root_item_offset;
+ u64 next_bytenr;
+
+ root_item_offset = item_offset +
+ offsetof(struct btrfs_leaf, items);
+ if (root_item_offset +
+ sizeof(struct btrfs_root_item) >
+ sf->block_ctx->len)
+ goto leaf_item_out_of_bounce_error;
+ btrfsic_read_from_block_data(
+ sf->block_ctx, &root_item,
+ root_item_offset,
+ sizeof(struct btrfs_root_item));
+ next_bytenr = le64_to_cpu(root_item.bytenr);
+
+ sf->error =
+ btrfsic_create_link_to_next_block(
+ state,
+ sf->block,
+ sf->block_ctx,
+ next_bytenr,
+ sf->limit_nesting,
+ &sf->next_block_ctx,
+ &sf->next_block,
+ force_iodone_flag,
+ &sf->num_copies,
+ &sf->mirror_num,
+ disk_key,
+ le64_to_cpu(root_item.
+ generation));
+ if (sf->error)
+ goto one_stack_frame_backwards;
+
+ if (NULL != sf->next_block) {
+ struct btrfs_header *const next_hdr =
+ (struct btrfs_header *)
+ sf->next_block_ctx.datav[0];
+
+ next_stack =
+ btrfsic_stack_frame_alloc();
+ if (NULL == next_stack) {
+ btrfsic_release_block_ctx(
+ &sf->
+ next_block_ctx);
+ goto one_stack_frame_backwards;
+ }
+
+ next_stack->i = -1;
+ next_stack->block = sf->next_block;
+ next_stack->block_ctx =
+ &sf->next_block_ctx;
+ next_stack->next_block = NULL;
+ next_stack->hdr = next_hdr;
+ next_stack->limit_nesting =
+ sf->limit_nesting - 1;
+ next_stack->prev = sf;
+ sf = next_stack;
+ goto continue_with_new_stack_frame;
+ }
+ } else if (BTRFS_EXTENT_DATA_KEY == type &&
+ state->include_extent_data) {
+ sf->error = btrfsic_handle_extent_data(
+ state,
+ sf->block,
+ sf->block_ctx,
+ item_offset,
+ force_iodone_flag);
+ if (sf->error)
+ goto one_stack_frame_backwards;
+ }
+
+ goto continue_with_current_leaf_stack_frame;
+ }
+ } else {
+ struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
+
+ if (-1 == sf->i) {
+ sf->nr = le32_to_cpu(nodehdr->header.nritems);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO "node %llu level %d items %d"
+ " generation %llu owner %llu\n",
+ (unsigned long long)
+ sf->block_ctx->start,
+ nodehdr->header.level, sf->nr,
+ (unsigned long long)
+ le64_to_cpu(nodehdr->header.generation),
+ (unsigned long long)
+ le64_to_cpu(nodehdr->header.owner));
+ }
+
+continue_with_current_node_stack_frame:
+ if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
+ sf->i++;
+ sf->num_copies = 0;
+ }
+
+ if (sf->i < sf->nr) {
+ struct btrfs_key_ptr key_ptr;
+ u32 key_ptr_offset;
+ u64 next_bytenr;
+
+ key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
+ (uintptr_t)nodehdr;
+ if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
+ sf->block_ctx->len) {
+ printk(KERN_INFO
+ "btrfsic: node item out of bounce at logical %llu, dev %s\n",
+ sf->block_ctx->start,
+ sf->block_ctx->dev->name);
+ goto one_stack_frame_backwards;
+ }
+ btrfsic_read_from_block_data(
+ sf->block_ctx, &key_ptr, key_ptr_offset,
+ sizeof(struct btrfs_key_ptr));
+ next_bytenr = le64_to_cpu(key_ptr.blockptr);
+
+ sf->error = btrfsic_create_link_to_next_block(
+ state,
+ sf->block,
+ sf->block_ctx,
+ next_bytenr,
+ sf->limit_nesting,
+ &sf->next_block_ctx,
+ &sf->next_block,
+ force_iodone_flag,
+ &sf->num_copies,
+ &sf->mirror_num,
+ &key_ptr.key,
+ le64_to_cpu(key_ptr.generation));
+ if (sf->error)
+ goto one_stack_frame_backwards;
+
+ if (NULL != sf->next_block) {
+ struct btrfs_header *const next_hdr =
+ (struct btrfs_header *)
+ sf->next_block_ctx.datav[0];
+
+ next_stack = btrfsic_stack_frame_alloc();
+ if (NULL == next_stack)
+ goto one_stack_frame_backwards;
+
+ next_stack->i = -1;
+ next_stack->block = sf->next_block;
+ next_stack->block_ctx = &sf->next_block_ctx;
+ next_stack->next_block = NULL;
+ next_stack->hdr = next_hdr;
+ next_stack->limit_nesting =
+ sf->limit_nesting - 1;
+ next_stack->prev = sf;
+ sf = next_stack;
+ goto continue_with_new_stack_frame;
+ }
+
+ goto continue_with_current_node_stack_frame;
+ }
+ }
+
+one_stack_frame_backwards:
+ if (NULL != sf->prev) {
+ struct btrfsic_stack_frame *const prev = sf->prev;
+
+ /* the one for the initial block is freed in the caller */
+ btrfsic_release_block_ctx(sf->block_ctx);
+
+ if (sf->error) {
+ prev->error = sf->error;
+ btrfsic_stack_frame_free(sf);
+ sf = prev;
+ goto one_stack_frame_backwards;
+ }
+
+ btrfsic_stack_frame_free(sf);
+ sf = prev;
+ goto continue_with_new_stack_frame;
+ } else {
+ BUG_ON(&initial_stack_frame != sf);
+ }
+
+ return sf->error;
+}
+
+static void btrfsic_read_from_block_data(
+ struct btrfsic_block_data_ctx *block_ctx,
+ void *dstv, u32 offset, size_t len)
+{
+ size_t cur;
+ size_t offset_in_page;
+ char *kaddr;
+ char *dst = (char *)dstv;
+ size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
+ unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
+
+ WARN_ON(offset + len > block_ctx->len);
+ offset_in_page = (start_offset + offset) &
+ ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+ while (len > 0) {
+ cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
+ BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT);
+ kaddr = block_ctx->datav[i];
+ memcpy(dst, kaddr + offset_in_page, cur);
+
+ dst += cur;
+ len -= cur;
+ offset_in_page = 0;
+ i++;
+ }
+}
+
+static int btrfsic_create_link_to_next_block(
+ struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ u64 next_bytenr,
+ int limit_nesting,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block **next_blockp,
+ int force_iodone_flag,
+ int *num_copiesp, int *mirror_nump,
+ struct btrfs_disk_key *disk_key,
+ u64 parent_generation)
+{
+ struct btrfsic_block *next_block = NULL;
+ int ret;
+ struct btrfsic_block_link *l;
+ int did_alloc_block_link;
+ int block_was_created;
+
+ *next_blockp = NULL;
+ if (0 == *num_copiesp) {
+ *num_copiesp =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, state->metablock_size);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, *num_copiesp);
+ *mirror_nump = 1;
+ }
+
+ if (*mirror_nump > *num_copiesp)
+ return 0;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
+ *mirror_nump);
+ ret = btrfsic_map_block(state, next_bytenr,
+ state->metablock_size,
+ next_block_ctx, *mirror_nump);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
+ (unsigned long long)next_bytenr, *mirror_nump);
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(state,
+ next_block_ctx, "referenced ",
+ 1, force_iodone_flag,
+ !force_iodone_flag,
+ *mirror_nump,
+ &block_was_created);
+ if (NULL == next_block) {
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+ if (block_was_created) {
+ l = NULL;
+ next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
+ } else {
+ if (next_block->logical_bytenr != next_bytenr &&
+ !(!next_block->is_metadata &&
+ 0 == next_block->logical_bytenr)) {
+ printk(KERN_INFO
+ "Referenced block @%llu (%s/%llu/%d)"
+ " found in hash table, %c,"
+ " bytenr mismatch (!= stored %llu).\n",
+ (unsigned long long)next_bytenr,
+ next_block_ctx->dev->name,
+ (unsigned long long)next_block_ctx->dev_bytenr,
+ *mirror_nump,
+ btrfsic_get_block_type(state, next_block),
+ (unsigned long long)next_block->logical_bytenr);
+ } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Referenced block @%llu (%s/%llu/%d)"
+ " found in hash table, %c.\n",
+ (unsigned long long)next_bytenr,
+ next_block_ctx->dev->name,
+ (unsigned long long)next_block_ctx->dev_bytenr,
+ *mirror_nump,
+ btrfsic_get_block_type(state, next_block));
+ next_block->logical_bytenr = next_bytenr;
+
+ next_block->mirror_num = *mirror_nump;
+ l = btrfsic_block_link_hashtable_lookup(
+ next_block_ctx->dev->bdev,
+ next_block_ctx->dev_bytenr,
+ block_ctx->dev->bdev,
+ block_ctx->dev_bytenr,
+ &state->block_link_hashtable);
+ }
+
+ next_block->disk_key = *disk_key;
+ if (NULL == l) {
+ l = btrfsic_block_link_alloc();
+ if (NULL == l) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+
+ did_alloc_block_link = 1;
+ l->block_ref_to = next_block;
+ l->block_ref_from = block;
+ l->ref_cnt = 1;
+ l->parent_generation = parent_generation;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+
+ list_add(&l->node_ref_to, &block->ref_to_list);
+ list_add(&l->node_ref_from, &next_block->ref_from_list);
+
+ btrfsic_block_link_hashtable_add(l,
+ &state->block_link_hashtable);
+ } else {
+ did_alloc_block_link = 0;
+ if (0 == limit_nesting) {
+ l->ref_cnt++;
+ l->parent_generation = parent_generation;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+ }
+ }
+
+ if (limit_nesting > 0 && did_alloc_block_link) {
+ ret = btrfsic_read_block(state, next_block_ctx);
+ if (ret < (int)next_block_ctx->len) {
+ printk(KERN_INFO
+ "btrfsic: read block @logical %llu failed!\n",
+ (unsigned long long)next_bytenr);
+ btrfsic_release_block_ctx(next_block_ctx);
+ *next_blockp = NULL;
+ return -1;
+ }
+
+ *next_blockp = next_block;
+ } else {
+ *next_blockp = NULL;
+ }
+ (*mirror_nump)++;
+
+ return 0;
+}
+
+static int btrfsic_handle_extent_data(
+ struct btrfsic_state *state,
+ struct btrfsic_block *block,
+ struct btrfsic_block_data_ctx *block_ctx,
+ u32 item_offset, int force_iodone_flag)
+{
+ int ret;
+ struct btrfs_file_extent_item file_extent_item;
+ u64 file_extent_item_offset;
+ u64 next_bytenr;
+ u64 num_bytes;
+ u64 generation;
+ struct btrfsic_block_link *l;
+
+ file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
+ item_offset;
+ if (file_extent_item_offset +
+ offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
+ block_ctx->len) {
+ printk(KERN_INFO
+ "btrfsic: file item out of bounce at logical %llu, dev %s\n",
+ block_ctx->start, block_ctx->dev->name);
+ return -1;
+ }
+
+ btrfsic_read_from_block_data(block_ctx, &file_extent_item,
+ file_extent_item_offset,
+ offsetof(struct btrfs_file_extent_item, disk_num_bytes));
+ if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
+ ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
+ printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
+ file_extent_item.type,
+ (unsigned long long)
+ le64_to_cpu(file_extent_item.disk_bytenr));
+ return 0;
+ }
+
+ if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
+ block_ctx->len) {
+ printk(KERN_INFO
+ "btrfsic: file item out of bounce at logical %llu, dev %s\n",
+ block_ctx->start, block_ctx->dev->name);
+ return -1;
+ }
+ btrfsic_read_from_block_data(block_ctx, &file_extent_item,
+ file_extent_item_offset,
+ sizeof(struct btrfs_file_extent_item));
+ next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
+ le64_to_cpu(file_extent_item.offset);
+ generation = le64_to_cpu(file_extent_item.generation);
+ num_bytes = le64_to_cpu(file_extent_item.num_bytes);
+ generation = le64_to_cpu(file_extent_item.generation);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
+ printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
+ " offset = %llu, num_bytes = %llu\n",
+ file_extent_item.type,
+ (unsigned long long)
+ le64_to_cpu(file_extent_item.disk_bytenr),
+ (unsigned long long)le64_to_cpu(file_extent_item.offset),
+ (unsigned long long)num_bytes);
+ while (num_bytes > 0) {
+ u32 chunk_len;
+ int num_copies;
+ int mirror_num;
+
+ if (num_bytes > state->datablock_size)
+ chunk_len = state->datablock_size;
+ else
+ chunk_len = num_bytes;
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, state->datablock_size);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ struct btrfsic_block_data_ctx next_block_ctx;
+ struct btrfsic_block *next_block;
+ int block_was_created;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO "btrfsic_handle_extent_data("
+ "mirror_num=%d)\n", mirror_num);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
+ printk(KERN_INFO
+ "\tdisk_bytenr = %llu, num_bytes %u\n",
+ (unsigned long long)next_bytenr,
+ chunk_len);
+ ret = btrfsic_map_block(state, next_bytenr,
+ chunk_len, &next_block_ctx,
+ mirror_num);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(@%llu,"
+ " mirror=%d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(
+ state,
+ &next_block_ctx,
+ "referenced ",
+ 0,
+ force_iodone_flag,
+ !force_iodone_flag,
+ mirror_num,
+ &block_was_created);
+ if (NULL == next_block) {
+ printk(KERN_INFO
+ "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(&next_block_ctx);
+ return -1;
+ }
+ if (!block_was_created) {
+ if (next_block->logical_bytenr != next_bytenr &&
+ !(!next_block->is_metadata &&
+ 0 == next_block->logical_bytenr)) {
+ printk(KERN_INFO
+ "Referenced block"
+ " @%llu (%s/%llu/%d)"
+ " found in hash table, D,"
+ " bytenr mismatch"
+ " (!= stored %llu).\n",
+ (unsigned long long)next_bytenr,
+ next_block_ctx.dev->name,
+ (unsigned long long)
+ next_block_ctx.dev_bytenr,
+ mirror_num,
+ (unsigned long long)
+ next_block->logical_bytenr);
+ }
+ next_block->logical_bytenr = next_bytenr;
+ next_block->mirror_num = mirror_num;
+ }
+
+ l = btrfsic_block_link_lookup_or_add(state,
+ &next_block_ctx,
+ next_block, block,
+ generation);
+ btrfsic_release_block_ctx(&next_block_ctx);
+ if (NULL == l)
+ return -1;
+ }
+
+ next_bytenr += chunk_len;
+ num_bytes -= chunk_len;
+ }
+
+ return 0;
+}
+
+static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
+ struct btrfsic_block_data_ctx *block_ctx_out,
+ int mirror_num)
+{
+ int ret;
+ u64 length;
+ struct btrfs_bio *multi = NULL;
+ struct btrfs_device *device;
+
+ length = len;
+ ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
+ bytenr, &length, &multi, mirror_num);
+
+ device = multi->stripes[0].dev;
+ block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
+ block_ctx_out->dev_bytenr = multi->stripes[0].physical;
+ block_ctx_out->start = bytenr;
+ block_ctx_out->len = len;
+ block_ctx_out->datav = NULL;
+ block_ctx_out->pagev = NULL;
+ block_ctx_out->mem_to_free = NULL;
+
+ if (0 == ret)
+ kfree(multi);
+ if (NULL == block_ctx_out->dev) {
+ ret = -ENXIO;
+ printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
+ }
+
+ return ret;
+}
+
+static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
+ u32 len, struct block_device *bdev,
+ struct btrfsic_block_data_ctx *block_ctx_out)
+{
+ block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
+ block_ctx_out->dev_bytenr = bytenr;
+ block_ctx_out->start = bytenr;
+ block_ctx_out->len = len;
+ block_ctx_out->datav = NULL;
+ block_ctx_out->pagev = NULL;
+ block_ctx_out->mem_to_free = NULL;
+ if (NULL != block_ctx_out->dev) {
+ return 0;
+ } else {
+ printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
+ return -ENXIO;
+ }
+}
+
+static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
+{
+ if (block_ctx->mem_to_free) {
+ unsigned int num_pages;
+
+ BUG_ON(!block_ctx->datav);
+ BUG_ON(!block_ctx->pagev);
+ num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+ while (num_pages > 0) {
+ num_pages--;
+ if (block_ctx->datav[num_pages]) {
+ kunmap(block_ctx->pagev[num_pages]);
+ block_ctx->datav[num_pages] = NULL;
+ }
+ if (block_ctx->pagev[num_pages]) {
+ __free_page(block_ctx->pagev[num_pages]);
+ block_ctx->pagev[num_pages] = NULL;
+ }
+ }
+
+ kfree(block_ctx->mem_to_free);
+ block_ctx->mem_to_free = NULL;
+ block_ctx->pagev = NULL;
+ block_ctx->datav = NULL;
+ }
+}
+
+static int btrfsic_read_block(struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx)
+{
+ unsigned int num_pages;
+ unsigned int i;
+ u64 dev_bytenr;
+ int ret;
+
+ BUG_ON(block_ctx->datav);
+ BUG_ON(block_ctx->pagev);
+ BUG_ON(block_ctx->mem_to_free);
+ if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
+ printk(KERN_INFO
+ "btrfsic: read_block() with unaligned bytenr %llu\n",
+ (unsigned long long)block_ctx->dev_bytenr);
+ return -1;
+ }
+
+ num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+ block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
+ sizeof(*block_ctx->pagev)) *
+ num_pages, GFP_NOFS);
+ if (!block_ctx->mem_to_free)
+ return -1;
+ block_ctx->datav = block_ctx->mem_to_free;
+ block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
+ for (i = 0; i < num_pages; i++) {
+ block_ctx->pagev[i] = alloc_page(GFP_NOFS);
+ if (!block_ctx->pagev[i])
+ return -1;
+ }
+
+ dev_bytenr = block_ctx->dev_bytenr;
+ for (i = 0; i < num_pages;) {
+ struct bio *bio;
+ unsigned int j;
+ DECLARE_COMPLETION_ONSTACK(complete);
+
+ bio = bio_alloc(GFP_NOFS, num_pages - i);
+ if (!bio) {
+ printk(KERN_INFO
+ "btrfsic: bio_alloc() for %u pages failed!\n",
+ num_pages - i);
+ return -1;
+ }
+ bio->bi_bdev = block_ctx->dev->bdev;
+ bio->bi_sector = dev_bytenr >> 9;
+ bio->bi_end_io = btrfsic_complete_bio_end_io;
+ bio->bi_private = &complete;
+
+ for (j = i; j < num_pages; j++) {
+ ret = bio_add_page(bio, block_ctx->pagev[j],
+ PAGE_CACHE_SIZE, 0);
+ if (PAGE_CACHE_SIZE != ret)
+ break;
+ }
+ if (j == i) {
+ printk(KERN_INFO
+ "btrfsic: error, failed to add a single page!\n");
+ return -1;
+ }
+ submit_bio(READ, bio);
+
+ /* this will also unplug the queue */
+ wait_for_completion(&complete);
+
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ printk(KERN_INFO
+ "btrfsic: read error at logical %llu dev %s!\n",
+ block_ctx->start, block_ctx->dev->name);
+ bio_put(bio);
+ return -1;
+ }
+ bio_put(bio);
+ dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
+ i = j;
+ }
+ for (i = 0; i < num_pages; i++) {
+ block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
+ if (!block_ctx->datav[i]) {
+ printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
+ block_ctx->dev->name);
+ return -1;
+ }
+ }
+
+ return block_ctx->len;
+}
+
+static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
+static void btrfsic_dump_database(struct btrfsic_state *state)
+{
+ struct list_head *elem_all;
+
+ BUG_ON(NULL == state);
+
+ printk(KERN_INFO "all_blocks_list:\n");
+ list_for_each(elem_all, &state->all_blocks_list) {
+ const struct btrfsic_block *const b_all =
+ list_entry(elem_all, struct btrfsic_block,
+ all_blocks_node);
+ struct list_head *elem_ref_to;
+ struct list_head *elem_ref_from;
+
+ printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num);
+
+ list_for_each(elem_ref_to, &b_all->ref_to_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to,
+ struct btrfsic_block_link,
+ node_ref_to);
+
+ printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
+ " refers %u* to"
+ " %c @%llu (%s/%llu/%d)\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ }
+
+ list_for_each(elem_ref_from, &b_all->ref_from_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_from,
+ struct btrfsic_block_link,
+ node_ref_from);
+
+ printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
+ " is ref %u* from"
+ " %c @%llu (%s/%llu/%d)\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)
+ l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)
+ l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num);
+ }
+
+ printk(KERN_INFO "\n");
+ }
+}
+
+/*
+ * Test whether the disk block contains a tree block (leaf or node)
+ * (note that this test fails for the super block)
+ */
+static int btrfsic_test_for_metadata(struct btrfsic_state *state,
+ char **datav, unsigned int num_pages)
+{
+ struct btrfs_header *h;
+ u8 csum[BTRFS_CSUM_SIZE];
+ u32 crc = ~(u32)0;
+ unsigned int i;
+
+ if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
+ return 1; /* not metadata */
+ num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
+ h = (struct btrfs_header *)datav[0];
+
+ if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
+ return 1;
+
+ for (i = 0; i < num_pages; i++) {
+ u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
+ size_t sublen = i ? PAGE_CACHE_SIZE :
+ (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
+
+ crc = crc32c(crc, data, sublen);
+ }
+ btrfs_csum_final(crc, csum);
+ if (memcmp(csum, h->csum, state->csum_size))
+ return 1;
+
+ return 0; /* is metadata */
+}
+
+static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr, char **mapped_datav,
+ unsigned int num_pages,
+ struct bio *bio, int *bio_is_patched,
+ struct buffer_head *bh,
+ int submit_bio_bh_rw)
+{
+ int is_metadata;
+ struct btrfsic_block *block;
+ struct btrfsic_block_data_ctx block_ctx;
+ int ret;
+ struct btrfsic_state *state = dev_state->state;
+ struct block_device *bdev = dev_state->bdev;
+ unsigned int processed_len;
+
+ if (NULL != bio_is_patched)
+ *bio_is_patched = 0;
+
+again:
+ if (num_pages == 0)
+ return;
+
+ processed_len = 0;
+ is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
+ num_pages));
+
+ block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
+ &state->block_hashtable);
+ if (NULL != block) {
+ u64 bytenr = 0;
+ struct list_head *elem_ref_to;
+ struct list_head *tmp_ref_to;
+
+ if (block->is_superblock) {
+ bytenr = le64_to_cpu(((struct btrfs_super_block *)
+ mapped_datav[0])->bytenr);
+ if (num_pages * PAGE_CACHE_SIZE <
+ BTRFS_SUPER_INFO_SIZE) {
+ printk(KERN_INFO
+ "btrfsic: cannot work with too short bios!\n");
+ return;
+ }
+ is_metadata = 1;
+ BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
+ processed_len = BTRFS_SUPER_INFO_SIZE;
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
+ printk(KERN_INFO
+ "[before new superblock is written]:\n");
+ btrfsic_dump_tree_sub(state, block, 0);
+ }
+ }
+ if (is_metadata) {
+ if (!block->is_superblock) {
+ if (num_pages * PAGE_CACHE_SIZE <
+ state->metablock_size) {
+ printk(KERN_INFO
+ "btrfsic: cannot work with too short bios!\n");
+ return;
+ }
+ processed_len = state->metablock_size;
+ bytenr = le64_to_cpu(((struct btrfs_header *)
+ mapped_datav[0])->bytenr);
+ btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
+ dev_state,
+ dev_bytenr);
+ }
+ if (block->logical_bytenr != bytenr) {
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/%d)"
+ " found in hash table, %c,"
+ " bytenr mismatch"
+ " (!= stored %llu).\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)
+ block->logical_bytenr);
+ block->logical_bytenr = bytenr;
+ } else if (state->print_mask &
+ BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/%d)"
+ " found in hash table, %c.\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ btrfsic_get_block_type(state, block));
+ } else {
+ if (num_pages * PAGE_CACHE_SIZE <
+ state->datablock_size) {
+ printk(KERN_INFO
+ "btrfsic: cannot work with too short bios!\n");
+ return;
+ }
+ processed_len = state->datablock_size;
+ bytenr = block->logical_bytenr;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/%d)"
+ " found in hash table, %c.\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ btrfsic_get_block_type(state, block));
+ }
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "ref_to_list: %cE, ref_from_list: %cE\n",
+ list_empty(&block->ref_to_list) ? ' ' : '!',
+ list_empty(&block->ref_from_list) ? ' ' : '!');
+ if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
+ printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
+ " @%llu (%s/%llu/%d), old(gen=%llu,"
+ " objectid=%llu, type=%d, offset=%llu),"
+ " new(gen=%llu),"
+ " which is referenced by most recent superblock"
+ " (superblockgen=%llu)!\n",
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ (unsigned long long)block->generation,
+ (unsigned long long)
+ le64_to_cpu(block->disk_key.objectid),
+ block->disk_key.type,
+ (unsigned long long)
+ le64_to_cpu(block->disk_key.offset),
+ (unsigned long long)
+ le64_to_cpu(((struct btrfs_header *)
+ mapped_datav[0])->generation),
+ (unsigned long long)
+ state->max_superblock_generation);
+ btrfsic_dump_tree(state);
+ }
+
+ if (!block->is_iodone && !block->never_written) {
+ printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
+ " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
+ " which is not yet iodone!\n",
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr,
+ block->mirror_num,
+ (unsigned long long)block->generation,
+ (unsigned long long)
+ le64_to_cpu(((struct btrfs_header *)
+ mapped_datav[0])->generation));
+ /* it would not be safe to go on */
+ btrfsic_dump_tree(state);
+ goto continue_loop;
+ }
+
+ /*
+ * Clear all references of this block. Do not free
+ * the block itself even if is not referenced anymore
+ * because it still carries valueable information
+ * like whether it was ever written and IO completed.
+ */
+ list_for_each_safe(elem_ref_to, tmp_ref_to,
+ &block->ref_to_list) {
+ struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to,
+ struct btrfsic_block_link,
+ node_ref_to);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_rem_link(state, l);
+ l->ref_cnt--;
+ if (0 == l->ref_cnt) {
+ list_del(&l->node_ref_to);
+ list_del(&l->node_ref_from);
+ btrfsic_block_link_hashtable_remove(l);
+ btrfsic_block_link_free(l);
+ }
+ }
+
+ if (block->is_superblock)
+ ret = btrfsic_map_superblock(state, bytenr,
+ processed_len,
+ bdev, &block_ctx);
+ else
+ ret = btrfsic_map_block(state, bytenr, processed_len,
+ &block_ctx, 0);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(root @%llu)"
+ " failed!\n", (unsigned long long)bytenr);
+ goto continue_loop;
+ }
+ block_ctx.datav = mapped_datav;
+ /* the following is required in case of writes to mirrors,
+ * use the same that was used for the lookup */
+ block_ctx.dev = dev_state;
+ block_ctx.dev_bytenr = dev_bytenr;
+
+ if (is_metadata || state->include_extent_data) {
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ if (NULL != bio) {
+ block->is_iodone = 0;
+ BUG_ON(NULL == bio_is_patched);
+ if (!*bio_is_patched) {
+ block->orig_bio_bh_private =
+ bio->bi_private;
+ block->orig_bio_bh_end_io.bio =
+ bio->bi_end_io;
+ block->next_in_same_bio = NULL;
+ bio->bi_private = block;
+ bio->bi_end_io = btrfsic_bio_end_io;
+ *bio_is_patched = 1;
+ } else {
+ struct btrfsic_block *chained_block =
+ (struct btrfsic_block *)
+ bio->bi_private;
+
+ BUG_ON(NULL == chained_block);
+ block->orig_bio_bh_private =
+ chained_block->orig_bio_bh_private;
+ block->orig_bio_bh_end_io.bio =
+ chained_block->orig_bio_bh_end_io.
+ bio;
+ block->next_in_same_bio = chained_block;
+ bio->bi_private = block;
+ }
+ } else if (NULL != bh) {
+ block->is_iodone = 0;
+ block->orig_bio_bh_private = bh->b_private;
+ block->orig_bio_bh_end_io.bh = bh->b_end_io;
+ block->next_in_same_bio = NULL;
+ bh->b_private = block;
+ bh->b_end_io = btrfsic_bh_end_io;
+ } else {
+ block->is_iodone = 1;
+ block->orig_bio_bh_private = NULL;
+ block->orig_bio_bh_end_io.bio = NULL;
+ block->next_in_same_bio = NULL;
+ }
+ }
+
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = submit_bio_bh_rw;
+ if (is_metadata) {
+ block->logical_bytenr = bytenr;
+ block->is_metadata = 1;
+ if (block->is_superblock) {
+ BUG_ON(PAGE_CACHE_SIZE !=
+ BTRFS_SUPER_INFO_SIZE);
+ ret = btrfsic_process_written_superblock(
+ state,
+ block,
+ (struct btrfs_super_block *)
+ mapped_datav[0]);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
+ printk(KERN_INFO
+ "[after new superblock is written]:\n");
+ btrfsic_dump_tree_sub(state, block, 0);
+ }
+ } else {
+ block->mirror_num = 0; /* unknown */
+ ret = btrfsic_process_metablock(
+ state,
+ block,
+ &block_ctx,
+ 0, 0);
+ }
+ if (ret)
+ printk(KERN_INFO
+ "btrfsic: btrfsic_process_metablock"
+ "(root @%llu) failed!\n",
+ (unsigned long long)dev_bytenr);
+ } else {
+ block->is_metadata = 0;
+ block->mirror_num = 0; /* unknown */
+ block->generation = BTRFSIC_GENERATION_UNKNOWN;
+ if (!state->include_extent_data
+ && list_empty(&block->ref_from_list)) {
+ /*
+ * disk block is overwritten with extent
+ * data (not meta data) and we are configured
+ * to not include extent data: take the
+ * chance and free the block's memory
+ */
+ btrfsic_block_hashtable_remove(block);
+ list_del(&block->all_blocks_node);
+ btrfsic_block_free(block);
+ }
+ }
+ btrfsic_release_block_ctx(&block_ctx);
+ } else {
+ /* block has not been found in hash table */
+ u64 bytenr;
+
+ if (!is_metadata) {
+ processed_len = state->datablock_size;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO "Written block (%s/%llu/?)"
+ " !found in hash table, D.\n",
+ dev_state->name,
+ (unsigned long long)dev_bytenr);
+ if (!state->include_extent_data) {
+ /* ignore that written D block */
+ goto continue_loop;
+ }
+
+ /* this is getting ugly for the
+ * include_extent_data case... */
+ bytenr = 0; /* unknown */
+ block_ctx.start = bytenr;
+ block_ctx.len = processed_len;
+ block_ctx.mem_to_free = NULL;
+ block_ctx.pagev = NULL;
+ } else {
+ processed_len = state->metablock_size;
+ bytenr = le64_to_cpu(((struct btrfs_header *)
+ mapped_datav[0])->bytenr);
+ btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
+ dev_bytenr);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "Written block @%llu (%s/%llu/?)"
+ " !found in hash table, M.\n",
+ (unsigned long long)bytenr,
+ dev_state->name,
+ (unsigned long long)dev_bytenr);
+
+ ret = btrfsic_map_block(state, bytenr, processed_len,
+ &block_ctx, 0);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(root @%llu)"
+ " failed!\n",
+ (unsigned long long)dev_bytenr);
+ goto continue_loop;
+ }
+ }
+ block_ctx.datav = mapped_datav;
+ /* the following is required in case of writes to mirrors,
+ * use the same that was used for the lookup */
+ block_ctx.dev = dev_state;
+ block_ctx.dev_bytenr = dev_bytenr;
+
+ block = btrfsic_block_alloc();
+ if (NULL == block) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(&block_ctx);
+ goto continue_loop;
+ }
+ block->dev_state = dev_state;
+ block->dev_bytenr = dev_bytenr;
+ block->logical_bytenr = bytenr;
+ block->is_metadata = is_metadata;
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ block->mirror_num = 0; /* unknown */
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = submit_bio_bh_rw;
+ if (NULL != bio) {
+ block->is_iodone = 0;
+ BUG_ON(NULL == bio_is_patched);
+ if (!*bio_is_patched) {
+ block->orig_bio_bh_private = bio->bi_private;
+ block->orig_bio_bh_end_io.bio = bio->bi_end_io;
+ block->next_in_same_bio = NULL;
+ bio->bi_private = block;
+ bio->bi_end_io = btrfsic_bio_end_io;
+ *bio_is_patched = 1;
+ } else {
+ struct btrfsic_block *chained_block =
+ (struct btrfsic_block *)
+ bio->bi_private;
+
+ BUG_ON(NULL == chained_block);
+ block->orig_bio_bh_private =
+ chained_block->orig_bio_bh_private;
+ block->orig_bio_bh_end_io.bio =
+ chained_block->orig_bio_bh_end_io.bio;
+ block->next_in_same_bio = chained_block;
+ bio->bi_private = block;
+ }
+ } else if (NULL != bh) {
+ block->is_iodone = 0;
+ block->orig_bio_bh_private = bh->b_private;
+ block->orig_bio_bh_end_io.bh = bh->b_end_io;
+ block->next_in_same_bio = NULL;
+ bh->b_private = block;
+ bh->b_end_io = btrfsic_bh_end_io;
+ } else {
+ block->is_iodone = 1;
+ block->orig_bio_bh_private = NULL;
+ block->orig_bio_bh_end_io.bio = NULL;
+ block->next_in_same_bio = NULL;
+ }
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "New written %c-block @%llu (%s/%llu/%d)\n",
+ is_metadata ? 'M' : 'D',
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+ list_add(&block->all_blocks_node, &state->all_blocks_list);
+ btrfsic_block_hashtable_add(block, &state->block_hashtable);
+
+ if (is_metadata) {
+ ret = btrfsic_process_metablock(state, block,
+ &block_ctx, 0, 0);
+ if (ret)
+ printk(KERN_INFO
+ "btrfsic: process_metablock(root @%llu)"
+ " failed!\n",
+ (unsigned long long)dev_bytenr);
+ }
+ btrfsic_release_block_ctx(&block_ctx);
+ }
+
+continue_loop:
+ BUG_ON(!processed_len);
+ dev_bytenr += processed_len;
+ mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
+ num_pages -= processed_len >> PAGE_CACHE_SHIFT;
+ goto again;
+}
+
+static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
+{
+ struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
+ int iodone_w_error;
+
+ /* mutex is not held! This is not save if IO is not yet completed
+ * on umount */
+ iodone_w_error = 0;
+ if (bio_error_status)
+ iodone_w_error = 1;
+
+ BUG_ON(NULL == block);
+ bp->bi_private = block->orig_bio_bh_private;
+ bp->bi_end_io = block->orig_bio_bh_end_io.bio;
+
+ do {
+ struct btrfsic_block *next_block;
+ struct btrfsic_dev_state *const dev_state = block->dev_state;
+
+ if ((dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
+ bio_error_status,
+ btrfsic_get_block_type(dev_state->state, block),
+ (unsigned long long)block->logical_bytenr,
+ dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+ next_block = block->next_in_same_bio;
+ block->iodone_w_error = iodone_w_error;
+ if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ dev_state->last_flush_gen++;
+ if ((dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bio_end_io() new %s flush_gen=%llu\n",
+ dev_state->name,
+ (unsigned long long)
+ dev_state->last_flush_gen);
+ }
+ if (block->submit_bio_bh_rw & REQ_FUA)
+ block->flush_gen = 0; /* FUA completed means block is
+ * on disk */
+ block->is_iodone = 1; /* for FLUSH, this releases the block */
+ block = next_block;
+ } while (NULL != block);
+
+ bp->bi_end_io(bp, bio_error_status);
+}
+
+static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
+{
+ struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
+ int iodone_w_error = !uptodate;
+ struct btrfsic_dev_state *dev_state;
+
+ BUG_ON(NULL == block);
+ dev_state = block->dev_state;
+ if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
+ iodone_w_error,
+ btrfsic_get_block_type(dev_state->state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+
+ block->iodone_w_error = iodone_w_error;
+ if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ dev_state->last_flush_gen++;
+ if ((dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
+ printk(KERN_INFO
+ "bh_end_io() new %s flush_gen=%llu\n",
+ dev_state->name,
+ (unsigned long long)dev_state->last_flush_gen);
+ }
+ if (block->submit_bio_bh_rw & REQ_FUA)
+ block->flush_gen = 0; /* FUA completed means block is on disk */
+
+ bh->b_private = block->orig_bio_bh_private;
+ bh->b_end_io = block->orig_bio_bh_end_io.bh;
+ block->is_iodone = 1; /* for FLUSH, this releases the block */
+ bh->b_end_io(bh, uptodate);
+}
+
+static int btrfsic_process_written_superblock(
+ struct btrfsic_state *state,
+ struct btrfsic_block *const superblock,
+ struct btrfs_super_block *const super_hdr)
+{
+ int pass;
+
+ superblock->generation = btrfs_super_generation(super_hdr);
+ if (!(superblock->generation > state->max_superblock_generation ||
+ 0 == state->max_superblock_generation)) {
+ if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
+ printk(KERN_INFO
+ "btrfsic: superblock @%llu (%s/%llu/%d)"
+ " with old gen %llu <= %llu\n",
+ (unsigned long long)superblock->logical_bytenr,
+ superblock->dev_state->name,
+ (unsigned long long)superblock->dev_bytenr,
+ superblock->mirror_num,
+ (unsigned long long)
+ btrfs_super_generation(super_hdr),
+ (unsigned long long)
+ state->max_superblock_generation);
+ } else {
+ if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
+ printk(KERN_INFO
+ "btrfsic: got new superblock @%llu (%s/%llu/%d)"
+ " with new gen %llu > %llu\n",
+ (unsigned long long)superblock->logical_bytenr,
+ superblock->dev_state->name,
+ (unsigned long long)superblock->dev_bytenr,
+ superblock->mirror_num,
+ (unsigned long long)
+ btrfs_super_generation(super_hdr),
+ (unsigned long long)
+ state->max_superblock_generation);
+
+ state->max_superblock_generation =
+ btrfs_super_generation(super_hdr);
+ state->latest_superblock = superblock;
+ }
+
+ for (pass = 0; pass < 3; pass++) {
+ int ret;
+ u64 next_bytenr;
+ struct btrfsic_block *next_block;
+ struct btrfsic_block_data_ctx tmp_next_block_ctx;
+ struct btrfsic_block_link *l;
+ int num_copies;
+ int mirror_num;
+ const char *additional_string = NULL;
+ struct btrfs_disk_key tmp_disk_key;
+
+ tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
+ tmp_disk_key.offset = 0;
+
+ switch (pass) {
+ case 0:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+ additional_string = "root ";
+ next_bytenr = btrfs_super_root(super_hdr);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "root@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 1:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+ additional_string = "chunk ";
+ next_bytenr = btrfs_super_chunk_root(super_hdr);
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "chunk@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ case 2:
+ tmp_disk_key.objectid =
+ cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+ additional_string = "log ";
+ next_bytenr = btrfs_super_log_root(super_hdr);
+ if (0 == next_bytenr)
+ continue;
+ if (state->print_mask &
+ BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
+ printk(KERN_INFO "log@%llu\n",
+ (unsigned long long)next_bytenr);
+ break;
+ }
+
+ num_copies =
+ btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ next_bytenr, BTRFS_SUPER_INFO_SIZE);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
+ printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
+ (unsigned long long)next_bytenr, num_copies);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ int was_created;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic_process_written_superblock("
+ "mirror_num=%d)\n", mirror_num);
+ ret = btrfsic_map_block(state, next_bytenr,
+ BTRFS_SUPER_INFO_SIZE,
+ &tmp_next_block_ctx,
+ mirror_num);
+ if (ret) {
+ printk(KERN_INFO
+ "btrfsic: btrfsic_map_block(@%llu,"
+ " mirror=%d) failed!\n",
+ (unsigned long long)next_bytenr,
+ mirror_num);
+ return -1;
+ }
+
+ next_block = btrfsic_block_lookup_or_add(
+ state,
+ &tmp_next_block_ctx,
+ additional_string,
+ 1, 0, 1,
+ mirror_num,
+ &was_created);
+ if (NULL == next_block) {
+ printk(KERN_INFO
+ "btrfsic: error, kmalloc failed!\n");
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ return -1;
+ }
+
+ next_block->disk_key = tmp_disk_key;
+ if (was_created)
+ next_block->generation =
+ BTRFSIC_GENERATION_UNKNOWN;
+ l = btrfsic_block_link_lookup_or_add(
+ state,
+ &tmp_next_block_ctx,
+ next_block,
+ superblock,
+ BTRFSIC_GENERATION_UNKNOWN);
+ btrfsic_release_block_ctx(&tmp_next_block_ctx);
+ if (NULL == l)
+ return -1;
+ }
+ }
+
+ if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
+ WARN_ON(1);
+ btrfsic_dump_tree(state);
+ }
+
+ return 0;
+}
+
+static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
+ struct btrfsic_block *const block,
+ int recursion_level)
+{
+ struct list_head *elem_ref_to;
+ int ret = 0;
+
+ if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
+ /*
+ * Note that this situation can happen and does not
+ * indicate an error in regular cases. It happens
+ * when disk blocks are freed and later reused.
+ * The check-integrity module is not aware of any
+ * block free operations, it just recognizes block
+ * write operations. Therefore it keeps the linkage
+ * information for a block until a block is
+ * rewritten. This can temporarily cause incorrect
+ * and even circular linkage informations. This
+ * causes no harm unless such blocks are referenced
+ * by the most recent super block.
+ */
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic: abort cyclic linkage (case 1).\n");
+
+ return ret;
+ }
+
+ /*
+ * This algorithm is recursive because the amount of used stack
+ * space is very small and the max recursion depth is limited.
+ */
+ list_for_each(elem_ref_to, &block->ref_to_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to, struct btrfsic_block_link,
+ node_ref_to);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "rl=%d, %c @%llu (%s/%llu/%d)"
+ " %u* refers to %c @%llu (%s/%llu/%d)\n",
+ recursion_level,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ if (l->block_ref_to->never_written) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " which is never written!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ ret = -1;
+ } else if (!l->block_ref_to->is_iodone) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " which is not yet iodone!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+ ret = -1;
+ } else if (l->parent_generation !=
+ l->block_ref_to->generation &&
+ BTRFSIC_GENERATION_UNKNOWN !=
+ l->parent_generation &&
+ BTRFSIC_GENERATION_UNKNOWN !=
+ l->block_ref_to->generation) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " with generation %llu !="
+ " parent generation %llu!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num,
+ (unsigned long long)l->block_ref_to->generation,
+ (unsigned long long)l->parent_generation);
+ ret = -1;
+ } else if (l->block_ref_to->flush_gen >
+ l->block_ref_to->dev_state->last_flush_gen) {
+ printk(KERN_INFO "btrfs: attempt to write superblock"
+ " which references block %c @%llu (%s/%llu/%d)"
+ " which is not flushed out of disk's write cache"
+ " (block flush_gen=%llu,"
+ " dev->flush_gen=%llu)!\n",
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num,
+ (unsigned long long)block->flush_gen,
+ (unsigned long long)
+ l->block_ref_to->dev_state->last_flush_gen);
+ ret = -1;
+ } else if (-1 == btrfsic_check_all_ref_blocks(state,
+ l->block_ref_to,
+ recursion_level +
+ 1)) {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
+
+static int btrfsic_is_block_ref_by_superblock(
+ const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int recursion_level)
+{
+ struct list_head *elem_ref_from;
+
+ if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
+ /* refer to comment at "abort cyclic linkage (case 1)" */
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "btrfsic: abort cyclic linkage (case 2).\n");
+
+ return 0;
+ }
+
+ /*
+ * This algorithm is recursive because the amount of used stack space
+ * is very small and the max recursion depth is limited.
+ */
+ list_for_each(elem_ref_from, &block->ref_from_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_from, struct btrfsic_block_link,
+ node_ref_from);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "rl=%d, %c @%llu (%s/%llu/%d)"
+ " is ref %u* from %c @%llu (%s/%llu/%d)\n",
+ recursion_level,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num,
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)
+ l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)
+ l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num);
+ if (l->block_ref_from->is_superblock &&
+ state->latest_superblock->dev_bytenr ==
+ l->block_ref_from->dev_bytenr &&
+ state->latest_superblock->dev_state->bdev ==
+ l->block_ref_from->dev_state->bdev)
+ return 1;
+ else if (btrfsic_is_block_ref_by_superblock(state,
+ l->block_ref_from,
+ recursion_level +
+ 1))
+ return 1;
+ }
+
+ return 0;
+}
+
+static void btrfsic_print_add_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l)
+{
+ printk(KERN_INFO
+ "Add %u* link from %c @%llu (%s/%llu/%d)"
+ " to %c @%llu (%s/%llu/%d).\n",
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+}
+
+static void btrfsic_print_rem_link(const struct btrfsic_state *state,
+ const struct btrfsic_block_link *l)
+{
+ printk(KERN_INFO
+ "Rem %u* link from %c @%llu (%s/%llu/%d)"
+ " to %c @%llu (%s/%llu/%d).\n",
+ l->ref_cnt,
+ btrfsic_get_block_type(state, l->block_ref_from),
+ (unsigned long long)l->block_ref_from->logical_bytenr,
+ l->block_ref_from->dev_state->name,
+ (unsigned long long)l->block_ref_from->dev_bytenr,
+ l->block_ref_from->mirror_num,
+ btrfsic_get_block_type(state, l->block_ref_to),
+ (unsigned long long)l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name,
+ (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num);
+}
+
+static char btrfsic_get_block_type(const struct btrfsic_state *state,
+ const struct btrfsic_block *block)
+{
+ if (block->is_superblock &&
+ state->latest_superblock->dev_bytenr == block->dev_bytenr &&
+ state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
+ return 'S';
+ else if (block->is_superblock)
+ return 's';
+ else if (block->is_metadata)
+ return 'M';
+ else
+ return 'D';
+}
+
+static void btrfsic_dump_tree(const struct btrfsic_state *state)
+{
+ btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
+}
+
+static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
+ const struct btrfsic_block *block,
+ int indent_level)
+{
+ struct list_head *elem_ref_to;
+ int indent_add;
+ static char buf[80];
+ int cursor_position;
+
+ /*
+ * Should better fill an on-stack buffer with a complete line and
+ * dump it at once when it is time to print a newline character.
+ */
+
+ /*
+ * This algorithm is recursive because the amount of used stack space
+ * is very small and the max recursion depth is limited.
+ */
+ indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ block->dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ block->mirror_num);
+ if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
+ printk("[...]\n");
+ return;
+ }
+ printk(buf);
+ indent_level += indent_add;
+ if (list_empty(&block->ref_to_list)) {
+ printk("\n");
+ return;
+ }
+ if (block->mirror_num > 1 &&
+ !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
+ printk(" [...]\n");
+ return;
+ }
+
+ cursor_position = indent_level;
+ list_for_each(elem_ref_to, &block->ref_to_list) {
+ const struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to, struct btrfsic_block_link,
+ node_ref_to);
+
+ while (cursor_position < indent_level) {
+ printk(" ");
+ cursor_position++;
+ }
+ if (l->ref_cnt > 1)
+ indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
+ else
+ indent_add = sprintf(buf, " --> ");
+ if (indent_level + indent_add >
+ BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
+ printk("[...]\n");
+ cursor_position = 0;
+ continue;
+ }
+
+ printk(buf);
+
+ btrfsic_dump_tree_sub(state, l->block_ref_to,
+ indent_level + indent_add);
+ cursor_position = 0;
+ }
+}
+
+static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *next_block_ctx,
+ struct btrfsic_block *next_block,
+ struct btrfsic_block *from_block,
+ u64 parent_generation)
+{
+ struct btrfsic_block_link *l;
+
+ l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
+ next_block_ctx->dev_bytenr,
+ from_block->dev_state->bdev,
+ from_block->dev_bytenr,
+ &state->block_link_hashtable);
+ if (NULL == l) {
+ l = btrfsic_block_link_alloc();
+ if (NULL == l) {
+ printk(KERN_INFO
+ "btrfsic: error, kmalloc" " failed!\n");
+ return NULL;
+ }
+
+ l->block_ref_to = next_block;
+ l->block_ref_from = from_block;
+ l->ref_cnt = 1;
+ l->parent_generation = parent_generation;
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+
+ list_add(&l->node_ref_to, &from_block->ref_to_list);
+ list_add(&l->node_ref_from, &next_block->ref_from_list);
+
+ btrfsic_block_link_hashtable_add(l,
+ &state->block_link_hashtable);
+ } else {
+ l->ref_cnt++;
+ l->parent_generation = parent_generation;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_add_link(state, l);
+ }
+
+ return l;
+}
+
+static struct btrfsic_block *btrfsic_block_lookup_or_add(
+ struct btrfsic_state *state,
+ struct btrfsic_block_data_ctx *block_ctx,
+ const char *additional_string,
+ int is_metadata,
+ int is_iodone,
+ int never_written,
+ int mirror_num,
+ int *was_created)
+{
+ struct btrfsic_block *block;
+
+ block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
+ block_ctx->dev_bytenr,
+ &state->block_hashtable);
+ if (NULL == block) {
+ struct btrfsic_dev_state *dev_state;
+
+ block = btrfsic_block_alloc();
+ if (NULL == block) {
+ printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
+ return NULL;
+ }
+ dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
+ if (NULL == dev_state) {
+ printk(KERN_INFO
+ "btrfsic: error, lookup dev_state failed!\n");
+ btrfsic_block_free(block);
+ return NULL;
+ }
+ block->dev_state = dev_state;
+ block->dev_bytenr = block_ctx->dev_bytenr;
+ block->logical_bytenr = block_ctx->start;
+ block->is_metadata = is_metadata;
+ block->is_iodone = is_iodone;
+ block->never_written = never_written;
+ block->mirror_num = mirror_num;
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ printk(KERN_INFO
+ "New %s%c-block @%llu (%s/%llu/%d)\n",
+ additional_string,
+ btrfsic_get_block_type(state, block),
+ (unsigned long long)block->logical_bytenr,
+ dev_state->name,
+ (unsigned long long)block->dev_bytenr,
+ mirror_num);
+ list_add(&block->all_blocks_node, &state->all_blocks_list);
+ btrfsic_block_hashtable_add(block, &state->block_hashtable);
+ if (NULL != was_created)
+ *was_created = 1;
+ } else {
+ if (NULL != was_created)
+ *was_created = 0;
+ }
+
+ return block;
+}
+
+static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
+ u64 bytenr,
+ struct btrfsic_dev_state *dev_state,
+ u64 dev_bytenr)
+{
+ int num_copies;
+ int mirror_num;
+ int ret;
+ struct btrfsic_block_data_ctx block_ctx;
+ int match = 0;
+
+ num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
+ bytenr, state->metablock_size);
+
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ ret = btrfsic_map_block(state, bytenr, state->metablock_size,
+ &block_ctx, mirror_num);
+ if (ret) {
+ printk(KERN_INFO "btrfsic:"
+ " btrfsic_map_block(logical @%llu,"
+ " mirror %d) failed!\n",
+ (unsigned long long)bytenr, mirror_num);
+ continue;
+ }
+
+ if (dev_state->bdev == block_ctx.dev->bdev &&
+ dev_bytenr == block_ctx.dev_bytenr) {
+ match++;
+ btrfsic_release_block_ctx(&block_ctx);
+ break;
+ }
+ btrfsic_release_block_ctx(&block_ctx);
+ }
+
+ if (!match) {
+ printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
+ " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
+ " phys_bytenr=%llu)!\n",
+ (unsigned long long)bytenr, dev_state->name,
+ (unsigned long long)dev_bytenr);
+ for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
+ ret = btrfsic_map_block(state, bytenr,
+ state->metablock_size,
+ &block_ctx, mirror_num);
+ if (ret)
+ continue;
+
+ printk(KERN_INFO "Read logical bytenr @%llu maps to"
+ " (%s/%llu/%d)\n",
+ (unsigned long long)bytenr,
+ block_ctx.dev->name,
+ (unsigned long long)block_ctx.dev_bytenr,
+ mirror_num);
+ }
+ WARN_ON(1);
+ }
+}
+
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
+ struct block_device *bdev)
+{
+ struct btrfsic_dev_state *ds;
+
+ ds = btrfsic_dev_state_hashtable_lookup(bdev,
+ &btrfsic_dev_state_hashtable);
+ return ds;
+}
+
+int btrfsic_submit_bh(int rw, struct buffer_head *bh)
+{
+ struct btrfsic_dev_state *dev_state;
+
+ if (!btrfsic_is_initialized)
+ return submit_bh(rw, bh);
+
+ mutex_lock(&btrfsic_mutex);
+ /* since btrfsic_submit_bh() might also be called before
+ * btrfsic_mount(), this might return NULL */
+ dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
+
+ /* Only called to write the superblock (incl. FLUSH/FUA) */
+ if (NULL != dev_state &&
+ (rw & WRITE) && bh->b_size > 0) {
+ u64 dev_bytenr;
+
+ dev_bytenr = 4096 * bh->b_blocknr;
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
+ " size=%lu, data=%p, bdev=%p)\n",
+ rw, (unsigned long)bh->b_blocknr,
+ (unsigned long long)dev_bytenr,
+ (unsigned long)bh->b_size, bh->b_data,
+ bh->b_bdev);
+ btrfsic_process_written_block(dev_state, dev_bytenr,
+ &bh->b_data, 1, NULL,
+ NULL, bh, rw);
+ } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
+ rw, bh->b_bdev);
+ if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
+ if ((dev_state->state->print_mask &
+ (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE)))
+ printk(KERN_INFO
+ "btrfsic_submit_bh(%s) with FLUSH"
+ " but dummy block already in use"
+ " (ignored)!\n",
+ dev_state->name);
+ } else {
+ struct btrfsic_block *const block =
+ &dev_state->dummy_block_for_bio_bh_flush;
+
+ block->is_iodone = 0;
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = rw;
+ block->orig_bio_bh_private = bh->b_private;
+ block->orig_bio_bh_end_io.bh = bh->b_end_io;
+ block->next_in_same_bio = NULL;
+ bh->b_private = block;
+ bh->b_end_io = btrfsic_bh_end_io;
+ }
+ }
+ mutex_unlock(&btrfsic_mutex);
+ return submit_bh(rw, bh);
+}
+
+void btrfsic_submit_bio(int rw, struct bio *bio)
+{
+ struct btrfsic_dev_state *dev_state;
+
+ if (!btrfsic_is_initialized) {
+ submit_bio(rw, bio);
+ return;
+ }
+
+ mutex_lock(&btrfsic_mutex);
+ /* since btrfsic_submit_bio() is also called before
+ * btrfsic_mount(), this might return NULL */
+ dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
+ if (NULL != dev_state &&
+ (rw & WRITE) && NULL != bio->bi_io_vec) {
+ unsigned int i;
+ u64 dev_bytenr;
+ int bio_is_patched;
+ char **mapped_datav;
+
+ dev_bytenr = 512 * bio->bi_sector;
+ bio_is_patched = 0;
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bio(rw=0x%x, bi_vcnt=%u,"
+ " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
+ rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
+ (unsigned long long)dev_bytenr,
+ bio->bi_bdev);
+
+ mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
+ GFP_NOFS);
+ if (!mapped_datav)
+ goto leave;
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
+ mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
+ if (!mapped_datav[i]) {
+ while (i > 0) {
+ i--;
+ kunmap(bio->bi_io_vec[i].bv_page);
+ }
+ kfree(mapped_datav);
+ goto leave;
+ }
+ if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE) ==
+ (dev_state->state->print_mask &
+ (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE)))
+ printk(KERN_INFO
+ "#%u: page=%p, len=%u, offset=%u\n",
+ i, bio->bi_io_vec[i].bv_page,
+ bio->bi_io_vec[i].bv_len,
+ bio->bi_io_vec[i].bv_offset);
+ }
+ btrfsic_process_written_block(dev_state, dev_bytenr,
+ mapped_datav, bio->bi_vcnt,
+ bio, &bio_is_patched,
+ NULL, rw);
+ while (i > 0) {
+ i--;
+ kunmap(bio->bi_io_vec[i].bv_page);
+ }
+ kfree(mapped_datav);
+ } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ if (dev_state->state->print_mask &
+ BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
+ printk(KERN_INFO
+ "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
+ rw, bio->bi_bdev);
+ if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
+ if ((dev_state->state->print_mask &
+ (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
+ BTRFSIC_PRINT_MASK_VERBOSE)))
+ printk(KERN_INFO
+ "btrfsic_submit_bio(%s) with FLUSH"
+ " but dummy block already in use"
+ " (ignored)!\n",
+ dev_state->name);
+ } else {
+ struct btrfsic_block *const block =
+ &dev_state->dummy_block_for_bio_bh_flush;
+
+ block->is_iodone = 0;
+ block->never_written = 0;
+ block->iodone_w_error = 0;
+ block->flush_gen = dev_state->last_flush_gen + 1;
+ block->submit_bio_bh_rw = rw;
+ block->orig_bio_bh_private = bio->bi_private;
+ block->orig_bio_bh_end_io.bio = bio->bi_end_io;
+ block->next_in_same_bio = NULL;
+ bio->bi_private = block;
+ bio->bi_end_io = btrfsic_bio_end_io;
+ }
+ }
+leave:
+ mutex_unlock(&btrfsic_mutex);
+
+ submit_bio(rw, bio);
+}
+
+int btrfsic_mount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices,
+ int including_extent_data, u32 print_mask)
+{
+ int ret;
+ struct btrfsic_state *state;
+ struct list_head *dev_head = &fs_devices->devices;
+ struct btrfs_device *device;
+
+ if (root->nodesize != root->leafsize) {
+ printk(KERN_INFO
+ "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
+ root->nodesize, root->leafsize);
+ return -1;
+ }
+ if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
+ printk(KERN_INFO
+ "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
+ root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
+ return -1;
+ }
+ if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
+ printk(KERN_INFO
+ "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
+ root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
+ return -1;
+ }
+ if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
+ printk(KERN_INFO
+ "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
+ root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
+ return -1;
+ }
+ state = kzalloc(sizeof(*state), GFP_NOFS);
+ if (NULL == state) {
+ printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
+ return -1;
+ }
+
+ if (!btrfsic_is_initialized) {
+ mutex_init(&btrfsic_mutex);
+ btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
+ btrfsic_is_initialized = 1;
+ }
+ mutex_lock(&btrfsic_mutex);
+ state->root = root;
+ state->print_mask = print_mask;
+ state->include_extent_data = including_extent_data;
+ state->csum_size = 0;
+ state->metablock_size = root->nodesize;
+ state->datablock_size = root->sectorsize;
+ INIT_LIST_HEAD(&state->all_blocks_list);
+ btrfsic_block_hashtable_init(&state->block_hashtable);
+ btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
+ state->max_superblock_generation = 0;
+ state->latest_superblock = NULL;
+
+ list_for_each_entry(device, dev_head, dev_list) {
+ struct btrfsic_dev_state *ds;
+ char *p;
+
+ if (!device->bdev || !device->name)
+ continue;
+
+ ds = btrfsic_dev_state_alloc();
+ if (NULL == ds) {
+ printk(KERN_INFO
+ "btrfs check-integrity: kmalloc() failed!\n");
+ mutex_unlock(&btrfsic_mutex);
+ return -1;
+ }
+ ds->bdev = device->bdev;
+ ds->state = state;
+ bdevname(ds->bdev, ds->name);
+ ds->name[BDEVNAME_SIZE - 1] = '\0';
+ for (p = ds->name; *p != '\0'; p++);
+ while (p > ds->name && *p != '/')
+ p--;
+ if (*p == '/')
+ p++;
+ strlcpy(ds->name, p, sizeof(ds->name));
+ btrfsic_dev_state_hashtable_add(ds,
+ &btrfsic_dev_state_hashtable);
+ }
+
+ ret = btrfsic_process_superblock(state, fs_devices);
+ if (0 != ret) {
+ mutex_unlock(&btrfsic_mutex);
+ btrfsic_unmount(root, fs_devices);
+ return ret;
+ }
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
+ btrfsic_dump_database(state);
+ if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
+ btrfsic_dump_tree(state);
+
+ mutex_unlock(&btrfsic_mutex);
+ return 0;
+}
+
+void btrfsic_unmount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices)
+{
+ struct list_head *elem_all;
+ struct list_head *tmp_all;
+ struct btrfsic_state *state;
+ struct list_head *dev_head = &fs_devices->devices;
+ struct btrfs_device *device;
+
+ if (!btrfsic_is_initialized)
+ return;
+
+ mutex_lock(&btrfsic_mutex);
+
+ state = NULL;
+ list_for_each_entry(device, dev_head, dev_list) {
+ struct btrfsic_dev_state *ds;
+
+ if (!device->bdev || !device->name)
+ continue;
+
+ ds = btrfsic_dev_state_hashtable_lookup(
+ device->bdev,
+ &btrfsic_dev_state_hashtable);
+ if (NULL != ds) {
+ state = ds->state;
+ btrfsic_dev_state_hashtable_remove(ds);
+ btrfsic_dev_state_free(ds);
+ }
+ }
+
+ if (NULL == state) {
+ printk(KERN_INFO
+ "btrfsic: error, cannot find state information"
+ " on umount!\n");
+ mutex_unlock(&btrfsic_mutex);
+ return;
+ }
+
+ /*
+ * Don't care about keeping the lists' state up to date,
+ * just free all memory that was allocated dynamically.
+ * Free the blocks and the block_links.
+ */
+ list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
+ struct btrfsic_block *const b_all =
+ list_entry(elem_all, struct btrfsic_block,
+ all_blocks_node);
+ struct list_head *elem_ref_to;
+ struct list_head *tmp_ref_to;
+
+ list_for_each_safe(elem_ref_to, tmp_ref_to,
+ &b_all->ref_to_list) {
+ struct btrfsic_block_link *const l =
+ list_entry(elem_ref_to,
+ struct btrfsic_block_link,
+ node_ref_to);
+
+ if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
+ btrfsic_print_rem_link(state, l);
+
+ l->ref_cnt--;
+ if (0 == l->ref_cnt)
+ btrfsic_block_link_free(l);
+ }
+
+ if (b_all->is_iodone || b_all->never_written)
+ btrfsic_block_free(b_all);
+ else
+ printk(KERN_INFO "btrfs: attempt to free %c-block"
+ " @%llu (%s/%llu/%d) on umount which is"
+ " not yet iodone!\n",
+ btrfsic_get_block_type(state, b_all),
+ (unsigned long long)b_all->logical_bytenr,
+ b_all->dev_state->name,
+ (unsigned long long)b_all->dev_bytenr,
+ b_all->mirror_num);
+ }
+
+ mutex_unlock(&btrfsic_mutex);
+
+ kfree(state);
+}
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
new file mode 100644
index 00000000000..8b59175cc50
--- /dev/null
+++ b/fs/btrfs/check-integrity.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) STRATO AG 2011. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#if !defined(__BTRFS_CHECK_INTEGRITY__)
+#define __BTRFS_CHECK_INTEGRITY__
+
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+int btrfsic_submit_bh(int rw, struct buffer_head *bh);
+void btrfsic_submit_bio(int rw, struct bio *bio);
+#else
+#define btrfsic_submit_bh submit_bh
+#define btrfsic_submit_bio submit_bio
+#endif
+
+int btrfsic_mount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices,
+ int including_extent_data, u32 print_mask);
+void btrfsic_unmount(struct btrfs_root *root,
+ struct btrfs_fs_devices *fs_devices);
+
+#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a0b2d..86eff48dab7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -120,10 +120,10 @@ static int check_compressed_csum(struct inode *inode,
page = cb->compressed_pages[i];
csum = ~(u32)0;
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
btrfs_csum_final(csum, (char *)&csum);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
if (csum != *cb_sum) {
printk(KERN_INFO "btrfs csum failed ino %llu "
@@ -226,8 +226,8 @@ out:
* Clear the writeback bits on all of the file
* pages for a compressed write
*/
-static noinline int end_compressed_writeback(struct inode *inode, u64 start,
- unsigned long ram_size)
+static noinline void end_compressed_writeback(struct inode *inode, u64 start,
+ unsigned long ram_size)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
@@ -253,7 +253,6 @@ static noinline int end_compressed_writeback(struct inode *inode, u64 start,
index += ret;
}
/* the inode may be gone now */
- return 0;
}
/*
@@ -392,20 +391,21 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
*/
atomic_inc(&cb->pending_bios);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (!skip_sum) {
ret = btrfs_csum_one_bio(root, inode, bio,
start, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(bio);
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+ BUG_ON(!bio);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
@@ -421,15 +421,15 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_get(bio);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (!skip_sum) {
ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(bio);
return 0;
@@ -497,7 +497,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* sure they map to this compressed extent on disk.
*/
set_page_extent_mapped(page);
- lock_extent(tree, last_offset, end, GFP_NOFS);
+ lock_extent(tree, last_offset, end);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, last_offset,
PAGE_CACHE_SIZE);
@@ -507,7 +507,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
(last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
(em->block_start >> 9) != cb->orig_bio->bi_sector) {
free_extent_map(em);
- unlock_extent(tree, last_offset, end, GFP_NOFS);
+ unlock_extent(tree, last_offset, end);
unlock_page(page);
page_cache_release(page);
break;
@@ -521,10 +521,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (zero_offset) {
int zeros;
zeros = PAGE_CACHE_SIZE - zero_offset;
- userpage = kmap_atomic(page, KM_USER0);
+ userpage = kmap_atomic(page);
memset(userpage + zero_offset, 0, zeros);
flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
+ kunmap_atomic(userpage);
}
}
@@ -535,7 +535,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
nr_pages++;
page_cache_release(page);
} else {
- unlock_extent(tree, last_offset, end, GFP_NOFS);
+ unlock_extent(tree, last_offset, end);
unlock_page(page);
page_cache_release(page);
break;
@@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page_offset(bio->bi_io_vec->bv_page),
PAGE_CACHE_SIZE);
read_unlock(&em_tree->lock);
+ if (!em)
+ return -EIO;
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -660,7 +662,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
/*
* inc the count before we submit the bio so
@@ -673,19 +675,20 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(root, inode,
comp_bio, sums);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
sums += (comp_bio->bi_size + root->sectorsize - 1) /
root->sectorsize;
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(comp_bio);
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
GFP_NOFS);
+ BUG_ON(!comp_bio);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -696,15 +699,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
- BUG_ON(ret);
+ BUG_ON(ret); /* -ENOMEM */
bio_put(comp_bio);
return 0;
@@ -732,7 +735,7 @@ struct btrfs_compress_op *btrfs_compress_op[] = {
&btrfs_lzo_compress,
};
-int __init btrfs_init_compress(void)
+void __init btrfs_init_compress(void)
{
int i;
@@ -742,7 +745,6 @@ int __init btrfs_init_compress(void)
atomic_set(&comp_alloc_workspace[i], 0);
init_waitqueue_head(&comp_workspace_wait[i]);
}
- return 0;
}
/*
@@ -991,9 +993,9 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(PAGE_CACHE_SIZE - *pg_offset,
PAGE_CACHE_SIZE - buf_offset);
bytes = min(bytes, working_bytes);
- kaddr = kmap_atomic(page_out, KM_USER0);
+ kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
flush_dcache_page(page_out);
*pg_offset += bytes;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index a12059f4f0f..9afb0a62ae8 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,7 +19,7 @@
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
-int btrfs_init_compress(void);
+void btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(int type, struct address_space *mapping,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index dede441bdee..8206b390058 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/rbtree.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -36,8 +37,17 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot);
+static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot,
+ int tree_mod_log);
+static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb);
+struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
+ u32 blocksize, u64 parent_transid,
+ u64 time_seq);
+struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
+ u64 bytenr, u32 blocksize,
+ u64 time_seq);
struct btrfs_path *btrfs_alloc_path(void)
{
@@ -156,10 +166,23 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
- rcu_read_lock();
- eb = rcu_dereference(root->node);
- extent_buffer_get(eb);
- rcu_read_unlock();
+ while (1) {
+ rcu_read_lock();
+ eb = rcu_dereference(root->node);
+
+ /*
+ * RCU really hurts here, we could free up the root node because
+ * it was cow'ed but we may not get the new root node yet so do
+ * the inc_not_zero dance and if it doesn't work then
+ * synchronize_rcu and try again.
+ */
+ if (atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ break;
+ }
+ rcu_read_unlock();
+ synchronize_rcu();
+ }
return eb;
}
@@ -207,10 +230,12 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
*/
static void add_root_to_dirty_list(struct btrfs_root *root)
{
+ spin_lock(&root->fs_info->trans_lock);
if (root->track_dirty && list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
+ spin_unlock(&root->fs_info->trans_lock);
}
/*
@@ -261,9 +286,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0);
+ ret = btrfs_inc_ref(trans, root, cow, 0, 1);
if (ret)
return ret;
@@ -273,6 +298,449 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
return 0;
}
+enum mod_log_op {
+ MOD_LOG_KEY_REPLACE,
+ MOD_LOG_KEY_ADD,
+ MOD_LOG_KEY_REMOVE,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING,
+ MOD_LOG_MOVE_KEYS,
+ MOD_LOG_ROOT_REPLACE,
+};
+
+struct tree_mod_move {
+ int dst_slot;
+ int nr_items;
+};
+
+struct tree_mod_root {
+ u64 logical;
+ u8 level;
+};
+
+struct tree_mod_elem {
+ struct rb_node node;
+ u64 index; /* shifted logical */
+ struct seq_list elem;
+ enum mod_log_op op;
+
+ /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
+ int slot;
+
+ /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
+ u64 generation;
+
+ /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
+ struct btrfs_disk_key key;
+ u64 blockptr;
+
+ /* this is used for op == MOD_LOG_MOVE_KEYS */
+ struct tree_mod_move move;
+
+ /* this is used for op == MOD_LOG_ROOT_REPLACE */
+ struct tree_mod_root old_root;
+};
+
+static inline void
+__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
+{
+ elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
+ list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+}
+
+void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+ struct seq_list *elem)
+{
+ elem->flags = 1;
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ __get_tree_mod_seq(fs_info, elem);
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+}
+
+void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
+ struct seq_list *elem)
+{
+ struct rb_root *tm_root;
+ struct rb_node *node;
+ struct rb_node *next;
+ struct seq_list *cur_elem;
+ struct tree_mod_elem *tm;
+ u64 min_seq = (u64)-1;
+ u64 seq_putting = elem->seq;
+
+ if (!seq_putting)
+ return;
+
+ BUG_ON(!(elem->flags & 1));
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ list_del(&elem->list);
+
+ list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
+ if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
+ if (seq_putting > cur_elem->seq) {
+ /*
+ * blocker with lower sequence number exists, we
+ * cannot remove anything from the log
+ */
+ goto out;
+ }
+ min_seq = cur_elem->seq;
+ }
+ }
+
+ /*
+ * anything that's lower than the lowest existing (read: blocked)
+ * sequence number can be removed from the tree.
+ */
+ write_lock(&fs_info->tree_mod_log_lock);
+ tm_root = &fs_info->tree_mod_log;
+ for (node = rb_first(tm_root); node; node = next) {
+ next = rb_next(node);
+ tm = container_of(node, struct tree_mod_elem, node);
+ if (tm->elem.seq > min_seq)
+ continue;
+ rb_erase(node, tm_root);
+ list_del(&tm->elem.list);
+ kfree(tm);
+ }
+ write_unlock(&fs_info->tree_mod_log_lock);
+out:
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+}
+
+/*
+ * key order of the log:
+ * index -> sequence
+ *
+ * the index is the shifted logical of the *new* root node for root replace
+ * operations, or the shifted logical of the affected block for all other
+ * operations.
+ */
+static noinline int
+__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
+{
+ struct rb_root *tm_root;
+ struct rb_node **new;
+ struct rb_node *parent = NULL;
+ struct tree_mod_elem *cur;
+ int ret = 0;
+
+ BUG_ON(!tm || !tm->elem.seq);
+
+ write_lock(&fs_info->tree_mod_log_lock);
+ tm_root = &fs_info->tree_mod_log;
+ new = &tm_root->rb_node;
+ while (*new) {
+ cur = container_of(*new, struct tree_mod_elem, node);
+ parent = *new;
+ if (cur->index < tm->index)
+ new = &((*new)->rb_left);
+ else if (cur->index > tm->index)
+ new = &((*new)->rb_right);
+ else if (cur->elem.seq < tm->elem.seq)
+ new = &((*new)->rb_left);
+ else if (cur->elem.seq > tm->elem.seq)
+ new = &((*new)->rb_right);
+ else {
+ kfree(tm);
+ ret = -EEXIST;
+ goto unlock;
+ }
+ }
+
+ rb_link_node(&tm->node, parent, new);
+ rb_insert_color(&tm->node, tm_root);
+unlock:
+ write_unlock(&fs_info->tree_mod_log_lock);
+ return ret;
+}
+
+static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb) {
+ smp_mb();
+ if (list_empty(&(fs_info)->tree_mod_seq_list))
+ return 1;
+ if (!eb)
+ return 0;
+ if (btrfs_header_level(eb) == 0)
+ return 1;
+ return 0;
+}
+
+/*
+ * This allocates memory and gets a tree modification sequence number when
+ * needed.
+ *
+ * Returns 0 when no sequence number is needed, < 0 on error.
+ * Returns 1 when a sequence number was added. In this case,
+ * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
+ * after inserting into the rb tree.
+ */
+static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
+ struct tree_mod_elem **tm_ret)
+{
+ struct tree_mod_elem *tm;
+ int seq;
+
+ if (tree_mod_dont_log(fs_info, NULL))
+ return 0;
+
+ tm = *tm_ret = kzalloc(sizeof(*tm), flags);
+ if (!tm)
+ return -ENOMEM;
+
+ tm->elem.flags = 0;
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ if (list_empty(&fs_info->tree_mod_seq_list)) {
+ /*
+ * someone emptied the list while we were waiting for the lock.
+ * we must not add to the list, because no blocker exists. items
+ * are removed from the list only when the existing blocker is
+ * removed from the list.
+ */
+ kfree(tm);
+ seq = 0;
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+ } else {
+ __get_tree_mod_seq(fs_info, &tm->elem);
+ seq = tm->elem.seq;
+ }
+
+ return seq;
+}
+
+static noinline int
+tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
+{
+ struct tree_mod_elem *tm;
+ int ret;
+
+ ret = tree_mod_alloc(fs_info, flags, &tm);
+ if (ret <= 0)
+ return ret;
+
+ tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ if (op != MOD_LOG_KEY_ADD) {
+ btrfs_node_key(eb, &tm->key, slot);
+ tm->blockptr = btrfs_node_blockptr(eb, slot);
+ }
+ tm->op = op;
+ tm->slot = slot;
+ tm->generation = btrfs_node_ptr_generation(eb, slot);
+
+ ret = __tree_mod_log_insert(fs_info, tm);
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+ return ret;
+}
+
+static noinline int
+tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+ int slot, enum mod_log_op op)
+{
+ return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
+}
+
+static noinline int
+tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int dst_slot, int src_slot,
+ int nr_items, gfp_t flags)
+{
+ struct tree_mod_elem *tm;
+ int ret;
+ int i;
+
+ if (tree_mod_dont_log(fs_info, eb))
+ return 0;
+
+ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+ ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING);
+ BUG_ON(ret < 0);
+ }
+
+ ret = tree_mod_alloc(fs_info, flags, &tm);
+ if (ret <= 0)
+ return ret;
+
+ tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->slot = src_slot;
+ tm->move.dst_slot = dst_slot;
+ tm->move.nr_items = nr_items;
+ tm->op = MOD_LOG_MOVE_KEYS;
+
+ ret = __tree_mod_log_insert(fs_info, tm);
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+ return ret;
+}
+
+static noinline int
+tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *old_root,
+ struct extent_buffer *new_root, gfp_t flags)
+{
+ struct tree_mod_elem *tm;
+ int ret;
+
+ ret = tree_mod_alloc(fs_info, flags, &tm);
+ if (ret <= 0)
+ return ret;
+
+ tm->index = new_root->start >> PAGE_CACHE_SHIFT;
+ tm->old_root.logical = old_root->start;
+ tm->old_root.level = btrfs_header_level(old_root);
+ tm->generation = btrfs_header_generation(old_root);
+ tm->op = MOD_LOG_ROOT_REPLACE;
+
+ ret = __tree_mod_log_insert(fs_info, tm);
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+ return ret;
+}
+
+static struct tree_mod_elem *
+__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
+ int smallest)
+{
+ struct rb_root *tm_root;
+ struct rb_node *node;
+ struct tree_mod_elem *cur = NULL;
+ struct tree_mod_elem *found = NULL;
+ u64 index = start >> PAGE_CACHE_SHIFT;
+
+ read_lock(&fs_info->tree_mod_log_lock);
+ tm_root = &fs_info->tree_mod_log;
+ node = tm_root->rb_node;
+ while (node) {
+ cur = container_of(node, struct tree_mod_elem, node);
+ if (cur->index < index) {
+ node = node->rb_left;
+ } else if (cur->index > index) {
+ node = node->rb_right;
+ } else if (cur->elem.seq < min_seq) {
+ node = node->rb_left;
+ } else if (!smallest) {
+ /* we want the node with the highest seq */
+ if (found)
+ BUG_ON(found->elem.seq > cur->elem.seq);
+ found = cur;
+ node = node->rb_left;
+ } else if (cur->elem.seq > min_seq) {
+ /* we want the node with the smallest seq */
+ if (found)
+ BUG_ON(found->elem.seq < cur->elem.seq);
+ found = cur;
+ node = node->rb_right;
+ } else {
+ found = cur;
+ break;
+ }
+ }
+ read_unlock(&fs_info->tree_mod_log_lock);
+
+ return found;
+}
+
+/*
+ * this returns the element from the log with the smallest time sequence
+ * value that's in the log (the oldest log item). any element with a time
+ * sequence lower than min_seq will be ignored.
+ */
+static struct tree_mod_elem *
+tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
+ u64 min_seq)
+{
+ return __tree_mod_log_search(fs_info, start, min_seq, 1);
+}
+
+/*
+ * this returns the element from the log with the largest time sequence
+ * value that's in the log (the most recent log item). any element with
+ * a time sequence lower than min_seq will be ignored.
+ */
+static struct tree_mod_elem *
+tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
+{
+ return __tree_mod_log_search(fs_info, start, min_seq, 0);
+}
+
+static inline void
+tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+ struct extent_buffer *src, unsigned long dst_offset,
+ unsigned long src_offset, int nr_items)
+{
+ int ret;
+ int i;
+
+ if (tree_mod_dont_log(fs_info, NULL))
+ return;
+
+ if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+ return;
+
+ /* speed this up by single seq for all operations? */
+ for (i = 0; i < nr_items; i++) {
+ ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
+ MOD_LOG_KEY_REMOVE);
+ BUG_ON(ret < 0);
+ ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
+ MOD_LOG_KEY_ADD);
+ BUG_ON(ret < 0);
+ }
+}
+
+static inline void
+tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+ int dst_offset, int src_offset, int nr_items)
+{
+ int ret;
+ ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
+ nr_items, GFP_NOFS);
+ BUG_ON(ret < 0);
+}
+
+static inline void
+tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int slot, int atomic)
+{
+ int ret;
+
+ ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
+ MOD_LOG_KEY_REPLACE,
+ atomic ? GFP_ATOMIC : GFP_NOFS);
+ BUG_ON(ret < 0);
+}
+
+static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb)
+{
+ int i;
+ int ret;
+ u32 nritems;
+
+ if (tree_mod_dont_log(fs_info, eb))
+ return;
+
+ nritems = btrfs_header_nritems(eb);
+ for (i = nritems - 1; i >= 0; i--) {
+ ret = tree_mod_log_insert_key(fs_info, eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+ BUG_ON(ret < 0);
+ }
+}
+
+static inline void
+tree_mod_log_set_root_pointer(struct btrfs_root *root,
+ struct extent_buffer *new_root_node)
+{
+ int ret;
+ tree_mod_log_free_eb(root->fs_info, root->node);
+ ret = tree_mod_log_insert_root(root->fs_info, root->node,
+ new_root_node, GFP_NOFS);
+ BUG_ON(ret < 0);
+}
+
/*
* check if the tree block can be shared by multiple trees
*/
@@ -331,8 +799,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (btrfs_block_can_be_shared(root, buf)) {
ret = btrfs_lookup_extent_info(trans, root, buf->start,
buf->len, &refs, &flags);
- BUG_ON(ret);
- BUG_ON(refs == 0);
+ if (ret)
+ return ret;
+ if (refs == 0) {
+ ret = -EROFS;
+ btrfs_std_error(root->fs_info, ret);
+ return ret;
+ }
} else {
refs = 1;
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
@@ -350,44 +823,51 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if ((owner == root->root_key.objectid ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
- ret = btrfs_inc_ref(trans, root, buf, 1);
- BUG_ON(ret);
+ ret = btrfs_inc_ref(trans, root, buf, 1, 1);
+ BUG_ON(ret); /* -ENOMEM */
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
- ret = btrfs_dec_ref(trans, root, buf, 0);
- BUG_ON(ret);
- ret = btrfs_inc_ref(trans, root, cow, 1);
- BUG_ON(ret);
+ ret = btrfs_dec_ref(trans, root, buf, 0, 1);
+ BUG_ON(ret); /* -ENOMEM */
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+ BUG_ON(ret); /* -ENOMEM */
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0);
- BUG_ON(ret);
+ ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ BUG_ON(ret); /* -ENOMEM */
}
if (new_flags != 0) {
ret = btrfs_set_disk_extent_flags(trans, root,
buf->start,
buf->len,
new_flags, 0);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
} else {
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1);
+ ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
- ret = btrfs_inc_ref(trans, root, cow, 0);
- BUG_ON(ret);
- ret = btrfs_dec_ref(trans, root, buf, 1);
- BUG_ON(ret);
+ ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+ BUG_ON(ret); /* -ENOMEM */
+ ret = btrfs_dec_ref(trans, root, buf, 1, 1);
+ BUG_ON(ret); /* -ENOMEM */
}
+ /*
+ * don't log freeing in case we're freeing the root node, this
+ * is done by tree_mod_log_set_root_pointer later
+ */
+ if (buf != root->node && btrfs_header_level(buf) != 0)
+ tree_mod_log_free_eb(root->fs_info, buf);
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
@@ -415,7 +895,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
{
struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
- int level;
+ int level, ret;
int last_ref = 0;
int unlock_orig = 0;
u64 parent_start;
@@ -467,7 +947,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
- update_ref_for_cow(trans, root, buf, cow, &last_ref);
+ ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
if (root->ref_cows)
btrfs_reloc_cow_block(trans, root, buf, cow);
@@ -481,6 +965,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
parent_start = 0;
extent_buffer_get(cow);
+ tree_mod_log_set_root_pointer(root, cow);
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
@@ -494,6 +979,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
parent_start = 0;
WARN_ON(trans->transid != btrfs_header_generation(parent));
+ tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
+ MOD_LOG_KEY_REPLACE);
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
btrfs_set_node_ptr_generation(parent, parent_slot,
@@ -504,12 +991,235 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
}
if (unlock_orig)
btrfs_tree_unlock(buf);
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
btrfs_mark_buffer_dirty(cow);
*cow_ret = cow;
return 0;
}
+/*
+ * returns the logical address of the oldest predecessor of the given root.
+ * entries older than time_seq are ignored.
+ */
+static struct tree_mod_elem *
+__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *root, u64 time_seq)
+{
+ struct tree_mod_elem *tm;
+ struct tree_mod_elem *found = NULL;
+ u64 root_logical = root->node->start;
+ int looped = 0;
+
+ if (!time_seq)
+ return 0;
+
+ /*
+ * the very last operation that's logged for a root is the replacement
+ * operation (if it is replaced at all). this has the index of the *new*
+ * root, making it the very first operation that's logged for this root.
+ */
+ while (1) {
+ tm = tree_mod_log_search_oldest(fs_info, root_logical,
+ time_seq);
+ if (!looped && !tm)
+ return 0;
+ /*
+ * if there are no tree operation for the oldest root, we simply
+ * return it. this should only happen if that (old) root is at
+ * level 0.
+ */
+ if (!tm)
+ break;
+
+ /*
+ * if there's an operation that's not a root replacement, we
+ * found the oldest version of our root. normally, we'll find a
+ * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+ */
+ if (tm->op != MOD_LOG_ROOT_REPLACE)
+ break;
+
+ found = tm;
+ root_logical = tm->old_root.logical;
+ BUG_ON(root_logical == root->node->start);
+ looped = 1;
+ }
+
+ /* if there's no old root to return, return what we found instead */
+ if (!found)
+ found = tm;
+
+ return found;
+}
+
+/*
+ * tm is a pointer to the first operation to rewind within eb. then, all
+ * previous operations will be rewinded (until we reach something older than
+ * time_seq).
+ */
+static void
+__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
+ struct tree_mod_elem *first_tm)
+{
+ u32 n;
+ struct rb_node *next;
+ struct tree_mod_elem *tm = first_tm;
+ unsigned long o_dst;
+ unsigned long o_src;
+ unsigned long p_size = sizeof(struct btrfs_key_ptr);
+
+ n = btrfs_header_nritems(eb);
+ while (tm && tm->elem.seq >= time_seq) {
+ /*
+ * all the operations are recorded with the operator used for
+ * the modification. as we're going backwards, we do the
+ * opposite of each operation here.
+ */
+ switch (tm->op) {
+ case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
+ BUG_ON(tm->slot < n);
+ case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
+ case MOD_LOG_KEY_REMOVE:
+ btrfs_set_node_key(eb, &tm->key, tm->slot);
+ btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
+ btrfs_set_node_ptr_generation(eb, tm->slot,
+ tm->generation);
+ n++;
+ break;
+ case MOD_LOG_KEY_REPLACE:
+ BUG_ON(tm->slot >= n);
+ btrfs_set_node_key(eb, &tm->key, tm->slot);
+ btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
+ btrfs_set_node_ptr_generation(eb, tm->slot,
+ tm->generation);
+ break;
+ case MOD_LOG_KEY_ADD:
+ /* if a move operation is needed it's in the log */
+ n--;
+ break;
+ case MOD_LOG_MOVE_KEYS:
+ o_dst = btrfs_node_key_ptr_offset(tm->slot);
+ o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
+ memmove_extent_buffer(eb, o_dst, o_src,
+ tm->move.nr_items * p_size);
+ break;
+ case MOD_LOG_ROOT_REPLACE:
+ /*
+ * this operation is special. for roots, this must be
+ * handled explicitly before rewinding.
+ * for non-roots, this operation may exist if the node
+ * was a root: root A -> child B; then A gets empty and
+ * B is promoted to the new root. in the mod log, we'll
+ * have a root-replace operation for B, a tree block
+ * that is no root. we simply ignore that operation.
+ */
+ break;
+ }
+ next = rb_next(&tm->node);
+ if (!next)
+ break;
+ tm = container_of(next, struct tree_mod_elem, node);
+ if (tm->index != first_tm->index)
+ break;
+ }
+ btrfs_set_header_nritems(eb, n);
+}
+
+static struct extent_buffer *
+tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+ u64 time_seq)
+{
+ struct extent_buffer *eb_rewin;
+ struct tree_mod_elem *tm;
+
+ if (!time_seq)
+ return eb;
+
+ if (btrfs_header_level(eb) == 0)
+ return eb;
+
+ tm = tree_mod_log_search(fs_info, eb->start, time_seq);
+ if (!tm)
+ return eb;
+
+ if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
+ BUG_ON(tm->slot != 0);
+ eb_rewin = alloc_dummy_extent_buffer(eb->start,
+ fs_info->tree_root->nodesize);
+ BUG_ON(!eb_rewin);
+ btrfs_set_header_bytenr(eb_rewin, eb->start);
+ btrfs_set_header_backref_rev(eb_rewin,
+ btrfs_header_backref_rev(eb));
+ btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
+ btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
+ } else {
+ eb_rewin = btrfs_clone_extent_buffer(eb);
+ BUG_ON(!eb_rewin);
+ }
+
+ extent_buffer_get(eb_rewin);
+ free_extent_buffer(eb);
+
+ __tree_mod_log_rewind(eb_rewin, time_seq, tm);
+
+ return eb_rewin;
+}
+
+/*
+ * get_old_root() rewinds the state of @root's root node to the given @time_seq
+ * value. If there are no changes, the current root->root_node is returned. If
+ * anything changed in between, there's a fresh buffer allocated on which the
+ * rewind operations are done. In any case, the returned buffer is read locked.
+ * Returns NULL on error (with no locks held).
+ */
+static inline struct extent_buffer *
+get_old_root(struct btrfs_root *root, u64 time_seq)
+{
+ struct tree_mod_elem *tm;
+ struct extent_buffer *eb;
+ struct tree_mod_root *old_root = NULL;
+ u64 old_generation = 0;
+ u64 logical;
+
+ eb = btrfs_read_lock_root_node(root);
+ tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+ if (!tm)
+ return root->node;
+
+ if (tm->op == MOD_LOG_ROOT_REPLACE) {
+ old_root = &tm->old_root;
+ old_generation = tm->generation;
+ logical = old_root->logical;
+ } else {
+ logical = root->node->start;
+ }
+
+ tm = tree_mod_log_search(root->fs_info, logical, time_seq);
+ if (old_root)
+ eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+ else
+ eb = btrfs_clone_extent_buffer(root->node);
+ btrfs_tree_read_unlock(root->node);
+ free_extent_buffer(root->node);
+ if (!eb)
+ return NULL;
+ btrfs_tree_read_lock(eb);
+ if (old_root) {
+ btrfs_set_header_bytenr(eb, eb->start);
+ btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
+ btrfs_set_header_owner(eb, root->root_key.objectid);
+ btrfs_set_header_level(eb, old_root->level);
+ btrfs_set_header_generation(eb, old_generation);
+ }
+ if (tm)
+ __tree_mod_log_rewind(eb, time_seq, tm);
+ else
+ WARN_ON(btrfs_header_level(eb) != 0);
+ extent_buffer_get(eb);
+
+ return eb;
+}
+
static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
@@ -700,7 +1410,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
cur = btrfs_find_tree_block(root, blocknr, blocksize);
if (cur)
- uptodate = btrfs_buffer_uptodate(cur, gen);
+ uptodate = btrfs_buffer_uptodate(cur, gen, 0);
else
uptodate = 0;
if (!cur || !uptodate) {
@@ -714,7 +1424,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
if (!cur)
return -EIO;
} else if (!uptodate) {
- btrfs_read_buffer(cur, gen);
+ err = btrfs_read_buffer(cur, gen);
+ if (err) {
+ free_extent_buffer(cur);
+ return err;
+ }
}
}
if (search_start == 0)
@@ -829,20 +1543,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot)
{
- if (level == 0) {
+ if (level == 0)
return generic_bin_search(eb,
offsetof(struct btrfs_leaf, items),
sizeof(struct btrfs_item),
key, btrfs_header_nritems(eb),
slot);
- } else {
+ else
return generic_bin_search(eb,
offsetof(struct btrfs_node, ptrs),
sizeof(struct btrfs_key_ptr),
key, btrfs_header_nritems(eb),
slot);
- }
- return -1;
}
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
@@ -934,7 +1646,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
- BUG_ON(!child);
+ if (!child) {
+ ret = -EROFS;
+ btrfs_std_error(root->fs_info, ret);
+ goto enospc;
+ }
+
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
@@ -944,6 +1661,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
+ tree_mod_log_set_root_pointer(root, child);
rcu_assign_pointer(root->node, child);
add_root_to_dirty_list(root);
@@ -959,15 +1677,13 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */
- free_extent_buffer(mid);
+ free_extent_buffer_stale(mid);
return 0;
}
if (btrfs_header_nritems(mid) >
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
return 0;
- btrfs_header_nritems(mid);
-
left = read_node_slot(root, parent, pslot - 1);
if (left) {
btrfs_tree_lock(left);
@@ -997,7 +1713,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
wret = push_node_left(trans, root, left, mid, 1);
if (wret < 0)
ret = wret;
- btrfs_header_nritems(mid);
}
/*
@@ -1010,17 +1725,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(right) == 0) {
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
- wret = del_ptr(trans, root, path, level + 1, pslot +
- 1);
- if (wret)
- ret = wret;
+ del_ptr(trans, root, path, level + 1, pslot + 1, 1);
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
- free_extent_buffer(right);
+ free_extent_buffer_stale(right);
right = NULL;
} else {
struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0);
+ tree_mod_log_set_node_key(root->fs_info, parent,
+ &right_key, pslot + 1, 0);
btrfs_set_node_key(parent, &right_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
}
@@ -1035,7 +1749,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* otherwise we would have pulled some pointers from the
* right
*/
- BUG_ON(!left);
+ if (!left) {
+ ret = -EROFS;
+ btrfs_std_error(root->fs_info, ret);
+ goto enospc;
+ }
wret = balance_node_right(trans, root, mid, left);
if (wret < 0) {
ret = wret;
@@ -1051,17 +1769,17 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(mid) == 0) {
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
- wret = del_ptr(trans, root, path, level + 1, pslot);
- if (wret)
- ret = wret;
+ del_ptr(trans, root, path, level + 1, pslot, 1);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
- free_extent_buffer(mid);
+ free_extent_buffer_stale(mid);
mid = NULL;
} else {
/* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key;
btrfs_node_key(mid, &mid_key, 0);
+ tree_mod_log_set_node_key(root->fs_info, parent, &mid_key,
+ pslot, 0);
btrfs_set_node_key(parent, &mid_key, pslot);
btrfs_mark_buffer_dirty(parent);
}
@@ -1159,6 +1877,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
orig_slot += left_nr;
btrfs_node_key(mid, &disk_key, 0);
+ tree_mod_log_set_node_key(root->fs_info, parent,
+ &disk_key, pslot, 0);
btrfs_set_node_key(parent, &disk_key, pslot);
btrfs_mark_buffer_dirty(parent);
if (btrfs_header_nritems(left) > orig_slot) {
@@ -1210,6 +1930,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
btrfs_node_key(right, &disk_key, 0);
+ tree_mod_log_set_node_key(root->fs_info, parent,
+ &disk_key, pslot + 1, 0);
btrfs_set_node_key(parent, &disk_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
@@ -1331,7 +2053,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block1 = btrfs_node_blockptr(parent, slot - 1);
gen = btrfs_node_ptr_generation(parent, slot - 1);
eb = btrfs_find_tree_block(root, block1, blocksize);
- if (eb && btrfs_buffer_uptodate(eb, gen))
+ /*
+ * if we get -eagain from btrfs_buffer_uptodate, we
+ * don't want to return eagain here. That will loop
+ * forever
+ */
+ if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block1 = 0;
free_extent_buffer(eb);
}
@@ -1339,7 +2066,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
eb = btrfs_find_tree_block(root, block2, blocksize);
- if (eb && btrfs_buffer_uptodate(eb, gen))
+ if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block2 = 0;
free_extent_buffer(eb);
}
@@ -1382,7 +2109,8 @@ static noinline int reada_for_balance(struct btrfs_root *root,
* if lowest_unlock is 1, level 0 won't be unlocked
*/
static noinline void unlock_up(struct btrfs_path *path, int level,
- int lowest_unlock)
+ int lowest_unlock, int min_write_lock_level,
+ int *write_lock_level)
{
int i;
int skip_level = level;
@@ -1414,6 +2142,11 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
btrfs_tree_unlock_rw(t, path->locks[i]);
path->locks[i] = 0;
+ if (write_lock_level &&
+ i > min_write_lock_level &&
+ i <= *write_lock_level) {
+ *write_lock_level = i - 1;
+ }
}
}
}
@@ -1456,7 +2189,7 @@ static int
read_block_for_search(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *p,
struct extent_buffer **eb_ret, int level, int slot,
- struct btrfs_key *key)
+ struct btrfs_key *key, u64 time_seq)
{
u64 blocknr;
u64 gen;
@@ -1471,8 +2204,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp) {
- if (btrfs_buffer_uptodate(tmp, 0)) {
- if (btrfs_buffer_uptodate(tmp, gen)) {
+ /* first we do an atomic uptodate check */
+ if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
+ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
/*
* we found an up to date block without
* sleeping, return
@@ -1490,8 +2224,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
free_extent_buffer(tmp);
btrfs_set_path_blocking(p);
+ /* now we're allowed to do a blocking uptodate check */
tmp = read_tree_block(root, blocknr, blocksize, gen);
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
*eb_ret = tmp;
return 0;
}
@@ -1526,7 +2261,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
- if (!btrfs_buffer_uptodate(tmp, 0))
+ if (!btrfs_buffer_uptodate(tmp, 0, 0))
ret = -EIO;
free_extent_buffer(tmp);
}
@@ -1637,6 +2372,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
/* everything at write_lock_level or lower must be write locked */
int write_lock_level = 0;
u8 lowest_level = 0;
+ int min_write_lock_level;
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
@@ -1664,6 +2400,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
if (cow && (p->keep_locks || p->lowest_level))
write_lock_level = BTRFS_MAX_LEVEL;
+ min_write_lock_level = write_lock_level;
+
again:
/*
* we try very hard to do read locks on the root
@@ -1795,7 +2533,8 @@ cow_done:
goto again;
}
- unlock_up(p, level, lowest_unlock);
+ unlock_up(p, level, lowest_unlock,
+ min_write_lock_level, &write_lock_level);
if (level == lowest_level) {
if (dec)
@@ -1804,7 +2543,7 @@ cow_done:
}
err = read_block_for_search(trans, root, p,
- &b, level, slot, key);
+ &b, level, slot, key, 0);
if (err == -EAGAIN)
goto again;
if (err) {
@@ -1857,7 +2596,8 @@ cow_done:
}
}
if (!p->search_for_split)
- unlock_up(p, level, lowest_unlock);
+ unlock_up(p, level, lowest_unlock,
+ min_write_lock_level, &write_lock_level);
goto done;
}
}
@@ -1875,21 +2615,125 @@ done:
}
/*
+ * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
+ * current state of the tree together with the operations recorded in the tree
+ * modification log to search for the key in a previous version of this tree, as
+ * denoted by the time_seq parameter.
+ *
+ * Naturally, there is no support for insert, delete or cow operations.
+ *
+ * The resulting path and return value will be set up as if we called
+ * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
+ */
+int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
+ struct btrfs_path *p, u64 time_seq)
+{
+ struct extent_buffer *b;
+ int slot;
+ int ret;
+ int err;
+ int level;
+ int lowest_unlock = 1;
+ u8 lowest_level = 0;
+
+ lowest_level = p->lowest_level;
+ WARN_ON(p->nodes[0] != NULL);
+
+ if (p->search_commit_root) {
+ BUG_ON(time_seq);
+ return btrfs_search_slot(NULL, root, key, p, 0, 0);
+ }
+
+again:
+ b = get_old_root(root, time_seq);
+ level = btrfs_header_level(b);
+ p->locks[level] = BTRFS_READ_LOCK;
+
+ while (b) {
+ level = btrfs_header_level(b);
+ p->nodes[level] = b;
+ btrfs_clear_path_blocking(p, NULL, 0);
+
+ /*
+ * we have a lock on b and as long as we aren't changing
+ * the tree, there is no way to for the items in b to change.
+ * It is safe to drop the lock on our parent before we
+ * go through the expensive btree search on b.
+ */
+ btrfs_unlock_up_safe(p, level + 1);
+
+ ret = bin_search(b, key, level, &slot);
+
+ if (level != 0) {
+ int dec = 0;
+ if (ret && slot > 0) {
+ dec = 1;
+ slot -= 1;
+ }
+ p->slots[level] = slot;
+ unlock_up(p, level, lowest_unlock, 0, NULL);
+
+ if (level == lowest_level) {
+ if (dec)
+ p->slots[level]++;
+ goto done;
+ }
+
+ err = read_block_for_search(NULL, root, p, &b, level,
+ slot, key, time_seq);
+ if (err == -EAGAIN)
+ goto again;
+ if (err) {
+ ret = err;
+ goto done;
+ }
+
+ level = btrfs_header_level(b);
+ err = btrfs_try_tree_read_lock(b);
+ if (!err) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_read_lock(b);
+ btrfs_clear_path_blocking(p, b,
+ BTRFS_READ_LOCK);
+ }
+ p->locks[level] = BTRFS_READ_LOCK;
+ p->nodes[level] = b;
+ b = tree_mod_log_rewind(root->fs_info, b, time_seq);
+ if (b != p->nodes[level]) {
+ btrfs_tree_unlock_rw(p->nodes[level],
+ p->locks[level]);
+ p->locks[level] = 0;
+ p->nodes[level] = b;
+ }
+ } else {
+ p->slots[level] = slot;
+ unlock_up(p, level, lowest_unlock, 0, NULL);
+ goto done;
+ }
+ }
+ ret = 1;
+done:
+ if (!p->leave_spinning)
+ btrfs_set_path_blocking(p);
+ if (ret < 0)
+ btrfs_release_path(p);
+
+ return ret;
+}
+
+/*
* adjust the pointers going up the tree, starting at level
* making sure the right key of each node is points to 'key'.
* This is used after shifting pointers to the left, so it stops
* fixing up pointers when a given leaf/node is not in slot 0 of the
* higher levels
*
- * If this fails to write a tree block, it returns -1, but continues
- * fixing up the blocks in ram so the tree is consistent.
*/
-static int fixup_low_keys(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_disk_key *key, int level)
+static void fixup_low_keys(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_disk_key *key, int level)
{
int i;
- int ret = 0;
struct extent_buffer *t;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1897,12 +2741,12 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
if (!path->nodes[i])
break;
t = path->nodes[i];
+ tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1);
btrfs_set_node_key(t, key, tslot);
btrfs_mark_buffer_dirty(path->nodes[i]);
if (tslot != 0)
break;
}
- return ret;
}
/*
@@ -1911,9 +2755,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
* This function isn't completely safe. It's the caller's responsibility
* that the new key won't break the order
*/
-int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *new_key)
+void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *new_key)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *eb;
@@ -1923,13 +2767,11 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
slot = path->slots[0];
if (slot > 0) {
btrfs_item_key(eb, &disk_key, slot - 1);
- if (comp_keys(&disk_key, new_key) >= 0)
- return -1;
+ BUG_ON(comp_keys(&disk_key, new_key) >= 0);
}
if (slot < btrfs_header_nritems(eb) - 1) {
btrfs_item_key(eb, &disk_key, slot + 1);
- if (comp_keys(&disk_key, new_key) <= 0)
- return -1;
+ BUG_ON(comp_keys(&disk_key, new_key) <= 0);
}
btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -1937,7 +2779,6 @@ int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(eb);
if (slot == 0)
fixup_low_keys(trans, root, path, &disk_key, 1);
- return 0;
}
/*
@@ -1983,12 +2824,16 @@ static int push_node_left(struct btrfs_trans_handle *trans,
} else
push_items = min(src_nritems - 8, push_items);
+ tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+ push_items);
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(dst_nritems),
btrfs_node_key_ptr_offset(0),
push_items * sizeof(struct btrfs_key_ptr));
if (push_items < src_nritems) {
+ tree_mod_log_eb_move(root->fs_info, src, 0, push_items,
+ src_nritems - push_items);
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(push_items),
(src_nritems - push_items) *
@@ -2042,11 +2887,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
if (max_push < push_items)
push_items = max_push;
+ tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems);
memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
btrfs_node_key_ptr_offset(0),
(dst_nritems) *
sizeof(struct btrfs_key_ptr));
+ tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+ src_nritems - push_items, push_items);
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -2121,6 +2969,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
old = root->node;
+ tree_mod_log_set_root_pointer(root, c);
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
@@ -2140,36 +2989,42 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
*
* slot and level indicate where you want the key to go, and
* blocknr is the block the key points to.
- *
- * returns zero on success and < 0 on any error
*/
-static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, struct btrfs_disk_key
- *key, u64 bytenr, int slot, int level)
+static void insert_ptr(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_disk_key *key, u64 bytenr,
+ int slot, int level)
{
struct extent_buffer *lower;
int nritems;
+ int ret;
BUG_ON(!path->nodes[level]);
btrfs_assert_tree_locked(path->nodes[level]);
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
- if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
- BUG();
+ BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
if (slot != nritems) {
+ if (level)
+ tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
+ slot, nritems - slot);
memmove_extent_buffer(lower,
btrfs_node_key_ptr_offset(slot + 1),
btrfs_node_key_ptr_offset(slot),
(nritems - slot) * sizeof(struct btrfs_key_ptr));
}
+ if (level) {
+ ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
+ MOD_LOG_KEY_ADD);
+ BUG_ON(ret < 0);
+ }
btrfs_set_node_key(lower, key, slot);
btrfs_set_node_blockptr(lower, slot, bytenr);
WARN_ON(trans->transid == 0);
btrfs_set_node_ptr_generation(lower, slot, trans->transid);
btrfs_set_header_nritems(lower, nritems + 1);
btrfs_mark_buffer_dirty(lower);
- return 0;
}
/*
@@ -2190,7 +3045,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
int mid;
int ret;
- int wret;
u32 c_nritems;
c = path->nodes[level];
@@ -2235,7 +3089,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
-
+ tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
copy_extent_buffer(split, c,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(mid),
@@ -2247,11 +3101,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
btrfs_mark_buffer_dirty(split);
- wret = insert_ptr(trans, root, path, &disk_key, split->start,
- path->slots[level + 1] + 1,
- level + 1);
- if (wret)
- ret = wret;
+ insert_ptr(trans, root, path, &disk_key, split->start,
+ path->slots[level + 1] + 1, level + 1);
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
@@ -2320,6 +3171,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *upper = path->nodes[1];
+ struct btrfs_map_token token;
struct btrfs_disk_key disk_key;
int slot;
u32 i;
@@ -2331,6 +3183,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
u32 data_end;
u32 this_item_size;
+ btrfs_init_map_token(&token);
+
if (empty)
nr = 0;
else
@@ -2408,8 +3262,8 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- push_space -= btrfs_item_size(right, item);
- btrfs_set_item_offset(right, item, push_space);
+ push_space -= btrfs_token_item_size(right, item, &token);
+ btrfs_set_token_item_offset(right, item, push_space, &token);
}
left_nritems -= push_items;
@@ -2537,9 +3391,11 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
u32 old_left_nritems;
u32 nr;
int ret = 0;
- int wret;
u32 this_item_size;
u32 old_left_item_size;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
if (empty)
nr = min(right_nritems, max_slot);
@@ -2600,9 +3456,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
item = btrfs_item_nr(left, i);
- ioff = btrfs_item_offset(left, item);
- btrfs_set_item_offset(left, item,
- ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
+ ioff = btrfs_token_item_offset(left, item, &token);
+ btrfs_set_token_item_offset(left, item,
+ ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
+ &token);
}
btrfs_set_header_nritems(left, old_left_nritems + push_items);
@@ -2632,8 +3489,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = 0; i < right_nritems; i++) {
item = btrfs_item_nr(right, i);
- push_space = push_space - btrfs_item_size(right, item);
- btrfs_set_item_offset(right, item, push_space);
+ push_space = push_space - btrfs_token_item_size(right,
+ item, &token);
+ btrfs_set_token_item_offset(right, item, push_space, &token);
}
btrfs_mark_buffer_dirty(left);
@@ -2643,9 +3501,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
- wret = fixup_low_keys(trans, root, path, &disk_key, 1);
- if (wret)
- ret = wret;
+ fixup_low_keys(trans, root, path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
@@ -2716,7 +3572,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
path->nodes[1], slot - 1, &left);
if (ret) {
/* we hit -ENOSPC, but it isn't fatal here */
- ret = 1;
+ if (ret == -ENOSPC)
+ ret = 1;
goto out;
}
@@ -2738,22 +3595,21 @@ out:
/*
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
- *
- * returns 0 if all went well and < 0 on failure.
*/
-static noinline int copy_for_split(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *l,
- struct extent_buffer *right,
- int slot, int mid, int nritems)
+static noinline void copy_for_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *l,
+ struct extent_buffer *right,
+ int slot, int mid, int nritems)
{
int data_copy_size;
int rt_data_off;
int i;
- int ret = 0;
- int wret;
struct btrfs_disk_key disk_key;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
nritems = nritems - mid;
btrfs_set_header_nritems(right, nritems);
@@ -2775,17 +3631,15 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
struct btrfs_item *item = btrfs_item_nr(right, i);
u32 ioff;
- ioff = btrfs_item_offset(right, item);
- btrfs_set_item_offset(right, item, ioff + rt_data_off);
+ ioff = btrfs_token_item_offset(right, item, &token);
+ btrfs_set_token_item_offset(right, item,
+ ioff + rt_data_off, &token);
}
btrfs_set_header_nritems(l, mid);
- ret = 0;
btrfs_item_key(right, &disk_key, 0);
- wret = insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1);
- if (wret)
- ret = wret;
+ insert_ptr(trans, root, path, &disk_key, right->start,
+ path->slots[1] + 1, 1);
btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
@@ -2803,8 +3657,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
}
BUG_ON(path->slots[0] < 0);
-
- return ret;
}
/*
@@ -2993,12 +3845,8 @@ again:
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
- wret = insert_ptr(trans, root, path,
- &disk_key, right->start,
- path->slots[1] + 1, 1);
- if (wret)
- ret = wret;
-
+ insert_ptr(trans, root, path, &disk_key, right->start,
+ path->slots[1] + 1, 1);
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3006,29 +3854,21 @@ again:
path->slots[1] += 1;
} else {
btrfs_set_header_nritems(right, 0);
- wret = insert_ptr(trans, root, path,
- &disk_key,
- right->start,
+ insert_ptr(trans, root, path, &disk_key, right->start,
path->slots[1], 1);
- if (wret)
- ret = wret;
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
path->slots[0] = 0;
- if (path->slots[1] == 0) {
- wret = fixup_low_keys(trans, root,
- path, &disk_key, 1);
- if (wret)
- ret = wret;
- }
+ if (path->slots[1] == 0)
+ fixup_low_keys(trans, root, path,
+ &disk_key, 1);
}
btrfs_mark_buffer_dirty(right);
return ret;
}
- ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
- BUG_ON(ret);
+ copy_for_split(trans, root, path, l, right, slot, mid, nritems);
if (split == 2) {
BUG_ON(num_doubles != 0);
@@ -3036,7 +3876,7 @@ again:
goto again;
}
- return ret;
+ return 0;
push_for_double:
push_for_double_split(trans, root, path, data_size);
@@ -3238,11 +4078,9 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
return ret;
path->slots[0]++;
- ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
- item_size, item_size +
- sizeof(struct btrfs_item), 1);
- BUG_ON(ret);
-
+ setup_items_for_insert(trans, root, path, new_key, &item_size,
+ item_size, item_size +
+ sizeof(struct btrfs_item), 1);
leaf = path->nodes[0];
memcpy_extent_buffer(leaf,
btrfs_item_ptr_offset(leaf, path->slots[0]),
@@ -3257,10 +4095,10 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
* off the end of the item or if we shift the item to chop bytes off
* the front.
*/
-int btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u32 new_size, int from_end)
+void btrfs_truncate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u32 new_size, int from_end)
{
int slot;
struct extent_buffer *leaf;
@@ -3271,13 +4109,16 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
unsigned int old_size;
unsigned int size_diff;
int i;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
slot = path->slots[0];
old_size = btrfs_item_size_nr(leaf, slot);
if (old_size == new_size)
- return 0;
+ return;
nritems = btrfs_header_nritems(leaf);
data_end = leaf_data_end(root, leaf);
@@ -3297,8 +4138,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff + size_diff);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff + size_diff, &token);
}
/* shift the data */
@@ -3350,15 +4192,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
btrfs_print_leaf(root, leaf);
BUG();
}
- return 0;
}
/*
* make the item pointed to by the path bigger, data_size is the new size.
*/
-int btrfs_extend_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- u32 data_size)
+void btrfs_extend_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ u32 data_size)
{
int slot;
struct extent_buffer *leaf;
@@ -3368,6 +4209,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
unsigned int old_data;
unsigned int old_size;
int i;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
@@ -3397,8 +4241,9 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff - data_size);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff - data_size, &token);
}
/* shift the data */
@@ -3416,7 +4261,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
btrfs_print_leaf(root, leaf);
BUG();
}
- return 0;
}
/*
@@ -3441,6 +4285,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
unsigned int data_end;
struct btrfs_disk_key disk_key;
struct btrfs_key found_key;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
for (i = 0; i < nr; i++) {
if (total_size + data_size[i] + sizeof(struct btrfs_item) >
@@ -3506,8 +4353,9 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff - total_data);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff - total_data, &token);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3534,9 +4382,10 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(leaf, slot + i);
- btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+ btrfs_set_token_item_offset(leaf, item,
+ data_end - data_size[i], &token);
data_end -= data_size[i];
- btrfs_set_item_size(leaf, item, data_size[i]);
+ btrfs_set_token_item_size(leaf, item, data_size[i], &token);
}
btrfs_set_header_nritems(leaf, nritems + nr);
btrfs_mark_buffer_dirty(leaf);
@@ -3544,7 +4393,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
ret = 0;
if (slot == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(trans, root, path, &disk_key, 1);
}
if (btrfs_leaf_free_space(root, leaf) < 0) {
@@ -3562,19 +4411,21 @@ out:
* to save stack depth by doing the bulk of the work in a function
* that doesn't call btrfs_search_slot
*/
-int setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
- u32 total_data, u32 total_size, int nr)
+void setup_items_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size,
+ u32 total_data, u32 total_size, int nr)
{
struct btrfs_item *item;
int i;
u32 nritems;
unsigned int data_end;
struct btrfs_disk_key disk_key;
- int ret;
struct extent_buffer *leaf;
int slot;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
slot = path->slots[0];
@@ -3606,8 +4457,9 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff - total_data);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff - total_data, &token);
}
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
@@ -3626,17 +4478,17 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(leaf, slot + i);
- btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
+ btrfs_set_token_item_offset(leaf, item,
+ data_end - data_size[i], &token);
data_end -= data_size[i];
- btrfs_set_item_size(leaf, item, data_size[i]);
+ btrfs_set_token_item_size(leaf, item, data_size[i], &token);
}
btrfs_set_header_nritems(leaf, nritems + nr);
- ret = 0;
if (slot == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- ret = fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(trans, root, path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(leaf);
@@ -3645,7 +4497,6 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
btrfs_print_leaf(root, leaf);
BUG();
}
- return ret;
}
/*
@@ -3672,16 +4523,14 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
if (ret == 0)
return -EEXIST;
if (ret < 0)
- goto out;
+ return ret;
slot = path->slots[0];
BUG_ON(slot < 0);
- ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
+ setup_items_for_insert(trans, root, path, cpu_key, data_size,
total_data, total_size, nr);
-
-out:
- return ret;
+ return 0;
}
/*
@@ -3717,22 +4566,30 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
* the tree should have been previously balanced so the deletion does not
* empty a node.
*/
-static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot)
+static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot,
+ int tree_mod_log)
{
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
- int ret = 0;
- int wret;
+ int ret;
nritems = btrfs_header_nritems(parent);
if (slot != nritems - 1) {
+ if (tree_mod_log && level)
+ tree_mod_log_eb_move(root->fs_info, parent, slot,
+ slot + 1, nritems - slot - 1);
memmove_extent_buffer(parent,
btrfs_node_key_ptr_offset(slot),
btrfs_node_key_ptr_offset(slot + 1),
sizeof(struct btrfs_key_ptr) *
(nritems - slot - 1));
+ } else if (tree_mod_log && level) {
+ ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
+ MOD_LOG_KEY_REMOVE);
+ BUG_ON(ret < 0);
}
+
nritems--;
btrfs_set_header_nritems(parent, nritems);
if (nritems == 0 && parent == root->node) {
@@ -3743,12 +4600,9 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
- if (wret)
- ret = wret;
+ fixup_low_keys(trans, root, path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
- return ret;
}
/*
@@ -3761,17 +4615,13 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* The path must have already been setup for deleting the leaf, including
* all the proper balancing. path->nodes[1] must be locked.
*/
-static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *leaf)
+static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *leaf)
{
- int ret;
-
WARN_ON(btrfs_header_generation(leaf) != trans->transid);
- ret = del_ptr(trans, root, path, 1, path->slots[1]);
- if (ret)
- return ret;
+ del_ptr(trans, root, path, 1, path->slots[1], 1);
/*
* btrfs_free_extent is expensive, we want to make sure we
@@ -3781,8 +4631,9 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
+ extent_buffer_get(leaf);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
- return 0;
+ free_extent_buffer_stale(leaf);
}
/*
* delete the item at the leaf level in path. If that empties
@@ -3799,6 +4650,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int wret;
int i;
u32 nritems;
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
leaf = path->nodes[0];
last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
@@ -3820,8 +4674,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u32 ioff;
item = btrfs_item_nr(leaf, i);
- ioff = btrfs_item_offset(leaf, item);
- btrfs_set_item_offset(leaf, item, ioff + dsize);
+ ioff = btrfs_token_item_offset(leaf, item, &token);
+ btrfs_set_token_item_offset(leaf, item,
+ ioff + dsize, &token);
}
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
@@ -3839,8 +4694,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
} else {
btrfs_set_path_blocking(path);
clean_tree_block(trans, root, leaf);
- ret = btrfs_del_leaf(trans, root, path, leaf);
- BUG_ON(ret);
+ btrfs_del_leaf(trans, root, path, leaf);
}
} else {
int used = leaf_space_used(leaf, 0, nritems);
@@ -3848,10 +4702,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- wret = fixup_low_keys(trans, root, path,
- &disk_key, 1);
- if (wret)
- ret = wret;
+ fixup_low_keys(trans, root, path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
@@ -3879,9 +4730,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs