diff options
author | Patrick McHardy <kaber@trash.net> | 2012-08-08 21:03:47 +0200 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2012-08-08 21:03:47 +0200 |
commit | d53b4ed072d9779cdf53582c46436dec06d0961f (patch) | |
tree | ac95ecab33e31cd79aae69c475e8348adac51230 /fs/exofs | |
parent | 5d4dff7f1011a81a693a9c7b1f6a0b9c842eb60c (diff) | |
parent | 28a33cbc24e4256c143dce96c7d93bf423229f92 (diff) |
Merge tag 'v3.5' of 192.168.0.154:/repos/git/linux-2.6
Conflicts:
drivers/Kconfig
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'fs/exofs')
-rw-r--r-- | fs/exofs/Kbuild | 2 | ||||
-rw-r--r-- | fs/exofs/Kconfig | 11 | ||||
-rw-r--r-- | fs/exofs/Kconfig.ore | 12 | ||||
-rw-r--r-- | fs/exofs/dir.c | 6 | ||||
-rw-r--r-- | fs/exofs/exofs.h | 16 | ||||
-rw-r--r-- | fs/exofs/inode.c | 6 | ||||
-rw-r--r-- | fs/exofs/namei.c | 19 | ||||
-rw-r--r-- | fs/exofs/ore.c | 16 | ||||
-rw-r--r-- | fs/exofs/ore_raid.c | 169 | ||||
-rw-r--r-- | fs/exofs/super.c | 30 | ||||
-rw-r--r-- | fs/exofs/sys.c | 200 |
11 files changed, 379 insertions, 108 deletions
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index 352ba149d23..389ba8312d5 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild @@ -16,5 +16,5 @@ libore-y := ore.o ore_raid.o obj-$(CONFIG_ORE) += libore.o -exofs-y := inode.o file.o symlink.o namei.o dir.o super.o +exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o obj-$(CONFIG_EXOFS_FS) += exofs.o diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index da42f32c49b..86194b2f799 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig @@ -1,14 +1,3 @@ -# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects -# for every ORE user we do it like this. Any user should add itself here -# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are -# selected here, and we default to "ON". So in effect it is like been -# selected by any of the users. -config ORE - tristate - depends on EXOFS_FS || PNFS_OBJLAYOUT - select ASYNC_XOR - default SCSI_OSD_ULD - config EXOFS_FS tristate "exofs: OSD based file system support" depends on SCSI_OSD_ULD diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore new file mode 100644 index 00000000000..1ca7fb7b6ba --- /dev/null +++ b/fs/exofs/Kconfig.ore @@ -0,0 +1,12 @@ +# ORE - Objects Raid Engine (libore.ko) +# +# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects +# for every ORE user we do it like this. Any user should add itself here +# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are +# selected here, and we default to "ON". So in effect it is like been +# selected by any of the users. +config ORE + tristate + depends on EXOFS_FS || PNFS_OBJLAYOUT + select ASYNC_XOR + default SCSI_OSD_ULD diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index d0941c6a1f7..c61e62ac231 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -234,7 +234,7 @@ static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = { static inline void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } @@ -597,7 +597,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent) goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); de = (struct exofs_dir_entry *)kaddr; de->name_len = 1; de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1)); @@ -611,7 +611,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent) de->inode_no = cpu_to_le64(parent->i_ino); memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR)); exofs_set_de_type(de, inode); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = exofs_commit_chunk(page, 0, chunk_size); fail: page_cache_release(page); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 51f4b4c40f0..fffe86fd7a4 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -56,6 +56,9 @@ struct exofs_dev { struct ore_dev ored; unsigned did; + unsigned urilen; + uint8_t *uri; + struct kobject ed_kobj; }; /* * our extension to the in-memory superblock @@ -73,6 +76,7 @@ struct exofs_sb_info { struct ore_layout layout; /* Default files layout */ struct ore_comp one_comp; /* id & cred of partition id=0*/ struct ore_components oc; /* comps for the partition */ + struct kobject s_kobj; /* holds per-sbi kobject */ }; /* @@ -154,7 +158,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); extern struct inode *exofs_iget(struct super_block *, unsigned long); -struct inode *exofs_new_inode(struct inode *, int); +struct inode *exofs_new_inode(struct inode *, umode_t); extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); extern void exofs_evict_inode(struct inode *); @@ -176,6 +180,16 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj); int exofs_sbi_write_stats(struct exofs_sb_info *sbi); +/* sys.c */ +int exofs_sysfs_init(void); +void exofs_sysfs_uninit(void); +int exofs_sysfs_sb_add(struct exofs_sb_info *sbi, + struct exofs_dt_device_info *dt_dev); +void exofs_sysfs_sb_del(struct exofs_sb_info *sbi); +int exofs_sysfs_odev_add(struct exofs_dev *edev, + struct exofs_sb_info *sbi); +void exofs_sysfs_dbg_print(void); + /********************* * operation vectors * *********************/ diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index f6dbf7768ce..5badb0c039d 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1276,7 +1276,7 @@ static void create_done(struct ore_io_state *ios, void *p) /* * Set up a new inode and create an object for it on the OSD */ -struct inode *exofs_new_inode(struct inode *dir, int mode) +struct inode *exofs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; struct exofs_sb_info *sbi = sb->s_fs_info; @@ -1473,7 +1473,7 @@ void exofs_evict_inode(struct inode *inode) goto no_delete; inode->i_size = 0; - end_writeback(inode); + clear_inode(inode); /* if we are deleting an obj that hasn't been created yet, wait. * This also makes sure that create_done cannot be called with an @@ -1503,5 +1503,5 @@ void exofs_evict_inode(struct inode *inode) return; no_delete: - end_writeback(inode); + clear_inode(inode); } diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index b54c43775f1..fc7161d6bf6 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -59,7 +59,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -static int exofs_create(struct inode *dir, struct dentry *dentry, int mode, +static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { struct inode *inode = exofs_new_inode(dir, mode); @@ -74,7 +74,7 @@ static int exofs_create(struct inode *dir, struct dentry *dentry, int mode, return err; } -static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode, +static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; @@ -143,9 +143,6 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir, { struct inode *inode = old_dentry->d_inode; - if (inode->i_nlink >= EXOFS_LINK_MAX) - return -EMLINK; - inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); ihold(inode); @@ -153,13 +150,10 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir, return exofs_add_nondir(dentry, inode); } -static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct inode *inode; - int err = -EMLINK; - - if (dir->i_nlink >= EXOFS_LINK_MAX) - goto out; + int err; inode_inc_link_count(dir); @@ -275,11 +269,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out_dir; } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= EXOFS_LINK_MAX) - goto out_dir; - } err = exofs_add_link(new_dentry, old_inode); if (err) goto out_dir; diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index d271ad83720..24a49d47e93 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, /* first/last seg is split */ num_raid_units += layout->group_width; - sgs_per_dev = div_u64(num_raid_units, data_devs); + sgs_per_dev = div_u64(num_raid_units, data_devs) + 2; } else { /* For Writes add parity pages array. */ max_par_pages = num_raid_units * pages_in_unit * @@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error) u64 residual = ios->reading ? or->in.residual : or->out.residual; u64 offset = (ios->offset + ios->length) - residual; - struct ore_dev *od = ios->oc->ods[ - per_dev->dev - ios->oc->first_dev]; + unsigned dev = per_dev->dev - ios->oc->first_dev; + struct ore_dev *od = ios->oc->ods[dev]; - on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, + on_dev_error(ios, od, dev, osi.osd_err_pri, offset, residual); } if (osi.osd_err_pri >= acumulated_osd_err) { @@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios) out: ios->numdevs = devs_in_group; ios->pages_consumed = cur_pg; - if (unlikely(ret)) { - if (length == ios->length) - return ret; - else - ios->length -= length; - } - return 0; + return ret; } int ore_create(struct ore_io_state *ios) diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 29c47e5c4a8..5f376d14fdc 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d, { unsigned data_devs = sp2d->data_devs; unsigned group_width = data_devs + sp2d->parity; - unsigned p; + int p, c; if (!sp2d->needed) return; - for (p = 0; p < sp2d->pages_in_unit; p++) { - struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; - - if (_1ps->write_count < group_width) { - unsigned c; + for (c = data_devs - 1; c >= 0; --c) + for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; - for (c = 0; c < data_devs; c++) - if (_1ps->page_is_read[c]) { - struct page *page = _1ps->pages[c]; + if (_1ps->page_is_read[c]) { + struct page *page = _1ps->pages[c]; - r4w->put_page(priv, page); - _1ps->page_is_read[c] = false; - } + r4w->put_page(priv, page); + _1ps->page_is_read[c] = false; + } } + for (p = 0; p < sp2d->pages_in_unit; p++) { + struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; + memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); _1ps->write_count = 0; _1ps->tx = NULL; @@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios) /* @si contains info of the to-be-inserted page. Update of @si should be * maintained by caller. Specificaly si->dev, si->obj_offset, ... */ -static int _add_to_read_4_write(struct ore_io_state *ios, - struct ore_striping_info *si, struct page *page) +static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si, + struct page *page, unsigned pg_len) { struct request_queue *q; struct ore_per_dev_state *per_dev; @@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios, _ore_add_sg_seg(per_dev, gap, true); } q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); - added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); - if (unlikely(added_len != PAGE_SIZE)) { + added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len, + si->obj_offset % PAGE_SIZE); + if (unlikely(added_len != pg_len)) { ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", per_dev->bio->bi_vcnt); return -ENOMEM; } - per_dev->length += PAGE_SIZE; + per_dev->length += pg_len; return 0; } +/* read the beginning of an unaligned first page */ +static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page) +{ + struct ore_striping_info si; + unsigned pg_len; + + ore_calc_stripe_info(ios->layout, ios->offset, 0, &si); + + pg_len = si.obj_offset % PAGE_SIZE; + si.obj_offset -= pg_len; + + ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n", + _LLU(si.obj_offset), pg_len, page->index, si.dev); + + return _add_to_r4w(ios, &si, page, pg_len); +} + +/* read the end of an incomplete last page */ +static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset) +{ + struct ore_striping_info si; + struct page *page; + unsigned pg_len, p, c; + + ore_calc_stripe_info(ios->layout, *offset, 0, &si); + + p = si.unit_off / PAGE_SIZE; + c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, + ios->layout->mirrors_p1, si.par_dev, si.dev); + page = ios->sp2d->_1p_stripes[p].pages[c]; + + pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); + *offset += pg_len; + + ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n", + p, c, _LLU(*offset), pg_len, si.dev, si.par_dev); + + BUG_ON(!page); + + return _add_to_r4w(ios, &si, page, pg_len); +} + static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) { struct bio_vec *bv; @@ -418,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) * ios->sp2d[p][*], xor is calculated the same way. These pages are * allocated/freed and don't go through cache */ -static int _read_4_write(struct ore_io_state *ios) +static int _read_4_write_first_stripe(struct ore_io_state *ios) { - struct ore_io_state *ios_read; struct ore_striping_info read_si; struct __stripe_pages_2d *sp2d = ios->sp2d; u64 offset = ios->si.first_stripe_start; - u64 last_stripe_end; - unsigned bytes_in_stripe = ios->si.bytes_in_stripe; - unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1; - int ret; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; if (offset == ios->offset) /* Go to start collect $200 */ goto read_last_stripe; @@ -435,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios) min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); + ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n", + offset, ios->offset, min_p, max_p); + for (c = 0; ; c++) { ore_calc_stripe_info(ios->layout, offset, 0, &read_si); read_si.obj_offset += min_p * PAGE_SIZE; @@ -444,9 +486,13 @@ static int _read_4_write(struct ore_io_state *ios) struct page **pp = &_1ps->pages[c]; bool uptodate; - if (*pp) + if (*pp) { + if (ios->offset % PAGE_SIZE) + /* Read the remainder of the page */ + _add_to_r4w_first_page(ios, *pp); /* to-be-written pages start here */ goto read_last_stripe; + } *pp = ios->r4w->get_page(ios->private, offset, &uptodate); @@ -454,7 +500,7 @@ static int _read_4_write(struct ore_io_state *ios) return -ENOMEM; if (!uptodate) - _add_to_read_4_write(ios, &read_si, *pp); + _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE); /* Mark read-pages to be cache_released */ _1ps->page_is_read[c] = true; @@ -465,8 +511,23 @@ static int _read_4_write(struct ore_io_state *ios) } read_last_stripe: - offset = ios->offset + (ios->length + PAGE_SIZE - 1) / - PAGE_SIZE * PAGE_SIZE; + return 0; +} + +static int _read_4_write_last_stripe(struct ore_io_state *ios) +{ + struct ore_striping_info read_si; + struct __stripe_pages_2d *sp2d = ios->sp2d; + u64 offset; + u64 last_stripe_end; + unsigned bytes_in_stripe = ios->si.bytes_in_stripe; + unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1; + + offset = ios->offset + ios->length; + if (offset % PAGE_SIZE) + _add_to_r4w_last_page(ios, &offset); + /* offset will be aligned to next page */ + last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) * bytes_in_stripe; if (offset == last_stripe_end) /* Optimize for the aligned case */ @@ -477,15 +538,15 @@ read_last_stripe: c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); - BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end); - /* unaligned IO must be within a single stripe */ - if (min_p == sp2d->pages_in_unit) { /* Didn't do it yet */ min_p = _sp2d_min_pg(sp2d); max_p = _sp2d_max_pg(sp2d); } + ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n", + offset, last_stripe_end, min_p, max_p); + while (offset < last_stripe_end) { struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; @@ -503,7 +564,7 @@ read_last_stripe: /* Mark read-pages to be cache_released */ _1ps->page_is_read[c] = true; if (!uptodate) - _add_to_read_4_write(ios, &read_si, page); + _add_to_r4w(ios, &read_si, page, PAGE_SIZE); } offset += PAGE_SIZE; @@ -518,6 +579,15 @@ read_last_stripe: } read_it: + return 0; +} + +static int _read_4_write_execute(struct ore_io_state *ios) +{ + struct ore_io_state *ios_read; + unsigned i; + int ret; + ios_read = ios->ios_read_4_write; if (!ios_read) return 0; @@ -541,6 +611,8 @@ read_it: } _mark_read4write_pages_uptodate(ios_read, ret); + ore_put_io_state(ios_read); + ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */ return 0; } @@ -551,7 +623,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, unsigned cur_len) { if (ios->reading) { - BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); + if (per_dev->cur_sg >= ios->sgs_per_dev) { + ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" , + per_dev->cur_sg, ios->sgs_per_dev); + return -ENOMEM; + } _ore_add_sg_seg(per_dev, cur_len, true); } else { struct __stripe_pages_2d *sp2d = ios->sp2d; @@ -572,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, /* If first stripe, Read in all read4write pages * (if needed) before we calculate the first parity. */ - _read_4_write(ios); + _read_4_write_first_stripe(ios); } + if (!cur_len) /* If last stripe r4w pages of last stripe */ + _read_4_write_last_stripe(ios); + _read_4_write_execute(ios); for (i = 0; i < num_pages; i++) { pages[i] = _raid_page_alloc(); @@ -600,36 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios, int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) { - struct ore_layout *layout = ios->layout; - if (ios->parity_pages) { + struct ore_layout *layout = ios->layout; unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; - unsigned stripe_size = ios->si.bytes_in_stripe; - u64 last_stripe, first_stripe; if (_sp2d_alloc(pages_in_unit, layout->group_width, layout->parity, &ios->sp2d)) { return -ENOMEM; } - - BUG_ON(ios->offset % PAGE_SIZE); - - /* Round io down to last full strip */ - first_stripe = div_u64(ios->offset, stripe_size); - last_stripe = div_u64(ios->offset + ios->length, stripe_size); - - /* If an IO spans more then a single stripe it must end at - * a stripe boundary. The reminder at the end is pushed into the - * next IO. - */ - if (last_stripe != first_stripe) { - ios->length = last_stripe * stripe_size - ios->offset; - - BUG_ON(!ios->length); - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / - PAGE_SIZE; - ios->si.length = ios->length; /*make it consistent */ - } } return 0; } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index e6085ec192d..433783624d1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -166,7 +166,6 @@ static struct inode *exofs_alloc_inode(struct super_block *sb) static void exofs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); } @@ -390,7 +389,7 @@ static int exofs_sync_fs(struct super_block *sb, int wait) ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); memset(fscb, 0, ios->length); fscb->s_nextid = cpu_to_le64(sbi->s_nextid); - fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); + fscb->s_numfiles = cpu_to_le64(sbi->s_numfiles); fscb->s_magic = cpu_to_le16(sb->s_magic); fscb->s_newfs = 0; fscb->s_version = EXOFS_FSCB_VER; @@ -473,6 +472,7 @@ static void exofs_put_super(struct super_block *sb) _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0), sbi->one_comp.obj.partition); + exofs_sysfs_sb_del(sbi); bdi_destroy(&sbi->bdi); exofs_free_sbi(sbi); sb->s_fs_info = NULL; @@ -530,7 +530,8 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, struct osd_dev_info *odi) { odi->systemid_len = le32_to_cpu(dt_dev->systemid_len); - memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len); + if (likely(odi->systemid_len)) + memcpy(odi->systemid, dt_dev->systemid, OSD_SYSTEMID_LEN); odi->osdname_len = le32_to_cpu(dt_dev->osdname_len); odi->osdname = dt_dev->osdname; @@ -566,7 +567,7 @@ int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); if (unlikely(!aoded)) { - EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", + EXOFS_ERR("ERROR: failed allocating Device array[%d]\n", numdevs); return -ENOMEM; } @@ -632,6 +633,12 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0], (numdevs - 1) * sizeof(sbi->oc.ods[0])); + /* create sysfs subdir under which we put the device table + * And cluster layout. A Superblock is identified by the string: + * "dev[0].osdname"_"pid" + */ + exofs_sysfs_sb_add(sbi, &dt->dt_dev_table[0]); + for (i = 0; i < numdevs; i++) { struct exofs_fscb fscb; struct osd_dev_info odi; @@ -657,6 +664,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, eds[i].ored.od = fscb_od; ++sbi->oc.numdevs; fscb_od = NULL; + exofs_sysfs_odev_add(&eds[i], sbi); continue; } @@ -682,6 +690,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, odi.osdname); goto out; } + exofs_sysfs_odev_add(&eds[i], sbi); /* TODO: verify other information is correct and FS-uuid * matches. Benny what did you say about device table @@ -745,7 +754,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sbi->one_comp.obj.partition = opts->pid; sbi->one_comp.obj.id = 0; exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); - sbi->oc.numdevs = 1; sbi->oc.single_comp = EC_SINGLE_COMP; sbi->oc.comps = &sbi->one_comp; @@ -755,6 +763,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize = EXOFS_BLKSIZE; sb->s_blocksize_bits = EXOFS_BLKSHIFT; sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_max_links = EXOFS_LINK_MAX; atomic_set(&sbi->s_curr_pending, 0); sb->s_bdev = NULL; sb->s_dev = 0; @@ -803,6 +812,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; ore_comp_set_dev(&sbi->oc, 0, od); + sbi->oc.numdevs = 1; } __sbi_read_stats(sbi); @@ -819,9 +829,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ret = PTR_ERR(root); goto free_sbi; } - sb->s_root = d_alloc_root(root); + sb->s_root = d_make_root(root); if (!sb->s_root) { - iput(root); EXOFS_ERR("ERROR: get root inode failed\n"); ret = -ENOMEM; goto free_sbi; @@ -839,9 +848,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); if (ret) { EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); + dput(sb->s_root); + sb->s_root = NULL; goto free_sbi; } + exofs_sysfs_dbg_print(); _exofs_print_device("Mounting", opts->dev_name, ore_comp_dev(&sbi->oc, 0), sbi->one_comp.obj.partition); @@ -1021,6 +1033,9 @@ static int __init init_exofs(void) if (err) goto out_d; + /* We don't fail if sysfs creation failed */ + exofs_sysfs_init(); + return 0; out_d: destroy_inodecache(); @@ -1030,6 +1045,7 @@ out: static void __exit exit_exofs(void) { + exofs_sysfs_uninit(); unregister_filesystem(&exofs_type); destroy_inodecache(); } diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c new file mode 100644 index 00000000000..5a7b691e748 --- /dev/null +++ b/fs/exofs/sys.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2012 + * Sachin Bhamare <sbhamare@panasas.com> + * Boaz Harrosh <bharrosh@panasas.com> + * + * This file is part of exofs. + * + * exofs is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 2 as published by + * the Free Software Foundation. + * + * exofs is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with exofs; if not, write to the: + * Free Software Foundation <licensing@fsf.org> + */ + +#include <linux/kobject.h> +#include <linux/device.h> + +#include "exofs.h" + +struct odev_attr { + struct attribute attr; + ssize_t (*show)(struct exofs_dev *, char *); + ssize_t (*store)(struct exofs_dev *, const char *, size_t); +}; + +static ssize_t odev_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj); + struct odev_attr *a = container_of(attr, struct odev_attr, attr); + + return a->show ? a->show(edp, buf) : 0; +} + +static ssize_t odev_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj); + struct odev_attr *a = container_of(attr, struct odev_attr, attr); + + return a->store ? a->store(edp, buf, len) : len; +} + +static const struct sysfs_ops odev_attr_ops = { + .show = odev_attr_show, + .store = odev_attr_store, +}; + + +static struct kset *exofs_kset; + +static ssize_t osdname_show(struct exofs_dev *edp, char *buf) +{ + struct osd_dev *odev = edp->ored.od; + const struct osd_dev_info *odi = osduld_device_info(odev); + + return snprintf(buf, odi->osdname_len + 1, "%s", odi->osdname); +} + +static ssize_t systemid_show(struct exofs_dev *edp, char *buf) +{ + struct osd_dev *odev = edp->ored.od; + const struct osd_dev_info *odi = osduld_device_info(odev); + + memcpy(buf, odi->systemid, odi->systemid_len); + return odi->systemid_len; +} + +static ssize_t uri_show(struct exofs_dev *edp, char *buf) +{ + return snprintf(buf, edp->urilen, "%s", edp->uri); +} + +static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len) +{ + edp->urilen = strlen(buf) + 1; + edp->uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL); + strncpy(edp->uri, buf, edp->urilen); + return edp->urilen; +} + +#define OSD_ATTR(name, mode, show, store) \ + static struct odev_attr odev_attr_##name = \ + __ATTR(name, mode, show, store) + +OSD_ATTR(osdname, S_IRUGO, osdname_show, NULL); +OSD_ATTR(systemid, S_IRUGO, systemid_show, NULL); +OSD_ATTR(uri, S_IRWXU, uri_show, uri_store); + +static struct attribute *odev_attrs[] = { + &odev_attr_osdname.attr, + &odev_attr_systemid.attr, + &odev_attr_uri.attr, + NULL, +}; + +static struct kobj_type odev_ktype = { + .default_attrs = odev_attrs, + .sysfs_ops = &odev_attr_ops, +}; + +static struct kobj_type uuid_ktype = { +}; + +void exofs_sysfs_dbg_print(void) +{ +#ifdef CONFIG_EXOFS_DEBUG + struct kobject *k_name, *k_tmp; + + list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) { + printk(KERN_INFO "%s: name %s ref %d\n", + __func__, kobject_name(k_name), + (int)atomic_read(&k_name->kref.refcount)); + } +#endif +} +/* + * This function removes all kobjects under exofs_kset + * At the end of it, exofs_kset kobject will have a refcount + * of 1 which gets decremented only on exofs module unload + */ +void exofs_sysfs_sb_del(struct exofs_sb_info *sbi) +{ + struct kobject *k_name, *k_tmp; + struct kobject *s_kobj = &sbi->s_kobj; + + list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) { + /* Remove all that are children of this SBI */ + if (k_name->parent == s_kobj) + kobject_put(k_name); + } + kobject_put(s_kobj); +} + +/* + * This function creates sysfs entries to hold the current exofs cluster + * instance (uniquely identified by osdname,pid tuple). + * This function gets called once per exofs mount instance. + */ +int exofs_sysfs_sb_add(struct exofs_sb_info *sbi, + struct exofs_dt_device_info *dt_dev) +{ + struct kobject *s_kobj; + int retval = 0; + uint64_t pid = sbi->one_comp.obj.partition; + + /* allocate new uuid dirent */ + s_kobj = &sbi->s_kobj; + s_kobj->kset = exofs_kset; + retval = kobject_init_and_add(s_kobj, &uuid_ktype, + &exofs_kset->kobj, "%s_%llx", dt_dev->osdname, pid); + if (retval) { + EXOFS_ERR("ERROR: Failed to create sysfs entry for " + "uuid-%s_%llx => %d\n", dt_dev->osdname, pid, retval); + return -ENOMEM; + } + return 0; +} + +int exofs_sysfs_odev_add(struct exofs_dev *edev, struct exofs_sb_info *sbi) +{ + struct kobject *d_kobj; + int retval = 0; + + /* create osd device group which contains following attributes + * osdname, systemid & uri + */ + d_kobj = &edev->ed_kobj; + d_kobj->kset = exofs_kset; + retval = kobject_init_and_add(d_kobj, &odev_ktype, + &sbi->s_kobj, "dev%u", edev->did); + if (retval) { + EXOFS_ERR("ERROR: Failed to create sysfs entry for " + "device dev%u\n", edev->did); + return retval; + } + return 0; +} + +int exofs_sysfs_init(void) +{ + exofs_kset = kset_create_and_add("exofs", NULL, fs_kobj); + if (!exofs_kset) { + EXOFS_ERR("ERROR: kset_create_and_add exofs failed\n"); + return -ENOMEM; + } + return 0; +} + +void exofs_sysfs_uninit(void) +{ + kset_unregister(exofs_kset); +} |