aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exofs')
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/Kconfig11
-rw-r--r--fs/exofs/Kconfig.ore12
-rw-r--r--fs/exofs/dir.c6
-rw-r--r--fs/exofs/exofs.h16
-rw-r--r--fs/exofs/inode.c6
-rw-r--r--fs/exofs/namei.c19
-rw-r--r--fs/exofs/ore.c16
-rw-r--r--fs/exofs/ore_raid.c169
-rw-r--r--fs/exofs/super.c30
-rw-r--r--fs/exofs/sys.c200
11 files changed, 379 insertions, 108 deletions
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 352ba149d23..389ba8312d5 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -16,5 +16,5 @@
libore-y := ore.o ore_raid.o
obj-$(CONFIG_ORE) += libore.o
-exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
+exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o
obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index da42f32c49b..86194b2f799 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,14 +1,3 @@
-# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
-# for every ORE user we do it like this. Any user should add itself here
-# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
-# selected here, and we default to "ON". So in effect it is like been
-# selected by any of the users.
-config ORE
- tristate
- depends on EXOFS_FS || PNFS_OBJLAYOUT
- select ASYNC_XOR
- default SCSI_OSD_ULD
-
config EXOFS_FS
tristate "exofs: OSD based file system support"
depends on SCSI_OSD_ULD
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
new file mode 100644
index 00000000000..1ca7fb7b6ba
--- /dev/null
+++ b/fs/exofs/Kconfig.ore
@@ -0,0 +1,12 @@
+# ORE - Objects Raid Engine (libore.ko)
+#
+# Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects
+# on every ORE user we do it like this. Any user should add itself here
+# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
+# selected here, and we default to "ON". So in effect it is as if ORE had
+# been selected by any of the users.
+config ORE
+ tristate
+ depends on EXOFS_FS || PNFS_OBJLAYOUT
+ select ASYNC_XOR
+ default SCSI_OSD_ULD
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index d0941c6a1f7..c61e62ac231 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -234,7 +234,7 @@ static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
static inline
void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode)
{
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
@@ -597,7 +597,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
goto fail;
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
de = (struct exofs_dir_entry *)kaddr;
de->name_len = 1;
de->rec_len = cpu_to_le16(EXOFS_DIR_REC_LEN(1));
@@ -611,7 +611,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
de->inode_no = cpu_to_le64(parent->i_ino);
memcpy(de->name, PARENT_DIR, sizeof(PARENT_DIR));
exofs_set_de_type(de, inode);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
err = exofs_commit_chunk(page, 0, chunk_size);
fail:
page_cache_release(page);
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 51f4b4c40f0..fffe86fd7a4 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -56,6 +56,9 @@
struct exofs_dev {
struct ore_dev ored;
unsigned did;
+ unsigned urilen;
+ uint8_t *uri;
+ struct kobject ed_kobj;
};
/*
* our extension to the in-memory superblock
@@ -73,6 +76,7 @@ struct exofs_sb_info {
struct ore_layout layout; /* Default files layout */
struct ore_comp one_comp; /* id & cred of partition id=0*/
struct ore_components oc; /* comps for the partition */
+ struct kobject s_kobj; /* holds per-sbi kobject */
};
/*
@@ -154,7 +158,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
extern struct inode *exofs_iget(struct super_block *, unsigned long);
-struct inode *exofs_new_inode(struct inode *, int);
+struct inode *exofs_new_inode(struct inode *, umode_t);
extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
extern void exofs_evict_inode(struct inode *);
@@ -176,6 +180,16 @@ void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
const struct osd_obj_id *obj);
int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
+/* sys.c */
+int exofs_sysfs_init(void);
+void exofs_sysfs_uninit(void);
+int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
+ struct exofs_dt_device_info *dt_dev);
+void exofs_sysfs_sb_del(struct exofs_sb_info *sbi);
+int exofs_sysfs_odev_add(struct exofs_dev *edev,
+ struct exofs_sb_info *sbi);
+void exofs_sysfs_dbg_print(void);
+
/*********************
* operation vectors *
*********************/
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f6dbf7768ce..5badb0c039d 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1276,7 +1276,7 @@ static void create_done(struct ore_io_state *ios, void *p)
/*
* Set up a new inode and create an object for it on the OSD
*/
-struct inode *exofs_new_inode(struct inode *dir, int mode)
+struct inode *exofs_new_inode(struct inode *dir, umode_t mode)
{
struct super_block *sb = dir->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
@@ -1473,7 +1473,7 @@ void exofs_evict_inode(struct inode *inode)
goto no_delete;
inode->i_size = 0;
- end_writeback(inode);
+ clear_inode(inode);
/* if we are deleting an obj that hasn't been created yet, wait.
* This also makes sure that create_done cannot be called with an
@@ -1503,5 +1503,5 @@ void exofs_evict_inode(struct inode *inode)
return;
no_delete:
- end_writeback(inode);
+ clear_inode(inode);
}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b54c43775f1..fc7161d6bf6 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -59,7 +59,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
return d_splice_alias(inode, dentry);
}
-static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct nameidata *nd)
{
struct inode *inode = exofs_new_inode(dir, mode);
@@ -74,7 +74,7 @@ static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
return err;
}
-static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
dev_t rdev)
{
struct inode *inode;
@@ -143,9 +143,6 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
{
struct inode *inode = old_dentry->d_inode;
- if (inode->i_nlink >= EXOFS_LINK_MAX)
- return -EMLINK;
-
inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
ihold(inode);
@@ -153,13 +150,10 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
return exofs_add_nondir(dentry, inode);
}
-static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct inode *inode;
- int err = -EMLINK;
-
- if (dir->i_nlink >= EXOFS_LINK_MAX)
- goto out;
+ int err;
inode_inc_link_count(dir);
@@ -275,11 +269,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto out_dir;
} else {
- if (dir_de) {
- err = -EMLINK;
- if (new_dir->i_nlink >= EXOFS_LINK_MAX)
- goto out_dir;
- }
err = exofs_add_link(new_dentry, old_inode);
if (err)
goto out_dir;
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index d271ad83720..24a49d47e93 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
/* first/last seg is split */
num_raid_units += layout->group_width;
- sgs_per_dev = div_u64(num_raid_units, data_devs);
+ sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
} else {
/* For Writes add parity pages array. */
max_par_pages = num_raid_units * pages_in_unit *
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
u64 residual = ios->reading ?
or->in.residual : or->out.residual;
u64 offset = (ios->offset + ios->length) - residual;
- struct ore_dev *od = ios->oc->ods[
- per_dev->dev - ios->oc->first_dev];
+ unsigned dev = per_dev->dev - ios->oc->first_dev;
+ struct ore_dev *od = ios->oc->ods[dev];
- on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri,
+ on_dev_error(ios, od, dev, osi.osd_err_pri,
offset, residual);
}
if (osi.osd_err_pri >= acumulated_osd_err) {
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
out:
ios->numdevs = devs_in_group;
ios->pages_consumed = cur_pg;
- if (unlikely(ret)) {
- if (length == ios->length)
- return ret;
- else
- ios->length -= length;
- }
- return 0;
+ return ret;
}
int ore_create(struct ore_io_state *ios)
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 29c47e5c4a8..5f376d14fdc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
{
unsigned data_devs = sp2d->data_devs;
unsigned group_width = data_devs + sp2d->parity;
- unsigned p;
+ int p, c;
if (!sp2d->needed)
return;
- for (p = 0; p < sp2d->pages_in_unit; p++) {
- struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
-
- if (_1ps->write_count < group_width) {
- unsigned c;
+ for (c = data_devs - 1; c >= 0; --c)
+ for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
+ struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
- for (c = 0; c < data_devs; c++)
- if (_1ps->page_is_read[c]) {
- struct page *page = _1ps->pages[c];
+ if (_1ps->page_is_read[c]) {
+ struct page *page = _1ps->pages[c];
- r4w->put_page(priv, page);
- _1ps->page_is_read[c] = false;
- }
+ r4w->put_page(priv, page);
+ _1ps->page_is_read[c] = false;
+ }
}
+ for (p = 0; p < sp2d->pages_in_unit; p++) {
+ struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
+
memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
_1ps->write_count = 0;
_1ps->tx = NULL;
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios)
/* @si contains info of the to-be-inserted page. Update of @si should be
* maintained by caller. Specificaly si->dev, si->obj_offset, ...
*/
-static int _add_to_read_4_write(struct ore_io_state *ios,
- struct ore_striping_info *si, struct page *page)
+static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
+ struct page *page, unsigned pg_len)
{
struct request_queue *q;
struct ore_per_dev_state *per_dev;
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios,
_ore_add_sg_seg(per_dev, gap, true);
}
q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
- added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0);
- if (unlikely(added_len != PAGE_SIZE)) {
+ added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
+ si->obj_offset % PAGE_SIZE);
+ if (unlikely(added_len != pg_len)) {
ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
per_dev->bio->bi_vcnt);
return -ENOMEM;
}
- per_dev->length += PAGE_SIZE;
+ per_dev->length += pg_len;
return 0;
}
+/* read the beginning of an unaligned first page */
+static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
+{
+ struct ore_striping_info si;
+ unsigned pg_len;
+
+ ore_calc_stripe_info(ios->layout, ios->offset, 0, &si); /* striping info for the first byte of the IO */
+
+ pg_len = si.obj_offset % PAGE_SIZE; /* bytes of the page that lie before the IO start */
+ si.obj_offset -= pg_len; /* round down to the page boundary; the read begins there */
+
+ ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
+ _LLU(si.obj_offset), pg_len, page->index, si.dev);
+
+ return _add_to_r4w(ios, &si, page, pg_len); /* queue a pg_len-byte read into @page; result passed through */
+}
+
+/* read the end of an incomplete last page */
+static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
+{
+ struct ore_striping_info si;
+ struct page *page;
+ unsigned pg_len, p, c;
+
+ ore_calc_stripe_info(ios->layout, *offset, 0, &si); /* striping info for the byte at *offset */
+
+ p = si.unit_off / PAGE_SIZE; /* page row inside the stripe unit */
+ c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
+ ios->layout->mirrors_p1, si.par_dev, si.dev); /* column index used for the sp2d pages[] lookup below */
+ page = ios->sp2d->_1p_stripes[p].pages[c]; /* page already registered at [p][c] */
+
+ pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); /* bytes from the in-unit offset to the end of its page */
+ *offset += pg_len; /* caller's offset moves past the bytes read here */
+
+ ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
+ p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
+
+ BUG_ON(!page); /* a page must already be present at [p][c] */
+
+ return _add_to_r4w(ios, &si, page, pg_len); /* queue the tail read; result passed through */
+}
+
static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
{
struct bio_vec *bv;
@@ -418,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
* ios->sp2d[p][*], xor is calculated the same way. These pages are
* allocated/freed and don't go through cache
*/
-static int _read_4_write(struct ore_io_state *ios)
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
{
- struct ore_io_state *ios_read;
struct ore_striping_info read_si;
struct __stripe_pages_2d *sp2d = ios->sp2d;
u64 offset = ios->si.first_stripe_start;
- u64 last_stripe_end;
- unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
- unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
- int ret;
+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
if (offset == ios->offset) /* Go to start collect $200 */
goto read_last_stripe;
@@ -435,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
min_p = _sp2d_min_pg(sp2d);
max_p = _sp2d_max_pg(sp2d);
+ ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
+ offset, ios->offset, min_p, max_p);
+
for (c = 0; ; c++) {
ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
read_si.obj_offset += min_p * PAGE_SIZE;
@@ -444,9 +486,13 @@ static int _read_4_write(struct ore_io_state *ios)
struct page **pp = &_1ps->pages[c];
bool uptodate;
- if (*pp)
+ if (*pp) {
+ if (ios->offset % PAGE_SIZE)
+ /* Read the remainder of the page */
+ _add_to_r4w_first_page(ios, *pp);
/* to-be-written pages start here */
goto read_last_stripe;
+ }
*pp = ios->r4w->get_page(ios->private, offset,
&uptodate);
@@ -454,7 +500,7 @@ static int _read_4_write(struct ore_io_state *ios)
return -ENOMEM;
if (!uptodate)
- _add_to_read_4_write(ios, &read_si, *pp);
+ _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
/* Mark read-pages to be cache_released */
_1ps->page_is_read[c] = true;
@@ -465,8 +511,23 @@ static int _read_4_write(struct ore_io_state *ios)
}
read_last_stripe:
- offset = ios->offset + (ios->length + PAGE_SIZE - 1) /
- PAGE_SIZE * PAGE_SIZE;
+ return 0;
+}
+
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
+{
+ struct ore_striping_info read_si;
+ struct __stripe_pages_2d *sp2d = ios->sp2d;
+ u64 offset;
+ u64 last_stripe_end;
+ unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+ unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
+
+ offset = ios->offset + ios->length;
+ if (offset % PAGE_SIZE)
+ _add_to_r4w_last_page(ios, &offset);
+ /* offset will be aligned to next page */
+
last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
* bytes_in_stripe;
if (offset == last_stripe_end) /* Optimize for the aligned case */
@@ -477,15 +538,15 @@ read_last_stripe:
c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
- BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
- /* unaligned IO must be within a single stripe */
-
if (min_p == sp2d->pages_in_unit) {
/* Didn't do it yet */
min_p = _sp2d_min_pg(sp2d);
max_p = _sp2d_max_pg(sp2d);
}
+ ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
+ offset, last_stripe_end, min_p, max_p);
+
while (offset < last_stripe_end) {
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
@@ -503,7 +564,7 @@ read_last_stripe:
/* Mark read-pages to be cache_released */
_1ps->page_is_read[c] = true;
if (!uptodate)
- _add_to_read_4_write(ios, &read_si, page);
+ _add_to_r4w(ios, &read_si, page, PAGE_SIZE);
}
offset += PAGE_SIZE;
@@ -518,6 +579,15 @@ read_last_stripe:
}
read_it:
+ return 0;
+}
+
+static int _read_4_write_execute(struct ore_io_state *ios)
+{
+ struct ore_io_state *ios_read;
+ unsigned i;
+ int ret;
+
ios_read = ios->ios_read_4_write;
if (!ios_read)
return 0;
@@ -541,6 +611,8 @@ read_it:
}
_mark_read4write_pages_uptodate(ios_read, ret);
+ ore_put_io_state(ios_read);
+ ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
return 0;
}
@@ -551,7 +623,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
unsigned cur_len)
{
if (ios->reading) {
- BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev);
+ if (per_dev->cur_sg >= ios->sgs_per_dev) {
+ ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
+ per_dev->cur_sg, ios->sgs_per_dev);
+ return -ENOMEM;
+ }
_ore_add_sg_seg(per_dev, cur_len, true);
} else {
struct __stripe_pages_2d *sp2d = ios->sp2d;
@@ -572,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
/* If first stripe, Read in all read4write pages
* (if needed) before we calculate the first parity.
*/
- _read_4_write(ios);
+ _read_4_write_first_stripe(ios);
}
+ if (!cur_len) /* If last stripe r4w pages of last stripe */
+ _read_4_write_last_stripe(ios);
+ _read_4_write_execute(ios);
for (i = 0; i < num_pages; i++) {
pages[i] = _raid_page_alloc();
@@ -600,36 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
{
- struct ore_layout *layout = ios->layout;
-
if (ios->parity_pages) {
+ struct ore_layout *layout = ios->layout;
unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
- unsigned stripe_size = ios->si.bytes_in_stripe;
- u64 last_stripe, first_stripe;
if (_sp2d_alloc(pages_in_unit, layout->group_width,
layout->parity, &ios->sp2d)) {
return -ENOMEM;
}
-
- BUG_ON(ios->offset % PAGE_SIZE);
-
- /* Round io down to last full strip */
- first_stripe = div_u64(ios->offset, stripe_size);
- last_stripe = div_u64(ios->offset + ios->length, stripe_size);
-
- /* If an IO spans more then a single stripe it must end at
- * a stripe boundary. The reminder at the end is pushed into the
- * next IO.
- */
- if (last_stripe != first_stripe) {
- ios->length = last_stripe * stripe_size - ios->offset;
-
- BUG_ON(!ios->length);
- ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
- PAGE_SIZE;
- ios->si.length = ios->length; /*make it consistent */
- }
}
return 0;
}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index e6085ec192d..433783624d1 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -166,7 +166,6 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
static void exofs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
- INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
}
@@ -390,7 +389,7 @@ static int exofs_sync_fs(struct super_block *sb, int wait)
ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
memset(fscb, 0, ios->length);
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
- fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
+ fscb->s_numfiles = cpu_to_le64(sbi->s_numfiles);
fscb->s_magic = cpu_to_le16(sb->s_magic);
fscb->s_newfs = 0;
fscb->s_version = EXOFS_FSCB_VER;
@@ -473,6 +472,7 @@ static void exofs_put_super(struct super_block *sb)
_exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0),
sbi->one_comp.obj.partition);
+ exofs_sysfs_sb_del(sbi);
bdi_destroy(&sbi->bdi);
exofs_free_sbi(sbi);
sb->s_fs_info = NULL;
@@ -530,7 +530,8 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
struct osd_dev_info *odi)
{
odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
- memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len);
+ if (likely(odi->systemid_len))
+ memcpy(odi->systemid, dt_dev->systemid, OSD_SYSTEMID_LEN);
odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
odi->osdname = dt_dev->osdname;
@@ -566,7 +567,7 @@ int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
aoded = kzalloc(sizeof(*aoded), GFP_KERNEL);
if (unlikely(!aoded)) {
- EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
+ EXOFS_ERR("ERROR: failed allocating Device array[%d]\n",
numdevs);
return -ENOMEM;
}
@@ -632,6 +633,12 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0],
(numdevs - 1) * sizeof(sbi->oc.ods[0]));
+ /* create sysfs subdir under which we put the device table
+ * And cluster layout. A Superblock is identified by the string:
+ * "dev[0].osdname"_"pid"
+ */
+ exofs_sysfs_sb_add(sbi, &dt->dt_dev_table[0]);
+
for (i = 0; i < numdevs; i++) {
struct exofs_fscb fscb;
struct osd_dev_info odi;
@@ -657,6 +664,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
eds[i].ored.od = fscb_od;
++sbi->oc.numdevs;
fscb_od = NULL;
+ exofs_sysfs_odev_add(&eds[i], sbi);
continue;
}
@@ -682,6 +690,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
odi.osdname);
goto out;
}
+ exofs_sysfs_odev_add(&eds[i], sbi);
/* TODO: verify other information is correct and FS-uuid
* matches. Benny what did you say about device table
@@ -745,7 +754,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sbi->one_comp.obj.partition = opts->pid;
sbi->one_comp.obj.id = 0;
exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
- sbi->oc.numdevs = 1;
sbi->oc.single_comp = EC_SINGLE_COMP;
sbi->oc.comps = &sbi->one_comp;
@@ -755,6 +763,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_blocksize = EXOFS_BLKSIZE;
sb->s_blocksize_bits = EXOFS_BLKSHIFT;
sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_max_links = EXOFS_LINK_MAX;
atomic_set(&sbi->s_curr_pending, 0);
sb->s_bdev = NULL;
sb->s_dev = 0;
@@ -803,6 +812,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
goto free_sbi;
ore_comp_set_dev(&sbi->oc, 0, od);
+ sbi->oc.numdevs = 1;
}
__sbi_read_stats(sbi);
@@ -819,9 +829,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
ret = PTR_ERR(root);
goto free_sbi;
}
- sb->s_root = d_alloc_root(root);
+ sb->s_root = d_make_root(root);
if (!sb->s_root) {
- iput(root);
EXOFS_ERR("ERROR: get root inode failed\n");
ret = -ENOMEM;
goto free_sbi;
@@ -839,9 +848,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
if (ret) {
EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
+ dput(sb->s_root);
+ sb->s_root = NULL;
goto free_sbi;
}
+ exofs_sysfs_dbg_print();
_exofs_print_device("Mounting", opts->dev_name,
ore_comp_dev(&sbi->oc, 0),
sbi->one_comp.obj.partition);
@@ -1021,6 +1033,9 @@ static int __init init_exofs(void)
if (err)
goto out_d;
+ /* We don't fail if sysfs creation failed */
+ exofs_sysfs_init();
+
return 0;
out_d:
destroy_inodecache();
@@ -1030,6 +1045,7 @@ out:
static void __exit exit_exofs(void)
{
+ exofs_sysfs_uninit();
unregister_filesystem(&exofs_type);
destroy_inodecache();
}
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
new file mode 100644
index 00000000000..5a7b691e748
--- /dev/null
+++ b/fs/exofs/sys.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2012
+ * Sachin Bhamare <sbhamare@panasas.com>
+ * Boaz Harrosh <bharrosh@panasas.com>
+ *
+ * This file is part of exofs.
+ *
+ * exofs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License 2 as published by
+ * the Free Software Foundation.
+ *
+ * exofs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with exofs; if not, write to the:
+ * Free Software Foundation <licensing@fsf.org>
+ */
+
+#include <linux/kobject.h>
+#include <linux/device.h>
+
+#include "exofs.h"
+
+struct odev_attr { /* sysfs attribute of one exofs device: generic attribute plus typed callbacks */
+ struct attribute attr;
+ ssize_t (*show)(struct exofs_dev *, char *); /* may be NULL; odev_attr_show then returns 0 */
+ ssize_t (*store)(struct exofs_dev *, const char *, size_t); /* may be NULL; odev_attr_store then returns len */
+};
+
+static ssize_t odev_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{ /* sysfs_ops.show hook: recover the exofs types from kobj/attr and dispatch */
+ struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj);
+ struct odev_attr *a = container_of(attr, struct odev_attr, attr);
+
+ return a->show ? a->show(edp, buf) : 0; /* no handler: report zero bytes */
+}
+
+static ssize_t odev_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{ /* sysfs_ops.store hook: recover the exofs types from kobj/attr and dispatch */
+ struct exofs_dev *edp = container_of(kobj, struct exofs_dev, ed_kobj);
+ struct odev_attr *a = container_of(attr, struct odev_attr, attr);
+
+ return a->store ? a->store(edp, buf, len) : len; /* no handler: input is discarded but reported as consumed */
+}
+
+static const struct sysfs_ops odev_attr_ops = { /* show/store dispatchers referenced by odev_ktype */
+ .show = odev_attr_show,
+ .store = odev_attr_store,
+};
+
+
+static struct kset *exofs_kset; /* set by exofs_sysfs_init(); remains NULL if kset creation failed */
+
+static ssize_t osdname_show(struct exofs_dev *edp, char *buf)
+{ /* sysfs show for "osdname": format the device's OSD name into buf */
+ struct osd_dev *odev = edp->ored.od;
+ const struct osd_dev_info *odi = osduld_device_info(odev);
+
+ return snprintf(buf, odi->osdname_len + 1, "%s", odi->osdname); /* size bound is the name length plus the NUL */
+}
+
+static ssize_t systemid_show(struct exofs_dev *edp, char *buf)
+{ /* sysfs show for "systemid": copy the device's system id bytes into buf */
+ struct osd_dev *odev = edp->ored.od;
+ const struct osd_dev_info *odi = osduld_device_info(odev);
+
+ memcpy(buf, odi->systemid, odi->systemid_len); /* raw copy; no terminator is appended */
+ return odi->systemid_len; /* number of bytes placed in buf */
+}
+
+/*
+ * sysfs show for "uri": format the URI previously stored by uri_store()
+ * into @buf.
+ *
+ * Returns snprintf()'s result, or 0 when no URI has been stored yet.
+ */
+static ssize_t uri_show(struct exofs_dev *edp, char *buf)
+{
+	/* Nothing stored: do not hand a NULL string and a zero size to
+	 * snprintf(), which would report a length without writing anything.
+	 */
+	if (!edp->uri || !edp->urilen)
+		return 0;
+
+	return snprintf(buf, edp->urilen, "%s", edp->uri);
+}
+
+/*
+ * sysfs store for "uri": replace the device's URI with a copy of @buf.
+ *
+ * Returns the stored size (string length plus the terminating NUL), or
+ * -ENOMEM if the new buffer cannot be allocated. On failure the previous
+ * URI and its length are left untouched.
+ */
+static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len)
+{
+	unsigned new_len = strlen(buf) + 1;
+	uint8_t *new_uri;
+
+	/* Keep the old pointer until krealloc() succeeds: assigning its
+	 * result straight to edp->uri would leak the old buffer on failure
+	 * and the copy below would then write through NULL.
+	 */
+	new_uri = krealloc(edp->uri, new_len, GFP_KERNEL);
+	if (!new_uri)
+		return -ENOMEM;
+
+	memcpy(new_uri, buf, new_len);	/* includes the terminating NUL */
+	edp->uri = new_uri;
+	edp->urilen = new_len;
+
+	/* NOTE(review): returns strlen(buf) + 1 rather than @len, exactly as
+	 * before - confirm this is what the sysfs write path should see.
+	 */
+	return edp->urilen;
+}
+
+#define OSD_ATTR(name, mode, show, store) \
+ static struct odev_attr odev_attr_##name = \
+ __ATTR(name, mode, show, store) /* defines odev_attr_<name> */
+
+OSD_ATTR(osdname, S_IRUGO, osdname_show, NULL); /* read-only, no store handler */
+OSD_ATTR(systemid, S_IRUGO, systemid_show, NULL); /* read-only, no store handler */
+OSD_ATTR(uri, S_IRWXU, uri_show, uri_store); /* NOTE(review): S_IRWXU includes the execute bit - confirm that is intended */
+
+static struct attribute *odev_attrs[] = { /* NULL-terminated list used as odev_ktype.default_attrs */
+ &odev_attr_osdname.attr,
+ &odev_attr_systemid.attr,
+ &odev_attr_uri.attr,
+ NULL,
+};
+
+static struct kobj_type odev_ktype = { /* type of the per-device kobjects; NOTE(review): no .release is set - confirm that is intended */
+ .default_attrs = odev_attrs,
+ .sysfs_ops = &odev_attr_ops,
+};
+
+static struct kobj_type uuid_ktype = { /* type of the per-superblock kobject: no attributes, no sysfs_ops, no release */
+};
+
+/*
+ * Debug aid: log the name and reference count of every kobject linked on
+ * the exofs kset. Compiles to an empty function unless CONFIG_EXOFS_DEBUG
+ * is set.
+ */
+void exofs_sysfs_dbg_print(void)
+{
+#ifdef CONFIG_EXOFS_DEBUG
+	struct kobject *k_name, *k_tmp;
+
+	/* init_exofs() ignores a failed exofs_sysfs_init(), so the kset
+	 * may not exist.
+	 */
+	if (!exofs_kset)
+		return;
+
+	list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
+		printk(KERN_INFO "%s: name %s ref %d\n",
+		       __func__, kobject_name(k_name),
+		       (int)atomic_read(&k_name->kref.refcount));
+	}
+#endif
+}
+/*
+ * This function removes all kobjects under exofs_kset
+ * At the end of it, exofs_kset kobject will have a refcount
+ * of 1 which gets decremented only on exofs module unload
+ */
+void exofs_sysfs_sb_del(struct exofs_sb_info *sbi)
+{
+	struct kobject *k_name, *k_tmp;
+	struct kobject *s_kobj = &sbi->s_kobj;
+
+	/* init_exofs() ignores a failed exofs_sysfs_init(); with no kset
+	 * there is no list to walk.
+	 */
+	if (!exofs_kset)
+		return;
+
+	list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) {
+		/* Remove all that are children of this SBI */
+		if (k_name->parent == s_kobj)
+			kobject_put(k_name);
+	}
+	/* Finally drop the reference on the per-superblock kobject itself */
+	kobject_put(s_kobj);
+}
+
+/*
+ * This function creates sysfs entries to hold the current exofs cluster
+ * instance (uniquely identified by osdname,pid tuple).
+ * This function gets called once per exofs mount instance.
+ */
+int exofs_sysfs_sb_add(struct exofs_sb_info *sbi,
+		       struct exofs_dt_device_info *dt_dev)
+{
+	struct kobject *s_kobj = &sbi->s_kobj;
+	uint64_t pid = sbi->one_comp.obj.partition;
+	int retval;
+
+	/* init_exofs() ignores a failed exofs_sysfs_init(); without the
+	 * kset there is no parent to register under.
+	 */
+	if (!exofs_kset)
+		return -ENODEV;
+
+	/* Register the per-superblock directory, named "<osdname>_<pid>" */
+	s_kobj->kset = exofs_kset;
+	retval = kobject_init_and_add(s_kobj, &uuid_ktype,
+			&exofs_kset->kobj, "%s_%llx", dt_dev->osdname, pid);
+	if (retval) {
+		EXOFS_ERR("ERROR: Failed to create sysfs entry for "
+			  "uuid-%s_%llx => %d\n", dt_dev->osdname, pid, retval);
+		/* Hand back the real error, as exofs_sysfs_odev_add() does */
+		return retval;
+	}
+	return 0;
+}
+
+/*
+ * Register the sysfs directory "dev<did>" for one OSD device below the
+ * superblock's kobject. Through odev_ktype it carries the osdname,
+ * systemid and uri attributes.
+ *
+ * Returns 0 on success, -ENODEV when the exofs kset was never created,
+ * or the error from kobject_init_and_add().
+ */
+int exofs_sysfs_odev_add(struct exofs_dev *edev, struct exofs_sb_info *sbi)
+{
+	struct kobject *d_kobj = &edev->ed_kobj;
+	int retval;
+
+	/* init_exofs() ignores a failed exofs_sysfs_init(); do not register
+	 * a kobject against a kset that does not exist.
+	 */
+	if (!exofs_kset)
+		return -ENODEV;
+
+	d_kobj->kset = exofs_kset;
+	retval = kobject_init_and_add(d_kobj, &odev_ktype,
+			&sbi->s_kobj, "dev%u", edev->did);
+	if (retval) {
+		EXOFS_ERR("ERROR: Failed to create sysfs entry for "
+			  "device dev%u\n", edev->did);
+		return retval;
+	}
+	return 0;
+}
+
+int exofs_sysfs_init(void)
+{ /* create the "exofs" kset with fs_kobj as parent; returns 0 or -ENOMEM */
+ exofs_kset = kset_create_and_add("exofs", NULL, fs_kobj);
+ if (!exofs_kset) {
+ EXOFS_ERR("ERROR: kset_create_and_add exofs failed\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void exofs_sysfs_uninit(void)
+{ /* unregister the kset created by exofs_sysfs_init() */
+ kset_unregister(exofs_kset);
+}