From c4d6d8dbf335c7fa47341654a37c53a512b519bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:32 +0000 Subject: CacheFiles: Fix the marking of cached pages Under some circumstances CacheFiles defers the marking of pages with PG_fscache so that it can take advantage of pagevecs to reduce the number of calls to fscache_mark_pages_cached() and the netfs's hook to keep track of this. There are, however, two problems with this: (1) It can lead to the PG_fscache mark being applied _after_ the page is set PG_uptodate and unlocked (by the call to fscache_end_io()). (2) CacheFiles's ref on the page is dropped immediately following fscache_end_io() - and so may not still be held when the mark is applied. This can lead to the page being passed back to the allocator before the mark is applied. Fix this by, where appropriate, marking the page before calling fscache_end_io() and releasing the page. This means that we can't take advantage of pagevecs and have to make a separate call for each page to the marking routines. The symptoms of this are Bad Page state errors cropping up under memory pressure, for example: BUG: Bad page state in process tar pfn:002da page:ffffea0000009fb0 count:0 mapcount:0 mapping: (null) index:0x1447 page flags: 0x1000(private_2) Pid: 4574, comm: tar Tainted: G W 3.1.0-rc4-fsdevel+ #1064 Call Trace: [] ? dump_page+0xb9/0xbe [] bad_page+0xd5/0xea [] get_page_from_freelist+0x35b/0x46a [] __alloc_pages_nodemask+0x362/0x662 [] __do_page_cache_readahead+0x13a/0x267 [] ? __do_page_cache_readahead+0xa2/0x267 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x28b/0x29a [] ? ondemand_readahead+0x163/0x29a [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x2ab/0x67e [] nfs_file_read+0xa4/0xc9 [nfs] [] do_sync_read+0xba/0xfa [] ? security_file_permission+0x7b/0x84 [] ? 
rw_verify_area+0xab/0xc8 [] vfs_read+0xaa/0x13a [] sys_read+0x45/0x6c [] system_call_fastpath+0x16/0x1b As can be seen, PG_private_2 (== PG_fscache) is set in the page flags. Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was set appropriately showed that sometimes it wasn't. This led to the discovery that sometimes the page has apparently been reclaimed by the time the marker got to see it. Reported-by: M. Stevens Signed-off-by: David Howells Reviewed-by: Jeff Layton --- fs/fscache/page.c | 59 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 23 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3f7a59bfa7a..d7c663cfc92 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -914,6 +914,40 @@ done: } EXPORT_SYMBOL(__fscache_uncache_page); +/** + * fscache_mark_page_cached - Mark a page as being cached + * @op: The retrieval op pages are being marked for + * @page: The page to be marked + * + * Mark a netfs page as being cached. After this is called, the netfs + * must call fscache_uncache_page() to remove the mark. 
+ */ +void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) +{ + struct fscache_cookie *cookie = op->op.object->cookie; + +#ifdef CONFIG_FSCACHE_STATS + atomic_inc(&fscache_n_marks); +#endif + + _debug("- mark %p{%lx}", page, page->index); + if (TestSetPageFsCache(page)) { + static bool once_only; + if (!once_only) { + once_only = true; + printk(KERN_WARNING "FS-Cache:" + " Cookie type %s marked page %lx" + " multiple times\n", + cookie->def->name, page->index); + } + } + + if (cookie->def->mark_page_cached) + cookie->def->mark_page_cached(cookie->netfs_data, + op->mapping, page); +} +EXPORT_SYMBOL(fscache_mark_page_cached); + /** * fscache_mark_pages_cached - Mark pages as being cached * @op: The retrieval op pages are being marked for @@ -925,32 +959,11 @@ EXPORT_SYMBOL(__fscache_uncache_page); void fscache_mark_pages_cached(struct fscache_retrieval *op, struct pagevec *pagevec) { - struct fscache_cookie *cookie = op->op.object->cookie; unsigned long loop; -#ifdef CONFIG_FSCACHE_STATS - atomic_add(pagevec->nr, &fscache_n_marks); -#endif - - for (loop = 0; loop < pagevec->nr; loop++) { - struct page *page = pagevec->pages[loop]; - - _debug("- mark %p{%lx}", page, page->index); - if (TestSetPageFsCache(page)) { - static bool once_only; - if (!once_only) { - once_only = true; - printk(KERN_WARNING "FS-Cache:" - " Cookie type %s marked page %lx" - " multiple times\n", - cookie->def->name, page->index); - } - } - } + for (loop = 0; loop < pagevec->nr; loop++) + fscache_mark_page_cached(op, pagevec->pages[loop]); - if (cookie->def->mark_pages_cached) - cookie->def->mark_pages_cached(cookie->netfs_data, - op->mapping, pagevec); pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); -- cgit v1.2.3 From 5f4f9f4af185d5e76c966d2d3420a61870c856e7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:33 +0000 Subject: CacheFiles: Downgrade the requirements passed to the allocator Downgrade the requirements passed to 
the allocator in the gfp flags parameter. FS-Cache/CacheFiles can handle OOM conditions simply by aborting the attempt to store an object or a page in the cache. Signed-off-by: David Howells --- fs/fscache/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fscache') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index d7c663cfc92..248a12e2253 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -759,7 +759,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_stores); - op = kzalloc(sizeof(*op), GFP_NOIO); + op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY); if (!op) goto nomem; -- cgit v1.2.3 From 0f972b5696c0a0677a9b3a18fee45cc0e8de4184 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:33 +0000 Subject: FS-Cache: Check that there are no read ops when cookie relinquished Check that the netfs isn't trying to relinquish a cookie that still has read operations in progress upon it. If there are, then log a warning and BUG. 
Signed-off-by: David Howells --- fs/fscache/cookie.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/fscache') diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 990535071a8..0666996adf8 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -452,6 +452,14 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) _debug("RELEASE OBJ%x", object->debug_id); + if (atomic_read(&object->n_reads)) { + spin_unlock(&cookie->lock); + printk(KERN_ERR "FS-Cache:" + " Cookie '%s' still has %d outstanding reads\n", + cookie->def->name, atomic_read(&object->n_reads)); + BUG(); + } + /* detach each cache object from the object cookie */ spin_lock(&object->lock); hlist_del_init(&object->cookie_link); -- cgit v1.2.3 From ef46ed888efb1e8da33be5d33c9b54476289a43b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Make cookie relinquishment wait for outstanding reads Make fscache_relinquish_cookie() log a warning and wait if there are any outstanding reads left on the cookie it was given. Signed-off-by: David Howells --- fs/fscache/cookie.c | 18 ++++++++++++++---- fs/fscache/operation.c | 10 ++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 0666996adf8..66be9eccede 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -442,22 +442,32 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) event = retire ? 
FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; +try_again: spin_lock(&cookie->lock); /* break links with all the active objects */ while (!hlist_empty(&cookie->backing_objects)) { + int n_reads; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); _debug("RELEASE OBJ%x", object->debug_id); - if (atomic_read(&object->n_reads)) { + set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags); + n_reads = atomic_read(&object->n_reads); + if (n_reads) { + int n_ops = object->n_ops; + int n_in_progress = object->n_in_progress; spin_unlock(&cookie->lock); printk(KERN_ERR "FS-Cache:" - " Cookie '%s' still has %d outstanding reads\n", - cookie->def->name, atomic_read(&object->n_reads)); - BUG(); + " Cookie '%s' still has %d outstanding reads (%d,%d)\n", + cookie->def->name, + n_reads, n_ops, n_in_progress); + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + printk("Wait finished\n"); + goto try_again; } /* detach each cache object from the object cookie */ diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 30afdfa7aec..c857ab824d6 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -340,8 +340,14 @@ void fscache_put_operation(struct fscache_operation *op) object = op->object; - if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) - atomic_dec(&object->n_reads); + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) { + if (atomic_dec_and_test(&object->n_reads)) { + clear_bit(FSCACHE_COOKIE_WAITING_ON_READS, + &object->cookie->flags); + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_WAITING_ON_READS); + } + } /* now... 
we may get called with the object spinlock held, so we * complete the cleanup here only if we can immediately acquire the -- cgit v1.2.3 From 9f10523f891928330b7529da54c1a3cc65180b1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Fix operation state management and accounting Fix the state management of internal fscache operations and the accounting of what operations are in what states. This is done by: (1) Give struct fscache_operation an enum variable that directly represents the state it's currently in, rather than spreading this knowledge over a bunch of flags, who's processing the operation at the moment and whether it is queued or not. This makes it easier to write assertions to check the state at various points and to prevent invalid state transitions. (2) Add an 'operation complete' state and supply a function to indicate the completion of an operation (fscache_op_complete()) and make things call it. The final call to fscache_put_operation() can then check that an op is in the appropriate state (complete or cancelled). (3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better govern the state of an object: (a) The ->n_ops is now the number of extant operations on the object and is now decremented by fscache_put_operation() only. (b) The ->n_in_progress is simply the number of operations that have been taken off of the object's pending queue for the purposes of being run. This is decremented by fscache_op_complete() only. (c) The ->n_exclusive is the number of exclusive ops that have been submitted and queued or are in progress. It is decremented by fscache_op_complete() and by fscache_cancel_op(). fscache_put_operation() and fscache_operation_gc() now no longer try to clean up ->n_exclusive and ->n_in_progress. That was leading to double decrements against fscache_cancel_op(). fscache_cancel_op() now no longer decrements ->n_ops. 
That was leading to double decrements against fscache_put_operation(). fscache_submit_exclusive_op() now decides whether it has to queue an op based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter will persist in being true even after all preceding operations have been cancelled or completed. Furthermore, if an object is active and there are runnable ops against it, there must be at least one op running. (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and provide a function to record completion of the pages as they complete. When n_pages reaches 0, the operation is deemed to be complete and fscache_op_complete() is called. Add calls to fscache_retrieval_complete() anywhere we've finished with a page we've been given to read or allocate for. This includes places where we just return pages to the netfs for reading from the server and where accessing the cache fails and we discard the proposed netfs page. The bugs in the unfixed state management manifest themselves as oopses like the following where the operation completion gets out of sync with return of the cookie by the netfs. This is possible because the cache unlocks and returns all the netfs pages before recording its completion - which means that there's nothing to stop the netfs discarding them and returning the cookie. FS-Cache: Cookie 'NFS.fh' still has outstanding reads ------------[ cut here ]------------ kernel BUG at fs/fscache/cookie.c:519! 
invalid opcode: 0000 [#1] SMP CPU 1 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090 /DG965RY RIP: 0010:[] [] __fscache_relinquish_cookie+0x170/0x343 [fscache] RSP: 0018:ffff8800368cfb00 EFLAGS: 00010282 RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000 RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000 R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98 R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370 FS: 0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040) Stack: ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91 Call Trace: [] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs] [] nfs_clear_inode+0x3c/0x41 [nfs] [] nfs4_evict_inode+0x2f/0x33 [nfs] [] evict+0xa1/0x15c [] dispose_list+0x2c/0x38 [] prune_icache_sb+0x28c/0x29b [] prune_super+0xd5/0x140 [] shrink_slab+0x102/0x1ab [] balance_pgdat+0x2f2/0x595 [] ? process_timeout+0xb/0xb [] kswapd+0x270/0x289 [] ? __init_waitqueue_head+0x46/0x46 [] ? balance_pgdat+0x595/0x595 [] kthread+0x7f/0x87 [] kernel_thread_helper+0x4/0x10 [] ? finish_task_switch+0x45/0xc0 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x53/0x53 [] ? 
gs_change+0xb/0xb Signed-off-by: David Howells --- fs/fscache/object.c | 2 -- fs/fscache/operation.c | 91 ++++++++++++++++++++++++++++++++------------------ fs/fscache/page.c | 25 +++++++++++--- 3 files changed, 80 insertions(+), 38 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/object.c b/fs/fscache/object.c index b6b897c550a..773bc798a41 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -587,8 +587,6 @@ static void fscache_object_available(struct fscache_object *object) if (object->n_in_progress == 0) { if (object->n_ops > 0) { ASSERTCMP(object->n_ops, >=, object->n_obj_ops); - ASSERTIF(object->n_ops > object->n_obj_ops, - !list_empty(&object->pending_ops)); fscache_start_operations(object); } else { ASSERT(list_empty(&object->pending_ops)); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index c857ab824d6..748f9553c2c 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -37,6 +37,7 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); ASSERTCMP(atomic_read(&op->usage), >, 0); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -64,6 +65,9 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + + op->state = FSCACHE_OP_ST_IN_PROGRESS; object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -80,22 +84,23 @@ static void fscache_run_op(struct fscache_object *object, int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { - int ret; - _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + 
spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); - ret = -ENOBUFS; + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ - if (object->n_ops > 1) { + if (object->n_in_progress > 0) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); @@ -111,7 +116,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, /* need to issue a new write op after this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); - ret = 0; } else if (object->state == FSCACHE_OBJECT_CREATING) { op->object = object; object->n_ops++; @@ -119,14 +123,13 @@ int fscache_submit_exclusive_op(struct fscache_object *object, atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); - ret = 0; } else { /* not allowed to submit ops in any other state */ BUG(); } spin_unlock(&object->lock); - return ret; + return 0; } /* @@ -186,6 +189,7 @@ int fscache_submit_op(struct fscache_object *object, _enter("{OBJ%x OP%x},{%u}", object->debug_id, op->debug_id, atomic_read(&op->usage)); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); spin_lock(&object->lock); @@ -196,6 +200,7 @@ int fscache_submit_op(struct fscache_object *object, ostate = object->state; smp_rmb(); + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; @@ -225,12 +230,15 @@ int fscache_submit_op(struct fscache_object *object, object->state == FSCACHE_OBJECT_LC_DYING || object->state == FSCACHE_OBJECT_WITHDRAWING) { fscache_stat(&fscache_n_op_rejected); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { 
fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } @@ -290,13 +298,18 @@ int fscache_cancel_op(struct fscache_operation *op) _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); + ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING); + ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + spin_lock(&object->lock); ret = -EBUSY; - if (!list_empty(&op->pend_link)) { + if (op->state == FSCACHE_OP_ST_PENDING) { + ASSERT(!list_empty(&op->pend_link)); fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); - object->n_ops--; + op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) @@ -310,6 +323,37 @@ int fscache_cancel_op(struct fscache_operation *op) return ret; } +/* + * Record the completion of an in-progress operation. 
+ */ +void fscache_op_complete(struct fscache_operation *op) +{ + struct fscache_object *object = op->object; + + _enter("OBJ%x", object->debug_id); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTCMP(object->n_in_progress, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_exclusive, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_in_progress, ==, 1); + + spin_lock(&object->lock); + + op->state = FSCACHE_OP_ST_COMPLETE; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + spin_unlock(&object->lock); + _leave(""); +} +EXPORT_SYMBOL(fscache_op_complete); + /* * release an operation * - queues pending ops if this is the last in-progress op @@ -328,8 +372,9 @@ void fscache_put_operation(struct fscache_operation *op) return; _debug("PUT OP"); - if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) - BUG(); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, + op->state, ==, FSCACHE_OP_ST_CANCELLED); + op->state = FSCACHE_OP_ST_DEAD; fscache_stat(&fscache_n_op_release); @@ -365,16 +410,6 @@ void fscache_put_operation(struct fscache_operation *op) return; } - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); - ASSERTCMP(object->n_ops, >, 0); object->n_ops--; if (object->n_ops == 0) @@ -413,23 +448,14 @@ void fscache_operation_gc(struct work_struct *work) spin_unlock(&cache->op_gc_list_lock); object = op->object; + spin_lock(&object->lock); _debug("GC DEFERRED REL OBJ%x OP%x", object->debug_id, op->debug_id); fscache_stat(&fscache_n_op_gc); ASSERTCMP(atomic_read(&op->usage), ==, 0); - - spin_lock(&object->lock); - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - 
ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD); ASSERTCMP(object->n_ops, >, 0); object->n_ops--; @@ -437,6 +463,7 @@ void fscache_operation_gc(struct work_struct *work) fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); spin_unlock(&object->lock); + kfree(op); } while (count++ < 20); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 248a12e2253..b38b13d2a55 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -162,6 +162,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_abort_object(object); } + fscache_op_complete(op); _leave(""); } @@ -223,6 +224,8 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) _enter("{OP%x}", op->op.debug_id); + ASSERTCMP(op->n_pages, ==, 0); + fscache_hist(fscache_retrieval_histogram, op->start_time); if (op->context) fscache_put_context(op->op.object->cookie, op->context); @@ -320,6 +323,11 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, _debug("<<< GO"); check_if_dead: + if (op->op.state == FSCACHE_OP_ST_CANCELLED) { + fscache_stat(stat_object_dead); + _leave(" = -ENOBUFS [cancelled]"); + return -ENOBUFS; + } if (unlikely(fscache_object_is_dead(object))) { fscache_stat(stat_object_dead); return -ENOBUFS; @@ -364,6 +372,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = -ENOMEM"); return -ENOMEM; } + op->n_pages = 1; spin_lock(&cookie->lock); @@ -375,10 +384,10 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; 
spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -425,6 +434,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -482,6 +493,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; + op->n_pages = *nr_pages; spin_lock(&cookie->lock); @@ -491,10 +503,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, struct fscache_object, cookie_link); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -541,6 +553,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -583,6 +597,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; + op->n_pages = 1; spin_lock(&cookie->lock); @@ -696,6 +711,7 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); + fscache_op_complete(&op->op); } else { fscache_enqueue_operation(&op->op); } @@ -710,6 +726,7 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); + fscache_op_complete(&op->op); _leave(""); } -- cgit v1.2.3 From ef778e7ae67cd426c30cad43378b908f5eb0bad5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: FS-Cache: Provide proper invalidation Provide a proper invalidation method rather than relying on the netfs retiring the cookie it has and getting a new one. 
The problem with this is that it isn't easy for the netfs to make sure that it has completed/cancelled all its outstanding storage and retrieval operations on the cookie it is retiring. Instead, have the cache provide an invalidation method that will cancel or wait for all currently outstanding operations before invalidating the cache, and will cause new operations to queue up behind that. Whilst invalidation is in progress, some requests will be rejected until the cache can stack a barrier on the operation queue to cause new operations to be deferred behind it. Signed-off-by: David Howells --- fs/fscache/cookie.c | 60 +++++++++++++++++++++++++++++++++++++++++ fs/fscache/internal.h | 10 +++++++ fs/fscache/object.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/fscache/operation.c | 32 ++++++++++++++++++++++ fs/fscache/page.c | 51 +++++++++++++++++++++++++++++++++++ fs/fscache/stats.c | 11 +++++++- 6 files changed, 235 insertions(+), 1 deletion(-) (limited to 'fs/fscache') diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 66be9eccede..8dcb114758e 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -369,6 +369,66 @@ cant_attach_object: return ret; } +/* + * Invalidate an object. Callable with spinlocks held. + */ +void __fscache_invalidate(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + + _enter("{%s}", cookie->def->name); + + fscache_stat(&fscache_n_invalidates); + + /* Only permit invalidation of data files. Invalidating an index will + * require the caller to release all its attachments to the tree rooted + * there, and if it's doing that, it may as well just retire the + * cookie. + */ + ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); + + /* We will be updating the cookie too. */ + BUG_ON(!cookie->def->get_aux); + + /* If there's an object, we tell the object state machine to handle the + * invalidation on our behalf, otherwise there's nothing to do. 
+ */ + if (!hlist_empty(&cookie->backing_objects)) { + spin_lock(&cookie->lock); + + if (!hlist_empty(&cookie->backing_objects) && + !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING, + &cookie->flags)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, + cookie_link); + if (object->state < FSCACHE_OBJECT_DYING) + fscache_raise_event( + object, FSCACHE_OBJECT_EV_INVALIDATE); + } + + spin_unlock(&cookie->lock); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_invalidate); + +/* + * Wait for object invalidation to complete. + */ +void __fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + _enter("%p", cookie); + + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, + fscache_wait_bit_interruptible, + TASK_UNINTERRUPTIBLE); + + _leave(""); +} +EXPORT_SYMBOL(__fscache_wait_on_invalidate); + /* * update the index entries backing a cookie */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f6aad48d38a..c8117930393 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -122,10 +122,16 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); extern int fscache_cancel_op(struct fscache_operation *); +extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); +/* + * page.c + */ +extern void fscache_invalidate_writes(struct fscache_cookie *); + /* * proc.c */ @@ -205,6 +211,9 @@ extern atomic_t fscache_n_acquires_ok; extern atomic_t fscache_n_acquires_nobufs; extern atomic_t fscache_n_acquires_oom; +extern atomic_t fscache_n_invalidates; +extern atomic_t fscache_n_invalidates_run; + extern atomic_t fscache_n_updates; extern atomic_t fscache_n_updates_null; extern atomic_t fscache_n_updates_run; @@ -237,6 +246,7 @@ extern atomic_t 
fscache_n_cop_alloc_object; extern atomic_t fscache_n_cop_lookup_object; extern atomic_t fscache_n_cop_lookup_complete; extern atomic_t fscache_n_cop_grab_object; +extern atomic_t fscache_n_cop_invalidate_object; extern atomic_t fscache_n_cop_update_object; extern atomic_t fscache_n_cop_drop_object; extern atomic_t fscache_n_cop_put_object; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 773bc798a41..80b549141ea 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,6 +14,7 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include +#include #include "internal.h" const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { @@ -22,6 +23,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", + [FSCACHE_OBJECT_INVALIDATING] = "OBJECT_INVALIDATING", [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", @@ -39,6 +41,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { [FSCACHE_OBJECT_CREATING] = "CRTN", [FSCACHE_OBJECT_AVAILABLE] = "AVBL", [FSCACHE_OBJECT_ACTIVE] = "ACTV", + [FSCACHE_OBJECT_INVALIDATING] = "INVL", [FSCACHE_OBJECT_UPDATING] = "UPDT", [FSCACHE_OBJECT_DYING] = "DYNG", [FSCACHE_OBJECT_LC_DYING] = "LCDY", @@ -54,6 +57,7 @@ static void fscache_put_object(struct fscache_object *); static void fscache_initialise_object(struct fscache_object *); static void fscache_lookup_object(struct fscache_object *); static void fscache_object_available(struct fscache_object *); +static void fscache_invalidate_object(struct fscache_object *); static void fscache_release_object(struct fscache_object *); static void fscache_withdraw_object(struct fscache_object *); static void fscache_enqueue_dependents(struct fscache_object *); @@ -78,6 +82,15 @@ static inline void fscache_done_parent_op(struct fscache_object 
*object) spin_unlock(&parent->lock); } +/* + * Notify netfs of invalidation completion. + */ +static inline void fscache_invalidation_complete(struct fscache_cookie *cookie) +{ + if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); +} + /* * process events that have been sent to an object's state machine * - initiates parent lookup @@ -125,6 +138,16 @@ static void fscache_object_state_machine(struct fscache_object *object) case FSCACHE_OBJECT_ACTIVE: goto active_transit; + /* Invalidate an object on disk */ + case FSCACHE_OBJECT_INVALIDATING: + clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events); + fscache_stat(&fscache_n_invalidates_run); + fscache_stat(&fscache_n_cop_invalidate_object); + fscache_invalidate_object(object); + fscache_stat_d(&fscache_n_cop_invalidate_object); + fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); + goto active_transit; + /* update the object metadata on disk */ case FSCACHE_OBJECT_UPDATING: clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); @@ -275,6 +298,9 @@ active_transit: case FSCACHE_OBJECT_EV_ERROR: new_state = FSCACHE_OBJECT_DYING; goto change_state; + case FSCACHE_OBJECT_EV_INVALIDATE: + new_state = FSCACHE_OBJECT_INVALIDATING; + goto change_state; case FSCACHE_OBJECT_EV_UPDATE: new_state = FSCACHE_OBJECT_UPDATING; goto change_state; @@ -679,6 +705,7 @@ static void fscache_withdraw_object(struct fscache_object *object) if (object->cookie == cookie) { hlist_del_init(&object->cookie_link); object->cookie = NULL; + fscache_invalidation_complete(cookie); detached = true; } spin_unlock(&cookie->lock); @@ -888,3 +915,48 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object, return result; } EXPORT_SYMBOL(fscache_check_aux); + +/* + * Asynchronously invalidate an object. 
+ */ +static void fscache_invalidate_object(struct fscache_object *object) +{ + struct fscache_operation *op; + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x}", object->debug_id); + + /* Reject any new read/write ops and abort any that are pending. */ + fscache_invalidate_writes(cookie); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + + /* Now we have to wait for in-progress reads and writes */ + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); + _leave(" [ENOMEM]"); + return; + } + + fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); + op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); + + spin_lock(&cookie->lock); + if (fscache_submit_exclusive_op(object, op) < 0) + BUG(); + spin_unlock(&cookie->lock); + fscache_put_operation(op); + + /* Once we've completed the invalidation, we know there will be no data + * stored in the cache and thus we can reinstate the data-check-skip + * optimisation. + */ + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + /* We can allow read and write requests to come in once again. They'll + * queue up behind our exclusive invalidation operation. 
+ */ + fscache_invalidation_complete(cookie); + _leave(""); +} diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 748f9553c2c..c58dbe61326 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -323,6 +323,38 @@ int fscache_cancel_op(struct fscache_operation *op) return ret; } +/* + * Cancel all pending operations on an object + */ +void fscache_cancel_all_ops(struct fscache_object *object) +{ + struct fscache_operation *op; + + _enter("OBJ%x", object->debug_id); + + spin_lock(&object->lock); + + while (!list_empty(&object->pending_ops)) { + op = list_entry(object->pending_ops.next, + struct fscache_operation, pend_link); + fscache_stat(&fscache_n_op_cancelled); + list_del_init(&op->pend_link); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + op->state = FSCACHE_OP_ST_CANCELLED; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + fscache_put_operation(op); + cond_resched_lock(&object->lock); + } + + spin_unlock(&object->lock); + _leave(""); +} + /* * Record the completion of an in-progress operation. 
*/ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index b38b13d2a55..7bf9d255705 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -361,6 +361,11 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); @@ -483,6 +488,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(*nr_pages, >, 0); ASSERT(!list_empty(pages)); @@ -591,6 +601,11 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + if (fscache_wait_for_deferred_lookup(cookie) < 0) return -ERESTARTSYS; @@ -730,6 +745,37 @@ superseded: _leave(""); } +/* + * Clear the pages pending writing for invalidation + */ +void fscache_invalidate_writes(struct fscache_cookie *cookie) +{ + struct page *page; + void *results[16]; + int n, i; + + _enter(""); + + while (spin_lock(&cookie->stores_lock), + n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, + ARRAY_SIZE(results), + FSCACHE_COOKIE_PENDING_TAG), + n > 0) { + for (i = n - 1; i >= 0; i--) { + page = results[i]; + radix_tree_delete(&cookie->stores, page->index); + } + + spin_unlock(&cookie->stores_lock); + + for (i = n - 1; i >= 0; i--) + page_cache_release(results[i]); + } + + spin_unlock(&cookie->stores_lock); + _leave(""); +} + /* * request a page be stored in the cache * - returns: @@ 
-776,6 +822,11 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_stores); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY); if (!op) goto nomem; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190d537..51cdaee1410 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -80,6 +80,9 @@ atomic_t fscache_n_acquires_ok; atomic_t fscache_n_acquires_nobufs; atomic_t fscache_n_acquires_oom; +atomic_t fscache_n_invalidates; +atomic_t fscache_n_invalidates_run; + atomic_t fscache_n_updates; atomic_t fscache_n_updates_null; atomic_t fscache_n_updates_run; @@ -112,6 +115,7 @@ atomic_t fscache_n_cop_alloc_object; atomic_t fscache_n_cop_lookup_object; atomic_t fscache_n_cop_lookup_complete; atomic_t fscache_n_cop_grab_object; +atomic_t fscache_n_cop_invalidate_object; atomic_t fscache_n_cop_update_object; atomic_t fscache_n_cop_drop_object; atomic_t fscache_n_cop_put_object; @@ -168,6 +172,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_object_created), atomic_read(&fscache_n_object_lookups_timed_out)); + seq_printf(m, "Invals : n=%u run=%u\n", + atomic_read(&fscache_n_invalidates), + atomic_read(&fscache_n_invalidates_run)); + seq_printf(m, "Updates: n=%u nul=%u run=%u\n", atomic_read(&fscache_n_updates), atomic_read(&fscache_n_updates_null), @@ -246,7 +254,8 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cop_lookup_object), atomic_read(&fscache_n_cop_lookup_complete), atomic_read(&fscache_n_cop_grab_object)); - seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n", + seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n", + atomic_read(&fscache_n_cop_invalidate_object), atomic_read(&fscache_n_cop_update_object), atomic_read(&fscache_n_cop_drop_object), 
atomic_read(&fscache_n_cop_put_object), -- cgit v1.2.3 From b4cf1e08c8ac95eff65faa53904f7f13ac78194b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:45 +0000 Subject: CacheFiles: Add missing retrieval completions CacheFiles is missing some calls to fscache_retrieval_complete() in the error handling/collision paths of its reader functions. This can be seen by the following assertion tripping in fscache_put_operation() whereby the operation being destroyed is still in the in-progress state and has not been cancelled or completed: FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:408! invalid opcode: 0000 [#1] SMP CPU 2 Modules linked in: xfs ioatdma dca loop joydev evdev psmouse dcdbas pcspkr serio_raw i5000_edac edac_core i5k_amb shpchp pci_hotplug sg sr_mod] Pid: 8062, comm: httpd Not tainted 3.1.0-rc8 #1 Dell Inc. PowerEdge 1950/0DT097 RIP: 0010:[] [] fscache_put_operation+0x304/0x330 RSP: 0018:ffff880062f739d8 EFLAGS: 00010296 RAX: 0000000000000025 RBX: ffff8800c5122e84 RCX: ffffffff81ddf040 RDX: 00000000ffffffff RSI: 0000000000000082 RDI: ffffffff81ddef30 RBP: ffff880062f739f8 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000003 R12: ffff8800c5122e40 R13: ffff880037a2cd20 R14: ffff880087c7a058 R15: ffff880087c7a000 FS: 00007f63dcf636e0(0000) GS:ffff88022fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0c0a91f000 CR3: 0000000062ec2000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process httpd (pid: 8062, threadinfo ffff880062f72000, task ffff880087e58000) Stack: ffff880062f73bf8 0000000000000000 ffff880062f73bf8 ffff880037a2cd20 ffff880062f73a68 ffffffff8119aa7e ffff88006540e000 ffff880062f73ad4 ffff88008e9a4308 ffff880037a2cd20 ffff880062f73a48 ffff8800c5122e40 Call Trace: [] 
__fscache_read_or_alloc_pages+0x1fe/0x530 [] __nfs_readpages_from_fscache+0x70/0x1c0 [] nfs_readpages+0xca/0x1e0 [] ? rpc_do_put_task+0x36/0x50 [] ? alloc_nfs_open_context+0x4b/0x110 [] ? rpc_call_sync+0x5a/0x70 [] __do_page_cache_readahead+0x1ca/0x270 [] ra_submit+0x21/0x30 [] ondemand_readahead+0x11d/0x250 [] page_cache_sync_readahead+0x36/0x60 [] generic_file_aio_read+0x454/0x770 [] nfs_file_read+0xe1/0x130 [] do_sync_read+0xd9/0x120 [] ? mntput+0x1f/0x40 [] ? fput+0x1cb/0x260 [] vfs_read+0xc8/0x180 [] sys_read+0x55/0x90 Reported-by: Mark Moseley Signed-off-by: David Howells --- fs/fscache/page.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/fscache') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 7bf9d255705..4dbbca16262 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -329,6 +329,8 @@ check_if_dead: return -ENOBUFS; } if (unlikely(fscache_object_is_dead(object))) { + pr_err("%s() = -ENOBUFS [obj dead %d]", __func__, op->op.state); + fscache_cancel_op(&op->op); fscache_stat(stat_object_dead); return -ENOBUFS; } -- cgit v1.2.3 From 03acc4be5e479eebc95338cd1d72a9954c128e2b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:46 +0000 Subject: FS-Cache: Initialise the object event mask with the calculated mask Initialise the object event mask with the calculated mask rather than unmasking undefined events also. 
Signed-off-by: David Howells --- fs/fscache/object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/fscache') diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 80b549141ea..2ef8a082a27 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -114,7 +114,8 @@ static void fscache_object_state_machine(struct fscache_object *object) /* wait for the parent object to become ready */ case FSCACHE_OBJECT_INIT: object->event_mask = - ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); + FSCACHE_OBJECT_EVENTS_MASK & + ~(1 << FSCACHE_OBJECT_EV_CLEARED); fscache_initialise_object(object); goto done; -- cgit v1.2.3 From c2d35bfe4b508451b75b5b6bc60a08dbdc44f952 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:47 +0000 Subject: FS-Cache: Don't mask off the object event mask when printing it Don't mask off the object event mask when printing it. That way it can be seen if there are bits set that shouldn't be. Signed-off-by: David Howells --- fs/fscache/object-list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fscache') diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index ebe29c58138..f27c89d1788 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -245,7 +245,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) obj->n_in_progress, obj->n_exclusive, atomic_read(&obj->n_reads), - obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, + obj->event_mask, obj->events, obj->flags, work_busy(&obj->work)); -- cgit v1.2.3 From 75bc411388f4aeb9fb0381bd56eb5d67193ed9a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:48 +0000 Subject: FS-Cache: Limit the number of I/O error reports for a cache Limit the number of I/O error reports for a cache to 1 to prevent massive amounts of noise. After the first I/O error the cache is taken off line automatically, so must be restarted to resume caching.
Signed-off-by: David Howells --- fs/fscache/cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 6a3c48abd67..b52aed1dca9 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -314,10 +314,10 @@ EXPORT_SYMBOL(fscache_add_cache); */ void fscache_io_error(struct fscache_cache *cache) { - set_bit(FSCACHE_IOERROR, &cache->flags); - - printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", - cache->ops->name); + if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags)) + printk(KERN_ERR "FS-Cache:" + " Cache '%s' stopped due to I/O error\n", + cache->ops->name); } EXPORT_SYMBOL(fscache_io_error); -- cgit v1.2.3 From 8d76349d359064859217dc292dc8733e209705af Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:48 +0000 Subject: FS-Cache: Exclusive op submission can BUG if there's been an I/O error The function to submit an exclusive op (fscache_submit_exclusive_op()) can BUG if there's been an I/O error because it may see the parent cache object in an unexpected state. It should only BUG if there hasn't been an I/O error. In this case the problem was produced by remounting the cache partition to be R/O. The EROFS state was detected and the cache was aborted, but not everything handled the aborting correctly. SysRq : Emergency Remount R/O EXT4-fs (sda6): re-mounted. Opts: (null) Emergency Remount complete CacheFiles: I/O Error: Failed to update xattr with error -30 FS-Cache: Cache cachefiles stopped due to I/O error ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:128! 
invalid opcode: 0000 [#1] SMP CPU 0 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 6612, comm: kworker/u:2 Not tainted 3.1.0-rc8-fsdevel+ #1093 /DG965RY RIP: 0010:[] [] fscache_submit_exclusive_op+0x2ad/0x2c2 [fscache] RSP: 0018:ffff880000853d40 EFLAGS: 00010206 RAX: ffff880038ac72a8 RBX: ffff8800181f2260 RCX: ffffffff81f2b2b0 RDX: 0000000000000001 RSI: ffffffff8179a478 RDI: ffff8800181f2280 RBP: ffff880000853d60 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff880038ac7268 R13: ffff8800181f2280 R14: ffff88003a359190 R15: 000000010122b162 FS: 0000000000000000(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000034cc4a77f0 CR3: 0000000010e96000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kworker/u:2 (pid: 6612, threadinfo ffff880000852000, task ffff880014c3c040) Stack: ffff8800181f2260 ffff8800181f2310 ffff880038ac7268 ffff8800181f2260 ffff880000853dc0 ffffffffa0072375 ffff880037ecfe00 ffff88003a359198 ffff880000853dc0 0000000000000246 0000000000000000 ffff88000a91d308 Call Trace: [] fscache_object_work_func+0x792/0xe65 [fscache] [] process_one_work+0x1eb/0x37f [] ? process_one_work+0x18d/0x37f [] ? fscache_enqueue_dependents+0xd8/0xd8 [fscache] [] worker_thread+0x15a/0x21a [] ? rescuer_thread+0x188/0x188 [] kthread+0x7f/0x87 [] kernel_thread_helper+0x4/0x10 [] ? finish_task_switch+0x45/0xc0 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x53/0x53 [] ? 
gs_change+0xb/0xb Signed-off-by: David Howells --- fs/fscache/internal.h | 1 + fs/fscache/object.c | 23 +++++++++++++++++------ fs/fscache/operation.c | 13 ++++++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index c8117930393..dcb3e1d5dbf 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -288,6 +288,7 @@ extern const struct file_operations fscache_stats_fops; static inline void fscache_raise_event(struct fscache_object *object, unsigned event) { + BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS); if (!test_and_set_bit(event, &object->events) && test_bit(event, &object->event_mask)) fscache_enqueue_object(object); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 2ef8a082a27..2c512cbac38 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -103,6 +103,7 @@ static void fscache_object_state_machine(struct fscache_object *object) { enum fscache_object_state new_state; struct fscache_cookie *cookie; + int event; ASSERT(object != NULL); @@ -275,7 +276,8 @@ static void fscache_object_state_machine(struct fscache_object *object) /* determine the transition from a lookup state */ lookup_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: case FSCACHE_OBJECT_EV_RETIRE: case FSCACHE_OBJECT_EV_RELEASE: @@ -292,7 +294,8 @@ lookup_transit: /* determine the transition from an active state */ active_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: case FSCACHE_OBJECT_EV_RETIRE: case FSCACHE_OBJECT_EV_RELEASE: @@ -314,7 +317,8 @@ active_transit: /* determine the transition from a terminal state */ terminal_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & 
object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: new_state = FSCACHE_OBJECT_WITHDRAWING; goto change_state; @@ -347,8 +351,8 @@ done: unsupported_event: printk(KERN_ERR "FS-Cache:" - " Unsupported event %lx [mask %lx] in state %s\n", - object->events, object->event_mask, + " Unsupported event %d [%lx/%lx] in state %s\n", + event, object->events, object->event_mask, fscache_object_states[object->state]); BUG(); } @@ -945,7 +949,7 @@ static void fscache_invalidate_object(struct fscache_object *object) spin_lock(&cookie->lock); if (fscache_submit_exclusive_op(object, op) < 0) - BUG(); + goto submit_op_failed; spin_unlock(&cookie->lock); fscache_put_operation(op); @@ -960,4 +964,11 @@ static void fscache_invalidate_object(struct fscache_object *object) */ fscache_invalidation_complete(cookie); _leave(""); + return; + +submit_op_failed: + spin_unlock(&cookie->lock); + kfree(op); + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); + _leave(" [EIO]"); } diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index c58dbe61326..9e6b7d232bb 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -84,6 +84,8 @@ static void fscache_run_op(struct fscache_object *object, int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { + int ret; + _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); @@ -116,6 +118,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, /* need to issue a new write op after this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + ret = 0; } else if (object->state == FSCACHE_OBJECT_CREATING) { op->object = object; object->n_ops++; @@ -123,13 +126,17 @@ int fscache_submit_exclusive_op(struct fscache_object *object, atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); + ret = 0; } else { - /* not allowed to submit ops in any other state 
*/ - BUG(); + /* If we're in any other state, there must have been an I/O + * error of some nature. + */ + ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags)); + ret = -EIO; } spin_unlock(&object->lock); - return 0; + return ret; } /* -- cgit v1.2.3 From 8c209ce721444a61b61d9e772746c721e4d8d1e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:49 +0000 Subject: NFS: nfs_migrate_page() does not wait for FS-Cache to finish with a page nfs_migrate_page() does not wait for FS-Cache to finish with a page, probably leading to the following bad-page-state: BUG: Bad page state in process python-bin pfn:17d39b page:ffffea00053649e8 flags:004000000000100c count:0 mapcount:0 mapping:(null) index:38686 (Tainted: G B ---------------- ) Pid: 31053, comm: python-bin Tainted: G B ---------------- 2.6.32-71.24.1.el6.x86_64 #1 Call Trace: [] bad_page+0x107/0x160 [] free_hot_cold_page+0x1c9/0x220 [] __pagevec_free+0x59/0xb0 [] ? flush_tlb_others_ipi+0x128/0x130 [] release_pages+0x21c/0x250 [] ? remove_migration_pte+0x28a/0x2b0 [] ? mem_cgroup_get_reclaim_stat_from_page+0x18/0x70 [] ____pagevec_lru_add+0x167/0x180 [] __lru_cache_add+0x58/0x70 [] lru_cache_add_lru+0x21/0x40 [] putback_lru_page+0x69/0x100 [] migrate_pages+0x13d/0x5d0 [] ? ____pagevec_lru_add+0x167/0x180 [] ? compaction_alloc+0x0/0x370 [] compact_zone+0x4cc/0x600 [] ? get_page_from_freelist+0x15c/0x820 [] ? check_preempt_wakeup+0x1c4/0x3c0 [] compact_zone_order+0x7e/0xb0 [] try_to_compact_pages+0x109/0x170 [] __alloc_pages_nodemask+0x5ed/0x850 [] ? thread_return+0x4e/0x778 [] alloc_pages_vma+0x93/0x150 [] do_huge_pmd_anonymous_page+0x135/0x340 [] ? rwsem_down_read_failed+0x26/0x30 [] handle_mm_fault+0x245/0x2b0 [] do_page_fault+0x123/0x3a0 [] page_fault+0x25/0x30 nfs_migrate_page() calls nfs_fscache_release_page() which doesn't actually wait - even if __GFP_WAIT is set. 
The reason that doesn't wait is that fscache_maybe_release_page() might deadlock the allocator as the work threads writing to the cache may all end up sleeping on memory allocation. However, I wonder if that is actually a problem. There are a number of things I can do to deal with this: (1) Make nfs_migrate_page() wait. (2) Make fscache_maybe_release_page() honour the __GFP_WAIT flag. (3) Set a timeout around the wait. (4) Make nfs_migrate_page() return an error if the page is still busy. For the moment, I'll select (2) and (4). Signed-off-by: David Howells Acked-by: Jeff Layton --- fs/fscache/internal.h | 1 + fs/fscache/page.c | 19 ++++++++++++++----- fs/fscache/stats.c | 6 ++++-- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index dcb3e1d5dbf..88a48ccb7d9 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -200,6 +200,7 @@ extern atomic_t fscache_n_store_vmscan_not_storing; extern atomic_t fscache_n_store_vmscan_gone; extern atomic_t fscache_n_store_vmscan_busy; extern atomic_t fscache_n_store_vmscan_cancelled; +extern atomic_t fscache_n_store_vmscan_wait; extern atomic_t fscache_n_marks; extern atomic_t fscache_n_uncaches; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 4dbbca16262..f9b2fb3ae49 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -56,6 +56,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, _enter("%p,%p,%x", cookie, page, gfp); +try_again: rcu_read_lock(); val = radix_tree_lookup(&cookie->stores, page->index); if (!val) { @@ -104,11 +105,19 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, return true; page_busy: - /* we might want to wait here, but that could deadlock the allocator as - * the work threads writing to the cache may all end up sleeping - * on memory allocation */ - fscache_stat(&fscache_n_store_vmscan_busy); - return false; + /* We will wait here if we're allowed to, but that 
could deadlock the + * allocator as the work threads writing to the cache may all end up + * sleeping on memory allocation, so we may need to impose a timeout + * too. */ + if (!(gfp & __GFP_WAIT)) { + fscache_stat(&fscache_n_store_vmscan_busy); + return false; + } + + fscache_stat(&fscache_n_store_vmscan_wait); + __fscache_wait_on_page_write(cookie, page); + gfp &= ~__GFP_WAIT; + goto try_again; } EXPORT_SYMBOL(__fscache_maybe_release_page); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 51cdaee1410..8179e8bc4a3 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -69,6 +69,7 @@ atomic_t fscache_n_store_vmscan_not_storing; atomic_t fscache_n_store_vmscan_gone; atomic_t fscache_n_store_vmscan_busy; atomic_t fscache_n_store_vmscan_cancelled; +atomic_t fscache_n_store_vmscan_wait; atomic_t fscache_n_marks; atomic_t fscache_n_uncaches; @@ -232,11 +233,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_store_radix_deletes), atomic_read(&fscache_n_store_pages_over_limit)); - seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n", + seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n", atomic_read(&fscache_n_store_vmscan_not_storing), atomic_read(&fscache_n_store_vmscan_gone), atomic_read(&fscache_n_store_vmscan_busy), - atomic_read(&fscache_n_store_vmscan_cancelled)); + atomic_read(&fscache_n_store_vmscan_cancelled), + atomic_read(&fscache_n_store_vmscan_wait)); seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n", atomic_read(&fscache_n_op_pend), -- cgit v1.2.3 From 969695215f9a865cbf64c4ce3742ac9fc57fffed Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:49 +0000 Subject: FS-Cache: Add transition to handle invalidate immediately after lookup Add a missing transition to the FS-Cache object state machine to handle an invalidation event occurring between the back end completing the object lookup by calling fscache_obtained_object() (which moves to state OBJECT_AVAILABLE) and the
backend returning to fscache_lookup_object() and thence to fscache_object_state_machine() which then does a goto lookup_transit to handle the transition - but lookup_transit doesn't handle EV_INVALIDATE. Without this, the following BUG can be logged: FS-Cache: Unsupported event 2 [5/f7] in state OBJECT_AVAILABLE ------------[ cut here ]------------ kernel BUG at fs/fscache/object.c:357! Where event 2 is EV_INVALIDATE. Signed-off-by: David Howells --- fs/fscache/object.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/fscache') diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 2c512cbac38..50d41c18021 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -284,6 +284,9 @@ lookup_transit: case FSCACHE_OBJECT_EV_ERROR: new_state = FSCACHE_OBJECT_LC_DYING; goto change_state; + case FSCACHE_OBJECT_EV_INVALIDATE: + new_state = FSCACHE_OBJECT_INVALIDATING; + goto change_state; case FSCACHE_OBJECT_EV_REQUEUE: goto done; case -1: -- cgit v1.2.3 From 9c04caa81b876faee5f1cc6eaad76dd7021ab8ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Dec 2012 18:08:02 +0000 Subject: FS-Cache: Fix signal handling during waits wait_on_bit() with TASK_INTERRUPTIBLE returns 1 rather than a negative error code, so change what we check for. This means that the signal handling in fscache_wait_for_retrieval_activation() should now work properly. Without this, the following bug can be seen if CTRL-C is pressed during fscache read operation: FS-Cache: Assertion failed 2 == 3 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/page.c:347! 
invalid opcode: 0000 [#1] SMP Modules linked in: cachefiles(F) nfsv4(F) nfsv3(F) nfsv2(F) nfs(F) fscache(F) auth_rpcgss(F) nfs_acl(F) lockd(F) sunrpc(F) CPU 1 Pid: 15006, comm: slurp-q Tainted: GF 3.7.0-rc8-fsdevel+ #411 /DG965RY RIP: 0010:[] [] fscache_wait_for_retrieval_activation+0x167/0x177 [fscache] RSP: 0018:ffff88002a4c39a8 EFLAGS: 00010292 RAX: 000000000000001a RBX: ffff88002d3dc158 RCX: 0000000000008685 RDX: ffffffff8102ccd6 RSI: 0000000000000001 RDI: ffffffff8102d1d6 RBP: ffff88002a4c39c8 R08: 0000000000000002 R09: 0000000000000000 R10: ffffffff8163afa0 R11: ffff88003bd11900 R12: ffffffffa00868c8 R13: ffff880028306458 R14: ffff88002d3dc1b0 R15: ffff88001372e538 FS: 00007f17426a0700(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f1742494a44 CR3: 0000000031bd7000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process slurp-q (pid: 15006, threadinfo ffff88002a4c2000, task ffff880023de3040) Stack: ffff88002d3dc158 ffff88001372e538 ffff88002a4c3ab4 ffff8800283064e0 ffff88002a4c3a38 ffffffffa0080f6d 0000000000000000 ffff880023de3040 ffff88002a4c3ac8 ffffffff810ac8ae ffff880028306458 ffff88002a4c3bc8 Call Trace: [] __fscache_read_or_alloc_pages+0x24f/0x4bc [fscache] [] ? __alloc_pages_nodemask+0x195/0x75c [] __nfs_readpages_from_fscache+0x86/0x13d [nfs] [] nfs_readpages+0x186/0x1bd [nfs] [] ? alloc_pages_current+0xc7/0xe4 [] ? __page_cache_alloc+0x84/0x91 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] __do_page_cache_readahead+0x237/0x2e0 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x359/0x382 [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x26b/0x637 [] ? 
nfs_mark_delegation_referenced+0xb/0xb [nfsv4] [] nfs_file_read+0xaa/0xcf [nfs] [] do_sync_read+0x91/0xd1 [] vfs_read+0x9b/0x144 [] sys_read+0x44/0x75 [] system_call_fastpath+0x16/0x1b Signed-off-by: David Howells --- fs/fscache/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fscache') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index f9b2fb3ae49..5b5d9081c8b 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -319,7 +319,7 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, fscache_stat(stat_op_waits); if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { + TASK_INTERRUPTIBLE) != 0) { ret = fscache_cancel_op(&op->op); if (ret == 0) return -ERESTARTSYS; -- cgit v1.2.3 From 7ef001e937e8b9cbedb2fc1c31dd681ac3b31927 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Dec 2012 10:41:26 +0000 Subject: FS-Cache: One of the write operation paths doesn't set the object state In fscache_write_op(), if the object is determined to have become inactive or to have lost its cookie, we don't move the operation state from in-progress, and so an assertion in fscache_put_operation() fails with an assertion (see below). Instrumenting fscache_op_work_func() indicates that it called fscache_write_op() before calling fscache_put_operation() - where the assertion failed. The assertion at line 433 indicates that the operation state is IN_PROGRESS rather than being COMPLETE or CANCELLED. Instrumenting fscache_write_op() showed that it was being called on an object that had had its cookie removed and that this was due to relinquishment of the cookie by the netfs. At this point fscache no longer has access to the pages of netfs data that were requested to be written, and so simply cancelling the operation is the thing to do. FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:433! 
invalid opcode: 0000 [#1] SMP Modules linked in: cachefiles(F) nfsv4(F) nfsv3(F) nfsv2(F) nfs(F) fscache(F) auth_rpcgss(F) nfs_acl(F) lockd(F) sunrpc(F) CPU 0 Pid: 1035, comm: kworker/u:3 Tainted: GF 3.7.0-rc8-fsdevel+ #411 /DG965RY RIP: 0010:[] [] fscache_put_operation+0x11a/0x2ed [fscache] RSP: 0018:ffff88003e32bcf8 EFLAGS: 00010296 RAX: 000000000000000f RBX: ffff88001818eb78 RCX: ffffffff6c102000 RDX: ffffffff8102d1ad RSI: ffffffff6c102000 RDI: ffffffff8102d1d6 RBP: ffff88003e32bd18 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa00811da R13: 0000000000000001 R14: 0000000100625d26 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fff7dd31c68 CR3: 000000003d730000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kworker/u:3 (pid: 1035, threadinfo ffff88003e32a000, task ffff88003bb38080) Stack: ffffffff8102d1ad ffff88001818eb78 ffffffffa00811da 0000000000000001 ffff88003e32bd48 ffffffffa007f0ad ffff88001818eb78 ffffffff819583c0 ffff88003df24e00 ffff88003882c3e0 ffff88003e32bde8 ffffffff81042de0 Call Trace: [] ? vprintk_emit+0x3c6/0x41a [] ? __fscache_read_or_alloc_pages+0x4bc/0x4bc [fscache] [] fscache_op_work_func+0xec/0x123 [fscache] [] process_one_work+0x21c/0x3b0 [] ? process_one_work+0x1be/0x3b0 [] ? fscache_operation_gc+0x23e/0x23e [fscache] [] worker_thread+0x202/0x2df [] ? rescuer_thread+0x18e/0x18e [] kthread+0xd0/0xd8 [] ? _raw_spin_unlock_irq+0x29/0x3e [] ? __init_kthread_worker+0x55/0x55 [] ret_from_fork+0x7c/0xb0 [] ? 
__init_kthread_worker+0x55/0x55 Signed-off-by: David Howells --- fs/fscache/page.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 5b5d9081c8b..ef0218f5080 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -699,9 +699,27 @@ static void fscache_write_op(struct fscache_operation *_op) spin_lock(&object->lock); cookie = object->cookie; - if (!fscache_object_is_active(object) || !cookie) { + if (!fscache_object_is_active(object)) { + /* If we get here, then the on-disk cache object likely no longer + * exists, so we should just cancel this write operation. + */ spin_unlock(&object->lock); - _leave(""); + op->op.state = FSCACHE_OP_ST_CANCELLED; + _leave(" [inactive]"); + return; + } + + if (!cookie) { + /* If we get here, then the cookie belonging to the object was + * detached, probably by the cookie being withdrawn due to + * memory pressure, which means that the pages we might write + * to the cache from no longer exist - therefore, we can just + * cancel this write operation. + */ + spin_unlock(&object->lock); + op->op.state = FSCACHE_OP_ST_CANCELLED; + _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", + _op->flags, _op->state, object->state, object->flags); return; } -- cgit v1.2.3 From 1f372dff1da37e2b36ae9085368fa46896398598 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Dec 2012 20:03:13 +0000 Subject: FS-Cache: Mark cancellation of in-progress operation Mark as cancelled an operation that is in progress rather than pending at the time it is cancelled, and call fscache_op_complete() to cancel an operation so that blocked ops can be started. 
Signed-off-by: David Howells --- fs/fscache/operation.c | 7 ++++--- fs/fscache/page.c | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 9e6b7d232bb..36c59604130 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -363,9 +363,9 @@ void fscache_cancel_all_ops(struct fscache_object *object) } /* - * Record the completion of an in-progress operation. + * Record the completion or cancellation of an in-progress operation. */ -void fscache_op_complete(struct fscache_operation *op) +void fscache_op_complete(struct fscache_operation *op, bool cancelled) { struct fscache_object *object = op->object; @@ -380,7 +380,8 @@ void fscache_op_complete(struct fscache_operation *op) spin_lock(&object->lock); - op->state = FSCACHE_OP_ST_COMPLETE; + op->state = cancelled ? + FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index ef0218f5080..8a92b9fabe8 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -171,7 +171,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_abort_object(object); } - fscache_op_complete(op); + fscache_op_complete(op, true); _leave(""); } @@ -704,7 +704,7 @@ static void fscache_write_op(struct fscache_operation *_op) * exists, so we should just cancel this write operation. */ spin_unlock(&object->lock); - op->op.state = FSCACHE_OP_ST_CANCELLED; + fscache_op_complete(&op->op, false); _leave(" [inactive]"); return; } @@ -717,7 +717,7 @@ static void fscache_write_op(struct fscache_operation *_op) * cancel this write operation. 
*/ spin_unlock(&object->lock); - op->op.state = FSCACHE_OP_ST_CANCELLED; + fscache_op_complete(&op->op, false); _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", _op->flags, _op->state, object->state, object->flags); return; @@ -755,7 +755,7 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); } else { fscache_enqueue_operation(&op->op); } @@ -770,7 +770,7 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); _leave(""); } -- cgit v1.2.3 From 91c7fbbf63f33c77d8d28de624834a21888842bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Dec 2012 11:02:22 +0000 Subject: FS-Cache: Clear remaining page count on retrieval cancellation Provide fscache_cancel_op() with a pointer to a function it should invoke under lock if it cancels an operation. Use this to clear the remaining page count upon cancellation of a pending retrieval operation so that fscache_release_retrieval_op() doesn't get an assertion failure (see below). This can happen when a signal occurs, say from CTRL-C being pressed during data retrieval. FS-Cache: Assertion failed 3 == 0 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/page.c:237! 
invalid opcode: 0000 [#641] SMP Modules linked in: cachefiles(F) nfsv4(F) nfsv3(F) nfsv2(F) nfs(F) fscache(F) auth_rpcgss(F) nfs_acl(F) lockd(F) sunrpc(F) CPU 0 Pid: 6075, comm: slurp-q Tainted: GF D 3.7.0-rc8-fsdevel+ #411 /DG965RY RIP: 0010:[] [] fscache_release_retrieval_op+0x75/0xff [fscache] RSP: 0000:ffff88001c6d7988 EFLAGS: 00010296 RAX: 000000000000000f RBX: ffff880014cdfe00 RCX: ffffffff6c102000 RDX: ffffffff8102d1ad RSI: ffffffff6c102000 RDI: ffffffff8102d1d6 RBP: ffff88001c6d7998 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffe00 R13: ffff88001c6d7ab4 R14: ffff88001a8638a0 R15: ffff88001552b190 FS: 00007f877aaf0700(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fff11378fd2 CR3: 000000001c6c6000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process slurp-q (pid: 6075, threadinfo ffff88001c6d6000, task ffff88001c6c4080) Stack: ffffffffa007ec07 ffff880014cdfe00 ffff88001c6d79c8 ffffffffa007db4d ffffffffa007ec07 ffff880014cdfe00 00000000fffffe00 ffff88001c6d7ab4 ffff88001c6d7a38 ffffffffa008116d 0000000000000000 ffff88001c6c4080 Call Trace: [] ? fscache_cancel_op+0x194/0x1cf [fscache] [] fscache_put_operation+0x135/0x2ed [fscache] [] ? fscache_cancel_op+0x194/0x1cf [fscache] [] __fscache_read_or_alloc_pages+0x413/0x4bc [fscache] [] ? __alloc_pages_nodemask+0x195/0x75c [] __nfs_readpages_from_fscache+0x86/0x13d [nfs] [] nfs_readpages+0x186/0x1bd [nfs] [] ? alloc_pages_current+0xc7/0xe4 [] ? __page_cache_alloc+0x84/0x91 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] __do_page_cache_readahead+0x237/0x2e0 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x359/0x382 [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x26b/0x637 [] ? 
nfs_mark_delegation_referenced+0xb/0xb [nfsv4] [] nfs_file_read+0xaa/0xcf [nfs] [] do_sync_read+0x91/0xd1 [] vfs_read+0x9b/0x144 [] sys_read+0x44/0x75 [] system_call_fastpath+0x16/0x1b Signed-off-by: David Howells --- fs/fscache/internal.h | 3 ++- fs/fscache/operation.c | 5 ++++- fs/fscache/page.c | 17 ++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'fs/fscache') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 88a48ccb7d9..ee38fef4be5 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -121,7 +121,8 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); -extern int fscache_cancel_op(struct fscache_operation *); +extern int fscache_cancel_op(struct fscache_operation *, + void (*)(struct fscache_operation *)); extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 36c59604130..762a9ec4ffa 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -298,7 +298,8 @@ void fscache_start_operations(struct fscache_object *object) /* * cancel an operation that's pending on an object */ -int fscache_cancel_op(struct fscache_operation *op) +int fscache_cancel_op(struct fscache_operation *op, + void (*do_cancel)(struct fscache_operation *)) { struct fscache_object *object = op->object; int ret; @@ -316,6 +317,8 @@ int fscache_cancel_op(struct fscache_operation *op) ASSERT(!list_empty(&op->pend_link)); fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); + if (do_cancel) + do_cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 8a92b9fabe8..ff000e52072 
100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -302,6 +302,17 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) return 0; } +/* + * Handle cancellation of a pending retrieval op + */ +static void fscache_do_cancel_retrieval(struct fscache_operation *_op) +{ + struct fscache_retrieval *op = + container_of(_op, struct fscache_retrieval, op); + + op->n_pages = 0; +} + /* * wait for an object to become active (or dead) */ @@ -320,7 +331,7 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, fscache_wait_bit_interruptible, TASK_INTERRUPTIBLE) != 0) { - ret = fscache_cancel_op(&op->op); + ret = fscache_cancel_op(&op->op, fscache_do_cancel_retrieval); if (ret == 0) return -ERESTARTSYS; @@ -338,8 +349,8 @@ check_if_dead: return -ENOBUFS; } if (unlikely(fscache_object_is_dead(object))) { - pr_err("%s() = -ENOBUFS [obj dead %d]", __func__, op->op.state); - fscache_cancel_op(&op->op); + pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->op.state); + fscache_cancel_op(&op->op, fscache_do_cancel_retrieval); fscache_stat(stat_object_dead); return -ENOBUFS; } -- cgit v1.2.3