This series of patches introduces the facility to deliver only fatal
signals to tasks which are otherwise waiting uninterruptibly.
-
This is pretty nice I think. It also is a significant piece of
infrastructure required to fix some of the main oom kill deadlocks.
-
Abstracting away direct uses of TASK_ flags allows us to change the
definitions of the task flags more easily.

Also restructure do_wait() a little.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
arch/ia64/kernel/perfmon.c | 4 +-
fs/proc/array.c | 9 +---
fs/proc/base.c | 2 +-
include/linux/sched.h | 15 +++++++
include/linux/wait.h | 11 +++--
kernel/exit.c | 90 +++++++++++++++++++------------------------
kernel/power/process.c | 7 +--
kernel/ptrace.c | 8 ++--
kernel/sched.c | 15 +++----
kernel/signal.c | 6 +-
kernel/wait.c | 2 +-
11 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f55fa07..6b0a6cf 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2630,7 +2630,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
*/
if (task == current) return 0;

- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
+ if (!is_task_stopped_or_traced(task)) {
DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
return -EBUSY;
}
@@ -4790,7 +4790,7 @@ recheck:
* the task must be stopped.
*/
if (PFM_CMD_STOPPED(cmd)) {
- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
+ if (!is_task_stopped_or_traced(task)) {
DPRINT(("[%d] task not in stopped state\n", task->pid));
return -EBUSY;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 27b59f5..8939bf0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -140,13 +140,8 @@ static const char *task_state_array[] = {static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state & (TASK_RUNNING |
- TASK_INTERRUPTIBLE |
- TASK_UNINTERRUPTIBLE |
- TASK...
I think it would be nicer if you made it explicit in the name that
these are not individual flags. Maybe it doesn't matter though...

Also, TASK_NORMAL / TASK_ALL aren't very good names. TASK_SLEEP_NORMAL
TASK_SLEEP_ALL might be a bit more helpful?
-
Set TASK_WAKEKILL for TASK_STOPPED and TASK_TRACED, add TASK_KILLABLE and
use TASK_WAKEKILL in signal_wake_up()

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
include/linux/sched.h | 22 ++++++++++++++--------
kernel/signal.c | 8 ++++----
2 files changed, 18 insertions(+), 12 deletions(-)diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ef5253..f02ade4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -169,27 +169,33 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
-#define TASK_STOPPED 4
-#define TASK_TRACED 8
+#define __TASK_STOPPED 4
+#define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
+#define TASK_WAKEKILL 128
+
+/* Convenience macros for the sake of set_task_state */
+#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
+#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)/* Convenience macros for the sake of wake_up */
#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
-#define TASK_ALL (TASK_NORMAL | TASK_STOPPED | TASK_TRACED)
+#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)/* get_task_state() */
#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
- TASK_UNINTERRUPTIBLE | TASK_STOPPED | \
- TASK_TRACED)
+ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
+ __TASK_TRACED)

-#define is_task_traced(task) ((task->state & TASK_TRACED) != 0)
-#define is_task_stopped(task) ((task->state & TASK_STOPPED) != 0)
+#define is_task_traced(task) ((task->state & __TASK_TRACED) != 0)
+#define is_task_stopped(task) ((task->state & __TASK_STOPPED) != 0)
#define is_task_stopped_or_traced(task) \
- ((task->state & (TASK_STOPPED | TASK_TRACED)) ...
and associated infrastructure such as sync_page_killable and
fatal_signal_pending. Use lock_page_killable in do_generic_mapping_read()
to allow us to kill `cat' of a file on an NFS-mounted filesystem.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
include/linux/pagemap.h | 14 ++++++++++++++
include/linux/sched.h | 9 ++++++++-
kernel/signal.c | 5 +++++
mm/filemap.c | 25 +++++++++++++++++++++----
4 files changed, 48 insertions(+), 5 deletions(-)diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index db8a410..4b62a10 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -157,6 +157,7 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}extern void FASTCALL(__lock_page(struct page *page));
+extern int FASTCALL(__lock_page_killable(struct page *page));
extern void FASTCALL(__lock_page_nosync(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));@@ -171,6 +172,19 @@ static inline void lock_page(struct page *page)
}/*
+ * lock_page_killable is like lock_page but can be interrupted by fatal
+ * signals. It returns 0 if it locked the page and -EINTR if it was
+ * killed while waiting.
+ */
+static inline int lock_page_killable(struct page *page)
+{
+ might_sleep();
+ if (TestSetPageLocked(page))
+ return __lock_page_killable(page);
+ return 0;
+}
+
+/*
* lock_page_nosync should only be used if we can't pin the page's inode.
* Doesn't play quite so well with block device plugging.
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f02ade4..077893d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1738,7 +1738,14 @@ static inline int signal_pending(struct task_struct *p)
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
-
+
+extern int FASTCALL(__fatal_signal_pending(struct task_struct *p));
+
+static inline int fatal_signal_pending(struct task_struct *p)
+{
+ return sign...
Use TASK_KILLABLE to allow wait_on_retry_sync_kiocb to return -EINTR.
All callers then check the return value and break out of their loops.

Signed-off-by: Matthew Wilcox <matthew@wil.cx>
---
fs/read_write.c | 17 ++++++++++++-----
1 files changed, 12 insertions(+), 5 deletions(-)diff --git a/fs/read_write.c b/fs/read_write.c
index 124693e..3196a3b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -218,14 +218,15 @@ Einval:
return -EINVAL;
}

-static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
+static int wait_on_retry_sync_kiocb(struct kiocb *iocb)
{
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_KILLABLE);
if (!kiocbIsKicked(iocb))
schedule();
else
kiocbClearKicked(iocb);
__set_current_state(TASK_RUNNING);
+ return fatal_signal_pending(current) ? -EINTR : 0;
}ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
@@ -242,7 +243,9 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
- wait_on_retry_sync_kiocb(&kiocb);
+ ret = wait_on_retry_sync_kiocb(&kiocb);
+ if (ret)
+ break;
}if (-EIOCBQUEUED == ret)
@@ -300,7 +303,9 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
- wait_on_retry_sync_kiocb(&kiocb);
+ ret = wait_on_retry_sync_kiocb(&kiocb);
+ if (ret)
+ break;
}if (-EIOCBQUEUED == ret)
@@ -466,7 +471,9 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
- wait_on_retry_sync_kiocb(&kiocb);
+ ret = wait_on_retry_sync_kiocb(&kiocb);
+ if (ret)
+ break;
}if (ret == -EIOCBQUEUED)
--
1.4.4.2
-
...
Although the EINTR never gets to userspace anyway, is there a good
reason why the last patch for do_generic_mapping_read doesn't pass
back -EINTR?
-
Replace the uses of __wake_up_locked with wake_up_locked
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
fs/eventpoll.c | 11 ++++-------
1 files changed, 4 insertions(+), 7 deletions(-)diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 77b9953..72e4cb4 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -657,8 +657,7 @@ is_linked:
* wait list.
*/
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
- TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;@@ -781,7 +780,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
@@ -855,8 +854,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
- TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
@@ -979,8 +977,7 @@ errxit:
* wait list (delayed after we release the lock).
*/
if (waitqueue_active(&ep->wq))
- __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
- TASK_INTERRUPTIBLE);
+ wake_up_locked(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
--
1.4.4.2
-
On Thu, 18 Oct 2007 18:25:58 -0400
Matthew Wilcox <matthew@wil.cx> wrote:

Have you tested this patch with LOCKDEP enabled? eventpoll is... tricky
in what it does with waitqueues and locks.... and some of this stuff is
there, afaik, to deal with that. You're now changing this ... call me
chicken :)
-
I haven't tested it, but it's a simple textual substitution:

#define wake_up_locked(x) __wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)

so it should be identical in effect.
--
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
-
| Ingo Molnar | [patch 12/13] syslets: x86: optimized copy_uatom() |
| Greg Kroah-Hartman | [PATCH 017/196] aoechr: Convert from class_device to device |
| Yinghai Lu | Re: 2.6.26, PAT and AMD family 6 |
| Jan Engelhardt | intel iommu (Re: -mm merge plans for 2.6.23) |
git: | |
| Gerrit Renker | [PATCH 27/37] dccp: Integration of dynamic feature activation - part 2 (server side) |
| David Miller | [GIT]: Networking |
| David Miller | Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
| Natalie Protasevich | [BUG] New Kernel Bugs |
