Commit 1569345

tracing/ring-buffer: Move poll wake ups into ring buffer code
Move the logic to wake up on ring buffer data into the ring buffer code
itself. This simplifies the tracing code a lot, and it has the added
benefit that waiters on one of the instance buffers can be woken only
when data is added to that instance, instead of when data is added to
any instance.

Signed-off-by: Steven Rostedt <[email protected]>
1 parent b627344 commit 1569345

3 files changed: +164, -71 lines
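The header below exports two new entry points, ring_buffer_wait() and ring_buffer_poll_wait(). As a quick orientation before the diffs, here is a minimal sketch of how a reader-side caller might use them, mirroring what the trace.c hunks in this commit do; the wrapper names my_wait_for_data() and my_poll() are illustrative only and not part of the commit.

/* Illustrative only -- not part of this commit. */
#include <linux/ring_buffer.h>
#include <linux/poll.h>

/* Block until data is added to @cpu's buffer, or to any per-cpu buffer
 * when @cpu == RING_BUFFER_ALL_CPUS. */
static void my_wait_for_data(struct ring_buffer *buffer, int cpu)
{
        ring_buffer_wait(buffer, cpu);
}

/* ->poll() style helper: returns POLLIN | POLLRDNORM when the chosen
 * buffer already has data, otherwise registers @filp on the buffer's
 * wait queue and returns 0. */
static int my_poll(struct ring_buffer *buffer, int cpu,
                   struct file *filp, poll_table *pt)
{
        return ring_buffer_poll_wait(buffer, cpu, filp, pt);
}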

include/linux/ring_buffer.h

Lines changed: 6 additions & 0 deletions
@@ -4,6 +4,7 @@
 #include <linux/kmemcheck.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
+#include <linux/poll.h>
 
 struct ring_buffer;
 struct ring_buffer_iter;
@@ -96,6 +97,11 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
         __ring_buffer_alloc((size), (flags), &__key);   \
 })
 
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+                          struct file *filp, poll_table *poll_table);
+
+
 #define RING_BUFFER_ALL_CPUS -1
 
 void ring_buffer_free(struct ring_buffer *buffer);

kernel/trace/ring_buffer.c

Lines changed: 146 additions & 0 deletions
@@ -8,6 +8,7 @@
 #include <linux/trace_clock.h>
 #include <linux/trace_seq.h>
 #include <linux/spinlock.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
@@ -442,6 +443,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
         return ret;
 }
 
+struct rb_irq_work {
+        struct irq_work         work;
+        wait_queue_head_t       waiters;
+        bool                    waiters_pending;
+};
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -476,6 +483,8 @@ struct ring_buffer_per_cpu {
         struct list_head        new_pages; /* new pages to add */
         struct work_struct      update_pages_work;
         struct completion       update_done;
+
+        struct rb_irq_work      irq_work;
 };
 
 struct ring_buffer {
@@ -495,6 +504,8 @@ struct ring_buffer {
         struct notifier_block   cpu_notify;
 #endif
         u64                     (*clock)(void);
+
+        struct rb_irq_work      irq_work;
 };
 
 struct ring_buffer_iter {
@@ -506,6 +517,118 @@ struct ring_buffer_iter {
         u64                     read_stamp;
 };
 
+/*
+ * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * ring buffer waiters queue.
+ */
+static void rb_wake_up_waiters(struct irq_work *work)
+{
+        struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
+        wake_up_all(&rbwork->waiters);
+}
+
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ */
+void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        DEFINE_WAIT(wait);
+        struct rb_irq_work *work;
+
+        /*
+         * Depending on what the caller is waiting for, either any
+         * data in any cpu buffer, or a specific buffer, put the
+         * caller on the appropriate wait queue.
+         */
+        if (cpu == RING_BUFFER_ALL_CPUS)
+                work = &buffer->irq_work;
+        else {
+                cpu_buffer = buffer->buffers[cpu];
+                work = &cpu_buffer->irq_work;
+        }
+
+
+        prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
+
+        /*
+         * The events can happen in critical sections where
+         * checking a work queue can cause deadlocks.
+         * After adding a task to the queue, this flag is set
+         * only to notify events to try to wake up the queue
+         * using irq_work.
+         *
+         * We don't clear it even if the buffer is no longer
+         * empty. The flag only causes the next event to run
+         * irq_work to do the work queue wake up. The worse
+         * that can happen if we race with !trace_empty() is that
+         * an event will cause an irq_work to try to wake up
+         * an empty queue.
+         *
+         * There's no reason to protect this flag either, as
+         * the work queue and irq_work logic will do the necessary
+         * synchronization for the wake ups. The only thing
+         * that is necessary is that the wake up happens after
+         * a task has been queued. It's OK for spurious wake ups.
+         */
+        work->waiters_pending = true;
+
+        if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
+            (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
+                schedule();
+
+        finish_wait(&work->waiters, &wait);
+}
+
+/**
+ * ring_buffer_poll_wait - poll on buffer input
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @filp: the file descriptor
+ * @poll_table: The poll descriptor
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ *
+ * Returns POLLIN | POLLRDNORM if data exists in the buffers,
+ * zero otherwise.
+ */
+int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
+                          struct file *filp, poll_table *poll_table)
+{
+        struct ring_buffer_per_cpu *cpu_buffer;
+        struct rb_irq_work *work;
+
+        if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+            (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+                return POLLIN | POLLRDNORM;
+
+        if (cpu == RING_BUFFER_ALL_CPUS)
+                work = &buffer->irq_work;
+        else {
+                cpu_buffer = buffer->buffers[cpu];
+                work = &cpu_buffer->irq_work;
+        }
+
+        work->waiters_pending = true;
+        poll_wait(filp, &work->waiters, poll_table);
+
+        if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
+            (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
+                return POLLIN | POLLRDNORM;
+        return 0;
+}
+
 /* buffer may be either ring_buffer or ring_buffer_per_cpu */
 #define RB_WARN_ON(b, cond)                                             \
         ({                                                              \
@@ -1061,6 +1184,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
         cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
         INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
         init_completion(&cpu_buffer->update_done);
+        init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
 
         bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
                             GFP_KERNEL, cpu_to_node(cpu));
@@ -1156,6 +1280,8 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
         buffer->clock = trace_clock_local;
         buffer->reader_lock_key = key;
 
+        init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
+
         /* need at least two pages */
         if (nr_pages < 2)
                 nr_pages = 2;
@@ -2610,6 +2736,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
         rb_end_commit(cpu_buffer);
 }
 
+static __always_inline void
+rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+{
+        if (buffer->irq_work.waiters_pending) {
+                buffer->irq_work.waiters_pending = false;
+                /* irq_work_queue() supplies it's own memory barriers */
+                irq_work_queue(&buffer->irq_work.work);
+        }
+
+        if (cpu_buffer->irq_work.waiters_pending) {
+                cpu_buffer->irq_work.waiters_pending = false;
+                /* irq_work_queue() supplies it's own memory barriers */
+                irq_work_queue(&cpu_buffer->irq_work.work);
+        }
+}
+
 /**
  * ring_buffer_unlock_commit - commit a reserved
  * @buffer: The buffer to commit to
@@ -2629,6 +2771,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
         rb_commit(cpu_buffer, event);
 
+        rb_wakeups(buffer, cpu_buffer);
+
         trace_recursive_unlock();
 
         preempt_enable_notrace();
@@ -2801,6 +2945,8 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
         rb_commit(cpu_buffer, event);
 
+        rb_wakeups(buffer, cpu_buffer);
+
         ret = 0;
 out:
         preempt_enable_notrace();
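
The heart of the ring_buffer.c change is the deferred wake up: the writer path never calls wake_up() directly, since commits can happen in contexts where that could deadlock; it only sets waiters_pending and queues an irq_work, and the irq_work callback performs the wake up from a safe context. A compact restatement of that pattern outside the ring buffer, with hypothetical names, might look like the sketch below.

/* Illustrative only -- a generic form of the rb_irq_work pattern above. */
#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/wait.h>

struct deferred_wake {
        struct irq_work         work;
        wait_queue_head_t       waiters;
        bool                    waiters_pending;
};

/* Runs in irq_work context, where waking tasks is safe. */
static void deferred_wake_fn(struct irq_work *work)
{
        struct deferred_wake *dw = container_of(work, struct deferred_wake, work);

        wake_up_all(&dw->waiters);
}

static void deferred_wake_init(struct deferred_wake *dw)
{
        init_waitqueue_head(&dw->waiters);
        init_irq_work(&dw->work, deferred_wake_fn);
        dw->waiters_pending = false;
}

/* Producer side: may run in atomic/tracing context, so it only flips a
 * flag and queues the irq_work -- never a direct wake_up(). */
static void deferred_wake_notify(struct deferred_wake *dw)
{
        if (dw->waiters_pending) {
                dw->waiters_pending = false;
                irq_work_queue(&dw->work);
        }
}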

kernel/trace/trace.c

Lines changed: 12 additions & 71 deletions
@@ -19,7 +19,6 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
-#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -86,14 +85,6 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static DEFINE_PER_CPU(bool, trace_cmdline_save);
 
-/*
- * When a reader is waiting for data, then this variable is
- * set to true.
- */
-static bool trace_wakeup_needed;
-
-static struct irq_work trace_work_wakeup;
-
 /*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
@@ -334,28 +325,12 @@ static inline void trace_access_lock_init(void)
 
 #endif
 
-/* trace_wait is a waitqueue for tasks blocked on trace_poll */
-static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
-
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS;
 
-/**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-static void trace_wake_up(struct irq_work *work)
-{
-        wake_up_all(&trace_wait);
-
-}
-
 /**
  * tracing_on - enable tracing buffers
  *
@@ -763,36 +738,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 static void default_wait_pipe(struct trace_iterator *iter)
 {
-        DEFINE_WAIT(wait);
-
-        prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-        /*
-         * The events can happen in critical sections where
-         * checking a work queue can cause deadlocks.
-         * After adding a task to the queue, this flag is set
-         * only to notify events to try to wake up the queue
-         * using irq_work.
-         *
-         * We don't clear it even if the buffer is no longer
-         * empty. The flag only causes the next event to run
-         * irq_work to do the work queue wake up. The worse
-         * that can happen if we race with !trace_empty() is that
-         * an event will cause an irq_work to try to wake up
-         * an empty queue.
-         *
-         * There's no reason to protect this flag either, as
-         * the work queue and irq_work logic will do the necessary
-         * synchronization for the wake ups. The only thing
-         * that is necessary is that the wake up happens after
-         * a task has been queued. It's OK for spurious wake ups.
-         */
-        trace_wakeup_needed = true;
-
-        if (trace_empty(iter))
-                schedule();
+        /* Iterators are static, they should be filled or empty */
+        if (trace_buffer_iter(iter, iter->cpu_file))
+                return;
 
-        finish_wait(&trace_wait, &wait);
+        ring_buffer_wait(iter->tr->buffer, iter->cpu_file);
 }
 
 /**
@@ -1262,11 +1212,6 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
         __this_cpu_write(trace_cmdline_save, true);
-        if (trace_wakeup_needed) {
-                trace_wakeup_needed = false;
-                /* irq_work_queue() supplies it's own memory barriers */
-                irq_work_queue(&trace_work_wakeup);
-        }
         ring_buffer_unlock_commit(buffer, event);
 }
 
@@ -3557,21 +3502,18 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 static unsigned int
 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
 {
-        if (trace_flags & TRACE_ITER_BLOCK) {
+        /* Iterators are static, they should be filled or empty */
+        if (trace_buffer_iter(iter, iter->cpu_file))
+                return POLLIN | POLLRDNORM;
+
+        if (trace_flags & TRACE_ITER_BLOCK)
                 /*
                  * Always select as readable when in blocking mode
                  */
                 return POLLIN | POLLRDNORM;
-        } else {
-                if (!trace_empty(iter))
-                        return POLLIN | POLLRDNORM;
-                trace_wakeup_needed = true;
-                poll_wait(filp, &trace_wait, poll_table);
-                if (!trace_empty(iter))
-                        return POLLIN | POLLRDNORM;
-
-                return 0;
-        }
+        else
+                return ring_buffer_poll_wait(iter->tr->buffer, iter->cpu_file,
+                                             filp, poll_table);
 }
 
 static unsigned int
@@ -5701,7 +5643,6 @@ __init static int tracer_alloc_buffers(void)
 #endif
 
         trace_init_cmdlines();
-        init_irq_work(&trace_work_wakeup, trace_wake_up);
 
         register_tracer(&nop_trace);
 