Skip to content

Commit 98229aa

Browse files
committed
x86/irq: Plug vector cleanup race
We still can end up with a stale vector due to the following: CPU0 CPU1 CPU2 lock_vector() data->move_in_progress=0 sendIPI() unlock_vector() set_affinity() assign_irq_vector() lock_vector() handle_IPI move_in_progress = 1 lock_vector() unlock_vector() move_in_progress == 1 So we need to serialize the vector assignment against a pending cleanup. The solution is rather simple now. We not only check for the move_in_progress flag in assign_irq_vector(), we also check whether there is still a cleanup pending in the old_domain cpumask. If so, we return -EBUSY to the caller and let him deal with it. Though we have to be careful in the cpu unplug case. If the cleanout has not yet completed then the following setaffinity() call would return -EBUSY. Add code which prevents this. Full context is here: http://lkml.kernel.org/r/[email protected] Reported-and-tested-by: Joe Lawrence <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Borislav Petkov <[email protected]> Cc: Jiang Liu <[email protected]> Cc: Jeremiah Mahler <[email protected]> Cc: [email protected] Cc: Guenter Roeck <[email protected]> Cc: [email protected] #4.3+ Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Thomas Gleixner <[email protected]>
1 parent 90a2282 commit 98229aa

File tree

1 file changed

+53
-10
lines changed

1 file changed

+53
-10
lines changed

arch/x86/kernel/apic/vector.c

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
120120
static int current_offset = VECTOR_OFFSET_START % 16;
121121
int cpu, vector;
122122

123-
if (d->move_in_progress)
123+
/*
124+
* If there is still a move in progress or the previous move has not
125+
* been cleaned up completely, tell the caller to come back later.
126+
*/
127+
if (d->move_in_progress ||
128+
cpumask_intersects(d->old_domain, cpu_online_mask))
124129
return -EBUSY;
125130

126131
/* Only try and allocate irqs on cpus that are present */
@@ -259,7 +264,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
259264
data->cfg.vector = 0;
260265
cpumask_clear(data->domain);
261266

262-
if (likely(!data->move_in_progress))
267+
/*
268+
* If move is in progress or the old_domain mask is not empty,
269+
* i.e. the cleanup IPI has not been processed yet, we need to remove
270+
* the old references to desc from all cpus vector tables.
271+
*/
272+
if (!data->move_in_progress && cpumask_empty(data->old_domain))
263273
return;
264274

265275
desc = irq_to_desc(irq);
@@ -579,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
579589
goto unlock;
580590

581591
/*
582-
* Check if the irq migration is in progress. If so, we
583-
* haven't received the cleanup request yet for this irq.
592+
* Nothing to cleanup if irq migration is in progress
593+
* or this cpu is not set in the cleanup mask.
584594
*/
585-
if (data->move_in_progress)
595+
if (data->move_in_progress ||
596+
!cpumask_test_cpu(me, data->old_domain))
586597
goto unlock;
587598

599+
/*
600+
* We have two cases to handle here:
601+
* 1) vector is unchanged but the target mask got reduced
602+
* 2) vector and the target mask has changed
603+
*
604+
* #1 is obvious, but in #2 we have two vectors with the same
605+
* irq descriptor: the old and the new vector. So we need to
606+
* make sure that we only cleanup the old vector. The new
607+
* vector has the current @vector number in the config and
608+
* this cpu is part of the target mask. We better leave that
609+
* one alone.
610+
*/
588611
if (vector == data->cfg.vector &&
589612
cpumask_test_cpu(me, data->domain))
590613
goto unlock;
@@ -602,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
602625
goto unlock;
603626
}
604627
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
628+
cpumask_clear_cpu(me, data->old_domain);
605629
unlock:
606630
raw_spin_unlock(&desc->lock);
607631
}
@@ -645,13 +669,32 @@ void irq_force_complete_move(struct irq_desc *desc)
645669
__irq_complete_move(cfg, cfg->vector);
646670

647671
/*
648-
* Remove this cpu from the cleanup mask. The IPI might have been sent
649-
* just before the cpu was removed from the offline mask, but has not
650-
* been processed because the CPU has interrupts disabled and is on
651-
* the way out.
672+
* This is tricky. If the cleanup of @data->old_domain has not been
673+
* done yet, then the following setaffinity call will fail with
674+
* -EBUSY. This can leave the interrupt in a stale state.
675+
*
676+
* The cleanup cannot make progress because we hold @desc->lock. So in
677+
* case @data->old_domain is not yet cleaned up, we need to drop the
678+
* lock and acquire it again. @desc cannot go away, because the
679+
* hotplug code holds the sparse irq lock.
652680
*/
653681
raw_spin_lock(&vector_lock);
654-
cpumask_clear_cpu(smp_processor_id(), data->old_domain);
682+
/* Clean out all offline cpus (including ourself) first. */
683+
cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
684+
while (!cpumask_empty(data->old_domain)) {
685+
raw_spin_unlock(&vector_lock);
686+
raw_spin_unlock(&desc->lock);
687+
cpu_relax();
688+
raw_spin_lock(&desc->lock);
689+
/*
690+
* Reevaluate apic_chip_data. It might have been cleared after
691+
* we dropped @desc->lock.
692+
*/
693+
data = apic_chip_data(irqdata);
694+
if (!data)
695+
return;
696+
raw_spin_lock(&vector_lock);
697+
}
655698
raw_spin_unlock(&vector_lock);
656699
}
657700
#endif

0 commit comments

Comments
 (0)