Re: [patch 15/14] x86/dumpstack/64: Speedup in_exception_stack()

From: Josh Poimboeuf
Date: Tue Apr 02 2019 - 11:43:36 EST


On Tue, Apr 02, 2019 at 12:19:46PM +0200, Thomas Gleixner wrote:
> The current implementation of in_exception_stack() iterates over the
> exception stacks array. Most of the time this is an useless exercise, but
> even for the actual use cases (perf and ftrace) it takes at least 2
> iterations to get to the NMI stack.
>
> As the exception stacks and the guard pages are page aligned the loop can
> be avoided completely.
>
> Add a initial check whether the stack pointer is inside the full exception
> stack area and leave early if not.
>
> Create a lookup table which describes the stack area. The table index is
> the page offset from the beginning of the exception stacks. So for any
> given stack pointer the page offset is computed and a lookup in the
> description table is performed. If it is inside a guard page, return. If
> not, use the descriptor to fill in the info structure.
>
> The table is filled at compile time with nasty macro magic and for the
> !KASAN case the interesting page descriptors exactly fit into a single
> cache line. Just the last guard page descriptor is in the next cacheline,
> but that should not be accessed in the regular case.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
> arch/x86/kernel/dumpstack_64.c | 97 +++++++++++++++++++++++++++--------------
> 1 file changed, 66 insertions(+), 31 deletions(-)
>
> --- a/arch/x86/kernel/dumpstack_64.c
> +++ b/arch/x86/kernel/dumpstack_64.c
> @@ -48,50 +48,85 @@ const char *stack_type_name(enum stack_t
> return NULL;
> }
>
> -struct estack_layout {
> - unsigned int begin;
> - unsigned int end;
> +#define ESTACK_S(st) \
> + (offsetof(struct cea_exception_stacks, st## _stack))
> +
> +#define ESTACK_E(st) \
> + (offsetof(struct cea_exception_stacks, st## _stack_guard))
> +
> +#define PAGENR(offs) ((offs) / PAGE_SIZE)
> +#define PAGERANGE(st) PAGENR(ESTACK_S(st)) ... PAGENR(ESTACK_E(st) - 1)
> +
> +#if EXCEPTION_STKSZ == PAGE_SIZE
> +# define CONDRANGE(st) PAGENR(ESTACK_S(st))
> +#else
> +# define CONDRANGE(st) PAGERANGE(st)
> +#endif
> +
> +/**
> + * struct estack_pages - Page descriptor for exception stacks
> + * @offs: Offset from the start of the exception stack area
> + * @size: Size of the exception stack
> + * @type: Type to store in the stack_info struct
> + */
> +struct estack_pages {
> + u32 offs;
> + u16 size;
> + u16 type;
> };
>
> -#define ESTACK_ENTRY(x) { \
> - .begin = offsetof(struct cea_exception_stacks, x## _stack), \
> - .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \
> +#define ESTACK_PAGE(ist, est) { \
> + .offs = ESTACK_S(est), \
> + .size = ESTACK_E(est) - ESTACK_S(est), \
> + .type = STACK_TYPE_EXCEPTION + ist, \
> }
>
> -static const struct estack_layout layout[N_EXCEPTION_STACKS] = {
> - [ DOUBLEFAULT_IST ] = ESTACK_ENTRY(DF),
> - [ NMI_IST ] = ESTACK_ENTRY(NMI),
> - [ DEBUG_IST ] = ESTACK_ENTRY(DB),
> - [ MCE_IST ] = ESTACK_ENTRY(MCE),
> +#define ESTACK_PAGES (sizeof(struct cea_exception_stacks) / PAGE_SIZE)
> +
> +/*
> + * Array of exception stack page descriptors. If the stack is larger than
> + * PAGE_SIZE, all pages covering a particular stack will have the same
> + * info.
> + */
> +static const struct estack_pages estack_pages[ESTACK_PAGES] ____cacheline_aligned = {
> + [CONDRANGE(DF)] = ESTACK_PAGE(DOUBLEFAULT_IST, DF),
> + [CONDRANGE(NMI)] = ESTACK_PAGE(NMI_IST, NMI),
> + [PAGERANGE(DB)] = ESTACK_PAGE(DEBUG_IST, DB),
> + [CONDRANGE(MCE)] = ESTACK_PAGE(MCE_IST, MCE),

It would be nice if the *_IST macro naming aligned with the struct
cea_exception_stacks field naming. Then you could just do, e.g.
ESTACKPAGE(DF).

Also it's a bit unfortunate that some of the stack size knowledge is
hard-coded here, i.e #DB always being > 1 page and non-#DB being
sometimes 1 page.

> };
>
> static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
> {
> - unsigned long estacks, begin, end, stk = (unsigned long)stack;
> + unsigned long begin, end, stk = (unsigned long)stack;
> + const struct estack_pages *ep;
> struct pt_regs *regs;
> unsigned int k;
>
> BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
>
> - estacks = (unsigned long)__this_cpu_read(cea_exception_stacks);
> -
> - for (k = 0; k < N_EXCEPTION_STACKS; k++) {
> - begin = estacks + layout[k].begin;
> - end = estacks + layout[k].end;
> - regs = (struct pt_regs *)end - 1;
> -
> - if (stk <= begin || stk >= end)
> - continue;
> -
> - info->type = STACK_TYPE_EXCEPTION + k;
> - info->begin = (unsigned long *)begin;
> - info->end = (unsigned long *)end;
> - info->next_sp = (unsigned long *)regs->sp;
> -
> - return true;
> - }
> -
> - return false;
> + begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
> + end = begin + sizeof(struct cea_exception_stacks);
> + /* Bail if @stack is outside the exception stack area. */
> + if (stk <= begin || stk >= end)
> + return false;

This check is the most important piece. Exception stack dumps are quite
rare, so this ensures an early exit in most cases regardless of whether
there's a loop below.

> +
> + /* Calc page offset from start of exception stacks */
> + k = (stk - begin) >> PAGE_SHIFT;
> + /* Lookup the page descriptor */
> + ep = &estack_pages[k];
> + /* Guard page? */
> + if (unlikely(!ep->size))
> + return false;
> +
> + begin += (unsigned long)ep->offs;
> + end = begin + (unsigned long)ep->size;
> + regs = (struct pt_regs *)end - 1;
> +
> + info->type = ep->type;
> + info->begin = (unsigned long *)begin;
> + info->end = (unsigned long *)end;
> + info->next_sp = (unsigned long *)regs->sp;
> + return true;

With the above "(stk <= begin || stk >= end)" check, removing the loop
becomes not all that important since exception stack dumps are quite
rare and not performance sensitive. With all the macros this code
becomes a little more obtuse, so I'm not sure whether removal of the
loop is a net positive.

> }
>
> static bool in_irq_stack(unsigned long *stack, struct stack_info *info)

--
Josh