Re: [PATCH v2] x86/asm: fix assembly constraints in bitops
From: Alexander Potapenko
Date: Tue Apr 02 2019 - 07:33:21 EST
On Tue, Apr 2, 2019 at 1:28 PM Alexander Potapenko <glider@xxxxxxxxxx> wrote:
>
> 1. Use memory clobber in bitops that touch arbitrary memory
>
> Certain bit operations that read/write bits take a base pointer and an
> arbitrarily large offset to address the bit relative to that base.
> Inline assembly constraints aren't expressive enough to tell the
> compiler that the assembly directive is going to touch a specific memory
> location of unknown size, therefore we have to use the "memory" clobber
> to indicate that the assembly is going to access memory locations other
> than those listed in the inputs/outputs.
> To indicate that BTR/BTS instructions don't necessarily touch the first
> sizeof(long) bytes of the argument, we also move the address to assembly
> inputs.
>
> This particular change leads to size increase of 124 kernel functions in
> a defconfig build. For some of them the diff is in NOP operations, other
> end up re-reading values from memory and may potentially slow down the
> execution. But without these clobbers the compiler is free to cache
> the contents of the bitmaps and use them as if they weren't changed by
> the inline assembly.
>
> 2. Use byte-sized arguments for operations touching single bytes.
>
> Passing a long value to ANDB/ORB/XORB instructions makes the compiler
> treat sizeof(long) bytes as being clobbered, which isn't the case. This
> may theoretically lead to worse code in the case of heavy optimization.
>
> Signed-off-by: Alexander Potapenko <glider@xxxxxxxxxx>
> Cc: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
> Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxx>
> Cc: H. Peter Anvin <hpa@xxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: James Y Knight <jyknight@xxxxxxxxxx>
> ---
> v2:
> -- renamed the patch
> -- addressed comment by Peter Zijlstra: don't use "+m" for functions
> returning void
> -- fixed input types for operations touching single bytes
> ---
> arch/x86/include/asm/bitops.h | 41 +++++++++++++++--------------------
> 1 file changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index d153d570bb04..c0cb9c5d3476 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -36,16 +36,17 @@
> * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
> */
>
> -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
> +#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))
>
> -#define ADDR BITOP_ADDR(addr)
> +#define ADDR RLONG_ADDR(addr)
>
> /*
> * We do the locked ops that don't return the old value as
> * a mask operation on a byte.
> */
> #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
> -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
> +#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
> #define CONST_MASK(nr) (1 << ((nr) & 7))
>
> /**
> @@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long *addr)
> : "memory");
> } else {
> asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
> - : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
> + : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
> }
> }
>
> @@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long *addr)
> */
> static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
> {
> - asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
> + asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
> }
>
> /**
> @@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned long *addr)
> : "iq" ((u8)~CONST_MASK(nr)));
> } else {
> asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
> - : BITOP_ADDR(addr)
> - : "Ir" (nr));
> + : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
> }
> }
>
> @@ -131,7 +131,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
>
> static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
> {
> - asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
> + asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
> }
>
> static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
> @@ -139,7 +139,7 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
> bool negative;
> asm volatile(LOCK_PREFIX "andb %2,%1"
> CC_SET(s)
> - : CC_OUT(s) (negative), ADDR
> + : CC_OUT(s) (negative), WBYTE_ADDR(addr)
> : "ir" ((char) ~(1 << nr)) : "memory");
> return negative;
> }
> @@ -155,13 +155,9 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
> * __clear_bit() is non-atomic and implies release semantics before the memory
> * operation. It can be used for an unlock if no other CPUs can concurrently
> * modify other bits in the word.
> - *
> - * No memory barrier is required here, because x86 cannot reorder stores past
> - * older loads. Same principle as spin_unlock.
> */
> static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
> {
> - barrier();
Should have reflected this in the patch description.
> __clear_bit(nr, addr);
> }
>
> @@ -176,7 +172,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
> */
> static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
> {
> - asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
> + asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
> }
>
> /**
> @@ -196,8 +192,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
> : "iq" ((u8)CONST_MASK(nr)));
> } else {
> asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
> - : BITOP_ADDR(addr)
> - : "Ir" (nr));
> + : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
> }
> }
>
> @@ -242,8 +237,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
>
> asm(__ASM_SIZE(bts) " %2,%1"
> CC_SET(c)
> - : CC_OUT(c) (oldbit), ADDR
> - : "Ir" (nr));
> + : CC_OUT(c) (oldbit)
> + : ADDR, "Ir" (nr) : "memory");
> return oldbit;
> }
>
> @@ -282,8 +277,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
>
> asm volatile(__ASM_SIZE(btr) " %2,%1"
> CC_SET(c)
> - : CC_OUT(c) (oldbit), ADDR
> - : "Ir" (nr));
> + : CC_OUT(c) (oldbit)
> + : ADDR, "Ir" (nr) : "memory");
> return oldbit;
> }
>
> @@ -294,8 +289,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
>
> asm volatile(__ASM_SIZE(btc) " %2,%1"
> CC_SET(c)
> - : CC_OUT(c) (oldbit), ADDR
> - : "Ir" (nr) : "memory");
> + : CC_OUT(c) (oldbit)
> + : ADDR, "Ir" (nr) : "memory");
>
> return oldbit;
> }
> @@ -326,7 +321,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
> asm volatile(__ASM_SIZE(bt) " %2,%1"
> CC_SET(c)
> : CC_OUT(c) (oldbit)
> - : "m" (*(unsigned long *)addr), "Ir" (nr));
> + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
>
> return oldbit;
> }
> --
> 2.21.0.392.gf8f6787159e-goog
>
--
Alexander Potapenko
Software Engineer
Google Germany GmbH
Erika-Mann-StraÃe, 33
80636 MÃnchen
GeschÃftsfÃhrer: Paul Manicle, Halimah DeLaine Prado
Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg