diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 13:36:07 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-13 13:36:07 -0700 | 
| commit | 029f56db6ac248769f2c260bfaf3c3c0e23e904c (patch) | |
| tree | 78c03b03258c0ac5d7b9546acc2ba14e87e277fd | |
| parent | 7cd4ecd9177b94af783b8e21de7c65b41a871342 (diff) | |
| parent | aa5cacdc29d76a005cbbee018a47faa6e724dd2d (diff) | |
Merge tag 'x86_asm_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm updates from Borislav Petkov:
 "Two asm wrapper fixes:
   - Use XORL instead of XORQ to avoid a REX prefix and save some bytes
     in the .fixup section, by Uros Bizjak.
   - Replace the __force_order dummy variable with a memory clobber, both
     to fix LLVM requiring a definition for the former and to prevent
     memory accesses from still being cached/reordered, by Arvind Sankar"
* tag 'x86_asm_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/asm: Replace __force_order with a memory clobber
  x86/uaccess: Use XORL %0,%0 in __get_user_asm()
| -rw-r--r-- | arch/x86/boot/compressed/pgtable_64.c | 9 | ||||
| -rw-r--r-- | arch/x86/include/asm/special_insns.h | 28 | ||||
| -rw-r--r-- | arch/x86/include/asm/uaccess.h | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 4 | 
4 files changed, 18 insertions, 25 deletions
| diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index c8862696a47b..7d0394f4ebf9 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -5,15 +5,6 @@  #include "pgtable.h"  #include "../string.h" -/* - * __force_order is used by special_insns.h asm code to force instruction - * serialization. - * - * It is not referenced from the code, but GCC < 5 with -fPIE would fail - * due to an undefined symbol. Define it to make these ancient GCCs work. - */ -unsigned long __force_order; -  #define BIOS_START_MIN		0x20000U	/* 128K, less than this is insane */  #define BIOS_START_MAX		0x9f000U	/* 640K, absolute maximum */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 94624fb06fac..cc177b4431ae 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -11,45 +11,47 @@  #include <linux/jump_label.h>  /* - * Volatile isn't enough to prevent the compiler from reordering the - * read/write functions for the control registers and messing everything up. - * A memory clobber would solve the problem, but would prevent reordering of - * all loads stores around it, which can hurt performance. Solution is to - * use a variable and mimic reads and writes to it to enforce serialization + * The compiler should not reorder volatile asm statements with respect to each + * other: they should execute in program order. However GCC 4.9.x and 5.x have + * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder + * volatile asm. The write functions are not affected since they have memory + * clobbers preventing reordering. To prevent reads from being reordered with + * respect to writes, use a dummy memory operand.   
*/ -extern unsigned long __force_order; + +#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)  void native_write_cr0(unsigned long val);  static inline unsigned long native_read_cr0(void)  {  	unsigned long val; -	asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); +	asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);  	return val;  }  static __always_inline unsigned long native_read_cr2(void)  {  	unsigned long val; -	asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); +	asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);  	return val;  }  static __always_inline void native_write_cr2(unsigned long val)  { -	asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); +	asm volatile("mov %0,%%cr2": : "r" (val) : "memory");  }  static inline unsigned long __native_read_cr3(void)  {  	unsigned long val; -	asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); +	asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);  	return val;  }  static inline void native_write_cr3(unsigned long val)  { -	asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); +	asm volatile("mov %0,%%cr3": : "r" (val) : "memory");  }  static inline unsigned long native_read_cr4(void) @@ -64,10 +66,10 @@ static inline unsigned long native_read_cr4(void)  	asm volatile("1: mov %%cr4, %0\n"  		     "2:\n"  		     _ASM_EXTABLE(1b, 2b) -		     : "=r" (val), "=m" (__force_order) : "0" (0)); +		     : "=r" (val) : "0" (0), __FORCE_ORDER);  #else  	/* CR4 always exists on x86_64. 
*/ -	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); +	asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);  #endif  	return val;  } diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index aa60c239931b..477c503f2753 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -418,7 +418,7 @@ do {									\  		     "2:\n"						\  		     ".section .fixup,\"ax\"\n"				\  		     "3:	mov %[efault],%[errout]\n"		\ -		     "	xor"itype" %[output],%[output]\n"		\ +		     "	xorl %k[output],%k[output]\n"			\  		     "	jmp 2b\n"					\  		     ".previous\n"					\  		     _ASM_EXTABLE_UA(1b, 3b)				\ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7824fc62c7cd..c51158914ea2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -360,7 +360,7 @@ void native_write_cr0(unsigned long val)  	unsigned long bits_missing = 0;  set_register: -	asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); +	asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");  	if (static_branch_likely(&cr_pinning)) {  		if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { @@ -379,7 +379,7 @@ void native_write_cr4(unsigned long val)  	unsigned long bits_changed = 0;  set_register: -	asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); +	asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");  	if (static_branch_likely(&cr_pinning)) {  		if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { | 
