diff options
Diffstat (limited to 'arch/x86/kernel/process_64.c')
| -rw-r--r-- | arch/x86/kernel/process_64.c | 189 | 
1 files changed, 144 insertions, 45 deletions
| diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ea5ea850348d..0e0b4288a4b2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -54,13 +54,12 @@  #include <asm/vdso.h>  #include <asm/intel_rdt_sched.h>  #include <asm/unistd.h> +#include <asm/fsgsbase.h>  #ifdef CONFIG_IA32_EMULATION  /* Not included via unistd.h */  #include <asm/unistd_32_ia32.h>  #endif -__visible DEFINE_PER_CPU(unsigned long, rsp_scratch); -  /* Prints also some state that isn't saved in the pt_regs */  void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)  { @@ -286,6 +285,138 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,  	}  } +static __always_inline void x86_fsgsbase_load(struct thread_struct *prev, +					      struct thread_struct *next) +{ +	load_seg_legacy(prev->fsindex, prev->fsbase, +			next->fsindex, next->fsbase, FS); +	load_seg_legacy(prev->gsindex, prev->gsbase, +			next->gsindex, next->gsbase, GS); +} + +static unsigned long x86_fsgsbase_read_task(struct task_struct *task, +					    unsigned short selector) +{ +	unsigned short idx = selector >> 3; +	unsigned long base; + +	if (likely((selector & SEGMENT_TI_MASK) == 0)) { +		if (unlikely(idx >= GDT_ENTRIES)) +			return 0; + +		/* +		 * There are no user segments in the GDT with nonzero bases +		 * other than the TLS segments. +		 */ +		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) +			return 0; + +		idx -= GDT_ENTRY_TLS_MIN; +		base = get_desc_base(&task->thread.tls_array[idx]); +	} else { +#ifdef CONFIG_MODIFY_LDT_SYSCALL +		struct ldt_struct *ldt; + +		/* +		 * If performance here mattered, we could protect the LDT +		 * with RCU.  This is a slow path, though, so we can just +		 * take the mutex. +		 */ +		mutex_lock(&task->mm->context.lock); +		ldt = task->mm->context.ldt; +		if (unlikely(idx >= ldt->nr_entries)) +			base = 0; +		else +			base = get_desc_base(ldt->entries + idx); +		mutex_unlock(&task->mm->context.lock); +#else +		base = 0; +#endif +	} + +	return base; +} + +void x86_fsbase_write_cpu(unsigned long fsbase) +{ +	/* +	 * Set the selector to 0 as a notion, that the segment base is +	 * overwritten, which will be checked for skipping the segment load +	 * during context switch. +	 */ +	loadseg(FS, 0); +	wrmsrl(MSR_FS_BASE, fsbase); +} + +void x86_gsbase_write_cpu_inactive(unsigned long gsbase) +{ +	/* Set the selector to 0 for the same reason as %fs above. */ +	loadseg(GS, 0); +	wrmsrl(MSR_KERNEL_GS_BASE, gsbase); +} + +unsigned long x86_fsbase_read_task(struct task_struct *task) +{ +	unsigned long fsbase; + +	if (task == current) +		fsbase = x86_fsbase_read_cpu(); +	else if (task->thread.fsindex == 0) +		fsbase = task->thread.fsbase; +	else +		fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex); + +	return fsbase; +} + +unsigned long x86_gsbase_read_task(struct task_struct *task) +{ +	unsigned long gsbase; + +	if (task == current) +		gsbase = x86_gsbase_read_cpu_inactive(); +	else if (task->thread.gsindex == 0) +		gsbase = task->thread.gsbase; +	else +		gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex); + +	return gsbase; +} + +int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase) +{ +	/* +	 * Not strictly needed for %fs, but do it for symmetry +	 * with %gs +	 */ +	if (unlikely(fsbase >= TASK_SIZE_MAX)) +		return -EPERM; + +	preempt_disable(); +	task->thread.fsbase = fsbase; +	if (task == current) +		x86_fsbase_write_cpu(fsbase); +	task->thread.fsindex = 0; +	preempt_enable(); + +	return 0; +} + +int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase) +{ +	if (unlikely(gsbase >= TASK_SIZE_MAX)) +		return -EPERM; + +	preempt_disable(); +	task->thread.gsbase = gsbase; +	if (task == current) +		x86_gsbase_write_cpu_inactive(gsbase); +	task->thread.gsindex = 0; +	preempt_enable(); + +	return 0; +} +  int copy_thread_tls(unsigned long clone_flags, unsigned long sp,  		unsigned long arg, struct task_struct *p, unsigned long tls)  { @@ -473,10 +604,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)  	if (unlikely(next->ds | prev->ds))  		loadsegment(ds, next->ds); -	load_seg_legacy(prev->fsindex, prev->fsbase, -			next->fsindex, next->fsbase, FS); -	load_seg_legacy(prev->gsindex, prev->gsbase, -			next->gsindex, next->gsbase, GS); +	x86_fsgsbase_load(prev, next);  	switch_fpu_finish(next_fpu, cpu); @@ -573,10 +701,10 @@ static void __set_personality_x32(void)  		current->mm->context.ia32_compat = TIF_X32;  	current->personality &= ~READ_IMPLIES_EXEC;  	/* -	 * in_compat_syscall() uses the presence of the x32 syscall bit +	 * in_32bit_syscall() uses the presence of the x32 syscall bit  	 * flag to determine compat status.  The x86 mmap() code relies on  	 * the syscall bitness so set x32 syscall bit right here to make -	 * in_compat_syscall() work during exec(). +	 * in_32bit_syscall() work during exec().  	 *  	 * Pretend to come from a x32 execve.  	 */ @@ -627,54 +755,25 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)  {  	int ret = 0; -	int doit = task == current; -	int cpu;  	switch (option) { -	case ARCH_SET_GS: -		if (arg2 >= TASK_SIZE_MAX) -			return -EPERM; -		cpu = get_cpu(); -		task->thread.gsindex = 0; -		task->thread.gsbase = arg2; -		if (doit) { -			load_gs_index(0); -			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2); -		} -		put_cpu(); +	case ARCH_SET_GS: { +		ret = x86_gsbase_write_task(task, arg2);  		break; -	case ARCH_SET_FS: -		/* Not strictly needed for fs, but do it for symmetry -		   with gs */ -		if (arg2 >= TASK_SIZE_MAX) -			return -EPERM; -		cpu = get_cpu(); -		task->thread.fsindex = 0; -		task->thread.fsbase = arg2; -		if (doit) { -			/* set the selector to 0 to not confuse __switch_to */ -			loadsegment(fs, 0); -			ret = wrmsrl_safe(MSR_FS_BASE, arg2); -		} -		put_cpu(); +	} +	case ARCH_SET_FS: { +		ret = x86_fsbase_write_task(task, arg2);  		break; +	}  	case ARCH_GET_FS: { -		unsigned long base; +		unsigned long base = x86_fsbase_read_task(task); -		if (doit) -			rdmsrl(MSR_FS_BASE, base); -		else -			base = task->thread.fsbase;  		ret = put_user(base, (unsigned long __user *)arg2);  		break;  	}  	case ARCH_GET_GS: { -		unsigned long base; +		unsigned long base = x86_gsbase_read_task(task); -		if (doit) -			rdmsrl(MSR_KERNEL_GS_BASE, base); -		else -			base = task->thread.gsbase;  		ret = put_user(base, (unsigned long __user *)arg2);  		break;  	} | 
