Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile  |    1 -
-rw-r--r--  kernel/cpufreq.c |  963 ------
-rw-r--r--  kernel/futex.c   |  427 ++--
-rw-r--r--  kernel/ksyms.c   |    1 -
4 files changed, 223 insertions(+), 1169 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile index 9bf11ae7195b..638a2f6c341c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += ksyms.o module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ -obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_IKCONFIG) += configs.o diff --git a/kernel/cpufreq.c b/kernel/cpufreq.c deleted file mode 100644 index 7f80c321c785..000000000000 --- a/kernel/cpufreq.c +++ /dev/null @@ -1,963 +0,0 @@ -/* - * linux/kernel/cpufreq.c - * - * Copyright (C) 2001 Russell King - * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> - * - * $Id: cpufreq.c,v 1.59 2003/01/20 17:31:48 db Exp $ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/notifier.h> -#include <linux/cpufreq.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <linux/device.h> -#include <linux/slab.h> -#include <linux/cpu.h> -#include <linux/completion.h> - -/** - * The "cpufreq driver" - the arch- or hardware-dependend low - * level driver of CPUFreq support, and its spinlock. This lock - * also protects the cpufreq_cpu_data array. - */ -static struct cpufreq_driver *cpufreq_driver; -static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS]; -static spinlock_t cpufreq_driver_lock = SPIN_LOCK_UNLOCKED; - -/* internal prototype */ -static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); - - -/** - * Two notifier lists: the "policy" list is involved in the - * validation process for a new CPU frequency policy; the - * "transition" list for kernel code that needs to handle - * changes to devices when the CPU clock speed changes. - * The mutex locks both lists. 
- */ -static struct notifier_block *cpufreq_policy_notifier_list; -static struct notifier_block *cpufreq_transition_notifier_list; -static DECLARE_RWSEM (cpufreq_notifier_rwsem); - - -static LIST_HEAD(cpufreq_governor_list); -static DECLARE_MUTEX (cpufreq_governor_sem); - -static struct cpufreq_policy * cpufreq_cpu_get(unsigned int cpu) -{ - struct cpufreq_policy *data; - unsigned long flags; - - if (cpu >= NR_CPUS) - goto err_out; - - /* get the cpufreq driver */ - spin_lock_irqsave(&cpufreq_driver_lock, flags); - - if (!cpufreq_driver) - goto err_out_unlock; - - if (!try_module_get(cpufreq_driver->owner)) - goto err_out_unlock; - - - /* get the CPU */ - data = cpufreq_cpu_data[cpu]; - - if (!data) - goto err_out_put_module; - - if (!kobject_get(&data->kobj)) - goto err_out_put_module; - - - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - return data; - - err_out_put_module: - module_put(cpufreq_driver->owner); - err_out_unlock: - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - err_out: - return NULL; -} - -static void cpufreq_cpu_put(struct cpufreq_policy *data) -{ - kobject_put(&data->kobj); - module_put(cpufreq_driver->owner); -} - -/********************************************************************* - * SYSFS INTERFACE * - *********************************************************************/ - -/** - * cpufreq_parse_governor - parse a governor string - */ -int cpufreq_parse_governor (char *str_governor, unsigned int *policy, - struct cpufreq_governor **governor) -{ - if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { - *policy = CPUFREQ_POLICY_PERFORMANCE; - return 0; - } else if (!strnicmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { - *policy = CPUFREQ_POLICY_POWERSAVE; - return 0; - } else { - struct cpufreq_governor *t; - down(&cpufreq_governor_sem); - if (!cpufreq_driver || !cpufreq_driver->target) - goto out; - list_for_each_entry(t, &cpufreq_governor_list, governor_list) { - if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN)) { - *governor = t; - *policy = CPUFREQ_POLICY_GOVERNOR; - up(&cpufreq_governor_sem); - return 0; - } - } - out: - up(&cpufreq_governor_sem); - } - return -EINVAL; -} -EXPORT_SYMBOL_GPL(cpufreq_parse_governor); - - -/* drivers/base/cpu.c */ -extern struct sysdev_class cpu_sysdev_class; - - -/** - * cpufreq_per_cpu_attr_read() / show_##file_name() - print out cpufreq information - * - * Write out information from cpufreq_driver->policy[cpu]; object must be - * "unsigned int". - */ - -#define show_one(file_name, object) \ -static ssize_t show_##file_name \ -(struct cpufreq_policy * policy, char *buf) \ -{ \ - return sprintf (buf, "%u\n", policy->object); \ -} - -show_one(cpuinfo_min_freq, cpuinfo.min_freq); -show_one(cpuinfo_max_freq, cpuinfo.max_freq); -show_one(scaling_min_freq, min); -show_one(scaling_max_freq, max); - -/** - * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access - */ -#define store_one(file_name, object) \ -static ssize_t store_##file_name \ -(struct cpufreq_policy * policy, const char *buf, size_t count) \ -{ \ - unsigned int ret = -EINVAL; \ - struct cpufreq_policy new_policy; \ - \ - ret = cpufreq_get_policy(&new_policy, policy->cpu); \ - if (ret) \ - return -EINVAL; \ - \ - ret = sscanf (buf, "%u", &new_policy.object); \ - if (ret != 1) \ - return -EINVAL; \ - \ - ret = cpufreq_set_policy(&new_policy); \ - \ - return ret ? 
ret : count; \ -} - -store_one(scaling_min_freq,min); -store_one(scaling_max_freq,max); - -/** - * show_scaling_governor - show the current policy for the specified CPU - */ -static ssize_t show_scaling_governor (struct cpufreq_policy * policy, char *buf) -{ - switch (policy->policy) { - case CPUFREQ_POLICY_POWERSAVE: - return sprintf(buf, "powersave\n"); - case CPUFREQ_POLICY_PERFORMANCE: - return sprintf(buf, "performance\n"); - case CPUFREQ_POLICY_GOVERNOR: - return snprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name); - default: - return -EINVAL; - } -} - - -/** - * store_scaling_governor - store policy for the specified CPU - */ -static ssize_t store_scaling_governor (struct cpufreq_policy * policy, - const char *buf, size_t count) -{ - unsigned int ret = -EINVAL; - char str_governor[16]; - struct cpufreq_policy new_policy; - - ret = cpufreq_get_policy(&new_policy, policy->cpu); - if (ret) - return ret; - - ret = sscanf (buf, "%15s", str_governor); - if (ret != 1) - return -EINVAL; - - if (cpufreq_parse_governor(str_governor, &new_policy.policy, &new_policy.governor)) - return -EINVAL; - - ret = cpufreq_set_policy(&new_policy); - - return ret ? ret : count; -} - -/** - * show_scaling_driver - show the cpufreq driver currently loaded - */ -static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf) -{ - return snprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); -} - -/** - * show_scaling_available_governors - show the available CPUfreq governors - */ -static ssize_t show_scaling_available_governors (struct cpufreq_policy * policy, - char *buf) -{ - ssize_t i = 0; - struct cpufreq_governor *t; - - i += sprintf(buf, "performance powersave"); - - if (!cpufreq_driver->target) - goto out; - - list_for_each_entry(t, &cpufreq_governor_list, governor_list) { - if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) - goto out; - i += snprintf(&buf[i], CPUFREQ_NAME_LEN, " %s", t->name); - } - out: - i += sprintf(&buf[i], "\n"); - return i; -} - - -#define define_one_ro(_name) \ -struct freq_attr _name = { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = show_##_name, \ -} - -#define define_one_rw(_name) \ -struct freq_attr _name = { \ - .attr = { .name = __stringify(_name), .mode = 0644 }, \ - .show = show_##_name, \ - .store = store_##_name, \ -} - -define_one_ro(cpuinfo_min_freq); -define_one_ro(cpuinfo_max_freq); -define_one_ro(scaling_available_governors); -define_one_ro(scaling_driver); -define_one_rw(scaling_min_freq); -define_one_rw(scaling_max_freq); -define_one_rw(scaling_governor); - -static struct attribute * default_attrs[] = { - &cpuinfo_min_freq.attr, - &cpuinfo_max_freq.attr, - &scaling_min_freq.attr, - &scaling_max_freq.attr, - &scaling_governor.attr, - &scaling_driver.attr, - &scaling_available_governors.attr, - NULL -}; - -#define to_policy(k) container_of(k,struct cpufreq_policy,kobj) -#define to_attr(a) container_of(a,struct freq_attr,attr) - -static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf) -{ - struct cpufreq_policy * policy = to_policy(kobj); - struct freq_attr * fattr = to_attr(attr); - ssize_t ret; - policy = cpufreq_cpu_get(policy->cpu); - if (!policy) - return -EINVAL; - ret = fattr->show ? 
fattr->show(policy,buf) : 0; - cpufreq_cpu_put(policy); - return ret; -} - -static ssize_t store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) -{ - struct cpufreq_policy * policy = to_policy(kobj); - struct freq_attr * fattr = to_attr(attr); - ssize_t ret; - policy = cpufreq_cpu_get(policy->cpu); - if (!policy) - return -EINVAL; - ret = fattr->store ? fattr->store(policy,buf,count) : 0; - cpufreq_cpu_put(policy); - return ret; -} - -static void cpufreq_sysfs_release(struct kobject * kobj) -{ - struct cpufreq_policy * policy = to_policy(kobj); - complete(&policy->kobj_unregister); -} - -static struct sysfs_ops sysfs_ops = { - .show = show, - .store = store, -}; - -static struct kobj_type ktype_cpufreq = { - .sysfs_ops = &sysfs_ops, - .default_attrs = default_attrs, - .release = cpufreq_sysfs_release, -}; - - -/** - * cpufreq_add_dev - add a CPU device - * - * Adds the cpufreq interface for a CPU device. - */ -static int cpufreq_add_dev (struct sys_device * sys_dev) -{ - unsigned int cpu = sys_dev->id; - int ret = 0; - struct cpufreq_policy new_policy; - struct cpufreq_policy *policy; - struct freq_attr **drv_attr; - unsigned long flags; - - if (!try_module_get(cpufreq_driver->owner)) - return -EINVAL; - - policy = kmalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); - if (!policy) - return -ENOMEM; - memset(policy, 0, sizeof(struct cpufreq_policy)); - - policy->cpu = cpu; - init_MUTEX_LOCKED(&policy->lock); - init_completion(&policy->kobj_unregister); - - /* call driver. From then on the cpufreq must be able - * to accept all calls to ->verify and ->setpolicy for this CPU - */ - ret = cpufreq_driver->init(policy); - if (ret) - goto err_out; - - memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); - - /* prepare interface data */ - policy->kobj.parent = &sys_dev->kobj; - policy->kobj.ktype = &ktype_cpufreq; - strlcpy(policy->kobj.name, "cpufreq", KOBJ_NAME_LEN); - - ret = kobject_register(&policy->kobj); - if (ret) - goto err_out; - - /* set up files for this cpu device */ - drv_attr = cpufreq_driver->attr; - while ((drv_attr) && (*drv_attr)) { - sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); - drv_attr++; - } - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - cpufreq_cpu_data[cpu] = policy; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - up(&policy->lock); - - /* set default policy */ - ret = cpufreq_set_policy(&new_policy); - if (ret) - goto err_out_unregister; - - module_put(cpufreq_driver->owner); - return 0; - - - err_out_unregister: - spin_lock_irqsave(&cpufreq_driver_lock, flags); - cpufreq_cpu_data[cpu] = NULL; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - kobject_unregister(&policy->kobj); - wait_for_completion(&policy->kobj_unregister); - - err_out: - kfree(policy); - module_put(cpufreq_driver->owner); - return ret; -} - - -/** - * cpufreq_remove_dev - remove a CPU device - * - * Removes the cpufreq interface for a CPU device. 
- */ -static int cpufreq_remove_dev (struct sys_device * sys_dev) -{ - unsigned int cpu = sys_dev->id; - unsigned long flags; - struct cpufreq_policy *data; - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - data = cpufreq_cpu_data[cpu]; - - if (!data) { - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - return -EINVAL; - } - cpufreq_cpu_data[cpu] = NULL; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - if (!kobject_get(&data->kobj)) - return -EFAULT; - - kobject_unregister(&data->kobj); - - kobject_put(&data->kobj); - - /* we need to make sure that the underlying kobj is actually - * not referenced anymore by anybody before we proceed with - * unloading. - */ - wait_for_completion(&data->kobj_unregister); - - if (cpufreq_driver->target) - __cpufreq_governor(data, CPUFREQ_GOV_STOP); - - if (cpufreq_driver->exit) - cpufreq_driver->exit(data); - - kfree(data); - - return 0; -} - -/** - * cpufreq_resume - restore the CPU clock frequency after resume - * - * Restore the CPU clock frequency so that our idea of the current - * frequency reflects the actual hardware. - */ -static int cpufreq_resume(struct sys_device * sysdev) -{ - int cpu = sysdev->id; - unsigned int ret = 0; - struct cpufreq_policy *cpu_policy; - - if (!cpu_online(cpu)) - return 0; - - /* we may be lax here as interrupts are off. Nonetheless - * we need to grab the correct cpu policy, as to check - * whether we really run on this CPU. - */ - - cpu_policy = cpufreq_cpu_get(cpu); - if (!cpu_policy) - return -EINVAL; - - if (cpufreq_driver->setpolicy) - ret = cpufreq_driver->setpolicy(cpu_policy); - else - /* CPUFREQ_RELATION_H or CPUFREQ_RELATION_L have the same effect here, as cpu_policy->cur is known - * to be a valid and exact target frequency - */ - ret = cpufreq_driver->target(cpu_policy, cpu_policy->cur, CPUFREQ_RELATION_H); - - cpufreq_cpu_put(cpu_policy); - - return ret; -} - -static struct sysdev_driver cpufreq_sysdev_driver = { - .add = cpufreq_add_dev, - .remove = cpufreq_remove_dev, - .resume = cpufreq_resume, -}; - - -/********************************************************************* - * NOTIFIER LISTS INTERFACE * - *********************************************************************/ - -/** - * cpufreq_register_notifier - register a driver with cpufreq - * @nb: notifier function to register - * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER - * - * Add a driver to one of two lists: either a list of drivers that - * are notified about clock rate changes (once before and once after - * the transition), or a list of drivers that are notified about - * changes in cpufreq policy. - * - * This function may sleep, and has the same return conditions as - * notifier_chain_register. - */ -int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) -{ - int ret; - - down_write(&cpufreq_notifier_rwsem); - switch (list) { - case CPUFREQ_TRANSITION_NOTIFIER: - ret = notifier_chain_register(&cpufreq_transition_notifier_list, nb); - break; - case CPUFREQ_POLICY_NOTIFIER: - ret = notifier_chain_register(&cpufreq_policy_notifier_list, nb); - break; - default: - ret = -EINVAL; - } - up_write(&cpufreq_notifier_rwsem); - - return ret; -} -EXPORT_SYMBOL(cpufreq_register_notifier); - - -/** - * cpufreq_unregister_notifier - unregister a driver with cpufreq - * @nb: notifier block to be unregistered - * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER - * - * Remove a driver from the CPU frequency notifier list. 
- * - * This function may sleep, and has the same return conditions as - * notifier_chain_unregister. - */ -int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) -{ - int ret; - - down_write(&cpufreq_notifier_rwsem); - switch (list) { - case CPUFREQ_TRANSITION_NOTIFIER: - ret = notifier_chain_unregister(&cpufreq_transition_notifier_list, nb); - break; - case CPUFREQ_POLICY_NOTIFIER: - ret = notifier_chain_unregister(&cpufreq_policy_notifier_list, nb); - break; - default: - ret = -EINVAL; - } - up_write(&cpufreq_notifier_rwsem); - - return ret; -} -EXPORT_SYMBOL(cpufreq_unregister_notifier); - - -/********************************************************************* - * GOVERNORS * - *********************************************************************/ - - -int __cpufreq_driver_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - return cpufreq_driver->target(policy, target_freq, relation); -} -EXPORT_SYMBOL_GPL(__cpufreq_driver_target); - - -int cpufreq_driver_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int ret; - - policy = cpufreq_cpu_get(policy->cpu); - if (!policy) - return -EINVAL; - - down(&policy->lock); - - ret = __cpufreq_driver_target(policy, target_freq, relation); - - up(&policy->lock); - - cpufreq_cpu_put(policy); - - return ret; -} -EXPORT_SYMBOL_GPL(cpufreq_driver_target); - - -static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) -{ - int ret = 0; - - switch (policy->policy) { - case CPUFREQ_POLICY_POWERSAVE: - if ((event == CPUFREQ_GOV_LIMITS) || (event == CPUFREQ_GOV_START)) { - ret = __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - } - break; - case CPUFREQ_POLICY_PERFORMANCE: - if ((event == CPUFREQ_GOV_LIMITS) || (event == CPUFREQ_GOV_START)) { - ret = __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - } - break; - case CPUFREQ_POLICY_GOVERNOR: - ret = -EINVAL; - if (!try_module_get(policy->governor->owner)) - break; - ret = policy->governor->governor(policy, event); - /* we keep one module reference alive for each CPU governed by this CPU */ - if ((event != CPUFREQ_GOV_START) || ret) - module_put(policy->governor->owner); - if ((event == CPUFREQ_GOV_STOP) && !ret) - module_put(policy->governor->owner); - break; - default: - ret = -EINVAL; - } - - return ret; -} - - -int cpufreq_governor(unsigned int cpu, unsigned int event) -{ - int ret = 0; - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - - if (!policy) - return -EINVAL; - - down(&policy->lock); - ret = __cpufreq_governor(policy, event); - up(&policy->lock); - - cpufreq_cpu_put(policy); - - return ret; -} -EXPORT_SYMBOL_GPL(cpufreq_governor); - - -int cpufreq_register_governor(struct cpufreq_governor *governor) -{ - struct cpufreq_governor *t; - - if (!governor) - return -EINVAL; - - if (!strnicmp(governor->name,"powersave",CPUFREQ_NAME_LEN)) - return -EBUSY; - if (!strnicmp(governor->name,"performance",CPUFREQ_NAME_LEN)) - return -EBUSY; - - down(&cpufreq_governor_sem); - - list_for_each_entry(t, &cpufreq_governor_list, governor_list) { - if (!strnicmp(governor->name,t->name,CPUFREQ_NAME_LEN)) { - up(&cpufreq_governor_sem); - return -EBUSY; - } - } - list_add(&governor->governor_list, &cpufreq_governor_list); - - up(&cpufreq_governor_sem); - - return 0; -} -EXPORT_SYMBOL_GPL(cpufreq_register_governor); - - -void cpufreq_unregister_governor(struct cpufreq_governor *governor) -{ - if (!governor) - return; - - 
down(&cpufreq_governor_sem); - list_del(&governor->governor_list); - up(&cpufreq_governor_sem); - return; -} -EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); - - - -/********************************************************************* - * POLICY INTERFACE * - *********************************************************************/ - -/** - * cpufreq_get_policy - get the current cpufreq_policy - * @policy: struct cpufreq_policy into which the current cpufreq_policy is written - * - * Reads the current cpufreq policy. - */ -int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) -{ - struct cpufreq_policy *cpu_policy; - if (!policy) - return -EINVAL; - - cpu_policy = cpufreq_cpu_get(cpu); - if (!cpu_policy) - return -EINVAL; - - down(&cpu_policy->lock); - memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); - up(&cpu_policy->lock); - - cpufreq_cpu_put(cpu_policy); - - return 0; -} -EXPORT_SYMBOL(cpufreq_get_policy); - - -/** - * cpufreq_set_policy - set a new CPUFreq policy - * @policy: policy to be set. - * - * Sets a new CPU frequency and voltage scaling policy. - */ -int cpufreq_set_policy(struct cpufreq_policy *policy) -{ - int ret = 0; - struct cpufreq_policy *data; - - if (!policy) - return -EINVAL; - - data = cpufreq_cpu_get(policy->cpu); - if (!data) - return -EINVAL; - - /* lock this CPU */ - down(&data->lock); - - memcpy(&policy->cpuinfo, - &data->cpuinfo, - sizeof(struct cpufreq_cpuinfo)); - - /* verify the cpu speed can be set within this limit */ - ret = cpufreq_driver->verify(policy); - if (ret) - goto error_out; - - down_read(&cpufreq_notifier_rwsem); - - /* adjust if necessary - all reasons */ - notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_ADJUST, - policy); - - /* adjust if necessary - hardware incompatibility*/ - notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_INCOMPATIBLE, - policy); - - /* verify the cpu speed can be set within this limit, - which might be different to the first one */ - ret = cpufreq_driver->verify(policy); - if (ret) { - up_read(&cpufreq_notifier_rwsem); - goto error_out; - } - - /* notification of the new policy */ - notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_NOTIFY, - policy); - - up_read(&cpufreq_notifier_rwsem); - - data->min = policy->min; - data->max = policy->max; - - if (cpufreq_driver->setpolicy) { - data->policy = policy->policy; - ret = cpufreq_driver->setpolicy(policy); - } else { - if ((policy->policy != data->policy) || - ((policy->policy == CPUFREQ_POLICY_GOVERNOR) && (policy->governor != data->governor))) { - /* save old, working values */ - unsigned int old_pol = data->policy; - struct cpufreq_governor *old_gov = data->governor; - - /* end old governor */ - __cpufreq_governor(data, CPUFREQ_GOV_STOP); - - /* start new governor */ - data->policy = policy->policy; - data->governor = policy->governor; - if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { - /* new governor failed, so re-start old one */ - data->policy = old_pol; - data->governor = old_gov; - __cpufreq_governor(data, CPUFREQ_GOV_START); - } - /* might be a policy change, too, so fall through */ - } - __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); - } - - error_out: - up(&data->lock); - cpufreq_cpu_put(data); - - return ret; -} -EXPORT_SYMBOL(cpufreq_set_policy); - - - -/********************************************************************* - * EXTERNALLY AFFECTING FREQUENCY CHANGES * - *********************************************************************/ - -/** - * adjust_jiffies - adjust the system "loops_per_jiffy" 
- * - * This function alters the system "loops_per_jiffy" for the clock - * speed change. Note that loops_per_jiffy cannot be updated on SMP - * systems as each CPU might be scaled differently. So, use the arch - * per-CPU loops_per_jiffy value wherever possible. - */ -#ifndef CONFIG_SMP -static unsigned long l_p_j_ref; -static unsigned int l_p_j_ref_freq; - -static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) -{ - if (!l_p_j_ref_freq) { - l_p_j_ref = loops_per_jiffy; - l_p_j_ref_freq = ci->old; - } - if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || - (val == CPUFREQ_POSTCHANGE && ci->old > ci->new)) - loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new); -} -#else -#define adjust_jiffies(x...) do {} while (0) -#endif - - -/** - * cpufreq_notify_transition - call notifier chain and adjust_jiffies on frequency transition - * - * This function calls the transition notifiers and the "adjust_jiffies" function. It is called - * twice on all CPU frequency changes that have external effects. - */ -void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) -{ - if (irqs_disabled()) - return; /* Only valid if we're in the resume process where - * everyone knows what CPU frequency we are at */ - - down_read(&cpufreq_notifier_rwsem); - switch (state) { - case CPUFREQ_PRECHANGE: - notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_PRECHANGE, freqs); - adjust_jiffies(CPUFREQ_PRECHANGE, freqs); - break; - case CPUFREQ_POSTCHANGE: - adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); - notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); - cpufreq_cpu_data[freqs->cpu]->cur = freqs->new; - break; - } - up_read(&cpufreq_notifier_rwsem); -} -EXPORT_SYMBOL_GPL(cpufreq_notify_transition); - - - -/********************************************************************* - * REGISTER / UNREGISTER CPUFREQ DRIVER * - *********************************************************************/ - -/** - * cpufreq_register_driver - register a CPU Frequency driver - * @driver_data: A struct cpufreq_driver containing the values# - * submitted by the CPU Frequency driver. - * - * Registers a CPU Frequency driver to this core code. This code - * returns zero on success, -EBUSY when another driver got here first - * (and isn't unregistered in the meantime). - * - */ -int cpufreq_register_driver(struct cpufreq_driver *driver_data) -{ - unsigned long flags; - - if (!driver_data || !driver_data->verify || !driver_data->init || - ((!driver_data->setpolicy) && (!driver_data->target))) - return -EINVAL; - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - if (cpufreq_driver) { - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - return -EBUSY; - } - cpufreq_driver = driver_data; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - return sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver); -} -EXPORT_SYMBOL_GPL(cpufreq_register_driver); - - -/** - * cpufreq_unregister_driver - unregister the current CPUFreq driver - * - * Unregister the current CPUFreq driver. Only call this if you have - * the right to do so, i.e. if you have succeeded in initialising before! - * Returns zero if successful, and -EINVAL if the cpufreq_driver is - * currently not initialised. 
- */ -int cpufreq_unregister_driver(struct cpufreq_driver *driver) -{ - unsigned long flags; - - if (!cpufreq_driver || (driver != cpufreq_driver)) - return -EINVAL; - - sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); - - spin_lock_irqsave(&cpufreq_driver_lock, flags); - cpufreq_driver = NULL; - spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - - return 0; -} -EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); diff --git a/kernel/futex.c b/kernel/futex.c index 4557addfc6d6..a4feceee661a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -5,6 +5,9 @@ * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar * (C) Copyright 2003 Red Hat Inc, All Rights Reserved * + * Removed page pinning, fix privately mapped COW pages and other cleanups + * (C) Copyright 2003 Jamie Lokier + * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. @@ -33,12 +36,32 @@ #include <linux/hash.h> #include <linux/init.h> #include <linux/futex.h> -#include <linux/vcache.h> #include <linux/mount.h> +#include <linux/pagemap.h> #define FUTEX_HASHBITS 8 /* + * Futexes are matched on equal values of this key. + * The key type depends on whether it's a shared or private mapping. + */ +union futex_key { + struct { + unsigned long pgoff; + struct inode *inode; + } shared; + struct { + unsigned long uaddr; + struct mm_struct *mm; + } private; + struct { + unsigned long word; + void *ptr; + } both; + int offset; +}; + +/* * We use this hashed waitqueue instead of a normal wait_queue_t, so * we can wake only the relevant ones (hashed queues may be shared): */ @@ -46,12 +69,8 @@ struct futex_q { struct list_head list; wait_queue_head_t waiters; - /* Page struct and offset within it. */ - struct page *page; - int offset; - - /* the virtual => physical COW-safe cache */ - vcache_t vcache; + /* Key which the futex is hashed on. */ + union futex_key key; /* For fd, sigio sent using these. */ int fd; @@ -66,111 +85,149 @@ static spinlock_t futex_lock = SPIN_LOCK_UNLOCKED; static struct vfsmount *futex_mnt; /* - * These are all locks that are necessery to look up a physical - * mapping safely, and modify/search the futex hash, atomically: + * We hash on the keys returned from get_futex_key (see below). */ -static inline void lock_futex_mm(void) +static inline struct list_head *hash_futex(union futex_key *key) { - spin_lock(¤t->mm->page_table_lock); - spin_lock(&vcache_lock); - spin_lock(&futex_lock); -} - -static inline void unlock_futex_mm(void) -{ - spin_unlock(&futex_lock); - spin_unlock(&vcache_lock); - spin_unlock(¤t->mm->page_table_lock); + return &futex_queues[hash_long(key->both.word + + (unsigned long) key->both.ptr + + key->offset, FUTEX_HASHBITS)]; } /* - * The physical page is shared, so we can hash on its address: + * Return 1 if two futex_keys are equal, 0 otherwise. */ -static inline struct list_head *hash_futex(struct page *page, int offset) +static inline int match_futex(union futex_key *key1, union futex_key *key2) { - return &futex_queues[hash_long((unsigned long)page + offset, - FUTEX_HASHBITS)]; + return (key1->both.word == key2->both.word + && key1->both.ptr == key2->both.ptr + && key1->offset == key2->offset); } /* - * Get kernel address of the user page and pin it. + * Get parameters which are the keys for a futex. + * + * For shared mappings, it's (page->index, vma->vm_file->f_dentry->d_inode, + * offset_within_page). 
For private mappings, it's (uaddr, current->mm). + * We can usually work out the index without swapping in the page. * - * Must be called with (and returns with) all futex-MM locks held. + * Returns: 0, or negative error code. + * The key words are stored in *key on success. + * + * Should be called with ¤t->mm->mmap_sem, + * but NOT &futex_lock or ¤t->mm->page_table_lock. */ -static inline struct page *__pin_page_atomic (struct page *page) -{ - if (!PageReserved(page)) - get_page(page); - return page; -} - -static struct page *__pin_page(unsigned long addr) +static int get_futex_key(unsigned long uaddr, union futex_key *key) { struct mm_struct *mm = current->mm; - struct page *page, *tmp; + struct vm_area_struct *vma; + struct page *page; int err; /* - * Do a quick atomic lookup first - this is the fastpath. + * The futex address must be "naturally" aligned. + */ + key->offset = uaddr % PAGE_SIZE; + if (unlikely((key->offset % sizeof(u32)) != 0)) + return -EINVAL; + uaddr -= key->offset; + + /* + * The futex is hashed differently depending on whether + * it's in a shared or private mapping. So check vma first. + */ + vma = find_extend_vma(mm, uaddr); + if (unlikely(!vma)) + return -EFAULT; + + /* + * Permissions. */ - page = follow_page(mm, addr, 0); - if (likely(page != NULL)) - return __pin_page_atomic(page); + if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) + return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; /* - * No luck - need to fault in the page: + * Private mappings are handled in a simple way. + * + * NOTE: When userspace waits on a MAP_SHARED mapping, even if + * it's a read-only handle, it's expected that futexes attach to + * the object not the particular process. Therefore we use + * VM_MAYSHARE here, not VM_SHARED which is restricted to shared + * mappings of _writable_ handles. */ -repeat_lookup: + if (likely(!(vma->vm_flags & VM_MAYSHARE))) { + key->private.mm = mm; + key->private.uaddr = uaddr; + return 0; + } - unlock_futex_mm(); + /* + * Linear mappings are also simple. + */ + key->shared.inode = vma->vm_file->f_dentry->d_inode; + if (likely(!(vma->vm_flags & VM_NONLINEAR))) { + key->shared.pgoff = (((uaddr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff); + return 0; + } - down_read(&mm->mmap_sem); - err = get_user_pages(current, mm, addr, 1, 0, 0, &page, NULL); - up_read(&mm->mmap_sem); + /* + * We could walk the page table to read the non-linear + * pte, and get the page index without fetching the page + * from swap. But that's a lot of code to duplicate here + * for a rare case, so we simply fetch the page. + */ - lock_futex_mm(); + /* + * Do a quick atomic lookup first - this is the fastpath. + */ + spin_lock(¤t->mm->page_table_lock); + page = follow_page(mm, uaddr, 0); + if (likely(page != NULL)) { + key->shared.pgoff = + page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + spin_unlock(¤t->mm->page_table_lock); + return 0; + } + spin_unlock(¤t->mm->page_table_lock); - if (err < 0) - return NULL; /* - * Since the faulting happened with locks released, we have to - * check for races: + * Do it the general way. 
*/ - tmp = follow_page(mm, addr, 0); - if (tmp != page) { + err = get_user_pages(current, mm, uaddr, 1, 0, 0, &page, NULL); + if (err >= 0) { + key->shared.pgoff = + page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); put_page(page); - goto repeat_lookup; } - - return page; + return err; } + /* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ -static inline int futex_wake(unsigned long uaddr, int offset, int num) +static inline int futex_wake(unsigned long uaddr, int num) { struct list_head *i, *next, *head; - struct page *page; - int ret = 0; + union futex_key key; + int ret; - lock_futex_mm(); + down_read(¤t->mm->mmap_sem); - page = __pin_page(uaddr - offset); - if (!page) { - unlock_futex_mm(); - return -EFAULT; - } + ret = get_futex_key(uaddr, &key); + if (unlikely(ret != 0)) + goto out; - head = hash_futex(page, offset); + head = hash_futex(&key); + spin_lock(&futex_lock); list_for_each_safe(i, next, head) { struct futex_q *this = list_entry(i, struct futex_q, list); - if (this->page == page && this->offset == offset) { + if (match_futex (&this->key, &key)) { list_del_init(i); - __detach_vcache(&this->vcache); wake_up_all(&this->waiters); if (this->filp) send_sigio(&this->filp->f_owner, this->fd, POLL_IN); @@ -179,113 +236,74 @@ static inline int futex_wake(unsigned long uaddr, int offset, int num) break; } } + spin_unlock(&futex_lock); - unlock_futex_mm(); - put_page(page); - +out: + up_read(¤t->mm->mmap_sem); return ret; } /* - * This gets called by the COW code, we have to rehash any - * futexes that were pending on the old physical page, and - * rehash it to the new physical page. The pagetable_lock - * and vcache_lock is already held: - */ -static void futex_vcache_callback(vcache_t *vcache, struct page *new_page) -{ - struct futex_q *q = container_of(vcache, struct futex_q, vcache); - struct list_head *head = hash_futex(new_page, q->offset); - - spin_lock(&futex_lock); - - if (!list_empty(&q->list)) { - put_page(q->page); - q->page = new_page; - __pin_page_atomic(new_page); - list_del(&q->list); - list_add_tail(&q->list, head); - } - - spin_unlock(&futex_lock); -} - -/* * Requeue all waiters hashed on one physical page to another * physical page. 
*/ -static inline int futex_requeue(unsigned long uaddr1, int offset1, - unsigned long uaddr2, int offset2, int nr_wake, int nr_requeue) +static inline int futex_requeue(unsigned long uaddr1, unsigned long uaddr2, + int nr_wake, int nr_requeue) { struct list_head *i, *next, *head1, *head2; - struct page *page1 = NULL, *page2 = NULL; - int ret = 0; + union futex_key key1, key2; + int ret; - lock_futex_mm(); + down_read(¤t->mm->mmap_sem); - page1 = __pin_page(uaddr1 - offset1); - if (!page1) + ret = get_futex_key(uaddr1, &key1); + if (unlikely(ret != 0)) goto out; - page2 = __pin_page(uaddr2 - offset2); - if (!page2) + ret = get_futex_key(uaddr2, &key2); + if (unlikely(ret != 0)) goto out; - head1 = hash_futex(page1, offset1); - head2 = hash_futex(page2, offset2); + head1 = hash_futex(&key1); + head2 = hash_futex(&key2); + spin_lock(&futex_lock); list_for_each_safe(i, next, head1) { struct futex_q *this = list_entry(i, struct futex_q, list); - if (this->page == page1 && this->offset == offset1) { + if (match_futex (&this->key, &key1)) { list_del_init(i); - __detach_vcache(&this->vcache); if (++ret <= nr_wake) { wake_up_all(&this->waiters); if (this->filp) send_sigio(&this->filp->f_owner, this->fd, POLL_IN); } else { - put_page(this->page); - __pin_page_atomic (page2); list_add_tail(i, head2); - __attach_vcache(&this->vcache, uaddr2, - current->mm, futex_vcache_callback); - this->offset = offset2; - this->page = page2; + this->key = key2; if (ret - nr_wake >= nr_requeue) break; } } } + spin_unlock(&futex_lock); out: - unlock_futex_mm(); - - if (page1) - put_page(page1); - if (page2) - put_page(page2); - + up_read(¤t->mm->mmap_sem); return ret; } -static inline void __queue_me(struct futex_q *q, struct page *page, - unsigned long uaddr, int offset, - int fd, struct file *filp) +static inline void queue_me(struct futex_q *q, union futex_key *key, + int fd, struct file *filp) { - struct list_head *head = hash_futex(page, offset); + struct list_head *head = hash_futex(key); - q->offset = offset; + q->key = *key; q->fd = fd; q->filp = filp; - q->page = page; + spin_lock(&futex_lock); list_add_tail(&q->list, head); - /* - * We register a futex callback to this virtual address, - * to make sure a COW properly rehashes the futex-queue. - */ - __attach_vcache(&q->vcache, uaddr, current->mm, futex_vcache_callback); + spin_unlock(&futex_lock); } /* Return 1 if we were still queued (ie. 0 means we were woken) */ @@ -293,83 +311,107 @@ static inline int unqueue_me(struct futex_q *q) { int ret = 0; - spin_lock(&vcache_lock); spin_lock(&futex_lock); if (!list_empty(&q->list)) { list_del(&q->list); - __detach_vcache(&q->vcache); ret = 1; } spin_unlock(&futex_lock); - spin_unlock(&vcache_lock); return ret; } -static inline int futex_wait(unsigned long uaddr, - int offset, - int val, - unsigned long time) +static inline int futex_wait(unsigned long uaddr, int val, unsigned long time) { DECLARE_WAITQUEUE(wait, current); - int ret = 0, curval; - struct page *page; + int ret, curval; + union futex_key key; struct futex_q q; + try_again: init_waitqueue_head(&q.waiters); - lock_futex_mm(); + down_read(¤t->mm->mmap_sem); - page = __pin_page(uaddr - offset); - if (!page) { - unlock_futex_mm(); - return -EFAULT; - } - __queue_me(&q, page, uaddr, offset, -1, NULL); + ret = get_futex_key(uaddr, &key); + if (unlikely(ret != 0)) + goto out_release_sem; + + queue_me(&q, &key, -1, NULL); /* - * Page is pinned, but may no longer be in this address space. - * It cannot schedule, so we access it with the spinlock held. 
+ * Access the page after the futex is queued. + * We hold the mmap semaphore, so the mapping cannot have changed + * since we looked it up. */ if (get_user(curval, (int *)uaddr) != 0) { - unlock_futex_mm(); ret = -EFAULT; - goto out; + goto out_unqueue; } if (curval != val) { - unlock_futex_mm(); ret = -EWOULDBLOCK; - goto out; + goto out_unqueue; } + + /* + * Now the futex is queued and we have checked the data, we + * don't want to hold mmap_sem while we sleep. + */ + up_read(¤t->mm->mmap_sem); + /* - * The get_user() above might fault and schedule so we - * cannot just set TASK_INTERRUPTIBLE state when queueing - * ourselves into the futex hash. This code thus has to + * There might have been scheduling since the queue_me(), as we + * cannot hold a spinlock across the get_user() in case it + * faults. So we cannot just set TASK_INTERRUPTIBLE state when + * queueing ourselves into the futex hash. This code thus has to * rely on the futex_wake() code doing a wakeup after removing * the waiter from the list. */ add_wait_queue(&q.waiters, &wait); + spin_lock(&futex_lock); set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&q.list)) { - unlock_futex_mm(); - time = schedule_timeout(time); + + if (unlikely(list_empty(&q.list))) { + /* + * We were woken already. + */ + spin_unlock(&futex_lock); + set_current_state(TASK_RUNNING); + return 0; } + + spin_unlock(&futex_lock); + time = schedule_timeout(time); set_current_state(TASK_RUNNING); + /* * NOTE: we don't remove ourselves from the waitqueue because * we are the only user of it. */ - if (time == 0) { - ret = -ETIMEDOUT; - goto out; - } + + /* + * Were we woken or interrupted for a valid reason? + */ + ret = unqueue_me(&q); + if (ret == 0) + return 0; + if (time == 0) + return -ETIMEDOUT; if (signal_pending(current)) - ret = -EINTR; -out: - /* Were we woken up anyway? */ + return -EINTR; + + /* + * No, it was a spurious wakeup. Try again. Should never happen. :) + */ + goto try_again; + + out_unqueue: + /* + * Were we unqueued anyway? + */ if (!unqueue_me(&q)) ret = 0; - put_page(q.page); - + out_release_sem: + up_read(¤t->mm->mmap_sem); return ret; } @@ -378,7 +420,6 @@ static int futex_close(struct inode *inode, struct file *filp) struct futex_q *q = filp->private_data; unqueue_me(q); - put_page(q->page); kfree(filp->private_data); return 0; } @@ -406,12 +447,12 @@ static struct file_operations futex_fops = { /* Signal allows caller to avoid the race which would occur if they set the sigio stuff up afterwards. 
*/ -static int futex_fd(unsigned long uaddr, int offset, int signal) +static int futex_fd(unsigned long uaddr, int signal) { - struct page *page = NULL; struct futex_q *q; + union futex_key key; struct file *filp; - int ret; + int ret, err; ret = -EINVAL; if (signal < 0 || signal > _NSIG) @@ -450,69 +491,47 @@ static int futex_fd(unsigned long uaddr, int offset, int signal) goto out; } - lock_futex_mm(); - - page = __pin_page(uaddr - offset); - if (!page) { - unlock_futex_mm(); + down_read(¤t->mm->mmap_sem); + err = get_futex_key(uaddr, &key); + up_read(¤t->mm->mmap_sem); + if (unlikely(err != 0)) { put_unused_fd(ret); put_filp(filp); kfree(q); - return -EFAULT; + return err; } init_waitqueue_head(&q->waiters); filp->private_data = q; - __queue_me(q, page, uaddr, offset, ret, filp); - - unlock_futex_mm(); + queue_me(q, &key, ret, filp); /* Now we map fd to filp, so userspace can access it */ fd_install(ret, filp); - page = NULL; out: - if (page) - put_page(page); return ret; } long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2) { - unsigned long pos_in_page; int ret; - pos_in_page = uaddr % PAGE_SIZE; - - /* Must be "naturally" aligned */ - if (pos_in_page % sizeof(u32)) - return -EINVAL; - switch (op) { case FUTEX_WAIT: - ret = futex_wait(uaddr, pos_in_page, val, timeout); + ret = futex_wait(uaddr, val, timeout); break; case FUTEX_WAKE: - ret = futex_wake(uaddr, pos_in_page, val); + ret = futex_wake(uaddr, val); break; case FUTEX_FD: /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ - ret = futex_fd(uaddr, pos_in_page, val); + ret = futex_fd(uaddr, val); break; case FUTEX_REQUEUE: - { - unsigned long pos_in_page2 = uaddr2 % PAGE_SIZE; - - /* Must be "naturally" aligned */ - if (pos_in_page2 % sizeof(u32)) - return -EINVAL; - - ret = futex_requeue(uaddr, pos_in_page, uaddr2, pos_in_page2, - val, val2); + ret = futex_requeue(uaddr, uaddr2, val, val2); break; - } default: ret = -ENOSYS; } diff --git a/kernel/ksyms.c b/kernel/ksyms.c index e503bd8b0349..9f61a0496c2a 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -511,7 +511,6 @@ EXPORT_SYMBOL(vsnprintf); EXPORT_SYMBOL(vsscanf); EXPORT_SYMBOL(__bdevname); EXPORT_SYMBOL(bdevname); -EXPORT_SYMBOL(cdevname); EXPORT_SYMBOL(simple_strtoull); EXPORT_SYMBOL(simple_strtoul); EXPORT_SYMBOL(simple_strtol); |
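With the page pinning removed, a futex is located purely by key: get_futex_key() above produces (inode, pgoff) for shared mappings and (mm, uaddr) for private ones, and hash_futex() folds the key words plus the in-page offset into one of 2^FUTEX_HASHBITS buckets. A minimal userspace sketch of that fold, assuming the 32-bit golden-ratio multiplier 0x9e370001 that hash_long() used in kernels of this era; the values in main() are made up for illustration:

#include <stdint.h>
#include <stdio.h>

#define FUTEX_HASHBITS 8                /* 2^8 = 256 hash buckets, as above */
#define GOLDEN_RATIO_PRIME 0x9e370001u  /* 32-bit hash_long() multiplier */

/* Userspace stand-in for the kernel's hash_long() on a 32-bit build:
 * multiply by the golden-ratio prime, keep the top `bits` bits. */
static uint32_t hash_long(uint32_t val, unsigned int bits)
{
	return (val * GOLDEN_RATIO_PRIME) >> (32 - bits);
}

/* Bucket index for one futex key: both key words and the in-page offset
 * are summed and hashed, mirroring the new hash_futex(). */
static uint32_t futex_bucket(uint32_t word, uint32_t ptr, int offset)
{
	return hash_long(word + ptr + (uint32_t) offset, FUTEX_HASHBITS);
}

int main(void)
{
	/* Hypothetical private key: a page-aligned uaddr plus an mm pointer. */
	printf("bucket = %u\n", futex_bucket(0x08048000u, 0xc1234560u, 16));
	return 0;
}

Because the key no longer contains a struct page, a COW fault on a privately mapped futex page no longer needs the vcache callback to rehash waiters: the (mm, uaddr) pair stays stable across the copy, which is exactly the "fix privately mapped COW pages" half of this commit.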
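The -EWOULDBLOCK path in futex_wait() is what makes the interface usable without lost wakeups: the waiter is hashed onto the queue before the word is re-read, so a store plus FUTEX_WAKE racing with the sleep is detected rather than missed. A sketch of the calling convention from userspace, assuming glibc's raw syscall() entry point and the opcode constants from <linux/futex.h>; the flag protocol is illustrative only:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>        /* FUTEX_WAIT, FUTEX_WAKE */

/* glibc ships no futex() wrapper, so issue the raw system call. */
static long futex(int *uaddr, int op, int val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static int flag;                /* the futex word, shared by both sides */

static void wait_for_flag(void)
{
	/* Sleep only while the word still holds 0.  If the value changed
	 * between our check and the kernel's, FUTEX_WAIT fails with
	 * EWOULDBLOCK and we simply re-test. */
	while (flag == 0) {
		if (futex(&flag, FUTEX_WAIT, 0) == -1 &&
		    errno != EWOULDBLOCK && errno != EINTR) {
			perror("FUTEX_WAIT");
			break;
		}
	}
}

static void set_flag(void)
{
	flag = 1;                       /* publish the new value first...  */
	futex(&flag, FUTEX_WAKE, 1);    /* ...then wake at most one waiter */
}

int main(void)
{
	set_flag();             /* single-threaded demo: set, then observe */
	wait_for_flag();
	return 0;
}

FUTEX_REQUEUE uses the same entry point with a second futex address: waiters beyond the first val are moved to uaddr2 rather than woken (the requeue count travels in the timeout slot), which avoids a thundering herd on pthread_cond_broadcast-style wakeups.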
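Userspace reaches the per-CPU policy attributes defined in the deleted cpufreq.c through sysfs. Given the kobject wiring above (a "cpufreq" kobject parented to each cpu sysdev), the files should appear under /sys/devices/system/cpu/cpuN/cpufreq/, though the mount point is an assumption here. A sketch of selecting the built-in powersave policy:

#include <stdio.h>

int main(void)
{
	/* store_scaling_governor() parses this string; "performance",
	 * "powersave" and any registered governor name are accepted. */
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor",
			"w");

	if (!f) {
		perror("scaling_governor");
		return 1;
	}
	fputs("powersave\n", f);
	fclose(f);
	return 0;
}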
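Kernel code that must react to speed changes registers on the transition notifier chain, which cpufreq_notify_transition() above calls once with CPUFREQ_PRECHANGE and once with CPUFREQ_POSTCHANGE around every externally visible change. A sketch of a minimal consumer module, using only interfaces shown in the deleted file; the function and module names are made up:

#include <linux/module.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/cpufreq.h>

/* Called twice per transition with a struct cpufreq_freqs (cpu, old, new). */
static int sample_freq_event(struct notifier_block *nb, unsigned long state,
			     void *data)
{
	struct cpufreq_freqs *freqs = data;

	if (state == CPUFREQ_POSTCHANGE)
		printk(KERN_INFO "cpu%u: %u kHz -> %u kHz\n",
		       freqs->cpu, freqs->old, freqs->new);
	return NOTIFY_OK;
}

static struct notifier_block sample_freq_notifier = {
	.notifier_call = sample_freq_event,
};

static int __init sample_init(void)
{
	return cpufreq_register_notifier(&sample_freq_notifier,
					 CPUFREQ_TRANSITION_NOTIFIER);
}

static void __exit sample_exit(void)
{
	cpufreq_unregister_notifier(&sample_freq_notifier,
				    CPUFREQ_TRANSITION_NOTIFIER);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");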
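Those PRE/POST hooks are also where adjust_jiffies() keeps udelay() honest on UP machines: loops_per_jiffy is rescaled linearly with clock speed (lpj_new = lpj_ref * freq_new / freq_ref), applied before a speed increase but only after a decrease, so a delay loop can only ever run long, never short. A sketch of the proportionality, assuming plain 64-bit arithmetic in place of the kernel's fixed-point cpufreq_scale() helper:

#include <stdio.h>

/* Rescale a reference loops_per_jiffy for a new core clock (both in kHz).
 * The kernel uses a fixed-point helper to sidestep the 64-bit division;
 * plain arithmetic is clearer for illustration. */
static unsigned long scale_lpj(unsigned long lpj_ref,
			       unsigned int ref_khz, unsigned int new_khz)
{
	return (unsigned long) (((unsigned long long) lpj_ref * new_khz)
				/ ref_khz);
}

int main(void)
{
	/* Illustrative numbers: calibrated at 1200 MHz, throttled to 600. */
	printf("lpj at 600 MHz: %lu\n", scale_lpj(5980160UL, 1200000, 600000));
	return 0;
}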
