/*
 *  linux/drivers/cpufreq/cpufreq.c
 *
 *  Copyright (C) 2001 Russell King
 *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
 *            (C) 2013 Viresh Kumar <viresh.kumar@linaro.org>
 *
 *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
 *      Added handling for CPU hotplug
 *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
 *      Fix handling for CPU hotplug -- affected CPUs
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
#include <trace/events/power.h>

/* Macros to iterate over lists */
/* Iterate over online CPUs policies */
static LIST_HEAD(cpufreq_policy_list);
#define for_each_policy(__policy)                               \
        list_for_each_entry(__policy, &cpufreq_policy_list, policy_list)

/* Iterate over governors */
static LIST_HEAD(cpufreq_governor_list);
#define for_each_governor(__governor)                           \
        list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)

/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its rwlock. This rwlock
 * also protects the cpufreq_cpu_data array.
 */
static struct cpufreq_driver *cpufreq_driver;
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data_fallback);
static DEFINE_RWLOCK(cpufreq_driver_lock);
DEFINE_MUTEX(cpufreq_governor_lock);

/* This one keeps track of the previously set governor of a removed CPU */
static DEFINE_PER_CPU(char[CPUFREQ_NAME_LEN], cpufreq_cpu_governor);

/* Flag to suspend/resume CPUFreq governors */
static bool cpufreq_suspended;

static inline bool has_target(void)
{
        return cpufreq_driver->target_index || cpufreq_driver->target;
}

/* internal prototypes */
static int __cpufreq_governor(struct cpufreq_policy *policy,
                unsigned int event);
static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
static void handle_update(struct work_struct *work);

/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The mutex locks both lists.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;

static bool init_cpufreq_transition_notifier_list_called;
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        init_cpufreq_transition_notifier_list_called = true;
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);

static int off __read_mostly;
static int cpufreq_disabled(void)
{
        return off;
}
void disable_cpufreq(void)
{
        off = 1;
}
static DEFINE_MUTEX(cpufreq_governor_mutex);

bool have_governor_per_policy(void)
{
        return !!(cpufreq_driver->flags & CPUFREQ_HAVE_GOVERNOR_PER_POLICY);
}
EXPORT_SYMBOL_GPL(have_governor_per_policy);

struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
{
        if (have_governor_per_policy())
                return &policy->kobj;
        else
                return cpufreq_global_kobject;
}
EXPORT_SYMBOL_GPL(get_governor_parent_kobj);

static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
{
        u64 idle_time;
        u64 cur_wall_time;
        u64 busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());

        busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
        busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];

        idle_time = cur_wall_time - busy_time;
        if (wall)
                *wall = cputime_to_usecs(cur_wall_time);

        return cputime_to_usecs(idle_time);
}

u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);
        else if (!io_busy)
                idle_time += get_cpu_iowait_time_us(cpu, wall);

        return idle_time;
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time);
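
/*
 * A minimal load-estimation sketch in the style of the sampling governors
 * (ondemand/conservative), assuming a periodic sampling context; the names
 * below are illustrative only. Both return values are cumulative
 * microseconds, so the load comes from the deltas between two samples:
 *
 *      u64 wall_prev, wall_now, idle_prev, idle_now;
 *      unsigned int load;
 *
 *      idle_prev = get_cpu_idle_time(cpu, &wall_prev, 0);
 *      ... one sampling period later ...
 *      idle_now = get_cpu_idle_time(cpu, &wall_now, 0);
 *      load = 100 * ((wall_now - wall_prev) - (idle_now - idle_prev)) /
 *                      (wall_now - wall_prev);
 */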

/*
 * This is a generic cpufreq init() routine which can be used by cpufreq
 * drivers of SMP systems. It will do the following:
 * - validate & show the frequency table passed in
 * - set the policy's transition latency
 * - fill policy->cpus with all possible CPUs
 */
int cpufreq_generic_init(struct cpufreq_policy *policy,
                struct cpufreq_frequency_table *table,
                unsigned int transition_latency)
{
        int ret;

        ret = cpufreq_table_validate_and_show(policy, table);
        if (ret) {
                pr_err("%s: invalid frequency table: %d\n", __func__, ret);
                return ret;
        }

        policy->cpuinfo.transition_latency = transition_latency;

        /*
         * The driver only supports the SMP configuration where all processors
         * share the clock and voltage.
         */
        cpumask_setall(policy->cpus);

        return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_generic_init);
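
/*
 * A minimal sketch of a driver ->init() callback built on this helper; the
 * foo_* names and the 300 us latency are illustrative assumptions only
 * (note that transition_latency is given in nanoseconds):
 *
 *      static int foo_cpufreq_init(struct cpufreq_policy *policy)
 *      {
 *              policy->clk = foo_clk;
 *              return cpufreq_generic_init(policy, foo_freq_table, 300000);
 *      }
 */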

unsigned int cpufreq_generic_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);

        if (!policy || IS_ERR(policy->clk)) {
                pr_err("%s: No %s associated to cpu: %d\n",
                       __func__, policy ? "clk" : "policy", cpu);
                return 0;
        }

        return clk_get_rate(policy->clk) / 1000;
}
EXPORT_SYMBOL_GPL(cpufreq_generic_get);
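
/*
 * Note: for cpufreq_generic_get() to work, the driver's ->init() must have
 * stored a valid clock in policy->clk (typically obtained via clk_get());
 * the helper converts the rate from Hz to the kHz unit used throughout
 * cpufreq.
 */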

/* Only for cpufreq core internal use */
struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
{
        return per_cpu(cpufreq_cpu_data, cpu);
}

struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
{
        struct cpufreq_policy *policy = NULL;
        unsigned long flags;

        if (cpu >= nr_cpu_ids)
                return NULL;

        /* get the cpufreq driver */
        read_lock_irqsave(&cpufreq_driver_lock, flags);

        if (cpufreq_driver) {
                /* get the CPU */
                policy = per_cpu(cpufreq_cpu_data, cpu);
                if (policy)
                        kobject_get(&policy->kobj);
        }

        read_unlock_irqrestore(&cpufreq_driver_lock, flags);

        return policy;
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);

void cpufreq_cpu_put(struct cpufreq_policy *policy)
{
        kobject_put(&policy->kobj);
}
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
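
/*
 * The get/put pair above is reference-counted through the policy kobject,
 * so every successful cpufreq_cpu_get() must be balanced by a matching
 * cpufreq_cpu_put(). The typical pattern (used by cpufreq_quick_get()
 * below; use() stands in for arbitrary reader code):
 *
 *      struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 *
 *      if (policy) {
 *              use(policy->cur, policy->min, policy->max);
 *              cpufreq_cpu_put(policy);
 *      }
 */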

/*********************************************************************
 *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
 *********************************************************************/

/**
 * adjust_jiffies - adjust the system "loops_per_jiffy"
 *
 * This function alters the system "loops_per_jiffy" for the clock
 * speed change. Note that loops_per_jiffy cannot be updated on SMP
 * systems as each CPU might be scaled differently. So, use the arch
 * per-CPU loops_per_jiffy value wherever possible.
 */
static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
#ifndef CONFIG_SMP
        static unsigned long l_p_j_ref;
        static unsigned int l_p_j_ref_freq;

        if (ci->flags & CPUFREQ_CONST_LOOPS)
                return;

        if (!l_p_j_ref_freq) {
                l_p_j_ref = loops_per_jiffy;
                l_p_j_ref_freq = ci->old;
                pr_debug("saving %lu as reference value for loops_per_jiffy; freq is %u kHz\n",
                         l_p_j_ref, l_p_j_ref_freq);
        }
        if (val == CPUFREQ_POSTCHANGE && ci->old != ci->new) {
                loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
                                                                ci->new);
                pr_debug("scaling loops_per_jiffy to %lu for frequency %u kHz\n",
                         loops_per_jiffy, ci->new);
        }
#endif
}

static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
                struct cpufreq_freqs *freqs, unsigned int state)
{
        BUG_ON(irqs_disabled());

        if (cpufreq_disabled())
                return;

        freqs->flags = cpufreq_driver->flags;
        pr_debug("notification %u of frequency transition to %u kHz\n",
                 state, freqs->new);

        switch (state) {

        case CPUFREQ_PRECHANGE:
                /* detect if the driver reported a value as "old frequency"
                 * which is not equal to what the cpufreq core thinks is
                 * "old frequency".
                 */
                if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
                        if ((policy) && (policy->cpu == freqs->cpu) &&
                            (policy->cur) && (policy->cur != freqs->old)) {
                                pr_debug("Warning: CPU frequency is %u, cpufreq assumed %u kHz\n",
                                         freqs->old, policy->cur);
                                freqs->old = policy->cur;
                        }
                }
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_PRECHANGE, freqs);
                adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
                break;

        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
                pr_debug("FREQ: %lu - CPU: %lu\n",
                         (unsigned long)freqs->new, (unsigned long)freqs->cpu);
                trace_cpu_frequency(freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
                        policy->cur = freqs->new;
                break;
        }
}

/**
 * cpufreq_notify_transition - call notifier chain and adjust_jiffies
 * on frequency transition.
 *
 * This function calls the transition notifiers and the "adjust_jiffies"
 * function. It is called twice on all CPU frequency changes that have
 * external effects.
 */
static void cpufreq_notify_transition(struct cpufreq_policy *policy,
                struct cpufreq_freqs *freqs, unsigned int state)
{
        for_each_cpu(freqs->cpu, policy->cpus)
                __cpufreq_notify_transition(policy, freqs, state);
}

/* Do post notifications when there is a chance that the transition failed */
static void cpufreq_notify_post_transition(struct cpufreq_policy *policy,
                struct cpufreq_freqs *freqs, int transition_failed)
{
        cpufreq_notify_transition(policy, freqs, CPUFREQ_POSTCHANGE);
        if (!transition_failed)
                return;

        swap(freqs->old, freqs->new);
        cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
        cpufreq_notify_transition(policy, freqs, CPUFREQ_POSTCHANGE);
}

void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
                struct cpufreq_freqs *freqs)
{

        /*
         * Catch double invocations of _begin() which lead to self-deadlock.
         * ASYNC_NOTIFICATION drivers are left out because the cpufreq core
         * doesn't invoke _begin() on their behalf, and hence the chances of
         * double invocations are very low. Moreover, there are scenarios
         * where these checks can emit false-positive warnings in these
         * drivers; so we avoid that by skipping them altogether.
         */
        WARN_ON(!(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION)
                                && current == policy->transition_task);

wait:
        wait_event(policy->transition_wait, !policy->transition_ongoing);

        spin_lock(&policy->transition_lock);

        if (unlikely(policy->transition_ongoing)) {
                spin_unlock(&policy->transition_lock);
                goto wait;
        }

        policy->transition_ongoing = true;
        policy->transition_task = current;

        spin_unlock(&policy->transition_lock);

        cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
}
EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin);

void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
                struct cpufreq_freqs *freqs, int transition_failed)
{
        if (unlikely(WARN_ON(!policy->transition_ongoing)))
                return;

        cpufreq_notify_post_transition(policy, freqs, transition_failed);

        policy->transition_ongoing = false;
        policy->transition_task = NULL;

        wake_up(&policy->transition_wait);
}
EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end);
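
/*
 * Every synchronous frequency change is bracketed by the _begin()/_end()
 * pair above: by the cpufreq core itself for ->target_index() drivers, or
 * by the driver for ->target() ones. A sketch of the calling pattern, with
 * illustrative next_freq/foo_* names:
 *
 *      struct cpufreq_freqs freqs;
 *      int ret;
 *
 *      freqs.old = policy->cur;
 *      freqs.new = next_freq;
 *
 *      cpufreq_freq_transition_begin(policy, &freqs);
 *      ret = foo_write_frequency(freqs.new);
 *      cpufreq_freq_transition_end(policy, &freqs, ret);
 */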


/*********************************************************************
 *                          SYSFS INTERFACE                          *
 *********************************************************************/
static ssize_t show_boost(struct kobject *kobj,
                                 struct attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled);
}

static ssize_t store_boost(struct kobject *kobj, struct attribute *attr,
                                  const char *buf, size_t count)
{
        int ret, enable;

        ret = sscanf(buf, "%d", &enable);
        if (ret != 1 || enable < 0 || enable > 1)
                return -EINVAL;

        if (cpufreq_boost_trigger_state(enable)) {
                pr_err("%s: Cannot %s BOOST!\n",
                       __func__, enable ? "enable" : "disable");
                return -EINVAL;
        }

        pr_debug("%s: cpufreq BOOST %s\n",
                 __func__, enable ? "enabled" : "disabled");

        return count;
}
define_one_global_rw(boost);

static struct cpufreq_governor *find_governor(const char *str_governor)
{
        struct cpufreq_governor *t;

        for_each_governor(t)
                if (!strncasecmp(str_governor, t->name, CPUFREQ_NAME_LEN))
                        return t;

        return NULL;
}

/**
 * cpufreq_parse_governor - parse a governor string
 */
static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
                                struct cpufreq_governor **governor)
{
        int err = -EINVAL;

        if (!cpufreq_driver)
                goto out;

        if (cpufreq_driver->setpolicy) {
                if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_PERFORMANCE;
                        err = 0;
                } else if (!strncasecmp(str_governor, "powersave",
                                                CPUFREQ_NAME_LEN)) {
                        *policy = CPUFREQ_POLICY_POWERSAVE;
                        err = 0;
                }
        } else {
                struct cpufreq_governor *t;

                mutex_lock(&cpufreq_governor_mutex);

                t = find_governor(str_governor);

                if (t == NULL) {
                        int ret;

                        mutex_unlock(&cpufreq_governor_mutex);
                        ret = request_module("cpufreq_%s", str_governor);
                        mutex_lock(&cpufreq_governor_mutex);

                        if (ret == 0)
                                t = find_governor(str_governor);
                }

                if (t != NULL) {
                        *governor = t;
                        err = 0;
                }

                mutex_unlock(&cpufreq_governor_mutex);
        }
out:
        return err;
}

/**
 * cpufreq_per_cpu_attr_read() / show_##file_name() -
 * print out cpufreq information
 *
 * Write out information from cpufreq_driver->policy[cpu]; object must be
 * "unsigned int".
 */

#define show_one(file_name, object)                     \
static ssize_t show_##file_name                         \
(struct cpufreq_policy *policy, char *buf)              \
{                                                       \
        return sprintf(buf, "%u\n", policy->object);    \
}

show_one(cpuinfo_min_freq, cpuinfo.min_freq);
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);

static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf)
{
        ssize_t ret;

        if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
                ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu));
        else
                ret = sprintf(buf, "%u\n", policy->cur);
        return ret;
}

static int cpufreq_set_policy(struct cpufreq_policy *policy,
                                struct cpufreq_policy *new_policy);

/**
 * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
 */
#define store_one(file_name, object)                    \
static ssize_t store_##file_name                                        \
(struct cpufreq_policy *policy, const char *buf, size_t count)          \
{                                                                       \
        int ret, temp;                                                  \
        struct cpufreq_policy new_policy;                               \
                                                                        \
        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
        if (ret)                                                        \
                return -EINVAL;                                         \
                                                                        \
        ret = sscanf(buf, "%u", &new_policy.object);                    \
        if (ret != 1)                                                   \
                return -EINVAL;                                         \
                                                                        \
        temp = new_policy.object;                                       \
        ret = cpufreq_set_policy(policy, &new_policy);          \
        if (!ret)                                                       \
                policy->user_policy.object = temp;                      \
                                                                        \
        return ret ? ret : count;                                       \
}

store_one(scaling_min_freq, min);
store_one(scaling_max_freq, max);
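
/*
 * The show_one()/store_one() macros above stamp out one sysfs accessor per
 * attribute. For instance, show_one(scaling_min_freq, min) expands to:
 *
 *      static ssize_t show_scaling_min_freq
 *      (struct cpufreq_policy *policy, char *buf)
 *      {
 *              return sprintf(buf, "%u\n", policy->min);
 *      }
 */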

/**
 * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
 */
static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
                                        char *buf)
{
        unsigned int cur_freq = __cpufreq_get(policy);
        if (!cur_freq)
                return sprintf(buf, "<unknown>");
        return sprintf(buf, "%u\n", cur_freq);
}

/**
 * show_scaling_governor - show the current policy for the specified CPU
 */
static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
{
        if (policy->policy == CPUFREQ_POLICY_POWERSAVE)
                return sprintf(buf, "powersave\n");
        else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
                return sprintf(buf, "performance\n");
        else if (policy->governor)
                return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n",
                                policy->governor->name);
        return -EINVAL;
}

/**
 * store_scaling_governor - store policy for the specified CPU
 */
static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        int ret;
        char    str_governor[16];
        struct cpufreq_policy new_policy;

        ret = cpufreq_get_policy(&new_policy, policy->cpu);
        if (ret)
                return ret;

        ret = sscanf(buf, "%15s", str_governor);
        if (ret != 1)
                return -EINVAL;

        if (cpufreq_parse_governor(str_governor, &new_policy.policy,
                                                &new_policy.governor))
                return -EINVAL;

        ret = cpufreq_set_policy(policy, &new_policy);

        policy->user_policy.policy = policy->policy;
        policy->user_policy.governor = policy->governor;

        if (ret)
                return ret;
        else
                return count;
}

/**
 * show_scaling_driver - show the cpufreq driver currently loaded
 */
static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
{
        return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name);
}

/**
 * show_scaling_available_governors - show the available CPUfreq governors
 */
static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
                                                char *buf)
{
        ssize_t i = 0;
        struct cpufreq_governor *t;

        if (!has_target()) {
                i += sprintf(buf, "performance powersave");
                goto out;
        }

        for_each_governor(t) {
                if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char))
                    - (CPUFREQ_NAME_LEN + 2)))
                        goto out;
                i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name);
        }
out:
        i += sprintf(&buf[i], "\n");
        return i;
}

ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf)
{
        ssize_t i = 0;
        unsigned int cpu;

        for_each_cpu(cpu, mask) {
                if (i)
                        i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
                i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
                if (i >= (PAGE_SIZE - 5))
                        break;
        }
        i += sprintf(&buf[i], "\n");
        return i;
}
EXPORT_SYMBOL_GPL(cpufreq_show_cpus);

/**
 * show_related_cpus - show the CPUs affected by each transition even if
 * hw coordination is in use
 */
static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
{
        return cpufreq_show_cpus(policy->related_cpus, buf);
}

/**
 * show_affected_cpus - show the CPUs affected by each transition
 */
static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
{
        return cpufreq_show_cpus(policy->cpus, buf);
}

static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
{
        unsigned int freq = 0;
        unsigned int ret;

        if (!policy->governor || !policy->governor->store_setspeed)
                return -EINVAL;

        ret = sscanf(buf, "%u", &freq);
        if (ret != 1)
                return -EINVAL;

        policy->governor->store_setspeed(policy, freq);

        return count;
}

static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
{
        if (!policy->governor || !policy->governor->show_setspeed)
                return sprintf(buf, "<unsupported>\n");

        return policy->governor->show_setspeed(policy, buf);
}

/**
 * show_bios_limit - show the current cpufreq HW/BIOS limitation
 */
static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
{
        unsigned int limit;
        int ret;
        if (cpufreq_driver->bios_limit) {
                ret = cpufreq_driver->bios_limit(policy->cpu, &limit);
                if (!ret)
                        return sprintf(buf, "%u\n", limit);
        }
        return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
}

cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
cpufreq_freq_attr_ro(cpuinfo_min_freq);
cpufreq_freq_attr_ro(cpuinfo_max_freq);
cpufreq_freq_attr_ro(cpuinfo_transition_latency);
cpufreq_freq_attr_ro(scaling_available_governors);
cpufreq_freq_attr_ro(scaling_driver);
cpufreq_freq_attr_ro(scaling_cur_freq);
cpufreq_freq_attr_ro(bios_limit);
cpufreq_freq_attr_ro(related_cpus);
cpufreq_freq_attr_ro(affected_cpus);
cpufreq_freq_attr_rw(scaling_min_freq);
cpufreq_freq_attr_rw(scaling_max_freq);
cpufreq_freq_attr_rw(scaling_governor);
cpufreq_freq_attr_rw(scaling_setspeed);

static struct attribute *default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
        &cpuinfo_transition_latency.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
        &related_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
        &scaling_setspeed.attr,
        NULL
};

#define to_policy(k) container_of(k, struct cpufreq_policy, kobj)
#define to_attr(a) container_of(a, struct freq_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret;

        down_read(&policy->rwsem);

        if (fattr->show)
                ret = fattr->show(policy, buf);
        else
                ret = -EIO;

        up_read(&policy->rwsem);

        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;

        get_online_cpus();

        if (!cpu_online(policy->cpu))
                goto unlock;

        down_write(&policy->rwsem);

        if (fattr->store)
                ret = fattr->store(policy, buf, count);
        else
                ret = -EIO;

        up_write(&policy->rwsem);
unlock:
        put_online_cpus();

        return ret;
}

static void cpufreq_sysfs_release(struct kobject *kobj)
{
        struct cpufreq_policy *policy = to_policy(kobj);
        pr_debug("last reference is dropped\n");
        complete(&policy->kobj_unregister);
}

static const struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

static struct kobj_type ktype_cpufreq = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
        .release        = cpufreq_sysfs_release,
};

struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);

static int cpufreq_global_kobject_usage;

int cpufreq_get_global_kobject(void)
{
        if (!cpufreq_global_kobject_usage++)
                return kobject_add(cpufreq_global_kobject,
                                &cpu_subsys.dev_root->kobj, "%s", "cpufreq");

        return 0;
}
EXPORT_SYMBOL(cpufreq_get_global_kobject);

void cpufreq_put_global_kobject(void)
{
        if (!--cpufreq_global_kobject_usage)
                kobject_del(cpufreq_global_kobject);
}
EXPORT_SYMBOL(cpufreq_put_global_kobject);

int cpufreq_sysfs_create_file(const struct attribute *attr)
{
        int ret = cpufreq_get_global_kobject();

        if (!ret) {
                ret = sysfs_create_file(cpufreq_global_kobject, attr);
                if (ret)
                        cpufreq_put_global_kobject();
        }

        return ret;
}
EXPORT_SYMBOL(cpufreq_sysfs_create_file);

void cpufreq_sysfs_remove_file(const struct attribute *attr)
{
        sysfs_remove_file(cpufreq_global_kobject, attr);
        cpufreq_put_global_kobject();
}
EXPORT_SYMBOL(cpufreq_sysfs_remove_file);

/* symlink affected CPUs */
static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
{
        unsigned int j;
        int ret = 0;

        for_each_cpu(j, policy->cpus) {
                struct device *cpu_dev;

                if (j == policy->cpu)
                        continue;

                pr_debug("Adding link for CPU: %u\n", j);
                cpu_dev = get_cpu_device(j);
                ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
                                        "cpufreq");
                if (ret)
                        break;
        }
        return ret;
}

static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
                                     struct device *dev)
{
        struct freq_attr **drv_attr;
        int ret = 0;

        /* set up files for this cpu device */
        drv_attr = cpufreq_driver->attr;
        while (drv_attr && *drv_attr) {
                ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
                if (ret)
                        return ret;
                drv_attr++;
        }
        if (cpufreq_driver->get) {
                ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
                if (ret)
                        return ret;
        }

        ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
        if (ret)
                return ret;

        if (cpufreq_driver->bios_limit) {
                ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
                if (ret)
                        return ret;
        }

        return cpufreq_add_dev_symlink(policy);
}

static void cpufreq_init_policy(struct cpufreq_policy *policy)
{
        struct cpufreq_governor *gov = NULL;
        struct cpufreq_policy new_policy;
        int ret = 0;

        memcpy(&new_policy, policy, sizeof(*policy));

        /* Update governor of new_policy to the governor used before hotplug */
        gov = find_governor(per_cpu(cpufreq_cpu_governor, policy->cpu));
        if (gov)
                pr_debug("Restoring governor %s for cpu %d\n",
                                gov->name, policy->cpu);
        else
                gov = CPUFREQ_DEFAULT_GOVERNOR;

        new_policy.governor = gov;

        /* Use the default policy if it's valid. */
        if (cpufreq_driver->setpolicy)
                cpufreq_parse_governor(gov->name, &new_policy.policy, NULL);

        /* set default policy */
        ret = cpufreq_set_policy(policy, &new_policy);
        if (ret) {
                pr_debug("setting policy failed\n");
                if (cpufreq_driver->exit)
                        cpufreq_driver->exit(policy);
        }
}

static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
                                  unsigned int cpu, struct device *dev)
{
        int ret = 0;
        unsigned long flags;

        if (has_target()) {
                ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
                if (ret) {
                        pr_err("%s: Failed to stop governor\n", __func__);
                        return ret;
                }
        }

        down_write(&policy->rwsem);

        write_lock_irqsave(&cpufreq_driver_lock, flags);

        cpumask_set_cpu(cpu, policy->cpus);
        per_cpu(cpufreq_cpu_data, cpu) = policy;
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        up_write(&policy->rwsem);

        if (has_target()) {
                ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
                if (!ret)
                        ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);

                if (ret) {
                        pr_err("%s: Failed to start governor\n", __func__);
                        return ret;
                }
        }

        return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
}

static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
{
        struct cpufreq_policy *policy;
        unsigned long flags;

        read_lock_irqsave(&cpufreq_driver_lock, flags);

        policy = per_cpu(cpufreq_cpu_data_fallback, cpu);

        read_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (policy)
                policy->governor = NULL;

        return policy;
}

static struct cpufreq_policy *cpufreq_policy_alloc(void)
{
        struct cpufreq_policy *policy;

        policy = kzalloc(sizeof(*policy), GFP_KERNEL);
        if (!policy)
                return NULL;

        if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
                goto err_free_policy;

        if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
                goto err_free_cpumask;

        INIT_LIST_HEAD(&policy->policy_list);
        init_rwsem(&policy->rwsem);
        spin_lock_init(&policy->transition_lock);
        init_waitqueue_head(&policy->transition_wait);
        init_completion(&policy->kobj_unregister);
        INIT_WORK(&policy->update, handle_update);

        return policy;

err_free_cpumask:
        free_cpumask_var(policy->cpus);
err_free_policy:
        kfree(policy);

        return NULL;
}

static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy)
{
        struct kobject *kobj;
        struct completion *cmp;

        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                        CPUFREQ_REMOVE_POLICY, policy);

        down_read(&policy->rwsem);
        kobj = &policy->kobj;
        cmp = &policy->kobj_unregister;
        up_read(&policy->rwsem);
        kobject_put(kobj);

        /*
         * We need to make sure that the underlying kobj is
         * actually not referenced anymore by anybody before we
         * proceed with unloading.
         */
        pr_debug("waiting for dropping of refcount\n");
        wait_for_completion(cmp);
        pr_debug("wait complete\n");
}

static void cpufreq_policy_free(struct cpufreq_policy *policy)
{
        free_cpumask_var(policy->related_cpus);
        free_cpumask_var(policy->cpus);
        kfree(policy);
}

static int update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu,
                             struct device *cpu_dev)
{
        int ret;

        if (WARN_ON(cpu == policy->cpu))
                return 0;

        /* Move kobject to the new policy->cpu */
        ret = kobject_move(&policy->kobj, &cpu_dev->kobj);
        if (ret) {
                pr_err("%s: Failed to move kobj: %d\n", __func__, ret);
                return ret;
        }

        down_write(&policy->rwsem);
        policy->cpu = cpu;
        up_write(&policy->rwsem);

        return 0;
}

static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int j, cpu = dev->id;
        int ret = -ENOMEM;
        struct cpufreq_policy *policy;
        unsigned long flags;
        bool recover_policy = cpufreq_suspended;

        if (cpu_is_offline(cpu))
                return 0;

        pr_debug("adding CPU %u\n", cpu);

        /* check whether a different CPU already registered this
         * CPU because it is in the same boat. */
        policy = cpufreq_cpu_get_raw(cpu);
        if (unlikely(policy))
                return 0;

        /* Check if this cpu was hot-unplugged earlier and has siblings */
        read_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_policy(policy) {
                if (cpumask_test_cpu(cpu, policy->related_cpus)) {
                        read_unlock_irqrestore(&cpufreq_driver_lock, flags);
                        ret = cpufreq_add_policy_cpu(policy, cpu, dev);
                        return ret;
                }
        }
        read_unlock_irqrestore(&cpufreq_driver_lock, flags);

        /*
         * Restore the saved policy when doing light-weight init and fall back
         * to the full init if that fails.
         */
        policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL;
        if (!policy) {
                recover_policy = false;
                policy = cpufreq_policy_alloc();
                if (!policy)
                        goto nomem_out;
        }

        /*
         * In the resume path, since we restore a saved policy, the assignment
         * to policy->cpu is like an update of the existing policy, rather than
         * the creation of a brand new one. So we need to perform this update
         * by invoking update_policy_cpu().
         */
        if (recover_policy && cpu != policy->cpu)
                WARN_ON(update_policy_cpu(policy, cpu, dev));
        else
                policy->cpu = cpu;

        cpumask_copy(policy->cpus, cpumask_of(cpu));

        /* call driver. From then on the cpufreq must be able
         * to accept all calls to ->verify and ->setpolicy for this CPU
         */
        ret = cpufreq_driver->init(policy);
        if (ret) {
                pr_debug("initialization failed\n");
                goto err_set_policy_cpu;
        }

        down_write(&policy->rwsem);

        /* Related cpus should at least include policy->cpus. */
        cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus);

        /*
         * Affected cpus must always be the ones that are online. We aren't
         * managing offline cpus here.
         */
        cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);

        if (!recover_policy) {
                policy->user_policy.min = policy->min;
                policy->user_policy.max = policy->max;

                /* prepare interface data */
                ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
                                           &dev->kobj, "cpufreq");
                if (ret) {
                        pr_err("%s: failed to init policy->kobj: %d\n",
                               __func__, ret);
                        goto err_init_policy_kobj;
                }
        }

        write_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus)
                per_cpu(cpufreq_cpu_data, j) = policy;
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
                policy->cur = cpufreq_driver->get(policy->cpu);
                if (!policy->cur) {
                        pr_err("%s: ->get() failed\n", __func__);
                        goto err_get_freq;
                }
        }

        /*
         * Sometimes boot loaders set the CPU frequency to a value outside of
         * the frequency table present with the cpufreq core. In such cases
         * the CPU might be unstable if it has to run at that frequency for a
         * long duration of time, so it's better to set it to a frequency
         * which is specified in the freq-table. This also makes cpufreq
         * stats inconsistent, as cpufreq-stats would fail to register because
         * the current frequency of the CPU isn't found in the freq-table.
         *
         * Because we don't want this change to affect the boot process badly,
         * we go for the next freq which is >= policy->cur ('cur' must be set
         * by now, otherwise we will end up setting freq to the lowest of the
         * table as 'cur' is initialized to zero).
         *
         * We are passing target-freq as "policy->cur - 1" otherwise
         * __cpufreq_driver_target() would simply fail, as policy->cur will be
         * equal to target-freq.
         */
        if ((cpufreq_driver->flags & CPUFREQ_NEED_INITIAL_FREQ_CHECK)
            && has_target()) {
                /* Are we running at an unknown frequency? */
                ret = cpufreq_frequency_table_get_index(policy, policy->cur);
                if (ret == -EINVAL) {
                        /* Warn the user and fix it */
                        pr_warn("%s: CPU%d: Running at unlisted freq: %u KHz\n",
                                __func__, policy->cpu, policy->cur);
                        ret = __cpufreq_driver_target(policy, policy->cur - 1,
                                CPUFREQ_RELATION_L);

                        /*
                         * Reaching here after boot in a few seconds may not
                         * mean that the system will remain stable at the
                         * "unknown" frequency for a longer duration. Hence,
                         * a BUG_ON().
                         */
                        BUG_ON(ret);
                        pr_warn("%s: CPU%d: Unlisted initial frequency changed to: %u KHz\n",
                                __func__, policy->cpu, policy->cur);
                }
        }

        blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                     CPUFREQ_START, policy);

        if (!recover_policy) {
                ret = cpufreq_add_dev_interface(policy, dev);
                if (ret)
                        goto err_out_unregister;
                blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                CPUFREQ_CREATE_POLICY, policy);
        }

        write_lock_irqsave(&cpufreq_driver_lock, flags);
        list_add(&policy->policy_list, &cpufreq_policy_list);
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        cpufreq_init_policy(policy);

        if (!recover_policy) {
                policy->user_policy.policy = policy->policy;
                policy->user_policy.governor = policy->governor;
        }
        up_write(&policy->rwsem);

        kobject_uevent(&policy->kobj, KOBJ_ADD);

        /* Callback for handling stuff after policy is ready */
        if (cpufreq_driver->ready)
                cpufreq_driver->ready(policy);

        pr_debug("initialization complete\n");

        return 0;

err_out_unregister:
err_get_freq:
        write_lock_irqsave(&cpufreq_driver_lock, flags);
        for_each_cpu(j, policy->cpus)
                per_cpu(cpufreq_cpu_data, j) = NULL;
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (!recover_policy) {
                kobject_put(&policy->kobj);
                wait_for_completion(&policy->kobj_unregister);
        }
err_init_policy_kobj:
        up_write(&policy->rwsem);

        if (cpufreq_driver->exit)
                cpufreq_driver->exit(policy);
err_set_policy_cpu:
        if (recover_policy) {
                /* Do not leave stale fallback data behind. */
                per_cpu(cpufreq_cpu_data_fallback, cpu) = NULL;
                cpufreq_policy_put_kobj(policy);
        }
        cpufreq_policy_free(policy);

nomem_out:
        return ret;
}

/**
 * cpufreq_add_dev - add a CPU device
 *
 * Adds the cpufreq interface for a CPU device.
 *
 * The Oracle says: try running cpufreq registration/unregistration
 * concurrently with cpu hotplugging and all hell will break loose. Tried to
 * clean this mess up, but more thorough testing is needed. - Mathieu
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
        return __cpufreq_add_dev(dev, sif);
}

static int __cpufreq_remove_dev_prepare(struct device *dev,
                                        struct subsys_interface *sif)
{
        unsigned int cpu = dev->id, cpus;
        int ret;
        unsigned long flags;
        struct cpufreq_policy *policy;

        pr_debug("%s: unregistering CPU %u\n", __func__, cpu);

        write_lock_irqsave(&cpufreq_driver_lock, flags);

        policy = per_cpu(cpufreq_cpu_data, cpu);

        /* Save the policy somewhere when doing a light-weight tear-down */
        if (cpufreq_suspended)
                per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;

        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (!policy) {
                pr_debug("%s: No cpu_data found\n", __func__);
                return -EINVAL;
        }

        if (has_target()) {
                ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
                if (ret) {
                        pr_err("%s: Failed to stop governor\n", __func__);
                        return ret;
                }

                strncpy(per_cpu(cpufreq_cpu_governor, cpu),
                        policy->governor->name, CPUFREQ_NAME_LEN);
        }

        down_read(&policy->rwsem);
        cpus = cpumask_weight(policy->cpus);
        up_read(&policy->rwsem);

        if (cpu != policy->cpu) {
                sysfs_remove_link(&dev->kobj, "cpufreq");
        } else if (cpus > 1) {
                /* Nominate new CPU */
                int new_cpu = cpumask_any_but(policy->cpus, cpu);
                struct device *cpu_dev = get_cpu_device(new_cpu);

                sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
                ret = update_policy_cpu(policy, new_cpu, cpu_dev);
                if (ret) {
                        if (sysfs_create_link(&cpu_dev->kobj, &policy->kobj,
                                              "cpufreq"))
                                pr_err("%s: Failed to restore kobj link to cpu:%d\n",
                                       __func__, cpu_dev->id);
                        return ret;
                }

                if (!cpufreq_suspended)
                        pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
                                 __func__, new_cpu, cpu);
        } else if (cpufreq_driver->stop_cpu) {
                cpufreq_driver->stop_cpu(policy);
        }

        return 0;
}

static int __cpufreq_remove_dev_finish(struct device *dev,
                                       struct subsys_interface *sif)
{
        unsigned int cpu = dev->id, cpus;
        int ret;
        unsigned long flags;
        struct cpufreq_policy *policy;

        write_lock_irqsave(&cpufreq_driver_lock, flags);
        policy = per_cpu(cpufreq_cpu_data, cpu);
        per_cpu(cpufreq_cpu_data, cpu) = NULL;
        write_unlock_irqrestore(&cpufreq_driver_lock, flags);

        if (!policy) {
                pr_debug("%s: No cpu_data found\n", __func__);
                return -EINVAL;
        }

        down_write(&policy->rwsem);
        cpus = cpumask_weight(policy->cpus);

        if (cpus > 1)
                cpumask_clear_cpu(cpu, policy->cpus);
        up_write(&policy->rwsem);

        /* If cpu is last user of policy, free policy */
        if (cpus == 1) {
                if (has_target()) {
                        ret = __cpufreq_governor(policy,
                                        CPUFREQ_GOV_POLICY_EXIT);
                        if (ret) {
                                pr_err("%s: Failed to exit governor\n",
                                       __func__);
                                return ret;
                        }
                }

                if (!cpufreq_suspended)
                        cpufreq_policy_put_kobj(policy);

                /*
                 * Perform the ->exit() even during light-weight tear-down,
                 * since this is a core component, and is essential for the
                 * subsequent light-weight ->init() to succeed.
                 */
                if (cpufreq_driver->exit)
                        cpufreq_driver->exit(policy);

                /* Remove policy from list of active policies */
                write_lock_irqsave(&cpufreq_driver_lock, flags);
                list_del(&policy->policy_list);
                write_unlock_irqrestore(&cpufreq_driver_lock, flags);

                if (!cpufreq_suspended)
                        cpufreq_policy_free(policy);
        } else if (has_target()) {
                ret = __cpufreq_governor(policy, CPUFREQ_GOV_START);
                if (!ret)
                        ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);

                if (ret) {
                        pr_err("%s: Failed to start governor\n", __func__);
                        return ret;
                }
        }

        return 0;
}

/**
 * cpufreq_remove_dev - remove a CPU device
 *
 * Removes the cpufreq interface for a CPU device.
 */
static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
{
        unsigned int cpu = dev->id;
        int ret;

        if (cpu_is_offline(cpu))
                return 0;

        ret = __cpufreq_remove_dev_prepare(dev, sif);

        if (!ret)
                ret = __cpufreq_remove_dev_finish(dev, sif);

        return ret;
}

static void handle_update(struct work_struct *work)
{
        struct cpufreq_policy *policy =
                container_of(work, struct cpufreq_policy, update);
        unsigned int cpu = policy->cpu;
        pr_debug("handle_update for cpu %u called\n", cpu);
        cpufreq_update_policy(cpu);
}

/**
 *      cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're
 *      in deep trouble.
 *      @policy: policy managing CPUs
 *      @new_freq: CPU frequency the CPU actually runs at
 *
 *      We adjust to the current frequency first, and need to clean up later.
 *      So either call cpufreq_update_policy() or schedule handle_update().
 */
1471 static void cpufreq_out_of_sync(struct cpufreq_policy *policy,
1472                                 unsigned int new_freq)
1473 {
1474         struct cpufreq_freqs freqs;
1475
1476         pr_debug("Warning: CPU frequency out of sync: cpufreq and timing core thinks of %u, is %u kHz\n",
1477                  policy->cur, new_freq);
1478
1479         freqs.old = policy->cur;
1480         freqs.new = new_freq;
1481
1482         cpufreq_freq_transition_begin(policy, &freqs);
1483         cpufreq_freq_transition_end(policy, &freqs, 0);
1484 }
1485
1486 /**
1487  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1488  * @cpu: CPU number
1489  *
1490  * This is the last known freq, without actually getting it from the driver.
1491  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1492  */
1493 unsigned int cpufreq_quick_get(unsigned int cpu)
1494 {
1495         struct cpufreq_policy *policy;
1496         unsigned int ret_freq = 0;
1497
1498         if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
1499                 return cpufreq_driver->get(cpu);
1500
1501         policy = cpufreq_cpu_get(cpu);
1502         if (policy) {
1503                 ret_freq = policy->cur;
1504                 cpufreq_cpu_put(policy);
1505         }
1506
1507         return ret_freq;
1508 }
1509 EXPORT_SYMBOL(cpufreq_quick_get);
1510
1511 /**
1512  * cpufreq_quick_get_max - get the max reported CPU frequency for this CPU
1513  * @cpu: CPU number
1514  *
1515  * Just return the max possible frequency for a given CPU.
1516  */
1517 unsigned int cpufreq_quick_get_max(unsigned int cpu)
1518 {
1519         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1520         unsigned int ret_freq = 0;
1521
1522         if (policy) {
1523                 ret_freq = policy->max;
1524                 cpufreq_cpu_put(policy);
1525         }
1526
1527         return ret_freq;
1528 }
1529 EXPORT_SYMBOL(cpufreq_quick_get_max);
1530
1531 static unsigned int __cpufreq_get(struct cpufreq_policy *policy)
1532 {
1533         unsigned int ret_freq = 0;
1534
1535         if (!cpufreq_driver->get)
1536                 return ret_freq;
1537
1538         ret_freq = cpufreq_driver->get(policy->cpu);
1539
1540         if (ret_freq && policy->cur &&
1541                 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1542                 /* verify no discrepancy between actual and saved value */
1544                 if (unlikely(ret_freq != policy->cur)) {
1545                         cpufreq_out_of_sync(policy, ret_freq);
1546                         schedule_work(&policy->update);
1547                 }
1548         }
1549
1550         return ret_freq;
1551 }
1552
1553 /**
1554  * cpufreq_get - get the current CPU frequency (in kHz)
1555  * @cpu: CPU number
1556  *
1557  * Get the current frequency of CPU @cpu, as reported by the driver.
1558  */
1559 unsigned int cpufreq_get(unsigned int cpu)
1560 {
1561         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1562         unsigned int ret_freq = 0;
1563
1564         if (policy) {
1565                 down_read(&policy->rwsem);
1566                 ret_freq = __cpufreq_get(policy);
1567                 up_read(&policy->rwsem);
1568
1569                 cpufreq_cpu_put(policy);
1570         }
1571
1572         return ret_freq;
1573 }
1574 EXPORT_SYMBOL(cpufreq_get);
1575
1576 static struct subsys_interface cpufreq_interface = {
1577         .name           = "cpufreq",
1578         .subsys         = &cpu_subsys,
1579         .add_dev        = cpufreq_add_dev,
1580         .remove_dev     = cpufreq_remove_dev,
1581 };
1582
1583 /*
1584  * Generic suspend handler for platforms that want a specific frequency
1585  * to be configured during suspend.
1586  */
1587 int cpufreq_generic_suspend(struct cpufreq_policy *policy)
1588 {
1589         int ret;
1590
1591         if (!policy->suspend_freq) {
1592                 pr_err("%s: suspend_freq can't be zero\n", __func__);
1593                 return -EINVAL;
1594         }
1595
1596         pr_debug("%s: Setting suspend-freq: %u\n", __func__,
1597                         policy->suspend_freq);
1598
1599         ret = __cpufreq_driver_target(policy, policy->suspend_freq,
1600                         CPUFREQ_RELATION_H);
1601         if (ret)
1602                 pr_err("%s: unable to set suspend-freq: %u. err: %d\n",
1603                                 __func__, policy->suspend_freq, ret);
1604
1605         return ret;
1606 }
1607 EXPORT_SYMBOL(cpufreq_generic_suspend);
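
/*
 * Example: a hypothetical driver (names prefixed foo_ are placeholders)
 * wiring up cpufreq_generic_suspend(). The driver only has to pick a
 * suspend frequency from its ->init() callback; the core then invokes
 * the ->suspend() hook from cpufreq_suspend() below:
 *
 *	static int foo_cpufreq_init(struct cpufreq_policy *policy)
 *	{
 *		policy->suspend_freq = policy->max;
 *		return 0;
 *	}
 *
 *	static struct cpufreq_driver foo_cpufreq_driver = {
 *		.name		= "foo",
 *		.init		= foo_cpufreq_init,
 *		.suspend	= cpufreq_generic_suspend,
 *	};
 */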
1608
1609 /**
1610  * cpufreq_suspend() - Suspend CPUFreq governors
1611  *
1612  * Called during system-wide suspend/hibernate cycles to suspend governors,
1613  * as some platforms can't change the frequency after this point in the
1614  * suspend cycle: the devices (e.g. I2C controllers, regulators) they use
1615  * for changing the frequency are themselves suspended soon afterwards.
1616  */
1617 void cpufreq_suspend(void)
1618 {
1619         struct cpufreq_policy *policy;
1620
1621         if (!cpufreq_driver)
1622                 return;
1623
1624         if (!has_target())
1625                 goto suspend;
1626
1627         pr_debug("%s: Suspending Governors\n", __func__);
1628
1629         for_each_policy(policy) {
1630                 if (__cpufreq_governor(policy, CPUFREQ_GOV_STOP))
1631                         pr_err("%s: Failed to stop governor for policy: %p\n",
1632                                 __func__, policy);
1633                 else if (cpufreq_driver->suspend
1634                     && cpufreq_driver->suspend(policy))
1635                         pr_err("%s: Failed to suspend driver: %p\n", __func__,
1636                                 policy);
1637         }
1638
1639 suspend:
1640         cpufreq_suspended = true;
1641 }
1642
1643 /**
1644  * cpufreq_resume() - Resume CPUFreq governors
1645  *
1646  * Called during system-wide suspend/hibernate cycles to resume the
1647  * governors that were suspended by cpufreq_suspend().
1648  */
1649 void cpufreq_resume(void)
1650 {
1651         struct cpufreq_policy *policy;
1652
1653         if (!cpufreq_driver)
1654                 return;
1655
1656         cpufreq_suspended = false;
1657
1658         if (!has_target())
1659                 return;
1660
1661         pr_debug("%s: Resuming Governors\n", __func__);
1662
1663         for_each_policy(policy) {
1664                 if (cpufreq_driver->resume && cpufreq_driver->resume(policy))
1665                         pr_err("%s: Failed to resume driver: %p\n", __func__,
1666                                 policy);
1667                 else if (__cpufreq_governor(policy, CPUFREQ_GOV_START)
1668                     || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
1669                         pr_err("%s: Failed to start governor for policy: %p\n",
1670                                 __func__, policy);
1671         }
1672
1673         /*
1674          * Schedule a call to cpufreq_update_policy() for the first online
1675          * CPU, as that one is not hotplugged out on suspend. It will verify
1676          * that the current freq is in sync with what we believe it to be.
1677          */
1678         policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
1679         if (WARN_ON(!policy))
1680                 return;
1681
1682         schedule_work(&policy->update);
1683 }
1684
1685 /**
1686  *      cpufreq_get_current_driver - return current driver's name
1687  *
1688  *      Return the name string of the currently loaded cpufreq driver
1689  *      or NULL, if none.
1690  */
1691 const char *cpufreq_get_current_driver(void)
1692 {
1693         if (cpufreq_driver)
1694                 return cpufreq_driver->name;
1695
1696         return NULL;
1697 }
1698 EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
1699
1700 /**
1701  *      cpufreq_get_driver_data - return current driver data
1702  *
1703  *      Return the private data of the currently loaded cpufreq
1704  *      driver, or NULL if no cpufreq driver is loaded.
1705  */
1706 void *cpufreq_get_driver_data(void)
1707 {
1708         if (cpufreq_driver)
1709                 return cpufreq_driver->driver_data;
1710
1711         return NULL;
1712 }
1713 EXPORT_SYMBOL_GPL(cpufreq_get_driver_data);
1714
1715 /*********************************************************************
1716  *                     NOTIFIER LISTS INTERFACE                      *
1717  *********************************************************************/
1718
1719 /**
1720  *      cpufreq_register_notifier - register a driver with cpufreq
1721  *      @nb: notifier function to register
1722  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1723  *
1724  *      Add a driver to one of two lists: either a list of drivers that
1725  *      are notified about clock rate changes (once before and once after
1726  *      the transition), or a list of drivers that are notified about
1727  *      changes in cpufreq policy.
1728  *
1729  *      This function may sleep, and has the same return conditions as
1730  *      blocking_notifier_chain_register.
1731  */
1732 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1733 {
1734         int ret;
1735
1736         if (cpufreq_disabled())
1737                 return -EINVAL;
1738
1739         WARN_ON(!init_cpufreq_transition_notifier_list_called);
1740
1741         switch (list) {
1742         case CPUFREQ_TRANSITION_NOTIFIER:
1743                 ret = srcu_notifier_chain_register(
1744                                 &cpufreq_transition_notifier_list, nb);
1745                 break;
1746         case CPUFREQ_POLICY_NOTIFIER:
1747                 ret = blocking_notifier_chain_register(
1748                                 &cpufreq_policy_notifier_list, nb);
1749                 break;
1750         default:
1751                 ret = -EINVAL;
1752         }
1753
1754         return ret;
1755 }
1756 EXPORT_SYMBOL(cpufreq_register_notifier);
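
/*
 * Example: a hypothetical transition notifier. It is called once with
 * CPUFREQ_PRECHANGE and once with CPUFREQ_POSTCHANGE around every
 * frequency change; @data points to the struct cpufreq_freqs that
 * describes the transition.
 *
 *	static int foo_freq_notify(struct notifier_block *nb,
 *				   unsigned long val, void *data)
 *	{
 *		struct cpufreq_freqs *freqs = data;
 *
 *		if (val == CPUFREQ_POSTCHANGE)
 *			pr_debug("cpu%u: %u -> %u kHz\n", freqs->cpu,
 *				 freqs->old, freqs->new);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block foo_freq_nb = {
 *		.notifier_call = foo_freq_notify,
 *	};
 *
 *	ret = cpufreq_register_notifier(&foo_freq_nb,
 *					CPUFREQ_TRANSITION_NOTIFIER);
 */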
1757
1758 /**
1759  *      cpufreq_unregister_notifier - unregister a driver with cpufreq
1760  *      @nb: notifier block to be unregistered
1761  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1762  *
1763  *      Remove a driver from the CPU frequency notifier list.
1764  *
1765  *      This function may sleep, and has the same return conditions as
1766  *      blocking_notifier_chain_unregister.
1767  */
1768 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1769 {
1770         int ret;
1771
1772         if (cpufreq_disabled())
1773                 return -EINVAL;
1774
1775         switch (list) {
1776         case CPUFREQ_TRANSITION_NOTIFIER:
1777                 ret = srcu_notifier_chain_unregister(
1778                                 &cpufreq_transition_notifier_list, nb);
1779                 break;
1780         case CPUFREQ_POLICY_NOTIFIER:
1781                 ret = blocking_notifier_chain_unregister(
1782                                 &cpufreq_policy_notifier_list, nb);
1783                 break;
1784         default:
1785                 ret = -EINVAL;
1786         }
1787
1788         return ret;
1789 }
1790 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1791
1792
1793 /*********************************************************************
1794  *                              GOVERNORS                            *
1795  *********************************************************************/
1796
1797 /* Must set freqs->new to intermediate frequency */
1798 static int __target_intermediate(struct cpufreq_policy *policy,
1799                                  struct cpufreq_freqs *freqs, int index)
1800 {
1801         int ret;
1802
1803         freqs->new = cpufreq_driver->get_intermediate(policy, index);
1804
1805         /* We don't need to switch to intermediate freq */
1806         if (!freqs->new)
1807                 return 0;
1808
1809         pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n",
1810                  __func__, policy->cpu, freqs->old, freqs->new);
1811
1812         cpufreq_freq_transition_begin(policy, freqs);
1813         ret = cpufreq_driver->target_intermediate(policy, index);
1814         cpufreq_freq_transition_end(policy, freqs, ret);
1815
1816         if (ret)
1817                 pr_err("%s: Failed to change to intermediate frequency: %d\n",
1818                        __func__, ret);
1819
1820         return ret;
1821 }
1822
1823 static int __target_index(struct cpufreq_policy *policy,
1824                           struct cpufreq_frequency_table *freq_table, int index)
1825 {
1826         struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0};
1827         unsigned int intermediate_freq = 0;
1828         int retval = -EINVAL;
1829         bool notify;
1830
1831         notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
1832         if (notify) {
1833                 /* Handle switching to intermediate frequency */
1834                 if (cpufreq_driver->get_intermediate) {
1835                         retval = __target_intermediate(policy, &freqs, index);
1836                         if (retval)
1837                                 return retval;
1838
1839                         intermediate_freq = freqs.new;
1840                         /* Set old freq to intermediate */
1841                         if (intermediate_freq)
1842                                 freqs.old = freqs.new;
1843                 }
1844
1845                 freqs.new = freq_table[index].frequency;
1846                 pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
1847                          __func__, policy->cpu, freqs.old, freqs.new);
1848
1849                 cpufreq_freq_transition_begin(policy, &freqs);
1850         }
1851
1852         retval = cpufreq_driver->target_index(policy, index);
1853         if (retval)
1854                 pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
1855                        retval);
1856
1857         if (notify) {
1858                 cpufreq_freq_transition_end(policy, &freqs, retval);
1859
1860                 /*
1861                  * Failed after setting to intermediate freq? Driver should have
1862                  * reverted back to initial frequency and so should we. Check
1863                  * here for intermediate_freq instead of get_intermediate, in
1864                  * case we haven't switched to the intermediate freq at all.
1865                  */
1866                 if (unlikely(retval && intermediate_freq)) {
1867                         freqs.old = intermediate_freq;
1868                         freqs.new = policy->restore_freq;
1869                         cpufreq_freq_transition_begin(policy, &freqs);
1870                         cpufreq_freq_transition_end(policy, &freqs, 0);
1871                 }
1872         }
1873
1874         return retval;
1875 }
1876
1877 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1878                             unsigned int target_freq,
1879                             unsigned int relation)
1880 {
1881         unsigned int old_target_freq = target_freq;
1882         int retval = -EINVAL;
1883
1884         if (cpufreq_disabled())
1885                 return -ENODEV;
1886
1887         /* Make sure that target_freq is within supported range */
1888         if (target_freq > policy->max)
1889                 target_freq = policy->max;
1890         if (target_freq < policy->min)
1891                 target_freq = policy->min;
1892
1893         pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
1894                  policy->cpu, target_freq, relation, old_target_freq);
1895
1896         /*
1897          * This might look like a redundant call, as we check it again after
1898          * finding the index. But it is intentional: if the exact same
1899          * frequency is requested again, we save a few function calls.
1901          */
1902         if (target_freq == policy->cur)
1903                 return 0;
1904
1905         /* Save last value to restore later on errors */
1906         policy->restore_freq = policy->cur;
1907
1908         if (cpufreq_driver->target)
1909                 retval = cpufreq_driver->target(policy, target_freq, relation);
1910         else if (cpufreq_driver->target_index) {
1911                 struct cpufreq_frequency_table *freq_table;
1912                 int index;
1913
1914                 freq_table = cpufreq_frequency_get_table(policy->cpu);
1915                 if (unlikely(!freq_table)) {
1916                         pr_err("%s: Unable to find freq_table\n", __func__);
1917                         goto out;
1918                 }
1919
1920                 retval = cpufreq_frequency_table_target(policy, freq_table,
1921                                 target_freq, relation, &index);
1922                 if (unlikely(retval)) {
1923                         pr_err("%s: Unable to find matching freq\n", __func__);
1924                         goto out;
1925                 }
1926
1927                 if (freq_table[index].frequency == policy->cur) {
1928                         retval = 0;
1929                         goto out;
1930                 }
1931
1932                 retval = __target_index(policy, freq_table, index);
1933         }
1934
1935 out:
1936         return retval;
1937 }
1938 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1939
1940 int cpufreq_driver_target(struct cpufreq_policy *policy,
1941                           unsigned int target_freq,
1942                           unsigned int relation)
1943 {
1944         int ret = -EINVAL;
1945
1946         down_write(&policy->rwsem);
1947
1948         ret = __cpufreq_driver_target(policy, target_freq, relation);
1949
1950         up_write(&policy->rwsem);
1951
1952         return ret;
1953 }
1954 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
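
/*
 * Example: a hypothetical request for a new frequency, as a governor
 * would issue it (target_khz is a placeholder). CPUFREQ_RELATION_L
 * selects the lowest frequency at or above the target,
 * CPUFREQ_RELATION_H the highest at or below it; the target is clamped
 * to policy->min/policy->max above.
 *
 *	ret = cpufreq_driver_target(policy, target_khz, CPUFREQ_RELATION_L);
 */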
1955
1956 static int __cpufreq_governor(struct cpufreq_policy *policy,
1957                                         unsigned int event)
1958 {
1959         int ret;
1960
1961         /*
1962          * This fallback governor only needs to be defined when the default
1963          * governor is known to have latency restrictions, e.g. conservative
1964          * or ondemand. Kconfig already ensures that this is the case.
1965          */
1965 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1966         struct cpufreq_governor *gov = &cpufreq_gov_performance;
1967 #else
1968         struct cpufreq_governor *gov = NULL;
1969 #endif
1970
1971         /* Don't start any governor operations if we are entering suspend */
1972         if (cpufreq_suspended)
1973                 return 0;
1974         /*
1975          * The governor might not be initialized here if an ACPI _PPC change
1976          * notification has just happened, so check for it.
1977          */
1978         if (!policy->governor)
1979                 return -EINVAL;
1980
1981         if (policy->governor->max_transition_latency &&
1982             policy->cpuinfo.transition_latency >
1983             policy->governor->max_transition_latency) {
1984                 if (!gov)
1985                         return -EINVAL;
1986                 else {
1987                         pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n",
1988                                 policy->governor->name, gov->name);
1989                         policy->governor = gov;
1990                 }
1991         }
1992
1993         if (event == CPUFREQ_GOV_POLICY_INIT)
1994                 if (!try_module_get(policy->governor->owner))
1995                         return -EINVAL;
1996
1997         pr_debug("__cpufreq_governor for CPU %u, event %u\n",
1998                  policy->cpu, event);
1999
2000         mutex_lock(&cpufreq_governor_lock);
2001         if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
2002             || (!policy->governor_enabled
2003             && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
2004                 mutex_unlock(&cpufreq_governor_lock);
2005                 return -EBUSY;
2006         }
2007
2008         if (event == CPUFREQ_GOV_STOP)
2009                 policy->governor_enabled = false;
2010         else if (event == CPUFREQ_GOV_START)
2011                 policy->governor_enabled = true;
2012
2013         mutex_unlock(&cpufreq_governor_lock);
2014
2015         ret = policy->governor->governor(policy, event);
2016
2017         if (!ret) {
2018                 if (event == CPUFREQ_GOV_POLICY_INIT)
2019                         policy->governor->initialized++;
2020                 else if (event == CPUFREQ_GOV_POLICY_EXIT)
2021                         policy->governor->initialized--;
2022         } else {
2023                 /* Restore original values */
2024                 mutex_lock(&cpufreq_governor_lock);
2025                 if (event == CPUFREQ_GOV_STOP)
2026                         policy->governor_enabled = true;
2027                 else if (event == CPUFREQ_GOV_START)
2028                         policy->governor_enabled = false;
2029                 mutex_unlock(&cpufreq_governor_lock);
2030         }
2031
2032         if (((event == CPUFREQ_GOV_POLICY_INIT) && ret) ||
2033                         ((event == CPUFREQ_GOV_POLICY_EXIT) && !ret))
2034                 module_put(policy->governor->owner);
2035
2036         return ret;
2037 }
2038
2039 int cpufreq_register_governor(struct cpufreq_governor *governor)
2040 {
2041         int err;
2042
2043         if (!governor)
2044                 return -EINVAL;
2045
2046         if (cpufreq_disabled())
2047                 return -ENODEV;
2048
2049         mutex_lock(&cpufreq_governor_mutex);
2050
2051         governor->initialized = 0;
2052         err = -EBUSY;
2053         if (!find_governor(governor->name)) {
2054                 err = 0;
2055                 list_add(&governor->governor_list, &cpufreq_governor_list);
2056         }
2057
2058         mutex_unlock(&cpufreq_governor_mutex);
2059         return err;
2060 }
2061 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
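
/*
 * Example: a hypothetical governor skeleton (foo_* names are
 * placeholders). A single ->governor() callback multiplexes all of the
 * lifecycle events dispatched by __cpufreq_governor() above:
 * POLICY_INIT/POLICY_EXIT allocate and free per-policy state, START and
 * STOP begin and end sampling, and LIMITS reacts to min/max changes.
 *
 *	static int foo_governor(struct cpufreq_policy *policy,
 *				unsigned int event)
 *	{
 *		switch (event) {
 *		case CPUFREQ_GOV_POLICY_INIT:
 *		case CPUFREQ_GOV_START:
 *		case CPUFREQ_GOV_LIMITS:
 *		case CPUFREQ_GOV_STOP:
 *		case CPUFREQ_GOV_POLICY_EXIT:
 *			break;
 *		}
 *		return 0;
 *	}
 *
 *	static struct cpufreq_governor cpufreq_gov_foo = {
 *		.name		= "foo",
 *		.governor	= foo_governor,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	ret = cpufreq_register_governor(&cpufreq_gov_foo);
 */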
2062
2063 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
2064 {
2065         int cpu;
2066
2067         if (!governor)
2068                 return;
2069
2070         if (cpufreq_disabled())
2071                 return;
2072
2073         for_each_present_cpu(cpu) {
2074                 if (cpu_online(cpu))
2075                         continue;
2076                 if (!strcmp(per_cpu(cpufreq_cpu_governor, cpu), governor->name))
2077                         strcpy(per_cpu(cpufreq_cpu_governor, cpu), "\0");
2078         }
2079
2080         mutex_lock(&cpufreq_governor_mutex);
2081         list_del(&governor->governor_list);
2082         mutex_unlock(&cpufreq_governor_mutex);
2083         return;
2084 }
2085 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
2086
2087
2088 /*********************************************************************
2089  *                          POLICY INTERFACE                         *
2090  *********************************************************************/
2091
2092 /**
2093  * cpufreq_get_policy - get the current cpufreq_policy
2094  * @policy: struct cpufreq_policy into which the current cpufreq_policy
2095  *      is written
2096  * @cpu: CPU to find the policy for
2096  *
2097  * Reads the current cpufreq policy.
2098  */
2099 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
2100 {
2101         struct cpufreq_policy *cpu_policy;
2102         if (!policy)
2103                 return -EINVAL;
2104
2105         cpu_policy = cpufreq_cpu_get(cpu);
2106         if (!cpu_policy)
2107                 return -EINVAL;
2108
2109         memcpy(policy, cpu_policy, sizeof(*policy));
2110
2111         cpufreq_cpu_put(cpu_policy);
2112         return 0;
2113 }
2114 EXPORT_SYMBOL(cpufreq_get_policy);
2115
2116 /*
2117  * policy: current policy.
2118  * new_policy: policy to be set.
2119  */
2120 static int cpufreq_set_policy(struct cpufreq_policy *policy,
2121                                 struct cpufreq_policy *new_policy)
2122 {
2123         struct cpufreq_governor *old_gov;
2124         int ret;
2125
2126         pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
2127                  new_policy->cpu, new_policy->min, new_policy->max);
2128
2129         memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
2130
2131         if (new_policy->min > policy->max || new_policy->max < policy->min)
2132                 return -EINVAL;
2133
2134         /* verify the cpu speed can be set within this limit */
2135         ret = cpufreq_driver->verify(new_policy);
2136         if (ret)
2137                 return ret;
2138
2139         /* adjust if necessary - all reasons */
2140         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
2141                         CPUFREQ_ADJUST, new_policy);
2142
2143         /* adjust if necessary - hardware incompatibility*/
2144         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
2145         /* adjust if necessary - hardware incompatibility */
2146
2147         /*
2148          * verify the cpu speed can be set within this limit, which might be
2149          * different from the first one
2150          */
2151         ret = cpufreq_driver->verify(new_policy);
2152         if (ret)
2153                 return ret;
2154
2155         /* notification of the new policy */
2156         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
2157                         CPUFREQ_NOTIFY, new_policy);
2158
2159         policy->min = new_policy->min;
2160         policy->max = new_policy->max;
2161
2162         pr_debug("new min and max freqs are %u - %u kHz\n",
2163                  policy->min, policy->max);
2164
2165         if (cpufreq_driver->setpolicy) {
2166                 policy->policy = new_policy->policy;
2167                 pr_debug("setting range\n");
2168                 return cpufreq_driver->setpolicy(new_policy);
2169         }
2170
2171         if (new_policy->governor == policy->governor)
2172                 goto out;
2173
2174         pr_debug("governor switch\n");
2175
2176         /* save old, working values */
2177         old_gov = policy->governor;
2178         /* end old governor */
2179         if (old_gov) {
2180                 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
2181                 up_write(&policy->rwsem);
2182                 __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
2183                 down_write(&policy->rwsem);
2184         }
2185
2186         /* start new governor */
2187         policy->governor = new_policy->governor;
2188         if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) {
2189                 if (!__cpufreq_governor(policy, CPUFREQ_GOV_START))
2190                         goto out;
2191
2192                 up_write(&policy->rwsem);
2193                 __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
2194                 down_write(&policy->rwsem);
2195         }
2196
2197         /* new governor failed, so re-start old one */
2198         pr_debug("starting governor %s failed\n", policy->governor->name);
2199         if (old_gov) {
2200                 policy->governor = old_gov;
2201                 __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
2202                 __cpufreq_governor(policy, CPUFREQ_GOV_START);
2203         }
2204
2205         return -EINVAL;
2206
2207  out:
2208         pr_debug("governor: change or update limits\n");
2209         return __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
2210 }
2211
2212 /**
2213  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
2214  *      @cpu: CPU which shall be re-evaluated
2215  *
2216  *      Useful for policy notifiers which have different requirements
2217  *      at different times.
2218  */
2219 int cpufreq_update_policy(unsigned int cpu)
2220 {
2221         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
2222         struct cpufreq_policy new_policy;
2223         int ret;
2224
2225         if (!policy)
2226                 return -ENODEV;
2227
2228         down_write(&policy->rwsem);
2229
2230         pr_debug("updating policy for CPU %u\n", cpu);
2231         memcpy(&new_policy, policy, sizeof(*policy));
2232         new_policy.min = policy->user_policy.min;
2233         new_policy.max = policy->user_policy.max;
2234         new_policy.policy = policy->user_policy.policy;
2235         new_policy.governor = policy->user_policy.governor;
2236
2237         /*
2238          * BIOS might change freq behind our back
2239          * -> ask driver for current freq and notify governors about a change
2240          */
2241         if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
2242                 new_policy.cur = cpufreq_driver->get(cpu);
2243                 if (WARN_ON(!new_policy.cur)) {
2244                         ret = -EIO;
2245                         goto unlock;
2246                 }
2247
2248                 if (!policy->cur) {
2249                         pr_debug("Driver did not initialize current freq\n");
2250                         policy->cur = new_policy.cur;
2251                 } else {
2252                         if (policy->cur != new_policy.cur && has_target())
2253                                 cpufreq_out_of_sync(policy, new_policy.cur);
2254                 }
2255         }
2256
2257         ret = cpufreq_set_policy(policy, &new_policy);
2258
2259 unlock:
2260         up_write(&policy->rwsem);
2261
2262         cpufreq_cpu_put(policy);
2263         return ret;
2264 }
2265 EXPORT_SYMBOL(cpufreq_update_policy);
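
/*
 * Example: hypothetical platform code reacting to a firmware-initiated
 * limit change (this mirrors what the ACPI processor driver does on a
 * _PPC change notification):
 *
 *	ret = cpufreq_update_policy(cpu);
 */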
2266
2267 static int cpufreq_cpu_callback(struct notifier_block *nfb,
2268                                         unsigned long action, void *hcpu)
2269 {
2270         unsigned int cpu = (unsigned long)hcpu;
2271         struct device *dev;
2272
2273         dev = get_cpu_device(cpu);
2274         if (dev) {
2275                 switch (action & ~CPU_TASKS_FROZEN) {
2276                 case CPU_ONLINE:
2277                         __cpufreq_add_dev(dev, NULL);
2278                         break;
2279
2280                 case CPU_DOWN_PREPARE:
2281                         __cpufreq_remove_dev_prepare(dev, NULL);
2282                         break;
2283
2284                 case CPU_POST_DEAD:
2285                         __cpufreq_remove_dev_finish(dev, NULL);
2286                         break;
2287
2288                 case CPU_DOWN_FAILED:
2289                         __cpufreq_add_dev(dev, NULL);
2290                         break;
2291                 }
2292         }
2293         return NOTIFY_OK;
2294 }
2295
2296 static struct notifier_block __refdata cpufreq_cpu_notifier = {
2297         .notifier_call = cpufreq_cpu_callback,
2298 };
2299
2300 /*********************************************************************
2301  *               BOOST                                               *
2302  *********************************************************************/
2303 static int cpufreq_boost_set_sw(int state)
2304 {
2305         struct cpufreq_frequency_table *freq_table;
2306         struct cpufreq_policy *policy;
2307         int ret = -EINVAL;
2308
2309         for_each_policy(policy) {
2310                 freq_table = cpufreq_frequency_get_table(policy->cpu);
2311                 if (freq_table) {
2312                         ret = cpufreq_frequency_table_cpuinfo(policy,
2313                                                         freq_table);
2314                         if (ret) {
2315                                 pr_err("%s: Policy frequency update failed\n",
2316                                        __func__);
2317                                 break;
2318                         }
2319                         policy->user_policy.max = policy->max;
2320                         __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
2321                 }
2322         }
2323
2324         return ret;
2325 }
2326
2327 int cpufreq_boost_trigger_state(int state)
2328 {
2329         unsigned long flags;
2330         int ret = 0;
2331
2332         if (cpufreq_driver->boost_enabled == state)
2333                 return 0;
2334
2335         write_lock_irqsave(&cpufreq_driver_lock, flags);
2336         cpufreq_driver->boost_enabled = state;
2337         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2338
2339         ret = cpufreq_driver->set_boost(state);
2340         if (ret) {
2341                 write_lock_irqsave(&cpufreq_driver_lock, flags);
2342                 cpufreq_driver->boost_enabled = !state;
2343                 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2344
2345                 pr_err("%s: Cannot %s BOOST\n",
2346                        __func__, state ? "enable" : "disable");
2347         }
2348
2349         return ret;
2350 }
2351
2352 int cpufreq_boost_supported(void)
2353 {
2354         if (likely(cpufreq_driver))
2355                 return cpufreq_driver->boost_supported;
2356
2357         return 0;
2358 }
2359 EXPORT_SYMBOL_GPL(cpufreq_boost_supported);
2360
2361 int cpufreq_boost_enabled(void)
2362 {
2363         return cpufreq_driver->boost_enabled;
2364 }
2365 EXPORT_SYMBOL_GPL(cpufreq_boost_enabled);
2366
2367 /*********************************************************************
2368  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
2369  *********************************************************************/
2370
2371 /**
2372  * cpufreq_register_driver - register a CPU Frequency driver
2373  * @driver_data: A struct cpufreq_driver containing the values
2374  * submitted by the CPU Frequency driver.
2375  *
2376  * Registers a CPU Frequency driver with this core code. Returns zero on
2377  * success, -EEXIST when another driver got here first (and is not
2378  * unregistered in the meantime), or another negative error code otherwise.
2380  */
2381 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
2382 {
2383         unsigned long flags;
2384         int ret;
2385
2386         if (cpufreq_disabled())
2387                 return -ENODEV;
2388
2389         if (!driver_data || !driver_data->verify || !driver_data->init ||
2390             !(driver_data->setpolicy || driver_data->target_index ||
2391                     driver_data->target) ||
2392              (driver_data->setpolicy && (driver_data->target_index ||
2393                     driver_data->target)) ||
2394              (!!driver_data->get_intermediate != !!driver_data->target_intermediate))
2395                 return -EINVAL;
2396
2397         pr_debug("trying to register driver %s\n", driver_data->name);
2398
2399         write_lock_irqsave(&cpufreq_driver_lock, flags);
2400         if (cpufreq_driver) {
2401                 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2402                 return -EEXIST;
2403         }
2404         cpufreq_driver = driver_data;
2405         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2406
2407         if (driver_data->setpolicy)
2408                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
2409
2410         if (cpufreq_boost_supported()) {
2411                 /*
2412                  * Check if driver provides function to enable boost -
2413                  * if not, use cpufreq_boost_set_sw as default
2414                  */
2415                 if (!cpufreq_driver->set_boost)
2416                         cpufreq_driver->set_boost = cpufreq_boost_set_sw;
2417
2418                 ret = cpufreq_sysfs_create_file(&boost.attr);
2419                 if (ret) {
2420                         pr_err("%s: cannot register global BOOST sysfs file\n",
2421                                __func__);
2422                         goto err_null_driver;
2423                 }
2424         }
2425
2426         ret = subsys_interface_register(&cpufreq_interface);
2427         if (ret)
2428                 goto err_boost_unreg;
2429
2430         if (!(cpufreq_driver->flags & CPUFREQ_STICKY) &&
2431             list_empty(&cpufreq_policy_list)) {
2432                 /* if all ->init() calls failed, unregister */
2433                 pr_debug("%s: No CPU initialized for driver %s\n", __func__,
2434                          driver_data->name);
2435                 goto err_if_unreg;
2436         }
2437
2438         register_hotcpu_notifier(&cpufreq_cpu_notifier);
2439         pr_debug("driver %s up and running\n", driver_data->name);
2440
2441         return 0;
2442 err_if_unreg:
2443         subsys_interface_unregister(&cpufreq_interface);
2444 err_boost_unreg:
2445         if (cpufreq_boost_supported())
2446                 cpufreq_sysfs_remove_file(&boost.attr);
2447 err_null_driver:
2448         write_lock_irqsave(&cpufreq_driver_lock, flags);
2449         cpufreq_driver = NULL;
2450         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2451         return ret;
2452 }
2453 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
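
/*
 * Example: a hypothetical minimal driver registration (foo_* names are
 * placeholders). A driver must provide ->verify and ->init plus exactly
 * one of ->setpolicy or ->target/->target_index, as checked above. The
 * generic frequency-table helpers are used for ->verify and ->init here:
 *
 *	static struct cpufreq_frequency_table foo_freq_table[] = {
 *		{ .frequency = 400000 },
 *		{ .frequency = 800000 },
 *		{ .frequency = CPUFREQ_TABLE_END },
 *	};
 *
 *	static int foo_target_index(struct cpufreq_policy *policy,
 *				    unsigned int index)
 *	{
 *		return foo_set_rate(foo_freq_table[index].frequency);
 *	}
 *
 *	static int foo_init(struct cpufreq_policy *policy)
 *	{
 *		return cpufreq_generic_init(policy, foo_freq_table, 100000);
 *	}
 *
 *	static struct cpufreq_driver foo_cpufreq_driver = {
 *		.name		= "foo",
 *		.verify		= cpufreq_generic_frequency_table_verify,
 *		.target_index	= foo_target_index,
 *		.init		= foo_init,
 *	};
 *
 *	ret = cpufreq_register_driver(&foo_cpufreq_driver);
 */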
2454
2455 /**
2456  * cpufreq_unregister_driver - unregister the current CPUFreq driver
2457  *
2458  * Unregister the current CPUFreq driver. Only call this if you have
2459  * the right to do so, i.e. if you successfully registered it before.
2460  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
2461  * currently not initialised.
2462  */
2463 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
2464 {
2465         unsigned long flags;
2466
2467         if (!cpufreq_driver || (driver != cpufreq_driver))
2468                 return -EINVAL;
2469
2470         pr_debug("unregistering driver %s\n", driver->name);
2471
2472         /* Protect against concurrent cpu hotplug */
2473         get_online_cpus();
2474         subsys_interface_unregister(&cpufreq_interface);
2475         if (cpufreq_boost_supported())
2476                 cpufreq_sysfs_remove_file(&boost.attr);
2477
2478         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
2479
2480         write_lock_irqsave(&cpufreq_driver_lock, flags);
2481
2482         cpufreq_driver = NULL;
2483
2484         write_unlock_irqrestore(&cpufreq_driver_lock, flags);
2485         put_online_cpus();
2486
2487         return 0;
2488 }
2489 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
2490
2491 /*
2492  * Stop cpufreq at shutdown to make sure it isn't holding any locks
2493  * or mutexes when secondary CPUs are halted.
2494  */
2495 static struct syscore_ops cpufreq_syscore_ops = {
2496         .shutdown = cpufreq_suspend,
2497 };
2498
2499 static int __init cpufreq_core_init(void)
2500 {
2501         if (cpufreq_disabled())
2502                 return -ENODEV;
2503
2504         cpufreq_global_kobject = kobject_create();
2505         BUG_ON(!cpufreq_global_kobject);
2506
2507         register_syscore_ops(&cpufreq_syscore_ops);
2508
2509         return 0;
2510 }
2511 core_initcall(cpufreq_core_init);