android_kernel_lenovo_1050f/kernel/sched/consolidation.c

/*
* CPU ConCurrency (CC) measures the CPU load by averaging
* the number of running tasks. Using CC, the scheduler can
* evaluate the load of CPUs to improve load balance for power
* efficiency without sacrificing performance.
*
* Copyright (C) 2013 Intel, Inc.,
*
* Author: Du, Yuyang <yuyang.du@intel.com>
*
* CPU Workload Consolidation consolidates the workload onto the smallest
* number of CPUs that are capable of handling it. We measure the
* capability of a CPU by CC, then compare it with a threshold,
* and finally run the workload on the non-shielded CPUs if they are
* predicted to be capable after the consolidation.
*
* Copyright (C) 2013 Intel, Inc.,
*
* Author: Rudramuni, Vishwesh M <vishwesh.m.rudramuni@intel.com>
* Du, Yuyang <yuyang.du@intel.com>
*
*/
#ifdef CONFIG_CPU_CONCURRENCY
#include "sched.h"
/*
* the sum period of time is 2^26 ns (~64 ms) by default
*/
unsigned long sysctl_concurrency_sum_period = 26UL;
/*
* the number of sum periods, after which the original
* will be reduced/decayed to half
*/
unsigned long sysctl_concurrency_decay_rate = 1UL;
/*
* the contrib period of time is 2^10 ns (~1 us) by default;
* us has better precision than ms, and
* 1024 allows a faster shift instead of a division
*/
static unsigned long cc_contrib_period = 10UL;
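/*
* worked numbers for the defaults above (an illustrative note; the "~64 ms"
* and "~1 us" figures use the code's own 2^20 ns ~ 1 ms and 2^10 ns ~ 1 us
* approximations):
*
*	sum period:	1 << 26 ns = 67,108,864 ns (i.e. ~67 ms)
*	contrib period:	1 << 10 ns = 1,024 ns
*	ratio:		1 << (26 - 10) = 65,536 contrib periods per sum period
*
* so cc_contrib_pds(cc_timestamp(1)), used later as contrib_pds_one,
* evaluates to 65536
*/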
#ifdef CONFIG_WORKLOAD_CONSOLIDATION
/*
* whether we use concurrency to select the cpu to run
* the woken-up task
*/
static unsigned long wc_wakeup = 1UL;
/*
* a cpu whose concurrency is lower than this percentage
* of one full-time task is considered capable of running the wakee
*/
static unsigned long wc_wakeup_threshold = 80UL;
/*
* aggressively push the task even if it is cache hot
*/
static unsigned long wc_push_hot_task = 1UL;
#endif
/*
* the concurrency is scaled up for decaying;
* thus, a concurrency of 1 is effectively 2^cc_resolution (1024),
* which can be halved 10 times before decaying below 1
*/
static unsigned long cc_resolution = 10UL;
/*
* after this number of half-life periods, even
* (1<<32)-1 (which is sufficiently large) decays to less than 1
*/
static unsigned long cc_decay_max_pds = 32UL;
static inline unsigned long cc_scale_up(unsigned long c)
{
return c << cc_resolution;
}
static inline unsigned long cc_scale_down(unsigned long c)
{
return c >> cc_resolution;
}
/* from nanoseconds to sum periods */
static inline u64 cc_sum_pds(u64 n)
{
return n >> sysctl_concurrency_sum_period;
}
/* from sum period to timestamp in ns */
static inline u64 cc_timestamp(u64 p)
{
return p << sysctl_concurrency_sum_period;
}
/*
* from nanoseconds to contrib periods; accumulating
* raw ns would risk overflowing cc->contrib
*/
static inline u64 cc_contrib_pds(u64 n)
{
return n >> cc_contrib_period;
}
/*
* cc_decay_factor only works for 32-bit integers;
* in cc_decay_factor_x, x indicates the number of periods
* per half-life (sysctl_concurrency_decay_rate)
*/
static const unsigned long cc_decay_factor_1[] = {
0xFFFFFFFF,
};
static const unsigned long cc_decay_factor_2[] = {
0xFFFFFFFF, 0xB504F333,
};
static const unsigned long cc_decay_factor_4[] = {
0xFFFFFFFF, 0xD744FCCA, 0xB504F333, 0x9837F051,
};
static const unsigned long cc_decay_factor_8[] = {
0xFFFFFFFF, 0xEAC0C6E7, 0xD744FCCA, 0xC5672A11,
0xB504F333, 0xA5FED6A9, 0x9837F051, 0x8B95C1E3,
};
/* selected by sysctl_concurrency_decay_rate (default 1) */
static const unsigned long *cc_decay_factor =
cc_decay_factor_1;
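/*
* where the factor tables appear to come from (an illustrative derivation,
* not taken from the original sources): cc_decay_factor_x[k] is the Q32
* fixed-point value of 2^(-k/x), i.e. 2^32 * 2^(-k/x), with entry 0
* saturated to 0xFFFFFFFF. For example, with x = 2 and k = 1:
*
*	2^32 / sqrt(2) = 4294967296 * 0.70710678... ~= 3037000499 = 0xB504F333
*
* which matches cc_decay_factor_2[1]. Multiplying a concurrency value by a
* factor and shifting right by 32, as decay_cc() does, therefore decays it
* by k/x half-lives.
*/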
/*
* cc_decayed_sum depends on cc_resolution (fixed at 10);
* in cc_decayed_sum_x, x indicates the number of periods
* per half-life (sysctl_concurrency_decay_rate)
*/
static const unsigned long cc_decayed_sum_1[] = {
0, 512, 768, 896, 960, 992,
1008, 1016, 1020, 1022, 1023,
};
static const unsigned long cc_decayed_sum_2[] = {
0, 724, 1235, 1597, 1853, 2034, 2162, 2252,
2316, 2361, 2393, 2416, 2432, 2443, 2451,
2457, 2461, 2464, 2466, 2467, 2468, 2469,
};
static const unsigned long cc_decayed_sum_4[] = {
0, 861, 1585, 2193, 2705, 3135, 3497, 3801, 4057,
4272, 4453, 4605, 4733, 4840, 4930, 5006, 5070,
5124, 5169, 5207, 5239, 5266, 5289, 5308, 5324,
5337, 5348, 5358, 5366, 5373, 5379, 5384, 5388,
5391, 5394, 5396, 5398, 5400, 5401, 5402, 5403,
5404, 5405, 5406,
};
static const unsigned long cc_decayed_sum_8[] = {
0, 939, 1800, 2589, 3313, 3977, 4585, 5143,
5655, 6124, 6554, 6949, 7311, 7643, 7947, 8226,
8482, 8717, 8932, 9129, 9310, 9476, 9628, 9767,
9895, 10012, 10120, 10219, 10309, 10392, 10468, 10538,
10602, 10661, 10715, 10764, 10809, 10850, 10888, 10923,
10955, 10984, 11011, 11036, 11059, 11080, 11099, 11116,
11132, 11147, 11160, 11172, 11183, 11193, 11203, 11212,
11220, 11227, 11234, 11240, 11246, 11251, 11256, 11260,
11264, 11268, 11271, 11274, 11277, 11280, 11282, 11284,
11286, 11288, 11290, 11291, 11292, 11293, 11294, 11295,
11296, 11297, 11298, 11299, 11300, 11301, 11302,
};
/* selected by sysctl_concurrency_decay_rate (default 1) */
static const unsigned long *cc_decayed_sum = cc_decayed_sum_1;
/*
* the last index of cc_decayed_sum array
*/
static unsigned long cc_decayed_sum_len =
sizeof(cc_decayed_sum_1) / sizeof(cc_decayed_sum_1[0]) - 1;
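/*
* likewise, cc_decayed_sum_x[n] appears to be the already-decayed total
* contribution of one task over n whole sum periods, i.e. approximately
* the sum over k = 1..n of 1024 * 2^(-k/x); a worked check for x = 1:
*
*	n = 1: 512
*	n = 2: 512 + 256 = 768
*	n = 3: 512 + 256 + 128 = 896
*	...
*	n -> inf: 1023 (the geometric series converges to 1024)
*
* cc_missed_pds() below uses these sums to credit whole periods during
* which no update ran, assuming nr_running stayed constant
*/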
/*
* sysctl handler to update decay rate
*/
int concurrency_decay_rate_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write)
return ret;
switch (sysctl_concurrency_decay_rate) {
case 1:
cc_decay_factor = cc_decay_factor_1;
cc_decayed_sum = cc_decayed_sum_1;
cc_decayed_sum_len = sizeof(cc_decayed_sum_1) /
sizeof(cc_decayed_sum_1[0]) - 1;
break;
case 2:
cc_decay_factor = cc_decay_factor_2;
cc_decayed_sum = cc_decayed_sum_2;
cc_decayed_sum_len = sizeof(cc_decayed_sum_2) /
sizeof(cc_decayed_sum_2[0]) - 1;
break;
case 4:
cc_decay_factor = cc_decay_factor_4;
cc_decayed_sum = cc_decayed_sum_4;
cc_decayed_sum_len = sizeof(cc_decayed_sum_4) /
sizeof(cc_decayed_sum_4[0]) - 1;
break;
case 8:
cc_decay_factor = cc_decay_factor_8;
cc_decayed_sum = cc_decayed_sum_8;
cc_decayed_sum_len = sizeof(cc_decayed_sum_8) /
sizeof(cc_decayed_sum_8[0]) - 1;
break;
default:
return -EINVAL;
}
/* rescale from the base of 32, not cumulatively across writes */
cc_decay_max_pds = 32UL * sysctl_concurrency_decay_rate;
return 0;
}
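/*
* how this handler would typically be wired up (a hypothetical sketch; the
* procname is an assumption and the real ctl_table entry, presumably in
* kernel/sysctl.c, is not part of this file):
*
*	{
*		.procname	= "sched_cc_decay_rate",
*		.data		= &sysctl_concurrency_decay_rate,
*		.maxlen		= sizeof(sysctl_concurrency_decay_rate),
*		.mode		= 0644,
*		.proc_handler	= concurrency_decay_rate_handler,
*	},
*
* writing 1, 2, 4 or 8 to the resulting /proc/sys entry switches the decay
* tables; any other value is rejected with -EINVAL
*/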
/*
* decay concurrency at some decay rate
*/
static inline u64 decay_cc(u64 cc, u64 periods)
{
u32 periods_l;
if (periods <= 0)
return cc;
if (unlikely(periods >= cc_decay_max_pds))
return 0;
/* now period is not too large */
periods_l = (u32)periods;
if (periods_l >= sysctl_concurrency_decay_rate) {
cc >>= periods_l / sysctl_concurrency_decay_rate;
periods_l %= sysctl_concurrency_decay_rate;
}
if (!periods_l)
return cc;
cc *= cc_decay_factor[periods_l];
return cc >> 32;
}
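/*
* worked example for decay_cc() (illustrative): with
* sysctl_concurrency_decay_rate == 2 and cc == 4096 (a concurrency of 4
* scaled up by cc_resolution), decaying over 3 sum periods does
*
*	whole half-lives:	4096 >> (3 / 2)			= 2048
*	remaining period:	(2048 * 0xB504F333) >> 32	~= 1448
*
* i.e. roughly 4096 * 2^(-3/2), as expected
*/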
/*
* add missed periods by predefined constants
*/
static inline u64 cc_missed_pds(u64 periods)
{
if (periods <= 0)
return 0;
if (periods > cc_decayed_sum_len)
periods = cc_decayed_sum_len;
return cc_decayed_sum[periods];
}
/*
* scale up nr_running, because we decay
*/
static inline unsigned long cc_weight(unsigned long nr_running)
{
/*
* scaling factor, this should be tunable
*/
return cc_scale_up(nr_running);
}
static inline void
__update_concurrency(struct rq *rq, u64 now, struct cpu_concurrency_t *cc)
{
u64 sum_pds, sum_pds_s, sum_pds_e;
u64 contrib_pds, ts_contrib, contrib_pds_one;
u64 sum_now;
unsigned long weight;
int updated = 0;
/*
* guarantee contrib_timestamp always >= sum_timestamp,
* and sum_timestamp is at period boundary
*/
if (now <= cc->sum_timestamp) {
cc->sum_timestamp = cc_timestamp(cc_sum_pds(now));
cc->contrib_timestamp = now;
return;
}
weight = cc_weight(cc->nr_running);
/* start and end of sum periods */
sum_pds_s = cc_sum_pds(cc->sum_timestamp);
sum_pds_e = cc_sum_pds(now);
sum_pds = sum_pds_e - sum_pds_s;
/* number of contrib periods in one sum period */
contrib_pds_one = cc_contrib_pds(cc_timestamp(1));
/*
* if we have passed at least one period,
* we need to do four things:
*/
if (sum_pds) {
/* 1) complete the last period */
ts_contrib = cc_timestamp(sum_pds_s + 1);
contrib_pds = cc_contrib_pds(ts_contrib);
contrib_pds -= cc_contrib_pds(cc->contrib_timestamp);
if (likely(contrib_pds))
cc->contrib += weight * contrib_pds;
cc->contrib = div64_u64(cc->contrib, contrib_pds_one);
cc->sum += cc->contrib;
cc->contrib = 0;
/* 2) update/decay them */
cc->sum = decay_cc(cc->sum, sum_pds);
sum_now = decay_cc(cc->sum, sum_pds - 1);
/* 3) compensate missed periods if any */
sum_pds -= 1;
cc->sum += cc->nr_running * cc_missed_pds(sum_pds);
sum_now += cc->nr_running * cc_missed_pds(sum_pds - 1);
updated = 1;
/* 4) update contrib timestamp to period boundary */
ts_contrib = cc_timestamp(sum_pds_e);
cc->sum_timestamp = ts_contrib;
cc->contrib_timestamp = ts_contrib;
}
/* current period */
contrib_pds = cc_contrib_pds(now);
contrib_pds -= cc_contrib_pds(cc->contrib_timestamp);
if (likely(contrib_pds))
cc->contrib += weight * contrib_pds;
/* new nr_running for next update */
cc->nr_running = rq->nr_running;
/*
* account for the current (incomplete) sum period: if more than
* half of it has passed, include its contribution in sum_now;
* otherwise keep the value from the last complete sum period
*/
contrib_pds = cc_contrib_pds(now - cc->sum_timestamp);
if (contrib_pds > contrib_pds_one / 2) {
sum_now = div64_u64(cc->contrib, contrib_pds);
sum_now += cc->sum;
updated = 1;
}
if (updated == 1)
cc->sum_now = sum_now;
cc->contrib_timestamp = now;
}
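/*
* an illustrative timeline of what __update_concurrency() does (a sketch of
* steps 1)-4) above, not extra documentation from the authors): '|' marks
* sum-period boundaries, S and C are sum_timestamp and contrib_timestamp on
* entry, and 'now' falls a few boundaries later:
*
*	     S       C                          now
*	 ... |-------x------|----------|---------x--| ...
*	      (last period)  (missed)    (current)
*
* the partial contrib from C to the end of its period completes the last
* period and is folded into cc->sum; cc->sum is then decayed by the number
* of boundaries crossed; the fully missed periods are credited via
* cc_missed_pds() assuming nr_running did not change; finally both
* timestamps move to the boundary just before 'now', and the current
* (incomplete) period starts accumulating in cc->contrib again
*/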
void init_cpu_concurrency(struct rq *rq)
{
rq->concurrency.sum = 0;
rq->concurrency.sum_now = 0;
rq->concurrency.contrib = 0;
rq->concurrency.nr_running = 0;
rq->concurrency.sum_timestamp = ULLONG_MAX;
rq->concurrency.contrib_timestamp = ULLONG_MAX;
#ifdef CONFIG_WORKLOAD_CONSOLIDATION
rq->concurrency.unload = 0;
#endif
}
/*
* we update cpu concurrency at:
* 1) enqueue task, which increases concurrency
* 2) dequeue task, which decreases concurrency
* 3) periodic scheduler tick, in case no en/dequeue for long
* 4) enter and exit idle (necessary?)
*/
void update_cpu_concurrency(struct rq *rq)
{
/*
* protected under rq->lock
*/
struct cpu_concurrency_t *cc = &rq->concurrency;
u64 now = rq->clock;
__update_concurrency(rq, now, cc);
}
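/*
* a hypothetical sketch of the call sites listed above (the real hooks live
* in the core scheduler code, not in this file, and may differ in detail):
* each update is a one-liner made with rq->lock already held, e.g.
*
*	static void enqueue_task_example(struct rq *rq, struct task_struct *p,
*					 int flags)
*	{
*		update_cpu_concurrency(rq);
*		...
*	}
*
* with matching calls in the dequeue path and the periodic scheduler tick,
* so that long stretches without enqueue/dequeue activity still decay
*/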
#endif
#ifdef CONFIG_WORKLOAD_CONSOLIDATION
/*
* whether cpu is capable of having more concurrency
*/
static int cpu_cc_capable(int cpu)
{
u64 sum = cpu_rq(cpu)->concurrency.sum_now;
u64 threshold = cc_weight(1);
sum *= 100;
sum *= cpu_rq(cpu)->cpu_power;
threshold *= wc_wakeup_threshold;
threshold <<= SCHED_POWER_SHIFT;
if (sum <= threshold)
return 1;
return 0;
}
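/*
* worked example for cpu_cc_capable() (illustrative, assuming the nominal
* cpu_power of SCHED_POWER_SCALE == 1024): with wc_wakeup_threshold == 80
* the test reduces to sum_now <= 0.8 * cc_weight(1), i.e. the cpu is
* capable as long as its averaged concurrency stays below 80% of one
* always-running task:
*
*	sum_now = 700:	700 * 100 * 1024 <= 1024 * 80 * 1024	-> capable
*	sum_now = 900:	900 * 100 * 1024 >  1024 * 80 * 1024	-> not capable
*/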
/*
* we do not fall back to idle-cpu selection if the prev cpu
* (where the wakee last ran) or the target cpu (the waker's),
* checked in this order, is capable of handling the wakee task
*/
int workload_consolidation_wakeup(int prev, int target)
{
if (!wc_wakeup) {
if (idle_cpu(target))
return target;
return nr_cpu_ids;
}
if (idle_cpu(prev) || cpu_cc_capable(prev))
return prev;
if (prev != target && (idle_cpu(target) || cpu_cc_capable(target)))
return target;
return nr_cpu_ids;
}
static inline u64 sched_group_cc(struct sched_group *sg)
{
u64 sg_cc = 0;
int i;
for_each_cpu(i, sched_group_cpus(sg))
sg_cc += cpu_rq(i)->concurrency.sum_now *
cpu_rq(i)->cpu_power;
return sg_cc;
}
static inline u64 sched_domain_cc(struct sched_domain *sd)
{
struct sched_group *sg = sd->groups;
u64 sd_cc = 0;
do {
sd_cc += sched_group_cc(sg);
sg = sg->next;
} while (sg != sd->groups);
return sd_cc;
}
static inline struct sched_group *
find_lowest_cc_group(struct sched_group *sg, int span)
{
u64 grp_cc, min = ULLONG_MAX;
struct sched_group *lowest = NULL;
int i;
for (i = 0; i < span; ++i) {
grp_cc = sched_group_cc(sg);
if (grp_cc < min) {
min = grp_cc;
lowest = sg;
}
sg = sg->next;
}
return lowest;
}
static inline u64 __calc_cc_thr(int cpus, unsigned int asym_cc)
{
u64 thr = cpus;
thr *= cc_weight(1);
thr *= asym_cc;
thr <<= SCHED_POWER_SHIFT;
return thr;
}
/*
* can @src_cc of @src_nr cpus be consolidated
* to @dst_cc of @dst_nr cpus
*/
static inline int
__can_consolidate_cc(u64 src_cc, int src_nr, u64 dst_cc, int dst_nr)
{
dst_cc *= dst_nr;
src_nr -= dst_nr;
if (unlikely(src_nr <= 0))
return 0;
src_nr = ilog2(src_nr);
src_nr += dst_nr;
src_cc *= src_nr;
if (src_cc > dst_cc)
return 0;
return 1;
}
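/*
* worked example for __can_consolidate_cc() (illustrative numbers): asking
* whether the concurrency of 4 cpus can be consolidated onto 2 of them, the
* test becomes
*
*	src_cc * (dst_nr + ilog2(src_nr - dst_nr)) <= dst_cc * dst_nr
*	src_cc * (2 + ilog2(2))                    <= dst_cc * 2
*	src_cc * 3                                 <= dst_cc * 2
*
* so the whole-domain concurrency may use at most 2/3 of the threshold
* computed for the 2 destination cpus; the ilog2() term is a safety margin
* that grows slowly with the number of cpus being vacated
*/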
/*
* find the group for asymmetric concurrency
* (remaining problem to address: traverse the sd from top to bottom)
*/
struct sched_group *
workload_consolidation_find_group(struct sched_domain *sd,
struct task_struct *p, int this_cpu)
{
int half, sg_weight, ns_half = 0;
struct sched_group *sg;
u64 sd_cc;
half = DIV_ROUND_CLOSEST(sd->total_groups, 2);
sg_weight = sd->groups->group_weight;
sd_cc = sched_domain_cc(sd);
sd_cc *= 100;
while (half) {
int allowed = 0, i;
int cpus = sg_weight * half;
u64 threshold = __calc_cc_thr(cpus,
sd->asym_concurrency);
/*
* we did not consider the added cc by this
* wakeup (mostly from fork/exec)
*/
if (!__can_consolidate_cc(sd_cc, sd->span_weight,
threshold, cpus))
break;
sg = sd->first_group;
for (i = 0; i < half; ++i) {
/* does this group have any allowed cpus? */
if (cpumask_intersects(sched_group_cpus(sg),
tsk_cpus_allowed(p)))
allowed = 1;
/* always advance, so every group in this half is checked */
sg = sg->next;
}
if (!allowed)
break;
ns_half = half;
half /= 2;
}
if (!ns_half)
return NULL;
if (ns_half == 1)
return sd->first_group;
return find_lowest_cc_group(sd->first_group, ns_half);
}
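/*
* example of the halving search above (illustrative): with 8 single-cpu
* groups, the loop tries to consolidate onto 4 groups, then 2, then 1,
* remembering in ns_half the smallest half that still passed
* __can_consolidate_cc() and intersected p's allowed cpus; the task is
* then placed in the lowest-cc group of that surviving half
*/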
/*
* top_flag_domain - return top sched_domain containing flag.
* @cpu: the cpu whose highest level of sched domain is to
* be returned.
* @flag: the flag to check for the highest sched_domain
* for the given cpu.
*
* Returns the highest sched_domain of a cpu that contains the given flag.
* Differs from highest_flag_domain() in that, along the upward domain chain,
* a domain may or may not contain the flag, so the whole chain is scanned.
*/
static inline struct sched_domain *top_flag_domain(int cpu, int flag)
{
struct sched_domain *sd, *hsd = NULL;
for_each_domain(cpu, sd) {
if (!(sd->flags & flag))
continue;
hsd = sd;
}
return hsd;
}
/*
* workload_consolidation_cpu_shielded - return whether @cpu is shielded or not
*
* traverse downward the sched_domain tree when the sched_domain contains
* flag SD_ASYM_CONCURRENCY, each sd may have more than two groups, but
* we assume 1) every sched_group has the same weight, 2) every CPU has
* the same computing power
*/
int workload_consolidation_cpu_shielded(int cpu)
{
struct sched_domain *sd;
sd = top_flag_domain(cpu, SD_ASYM_CONCURRENCY);
while (sd) {
int half, sg_weight, this_sg_nr;
u64 sd_cc;
if (!(sd->flags & SD_ASYM_CONCURRENCY)) {
sd = sd->child;
continue;
}
half = DIV_ROUND_CLOSEST(sd->total_groups, 2);
sg_weight = sd->groups->group_weight;
this_sg_nr = sd->group_number;
sd_cc = sched_domain_cc(sd);
sd_cc *= 100;
while (half) {
int cpus = sg_weight * half;
u64 threshold = __calc_cc_thr(cpus,
sd->asym_concurrency);
if (!__can_consolidate_cc(sd_cc, sd->span_weight,
threshold, cpus))
return 0;
if (this_sg_nr >= half)
return 1;
half /= 2;
}
sd = sd->child;
}
return 0;
}
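/*
* example (illustrative): on a domain with 8 single-cpu groups whose total
* concurrency can be consolidated onto 2 cpus but not onto 1, the loop
* above returns 1 once this_sg_nr >= half, so cpus in groups 2..7 are
* shielded (kept idle) while groups 0..1 keep running the workload
*/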
/*
* as of now, we make the following assumptions:
* 1) every sched_group has the same weight
* 2) every CPU has the same computing power
*/
static inline int __nonshielded_groups(struct sched_domain *sd)
{
int half, sg_weight, ret = 0;
u64 sd_cc;
half = DIV_ROUND_CLOSEST(sd->total_groups, 2);
sg_weight = sd->groups->group_weight;
sd_cc = sched_domain_cc(sd);
sd_cc *= 100;
while (half) {
int cpus = sg_weight * half;
u64 threshold = __calc_cc_thr(cpus,
sd->asym_concurrency);
if (!__can_consolidate_cc(sd_cc, sd->span_weight,
threshold, cpus))
return ret;
ret = half;
half /= 2;
}
return ret;
}
static DEFINE_PER_CPU(struct cpumask, nonshielded_cpumask);
/*
* workload_consolidation_nonshielded_mask - filter @mask down to the
* nonshielded cpus, i.e. clear the shielded cpus from it
*
* traverse downward the sched_domain tree when the sched_domain contains
* flag SD_ASYM_CONCURRENCY, each sd may have more than two groups
*/
void workload_consolidation_nonshielded_mask(int cpu, struct cpumask *mask)
{
struct sched_domain *sd;
struct cpumask *pcpu_mask = &per_cpu(nonshielded_cpumask, cpu);
int i;
sd = top_flag_domain(cpu, SD_ASYM_CONCURRENCY);
if (!sd)
return;
while (sd) {
struct sched_group *sg;
int this_sg_nr, ns_half;
if (!(sd->flags & SD_ASYM_CONCURRENCY)) {
sd = sd->child;
continue;
}
ns_half = __nonshielded_groups(sd);
if (!ns_half)
break;
cpumask_clear(pcpu_mask);
sg = sd->first_group;
for (i = 0; i < ns_half; ++i) {
cpumask_or(pcpu_mask, pcpu_mask,
sched_group_cpus(sg));
sg = sg->next;
}
cpumask_and(mask, mask, pcpu_mask);
this_sg_nr = sd->group_number;
if (this_sg_nr)
break;
sd = sd->child;
}
}
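/*
* a hypothetical usage sketch (the real caller sits in the load-balancing
* path and is not part of this file): the caller starts from a copy of the
* active cpus and lets this function strip the shielded ones, e.g.
*
*	struct cpumask nonshielded;
*
*	cpumask_copy(&nonshielded, cpu_active_mask);
*	workload_consolidation_nonshielded_mask(this_cpu, &nonshielded);
*	workload_consolidation_unload(&nonshielded);
*
* after which idle balancing can be restricted to the nonshielded cpus and
* the shielded cpus are drained via workload_consolidation_unload()
*/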
static int cpu_task_hot(struct task_struct *p, u64 now)
{
s64 delta;
if (p->sched_class != &fair_sched_class)
return 0;
if (unlikely(p->policy == SCHED_IDLE))
return 0;
if (sysctl_sched_migration_cost == -1)
return 1;
if (sysctl_sched_migration_cost == 0)
return 0;
if (wc_push_hot_task)
return 0;
/*
* buddy candidates are cache hot:
*/
if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
(&p->se == p->se.cfs_rq->next ||
&p->se == p->se.cfs_rq->last)) {
return 1;
}
delta = now - p->se.exec_start;
if (delta < (s64)sysctl_sched_migration_cost)
return 1;
return 0;
}
static int
cpu_move_task(struct task_struct *p, struct rq *src_rq, struct rq *dst_rq)
{
/*
* we do not migrate tasks that are:
* 1) running (obviously), or
* 2) cannot be migrated to this CPU due to cpus_allowed, or
* 3) are cache-hot on their current CPU.
*/
if (!cpumask_test_cpu(dst_rq->cpu, tsk_cpus_allowed(p)))
return 0;
if (task_running(src_rq, p))
return 0;
/*
* aggressive migration if task is cache cold
*/
if (!cpu_task_hot(p, src_rq->clock_task)) {
/*
* move a task
*/
deactivate_task(src_rq, p, 0);
set_task_cpu(p, dst_rq->cpu);
activate_task(dst_rq, p, 0);
check_preempt_curr(dst_rq, p, 0);
return 1;
}
return 0;
}
/*
* __unload_cpu_work is run by the src cpu's stopper thread; it pushes
* queued cfs tasks off the src cpu onto the dst cpu
*/
static int __unload_cpu_work(void *data)
{
struct rq *src_rq = data;
int src_cpu = cpu_of(src_rq);
struct cpu_concurrency_t *cc = &src_rq->concurrency;
struct rq *dst_rq = cpu_rq(cc->dst_cpu);
struct list_head *tasks = &src_rq->cfs_tasks;
struct task_struct *p, *n;
int pushed = 0;
int nr_migrate_break = 1;
raw_spin_lock_irq(&src_rq->lock);
/* make sure the requested cpu hasn't gone down in the meantime */
if (unlikely(src_cpu != smp_processor_id() || !cc->unload))
goto out_unlock;
/* Is there any task to move? */
if (src_rq->nr_running <= 1)
goto out_unlock;
double_lock_balance(src_rq, dst_rq);
list_for_each_entry_safe(p, n, tasks, se.group_node) {
if (!cpu_move_task(p, src_rq, dst_rq))
continue;
pushed++;
if (pushed >= nr_migrate_break)
break;
}
double_unlock_balance(src_rq, dst_rq);
out_unlock:
cc->unload = 0;
raw_spin_unlock_irq(&src_rq->lock);
return 0;
}
/*
* unload src_cpu to dst_cpu
*/
static void unload_cpu(int src_cpu, int dst_cpu)
{
unsigned long flags;
struct rq *src_rq = cpu_rq(src_cpu);
struct cpu_concurrency_t *cc = &src_rq->concurrency;
int unload = 0;
raw_spin_lock_irqsave(&src_rq->lock, flags);
if (!cc->unload) {
cc->unload = 1;
cc->dst_cpu = dst_cpu;
unload = 1;
}
raw_spin_unlock_irqrestore(&src_rq->lock, flags);
if (unload)
stop_one_cpu_nowait(src_cpu, __unload_cpu_work, src_rq,
&cc->unload_work);
}
static inline int find_lowest_cc_cpu(struct cpumask *mask)
{
u64 cpu_cc, min = ULLONG_MAX;
int i, lowest = nr_cpu_ids;
struct rq *rq;
for_each_cpu(i, mask) {
rq = cpu_rq(i);
cpu_cc = rq->concurrency.sum_now * rq->cpu_power;
if (cpu_cc < min) {
min = cpu_cc;
lowest = i;
}
}
return lowest;
}
/*
* find the lowest-cc cpu among the shielded cpus and among the
* nonshielded cpus, then aggressively unload the shielded one
* onto the nonshielded one
*/
void workload_consolidation_unload(struct cpumask *nonshielded)
{
int src_cpu = nr_cpu_ids, dst_cpu, i;
u64 cpu_cc, min = ULLONG_MAX;
struct rq *rq;
for_each_cpu_not(i, nonshielded) {
if (i >= nr_cpu_ids)
break;
rq = cpu_rq(i);
if (rq->nr_running <= 0)
continue;
cpu_cc = rq->concurrency.sum_now * rq->cpu_power;
if (cpu_cc < min) {
min = cpu_cc;
src_cpu = i;
}
}
if (src_cpu >= nr_cpu_ids)
return;
dst_cpu = find_lowest_cc_cpu(nonshielded);
if (dst_cpu >= nr_cpu_ids)
return;
if (src_cpu != dst_cpu)
unload_cpu(src_cpu, dst_cpu);
}
#endif /* CONFIG_WORKLOAD_CONSOLIDATION */