Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

Advertisement
[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Advertisement

Kernel v2.6.25-rc6-git6 /kernel/sched_fair.c

Filename: /kernel/sched_fair.c
Lines Added: 118
Lines Deleted: 87
Also changed in: (Previous) 2.6.25-rc6-git5  2.6.25-rc6-git4  2.6.25-rc6-git3  2.6.25-rc6  2.6.25-rc5-git7  2.6.25-rc5-git6 
(Following) 2.6.25-rc6-git7  2.6.25-rc6-git8  2.6.25-rc7  2.6.25-rc8  2.6.25-rc9  2.6.25-rc9-git1 

Location
2.6.25-rc6-git6
  kernel
    sched_fair.c

Patch

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2cc590..86a9337 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,13 +73,13 @@ unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
@@ -302,11 +302,6 @@ static u64 __sched_vslice(unsigned long rq_weight, unsigned long nr_running)
    return vslice;
 }
 
-static u64 sched_vslice(struct cfs_rq *cfs_rq)
-{
-   return __sched_vslice(cfs_rq->load.weight, cfs_rq->nr_running);
-}
-
 static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
    return __sched_vslice(cfs_rq->load.weight + se->load.weight,
@@ -504,15 +499,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
    } else
       vruntime = cfs_rq->min_vruntime;
 
-   if (sched_feat(TREE_AVG)) {
-      struct sched_entity *last = __pick_last_entity(cfs_rq);
-      if (last) {
-         vruntime += last->vruntime;
-         vruntime >>= 1;
-      }
-   } else if (sched_feat(APPROX_AVG) && cfs_rq->nr_running)
-      vruntime += sched_vslice(cfs_rq)/2;
-
    /*
     * The 'current' period is already promised to the current tasks,
     * however the extra weight of the new task will slow them down a
@@ -556,6 +542,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
    account_entity_enqueue(cfs_rq, se);
 }
 
+static void update_avg(u64 *avg, u64 sample)
+{
+   s64 diff = sample - *avg;
+   *avg += diff >> 3;
+}
+
+static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+   if (!se->last_wakeup)
+      return;
+
+   update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
+   se->last_wakeup = 0;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -566,6 +567,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 
    update_stats_dequeue(cfs_rq, se);
    if (sleep) {
+      update_avg_stats(cfs_rq, se);
 #ifdef CONFIG_SCHEDSTATS
       if (entity_is_task(se)) {
          struct task_struct *tsk = task_of(se);
@@ -980,96 +982,121 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 #endif
 
 #ifdef CONFIG_SMP
-static int select_task_rq_fair(struct task_struct *p, int sync)
+
+static const struct sched_class fair_sched_class;
+
+static int
+wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+       struct task_struct *p, int prev_cpu, int this_cpu, int sync,
+       int idx, unsigned long load, unsigned long this_load,
+       unsigned int imbalance)
 {
-   int cpu, this_cpu;
-   struct rq *rq;
-   struct sched_domain *sd, *this_sd = NULL;
-   int new_cpu;
+   struct task_struct *curr = this_rq->curr;
+   unsigned long tl = this_load;
+   unsigned long tl_per_task;
+
+   if (!(this_sd->flags & SD_WAKE_AFFINE))
+      return 0;
+
+   /*
+    * If the currently running task will sleep within
+    * a reasonable amount of time then attract this newly
+    * woken task:
+    */
+   if (sync && curr->sched_class == &fair_sched_class) {
+      if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+            p->se.avg_overlap < sysctl_sched_migration_cost)
+         return 1;
+   }
+
+   schedstat_inc(p, se.nr_wakeups_affine_attempts);
+   tl_per_task = cpu_avg_load_per_task(this_cpu);
+
+   /*
+    * If sync wakeup then subtract the (maximum possible)
+    * effect of the currently running task from the load
+    * of the current CPU:
+    */
+   if (sync)
+      tl -= current->se.load.weight;
+
+   if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
+         100*(tl + p->se.load.weight) <= imbalance*load) {
+      /*
+       * This domain has SD_WAKE_AFFINE and
+       * p is cache cold in this domain, and
+       * there is no bad imbalance.
+       */
+      schedstat_inc(this_sd, ttwu_move_affine);
+      schedstat_inc(p, se.nr_wakeups_affine);
 
-   cpu      = task_cpu(p);
-   rq       = task_rq(p);
-   this_cpu = smp_processor_id();
-   new_cpu  = cpu;
+      return 1;
+   }
+   return 0;
+}
 
-   if (cpu == this_cpu)
-      goto out_set_cpu;
+static int select_task_rq_fair(struct task_struct *p, int sync)
+{
+   struct sched_domain *sd, *this_sd = NULL;
+   int prev_cpu, this_cpu, new_cpu;
+   unsigned long load, this_load;
+   struct rq *rq, *this_rq;
+   unsigned int imbalance;
+   int idx;
+
+   prev_cpu   = task_cpu(p);
+   rq      = task_rq(p);
+   this_cpu   = smp_processor_id();
+   this_rq      = cpu_rq(this_cpu);
+   new_cpu      = prev_cpu;
 
+   /*
+    * 'this_sd' is the first domain that both
+    * this_cpu and prev_cpu are present in:
+    */
    for_each_domain(this_cpu, sd) {
-      if (cpu_isset(cpu, sd->span)) {
+      if (cpu_isset(prev_cpu, sd->span)) {
          this_sd = sd;
          break;
       }
    }
 
    if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
-      goto out_set_cpu;
+      goto out;
 
    /*
     * Check for affine wakeup and passive balancing possibilities.
     */
-   if (this_sd) {
-      int idx = this_sd->wake_idx;
-      unsigned int imbalance;
-      unsigned long load, this_load;
-
-      imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
-      load = source_load(cpu, idx);
-      this_load = target_load(this_cpu, idx);
-
-      new_cpu = this_cpu; /* Wake to this CPU if we can */
-
-      if (this_sd->flags & SD_WAKE_AFFINE) {
-         unsigned long tl = this_load;
-         unsigned long tl_per_task;
-
-         /*
-          * Attract cache-cold tasks on sync wakeups:
-          */
-         if (sync && !task_hot(p, rq->clock, this_sd))
-            goto out_set_cpu;
-
-         schedstat_inc(p, se.nr_wakeups_affine_attempts);
-         tl_per_task = cpu_avg_load_per_task(this_cpu);
-
-         /*
-          * If sync wakeup then subtract the (maximum possible)
-          * effect of the currently running task from the load
-          * of the current CPU:
-          */
-         if (sync)
-            tl -= current->se.load.weight;
-
-         if ((tl <= load &&
-            tl + target_load(cpu, idx) <= tl_per_task) ||
-                100*(tl + p->se.load.weight) <= imbalance*load) {
-            /*
-             * This domain has SD_WAKE_AFFINE and
-             * p is cache cold in this domain, and
-             * there is no bad imbalance.
-             */
-            schedstat_inc(this_sd, ttwu_move_affine);
-            schedstat_inc(p, se.nr_wakeups_affine);
-            goto out_set_cpu;
-         }
-      }
+   if (!this_sd)
+      goto out;
 
-      /*
-       * Start passive balancing when half the imbalance_pct
-       * limit is reached.
-       */
-      if (this_sd->flags & SD_WAKE_BALANCE) {
-         if (imbalance*this_load <= 100*load) {
-            schedstat_inc(this_sd, ttwu_move_balance);
-            schedstat_inc(p, se.nr_wakeups_passive);
-            goto out_set_cpu;
-         }
+   idx = this_sd->wake_idx;
+
+   imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+
+   load = source_load(prev_cpu, idx);
+   this_load = target_load(this_cpu, idx);
+
+   if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+                 load, this_load, imbalance))
+      return this_cpu;
+
+   if (prev_cpu == this_cpu)
+      goto out;
+
+   /*
+    * Start passive balancing when half the imbalance_pct
+    * limit is reached.
+    */
+   if (this_sd->flags & SD_WAKE_BALANCE) {
+      if (imbalance*this_load <= 100*load) {
+         schedstat_inc(this_sd, ttwu_move_balance);
+         schedstat_inc(p, se.nr_wakeups_passive);
+         return this_cpu;
       }
    }
 
-   new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
-out_set_cpu:
+out:
    return wake_idle(new_cpu, p);
 }
 #endif /* CONFIG_SMP */
@@ -1092,6 +1119,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
       return;
    }
 
+   se->last_wakeup = se->sum_exec_runtime;
+   if (unlikely(se == pse))
+      return;
+
    cfs_rq_of(pse)->next = pse;
 
    /*


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.