Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

Advertisement
[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Advertisement

Kernel v2.6.24 /mm/vmscan.c

Filename:/mm/vmscan.c
Lines Added:77
Lines Deleted:26
Also changed in: (Previous) 2.6.24-rc8  2.6.24-rc7  2.6.24-rc6  2.6.24-rc5  2.6.24-rc4  2.6.24-rc3 
(Following) 2.6.24-git17  2.6.24-git18  2.6.24-git19  2.6.24-git20  2.6.24-git21  2.6.24-git22 

Location
[  2.6.24
  [  mm
     o  vmscan.c

Patch

diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6e65d0..e5a9597 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -141,7 +141,7 @@ EXPORT_SYMBOL(unregister_shrinker);
  * percentages of the lru and ageable caches.  This should balance the seeks
  * generated by these structures.
  *
- * If the vm encounted mapped pages on the LRU it increase the pressure on
+ * If the vm encountered mapped pages on the LRU it increases the pressure on
  * slab to avoid swapping.
  *
  * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
@@ -932,6 +932,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
       long mapped_ratio;
       long distress;
       long swap_tendency;
+      long imbalance;
 
       if (zone_is_near_oom(zone))
          goto force_reclaim_mapped;
@@ -967,6 +968,46 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
       swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
 
       /*
+       * If there's a huge imbalance between active and inactive
+       * (think active 100 times larger than inactive) we should
+       * become more permissive, or the system will take too much
+       * cpu before it starts swapping during memory pressure.
+       * Distress is about avoiding early-oom, this is about
+       * making swappiness graceful despite setting it to low
+       * values.
+       *
+       * Avoid div by zero with nr_inactive+1, and max resulting
+       * value is vm_total_pages.
+       */
+      imbalance  = zone_page_state(zone, NR_ACTIVE);
+      imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
+
+      /*
+       * Reduce the effect of imbalance if swappiness is low:
+       * for a very low swappiness, the imbalance must be
+       * much higher than 100 for this logic to make a
+       * difference.
+       *
+       * Max temporary value is vm_total_pages*100.
+       */
+      imbalance *= (vm_swappiness + 1);
+      imbalance /= 100;
+
+      /*
+       * If not much of the ram is mapped, make the imbalance
+       * less relevant; it's high priority that we refill the
+       * inactive list with mapped pages only in the presence of
+       * a high ratio of mapped pages.
+       *
+       * Max temporary value is vm_total_pages*100.
+       */
+      imbalance *= mapped_ratio;
+      imbalance /= 100;
+
+      /* apply imbalance feedback to swap_tendency */
+      swap_tendency += imbalance;
+
+      /*
        * Now use this metric to decide whether to start moving mapped
        * memory onto the inactive list.
        */
@@ -1067,8 +1108,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
    unsigned long nr_to_scan;
    unsigned long nr_reclaimed = 0;
 
-   atomic_inc(&zone->reclaim_in_progress);
-
    /*
     * Add one to `nr_to_scan' just to make sure that the kernel will
     * slowly sift through the active list.
@@ -1107,8 +1146,6 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
    }
 
    throttle_vm_writeout(sc->gfp_mask);
-
-   atomic_dec(&zone->reclaim_in_progress);
    return nr_reclaimed;
 }
 
@@ -1146,7 +1183,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 
       note_zone_scanning_priority(zone, priority);
 
-      if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+      if (zone_is_all_unreclaimable(zone) && priority != DEF_PRIORITY)
          continue;   /* Let kswapd poll it */
 
       sc->all_unreclaimable = 0;
@@ -1245,7 +1282,7 @@ out:
     */
    if (priority < 0)
       priority = 0;
-   for (i = 0; zones[i] != 0; i++) {
+   for (i = 0; zones[i] != NULL; i++) {
       struct zone *zone = zones[i];
 
       if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1327,7 +1364,8 @@ loop_again:
          if (!populated_zone(zone))
             continue;
 
-         if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+         if (zone_is_all_unreclaimable(zone) &&
+             priority != DEF_PRIORITY)
             continue;
 
          if (!zone_watermark_ok(zone, order, zone->pages_high,
@@ -1362,7 +1400,8 @@ loop_again:
          if (!populated_zone(zone))
             continue;
 
-         if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+         if (zone_is_all_unreclaimable(zone) &&
+               priority != DEF_PRIORITY)
             continue;
 
          if (!zone_watermark_ok(zone, order, zone->pages_high,
@@ -1371,18 +1410,25 @@ loop_again:
          temp_priority[i] = priority;
          sc.nr_scanned = 0;
          note_zone_scanning_priority(zone, priority);
-         nr_reclaimed += shrink_zone(priority, zone, &sc);
+         /*
+          * We put equal pressure on every zone, unless one
+          * zone has way too many pages free already.
+          */
+         if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
+                  end_zone, 0))
+            nr_reclaimed += shrink_zone(priority, zone, &sc);
          reclaim_state->reclaimed_slab = 0;
          nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
                   lru_pages);
          nr_reclaimed += reclaim_state->reclaimed_slab;
          total_scanned += sc.nr_scanned;
-         if (zone->all_unreclaimable)
+         if (zone_is_all_unreclaimable(zone))
             continue;
          if (nr_slab == 0 && zone->pages_scanned >=
             (zone_page_state(zone, NR_ACTIVE)
             + zone_page_state(zone, NR_INACTIVE)) * 6)
-               zone->all_unreclaimable = 1;
+               zone_set_flag(zone,
+                        ZONE_ALL_UNRECLAIMABLE);
          /*
           * If we've done a decent amount of scanning and
           * the reclaim ratio is low, start doing writepage
@@ -1548,7 +1594,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
       if (!populated_zone(zone))
          continue;
 
-      if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+      if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
          continue;
 
       /* For pass = 0 we don't shrink the active list */
@@ -1688,9 +1734,11 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 {
    pg_data_t *pgdat;
    cpumask_t mask;
+   int nid;
 
    if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
-      for_each_online_pgdat(pgdat) {
+      for_each_node_state(nid, N_HIGH_MEMORY) {
+         pgdat = NODE_DATA(nid);
          mask = node_to_cpumask(pgdat->node_id);
          if (any_online_cpu(mask) != NR_CPUS)
             /* One of our CPUs online: restore mask */
@@ -1727,7 +1775,7 @@ static int __init kswapd_init(void)
    int nid;
 
    swap_setup();
-   for_each_online_node(nid)
+   for_each_node_state(nid, N_HIGH_MEMORY)
        kswapd_run(nid);
    hotcpu_notifier(cpu_callback, 0);
    return 0;
@@ -1847,8 +1895,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
-   cpumask_t mask;
    int node_id;
+   int ret;
 
    /*
     * Zone reclaim reclaims unmapped file backed pages and
@@ -1866,15 +1914,13 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
          <= zone->min_slab_pages)
       return 0;
 
+   if (zone_is_all_unreclaimable(zone))
+      return 0;
+
    /*
-    * Avoid concurrent zone reclaims, do not reclaim in a zone that does
-    * not have reclaimable pages and if we should not delay the allocation
-    * then do not scan.
+    * Do not scan if the allocation should not be delayed.
     */
-   if (!(gfp_mask & __GFP_WAIT) ||
-      zone->all_unreclaimable ||
-      atomic_read(&zone->reclaim_in_progress) > 0 ||
-      (current->flags & PF_MEMALLOC))
+   if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
          return 0;
 
    /*
@@ -1884,9 +1930,14 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
     * as wide as possible.
     */
    node_id = zone_to_nid(zone);
-   mask = node_to_cpumask(node_id);
-   if (!cpus_empty(mask) && node_id != numa_node_id())
+   if (node_state(node_id, N_CPU) && node_id != numa_node_id())
+      return 0;
+
+   if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
       return 0;
-   return __zone_reclaim(zone, gfp_mask, order);
+   ret = __zone_reclaim(zone, gfp_mask, order);
+   zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
+
+   return ret;
 }
 #endif


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.