Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Kernel v2.4.1-pre11 /mm/vmscan.c

Filename:/mm/vmscan.c
Lines Added:135
Lines Deleted:246
Also changed in: (Previous) 2.4.1-pre10  2.4.1-pre9  2.4.1-pre8  2.4.1-pre6  2.4.1-pre7  2.4.1-pre4 
(Following) 2.4.1-pre12  2.4.1  2.4.1-ac7  2.4.1-ac8  2.4.1-ac9  2.4.1-ac10 

Location
[  2.4.1-pre11
  [  mm
     o  vmscan.c

Patch

diff -u --recursive --new-file v2.4.0/linux/mm/vmscan.c linux/mm/vmscan.c
--- v2.4.0/linux/mm/vmscan.c   Wed Jan  3 20:45:26 2001
+++ linux/mm/vmscan.c   Mon Jan 15 12:36:49 2001
@@ -35,45 +35,21 @@
  * using a process that no longer actually exists (it might
  * have died while we slept).
  */
-static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
+static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page)
 {
    pte_t pte;
    swp_entry_t entry;
-   struct page * page;
-   int onlist;
-
-   pte = *page_table;
-   if (!pte_present(pte))
-      goto out_failed;
-   page = pte_page(pte);
-   if ((!VALID_PAGE(page)) || PageReserved(page))
-      goto out_failed;
-
-   if (!mm->swap_cnt)
-      return 1;
-
-   mm->swap_cnt--;
 
-   onlist = PageActive(page);
    /* Don't look at this pte if it's been accessed recently. */
    if (ptep_test_and_clear_young(page_table)) {
-      age_page_up(page);
-      goto out_failed;
+      page->age += PAGE_AGE_ADV;
+      if (page->age > PAGE_AGE_MAX)
+         page->age = PAGE_AGE_MAX;
+      return;
    }
-   if (!onlist)
-      /* The page is still mapped, so it can't be freeable... */
-      age_page_down_ageonly(page);
-
-   /*
-    * If the page is in active use by us, or if the page
-    * is in active use by others, don't unmap it or
-    * (worse) start unneeded IO.
-    */
-   if (page->age > 0)
-      goto out_failed;
 
    if (TryLockPage(page))
-      goto out_failed;
+      return;
 
    /* From this point on, the odds are that we're going to
     * nuke this pte, so read and clear the pte.  This hook
@@ -87,9 +63,6 @@
     * Is the page already in the swap cache? If so, then
     * we can just drop our reference to it without doing
     * any IO - it's already up-to-date on disk.
-    *
-    * Return 0, as we didn't actually free any real
-    * memory, and we should just continue our scan.
     */
    if (PageSwapCache(page)) {
       entry.val = page->index;
@@ -99,12 +72,12 @@
       swap_duplicate(entry);
       set_pte(page_table, swp_entry_to_pte(entry));
 drop_pte:
-      UnlockPage(page);
       mm->rss--;
-      deactivate_page(page);
+      if (!page->age)
+         deactivate_page(page);
+      UnlockPage(page);
       page_cache_release(page);
-out_failed:
-      return 0;
+      return;
    }
 
    /*
@@ -153,34 +126,20 @@
 out_unlock_restore:
    set_pte(page_table, pte);
    UnlockPage(page);
-   return 0;
+   return;
 }
 
-/*
- * A new implementation of swap_out().  We do not swap complete processes,
- * but only a small number of blocks, before we continue with the next
- * process.  The number of blocks actually swapped is determined on the
- * number of page faults, that this process actually had in the last time,
- * so we won't swap heavily used processes all the time ...
- *
- * Note: the priority argument is a hint on much CPU to waste with the
- *       swap block search, not a hint, of how much blocks to swap with
- *       each process.
- *
- * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
- */
-
-static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count)
 {
    pte_t * pte;
    unsigned long pmd_end;
 
    if (pmd_none(*dir))
-      return 0;
+      return count;
    if (pmd_bad(*dir)) {
       pmd_ERROR(*dir);
       pmd_clear(dir);
-      return 0;
+      return count;
    }
    
    pte = pte_offset(dir, address);
@@ -190,28 +149,33 @@
       end = pmd_end;
 
    do {
-      int result;
-      mm->swap_address = address + PAGE_SIZE;
-      result = try_to_swap_out(mm, vma, address, pte, gfp_mask);
-      if (result)
-         return result;
+      if (pte_present(*pte)) {
+         struct page *page = pte_page(*pte);
+
+         if (VALID_PAGE(page) && !PageReserved(page)) {
+            try_to_swap_out(mm, vma, address, pte, page);
+            if (!--count)
+               break;
+         }
+      }
       address += PAGE_SIZE;
       pte++;
    } while (address && (address < end));
-   return 0;
+   mm->swap_address = address + PAGE_SIZE;
+   return count;
 }
 
-static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask)
+static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count)
 {
    pmd_t * pmd;
    unsigned long pgd_end;
 
    if (pgd_none(*dir))
-      return 0;
+      return count;
    if (pgd_bad(*dir)) {
       pgd_ERROR(*dir);
       pgd_clear(dir);
-      return 0;
+      return count;
    }
 
    pmd = pmd_offset(dir, address);
@@ -221,23 +185,23 @@
       end = pgd_end;
    
    do {
-      int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask);
-      if (result)
-         return result;
+      count = swap_out_pmd(mm, vma, pmd, address, end, count);
+      if (!count)
+         break;
       address = (address + PMD_SIZE) & PMD_MASK;
       pmd++;
    } while (address && (address < end));
-   return 0;
+   return count;
 }
 
-static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask)
+static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count)
 {
    pgd_t *pgdir;
    unsigned long end;
 
    /* Don't swap out areas which are locked down */
    if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
-      return 0;
+      return count;
 
    pgdir = pgd_offset(mm, address);
 
@@ -245,18 +209,17 @@
    if (address >= end)
       BUG();
    do {
-      int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask);
-      if (result)
-         return result;
+      count = swap_out_pgd(mm, vma, pgdir, address, end, count);
+      if (!count)
+         break;
       address = (address + PGDIR_SIZE) & PGDIR_MASK;
       pgdir++;
    } while (address && (address < end));
-   return 0;
+   return count;
 }
 
-static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
+static int swap_out_mm(struct mm_struct * mm, int count)
 {
-   int result = 0;
    unsigned long address;
    struct vm_area_struct* vma;
 
@@ -276,8 +239,8 @@
          address = vma->vm_start;
 
       for (;;) {
-         result = swap_out_vma(mm, vma, address, gfp_mask);
-         if (result)
+         count = swap_out_vma(mm, vma, address, count);
+         if (!count)
             goto out_unlock;
          vma = vma->vm_next;
          if (!vma)
@@ -287,94 +250,63 @@
    }
    /* Reset to 0 when we reach the end of address space */
    mm->swap_address = 0;
-   mm->swap_cnt = 0;
 
 out_unlock:
    spin_unlock(&mm->page_table_lock);
-   return result;
+   return !count;
 }
 
 /*
- * Select the task with maximal swap_cnt and try to swap out a page.
  * N.B. This function returns only 0 or 1.  Return values != 1 from
  * the lower level routines result in continued processing.
  */
 #define SWAP_SHIFT 5
 #define SWAP_MIN 8
 
+static inline int swap_amount(struct mm_struct *mm)
+{
+   int nr = mm->rss >> SWAP_SHIFT;
+   return nr < SWAP_MIN ? SWAP_MIN : nr;
+}
+
 static int swap_out(unsigned int priority, int gfp_mask)
 {
    int counter;
-   int __ret = 0;
+   int retval = 0;
+   struct mm_struct *mm = current->mm;
 
-   /* 
-    * We make one or two passes through the task list, indexed by 
-    * assign = {0, 1}:
-    *   Pass 1: select the swappable task with maximal RSS that has
-    *         not yet been swapped out. 
-    *   Pass 2: re-assign rss swap_cnt values, then select as above.
-    *
-    * With this approach, there's no need to remember the last task
-    * swapped out.  If the swap-out fails, we clear swap_cnt so the 
-    * task won't be selected again until all others have been tried.
-    *
-    * Think of swap_cnt as a "shadow rss" - it tells us which process
-    * we want to page out (always try largest first).
-    */
-   counter = (nr_threads << SWAP_SHIFT) >> priority;
-   if (counter < 1)
-      counter = 1;
+   /* Always start by trying to penalize the process that is allocating memory */
+   if (mm)
+      retval = swap_out_mm(mm, swap_amount(mm));
 
-   for (; counter >= 0; counter--) {
+   /* Then, look at the other mm's */
+   counter = mmlist_nr >> priority;
+   do {
       struct list_head *p;
-      unsigned long max_cnt = 0;
-      struct mm_struct *best = NULL;
-      int assign = 0;
-      int found_task = 0;
-   select:
+
       spin_lock(&mmlist_lock);
       p = init_mm.mmlist.next;
-      for (; p != &init_mm.mmlist; p = p->next) {
-         struct mm_struct *mm = list_entry(p, struct mm_struct, mmlist);
-          if (mm->rss <= 0)
-            continue;
-         found_task++;
-         /* Refresh swap_cnt? */
-         if (assign == 1) {
-            mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
-            if (mm->swap_cnt < SWAP_MIN)
-               mm->swap_cnt = SWAP_MIN;
-         }
-         if (mm->swap_cnt > max_cnt) {
-            max_cnt = mm->swap_cnt;
-            best = mm;
-         }
-      }
+      if (p == &init_mm.mmlist)
+         goto empty;
+
+      /* Move it to the back of the queue.. */
+      list_del(p);
+      list_add_tail(p, &init_mm.mmlist);
+      mm = list_entry(p, struct mm_struct, mmlist);
 
-      /* Make sure it doesn't disappear */
-      if (best)
-         atomic_inc(&best->mm_users);
+      /* Make sure the mm doesn't disappear when we drop the lock.. */
+      atomic_inc(&mm->mm_users);
       spin_unlock(&mmlist_lock);
 
-      /*
-       * We have dropped the tasklist_lock, but we
-       * know that "mm" still exists: we are running
-       * with the big kernel lock, and exit_mm()
-       * cannot race with us.
-       */
-      if (!best) {
-         if (!assign && found_task > 0) {
-            assign = 1;
-            goto select;
-         }
-         break;
-      } else {
-         __ret = swap_out_mm(best, gfp_mask);
-         mmput(best);
-         break;
-      }
-   }
-   return __ret;
+      /* Walk about 6% of the address space each time */
+      retval |= swap_out_mm(mm, swap_amount(mm));
+      mmput(mm);
+   } while (--counter >= 0);
+   return retval;
+
+empty:
+   spin_unlock(&mmlist_lock);
+   return 0;
 }
 
 
@@ -540,7 +472,6 @@
        */
       if (PageDirty(page)) {
          int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
-         int result;
 
          if (!writepage)
             goto page_active;
@@ -558,16 +489,12 @@
          page_cache_get(page);
          spin_unlock(&pagemap_lru_lock);
 
-         result = writepage(page);
+         writepage(page);
          page_cache_release(page);
 
          /* And re-start the thing.. */
          spin_lock(&pagemap_lru_lock);
-         if (result != 1)
-            continue;
-         /* writepage refused to do anything */
-         set_page_dirty(page);
-         goto page_active;
+         continue;
       }
 
       /*
@@ -808,6 +735,9 @@
 int inactive_shortage(void)
 {
    int shortage = 0;
+   pg_data_t *pgdat = pgdat_list;
+
+   /* Is the inactive dirty list too small? */
 
    shortage += freepages.high;
    shortage += inactive_target;
@@ -818,7 +748,27 @@
    if (shortage > 0)
       return shortage;
 
-   return 0;
+   /* If not, do we have enough per-zone pages on the inactive list? */
+
+   shortage = 0;
+
+   do {
+      int i;
+      for(i = 0; i < MAX_NR_ZONES; i++) {
+         int zone_shortage;
+         zone_t *zone = pgdat->node_zones+ i;
+
+         zone_shortage = zone->pages_high;
+         zone_shortage -= zone->inactive_dirty_pages;
+         zone_shortage -= zone->inactive_clean_pages;
+         zone_shortage -= zone->free_pages;
+         if (zone_shortage > 0)
+            shortage += zone_shortage;
+      }
+      pgdat = pgdat->node_next;
+   } while (pgdat);
+
+   return shortage;
 }
 
 /*
@@ -833,72 +783,35 @@
  * really care about latency. In that case we don't try
  * to free too many pages.
  */
+#define DEF_PRIORITY (6)
 static int refill_inactive(unsigned int gfp_mask, int user)
 {
-   int priority, count, start_count, made_progress;
+   int count, start_count, maxtry;
 
    count = inactive_shortage() + free_shortage();
    if (user)
       count = (1 << page_cluster);
    start_count = count;
 
-   /* Always trim SLAB caches when memory gets low. */
-   kmem_cache_reap(gfp_mask);
-
-   priority = 6;
+   maxtry = 6;
    do {
-      made_progress = 0;
-
       if (current->need_resched) {
          __set_current_state(TASK_RUNNING);
          schedule();
       }
 
-      while (refill_inactive_scan(priority, 1)) {
-         made_progress = 1;
+      while (refill_inactive_scan(DEF_PRIORITY, 1)) {
          if (--count <= 0)
             goto done;
       }
 
-      /*
-       * don't be too light against the d/i cache since
-          * refill_inactive() almost never fail when there's
-          * really plenty of memory free. 
-       */
-      shrink_dcache_memory(priority, gfp_mask);
-      shrink_icache_memory(priority, gfp_mask);
+      /* If refill_inactive_scan failed, try to page stuff out.. */
+      swap_out(DEF_PRIORITY, gfp_mask);
 
-      /*
-       * Then, try to page stuff out..
-       */
-      while (swap_out(priority, gfp_mask)) {
-         made_progress = 1;
-         if (--count <= 0)
-            goto done;
-      }
-
-      /*
-       * If we either have enough free memory, or if
-       * page_launder() will be able to make enough
-       * free memory, then stop.
-       */
-      if (!inactive_shortage() || !free_shortage())
-         goto done;
-
-      /*
-       * Only switch to a lower "priority" if we
-       * didn't make any useful progress in the
-       * last loop.
-       */
-      if (!made_progress)
-         priority--;
-   } while (priority >= 0);
-
-   /* Always end on a refill_inactive.., may sleep... */
-   while (refill_inactive_scan(0, 1)) {
-      if (--count <= 0)
-         goto done;
-   }
+      if (--maxtry <= 0)
+            return 0;
+      
+   } while (inactive_shortage());
 
 done:
    return (count < start_count);
@@ -922,20 +835,29 @@
 
    /*
     * If needed, we move pages from the active list
-    * to the inactive list. We also "eat" pages from
-    * the inode and dentry cache whenever we do this.
+    * to the inactive list.
     */
-   if (free_shortage() || inactive_shortage()) {
-      shrink_dcache_memory(6, gfp_mask);
-      shrink_icache_memory(6, gfp_mask);
+   if (inactive_shortage())
       ret += refill_inactive(gfp_mask, user);
+
+   /*    
+    * Delete pages from the inode and dentry caches and 
+    * reclaim unused slab cache if memory is low.
+    */
+   if (free_shortage()) {
+      shrink_dcache_memory(DEF_PRIORITY, gfp_mask);
+      shrink_icache_memory(DEF_PRIORITY, gfp_mask);
    } else {
       /*
-       * Reclaim unused slab cache memory.
+       * Illogical, but true. At least for now.
+       *
+       * If we're _not_ under shortage any more, we
+       * reap the caches. Why? Because a noticeable
+       * part of the caches are the buffer-heads, 
+       * which we'll want to keep if under shortage.
        */
       kmem_cache_reap(gfp_mask);
-      ret = 1;
-   }
+   } 
 
    return ret;
 }
@@ -988,13 +910,8 @@
       static int recalc = 0;
 
       /* If needed, try to free some memory. */
-      if (inactive_shortage() || free_shortage()) {
-         int wait = 0;
-         /* Do we need to do some synchronous flushing? */
-         if (waitqueue_active(&kswapd_done))
-            wait = 1;
-         do_try_to_free_pages(GFP_KSWAPD, wait);
-      }
+      if (inactive_shortage() || free_shortage()) 
+         do_try_to_free_pages(GFP_KSWAPD, 0);
 
       /*
        * Do some (very minimal) background scanning. This
@@ -1002,7 +919,7 @@
        * every minute. This clears old referenced bits
        * and moves unused pages to the inactive list.
        */
-      refill_inactive_scan(6, 0);
+      refill_inactive_scan(DEF_PRIORITY, 0);
 
       /* Once a second, recalculate some VM stats. */
       if (time_after(jiffies, recalc + HZ)) {
@@ -1010,11 +927,6 @@
          recalculate_vm_stats();
       }
 
-      /*
-       * Wake up everybody waiting for free memory
-       * and unplug the disk queue.
-       */
-      wake_up_all(&kswapd_done);
       run_task_queue(&tq_disk);
 
       /* 
@@ -1045,33 +957,10 @@
    }
 }
 
-void wakeup_kswapd(int block)
+void wakeup_kswapd(void)
 {
-   DECLARE_WAITQUEUE(wait, current);
-
-   if (current == kswapd_task)
-      return;
-
-   if (!block) {
-      if (waitqueue_active(&kswapd_wait))
-         wake_up(&kswapd_wait);
-      return;
-   }
-
-   /*
-    * Kswapd could wake us up before we get a chance
-    * to sleep, so we have to be very careful here to
-    * prevent SMP races...
-    */
-   __set_current_state(TASK_UNINTERRUPTIBLE);
-   add_wait_queue(&kswapd_done, &wait);
-
-   if (waitqueue_active(&kswapd_wait))
-      wake_up(&kswapd_wait);
-   schedule();
-
-   remove_wait_queue(&kswapd_done, &wait);
-   __set_current_state(TASK_RUNNING);
+   if (current != kswapd_task)
+      wake_up_process(kswapd_task);
 }
 
 /*
@@ -1096,7 +985,7 @@
 /*
  * Kreclaimd will move pages from the inactive_clean list to the
  * free list, in order to keep atomic allocations possible under
- * all circumstances. Even when kswapd is blocked on IO.
+ * all circumstances.
  */
 int kreclaimd(void *unused)
 {


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.