Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

Advertisement
[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Advertisement

Kernel v2.4.13 /mm/swapfile.c

Filename:/mm/swapfile.c
Lines Added:52
Lines Deleted:27
Also changed in: (Previous) 2.4.13-pre6  2.4.13-pre5  2.4.13-pre4  2.4.13-pre3  2.4.12-ac6  2.4.12-ac4 
(Following) 2.4.13-ac1  2.4.13-ac2  2.4.13-ac3  2.4.13-ac4  2.4.13-ac5  2.4.13-ac6 

Location
[  2.4.13
  [  mm
     o  swapfile.c

Patch

diff -u --recursive --new-file v2.4.12/linux/mm/swapfile.c linux/mm/swapfile.c
--- v2.4.12/linux/mm/swapfile.c   Tue Oct  9 17:06:53 2001
+++ linux/mm/swapfile.c   Mon Oct 15 12:09:50 2001
@@ -209,7 +209,7 @@
  * share this swap entry, so be cautious and let do_wp_page work out
  * what to do if a write is requested later.
  */
-/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* mmlist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
    pte_t *dir, swp_entry_t entry, struct page* page)
 {
@@ -225,7 +225,7 @@
    ++vma->vm_mm->rss;
 }
 
-/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* mmlist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
    unsigned long address, unsigned long size, unsigned long offset,
    swp_entry_t entry, struct page* page)
@@ -253,7 +253,7 @@
    } while (address && (address < end));
 }
 
-/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* mmlist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
    unsigned long address, unsigned long size,
    swp_entry_t entry, struct page* page)
@@ -284,7 +284,7 @@
    } while (address && (address < end));
 }
 
-/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* mmlist_lock and vma->vm_mm->page_table_lock are held */
 static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
          swp_entry_t entry, struct page* page)
 {
@@ -424,10 +424,6 @@
 
       /*
        * Don't hold on to start_mm if it looks like exiting.
-       * Can mmput ever block? if so, then we cannot risk
-       * it between deleting the page from the swap cache,
-       * and completing the search through mms (and cannot
-       * use it to avoid the long hold on mmlist_lock there).
        */
       if (atomic_read(&start_mm->mm_users) == 1) {
          mmput(start_mm);
@@ -436,18 +432,15 @@
       }
 
       /*
-       * Wait for and lock page.  Remove it from swap cache
-       * so try_to_swap_out won't bump swap count.  Mark dirty
-       * so try_to_swap_out will preserve it without us having
-       * to mark any present ptes as dirty: so we can skip
-       * searching processes once swap count has all gone.
+       * Wait for and lock page.  When do_swap_page races with
+       * try_to_unuse, do_swap_page can handle the fault much
+       * faster than try_to_unuse can locate the entry.  This
+       * apparently redundant "wait_on_page" lets try_to_unuse
+       * defer to do_swap_page in such a case - in some tests,
+       * do_swap_page and try_to_unuse repeatedly compete.
        */
+      wait_on_page(page);
       lock_page(page);
-      if (PageSwapCache(page))
-         delete_from_swap_cache(page);
-      SetPageDirty(page);
-      UnlockPage(page);
-      flush_page_to_ram(page);
 
       /*
        * Remove all references to entry, without blocking.
@@ -455,20 +448,22 @@
        * to search, but use it as a reminder to search shmem.
        */
       swcount = *swap_map;
-      if (swcount) {
+      if (swcount > 1) {
+         flush_page_to_ram(page);
          if (start_mm == &init_mm)
             shmem_unuse(entry, page);
          else
             unuse_process(start_mm, entry, page);
       }
-      if (*swap_map) {
+      if (*swap_map > 1) {
          int set_start_mm = (*swap_map >= swcount);
          struct list_head *p = &start_mm->mmlist;
          struct mm_struct *new_start_mm = start_mm;
          struct mm_struct *mm;
 
          spin_lock(&mmlist_lock);
-         while (*swap_map && (p = p->next) != &start_mm->mmlist) {
+         while (*swap_map > 1 &&
+               (p = p->next) != &start_mm->mmlist) {
             mm = list_entry(p, struct mm_struct, mmlist);
             swcount = *swap_map;
             if (mm == &init_mm) {
@@ -486,7 +481,6 @@
          mmput(start_mm);
          start_mm = new_start_mm;
       }
-      page_cache_release(page);
 
       /*
        * How could swap count reach 0x7fff when the maximum
@@ -505,23 +499,52 @@
          swap_list_lock();
          swap_device_lock(si);
          nr_swap_pages++;
-         *swap_map = 0;
+         *swap_map = 1;
          swap_device_unlock(si);
          swap_list_unlock();
          reset_overflow = 1;
       }
 
       /*
+       * If a reference remains (rare), we would like to leave
+       * the page in the swap cache; but try_to_swap_out could
+       * then re-duplicate the entry once we drop page lock,
+       * so we might loop indefinitely; also, that page could
+       * not be swapped out to other storage meanwhile.  So:
+       * delete from cache even if there's another reference,
+       * after ensuring that the data has been saved to disk -
+       * since if the reference remains (rarer), it will be
+       * read from disk into another page.  Splitting into two
+       * pages would be incorrect if swap supported "shared
+       * private" pages, but they are handled by tmpfs files.
+       * Note shmem_unuse already deleted its page from swap cache.
+       */
+      swcount = *swap_map;
+      if ((swcount > 0) != PageSwapCache(page))
+         BUG();
+      if ((swcount > 1) && PageDirty(page)) {
+         rw_swap_page(WRITE, page);
+         lock_page(page);
+      }
+      if (PageSwapCache(page))
+         delete_from_swap_cache(page);
+
+      /*
+       * So we could skip searching mms once swap count went
+       * to 1, we did not mark any present ptes as dirty: must
+       * mark page dirty so try_to_swap_out will preserve it.
+       */
+      SetPageDirty(page);
+      UnlockPage(page);
+      page_cache_release(page);
+
+      /*
        * Make sure that we aren't completely killing
        * interactive performance.  Interruptible check on
        * signal_pending() would be nice, but changes the spec?
        */
       if (current->need_resched)
          schedule();
-      else {
-         unlock_kernel();
-         lock_kernel();
-      }
    }
 
    mmput(start_mm);
@@ -577,7 +600,9 @@
    total_swap_pages -= p->pages;
    p->flags = SWP_USED;
    swap_list_unlock();
+   unlock_kernel();
    err = try_to_unuse(type);
+   lock_kernel();
    if (err) {
       /* re-insert swap space back into swap_list */
       swap_list_lock();


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.