| Kernel v2.4.13-ac1 /mm/swapfile.c |
|---|
 2.4.13-ac1
 mm
 swapfile.c
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla/mm/swapfile.c linux.ac/mm/swapfile.c
--- linux.vanilla/mm/swapfile.c Thu Oct 25 16:26:39 2001
+++ linux.ac/mm/swapfile.c Sun Oct 21 18:54:37 2001
@@ -14,7 +14,6 @@
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/shm.h>
-#include <linux/compiler.h>
#include <asm/pgtable.h>
@@ -32,6 +31,25 @@
struct swap_info_struct swap_info[MAX_SWAPFILES];
+/*
+ * vm_swap_full() is nonzero once more than SWAP_FULL_PCT percent of
+ * swap is in use.  do_swap_page(), in the page fault path, then frees
+ * swap entries on swapin so there is room to swap something out.
+ *
+ * The limit is floor(total * pct / 100), built from total / 100 and
+ * total % 100 so that no intermediate product can overflow an int.
+ * XXX: The 80% threshold is an arbitrary number, fixme.
+ */
+#define SWAP_FULL_PCT 80
+int vm_swap_full (void)
+{
+ int swap_used = total_swap_pages - nr_swap_pages;
+ int limit = (total_swap_pages / 100) * SWAP_FULL_PCT +
+     (total_swap_pages % 100) * SWAP_FULL_PCT / 100;
+
+ return swap_used > limit;
+}
+
#define SWAPFILE_CLUSTER 256
static inline int scan_swap_map(struct swap_info_struct *si)
@@ -209,15 +227,15 @@
* share this swap entry, so be cautious and let do_wp_page work out
* what to do if a write is requested later.
*/
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
pte_t *dir, swp_entry_t entry, struct page* page)
{
pte_t pte = *dir;
- if (likely(pte_to_swp_entry(pte).val != entry.val))
+ if (pte_to_swp_entry(pte).val != entry.val)
return;
- if (unlikely(pte_none(pte) || pte_present(pte)))
+ if (pte_none(pte) || pte_present(pte))
return;
get_page(page);
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
@@ -225,7 +243,7 @@
++vma->vm_mm->rss;
}
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
unsigned long address, unsigned long size, unsigned long offset,
swp_entry_t entry, struct page* page)
@@ -253,7 +271,7 @@
} while (address && (address < end));
}
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
unsigned long address, unsigned long size,
swp_entry_t entry, struct page* page)
@@ -284,7 +302,7 @@
} while (address && (address < end));
}
-/* mmlist_lock and vma->vm_mm->page_table_lock are held */
+/* BKL, mmlist_lock and vma->vm_mm->page_table_lock are held */
static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
swp_entry_t entry, struct page* page)
{
@@ -424,6 +442,10 @@
/*
* Don't hold on to start_mm if it looks like exiting.
+ * Can mmput ever block? If so, then we cannot risk
+ * it between deleting the page from the swap cache,
+ * and completing the search through mms (and cannot
+ * use it to avoid the long hold on mmlist_lock there).
*/
if (atomic_read(&start_mm->mm_users) == 1) {
mmput(start_mm);
@@ -432,15 +454,18 @@
}
/*
- * Wait for and lock page. When do_swap_page races with
- * try_to_unuse, do_swap_page can handle the fault much
- * faster than try_to_unuse can locate the entry. This
- * apparently redundant "wait_on_page" lets try_to_unuse
- * defer to do_swap_page in such a case - in some tests,
- * do_swap_page and try_to_unuse repeatedly compete.
+ * Wait for and lock page. Remove it from swap cache
+ * so try_to_swap_out won't bump swap count. Mark dirty
+ * so try_to_swap_out will preserve it without us having
+ * to mark any present ptes as dirty: so we can skip
+ * searching processes once swap count has all gone.
*/
- wait_on_page(page);
lock_page(page);
+ if (PageSwapCache(page))
+ delete_from_swap_cache(page);
+ SetPageDirty(page);
+ UnlockPage(page);
+ flush_page_to_ram(page);
/*
* Remove all references to entry, without blocking.
@@ -448,22 +473,20 @@
* to search, but use it as a reminder to search shmem.
*/
swcount = *swap_map;
- if (swcount > 1) {
- flush_page_to_ram(page);
+ if (swcount) {
if (start_mm == &init_mm)
shmem_unuse(entry, page);
else
unuse_process(start_mm, entry, page);
}
- if (*swap_map > 1) {
+ if (*swap_map) {
int set_start_mm = (*swap_map >= swcount);
struct list_head *p = &start_mm->mmlist;
struct mm_struct *new_start_mm = start_mm;
struct mm_struct *mm;
spin_lock(&mmlist_lock);
- while (*swap_map > 1 &&
- (p = p->next) != &start_mm->mmlist) {
+ while (*swap_map && (p = p->next) != &start_mm->mmlist) {
mm = list_entry(p, struct mm_struct, mmlist);
swcount = *swap_map;
if (mm == &init_mm) {
@@ -481,6 +504,7 @@
mmput(start_mm);
start_mm = new_start_mm;
}
+ page_cache_release(page);
/*
* How could swap count reach 0x7fff when the maximum
@@ -499,52 +523,23 @@
swap_list_lock();
swap_device_lock(si);
nr_swap_pages++;
- *swap_map = 1;
+ *swap_map = 0;
swap_device_unlock(si);
swap_list_unlock();
reset_overflow = 1;
}
/*
- * If a reference remains (rare), we would like to leave
- * the page in the swap cache; but try_to_swap_out could
- * then re-duplicate the entry once we drop page lock,
- * so we might loop indefinitely; also, that page could
- * not be swapped out to other storage meanwhile. So:
- * delete from cache even if there's another reference,
- * after ensuring that the data has been saved to disk -
- * since if the reference remains (rarer), it will be
- * read from disk into another page. Splitting into two
- * pages would be incorrect if swap supported "shared
- * private" pages, but they are handled by tmpfs files.
- * Note shmem_unuse already deleted its from swap cache.
- */
- swcount = *swap_map;
- if ((swcount > 0) != PageSwapCache(page))
- BUG();
- if ((swcount > 1) && PageDirty(page)) {
- rw_swap_page(WRITE, page);
- lock_page(page);
- }
- if (PageSwapCache(page))
- delete_from_swap_cache(page);
-
- /*
- * So we could skip searching mms once swap count went
- * to 1, we did not mark any present ptes as dirty: must
- * mark page dirty so try_to_swap_out will preserve it.
- */
- SetPageDirty(page);
- UnlockPage(page);
- page_cache_release(page);
-
- /*
* Make sure that we aren't completely killing
* interactive performance. Interruptible check on
* signal_pending() would be nice, but changes the spec?
*/
if (current->need_resched)
schedule();
+ else {
+ unlock_kernel();
+ lock_kernel();
+ }
}
mmput(start_mm);
@@ -576,8 +571,14 @@
for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
p = swap_info + type;
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
- if (p->swap_file == nd.dentry)
- break;
+ if (p->swap_file) {
+ if (p->swap_file == nd.dentry)
+ break;
+ } else {
+ if (S_ISBLK(nd.dentry->d_inode->i_mode)
+ && (p->swap_device == nd.dentry->d_inode->i_rdev))
+ break;
+ }
}
prev = type;
}
@@ -600,9 +601,7 @@
total_swap_pages -= p->pages;
p->flags = SWP_USED;
swap_list_unlock();
- unlock_kernel();
err = try_to_unuse(type);
- lock_kernel();
if (err) {
/* re-insert swap space back into swap_list */
swap_list_lock();
@@ -781,23 +780,28 @@
if (!dev || (blk_size[MAJOR(dev)] &&
!blk_size[MAJOR(dev)][MINOR(dev)]))
goto bad_swap;
+ error = -EBUSY;
+ for (i = 0 ; i < nr_swapfiles ; i++) {
+ if (i == type)
+ continue;
+ if (dev == swap_info[i].swap_device)
+ goto bad_swap;
+ }
swapfilesize = 0;
if (blk_size[MAJOR(dev)])
swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
>> (PAGE_SHIFT - 10);
- } else if (S_ISREG(swap_inode->i_mode))
+ } else if (S_ISREG(swap_inode->i_mode)) {
+ error = -EBUSY;
+ for (i = 0 ; i < nr_swapfiles ; i++) {
+ if (i == type || !swap_info[i].swap_file)
+ continue;
+ if (swap_inode == swap_info[i].swap_file->d_inode)
+ goto bad_swap;
+ }
swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
- else
+ } else
goto bad_swap;
-
- error = -EBUSY;
- for (i = 0 ; i < nr_swapfiles ; i++) {
- struct swap_info_struct *q = &swap_info[i];
- if (i == type || !q->swap_file)
- continue;
- if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
- goto bad_swap;
- }
swap_header = (void *) __get_free_page(GFP_USER);
if (!swap_header) {
|