Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

Advertisement
[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Advertisement

Kernel v2.4.13-ac8 /mm/memory.c

Filename:/mm/memory.c
Lines Added:174
Lines Deleted:162
Also changed in: (Previous) 2.4.13-ac7  2.4.13-ac6  2.4.13-ac5  2.4.13-ac4  2.4.13-ac3  2.4.13-ac1 
(Following) 2.4.14  2.4.15-pre5  2.4.15-pre6  2.4.15-pre7  2.4.15-pre8  2.4.15-pre9 

Location
[  2.4.13-ac8
  [  mm
     o  memory.c

Patch

diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla/mm/memory.c linux.ac/mm/memory.c
--- linux.vanilla/mm/memory.c   Thu Oct 25 16:26:39 2001
+++ linux.ac/mm/memory.c   Fri Nov  2 14:52:58 2001
@@ -71,27 +71,6 @@
 mem_map_t * mem_map;
 
 /*
- * Called by TLB shootdown 
- */
-void __free_pte(pte_t pte)
-{
-   struct page *page = pte_page(pte);
-   if ((!VALID_PAGE(page)) || PageReserved(page))
-      return;
-   /*
-    * free_page() used to be able to clear swap cache
-    * entries.  We may now have to do it manually.
-    */
-   if (page->mapping) {
-      if (pte_dirty(pte))
-         set_page_dirty(page);
-   }
-      
-   free_page_and_swap_cache(page);
-}
-
-
-/*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
@@ -125,10 +104,8 @@
    }
    pmd = pmd_offset(dir, 0);
    pgd_clear(dir);
-   for (j = 0; j < PTRS_PER_PMD ; j++) {
-      prefetchw(pmd+j+(PREFETCH_STRIDE/16));
+   for (j = 0; j < PTRS_PER_PMD ; j++)
       free_one_pmd(pmd+j);
-   }
    pmd_free(pmd);
 }
 
@@ -325,7 +302,7 @@
          /* This will eventually call __free_pte on the pte. */
          tlb_remove_page(tlb, ptep, address + offset);
       } else {
-         swap_free(pte_to_swp_entry(pte));
+         free_swap_and_swap_cache(pte_to_swp_entry(pte));
          pte_clear(ptep);
       }
    }
@@ -404,17 +381,16 @@
    spin_unlock(&mm->page_table_lock);
 }
 
-
 /*
  * Do a quick page-table lookup for a single page. 
  */
-static struct page * follow_page(unsigned long address, int write) 
+static struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) 
 {
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *ptep, pte;
 
-   pgd = pgd_offset(current->mm, address);
+   pgd = pgd_offset(mm, address);
    if (pgd_none(*pgd) || pgd_bad(*pgd))
       goto out;
 
@@ -450,21 +426,70 @@
    return page;
 }
 
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
+      int len, int write, int force, struct page **pages, struct vm_area_struct **vmas)
+{
+   int i = 0;
+
+   do {
+      struct vm_area_struct *   vma;
+
+      vma = find_extend_vma(mm, start);
+
+      if ( !vma ||
+          (!force &&
+              ((write && (!(vma->vm_flags & VM_WRITE))) ||
+              (!write && (!(vma->vm_flags & VM_READ))) ) )) {
+         if (i) return i;
+         return -EFAULT;
+      }
+
+      spin_lock(&mm->page_table_lock);
+      do {
+         struct page *map;
+         while (!(map = follow_page(mm, start, write))) {
+            spin_unlock(&mm->page_table_lock);
+            switch (handle_mm_fault(mm, vma, start, write)) {
+            case 1:
+               tsk->min_flt++;
+               break;
+            case 2:
+               tsk->maj_flt++;
+               break;
+            case 0:
+               if (i) return i;
+               return -EFAULT;
+            default:
+               if (i) return i;
+               return -ENOMEM;
+            }
+            spin_lock(&mm->page_table_lock);
+         }
+         if (pages) {
+            pages[i] = get_page_map(map);
+            if (pages[i]) get_page(pages[i]);
+         }
+         if (vmas)
+            vmas[i] = vma;
+         i++;
+         start += PAGE_SIZE;
+         len--;
+      } while(len && start < vma->vm_end);
+      spin_unlock(&mm->page_table_lock);
+   } while(len);
+   return i;
+}
+
 /*
  * Force in an entire range of pages from the current process's user VA,
  * and pin them in physical memory.  
  */
-
 #define dprintk(x...)
+
 int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
 {
-   unsigned long      ptr, end;
-   int         err;
+   int pgcount, err;
    struct mm_struct *   mm;
-   struct vm_area_struct *   vma = 0;
-   struct page *      map;
-   int         i;
-   int         datain = (rw == READ);
    
    /* Make sure the iobuf is not already mapped somewhere. */
    if (iobuf->nr_pages)
@@ -473,9 +498,10 @@
    mm = current->mm;
    dprintk ("map_user_kiobuf: begin\n");
    
-   ptr = va & PAGE_MASK;
-   end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
-   err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
+   pgcount = (va + len + PAGE_SIZE - 1)/PAGE_SIZE - va/PAGE_SIZE;
+   /* mapping 0 bytes is not permitted */
+   if (!pgcount) BUG();
+   err = expand_kiobuf(iobuf, pgcount);
    if (err)
       return err;
 
@@ -483,69 +509,29 @@
 
    err = -EFAULT;
    iobuf->locked = 0;
-   iobuf->offset = va & ~PAGE_MASK;
+   iobuf->offset = va & (PAGE_SIZE-1);
    iobuf->length = len;
-   
-   i = 0;
-   
-   /* 
-    * First of all, try to fault in all of the necessary pages
-    */
-   while (ptr < end) {
-      if (!vma || ptr >= vma->vm_end) {
-         vma = find_vma(current->mm, ptr);
-         if (!vma) 
-            goto out_unlock;
-         if (vma->vm_start > ptr) {
-            if (!(vma->vm_flags & VM_GROWSDOWN))
-               goto out_unlock;
-            if (expand_stack(vma, ptr))
-               goto out_unlock;
-         }
-         if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
-               (!(vma->vm_flags & VM_READ))) {
-            err = -EACCES;
-            goto out_unlock;
-         }
-      }
-      spin_lock(&mm->page_table_lock);
-      while (!(map = follow_page(ptr, datain))) {
-         int ret;
-
-         spin_unlock(&mm->page_table_lock);
-         ret = handle_mm_fault(current->mm, vma, ptr, datain);
-         if (ret <= 0) {
-            if (!ret)
-               goto out_unlock;
-            else {
-               err = -ENOMEM;
-               goto out_unlock;
-            }
-         }
-         spin_lock(&mm->page_table_lock);
-      }         
-      map = get_page_map(map);
-      if (map) {
-         flush_dcache_page(map);
-         atomic_inc(&map->count);
-      } else
-         printk (KERN_INFO "Mapped page missing [%d]\n", i);
-      spin_unlock(&mm->page_table_lock);
-      iobuf->maplist[i] = map;
-      iobuf->nr_pages = ++i;
-      
-      ptr += PAGE_SIZE;
-   }
 
+   /* Try to fault in all of the necessary pages */
+   down_read(&mm->mmap_sem);
+   /* rw==READ means read from disk, write into memory area */
+   err = get_user_pages(current, mm, va, pgcount,
+         (rw==READ), 0, iobuf->maplist, NULL);
    up_read(&mm->mmap_sem);
+   if (err < 0) {
+      unmap_kiobuf(iobuf);
+      dprintk ("map_user_kiobuf: end %d\n", err);
+      return err;
+   }
+   iobuf->nr_pages = err;
+   while (pgcount--) {
+      /* FIXME: flush superfluous for rw==READ,
+       * probably wrong function for rw==WRITE
+       */
+      flush_dcache_page(iobuf->maplist[pgcount]);
+   }
    dprintk ("map_user_kiobuf: end OK\n");
    return 0;
-
- out_unlock:
-   up_read(&mm->mmap_sem);
-   unmap_kiobuf(iobuf);
-   dprintk ("map_user_kiobuf: end %d\n", err);
-   return err;
 }
 
 /*
@@ -595,6 +581,7 @@
       if (map) {
          if (iobuf->locked)
             UnlockPage(map);
+         /* FIXME: cache flush missing for rw==READ*/
          __free_page(map);
       }
    }
@@ -731,6 +718,7 @@
    } while (address && (address < end));
 }
 
+/* mm->page_table_lock must be held */
 static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
                                     unsigned long size, pgprot_t prot)
 {
@@ -810,6 +798,7 @@
    } while (address && (address < end));
 }
 
+/* mm->page_table_lock must be held */
 static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
    unsigned long phys_addr, pgprot_t prot)
 {
@@ -881,9 +870,10 @@
 /*
  * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
  */
-static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, 
+static inline void break_cow(struct vm_area_struct * vma, struct page *   old_page, struct page * new_page, unsigned long address, 
       pte_t *page_table)
 {
+   copy_cow_page(old_page,new_page,address);
    flush_page_to_ram(new_page);
    flush_cache_page(vma, address);
    establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
@@ -916,7 +906,10 @@
    old_page = pte_page(pte);
    if (!VALID_PAGE(old_page))
       goto bad_wp_page;
-   
+
+   if (old_page == ZERO_PAGE(address))
+      goto copy;
+
    /*
     * We can avoid the copy if:
     * - we're the only user (count == 1)
@@ -939,15 +932,11 @@
          break;
       /* Recheck swapcachedness once the page is locked */
       can_reuse = exclusive_swap_page(old_page);
-      if (can_reuse)
-         delete_from_swap_cache(old_page);
       UnlockPage(old_page);
       if (!can_reuse)
          break;
       /* FallThrough */
    case 1:
-      if (PageReserved(old_page))
-         break;
       flush_cache_page(vma, address);
       establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
       return 1;   /* Minor fault */
@@ -956,23 +945,26 @@
    /*
     * Ok, we need to copy. Oh, well..
     */
-   page_cache_get(old_page);
+copy:    
+    set_pte(page_table, pte);
    spin_unlock(&mm->page_table_lock);
-
    new_page = alloc_page(GFP_HIGHUSER);
+   spin_lock(&mm->page_table_lock);
    if (!new_page)
-      goto no_mem;
-   copy_cow_page(old_page,new_page,address);
-   page_cache_release(old_page);
+      return -1;
 
    /*
     * Re-check the pte - we dropped the lock
     */
-   spin_lock(&mm->page_table_lock);
    if (pte_same(*page_table, pte)) {
+      /* We are changing the pte, so get rid of the old
+       * one to avoid races with the hardware, this really
+       * only affects the accessed bit here.
+       */
+      pte = ptep_get_and_clear(page_table);
       if (PageReserved(old_page))
          ++mm->rss;
-      break_cow(vma, new_page, address, page_table);
+      break_cow(vma, old_page, new_page, address, page_table);
 
       /* Free the old page.. */
       new_page = old_page;
@@ -983,10 +975,6 @@
 bad_wp_page:
    printk("do_wp_page: bogus page at address %08lx (page 0x%lx)\n",address,(unsigned long)old_page);
    return -1;
-no_mem:
-   page_cache_release(old_page);
-   spin_lock(&mm->page_table_lock);
-   return -1;
 }
 
 static void vmtruncate_list(struct vm_area_struct *mpnt, unsigned long pgoff)
@@ -1039,6 +1027,7 @@
       goto out_unlock;
 
    pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
    if (mapping->i_mmap != NULL)
       vmtruncate_list(mapping->i_mmap, pgoff);
    if (mapping->i_mmap_shared != NULL)
@@ -1048,8 +1037,10 @@
    spin_unlock(&mapping->i_shared_lock);
    truncate_inode_pages(mapping, offset);
    goto out_truncate;
-
+   
 do_expand:
+   if (offset > inode->i_sb->s_maxbytes)
+      goto out;
    limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
    if (limit != RLIM_INFINITY) {
       if (inode->i_size >= limit) {
@@ -1062,15 +1053,24 @@
       }
    }
    inode->i_size = offset;
-
+   if (inode->i_op && inode->i_op->truncate)
+   {
+      /* This doesn't scale but it is meant to be a 2.4 invariant */
+      lock_kernel();
+      inode->i_op->truncate(inode);
+      unlock_kernel();
+   }
+   return 0;
+   
 out_truncate:
    if (inode->i_op && inode->i_op->truncate) {
       lock_kernel();
-      inode->i_op->truncate(inode);
+       inode->i_op->truncate(inode);
       unlock_kernel();
    }
-out:
    return 0;
+out:
+   return -EFBIG;
 }
 
 /* 
@@ -1090,10 +1090,6 @@
     */
    num = valid_swaphandles(entry, &offset);
    for (i = 0; i < num; offset++, i++) {
-      /* Don't block on I/O for read-ahead */
-      if (atomic_read(&nr_async_pages) >=
-          pager_daemon.swap_cluster << page_cluster)
-         break;
       /* Ok, do the async read-ahead now */
       new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset));
       if (!new_page)
@@ -1103,10 +1099,6 @@
    return;
 }
 
-/* Swap 80% full? Release the pages as they are paged in.. */
-#define vm_swap_full() \
-   (swapper_space.nrpages*5 > total_swap_pages*4)
-
 /*
  * We hold the mm semaphore and the page_table_lock on entry and exit.
  */
@@ -1160,13 +1152,24 @@
    pte = mk_pte(page, vma->vm_page_prot);
 
    swap_free(entry);
-   mark_page_accessed(page);
-   if (exclusive_swap_page(page)) {
-      if (write_access || vm_swap_full()) {
-         pte = pte_mkdirty(pte);
-         if (vma->vm_flags & VM_WRITE)
-            pte = pte_mkwrite(pte);
+   if (exclusive_swap_page(page)) {   
+      if (write_access)
+         pte = pte_mkwrite(pte_mkdirty(pte));
+      /*
+       * If swap space is getting low and we were the last user
+       * of this piece of swap space, we free this space so
+       * somebody else can be swapped out.
+       *
+       * We hold the page lock (required whenever adding to or
+       * removing from the swap cache), and the page_table_lock
+       * prevents concurrent swap_duplicate()s of this exclusive
+       * entry by try_to_swap_out() or fork's copy_page_range() -
+       * there's even a second level of protection, page lock
+       * prevents try_to_swap_out() and mmap_sem prevents do_fork().
+       */
+      if (vm_swap_full()) {
          delete_from_swap_cache(page);
+         pte = pte_mkdirty(pte);
       }
    }
    UnlockPage(page);
@@ -1198,18 +1201,16 @@
 
       /* Allocate our own private page. */
       spin_unlock(&mm->page_table_lock);
-
       page = alloc_page(GFP_HIGHUSER);
-      if (!page)
-         goto no_mem;
-      clear_user_highpage(page, addr);
-
       spin_lock(&mm->page_table_lock);
+      if (!page)
+         return -1;
       if (!pte_none(*page_table)) {
          page_cache_release(page);
          return 1;
       }
       mm->rss++;
+      clear_user_highpage(page, addr);
       flush_page_to_ram(page);
       entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
    }
@@ -1219,10 +1220,6 @@
    /* No need to invalidate - it was non-present before */
    update_mmu_cache(vma, addr, entry);
    return 1;   /* Minor fault */
-
-no_mem:
-   spin_lock(&mm->page_table_lock);
-   return -1;
 }
 
 /*
@@ -1242,10 +1239,16 @@
 {
    struct page * new_page;
    pte_t entry;
-
+   int ret;
+   struct inode *inode = NULL;
+   
    if (!vma->vm_ops || !vma->vm_ops->nopage)
       return do_anonymous_page(mm, vma, page_table, write_access, address);
    spin_unlock(&mm->page_table_lock);
+   if (vma->vm_file && vma->vm_file->f_dentry)
+      inode = vma->vm_file->f_dentry->d_inode;
+   if (inode)
+      down_read(&inode->i_truncate_sem);
 
    /*
     * The third argument is "no_share", which tells the low-level code
@@ -1255,10 +1258,16 @@
    new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
 
    spin_lock(&mm->page_table_lock);
-   if (new_page == NULL)   /* no page was available -- SIGBUS */
-      return 0;
-   if (new_page == NOPAGE_OOM)
-      return -1;
+   if (new_page == NULL) {   /* no page was available -- SIGBUS */
+      ret = 0;
+      goto out;
+   }
+   
+   if (new_page == NOPAGE_OOM) {
+      ret =  -1;
+      goto out;
+   }
+   
    /*
     * This silly early PAGE_DIRTY setting removes a race
     * due to the bad i386 page protection. But it's valid
@@ -1284,12 +1293,17 @@
    } else {
       /* One of our sibling threads was faster, back out. */
       page_cache_release(new_page);
-      return 1;
+      ret = 1;
+      goto out;
    }
 
    /* no need to invalidate: a not-present page shouldn't be cached */
    update_mmu_cache(vma, address, entry);
-   return 2;   /* Major fault */
+   ret = 2;   /* Major fault */
+out:
+   if (inode)
+      up_read(&inode->i_truncate_sem);
+   return ret;
 }
 
 /*
@@ -1328,12 +1342,14 @@
       return do_swap_page(mm, vma, address, pte, entry, write_access);
    }
 
+   entry = ptep_get_and_clear(pte);
    if (write_access) {
       if (!pte_write(entry))
          return do_wp_page(mm, vma, address, pte, entry);
 
       entry = pte_mkdirty(entry);
    }
+
    entry = pte_mkyoung(entry);
    establish_pte(vma, address, pte, entry);
    return 1;
@@ -1394,7 +1410,7 @@
        * Because we dropped the lock, we should re-check the
        * entry, as somebody else could have populated it..
        */
-      if (!pgd_none(*pgd)) {
+      if (pgd_present(*pgd)) {
          pmd_free(new);
          goto out;
       }
@@ -1412,7 +1428,7 @@
  */
 pte_t *pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 {
-   if (pmd_none(*pmd)) {
+   if (!pmd_present(*pmd)) {
       pte_t *new;
 
       /* "fast" allocation can happen without dropping the lock.. */
@@ -1428,7 +1444,7 @@
           * Because we dropped the lock, we should re-check the
           * entry, as somebody else could have populated it..
           */
-         if (!pmd_none(*pmd)) {
+         if (pmd_present(*pmd)) {
             pte_free(new);
             goto out;
          }
@@ -1439,23 +1455,19 @@
    return pte_offset(pmd, address);
 }
 
-/*
- * Simplistic page force-in..
- */
 int make_pages_present(unsigned long addr, unsigned long end)
 {
-   int write;
-   struct mm_struct *mm = current->mm;
+   int ret, len, write;
    struct vm_area_struct * vma;
 
-   vma = find_vma(mm, addr);
+   vma = find_vma(current->mm, addr);
    write = (vma->vm_flags & VM_WRITE) != 0;
    if (addr >= end)
       BUG();
-   do {
-      if (handle_mm_fault(mm, vma, addr, write) < 0)
-         return -1;
-      addr += PAGE_SIZE;
-   } while (addr < end);
-   return 0;
+   if (end > vma->vm_end)
+      BUG();
+   len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
+   ret = get_user_pages(current, current->mm, addr,
+         len, write, 0, NULL, NULL);
+   return ret == len ? 0 : -1;
 }


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.