Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

Advertisement
[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Advertisement

Kernel v2.6.24 /mm/readahead.c

Filename: /mm/readahead.c
Lines Added: 49
Lines Deleted: 46
Also changed in: (Previous) 2.6.24-rc8  2.6.24-rc7  2.6.24-rc6  2.6.24-rc5  2.6.24-rc4  2.6.24-rc3 
(Following) 2.6.25-rc6-git4  2.6.25-rc6-git5  2.6.25-rc6-git6  2.6.25-rc6-git7  2.6.25-rc6-git8  2.6.25-rc7 

Location
2.6.24
  mm
    readahead.c

Patch

diff --git a/mm/readahead.c b/mm/readahead.c
index 39bf45d..c9c50ca 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -15,22 +15,15 @@
 #include <linux/backing-dev.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
+#include <linux/pagemap.h>
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
 }
 EXPORT_SYMBOL(default_unplug_io_fn);
 
-/*
- * Convienent macros for min/max read-ahead pages.
- * Note that MAX_RA_PAGES is rounded down, while MIN_RA_PAGES is rounded up.
- * The latter is necessary for systems with large page size(i.e. 64k).
- */
-#define MAX_RA_PAGES   (VM_MAX_READAHEAD*1024 / PAGE_CACHE_SIZE)
-#define MIN_RA_PAGES   DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE)
-
 struct backing_dev_info default_backing_dev_info = {
-   .ra_pages   = MAX_RA_PAGES,
+   .ra_pages   = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
    .state      = 0,
    .capabilities   = BDI_CAP_MAP_COPY,
    .unplug_io_fn   = default_unplug_io_fn,
@@ -45,7 +38,7 @@ void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
 {
    ra->ra_pages = mapping->backing_dev_info->ra_pages;
-   ra->prev_index = -1;
+   ra->prev_pos = -1;
 }
 EXPORT_SYMBOL_GPL(file_ra_state_init);
 
@@ -65,28 +58,25 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
          int (*filler)(void *, struct page *), void *data)
 {
    struct page *page;
-   struct pagevec lru_pvec;
    int ret = 0;
 
-   pagevec_init(&lru_pvec, 0);
-
    while (!list_empty(pages)) {
       page = list_to_page(pages);
       list_del(&page->lru);
-      if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
+      if (add_to_page_cache_lru(page, mapping,
+               page->index, GFP_KERNEL)) {
          page_cache_release(page);
          continue;
       }
+      page_cache_release(page);
+
       ret = filler(data, page);
-      if (!pagevec_add(&lru_pvec, page))
-         __pagevec_lru_add(&lru_pvec);
-      if (ret) {
+      if (unlikely(ret)) {
          put_pages_list(pages);
          break;
       }
       task_io_account_read(PAGE_CACHE_SIZE);
    }
-   pagevec_lru_add(&lru_pvec);
    return ret;
 }
 
@@ -96,7 +86,6 @@ static int read_pages(struct address_space *mapping, struct file *filp,
       struct list_head *pages, unsigned nr_pages)
 {
    unsigned page_idx;
-   struct pagevec lru_pvec;
    int ret;
 
    if (mapping->a_ops->readpages) {
@@ -106,19 +95,15 @@ static int read_pages(struct address_space *mapping, struct file *filp,
       goto out;
    }
 
-   pagevec_init(&lru_pvec, 0);
    for (page_idx = 0; page_idx < nr_pages; page_idx++) {
       struct page *page = list_to_page(pages);
       list_del(&page->lru);
-      if (!add_to_page_cache(page, mapping,
+      if (!add_to_page_cache_lru(page, mapping,
                page->index, GFP_KERNEL)) {
          mapping->a_ops->readpage(filp, page);
-         if (!pagevec_add(&lru_pvec, page))
-            __pagevec_lru_add(&lru_pvec);
-      } else
-         page_cache_release(page);
+      }
+      page_cache_release(page);
    }
-   pagevec_lru_add(&lru_pvec);
    ret = 0;
 out:
    return ret;
@@ -156,20 +141,19 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
    /*
     * Preallocate as many pages as we will need.
     */
-   read_lock_irq(&mapping->tree_lock);
    for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
       pgoff_t page_offset = offset + page_idx;
 
       if (page_offset > end_index)
          break;
 
+      rcu_read_lock();
       page = radix_tree_lookup(&mapping->page_tree, page_offset);
+      rcu_read_unlock();
       if (page)
          continue;
 
-      read_unlock_irq(&mapping->tree_lock);
       page = page_cache_alloc_cold(mapping);
-      read_lock_irq(&mapping->tree_lock);
       if (!page)
          break;
       page->index = page_offset;
@@ -178,7 +162,6 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
          SetPageReadahead(page);
       ret++;
    }
-   read_unlock_irq(&mapping->tree_lock);
 
    /*
     * Now start the IO.  We ignore I/O errors - if the page is not
@@ -250,6 +233,12 @@ unsigned long max_sane_readahead(unsigned long nr)
       + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
 }
 
+static int __init readahead_init(void)
+{
+   return bdi_init(&default_backing_dev_info);
+}
+subsys_initcall(readahead_init);
+
 /*
  * Submit IO for the read-ahead request in file_ra_state.
  */
@@ -326,7 +315,7 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
  * indicator. The flag won't be set on already cached pages, to avoid the
  * readahead-for-nothing fuss, saving pointless page cache lookups.
  *
- * prev_index tracks the last visited page in the _previous_ read request.
+ * prev_pos tracks the last visited byte in the _previous_ read request.
  * It should be maintained by the caller, and will be used for detecting
  * small random reads. Note that the readahead algorithm checks loosely
  * for sequential patterns. Hence interleaved reads might be served as
@@ -350,11 +339,9 @@ ondemand_readahead(struct address_space *mapping,
          bool hit_readahead_marker, pgoff_t offset,
          unsigned long req_size)
 {
-   unsigned long max;   /* max readahead pages */
-   int sequential;
-
-   max = ra->ra_pages;
-   sequential = (offset - ra->prev_index <= 1UL) || (req_size > max);
+   int   max = ra->ra_pages;   /* max readahead pages */
+   pgoff_t prev_offset;
+   int   sequential;
 
    /*
     * It's the expected callback offset, assume sequential access.
@@ -368,6 +355,9 @@ ondemand_readahead(struct address_space *mapping,
       goto readit;
    }
 
+   prev_offset = ra->prev_pos >> PAGE_CACHE_SHIFT;
+   sequential = offset - prev_offset <= 1UL || req_size > max;
+
    /*
     * Standalone, small read.
     * Read as is, and do not pollute the readahead state.
@@ -378,6 +368,29 @@ ondemand_readahead(struct address_space *mapping,
    }
 
    /*
+    * Hit a marked page without valid readahead state.
+    * E.g. interleaved reads.
+    * Query the pagecache for async_size, which normally equals to
+    * readahead size. Ramp it up and use it as the new readahead size.
+    */
+   if (hit_readahead_marker) {
+      pgoff_t start;
+
+      read_lock_irq(&mapping->tree_lock);
+      start = radix_tree_next_hole(&mapping->page_tree, offset, max+1);
+      read_unlock_irq(&mapping->tree_lock);
+
+      if (!start || start - offset > max)
+         return 0;
+
+      ra->start = start;
+      ra->size = start - offset;   /* old async_size */
+      ra->size = get_next_ra_size(ra, max);
+      ra->async_size = ra->size;
+      goto readit;
+   }
+
+   /*
     * It may be one of
     *    - first read on start of file
     *    - sequential cache miss
@@ -388,16 +401,6 @@ ondemand_readahead(struct address_space *mapping,
    ra->size = get_init_ra_size(req_size, max);
    ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
 
-   /*
-    * Hit on a marked page without valid readahead state.
-    * E.g. interleaved reads.
-    * Not knowing its readahead pos/size, bet on the minimal possible one.
-    */
-   if (hit_readahead_marker) {
-      ra->start++;
-      ra->size = get_next_ra_size(ra, max);
-   }
-
 readit:
    return ra_submit(ra, mapping, filp);
 }


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.