Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

Advertisement
[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Advertisement

Kernel v2.4.6 /fs/buffer.c

Filename:/fs/buffer.c
Lines Added:135
Lines Deleted:138
Also changed in: (Previous) 2.4.6-pre9  2.4.6-pre8  2.4.6-pre7  2.4.6-pre6  2.4.6-pre5  2.4.6-pre4 
(Following) 2.4.6-ac1  2.4.6-ac2  2.4.6-ac3  2.4.6-ac4  2.4.6-ac5  2.4.7-pre4 

Location
[  2.4.6
  [  fs
     o  buffer.c

Patch

diff -u --recursive --new-file v2.4.5/linux/fs/buffer.c linux/fs/buffer.c
--- v2.4.5/linux/fs/buffer.c   Fri May 25 17:57:46 2001
+++ linux/fs/buffer.c   Sat Jun 30 10:44:32 2001
@@ -61,7 +61,7 @@
 
 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
 #define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
-#define NR_RESERVED (2*MAX_BUF_PER_PAGE)
+#define NR_RESERVED (10*MAX_BUF_PER_PAGE)
 #define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this 
                     number of unused buffer heads */
 
@@ -161,144 +161,131 @@
    atomic_dec(&bh->b_count);
 }
 
-/* Call sync_buffers with wait!=0 to ensure that the call does not
- * return until all buffer writes have completed.  Sync() may return
- * before the writes have finished; fsync() may not.
- */
+/* End-of-write handler.. Just mark it up-to-date and unlock the buffer. */
+static void end_buffer_write(struct buffer_head *bh, int uptodate)
+{
+   mark_buffer_uptodate(bh, uptodate);
+   unlock_buffer(bh);
+}
 
-/* Godamity-damn.  Some buffers (bitmaps for filesystems)
- * spontaneously dirty themselves without ever brelse being called.
- * We will ultimately want to put these in a separate list, but for
- * now we search all of the lists for dirty buffers.
+/*
+ * The buffers have been marked clean and locked.  Just submit the dang
+ * things.. 
+ *
+ * We'll wait for the first one of them - "sync" is not exactly
+ * performance-critical, and this makes us not hog the IO subsystem
+ * completely, while still allowing for a fair amount of concurrent IO. 
  */
-static int sync_buffers(kdev_t dev, int wait)
+static void write_locked_buffers(struct buffer_head **array, unsigned int count)
 {
-   int i, retry, pass = 0, err = 0;
-   struct buffer_head * bh, *next;
-
-   /* One pass for no-wait, three for wait:
-    * 0) write out all dirty, unlocked buffers;
-    * 1) write out all dirty buffers, waiting if locked;
-    * 2) wait for completion by waiting for all buffers to unlock.
-    */
+   struct buffer_head *wait = *array;
+   atomic_inc(&wait->b_count);
    do {
-      retry = 0;
-
-      /* We search all lists as a failsafe mechanism, not because we expect
-       * there to be dirty buffers on any of the other lists.
-       */
-repeat:
-      spin_lock(&lru_list_lock);
-      bh = lru_list[BUF_DIRTY];
-      if (!bh)
-         goto repeat2;
-
-      for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
-         next = bh->b_next_free;
+      struct buffer_head * bh = *array++;
+      bh->b_end_io = end_buffer_write;
+      submit_bh(WRITE, bh);
+   } while (--count);
+   wait_on_buffer(wait);
+   atomic_dec(&wait->b_count);
+}
 
-         if (!lru_list[BUF_DIRTY])
-            break;
-         if (dev && bh->b_dev != dev)
-            continue;
-         if (buffer_locked(bh)) {
-            /* Buffer is locked; skip it unless wait is
-             * requested AND pass > 0.
-             */
-            if (!wait || !pass) {
-               retry = 1;
-               continue;
-            }
-            atomic_inc(&bh->b_count);
-            spin_unlock(&lru_list_lock);
-            wait_on_buffer (bh);
-            atomic_dec(&bh->b_count);
-            goto repeat;
-         }
+#define NRSYNC (32)
+static void write_unlocked_buffers(kdev_t dev)
+{
+   struct buffer_head *next;
+   struct buffer_head *array[NRSYNC];
+   unsigned int count;
+   int nr;
 
-         /* If an unlocked buffer is not uptodate, there has
-          * been an IO error. Skip it.
-          */
-         if (wait && buffer_req(bh) && !buffer_locked(bh) &&
-             !buffer_dirty(bh) && !buffer_uptodate(bh)) {
-            err = -EIO;
-            continue;
-         }
+repeat:
+   spin_lock(&lru_list_lock);
+   next = lru_list[BUF_DIRTY];
+   nr = nr_buffers_type[BUF_DIRTY] * 2;
+   count = 0;
+   while (next && --nr >= 0) {
+      struct buffer_head * bh = next;
+      next = bh->b_next_free;
 
-         /* Don't write clean buffers.  Don't write ANY buffers
-          * on the third pass.
-          */
-         if (!buffer_dirty(bh) || pass >= 2)
+      if (dev && bh->b_dev != dev)
+         continue;
+      if (test_and_set_bit(BH_Lock, &bh->b_state))
+         continue;
+      if (atomic_set_buffer_clean(bh)) {
+         __refile_buffer(bh);
+         array[count++] = bh;
+         if (count < NRSYNC)
             continue;
 
-         atomic_inc(&bh->b_count);
          spin_unlock(&lru_list_lock);
-         ll_rw_block(WRITE, 1, &bh);
-         atomic_dec(&bh->b_count);
-         retry = 1;
+         write_locked_buffers(array, count);
          goto repeat;
       }
+      unlock_buffer(bh);
+   }
+   spin_unlock(&lru_list_lock);
 
-    repeat2:
-      bh = lru_list[BUF_LOCKED];
-      if (!bh) {
-         spin_unlock(&lru_list_lock);
-         break;
-      }
-      for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
-         next = bh->b_next_free;
+   if (count)
+      write_locked_buffers(array, count);
+}
 
-         if (!lru_list[BUF_LOCKED])
-            break;
-         if (dev && bh->b_dev != dev)
-            continue;
-         if (buffer_locked(bh)) {
-            /* Buffer is locked; skip it unless wait is
-             * requested AND pass > 0.
-             */
-            if (!wait || !pass) {
-               retry = 1;
-               continue;
-            }
-            atomic_inc(&bh->b_count);
-            spin_unlock(&lru_list_lock);
-            wait_on_buffer (bh);
-            spin_lock(&lru_list_lock);
-            atomic_dec(&bh->b_count);
-            goto repeat2;
-         }
+static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
+{
+   struct buffer_head * next;
+   int nr;
+
+repeat:
+   spin_lock(&lru_list_lock);
+   next = lru_list[index];
+   nr = nr_buffers_type[index] * 2;
+   while (next && --nr >= 0) {
+      struct buffer_head *bh = next;
+      next = bh->b_next_free;
+
+      if (!buffer_locked(bh)) {
+         if (refile)
+            __refile_buffer(bh);
+         continue;
       }
-      spin_unlock(&lru_list_lock);
+      if (dev && bh->b_dev != dev)
+         continue;
 
-      /* If we are waiting for the sync to succeed, and if any dirty
-       * blocks were written, then repeat; on the second pass, only
-       * wait for buffers being written (do not pass to write any
-       * more buffers on the second pass).
-       */
-   } while (wait && retry && ++pass<=2);
-   return err;
+      atomic_inc(&bh->b_count);
+      spin_unlock(&lru_list_lock);
+      wait_on_buffer (bh);
+      atomic_dec(&bh->b_count);
+      goto repeat;
+   }
+   spin_unlock(&lru_list_lock);
+   return 0;
 }
 
-void sync_dev(kdev_t dev)
+/* Call sync_buffers with wait!=0 to ensure that the call does not
+ * return until all buffer writes have completed.  Sync() may return
+ * before the writes have finished; fsync() may not.
+ */
+
+/* Godamity-damn.  Some buffers (bitmaps for filesystems)
+ * spontaneously dirty themselves without ever brelse being called.
+ * We will ultimately want to put these in a separate list, but for
+ * now we search all of the lists for dirty buffers.
+ */
+static int sync_buffers(kdev_t dev, int wait)
 {
-   sync_supers(dev);
-   sync_inodes(dev);
-   DQUOT_SYNC(dev);
-   /* sync all the dirty buffers out to disk only _after_ all the
-      high level layers finished generated buffer dirty data
-      (or we'll return with some buffer still dirty on the blockdevice
-      so breaking the semantics of this call) */
-   sync_buffers(dev, 0);
-   /*
-    * FIXME(eric) we need to sync the physical devices here.
-    * This is because some (scsi) controllers have huge amounts of
-    * cache onboard (hundreds of Mb), and we need to instruct
-    * them to commit all of the dirty memory to disk, and we should
-    * not return until this has happened.
-    *
-    * This would need to get implemented by going through the assorted
-    * layers so that each block major number can be synced, and this
-    * would call down into the upper and mid-layer scsi.
+   int err = 0;
+
+   /* One pass for no-wait, three for wait:
+    * 0) write out all dirty, unlocked buffers;
+    * 1) wait for all dirty locked buffers;
+    * 2) write out all dirty, unlocked buffers;
+    * 2) wait for completion by waiting for all buffers to unlock.
     */
+   write_unlocked_buffers(dev);
+   if (wait) {
+      err = wait_for_locked_buffers(dev, BUF_DIRTY, 0);
+      write_unlocked_buffers(dev);
+      err |= wait_for_locked_buffers(dev, BUF_LOCKED, 1);
+   }
+   return err;
 }
 
 int fsync_super(struct super_block *sb)
@@ -331,6 +318,15 @@
    return sync_buffers(dev, 1);
 }
 
+/*
+ * There's no real reason to pretend we should
+ * ever do anything differently
+ */
+void sync_dev(kdev_t dev)
+{
+   fsync_dev(dev);
+}
+
 asmlinkage long sys_sync(void)
 {
    fsync_dev(0);
@@ -646,8 +642,8 @@
          /* Another device? */
          if (bh->b_dev != dev)
             continue;
-         /* Part of a mapping? */
-         if (bh->b_page->mapping)
+         /* Not hashed? */
+         if (!bh->b_pprev)
             continue;
          if (buffer_locked(bh)) {
             atomic_inc(&bh->b_count);
@@ -711,6 +707,9 @@
          bh_next = bh->b_next_free;
          if (bh->b_dev != dev || bh->b_size == size)
             continue;
+         /* Unhashed? */
+         if (!bh->b_pprev)
+            continue;
          if (buffer_locked(bh)) {
             atomic_inc(&bh->b_count);
             spin_unlock(&lru_list_lock);
@@ -759,7 +758,7 @@
 {
    balance_dirty(NODEV);
    if (free_shortage())
-      page_launder(GFP_BUFFER, 0);
+      page_launder(GFP_NOFS, 0);
    if (!grow_buffers(size)) {
       wakeup_bdflush(1);
       current->policy |= SCHED_YIELD;
@@ -1220,11 +1219,11 @@
    }
    spin_unlock(&unused_list_lock);
 
-   /* This is critical.  We can't swap out pages to get
-    * more buffer heads, because the swap-out may need
-    * more buffer-heads itself.  Thus SLAB_BUFFER.
+   /* This is critical.  We can't call out to the FS
+    * to get more buffer heads, because the FS may need
+    * more buffer-heads itself.  Thus SLAB_NOFS.
     */
-   if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
+   if((bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS)) != NULL) {
       bh->b_blocknr = -1;
       bh->b_this_page = NULL;
       return bh;
@@ -1348,11 +1347,9 @@
     */
    run_task_queue(&tq_disk);
 
-   /* 
-    * Set our state for sleeping, then check again for buffer heads.
-    * This ensures we won't miss a wake_up from an interrupt.
-    */
-   wait_event(buffer_wait, nr_unused_buffer_heads >= MAX_BUF_PER_PAGE);
+   current->policy |= SCHED_YIELD;
+   __set_current_state(TASK_RUNNING);
+   schedule();
    goto try_again;
 }
 
@@ -2242,7 +2239,7 @@
       return 0;
    }
 
-   page = alloc_page(GFP_BUFFER);
+   page = alloc_page(GFP_NOFS);
    if (!page)
       goto out;
    LockPage(page);
@@ -2304,7 +2301,7 @@
  *   1 - start IO for dirty buffers
  *   2 - wait for completion of locked buffers
  */
-static void sync_page_buffers(struct buffer_head *bh, int wait)
+static void sync_page_buffers(struct buffer_head *bh, unsigned int gfp_mask)
 {
    struct buffer_head * tmp = bh;
 
@@ -2312,7 +2309,7 @@
       struct buffer_head *p = tmp;
       tmp = tmp->b_this_page;
       if (buffer_locked(p)) {
-         if (wait > 1)
+         if (gfp_mask & __GFP_WAIT)
             __wait_on_buffer(p);
       } else if (buffer_dirty(p))
          ll_rw_block(WRITE, 1, &p);
@@ -2336,7 +2333,7 @@
  *       obtain a reference to a buffer head within a page.  So we must
  *    lock out all of these paths to cleanly toss the page.
  */
-int try_to_free_buffers(struct page * page, int wait)
+int try_to_free_buffers(struct page * page, unsigned int gfp_mask)
 {
    struct buffer_head * tmp, * bh = page->buffers;
    int index = BUFSIZE_INDEX(bh->b_size);
@@ -2387,10 +2384,10 @@
    spin_unlock(&free_list[index].lock);
    write_unlock(&hash_table_lock);
    spin_unlock(&lru_list_lock);
-   if (wait) {
-      sync_page_buffers(bh, wait);
+   if (gfp_mask & __GFP_IO) {
+      sync_page_buffers(bh, gfp_mask);
       /* We waited synchronously, so we can free the buffers. */
-      if (wait > 1 && !loop) {
+      if ((gfp_mask & __GFP_WAIT) && !loop) {
          loop = 1;
          goto cleaned_buffers_try_again;
       }


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.