Linux Headquarters
[ Register ]
[ About us ] [ Home Page ]

[ Kernel ] [ Documentation ] [ Links ] [ Books ]

Kernel v2.6.24 /mm/memory_hotplug.c

Filename:/mm/memory_hotplug.c
Lines Added:327
Lines Deleted:31
Also changed in: (Previous) 2.6.24-rc8  2.6.24-rc7  2.6.24-rc6  2.6.24-rc5  2.6.24-rc4  2.6.24-rc3 
(Following) 2.6.24-git15  2.6.24-git16  2.6.24-git17  2.6.24-git18  2.6.24-git19  2.6.24-git20 

Location
[  2.6.24
  [  mm
     o  memory_hotplug.c

Patch

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index df9d554..9512a54 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -23,6 +23,9 @@
 #include <linux/vmalloc.h>
 #include <linux/ioport.h>
 #include <linux/cpuset.h>
+#include <linux/delay.h>
+#include <linux/migrate.h>
+#include <linux/page-isolation.h>
 
 #include <asm/tlbflush.h>
 
@@ -36,7 +39,7 @@ static struct resource *register_memory_resource(u64 start, u64 size)
    res->name = "System RAM";
    res->start = start;
    res->end = start + size - 1;
-   res->flags = IORESOURCE_MEM;
+   res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
    if (request_resource(&iomem_resource, res) < 0) {
       printk("System RAM resource %llx - %llx cannot be added\n",
       (unsigned long long)res->start, (unsigned long long)res->end);
@@ -118,7 +121,7 @@ int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
       err = __add_section(zone, i << PFN_SECTION_SHIFT);
 
       /*
-       * EEXIST is finally dealed with by ioresource collision
+       * EEXIST is finally dealt with by ioresource collision
        * check. see add_memory() => register_memory_resource()
        * Warning will be printed if there is collision.
        */
@@ -161,17 +164,47 @@ static void grow_pgdat_span(struct pglist_data *pgdat,
                pgdat->node_start_pfn;
 }
 
-int online_pages(unsigned long pfn, unsigned long nr_pages)
+static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
+         void *arg) /* arg: unsigned long * holding the running onlined count */
 {
    unsigned long i;
+   unsigned long onlined_pages = *(unsigned long *)arg;
+   struct page *page;
+   if (PageReserved(pfn_to_page(start_pfn))) /* first page Reserved: region not onlined yet */
+      for (i = 0; i < nr_pages; i++) {
+         page = pfn_to_page(start_pfn + i);
+         online_page(page);
+         onlined_pages++;
+      }
+   *(unsigned long *)arg = onlined_pages; /* hand the updated count back to the caller */
+   return 0;
+}
+
+
+int online_pages(unsigned long pfn, unsigned long nr_pages)
+{
    unsigned long flags;
    unsigned long onlined_pages = 0;
-   struct resource res;
-   u64 section_end;
-   unsigned long start_pfn;
    struct zone *zone;
    int need_zonelists_rebuild = 0;
+   int nid;
+   int ret;
+   struct memory_notify arg;
 
+   arg.start_pfn = pfn;
+   arg.nr_pages = nr_pages;
+   arg.status_change_nid = -1;
+
+   nid = page_to_nid(pfn_to_page(pfn));
+   if (node_present_pages(nid) == 0)
+      arg.status_change_nid = nid;
+
+   ret = memory_notify(MEM_GOING_ONLINE, &arg);
+   ret = notifier_to_errno(ret);
+   if (ret) {
+      memory_notify(MEM_CANCEL_ONLINE, &arg);
+      return ret;
+   }
    /*
     * This doesn't need a lock to do pfn_to_page().
     * The section can't be removed here because of the
@@ -191,37 +224,25 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
    if (!populated_zone(zone))
       need_zonelists_rebuild = 1;
 
-   res.start = (u64)pfn << PAGE_SHIFT;
-   res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
-   res.flags = IORESOURCE_MEM; /* we just need system ram */
-   section_end = res.end;
-
-   while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
-      start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
-      nr_pages = (unsigned long)
-                           ((res.end + 1 - res.start) >> PAGE_SHIFT);
-
-      if (PageReserved(pfn_to_page(start_pfn))) {
-         /* this region's page is not onlined now */
-         for (i = 0; i < nr_pages; i++) {
-            struct page *page = pfn_to_page(start_pfn + i);
-            online_page(page);
-            onlined_pages++;
-         }
-      }
-
-      res.start = res.end + 1;
-      res.end = section_end;
-   }
+   walk_memory_resource(pfn, nr_pages, &onlined_pages,
+      online_pages_range);
    zone->present_pages += onlined_pages;
    zone->zone_pgdat->node_present_pages += onlined_pages;
 
    setup_per_zone_pages_min();
+   if (onlined_pages) {
+      kswapd_run(zone_to_nid(zone));
+      node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
+   }
 
    if (need_zonelists_rebuild)
       build_all_zonelists();
    vm_total_pages = nr_free_pagecache_pages();
    writeback_set_ratelimit();
+
+   if (onlined_pages)
+      memory_notify(MEM_ONLINE, &arg);
+
    return 0;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
@@ -271,9 +292,6 @@ int add_memory(int nid, u64 start, u64 size)
       if (!pgdat)
          return -ENOMEM;
       new_pgdat = 1;
-      ret = kswapd_run(nid);
-      if (ret)
-         goto error;
    }
 
    /* call arch's memory hotadd */
@@ -308,3 +326,281 @@ error:
    return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * Return 1 if [start_pfn, end_pfn) lies within a single zone, 0 otherwise.
+ */
+static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
+{
+   unsigned long pfn;
+   struct zone *zone = NULL;
+   struct page *page;
+   int i;
+   for (pfn = start_pfn;
+        pfn < end_pfn;
+        pfn += MAX_ORDER_NR_PAGES) {
+      i = 0;
+      /* Sample the first valid pfn of this chunk (CONFIG_HOLES_IN_ZONE check). */
+      while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
+         i++;
+      if (i == MAX_ORDER_NR_PAGES)
+         continue; /* no valid pfn in this chunk */
+      page = pfn_to_page(pfn + i);
+      if (zone && page_zone(page) != zone)
+         return 0; /* zone differs from the one seen earlier */
+      zone = page_zone(page);
+   }
+   return 1;
+}
+
+/*
+ * Scan pfns in [start, end) and return the first one whose page is on an LRU
+ * list, or 0 if none is. Returns unsigned long so large pfns are not truncated.
+ */
+unsigned long scan_lru_pages(unsigned long start, unsigned long end)
+{
+   unsigned long pfn;
+   struct page *page;
+   for (pfn = start; pfn < end; pfn++) {
+      if (pfn_valid(pfn)) {
+         page = pfn_to_page(pfn);
+         if (PageLRU(page))
+            return pfn;
+      }
+   }
+   return 0;
+}
+
+static struct page *
+hotremove_migrate_alloc(struct page *page,
+         unsigned long private,
+         int **x)
+{
+   /* This should be improved: all three arguments are ignored for now. */
+   return alloc_page(GFP_HIGHUSER_PAGECACHE);
+}
+
+
+#define NR_OFFLINE_AT_ONCE_PAGES   (256) /* max pages isolated per do_migrate_range() call */
+static int
+do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
+{
+   unsigned long pfn;
+   struct page *page;
+   int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
+   int not_managed = 0;
+   int ret = 0;
+   LIST_HEAD(source);
+
+   for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
+      if (!pfn_valid(pfn))
+         continue;
+      page = pfn_to_page(pfn);
+      if (!page_count(page))
+         continue;
+      /*
+       * Free pages were skipped above; only pages on an LRU can be
+       * isolated for migration here.
+       */
+      ret = isolate_lru_page(page, &source);
+      if (!ret) { /* Success */
+         move_pages--;
+      } else {
+         /* Because we don't hold the big zone->lock, we should
+            check the page count again here. */
+         if (page_count(page))
+            not_managed++;
+#ifdef CONFIG_DEBUG_VM
+         printk(KERN_INFO "removing from LRU failed"
+                " %lx/%d/%lx\n",
+            pfn, page_count(page), page->flags);
+#endif
+      }
+   }
+   ret = -EBUSY;
+   if (not_managed) { /* an in-use page could not be isolated: give up */
+      if (!list_empty(&source))
+         putback_lru_pages(&source);
+      goto out;
+   }
+   ret = 0;
+   if (list_empty(&source))
+      goto out;
+   /* migrate_pages() returns the number of pages it failed to migrate */
+   ret = migrate_pages(&source, hotremove_migrate_alloc, 0);
+
+out:
+   return ret;
+}
+
+/*
+ * Remove [start, start + nr_pages) from free_area[] and mark all as Reserved.
+ */
+static int
+offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
+         void *data) /* data is unused */
+{
+   __offline_isolated_pages(start, start + nr_pages);
+   return 0;
+}
+
+static void
+offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+   walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL,
+            offline_isolated_pages_cb); /* callback runs on each range the walk reports */
+}
+
+/*
+ * Check that all pages in the range, recorded as memory resource, are isolated.
+ */
+static int
+check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
+         void *data) /* data: long * accumulating the isolated page count */
+{
+   int ret;
+   long offlined = *(long *)data;
+   ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
+   offlined = nr_pages; /* overwrites the value read from *data above */
+   if (!ret) /* add this range to the count only when the test returned 0 */
+      *(long *)data += offlined;
+   return ret;
+}
+
+static long
+check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
+{
+   long offlined = 0;
+   int ret;
+
+   ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined,
+         check_pages_isolated_cb);
+   if (ret < 0)
+      offlined = (long)ret; /* report the negative error instead of a count */
+   return offlined;
+}
+
+extern void drain_all_local_pages(void);
+
+int offline_pages(unsigned long start_pfn,
+        unsigned long end_pfn, unsigned long timeout)
+{
+   unsigned long pfn, nr_pages, expire;
+   long offlined_pages;
+   int ret, drain, retry_max, node;
+   struct zone *zone;
+   struct memory_notify arg;
+
+   BUG_ON(start_pfn >= end_pfn);
+   /* both ends must at least be aligned to pageblock_nr_pages */
+   if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
+      return -EINVAL;
+   if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
+      return -EINVAL;
+   /* Requiring a single zone makes hotplug much easier and more readable.
+      We assume this for now. */
+   if (!test_pages_in_a_zone(start_pfn, end_pfn))
+      return -EINVAL;
+
+   zone = page_zone(pfn_to_page(start_pfn));
+   node = zone_to_nid(zone);
+   nr_pages = end_pfn - start_pfn;
+
+   /* set the above range as isolated */
+   ret = start_isolate_page_range(start_pfn, end_pfn);
+   if (ret)
+      return ret;
+
+   arg.start_pfn = start_pfn;
+   arg.nr_pages = nr_pages;
+   arg.status_change_nid = -1;
+   if (nr_pages >= node_present_pages(node))
+      arg.status_change_nid = node;
+
+   ret = memory_notify(MEM_GOING_OFFLINE, &arg);
+   ret = notifier_to_errno(ret);
+   if (ret)
+      goto failed_removal;
+
+   pfn = start_pfn; /* overwritten by scan_lru_pages() before it is read */
+   expire = jiffies + timeout;
+   drain = 0;
+   retry_max = 5;
+repeat:
+   /* start memory hot removal; give up on timeout or a pending signal */
+   ret = -EAGAIN;
+   if (time_after(jiffies, expire))
+      goto failed_removal;
+   ret = -EINTR;
+   if (signal_pending(current))
+      goto failed_removal;
+   ret = 0;
+   if (drain) {
+      lru_add_drain_all();
+      flush_scheduled_work();
+      cond_resched();
+      drain_all_local_pages();
+   }
+
+   pfn = scan_lru_pages(start_pfn, end_pfn);
+   if (pfn) { /* a page in the range is still on an LRU */
+      ret = do_migrate_range(pfn, end_pfn);
+      if (!ret) {
+         drain = 1;
+         goto repeat;
+      } else {
+         if (ret < 0)
+            if (--retry_max == 0)
+               goto failed_removal;
+         yield();
+         drain = 1;
+         goto repeat;
+      }
+   }
+   /* drain all zones' lru pagevecs; this is asynchronous... */
+   lru_add_drain_all();
+   flush_scheduled_work();
+   yield();
+   /* drain pcp pages; this is synchronous. */
+   drain_all_local_pages();
+   /* check again that the whole range is isolated */
+   offlined_pages = check_pages_isolated(start_pfn, end_pfn);
+   if (offlined_pages < 0) {
+      ret = -EBUSY;
+      goto failed_removal;
+   }
+   printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
+   /* Ok, all of our target is isolated.
+      We cannot do rollback at this point. */
+   offline_isolated_pages(start_pfn, end_pfn);
+   /* reset pagetype flags and make the migrate type MOVABLE */
+   undo_isolate_page_range(start_pfn, end_pfn);
+   /* removal succeeded: update the page accounting */
+   zone->present_pages -= offlined_pages;
+   zone->zone_pgdat->node_present_pages -= offlined_pages;
+   totalram_pages -= offlined_pages;
+   num_physpages -= offlined_pages;
+
+   vm_total_pages = nr_free_pagecache_pages();
+   writeback_set_ratelimit();
+
+   memory_notify(MEM_OFFLINE, &arg);
+   return 0;
+
+failed_removal:
+   printk(KERN_INFO "memory offlining %lx to %lx failed\n",
+      start_pfn, end_pfn);
+   memory_notify(MEM_CANCEL_OFFLINE, &arg);
+   /* push the pages back to the free area */
+   undo_isolate_page_range(start_pfn, end_pfn);
+
+   return ret;
+}
+#else
+int remove_memory(u64 start, u64 size)
+{
+   return -EINVAL; /* stub used when CONFIG_MEMORY_HOTREMOVE is not set */
+}
+EXPORT_SYMBOL_GPL(remove_memory);
+#endif /* CONFIG_MEMORY_HOTREMOVE */


Comments: webmaster (at) linuxhq.com.
Advertising: banners (at) linuxhq.com.
Compilation ©1998-2008 Linux Headquarters, Inc.