Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 390 |
1 files changed, 232 insertions, 158 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 27aa58468..95a2bc436 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4,6 +4,7 @@
  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  *  Swap reorganised 29.12.95, Stephen Tweedie
  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ *  Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999
  */
 
 #include <linux/config.h>
@@ -22,7 +23,6 @@
 #include <asm/pgtable.h>
 
 int nr_swap_pages = 0;
-int nr_free_pages = 0;
 int nr_lru_pages;
 LIST_HEAD(lru_cache);
 
@@ -36,30 +36,46 @@ LIST_HEAD(lru_cache);
 #if CONFIG_AP1000
 /* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram
    for the ring buffers */
-#define NR_MEM_LISTS 12
+#define MAX_ORDER 12
 #else
-#define NR_MEM_LISTS 10
+#define MAX_ORDER 10
 #endif
 
-struct free_area_struct {
+typedef struct free_area_struct {
        struct list_head free_list;
        unsigned int * map;
-       unsigned long count;
-};
+} free_area_t;
 
-#define MEM_TYPE_DMA           0
-#define MEM_TYPE_NORMAL                1
-#define MEM_TYPE_HIGH          2
-
-static const char *mem_type_strs[] = {"DMA", "Normal", "High"};
+#define ZONE_DMA               0
+#define ZONE_NORMAL            1
 
 #ifdef CONFIG_HIGHMEM
-#define NR_MEM_TYPES 3
+# define ZONE_HIGHMEM          2
+# define NR_ZONES              3
 #else
-#define NR_MEM_TYPES 2
+# define NR_ZONES              2
 #endif
 
-static struct free_area_struct free_area[NR_MEM_TYPES][NR_MEM_LISTS];
+typedef struct zone_struct {
+       spinlock_t lock;
+       unsigned long offset;
+       unsigned long size;
+       free_area_t free_area[MAX_ORDER];
+
+       unsigned long free_pages;
+       unsigned long pages_low, pages_high;
+       int low_on_memory;
+       char * name;
+} zone_t;
+
+static zone_t zones[NR_ZONES] =
+       {
+               { name: "DMA" },
+               { name: "Normal" },
+#ifdef CONFIG_HIGHMEM
+               { name: "HighMem" }
+#endif
+       };
 
 /*
  * Free_page() adds the page to the free lists. This is optimized for
@@ -73,13 +89,6 @@ static struct free_area_struct free_area[NR_MEM_TYPES][NR_MEM_LISTS];
  * for the normal case, giving better asm-code.
  */
 
-/*
- * Buddy system. Hairy. You really aren't expected to understand this
- *
- * Hint: -mask = 1+~mask
- */
-spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
-
 #define memlist_init(x) INIT_LIST_HEAD(x)
 #define memlist_add_head list_add
 #define memlist_add_tail list_add_tail
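The zones[] array above replaces the old per-type free_area[][] tables: each zone owns a contiguous [offset, offset+size) slice of the single mem_map[] array. A minimal userspace sketch of that layout (the struct is trimmed to offset/size/name and the sizes are made up for illustration), showing how the owning zone of a page index is found by scanning from the highest zone down, the same way the small loop in free_pages_ok() below does:

#include <stdio.h>

typedef struct {
        unsigned long offset;   /* first mem_map index covered by this zone */
        unsigned long size;     /* number of pages in the zone */
        const char *name;
} zone_t;

/* illustrative layout only: DMA | Normal | HighMem, offsets ascending */
static zone_t zones[] = {
        { .offset = 0,     .size = 4096,  .name = "DMA"     },
        { .offset = 4096,  .size = 28672, .name = "Normal"  },
        { .offset = 32768, .size = 16384, .name = "HighMem" },
};
#define NR_ZONES (sizeof(zones) / sizeof(zones[0]))

static zone_t *page_zone(unsigned long map_nr)
{
        int i = NR_ZONES - 1;
        zone_t *zone = zones + i;

        /* offsets ascend, so the first zone (seen from the top) whose
           offset is <= map_nr is the one that contains the page */
        for ( ; i >= 0; i--, zone--)
                if (map_nr >= zone->offset)
                        break;
        return zone;
}

int main(void)
{
        printf("page   100 -> %s\n", page_zone(100)->name);    /* DMA     */
        printf("page 10000 -> %s\n", page_zone(10000)->name);  /* Normal  */
        printf("page 40000 -> %s\n", page_zone(40000)->name);  /* HighMem */
        return 0;
}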
@@ -88,35 +97,54 @@ spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
 #define memlist_entry list_entry
 #define memlist_next(x) ((x)->next)
 #define memlist_prev(x) ((x)->prev)
 
-static inline void free_pages_ok(struct page *page, unsigned long map_nr, unsigned long order)
+/*
+ * Temporary debugging check.
+ */
+#define BAD_RANGE(zone,x) ((((x)-mem_map) < zone->offset) || (((x)-mem_map) >= zone->offset+zone->size))
+
+/*
+ * Buddy system. Hairy. You really aren't expected to understand this
+ *
+ * Hint: -mask = 1+~mask
+ */
+
+static inline void free_pages_ok (struct page *page, unsigned long map_nr, unsigned long order)
 {
        struct free_area_struct *area;
-       unsigned long index = map_nr >> (1 + order);
-       unsigned long mask = (~0UL) << order;
+       unsigned long index, page_idx, mask, offset;
        unsigned long flags;
        struct page *buddy;
+       zone_t *zone;
+       int i;
 
-       spin_lock_irqsave(&page_alloc_lock, flags);
-
-#define list(x) (mem_map+(x))
-
-#ifdef CONFIG_HIGHMEM
-       if (map_nr >= highmem_mapnr) {
-               area = free_area[MEM_TYPE_HIGH];
-               nr_free_highpages -= mask;
-       } else
-#endif
-       if (PageDMA(page))
-               area = free_area[MEM_TYPE_DMA];
-       else
-               area = free_area[MEM_TYPE_NORMAL];
+       /*
+        * Which zone is this page belonging to.
+        *
+        * (NR_ZONES is low, and we do not want (yet) to introduce
+        * put page->zone, it increases the size of mem_map[]
+        * unnecesserily. This small loop is basically equivalent
+        * to the previous #ifdef jungle, speed-wise.)
+        */
+       i = NR_ZONES-1;
+       zone = zones + i;
+       for ( ; i >= 0; i--, zone--)
+               if (map_nr >= zone->offset)
+                       break;
 
+       mask = (~0UL) << order;
+       offset = zone->offset;
+       area = zone->free_area;
        area += order;
+       page_idx = map_nr - zone->offset;
+       page_idx &= mask;
+       index = page_idx >> (1 + order);
+       mask = (~0UL) << order;
 
-       map_nr &= mask;
-       nr_free_pages -= mask;
+       spin_lock_irqsave(&zone->lock, flags);
 
-       while (mask + (1 << (NR_MEM_LISTS-1))) {
+       zone->free_pages -= mask;
+
+       while (mask + (1 << (MAX_ORDER-1))) {
                if (!test_and_change_bit(index, area->map))
                        /*
                         * the buddy page is still allocated.
@@ -125,21 +153,22 @@ static inline void free_pages_ok(struct page *page, unsigned long map_nr, unsign
                /*
                 * Move the buddy up one level.
                 */
-               buddy = list(map_nr ^ -mask);
-               page = list(map_nr);
+               buddy = mem_map + offset + (page_idx ^ -mask);
+               page = mem_map + offset + page_idx;
+               if (BAD_RANGE(zone,buddy))
+                       BUG();
+               if (BAD_RANGE(zone,page))
+                       BUG();
 
-               area->count--;
                memlist_del(&buddy->list);
                mask <<= 1;
                area++;
                index >>= 1;
-               map_nr &= mask;
+               page_idx &= mask;
        }
-       area->count++;
-       memlist_add_head(&(list(map_nr))->list, &area->free_list);
-#undef list
+       memlist_add_head(&mem_map[offset + page_idx].list, &area->free_list);
 
-       spin_unlock_irqrestore(&page_alloc_lock, flags);
+       spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 /*
@@ -147,10 +176,9 @@ static inline void free_pages_ok(struct page *page, unsigned long map_nr, unsign
  */
 #define MARK_USED(index, order, area) \
        change_bit((index) >> (1+(order)), (area)->map)
-#define CAN_DMA(x) (PageDMA(x))
 #define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
 
-int __free_page(struct page *page)
+int __free_page (struct page *page)
 {
        if (!PageReserved(page) && put_page_testzero(page)) {
                if (PageSwapCache(page))
@@ -164,7 +192,7 @@ int __free_page(struct page *page)
        return 0;
 }
 
-int free_pages(unsigned long addr, unsigned long order)
+int free_pages (unsigned long addr, unsigned long order)
 {
        unsigned long map_nr = MAP_NR(addr);
 
@@ -182,16 +210,17 @@ int free_pages(unsigned long addr, unsigned long order)
        return 0;
 }
 
-static inline unsigned long EXPAND (struct page *map, unsigned long index,
+static inline unsigned long EXPAND (zone_t *zone, struct page *map, unsigned long index,
                 int low, int high, struct free_area_struct * area)
 {
        unsigned long size = 1 << high;
 
        while (high > low) {
+               if (BAD_RANGE(zone,map))
+                       BUG();
                area--;
                high--;
                size >>= 1;
-               area->count++;
                memlist_add_head(&(map)->list, &(area)->free_list);
                MARK_USED(index, high, area);
                index += size;
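About the "Hint: -mask = 1+~mask" above: with mask = (~0UL) << order, -mask equals 1 << order, so page_idx ^ -mask is simply the index of the buddy block at that order. A minimal userspace sketch of the coalescing loop (one toggle flag per buddy pair per order, as in area->map; the array bounds are deliberately oversized and the free-list handling is reduced to printouts):

#include <stdio.h>

#define MAX_ORDER  10
#define ZONE_PAGES (1u << MAX_ORDER)

/* one toggle flag per buddy pair per order, kept as whole chars for
   clarity: flag == 1 means exactly one of the two buddies is free */
static char map[MAX_ORDER][ZONE_PAGES];

static void free_block(unsigned int page_idx, unsigned int order)
{
        while (order < MAX_ORDER - 1) {
                unsigned int index = page_idx >> (1 + order);  /* buddy-pair index */
                unsigned int buddy = page_idx ^ (1u << order); /* page_idx ^ -mask */

                map[order][index] ^= 1;
                if (map[order][index])
                        break;          /* buddy still allocated: stop merging */

                /* buddy was free too: it would be unlinked from its free
                   list here, and the merged block moves one order up */
                printf("order %u: merged page %u with buddy %u\n",
                       order, page_idx, buddy);
                page_idx &= ~((1u << (order + 1)) - 1);
                order++;
        }
        printf("order-%u block at page %u goes on the free list\n",
               order, page_idx);
}

int main(void)
{
        free_block(0, 0);   /* buddy (page 1) still allocated: stays order 0 */
        free_block(1, 0);   /* merges with page 0 into an order-1 block      */
        return 0;
}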
@@ -201,79 +230,62 @@ static inline unsigned long EXPAND (struct page *map, unsigned long index,
        return index;
 }
 
-static inline struct page * rmqueue (int order, unsigned type)
+static inline struct page * rmqueue (zone_t *zone, unsigned long order)
 {
-       struct free_area_struct * area = free_area[type]+order;
+       struct free_area_struct * area = zone->free_area + order;
        unsigned long curr_order = order, map_nr;
-       struct page *page;
        struct list_head *head, *curr;
+       unsigned long flags;
+       struct page *page;
 
+       spin_lock_irqsave(&zone->lock, flags);
        do {
                head = &area->free_list;
                curr = memlist_next(head);
 
                if (curr != head) {
+                       unsigned int index;
+
                        page = memlist_entry(curr, struct page, list);
                        memlist_del(curr);
-                       area->count--;
-                       map_nr = page - mem_map;
-                       MARK_USED(map_nr, curr_order, area);
-                       nr_free_pages -= 1 << order;
-                       map_nr = EXPAND(page, map_nr, order, curr_order, area);
+                       map_nr = page - mem_map;
+                       index = map_nr - zone->offset;
+                       MARK_USED(index, curr_order, area);
+                       zone->free_pages -= 1 << order;
+                       map_nr = zone->offset + EXPAND(zone, page, index, order, curr_order, area);
+                       spin_unlock_irqrestore(&zone->lock, flags);
+                       page = mem_map + map_nr;
+                       if (BAD_RANGE(zone,page))
+                               BUG();
                        return page;
                }
                curr_order++;
                area++;
-       } while (curr_order < NR_MEM_LISTS);
+       } while (curr_order < MAX_ORDER);
+       spin_unlock_irqrestore(&zone->lock, flags);
 
        return NULL;
 }
 
-static inline int balance_lowmemory (int gfp_mask)
+static inline int balance_memory (zone_t *zone, int gfp_mask)
 {
        int freed;
-       static int low_on_memory = 0;
 
-#ifndef CONFIG_HIGHMEM
-       if (nr_free_pages > freepages.min) {
-               if (!low_on_memory)
+       if (zone->free_pages > zone->pages_low) {
+               if (!zone->low_on_memory)
                        return 1;
-               if (nr_free_pages >= freepages.high) {
-                       low_on_memory = 0;
+               /*
+                * Simple hysteresis: exit 'low memory mode' if
+                * the upper limit has been reached:
+                */
+               if (zone->free_pages >= zone->pages_high) {
+                       zone->low_on_memory = 0;
                        return 1;
                }
        }
+       zone->low_on_memory = 1;
 
-       low_on_memory = 1;
-#else
-       static int low_on_highmemory = 0;
-
-       if (gfp_mask & __GFP_HIGHMEM)
-       {
-               if (nr_free_pages > freepages.min) {
-                       if (!low_on_highmemory) {
-                               return 1;
-                       }
-                       if (nr_free_pages >= freepages.high) {
-                               low_on_highmemory = 0;
-                               return 1;
-                       }
-               }
-               low_on_highmemory = 1;
-       } else {
-               if (nr_free_pages+nr_free_highpages > freepages.min) {
-                       if (!low_on_memory) {
-                               return 1;
-                       }
-                       if (nr_free_pages+nr_free_highpages >= freepages.high) {
-                               low_on_memory = 0;
-                               return 1;
-                       }
-               }
-               low_on_memory = 1;
-       }
-#endif
-
        current->flags |= PF_MEMALLOC;
        freed = try_to_free_pages(gfp_mask);
        current->flags &= ~PF_MEMALLOC;
@@ -283,13 +295,12 @@ static inline int balance_lowmemory (int gfp_mask)
        return 1;
 }
 
-struct page * __get_pages(int gfp_mask, unsigned long order)
+static inline struct page * __get_pages (zone_t *zone, unsigned int gfp_mask,
+                                               unsigned long order)
 {
-       unsigned long flags;
        struct page *page;
-       unsigned type;
 
-       if (order >= NR_MEM_LISTS)
+       if (order >= MAX_ORDER)
                goto nopage;
 
        /*
@@ -303,28 +314,20 @@ struct page * __get_pages(int gfp_mask, unsigned long order)
         * further thought.
         */
        if (!(current->flags & PF_MEMALLOC))
-               goto lowmemory;
-
-ok_to_allocate:
-#ifdef CONFIG_HIGHMEM
-       if (gfp_mask & __GFP_HIGHMEM)
-               type = MEM_TYPE_HIGH;
-       else
-#endif
-       if (gfp_mask & __GFP_DMA)
-               type = MEM_TYPE_DMA;
-       else
-               type = MEM_TYPE_NORMAL;
-
-       spin_lock_irqsave(&page_alloc_lock, flags);
+               if (!balance_memory(zone, gfp_mask))
+                       goto nopage;
 
+       /*
+        * We are falling back to lower-level zones if allocation
+        * in a higher zone fails. This assumes a hierarchical
+        * dependency between zones, which is true currently. If
+        * you need something else then move this loop outside
+        * this function, into the zone-specific allocator.
+        */
        do {
-               page = rmqueue(order, type);
-               if (page) {
-                       spin_unlock_irqrestore(&page_alloc_lock, flags);
+               page = rmqueue(zone, order);
+               if (page)
                        return page;
-               }
-       } while (type-- > 0) ;
-       spin_unlock_irqrestore(&page_alloc_lock, flags);
+       } while (zone-- != zones) ;
 
        /*
        * If we can schedule, do so, and make sure to yield.
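A minimal userspace sketch of the balance_memory() hysteresis above (try_to_free_pages() is replaced by a stub and the watermark numbers are made up): once free_pages falls to pages_low the zone stays in 'low memory mode' until free_pages climbs back above pages_high, so reclaim is not re-armed on every allocation in between:

#include <stdio.h>

typedef struct {
        unsigned long free_pages;
        unsigned long pages_low, pages_high;
        int low_on_memory;
} zone_t;

/* stand-in for try_to_free_pages(): pretend reclaim always succeeds */
static int try_to_free_pages_stub(void)
{
        return 1;
}

static int balance_memory(zone_t *zone)
{
        if (zone->free_pages > zone->pages_low) {
                if (!zone->low_on_memory)
                        return 1;
                /* leave 'low memory mode' only once pages_high is reached */
                if (zone->free_pages >= zone->pages_high) {
                        zone->low_on_memory = 0;
                        return 1;
                }
        }
        zone->low_on_memory = 1;
        return try_to_free_pages_stub();
}

int main(void)
{
        zone_t z = { .free_pages = 50, .pages_low = 30,
                     .pages_high = 90, .low_on_memory = 0 };
        unsigned long samples[] = { 50, 20, 60, 95 };
        int i;

        /* low_on_memory goes 0, 1, 1, 0 across 50, 20, 60, 95 free pages */
        for (i = 0; i < 4; i++) {
                int ok;

                z.free_pages = samples[i];
                ok = balance_memory(&z);
                printf("free=%lu: ok=%d low_on_memory=%d\n",
                       z.free_pages, ok, z.low_on_memory);
        }
        return 0;
}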
@@ -338,60 +341,114 @@ ok_to_allocate:
 
 nopage:
        return NULL;
+}
 
-lowmemory:
-       if (balance_lowmemory(gfp_mask))
-               goto ok_to_allocate;
-       goto nopage;
+static inline zone_t * gfp_mask_to_zone (int gfp_mask)
+{
+       zone_t *zone;
+
+#if CONFIG_HIGHMEM
+       if (gfp_mask & __GFP_HIGHMEM)
+               zone = zones + ZONE_HIGHMEM;
+       else
+#endif
+       if (gfp_mask & __GFP_DMA)
+               zone = zones + ZONE_DMA;
+       else
+               zone = zones + ZONE_NORMAL;
+       return zone;
 }
 
-unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+unsigned long __get_free_pages (int gfp_mask, unsigned long order)
 {
        struct page *page;
-       page = __get_pages(gfp_mask, order);
+
+       page = __get_pages(gfp_mask_to_zone(gfp_mask), gfp_mask, order);
        if (!page)
                return 0;
        return page_address(page);
 }
 
-struct page * get_free_highpage(int gfp_mask)
+struct page * alloc_pages (int gfp_mask, unsigned long order)
 {
-       return __get_pages(gfp_mask, 0);
+       return __get_pages(gfp_mask_to_zone(gfp_mask), gfp_mask, order);
 }
 
 /*
+ * Total amount of free (allocatable) RAM:
+ */
+unsigned int nr_free_pages (void)
+{
+       unsigned int sum;
+       zone_t *zone;
+
+       sum = 0;
+       for (zone = zones; zone < zones+NR_ZONES; zone++)
+               sum += zone->free_pages;
+       return sum;
+}
+
+/*
+ * Amount of free RAM allocatable as buffer memory:
+ */
+unsigned int nr_free_buffer_pages (void)
+{
+       unsigned int sum;
+       zone_t *zone;
+
+       sum = nr_lru_pages;
+       for (zone = zones; zone <= zones+ZONE_NORMAL; zone++)
+               sum += zone->free_pages;
+       return sum;
+}
+
+#if CONFIG_HIGHMEM
+unsigned int nr_free_highpages (void)
+{
+       return zones[ZONE_HIGHMEM].free_pages;
+}
+#endif
+
+/*
  * Show free area list (used inside shift_scroll-lock stuff)
  * We also calculate the percentage fragmentation. We do this by counting the
  * memory on each free list with the exception of the first item on the list.
  */
 void show_free_areas(void)
 {
-       unsigned long order, flags;
+       unsigned long order;
        unsigned type;
 
-       spin_lock_irqsave(&page_alloc_lock, flags);
-       printk("Free pages: %6dkB (%6ldkB HighMem)\n",
-               nr_free_pages<<(PAGE_SHIFT-10),
-               nr_free_highpages<<(PAGE_SHIFT-10));
+       printk("Free pages: %6dkB (%6dkB HighMem)\n",
+               nr_free_pages()<<(PAGE_SHIFT-10),
+               nr_free_highpages()<<(PAGE_SHIFT-10));
 
        printk("( Free: %d, lru_cache: %d (%d %d %d) )\n",
-               nr_free_pages,
+               nr_free_pages(),
                nr_lru_pages,
                freepages.min,
                freepages.low,
                freepages.high);
 
-       for (type = 0; type < NR_MEM_TYPES; type++) {
+       for (type = 0; type < NR_ZONES; type++) {
+               zone_t *zone = zones + type;
                unsigned long total = 0;
 
-               printk("  %s: ", mem_type_strs[type]);
-               for (order = 0; order < NR_MEM_LISTS; order++) {
-                       unsigned long nr = free_area[type][order].count;
+               printk("  %s: ", zone->name);
+               for (order = 0; order < MAX_ORDER; order++) {
+                       unsigned long i, nr;
+
+                       nr = 0;
+                       for (i = 0; i < zone->size; i += 1<<order) {
+                               struct page * page;
+                               page = mem_map + zone->offset + i;
+                               if (!page_count(page))
+                                       nr++;
+                       }
                        total += nr * ((PAGE_SIZE>>10) << order);
                        printk("%lu*%lukB ", nr,
                                (unsigned long)((PAGE_SIZE>>10) << order));
                }
                printk("= %lukB)\n", total);
        }
-       spin_unlock_irqrestore(&page_alloc_lock, flags);
 
 #ifdef SWAP_CACHE_INFO
        show_swap_cache_info();
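The fallback in __get_pages() above works because zones[] is laid out DMA, Normal, HighMem, so 'zone--' walks from the requested zone towards the more constrained ones. A minimal userspace sketch of that walk (rmqueue() is replaced by a stub that only checks a per-zone free counter; the sizes are made up):

#include <stdio.h>

typedef struct {
        const char   *name;
        unsigned long free_pages;
} zone_t;

/* illustrative layout: index 0 must be the most constrained zone */
static zone_t zones[] = {
        { "DMA",     16 },
        { "Normal",   0 },
        { "HighMem",  0 },
};

/* stand-in for rmqueue(): succeed only if the zone has enough free pages */
static zone_t *rmqueue_stub(zone_t *zone, unsigned long order)
{
        if (zone->free_pages >= (1UL << order)) {
                zone->free_pages -= 1UL << order;
                return zone;
        }
        return NULL;
}

/* same shape as the do/while in __get_pages(): try the requested zone,
   then fall back towards zones[0], then give up */
static zone_t *alloc_from(zone_t *zone, unsigned long order)
{
        do {
                zone_t *got = rmqueue_stub(zone, order);
                if (got)
                        return got;
        } while (zone-- != zones);
        return NULL;
}

int main(void)
{
        /* an order-2 request aimed at HighMem falls back all the way to DMA */
        zone_t *z = alloc_from(&zones[2], 2);

        printf("allocated from: %s\n", z ? z->name : "(nothing)");
        return 0;
}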
@@ -401,18 +458,24 @@ void show_free_areas(void)
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
 /*
- * set up the free-area data structures:
+ * Set up the zone data structures:
  * - mark all pages reserved
  * - mark all memory queues empty
  * - clear the memory bitmaps
  */
-volatile int data;
-void __init free_area_init(unsigned long end_mem_pages)
+void __init free_area_init(unsigned int *zones_size)
 {
        mem_map_t * p;
        unsigned long i, j;
        unsigned long map_size;
+       unsigned int totalpages, offset;
+
+       totalpages = 0;
+       for (i = 0; i < NR_ZONES; i++)
+               totalpages += zones_size[i];
+       printk("totalpages: %08x\n", totalpages);
+       i = totalpages >> 7;
 
        /*
         * Select nr of pages we try to keep free for important stuff
         * with a minimum of 10 pages and a maximum of 256 pages, so
@@ -420,7 +483,7 @@ void __init free_area_init(unsigned long end_mem_pages)
         * This is fairly arbitrary, but based on some behaviour
         * analysis.
         */
-       i = end_mem_pages >> 7;
+       i = totalpages >> 7;
        if (i < 10)
                i = 10;
        if (i > 256)
@@ -430,11 +493,10 @@ void __init free_area_init(unsigned long end_mem_pages)
        freepages.high = i * 3;
 
        /*
-        * Most architectures just pick 'start_mem'. Some architectures
-        * (with lots of mem and discontinous memory maps) have to search
-        * for a good area.
+        * Some architectures (with lots of mem and discontinous memory
+        * maps) have to search for a good mem_map area:
         */
-       map_size = end_mem_pages*sizeof(struct page);
+       map_size = totalpages*sizeof(struct page);
        mem_map = (struct page *) alloc_bootmem(map_size);
        memset(mem_map, 0, map_size);
 
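A worked example of the freepages sizing above, with made-up numbers: for 64 MB of 4 KB pages, totalpages is 16384 and 16384 >> 7 is 128, which already lies inside the [10, 256] clamp, so freepages.min/low/high end up at 128/256/384 pages:

#include <stdio.h>

int main(void)
{
        unsigned int totalpages = 16384;        /* e.g. 64 MB of 4 KB pages */
        unsigned int i = totalpages >> 7;       /* 128 */

        if (i < 10)
                i = 10;
        if (i > 256)
                i = 256;

        printf("freepages.min  = %u\n", i);     /* 128 */
        printf("freepages.low  = %u\n", i * 2); /* 256 */
        printf("freepages.high = %u\n", i * 3); /* 384 */
        return 0;
}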
@@ -443,27 +505,39 @@
         * up by free_all_bootmem() once the early boot process is
         * done.
         */
-       for (p = mem_map; p < mem_map + end_mem_pages; p++) {
+       for (p = mem_map; p < mem_map + totalpages; p++) {
                set_page_count(p, 0);
                p->flags = (1 << PG_DMA);
                SetPageReserved(p);
                init_waitqueue_head(&p->wait);
                memlist_init(&p->list);
        }
-
-       for (j = 0 ; j < NR_MEM_TYPES ; j++) {
+
+       offset = 0;
+       for (j = 0; j < NR_ZONES; j++) {
+               zone_t *zone = zones + j;
                unsigned long mask = -1;
-               for (i = 0 ; i < NR_MEM_LISTS ; i++) {
+               unsigned long size;
+
+               size = zones_size[j];
+               zone->size = size;
+               zone->offset = offset;
+               zone->pages_low = freepages.low;
+               zone->pages_high = freepages.high;
+               zone->low_on_memory = 0;
+
+               offset += size;
+               for (i = 0; i < MAX_ORDER; i++) {
                        unsigned long bitmap_size;
                        unsigned int * map;
-                       memlist_init(&free_area[j][i].free_list);
+                       memlist_init(&zone->free_area[i].free_list);
                        mask += mask;
-                       end_mem_pages = (end_mem_pages + ~mask) & mask;
-                       bitmap_size = end_mem_pages >> i;
+                       size = (size + ~mask) & mask;
+                       bitmap_size = size >> i;
                        bitmap_size = (bitmap_size + 7) >> 3;
                        bitmap_size = LONG_ALIGN(bitmap_size);
                        map = (unsigned int *) alloc_bootmem(bitmap_size);
-                       free_area[j][i].map = map;
+                       zone->free_area[i].map = map;
                        memset((void *) map, 0, bitmap_size);
                }
        }
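A worked example of the per-order bitmap sizing in the loop above (the same arithmetic, run in userspace with a made-up zone size): for every order the zone size is rounded up to that order's alignment, size >> order bits are set aside for the order's map, and the byte count is rounded up to sizeof(long):

#include <stdio.h>

#define MAX_ORDER 10
#define LONG_ALIGN(x) (((x) + sizeof(long) - 1) & ~(sizeof(long) - 1))

int main(void)
{
        unsigned long size = 10000;     /* pages in the zone (made up) */
        unsigned long mask = ~0UL;      /* becomes ~0UL << (order + 1) */
        unsigned long bitmap_size;
        int order;

        for (order = 0; order < MAX_ORDER; order++) {
                mask += mask;                           /* ~0UL << (order + 1) */
                size = (size + ~mask) & mask;           /* round zone size up  */
                bitmap_size = size >> order;            /* bits for this order */
                bitmap_size = (bitmap_size + 7) >> 3;   /* bits -> bytes       */
                bitmap_size = LONG_ALIGN(bitmap_size);  /* round up to a long  */
                printf("order %2d: %6lu pages rounded, %5lu bytes of bitmap\n",
                       order, size, bitmap_size);
        }
        return 0;
}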