diff options
Diffstat (limited to 'mm/kmalloc.c')
-rw-r--r-- | mm/kmalloc.c | 663 |
1 files changed, 376 insertions, 287 deletions
diff --git a/mm/kmalloc.c b/mm/kmalloc.c index e288ecf2f..d0193f02b 100644 --- a/mm/kmalloc.c +++ b/mm/kmalloc.c @@ -10,44 +10,42 @@ /* * Modified by Alex Bligh (alex@cconcepts.co.uk) 4 Apr 1994 to use multiple * pages. So for 'page' throughout, read 'area'. + * + * Largely rewritten.. Linus */ #include <linux/mm.h> -#include <asm/system.h> #include <linux/delay.h> +#include <linux/interrupt.h> -#define GFP_LEVEL_MASK 0xf - -/* I want this low enough for a while to catch errors. - I want this number to be increased in the near future: - loadable device drivers should use this function to get memory */ - -#define MAX_KMALLOC_K ((PAGE_SIZE<<(NUM_AREA_ORDERS-1))>>10) - - -/* This defines how many times we should try to allocate a free page before - giving up. Normally this shouldn't happen at all. */ -#define MAX_GET_FREE_PAGE_TRIES 4 +#include <asm/system.h> +#include <asm/dma.h> +#ifdef __mips__ +#include <asm/sgidefs.h> +#endif +/* Define this if you want slow routines that try to trip errors */ +#undef SADISTIC_KMALLOC /* Private flags. */ #define MF_USED 0xffaa0055 +#define MF_DMA 0xff00aa55 #define MF_FREE 0x0055ffaa -/* +/* * Much care has gone into making these routines in this file reentrant. * * The fancy bookkeeping of nbytesmalloced and the like are only used to - * report them to the user (oooohhhhh, aaaaahhhhh....) are not + * report them to the user (oooohhhhh, aaaaahhhhh....) are not * protected by cli(). (If that goes wrong. So what?) * * These routines restore the interrupt status to allow calling with ints - * off. + * off. */ -/* +/* * A block header. This is in front of every malloc-block, whether free or not. */ struct block_header { @@ -64,8 +62,8 @@ struct block_header { #define BH(p) ((struct block_header *)(p)) -/* - * The page descriptor is at the front of every page that malloc has in use. +/* + * The page descriptor is at the front of every page that malloc has in use. */ struct page_descriptor { struct page_descriptor *next; @@ -84,324 +82,415 @@ struct page_descriptor { */ struct size_descriptor { struct page_descriptor *firstfree; - struct page_descriptor *dmafree; /* DMA-able memory */ - int size; + struct page_descriptor *dmafree; /* DMA-able memory */ int nblocks; int nmallocs; int nfrees; int nbytesmalloced; int npages; - unsigned long gfporder; /* number of pages in the area required */ + unsigned long gfporder; /* number of pages in the area required */ }; /* - * For now it is unsafe to allocate bucket sizes between n & n=16 where n is - * 4096 * any power of two + * For now it is unsafe to allocate bucket sizes between n and + * n-sizeof(page_descriptor) where n is PAGE_SIZE * any power of two */ -#if PAGE_SIZE == 4096 -struct size_descriptor sizes[] = { - { NULL, NULL, 32,127, 0,0,0,0, 0}, - { NULL, NULL, 64, 63, 0,0,0,0, 0 }, - { NULL, NULL, 128, 31, 0,0,0,0, 0 }, - { NULL, NULL, 252, 16, 0,0,0,0, 0 }, - { NULL, NULL, 508, 8, 0,0,0,0, 0 }, - { NULL, NULL,1020, 4, 0,0,0,0, 0 }, - { NULL, NULL,2040, 2, 0,0,0,0, 0 }, - { NULL, NULL,4096-16, 1, 0,0,0,0, 0 }, - { NULL, NULL,8192-16, 1, 0,0,0,0, 1 }, - { NULL, NULL,16384-16, 1, 0,0,0,0, 2 }, - { NULL, NULL,32768-16, 1, 0,0,0,0, 3 }, - { NULL, NULL,65536-16, 1, 0,0,0,0, 4 }, - { NULL, NULL,131072-16, 1, 0,0,0,0, 5 }, - { NULL, NULL, 0, 0, 0,0,0,0, 0 } +#if PAGE_SIZE == 4096 && defined (__mips__) && \ + ((_MIPS_ISA == _MIPS_ISA_MIPS2) || \ + (_MIPS_ISA == _MIPS_ISA_MIPS3) || \ + (_MIPS_ISA == _MIPS_ISA_MIPS4)) +static const unsigned int blocksize[] = { + /* + * For MIPS II we need this hacked descriptor table to get + * doubleword alignment. Otherwise the scheduler and other code + * that use doublewords will bomb. + */ + 32, + 64, + 128, + 248, + 504, + 1016, + 2040, + 4096 - 16, + 8192 - 16, + 16384 - 16, + 32768 - 16, + 65536 - 16, + 131072 - 16, + 0 +}; + +static struct size_descriptor sizes[] = +{ + {NULL, NULL, 127, 0, 0, 0, 0, 0}, + {NULL, NULL, 63, 0, 0, 0, 0, 0}, + {NULL, NULL, 31, 0, 0, 0, 0, 0}, + {NULL, NULL, 16, 0, 0, 0, 0, 0}, + {NULL, NULL, 8, 0, 0, 0, 0, 0}, + {NULL, NULL, 4, 0, 0, 0, 0, 0}, + {NULL, NULL, 2, 0, 0, 0, 0, 0}, + {NULL, NULL, 1, 0, 0, 0, 0, 0}, + {NULL, NULL, 1, 0, 0, 0, 0, 1}, + {NULL, NULL, 1, 0, 0, 0, 0, 2}, + {NULL, NULL, 1, 0, 0, 0, 0, 3}, + {NULL, NULL, 1, 0, 0, 0, 0, 4}, + {NULL, NULL, 1, 0, 0, 0, 0, 5}, + {NULL, NULL, 0, 0, 0, 0, 0, 0} +}; +#elif PAGE_SIZE == 4096 +static const unsigned int blocksize[] = { + 32, + 64, + 128, + 252, + 508, + 1020, + 2040, + 4096 - 16, + 8192 - 16, + 16384 - 16, + 32768 - 16, + 65536 - 16, + 131072 - 16, + 0 +}; + +static struct size_descriptor sizes[] = +{ + {NULL, NULL, 127, 0, 0, 0, 0, 0}, + {NULL, NULL, 63, 0, 0, 0, 0, 0}, + {NULL, NULL, 31, 0, 0, 0, 0, 0}, + {NULL, NULL, 16, 0, 0, 0, 0, 0}, + {NULL, NULL, 8, 0, 0, 0, 0, 0}, + {NULL, NULL, 4, 0, 0, 0, 0, 0}, + {NULL, NULL, 2, 0, 0, 0, 0, 0}, + {NULL, NULL, 1, 0, 0, 0, 0, 0}, + {NULL, NULL, 1, 0, 0, 0, 0, 1}, + {NULL, NULL, 1, 0, 0, 0, 0, 2}, + {NULL, NULL, 1, 0, 0, 0, 0, 3}, + {NULL, NULL, 1, 0, 0, 0, 0, 4}, + {NULL, NULL, 1, 0, 0, 0, 0, 5}, + {NULL, NULL, 0, 0, 0, 0, 0, 0} }; #elif PAGE_SIZE == 8192 -struct size_descriptor sizes[] = { - { NULL, NULL, 64,127, 0,0,0,0, 0}, - { NULL, NULL, 128, 63, 0,0,0,0, 0 }, - { NULL, NULL, 248, 31, 0,0,0,0, 0 }, - { NULL, NULL, 504, 16, 0,0,0,0, 0 }, - { NULL, NULL,1016, 8, 0,0,0,0, 0 }, - { NULL, NULL,2040, 4, 0,0,0,0, 0 }, - { NULL, NULL,4080, 2, 0,0,0,0, 0 }, - { NULL, NULL,8192-32, 1, 0,0,0,0, 0 }, - { NULL, NULL,16384-32, 1, 0,0,0,0, 1 }, - { NULL, NULL,32768-32, 1, 0,0,0,0, 2 }, - { NULL, NULL,65536-32, 1, 0,0,0,0, 3 }, - { NULL, NULL,131072-32, 1, 0,0,0,0, 4 }, - { NULL, NULL,262144-32, 1, 0,0,0,0, 5 }, - { NULL, NULL, 0, 0, 0,0,0,0, 0 } +static const unsigned int blocksize[] = { + 64, + 128, + 248, + 504, + 1016, + 2040, + 4080, + 8192 - 32, + 16384 - 32, + 32768 - 32, + 65536 - 32, + 131072 - 32, + 262144 - 32, + 0 +}; + +struct size_descriptor sizes[] = +{ + {NULL, NULL, 127, 0, 0, 0, 0, 0}, + {NULL, NULL, 63, 0, 0, 0, 0, 0}, + {NULL, NULL, 31, 0, 0, 0, 0, 0}, + {NULL, NULL, 16, 0, 0, 0, 0, 0}, + {NULL, NULL, 8, 0, 0, 0, 0, 0}, + {NULL, NULL, 4, 0, 0, 0, 0, 0}, + {NULL, NULL, 2, 0, 0, 0, 0, 0}, + {NULL, NULL, 1, 0, 0, 0, 0, 0}, + {NULL, NULL, 1, 0, 0, 0, 0, 1}, + {NULL, NULL, 1, 0, 0, 0, 0, 2}, + {NULL, NULL, 1, 0, 0, 0, 0, 3}, + {NULL, NULL, 1, 0, 0, 0, 0, 4}, + {NULL, NULL, 1, 0, 0, 0, 0, 5}, + {NULL, NULL, 0, 0, 0, 0, 0, 0} }; #else #error you need to make a version for your pagesize #endif #define NBLOCKS(order) (sizes[order].nblocks) -#define BLOCKSIZE(order) (sizes[order].size) +#define BLOCKSIZE(order) (blocksize[order]) #define AREASIZE(order) (PAGE_SIZE<<(sizes[order].gfporder)) + - -long kmalloc_init (long start_mem,long end_mem) +long kmalloc_init(long start_mem, long end_mem) { int order; -/* +/* * Check the static info array. Things will blow up terribly if it's * incorrect. This is a late "compile time" check..... */ -for (order = 0;BLOCKSIZE(order);order++) - { - if ((NBLOCKS (order)*BLOCKSIZE(order) + sizeof (struct page_descriptor)) > - AREASIZE(order)) - { - printk ("Cannot use %d bytes out of %d in order = %d block mallocs\n", - (int) (NBLOCKS (order) * BLOCKSIZE(order) + - sizeof (struct page_descriptor)), - (int) AREASIZE(order), - BLOCKSIZE (order)); - panic ("This only happens if someone messes with kmalloc"); - } - } -return start_mem; + for (order = 0; BLOCKSIZE(order); order++) { + if ((NBLOCKS(order) * BLOCKSIZE(order) + sizeof(struct page_descriptor)) > + AREASIZE(order)) { + printk("Cannot use %d bytes out of %d in order = %d block mallocs\n", + (int) (NBLOCKS(order) * BLOCKSIZE(order) + + sizeof(struct page_descriptor)), + (int) AREASIZE(order), + BLOCKSIZE(order)); + panic("This only happens if someone messes with kmalloc"); + } + } + return start_mem; } +/* + * Create a small cache of page allocations: this helps a bit with + * those pesky 8kB+ allocations for NFS when we're temporarily + * out of memory.. + * + * This is a _truly_ small cache, we just cache one single page + * order (for orders 0, 1 and 2, that is 4, 8 and 16kB on x86). + */ +#define MAX_CACHE_ORDER 3 +struct page_descriptor * kmalloc_cache[MAX_CACHE_ORDER]; - -int get_order (int size) +static inline struct page_descriptor * get_kmalloc_pages(unsigned long priority, + unsigned long order, int dma) { - int order; + return (struct page_descriptor *) __get_free_pages(priority, order, dma); +} - /* Add the size of the header */ - size += sizeof (struct block_header); - for (order = 0;BLOCKSIZE(order);order++) - if (size <= BLOCKSIZE (order)) - return order; - return -1; +static inline void free_kmalloc_pages(struct page_descriptor * page, + unsigned long order, int dma) +{ + if (!dma && order < MAX_CACHE_ORDER) { + page = xchg(kmalloc_cache+order, page); + if (!page) + return; + } + free_pages((unsigned long) page, order); } -void * kmalloc (size_t size, int priority) +/* + * Ugh, this is ugly, but we want the default case to run + * straight through, which is why we have the ugly goto's + */ +void *kmalloc(size_t size, int priority) { unsigned long flags; - int order,tries,i,sz; - int dma_flag; + unsigned long type; + int order, dma; struct block_header *p; - struct page_descriptor *page; + struct page_descriptor *page, **pg; + struct size_descriptor *bucket = sizes; + + /* Get order */ + order = 0; + { + unsigned int realsize = size + sizeof(struct block_header); + for (;;) { + int ordersize = BLOCKSIZE(order); + if (realsize <= ordersize) + break; + order++; + bucket++; + if (ordersize) + continue; + printk("kmalloc of too large a block (%d bytes).\n", (int) size); + return NULL; + } + } + + dma = 0; + type = MF_USED; + pg = &bucket->firstfree; + if (priority & GFP_DMA) { + dma = 1; + type = MF_DMA; + pg = &bucket->dmafree; + } - dma_flag = (priority & GFP_DMA); priority &= GFP_LEVEL_MASK; - + /* Sanity check... */ if (intr_count && priority != GFP_ATOMIC) { static int count = 0; if (++count < 5) { printk("kmalloc called nonatomically from interrupt %p\n", - __builtin_return_address(0)); + return_address()); priority = GFP_ATOMIC; } } -order = get_order (size); -if (order < 0) - { - printk ("kmalloc of too large a block (%d bytes).\n",(int) size); - return (NULL); - } - -save_flags(flags); - -/* It seems VERY unlikely to me that it would be possible that this - loop will get executed more than once. */ -tries = MAX_GET_FREE_PAGE_TRIES; -while (tries --) - { - /* Try to allocate a "recently" freed memory block */ - cli (); - if ((page = (dma_flag ? sizes[order].dmafree : sizes[order].firstfree)) && - (p = page->firstfree)) - { - if (p->bh_flags == MF_FREE) - { - page->firstfree = p->bh_next; - page->nfree--; - if (!page->nfree) - { - if(dma_flag) - sizes[order].dmafree = page->next; - else - sizes[order].firstfree = page->next; - page->next = NULL; - } - restore_flags(flags); - - sizes [order].nmallocs++; - sizes [order].nbytesmalloced += size; - p->bh_flags = MF_USED; /* As of now this block is officially in use */ - p->bh_length = size; - return p+1; /* Pointer arithmetic: increments past header */ - } - printk ("Problem: block on freelist at %08lx isn't free.\n",(long)p); - return (NULL); - } - restore_flags(flags); - - - /* Now we're in trouble: We need to get a new free page..... */ - - sz = BLOCKSIZE(order); /* sz is the size of the blocks we're dealing with */ - - /* This can be done with ints on: This is private to this invocation */ - if (dma_flag) - page = (struct page_descriptor *) __get_dma_pages (priority & GFP_LEVEL_MASK, sizes[order].gfporder); - else - page = (struct page_descriptor *) __get_free_pages (priority & GFP_LEVEL_MASK, sizes[order].gfporder); - - if (!page) { - static unsigned long last = 0; - if (last + 10*HZ < jiffies) { - last = jiffies; - printk ("Couldn't get a free page.....\n"); - } - return NULL; - } -#if 0 - printk ("Got page %08x to use for %d byte mallocs....",(long)page,sz); -#endif - sizes[order].npages++; - - /* Loop for all but last block: */ - for (i=NBLOCKS(order),p=BH (page+1);i > 1;i--,p=p->bh_next) - { - p->bh_flags = MF_FREE; - p->bh_next = BH ( ((long)p)+sz); - } - /* Last block: */ - p->bh_flags = MF_FREE; - p->bh_next = NULL; - - page->order = order; - page->nfree = NBLOCKS(order); - page->firstfree = BH(page+1); -#if 0 - printk ("%d blocks per page\n",page->nfree); + save_flags(flags); + cli(); + page = *pg; + if (!page) + goto no_bucket_page; + + p = page->firstfree; + if (p->bh_flags != MF_FREE) + goto not_free_on_freelist; + +found_it: + page->firstfree = p->bh_next; + page->nfree--; + if (!page->nfree) + *pg = page->next; + restore_flags(flags); + bucket->nmallocs++; + bucket->nbytesmalloced += size; + p->bh_flags = type; /* As of now this block is officially in use */ + p->bh_length = size; +#ifdef SADISTIC_KMALLOC + memset(p+1, 0xf0, size); #endif - /* Now we're going to muck with the "global" freelist for this size: - this should be uninterruptible */ - cli (); - /* - * sizes[order].firstfree used to be NULL, otherwise we wouldn't be - * here, but you never know.... - */ - if (dma_flag) { - page->next = sizes[order].dmafree; - sizes[order].dmafree = page; - } else { - page->next = sizes[order].firstfree; - sizes[order].firstfree = page; - } - restore_flags(flags); - } - -/* Pray that printk won't cause this to happen again :-) */ - -printk ("Hey. This is very funny. I tried %d times to allocate a whole\n" - "new page for an object only %d bytes long, but some other process\n" - "beat me to actually allocating it. Also note that this 'error'\n" - "message is soooo very long to catch your attention. I'd appreciate\n" - "it if you'd be so kind as to report what conditions caused this to\n" - "the author of this kmalloc: wolff@dutecai.et.tudelft.nl.\n" - "(Executive summary: This can't happen)\n", - MAX_GET_FREE_PAGE_TRIES, - (int) size); -return NULL; + return p + 1; /* Pointer arithmetic: increments past header */ + + +no_bucket_page: + /* + * If we didn't find a page already allocated for this + * bucket size, we need to get one.. + * + * This can be done with ints on: it is private to this invocation + */ + restore_flags(flags); + + { + int i, sz; + + /* sz is the size of the blocks we're dealing with */ + sz = BLOCKSIZE(order); + + page = get_kmalloc_pages(priority, bucket->gfporder, dma); + if (!page) + goto no_free_page; +found_cached_page: + + bucket->npages++; + + page->order = order; + /* Loop for all but last block: */ + i = (page->nfree = bucket->nblocks) - 1; + p = BH(page + 1); + while (i > 0) { + i--; + p->bh_flags = MF_FREE; + p->bh_next = BH(((long) p) + sz); + p = p->bh_next; + } + /* Last block: */ + p->bh_flags = MF_FREE; + p->bh_next = NULL; + + p = BH(page+1); + } + + /* + * Now we're going to muck with the "global" freelist + * for this size: this should be uninterruptible + */ + cli(); + page->next = *pg; + *pg = page; + goto found_it; + + +no_free_page: + /* + * No free pages, check the kmalloc cache of + * pages to see if maybe we have something available + */ + if (!dma && order < MAX_CACHE_ORDER) { + page = xchg(kmalloc_cache+order, page); + if (page) + goto found_cached_page; + } + { + static unsigned long last = 0; + if (priority != GFP_BUFFER && (last + 10 * HZ < jiffies)) { + last = jiffies; + printk("Couldn't get a free page.....\n"); + } + return NULL; + } + +not_free_on_freelist: + restore_flags(flags); + printk("Problem: block on freelist at %08lx isn't free.\n", (long) p); + return NULL; } -void kfree_s (void *ptr,int size) +void kfree(void *__ptr) { -unsigned long flags; -int order; -register struct block_header *p=((struct block_header *)ptr) -1; -struct page_descriptor *page,*pg2; - -page = PAGE_DESC (p); -order = page->order; -if ((order < 0) || - (order > sizeof (sizes)/sizeof (sizes[0])) || - (((long)(page->next)) & ~PAGE_MASK) || - (p->bh_flags != MF_USED)) - { - printk ("kfree of non-kmalloced memory: %p, next= %p, order=%d\n", - p, page->next, page->order); - return; - } -if (size && - size != p->bh_length) - { - printk ("Trying to free pointer at %p with wrong size: %d instead of %lu.\n", - p,size,p->bh_length); - return; - } -size = p->bh_length; -p->bh_flags = MF_FREE; /* As of now this block is officially free */ -save_flags(flags); -cli (); -p->bh_next = page->firstfree; -page->firstfree = p; -page->nfree ++; - -if (page->nfree == 1) - { /* Page went from full to one free block: put it on the freelist. Do not bother - trying to put it on the DMA list. */ - if (page->next) - { - printk ("Page %p already on freelist dazed and confused....\n", page); - } - else - { - page->next = sizes[order].firstfree; - sizes[order].firstfree = page; - } - } - -/* If page is completely free, free it */ -if (page->nfree == NBLOCKS (page->order)) - { -#if 0 - printk ("Freeing page %08x.\n", (long)page); + int dma; + unsigned long flags; + unsigned int order; + struct page_descriptor *page, **pg; + struct size_descriptor *bucket; + + if (!__ptr) + goto null_kfree; +#define ptr ((struct block_header *) __ptr) + page = PAGE_DESC(ptr); + __ptr = ptr - 1; + if (~PAGE_MASK & (unsigned long)page->next) + goto bad_order; + order = page->order; + if (order >= sizeof(sizes) / sizeof(sizes[0])) + goto bad_order; + bucket = sizes + order; + dma = 0; + pg = &bucket->firstfree; + if (ptr->bh_flags == MF_DMA) { + dma = 1; + ptr->bh_flags = MF_USED; + pg = &bucket->dmafree; + } + if (ptr->bh_flags != MF_USED) + goto bad_order; + ptr->bh_flags = MF_FREE; /* As of now this block is officially free */ +#ifdef SADISTIC_KMALLOC + memset(ptr+1, 0x0e, ptr->bh_length); #endif - if (sizes[order].firstfree == page) - { - sizes[order].firstfree = page->next; - } - else if (sizes[order].dmafree == page) - { - sizes[order].dmafree = page->next; - } - else - { - for (pg2=sizes[order].firstfree; - (pg2 != NULL) && (pg2->next != page); - pg2=pg2->next) - /* Nothing */; - if (!pg2) - for (pg2=sizes[order].dmafree; - (pg2 != NULL) && (pg2->next != page); - pg2=pg2->next) - /* Nothing */; - if (pg2 != NULL) - pg2->next = page->next; - else - printk ("Ooops. page %p doesn't show on freelist.\n", page); - } -/* FIXME: I'm sure we should do something with npages here (like npages--) */ - free_pages ((long)page, sizes[order].gfporder); - } -restore_flags(flags); - -/* FIXME: ?? Are these increment & decrement operations guaranteed to be - * atomic? Could an IRQ not occur between the read & the write? - * Maybe yes on a x86 with GCC...?? - */ -sizes[order].nfrees++; /* Noncritical (monitoring) admin stuff */ -sizes[order].nbytesmalloced -= size; + save_flags(flags); + cli(); + + bucket->nfrees++; + bucket->nbytesmalloced -= ptr->bh_length; + + ptr->bh_next = page->firstfree; + page->firstfree = ptr; + if (!page->nfree++) { +/* Page went from full to one free block: put it on the freelist. */ + if (bucket->nblocks == 1) + goto free_page; + page->next = *pg; + *pg = page; + } +/* If page is completely free, free it */ + if (page->nfree == bucket->nblocks) { + for (;;) { + struct page_descriptor *tmp = *pg; + if (!tmp) + goto not_on_freelist; + if (tmp == page) + break; + pg = &tmp->next; + } + *pg = page->next; +free_page: + bucket->npages--; + free_kmalloc_pages(page, bucket->gfporder, dma); + } + restore_flags(flags); +null_kfree: + return; + +bad_order: + printk("kfree of non-kmalloced memory: %p, next= %p, order=%d\n", + ptr+1, page->next, page->order); + return; + +not_on_freelist: + printk("Ooops. page %p doesn't show on freelist.\n", page); + restore_flags(flags); } |