kernel hacker修炼之道之内存管理-SLUB(创建SLUB高速缓存kmem_cache_create())

tomhibolu

浏览: 1431420 次

最近访客更多访客>>

monkeytear

dreamtan

phight

linuxtiandi0001

博主相关

博客

微博

相册

留言

关于我

文章分类

全部博客 (1655)

社区版块

存档分类

创建SLUB高速缓存kmem_cache_create()

slab块内的对象是一个挨一个存放的，每个对象占用的空间主要包含两部分：对象本身和下一个空闲对象指针。依据空闲对象指针的位置，对象可分为两种：外置式和内置式。

先看外置式对象，如下图所示。指针位于对象的后面。对象还包括两个对齐用空间，word对齐是为了使后面的指针是word对齐的，obj对齐是为了使后面的对象按指定方式对齐。

再来看内置式对象，如下图所示。指针位于对象的头部，与对象共用存储空间。这是因为对象被分配出去之前，其存储空间是空闲的可用状态，可用于存放空闲对象指针。对象被分配出去后，也不再需要这个指针了，可以被对象内容覆盖。同理，对象释放时，不再使用对象内容，其空间可以用于存放指针。

如果在分配之前或释放之后，使用了对象的存储空间，那么就不能使用内置式指针，必须使用外置式指针，比如：

1）对象构造函数不为空：这样调用构造函数时，对象的存储空间被构造函数初始化。

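2）使能了某些标记位：比如SLAB_POISON调试标记位，这样对象的存储空间会被初始化为固定的值，以利于调试（在没有构造函数且未设置SLAB_DESTROY_BY_RCU时，还会据此置上内部的__OBJECT_POISON标记位）；又如SLAB_DESTROY_BY_RCU标记位。calculate_sizes()中正是依据这两个标记位以及构造函数是否为空来决定是否使用外置式指针。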

/*
 * kmem_cache_create - create a slab cache, or reuse a compatible existing one.
 * @name:  cache name; must not be NULL (a NULL name warns and returns NULL)
 * @size:  size of the objects the caller wants to allocate
 * @align: alignment requested by the caller
 * @flags: SLAB_* flags; SLAB_PANIC turns any failure into a panic()
 * @ctor:  optional object constructor
 *
 * The whole lookup/creation sequence runs with slub_lock held for writing.
 *
 * Returns the cache descriptor on success and NULL on failure (unless
 * SLAB_PANIC is set, in which case panic() is called).
 */
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
		size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s;
	char *n;

	if (WARN_ON(!name))
		return NULL;

	down_write(&slub_lock);

	/* First see whether an existing cache can be shared. */
	s = find_mergeable(size, align, flags, name, ctor);
	if (s) {
		/* Sharing: take another reference on the existing cache. */
		s->refcount++;
		/*
		 * Adjust the object sizes so that we clear
		 * the complete object on kzalloc: keep the larger of the
		 * cache's current object size and the requested one.
		 */
		s->objsize = max(s->objsize, (int)size);
		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));

		if (sysfs_slab_alias(s, name)) {
			/* Could not register the alias: drop our reference. */
			s->refcount--;
			goto err;
		}
		up_write(&slub_lock);
		return s;
	}

	/* No cache to share: the new cache gets its own copy of the name. */
	n = kstrdup(name, GFP_KERNEL);
	if (!n)
		goto err;

	/* Allocate the cache descriptor itself. */
	s = kmalloc(kmem_size, GFP_KERNEL);
	if (s) {
		/* Initialise the descriptor and link it into slab_caches. */
		if (kmem_cache_open(s, n, size, align, flags, ctor)) {
			list_add(&s->list, &slab_caches);
			if (sysfs_slab_add(s)) {
				list_del(&s->list);
				kfree(n);
				kfree(s);
				goto err;
			}
			up_write(&slub_lock);
			return s;
		}
		kfree(s);
	}
	/*
	 * Reached when either kmalloc() or kmem_cache_open() failed. The
	 * duplicated name must be released in both cases; freeing it only
	 * inside the "if (s)" body would leak it when kmalloc() fails.
	 */
	kfree(n);
err:
	up_write(&slub_lock);

	if (flags & SLAB_PANIC)
		panic("Cannot create slabcache %s\n", name);

	/* Every path reaching this point has failed; never hand back a freed s. */
	return NULL;
}
EXPORT_SYMBOL(kmem_cache_create);

/*
 * find_mergeable - look for an existing cache that a new request can share.
 * @size:  requested object size
 * @align: requested alignment
 * @flags: requested SLAB_* flags
 * @name:  name of the cache being created (passed on to kmem_cache_flags())
 * @ctor:  requested constructor; a non-NULL constructor rules out sharing
 *
 * Returns the first suitable cache on the slab_caches list, or NULL if
 * sharing is disabled or no cache qualifies.
 */
static struct kmem_cache *find_mergeable(size_t size,
		size_t align, unsigned long flags, const char *name,
		void (*ctor)(void *))
{
	struct kmem_cache *cache;

	/*
	 * No sharing when merging is switched off, when the flags forbid it,
	 * or when the caller supplies a constructor.
	 */
	if (slub_nomerge || (flags & SLUB_NEVER_MERGE) || ctor)
		return NULL;

	/* Word-align the size, then apply the effective object alignment. */
	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	list_for_each_entry(cache, &slab_caches, list) {
		/*
		 * A candidate must pass all of the following, checked in
		 * this order:
		 *  - the cache itself is mergeable;
		 *  - its objects are large enough for the request;
		 *  - the flags covered by SLUB_MERGE_SAME are identical;
		 *  - its object size is compatible with the requested
		 *    alignment (check courtesy of Adrian Drzewiecki);
		 *  - it wastes less than one word per object.
		 */
		if (!slab_unmergeable(cache) &&
		    size <= cache->size &&
		    (flags & SLUB_MERGE_SAME) ==
				(cache->flags & SLUB_MERGE_SAME) &&
		    (cache->size & ~(align - 1)) == cache->size &&
		    cache->size - size < sizeof(void *))
			return cache;
	}

	return NULL;
}

/*
 * kmem_cache_open - initialise a newly allocated cache descriptor.
 * @s:     descriptor to initialise (kmem_size bytes are zeroed first)
 * @name:  name stored in the descriptor
 * @size:  object size requested by the caller
 * @align: alignment requested by the caller
 * @flags: SLAB_* flags requested by the caller
 * @ctor:  optional object constructor
 *
 * Returns 1 on success and 0 on failure. If SLAB_PANIC is set in @flags a
 * failure calls panic() instead.
 */
static int kmem_cache_open(struct kmem_cache *s,
		const char *name, size_t size,
		size_t align, unsigned long flags,
		void (*ctor)(void *))
{
	/* Start from a zeroed descriptor and record the request in it. */
	memset(s, 0, kmem_size);
	s->align = align;
	s->objsize = size;
	s->ctor = ctor;
	s->name = name;
	s->flags = kmem_cache_flags(size, flags, name, ctor);

	/* Reserve room for an rcu_head only when it is needed for RCU frees. */
	s->reserved = (need_reserve_slab_rcu &&
		       (s->flags & SLAB_DESTROY_BY_RCU)) ?
			sizeof(struct rcu_head) : 0;

	/* Work out object layout, slab order and objects per slab. */
	if (!calculate_sizes(s, -1))
		goto error;

	/*
	 * Disable debugging flags that store metadata if the min slab
	 * order increased, then redo the size calculation.
	 */
	if (disable_higher_order_debug &&
	    get_order(s->size) > get_order(s->objsize)) {
		s->flags &= ~DEBUG_METADATA_FLAGS;
		s->offset = 0;
		if (!calculate_sizes(s, -1))
			goto error;
	}

	/*
	 * The larger the object size is, the more pages we want on the partial
	 * list to avoid pounding the page allocator excessively.
	 */
	set_min_partial(s, ilog2(s->size));

	/* A freshly created cache starts with a single reference. */
	s->refcount = 1;
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif

	/* Set up the per-node structures. */
	if (!init_kmem_cache_nodes(s))
		goto error;

	/* Set up the per-cpu structures; undo the node setup if that fails. */
	if (!alloc_kmem_cache_cpus(s)) {
		free_kmem_cache_nodes(s);
		goto error;
	}

	return 1;

error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)size, s->size, oo_order(s->oo),
			s->offset, flags);
	return 0;
}

/*
 * calculate_sizes - derive the object layout and slab geometry of a cache.
 * @s:            cache to update; objsize, align, flags, ctor and reserved
 *                are read as inputs
 * @forced_order: page order to use when >= 0; a negative value lets
 *                calculate_order() choose
 *
 * Writes s->inuse, s->align, s->size, s->allocflags, s->oo, s->min and,
 * when the new object count is larger, s->max. s->offset is written only
 * when the free pointer has to live behind the object.
 *
 * Returns 0 when no usable order exists or a slab would hold no objects,
 * 1 otherwise.
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
	/* Snapshot of the flags on entry; s->flags itself may change below. */
	unsigned long entry_flags = s->flags;
	unsigned long obj_bytes = s->objsize;
	unsigned long obj_align;
	int order;

	/*
	 * Round up object size to the next word boundary. We can only
	 * place the free pointer at word boundaries and this determines
	 * the possible location of the free pointer.
	 */
	obj_bytes = ALIGN(obj_bytes, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Determine if we can poison the object itself. If the user of
	 * the slab may touch the object after free or before allocation
	 * (RCU or a constructor) then we should never poison the object
	 * itself.
	 */
	if (!(entry_flags & SLAB_POISON) ||
	    (entry_flags & SLAB_DESTROY_BY_RCU) || s->ctor)
		s->flags &= ~__OBJECT_POISON;
	else
		s->flags |= __OBJECT_POISON;

	/*
	 * If we are Redzoning then check if there is some space between the
	 * end of the object and the free pointer. If not then add an
	 * additional word to have some bytes to store Redzone information.
	 */
	if ((entry_flags & SLAB_RED_ZONE) && obj_bytes == s->objsize)
		obj_bytes += sizeof(void *);
#endif

	/*
	 * With that we have determined the number of bytes in actual use
	 * by the object. This is the potential offset to the free pointer.
	 */
	s->inuse = obj_bytes;

	/*
	 * Relocate free pointer after the object if it is not permitted to
	 * overwrite the first word of the object on kmem_cache_free.
	 *
	 * This is the case if we do RCU, have a constructor or destructor
	 * or are poisoning the objects.
	 */
	if (s->ctor ||
	    (entry_flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON))) {
		s->offset = obj_bytes;
		obj_bytes += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Need to store information about allocs and frees after
	 * the object.
	 */
	if (entry_flags & SLAB_STORE_USER)
		obj_bytes += 2 * sizeof(struct track);

	/*
	 * Add some empty padding so that we can catch overwrites from
	 * earlier objects rather than let tracking information or the free
	 * pointer be corrupted if a user writes before the start of the
	 * object.
	 */
	if (entry_flags & SLAB_RED_ZONE)
		obj_bytes += sizeof(void *);
#endif

	/*
	 * Determine the alignment based on various parameters that the
	 * user specified and the dynamic determination of cache line size
	 * on bootup.
	 */
	obj_align = calculate_alignment(entry_flags, s->align, s->objsize);
	s->align = obj_align;

	/*
	 * SLUB stores one object immediately after another beginning from
	 * offset 0. In order to align the objects we have to simply size
	 * each object to conform to the alignment.
	 */
	obj_bytes = ALIGN(obj_bytes, obj_align);
	s->size = obj_bytes;

	/* Honour a forced order; otherwise compute a suitable one. */
	order = (forced_order >= 0) ? forced_order
				    : calculate_order(obj_bytes, s->reserved);
	if (order < 0)
		return 0;

	s->allocflags = order ? __GFP_COMP : 0;

	if (s->flags & SLAB_CACHE_DMA)
		s->allocflags |= SLUB_DMA;

	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		s->allocflags |= __GFP_RECLAIMABLE;

	/*
	 * Determine the number of objects per slab. oo_make() combines the
	 * page order and the resulting object count into one value.
	 */
	s->oo = oo_make(order, obj_bytes, s->reserved);
	s->min = oo_make(get_order(obj_bytes), obj_bytes, s->reserved);
	if (oo_objects(s->oo) > oo_objects(s->max))
		s->max = s->oo;

	return oo_objects(s->oo) != 0;
}