Slab object allocation

Under a NUMA configuration, the slab allocator allocates an object through __do_cache_alloc():

    static __always_inline void *
    __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
    {
        void *objp;

        objp = ____cache_alloc(cache, flags);

        /*
         * We may just have run out of memory on the local node.
         * ____cache_alloc_node() knows how to locate memory on other nodes
         */
        if (!objp)
            objp = ____cache_alloc_node(cache, flags, numa_mem_id());

    out:
        return objp;
    }
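
For context, the function above sits underneath kmem_cache_alloc(). Below is a minimal, hedged sketch of how a kernel module might exercise this path; struct my_obj, my_cache and the other my_* names are made up for illustration and are not part of the kernel:

    #include <linux/module.h>
    #include <linux/slab.h>

    struct my_obj {                      /* hypothetical object type */
        int id;
        char name[32];
    };

    static struct kmem_cache *my_cache;  /* hypothetical cache */

    static int __init my_init(void)
    {
        struct my_obj *obj;

        my_cache = kmem_cache_create("my_obj", sizeof(struct my_obj), 0,
                                     SLAB_HWCACHE_ALIGN, NULL);
        if (!my_cache)
            return -ENOMEM;

        /* This call ends up in __do_cache_alloc() shown above. */
        obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
        if (obj)
            kmem_cache_free(my_cache, obj);
        return 0;
    }

    static void __exit my_exit(void)
    {
        kmem_cache_destroy(my_cache);
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");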

First, ____cache_alloc() is called to do the allocation. It is implemented as follows:

    static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
    {
        void *objp;
        struct array_cache *ac;
        bool force_refill = false;

        check_irq_off();

        ac = cpu_cache_get(cachep);
        if (likely(ac->avail)) {
            ac->touched = 1;
            objp = ac_get_obj(cachep, ac, flags, false);

            /*
             * Allow for the possibility all avail objects are not allowed
             * by the current flags
             */
            if (objp) {
                STATS_INC_ALLOCHIT(cachep);
                goto out;
            }
            force_refill = true;
        }

        STATS_INC_ALLOCMISS(cachep);
        objp = cache_alloc_refill(cachep, flags, force_refill);
        /*
         * the 'ac' may be updated by cache_alloc_refill(),
         * and kmemleak_erase() requires its correct value.
         */
        ac = cpu_cache_get(cachep);

    out:
        /*
         * To avoid a false negative, if an object that is in one of the
         * per-CPU caches is leaked, we need to make sure kmemleak doesn't
         * treat the array pointers as a reference to the object.
         */
        if (objp)
            kmemleak_erase(&ac->entry[ac->avail]);
        return objp;
    }

1. First try the per-CPU array cache. If an object is found there, return it; otherwise go to step 2.

2. Call cache_alloc_refill() to pull objects either from the node's shared array cache or from the slabs on the node's partial/free lists, and refill the per-CPU array cache with them (a simplified model of this fast path and refill follows below).
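
A minimal userspace sketch of the step-1/step-2 split (not kernel code, and with a simplified array_cache layout; refill() merely stands in for cache_alloc_refill()):

    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified model of struct array_cache: a per-CPU stack of object pointers. */
    struct array_cache_model {
        unsigned int avail;      /* number of cached objects; index of the next free slot */
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
        void *entry[16];         /* the kernel uses a flexible array sized at runtime */
    };

    /* Stand-in for cache_alloc_refill(): grab a batch of objects from "the node". */
    static void refill(struct array_cache_model *ac)
    {
        while (ac->avail < ac->batchcount)
            ac->entry[ac->avail++] = malloc(64);   /* pretend these came from a slab */
    }

    /* Model of the ____cache_alloc() fast path: pop from the array cache, refill on miss. */
    static void *cache_alloc(struct array_cache_model *ac)
    {
        if (ac->avail) {
            ac->touched = 1;
            return ac->entry[--ac->avail];   /* LIFO: most recently freed object first */
        }
        refill(ac);
        return ac->avail ? ac->entry[--ac->avail] : NULL;
    }

    int main(void)
    {
        struct array_cache_model ac = { .limit = 16, .batchcount = 8 };
        void *obj = cache_alloc(&ac);        /* miss -> refill -> hit */
        printf("obj=%p avail=%u\n", obj, ac.avail);
        return 0;
    }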

cache_alloc_refill() is implemented as follows:

    static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
                                    bool force_refill)
    {
        int batchcount;
        struct kmem_cache_node *n;
        struct array_cache *ac;
        int node;

        check_irq_off();
        node = numa_mem_id();
        if (unlikely(force_refill))
            goto force_grow;
    retry:
        ac = cpu_cache_get(cachep);
        batchcount = ac->batchcount;
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
            /*
             * If there was little recent activity on this cache, then
             * perform only a partial refill.  Otherwise we could generate
             * refill bouncing.
             */
            batchcount = BATCHREFILL_LIMIT;
        }
        n = get_node(cachep, node);

        BUG_ON(ac->avail > 0 || !n);
        spin_lock(&n->list_lock);

        /* See if we can refill from the shared array */
        if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
            n->shared->touched = 1;
            goto alloc_done;
        }

        while (batchcount > 0) {
            struct list_head *entry;
            struct page *page;
            /* Get slab alloc is to come from. */
            entry = n->slabs_partial.next;
            if (entry == &n->slabs_partial) {
                n->free_touched = 1;
                entry = n->slabs_free.next;
                if (entry == &n->slabs_free)
                    goto must_grow;
            }

            page = list_entry(entry, struct page, lru);
            check_spinlock_acquired(cachep);

            /*
             * The slab was either on partial or free list so
             * there must be at least one object available for
             * allocation.
             */
            BUG_ON(page->active >= cachep->num);

            while (page->active < cachep->num && batchcount--) {
                STATS_INC_ALLOCED(cachep);
                STATS_INC_ACTIVE(cachep);
                STATS_SET_HIGH(cachep);

                ac_put_obj(cachep, ac, slab_get_obj(cachep, page, node));
            }

            /* move slabp to correct slabp list: */
            list_del(&page->lru);
            if (page->active == cachep->num)
                list_add(&page->lru, &n->slabs_full);
            else
                list_add(&page->lru, &n->slabs_partial);
        }

    must_grow:
        n->free_objects -= ac->avail;
    alloc_done:
        spin_unlock(&n->list_lock);

        if (unlikely(!ac->avail)) {
            int x;
    force_grow:
            x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

            /* cache_grow can reenable interrupts, then ac could change. */
            ac = cpu_cache_get(cachep);
            node = numa_mem_id();

            /* no objects in sight? abort */
            if (!x && (ac->avail == 0 || force_refill))
                return NULL;

            if (!ac->avail)        /* objects refilled by interrupt? */
                goto retry;
        }
        ac->touched = 1;

        return ac_get_obj(cachep, ac, flags, force_refill);
    }

3. If nr (nr > 0) objects can be transferred from n->shared, the refill is done there and the allocation succeeds.

4. If n->shared has no usable objects either, objects are taken from the slabs on the node's partial/free lists and moved into ac.

page->active is the number of objects already in use in that slab.

ac->avail is the number of objects currently cached in ac, and also serves as the index of the next available entry; it is consumed by decrementing.

Note the statement n->free_objects -= ac->avail; at must_grow: once the objects have been moved into ac, the node already regards them as allocated.
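
To make that accounting concrete, here is a small userspace sketch of the partial/free-list refill, under the simplifying assumption that objects are taken off the slab in order rather than through the freelist index array; page_active mirrors page->active, and n->free_objects drops by whatever ended up in ac:

    #include <stdio.h>

    #define NUM_OBJS   8     /* cachep->num: objects per slab (made-up value) */
    #define BATCH      5     /* ac->batchcount (made-up value) */

    static char slab_objs[NUM_OBJS][64];   /* stand-in for the slab's object area */

    int main(void)
    {
        unsigned int page_active = 2;      /* page->active: objects already in use on this slab */
        unsigned int free_objects = NUM_OBJS - page_active;  /* n->free_objects for this node */
        void *ac_entry[BATCH];
        unsigned int ac_avail = 0;         /* ac->avail */
        int batchcount = BATCH;

        /* Mirror of the inner loop in cache_alloc_refill(): pull objects off the slab
         * (slab_get_obj bumps page->active) and push them into ac (ac_put_obj). */
        while (page_active < NUM_OBJS && batchcount--)
            ac_entry[ac_avail++] = slab_objs[page_active++];

        /* must_grow/alloc_done: the node no longer counts these objects as free. */
        free_objects -= ac_avail;

        printf("page->active=%u ac->avail=%u n->free_objects=%u\n",
               page_active, ac_avail, free_objects);
        /* prints: page->active=7 ac->avail=5 n->free_objects=1 */
        return 0;
    }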

If neither step 3 nor step 4 manages to transfer any object into ac, a new slab has to be allocated. cache_grow() is implemented as follows:

    static int cache_grow(struct kmem_cache *cachep,
            gfp_t flags, int nodeid, struct page *page)
    {
        void *freelist;
        size_t offset;
        gfp_t local_flags;
        struct kmem_cache_node *n;

        /*
         * Be lazy and only check for valid flags here, keeping it out of the
         * critical path in kmem_cache_alloc().
         */
        BUG_ON(flags & GFP_SLAB_BUG_MASK);
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

        /* Take the node list lock to change the colour_next on this node */
        check_irq_off();
        n = get_node(cachep, nodeid);
        spin_lock(&n->list_lock);

        /* Get colour for the slab, and cal the next value. */
        offset = n->colour_next;
        n->colour_next++;
        if (n->colour_next >= cachep->colour)
            n->colour_next = 0;
        spin_unlock(&n->list_lock);

        offset *= cachep->colour_off;

        if (local_flags & __GFP_WAIT)
            local_irq_enable();

        /*
         * The test for missing atomic flag is performed here, rather than
         * the more obvious place, simply to reduce the critical path length
         * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
         * will eventually be caught here (where it matters).
         */
        kmem_flagcheck(cachep, flags);

        /*
         * Get mem for the objs.  Attempt to allocate a physical page from
         * 'nodeid'.
         */
        if (!page)
            page = kmem_getpages(cachep, local_flags, nodeid);
        if (!page)
            goto failed;

        /* Get slab management. */
        freelist = alloc_slabmgmt(cachep, page, offset,
                local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
        if (!freelist)
            goto opps1;

        slab_map_pages(cachep, page, freelist);

        cache_init_objs(cachep, page);

        if (local_flags & __GFP_WAIT)
            local_irq_disable();
        check_irq_off();
        spin_lock(&n->list_lock);

        /* Make slab active. */
        list_add_tail(&page->lru, &(n->slabs_free));
        STATS_INC_GROWN(cachep);
        n->free_objects += cachep->num;
        spin_unlock(&n->list_lock);
        return 1;
    opps1:
        kmem_freepages(cachep, page);
    failed:
        if (local_flags & __GFP_WAIT)
            local_irq_disable();
        return 0;
    }

After the pages have been allocated, the slab management structure (slabmgmt) is allocated:

    static void *alloc_slabmgmt(struct kmem_cache *cachep,
                                struct page *page, int colour_off,
                                gfp_t local_flags, int nodeid)
    {
        void *freelist;
        void *addr = page_address(page);

        if (OFF_SLAB(cachep)) {
            /* Slab management obj is off-slab. */
            freelist = kmem_cache_alloc_node(cachep->freelist_cache,
                                             local_flags, nodeid);
            if (!freelist)
                return NULL;
        } else {
            freelist = addr + colour_off;
            colour_off += cachep->freelist_size;
        }
        page->active = 0;
        page->s_mem = addr + colour_off;
        return freelist;
    }

The slab management structure can live either inside the slab (on-slab) or outside it (off-slab). It is placed off-slab under the following condition (from __kmem_cache_create()):

    /*
     * Determine if the slab management is 'on' or 'off' slab.
     * (bootstrapping cannot cope with offslab caches so don't do
     * it too early on. Always use on-slab management when
     * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
     */
    if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
        !(flags & SLAB_NOLEAKTRACE))
        /*
         * Size is large, assume best to place the slab management obj
         * off-slab (should allow better packing of objs).
         */
        flags |= CFLGS_OFF_SLAB;
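
Assuming 4 KB pages, the threshold PAGE_SIZE >> 5 works out to 128 bytes: a cache whose object size is at least 128 bytes, created after early boot and without SLAB_NOLEAKTRACE, gets its freelist placed off-slab so that the objects themselves pack more tightly.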

[Figure: on-slab layout — colour_off padding at the start of the page, followed by the freelist (freelist_size bytes), followed by the objects.]

When the management structure is on-slab, the layout is as shown above. If CONFIG_DEBUG_SLAB_LEAK is enabled, per-object state information additionally follows the freelist.
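
The on-slab layout boils down to two additions, mirroring the else branch of alloc_slabmgmt() above; the sizes below are made-up example values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Example values: one cache line of colouring, 32 objects,
         * and 1-byte freelist index entries (the entry width depends
         * on the kernel version and object count). */
        uintptr_t addr = 0x1000;          /* page_address(page) */
        unsigned int colour_off = 64;     /* n->colour_next * cachep->colour_off */
        unsigned int freelist_size = 32;  /* cachep->num * size of one freelist entry */

        uintptr_t freelist = addr + colour_off;           /* on-slab freelist */
        uintptr_t s_mem    = freelist + freelist_size;    /* page->s_mem: first object */

        printf("freelist at %#lx, objects start at %#lx\n",
               (unsigned long)freelist, (unsigned long)s_mem);
        return 0;
    }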

After that, the page and its objects are initialized (slab_map_pages() and cache_init_objs() in cache_grow() above).
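
In this kernel version, that initialization essentially fills the freelist with the identity permutation (entry i holds object index i), after which the freelist together with page->active behaves like a stack of free object indices. A simplified userspace reconstruction (not kernel code):

    #include <stdio.h>

    #define NUM 4                       /* cachep->num */

    static unsigned char freelist[NUM]; /* on-slab freelist: indices of free objects */
    static unsigned int active;         /* page->active: objects currently in use */
    static char objs[NUM][32];          /* object area starting at page->s_mem */

    static void init_objs(void)         /* mirrors cache_init_objs(): freelist[i] = i */
    {
        for (unsigned int i = 0; i < NUM; i++)
            freelist[i] = i;
        active = 0;
    }

    static void *get_obj(void)          /* mirrors slab_get_obj() */
    {
        unsigned int idx = freelist[active];
        active++;
        return objs[idx];
    }

    static void put_obj(void *objp)     /* mirrors slab_put_obj() */
    {
        unsigned int idx = (unsigned int)((char (*)[32])objp - objs);
        active--;
        freelist[active] = idx;
    }

    int main(void)
    {
        init_objs();
        void *a = get_obj();            /* object 0, active = 1 */
        void *b = get_obj();            /* object 1, active = 2 */
        put_obj(a);                     /* freelist[1] = 0, active = 1 */
        void *c = get_obj();            /* reuses object 0 (LIFO) */
        printf("a=%p b=%p c=%p active=%u\n", a, b, c, active);
        return 0;
    }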
