On a NUMA system, the slab allocator obtains an object as follows:
static __always_inline void *
__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
{
    void *objp;
    …
    objp = ____cache_alloc(cache, flags);

    /*
     * We may just have run out of memory on the local node.
     * ____cache_alloc_node() knows how to locate memory on other nodes
     */
    if (!objp)
        objp = ____cache_alloc_node(cache, flags, numa_mem_id());

out:
    return objp;
}
First, ____cache_alloc is called to do the allocation. It is implemented as follows:
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
    void *objp;
    struct array_cache *ac;
    bool force_refill = false;

    check_irq_off();

    ac = cpu_cache_get(cachep);
    if (likely(ac->avail)) {
        ac->touched = 1;
        objp = ac_get_obj(cachep, ac, flags, false);

        /*
         * Allow for the possibility all avail objects are not allowed
         * by the current flags
         */
        if (objp) {
            STATS_INC_ALLOCHIT(cachep);
            goto out;
        }
        force_refill = true;
    }

    STATS_INC_ALLOCMISS(cachep);
    objp = cache_alloc_refill(cachep, flags, force_refill);
    /*
     * the 'ac' may be updated by cache_alloc_refill(),
     * and kmemleak_erase() requires its correct value.
     */
    ac = cpu_cache_get(cachep);

out:
    /*
     * To avoid a false negative, if an object that is in one of the
     * per-CPU caches is leaked, we need to make sure kmemleak doesn't
     * treat the array pointers as a reference to the object.
     */
    if (objp)
        kmemleak_erase(&ac->entry[ac->avail]);
    return objp;
}
1. First look in the per-CPU array cache. If an object is found there, return it; otherwise go to step 2.
2. Call cache_alloc_refill to fetch objects from the node's shared array, or from the slab partial/free lists, and refill the CPU's array cache (a simplified sketch of this split follows this list).
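The following stand-alone sketch only illustrates the shape of steps 1 and 2; it is not kernel code, and my_array_cache, my_alloc_fast and my_refill are made-up names. The idea is the same as in ____cache_alloc: pop the most recently freed object off a per-CPU LIFO array, and only fall back to a refill routine on a miss.

/*
 * Minimal user-space sketch of the fast path described in steps 1-2.
 * NOT kernel code: my_array_cache, my_alloc_fast and my_refill are
 * invented names that only mirror the structure of ____cache_alloc().
 */
#include <stdio.h>
#include <stdlib.h>

#define AC_LIMIT 16

struct my_array_cache {
    unsigned int avail;          /* number of cached object pointers     */
    unsigned int batchcount;     /* how many objects to pull in per miss */
    void *entry[AC_LIMIT];       /* LIFO stack of free objects           */
};

/* Stand-in for the refill path: fake a batch of "objects". */
static int my_refill(struct my_array_cache *ac)
{
    unsigned int i;

    for (i = 0; i < ac->batchcount && ac->avail < AC_LIMIT; i++)
        ac->entry[ac->avail++] = malloc(64);   /* never freed in this toy */
    return ac->avail != 0;
}

/* Shape of ____cache_alloc(): try the per-CPU array first. */
static void *my_alloc_fast(struct my_array_cache *ac)
{
    if (ac->avail)                       /* fast path: cache hit        */
        return ac->entry[--ac->avail];   /* LIFO: take the hottest one  */
    if (!my_refill(ac))                  /* slow path: refill the array */
        return NULL;
    return ac->entry[--ac->avail];
}

int main(void)
{
    struct my_array_cache ac = { .avail = 0, .batchcount = 4 };
    void *obj = my_alloc_fast(&ac);      /* first call misses, then refills */

    printf("obj=%p, avail after alloc=%u\n", obj, ac.avail);
    return 0;
}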
cache_alloc_refill is implemented as follows:
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
                                bool force_refill)
{
    int batchcount;
    struct kmem_cache_node *n;
    struct array_cache *ac;
    int node;

    check_irq_off();
    node = numa_mem_id();
    if (unlikely(force_refill))
        goto force_grow;
retry:
    ac = cpu_cache_get(cachep);
    batchcount = ac->batchcount;
    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
        /*
         * If there was little recent activity on this cache, then
         * perform only a partial refill.  Otherwise we could generate
         * refill bouncing.
         */
        batchcount = BATCHREFILL_LIMIT;
    }
    n = get_node(cachep, node);

    BUG_ON(ac->avail > 0 || !n);
    spin_lock(&n->list_lock);

    /* See if we can refill from the shared array */
    if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
        n->shared->touched = 1;
        goto alloc_done;
    }

    while (batchcount > 0) {
        struct list_head *entry;
        struct page *page;
        /* Get slab alloc is to come from. */
        entry = n->slabs_partial.next;
        if (entry == &n->slabs_partial) {
            n->free_touched = 1;
            entry = n->slabs_free.next;
            if (entry == &n->slabs_free)
                goto must_grow;
        }

        page = list_entry(entry, struct page, lru);
        check_spinlock_acquired(cachep);

        /*
         * The slab was either on partial or free list so
         * there must be at least one object available for
         * allocation.
         */
        BUG_ON(page->active >= cachep->num);

        while (page->active < cachep->num && batchcount--) {
            STATS_INC_ALLOCED(cachep);
            STATS_INC_ACTIVE(cachep);
            STATS_SET_HIGH(cachep);

            ac_put_obj(cachep, ac, slab_get_obj(cachep, page, node));
        }

        /* move slabp to correct slabp list: */
        list_del(&page->lru);
        if (page->active == cachep->num)
            list_add(&page->lru, &n->slabs_full);
        else
            list_add(&page->lru, &n->slabs_partial);
    }

must_grow:
    n->free_objects -= ac->avail;
alloc_done:
    spin_unlock(&n->list_lock);

    if (unlikely(!ac->avail)) {
        int x;
force_grow:
        x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

        /* cache_grow can reenable interrupts, then ac could change. */
        ac = cpu_cache_get(cachep);
        node = numa_mem_id();

        /* no objects in sight? abort */
        if (!x && (ac->avail == 0 || force_refill))
            return NULL;

        if (!ac->avail)     /* objects refilled by interrupt? */
            goto retry;
    }
    ac->touched = 1;

    return ac_get_obj(cachep, ac, flags, force_refill);
}
3. If nr (nr > 0) objects can be transferred from n->shared, return; the refill succeeds.
4. If n->shared has no usable objects either, take objects from the slab partial/free lists and fill the ac.
page->active is the number of objects already in use in that slab.
ac->avail is the index of the next available object in the ac; it is consumed by decrementing.
Note the statement n->free_objects -= ac->avail; at the must_grow label: once the ac has been filled, the objects sitting in it are already counted as allocated from the node's point of view (see the sketch below).
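To make that bookkeeping concrete, here is a small user-space sketch; it is not kernel code, and my_page, my_ac, my_node and my_refill_from_slab are invented names. It mirrors the inner loop of cache_alloc_refill: objects are pulled out of a slab page (page->active counts how many have been handed out), stacked into the per-CPU ac, and n->free_objects is then decreased by ac->avail, so objects parked in an ac no longer count as free on the node.

/*
 * User-space sketch of the refill bookkeeping in step 4 (not kernel code;
 * my_page, my_ac, my_node and my_refill_from_slab are invented names).
 */
#include <stdio.h>

#define OBJS_PER_SLAB 8
#define AC_MAX        16

struct my_page {
    unsigned int active;          /* objects already taken from this slab */
    void *objs[OBJS_PER_SLAB];    /* fake object slots                    */
};

struct my_ac {
    unsigned int avail;           /* LIFO index, consumed by decrementing */
    void *entry[AC_MAX];
};

struct my_node {
    unsigned int free_objects;    /* free objects on all of this node's slabs */
};

static void my_refill_from_slab(struct my_node *n, struct my_page *page,
                                struct my_ac *ac, int batchcount)
{
    /* mirror of: while (page->active < cachep->num && batchcount--) */
    while (page->active < OBJS_PER_SLAB && batchcount--)
        ac->entry[ac->avail++] = &page->objs[page->active++];

    n->free_objects -= ac->avail;   /* ac objects now count as allocated */
}

int main(void)
{
    struct my_page page = { .active = 0 };
    struct my_ac ac = { .avail = 0 };
    struct my_node n = { .free_objects = OBJS_PER_SLAB };

    my_refill_from_slab(&n, &page, &ac, 5);
    printf("page.active=%u ac.avail=%u free_objects=%u\n",
           page.active, ac.avail, n.free_objects);
    /* a later allocation pops ac.entry[--ac.avail], the most recently cached object */
    return 0;
}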
If neither step 3 nor step 4 manages to transfer objects into the ac, the only option left is to grow a new slab, as the implementation of cache_grow shows:
static int cache_grow(struct kmem_cache *cachep,
                      gfp_t flags, int nodeid, struct page *page)
{
    void *freelist;
    size_t offset;
    gfp_t local_flags;
    struct kmem_cache_node *n;

    /*
     * Be lazy and only check for valid flags here, keeping it out of the
     * critical path in kmem_cache_alloc().
     */
    BUG_ON(flags & GFP_SLAB_BUG_MASK);
    local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

    /* Take the node list lock to change the colour_next on this node */
    check_irq_off();
    n = get_node(cachep, nodeid);
    spin_lock(&n->list_lock);

    /* Get colour for the slab, and cal the next value. */
    offset = n->colour_next;
    n->colour_next++;
    if (n->colour_next >= cachep->colour)
        n->colour_next = 0;
    spin_unlock(&n->list_lock);

    offset *= cachep->colour_off;

    if (local_flags & __GFP_WAIT)
        local_irq_enable();

    /*
     * The test for missing atomic flag is performed here, rather than
     * the more obvious place, simply to reduce the critical path length
     * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
     * will eventually be caught here (where it matters).
     */
    kmem_flagcheck(cachep, flags);

    /*
     * Get mem for the objs.  Attempt to allocate a physical page from
     * 'nodeid'.
     */
    if (!page)
        page = kmem_getpages(cachep, local_flags, nodeid);
    if (!page)
        goto failed;

    /* Get slab management. */
    freelist = alloc_slabmgmt(cachep, page, offset,
                              local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
    if (!freelist)
        goto opps1;

    slab_map_pages(cachep, page, freelist);

    cache_init_objs(cachep, page);

    if (local_flags & __GFP_WAIT)
        local_irq_disable();
    check_irq_off();
    spin_lock(&n->list_lock);

    /* Make slab active. */
    list_add_tail(&page->lru, &(n->slabs_free));
    STATS_INC_GROWN(cachep);
    n->free_objects += cachep->num;
    spin_unlock(&n->list_lock);
    return 1;
opps1:
    kmem_freepages(cachep, page);
failed:
    if (local_flags & __GFP_WAIT)
        local_irq_disable();
    return 0;
}
After the pages have been allocated, the slab management structure (slabmgmt) is allocated, as follows:
static void *alloc_slabmgmt(struct kmem_cache *cachep,
                            struct page *page, int colour_off,
                            gfp_t local_flags, int nodeid)
{
    void *freelist;
    void *addr = page_address(page);

    if (OFF_SLAB(cachep)) {
        /* Slab management obj is off-slab. */
        freelist = kmem_cache_alloc_node(cachep->freelist_cache,
                                         local_flags, nodeid);
        if (!freelist)
            return NULL;
    } else {
        freelist = addr + colour_off;
        colour_off += cachep->freelist_size;
    }
    page->active = 0;
    page->s_mem = addr + colour_off;
    return freelist;
}
The slab management structure can live either inside the slab (on-slab) or outside it (off-slab). The condition for placing it off-slab is:
    /*
     * Determine if the slab management is 'on' or 'off' slab.
     * (bootstrapping cannot cope with offslab caches so don't do
     * it too early on. Always use on-slab management when
     * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
     */
    if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
        !(flags & SLAB_NOLEAKTRACE))
        /*
         * Size is large, assume best to place the slab management obj
         * off-slab (should allow better packing of objs).
         */
        flags |= CFLGS_OFF_SLAB;
[Figure: on-slab layout: colour_off padding | freelist (freelist_size) | obj ...]
When the management structure is kept inside the slab, the layout is as shown in the figure above. If CONFIG_DEBUG_SLAB_LEAK is enabled, the freelist area is additionally followed by a per-object status entry for each object.
The page and its objects are then initialized.
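As a reminder of the on-slab arithmetic done in alloc_slabmgmt above, here is a tiny stand-alone sketch (my_slab_layout is an invented helper and the sizes are example values only): the page begins with the colour padding, the freelist sits at addr + colour_off, and the objects start at s_mem = addr + colour_off + freelist_size.

/*
 * Stand-alone sketch of the on-slab layout arithmetic (not kernel code;
 * my_slab_layout is an invented helper, sizes are example values).
 */
#include <stdio.h>

static void my_slab_layout(unsigned long addr, unsigned int colour_off,
                           unsigned int freelist_size)
{
    unsigned long freelist = addr + colour_off;          /* freelist = addr + colour_off      */
    unsigned long s_mem    = freelist + freelist_size;   /* first object follows the freelist */

    printf("page     : 0x%lx\n", addr);
    printf("freelist : 0x%lx (after %u bytes of colour)\n", freelist, colour_off);
    printf("s_mem    : 0x%lx (objects start here)\n", s_mem);
}

int main(void)
{
    /* example numbers: one 4 KiB page, 64 B colour offset, 32 B freelist */
    my_slab_layout(0x1000, 64, 32);
    return 0;
}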