code: 基于Android common kernel 4.19.176

1. 简介

carveout heap是指模块专用的heap，不会被系统使用。在dts的reserved-memory节点中添加内存时需要加上no-map属性。本文主要根据代码看下这种heap是如何分配的。

2. 创建

carveout heap的创建需要调用struct ion_heap *ion_carveout_heap_create(struct ion_platform_heap *heap_data)函数,

传入参数：一个ion_platform_heap类型的参数，表示该heap的物理起始地址，长度等内容，定义如下

/**
 * struct ion_platform_heap - defines a heap in the given platform
 * @type:    type of the heap from ion_heap_type enum
 * @id:      unique identifier for the heap; when allocating, heaps with
 *           lower numbers are allocated from first
 * @name:    used for debug purposes
 * @base:    base address of heap in physical memory if applicable
 * @size:    size of the heap in bytes if applicable
 *
 * Provided by the board file.
 */
struct ion_platform_heap {
    enum ion_heap_type type;
    unsigned int id;
    const char *name;
    ion_phys_addr_t base;
    size_t size;
};

返回值：生成的ion_heap类型的指针,该结构体定义如下：

/**
 * struct ion_heap - represents a heap in the system
 * @node:           plist node used to link the heap into the device's
 *                  collection of heaps
 * @dev:            back pointer to the ion_device
 * @type:           type of heap
 * @ops:            ops struct as above
 * @flags:          flags
 * @id:             id of heap, also indicates priority of this heap when
 *                  allocating.  These are specified by platform data and
 *                  MUST be unique
 * @name:           used for debugging
 * @shrinker:       a shrinker for the heap
 * @free_list:      free list head if deferred free is used
 * @free_list_size: size of the deferred free list in bytes
 * @free_lock:      protects the free list
 * @waitqueue:      queue to wait on from deferred free thread
 * @task:           task struct of deferred free thread
 * @debug_show:     called when heap debug file is read to add any
 *                  heap specific debug info to output
 *
 * Represents a pool of memory from which buffers can be made.  In some
 * systems the only heap is regular system memory allocated via vmalloc.
 * On others, some blocks might require large physically contiguous buffers
 * that are allocated from a specially reserved heap.
 */
struct ion_heap {
    struct plist_node node;
    struct ion_device *dev;
    enum ion_heap_type type;
    struct ion_heap_ops *ops;
    unsigned long flags;
    unsigned int id;
    const char *name;
    struct shrinker shrinker;
    struct list_head free_list;
    size_t free_list_size;
    spinlock_t free_lock;
    wait_queue_head_t waitqueue;
    struct task_struct *task;

    int (*debug_show)(struct ion_heap *heap, struct seq_file *s,
              void *unused);
};

heap创建完成后还需要调用void ion_device_add_heap(struct ion_heap *heap)函数将ion_heap添加到ion device。

ion_carveout_heap_create()函数的内容如下:

/**
 * ion_carveout_heap_create - build a carveout heap over a reserved region
 * @heap_data: platform description of the heap (physical base and size)
 *
 * Zeroes the region, creates a gen_pool with page granularity and hands the
 * whole region to that pool.
 *
 * Returns a pointer to the embedded ion_heap on success, or an ERR_PTR()
 * carrying a negative errno on failure. Every intermediate allocation is
 * released on the failure paths.
 */
struct ion_heap *ion_carveout_heap_create(struct ion_platform_heap *heap_data)
{
    struct ion_carveout_heap *carveout_heap;
    struct page *page;
    size_t size;
    int ret;

    // Physical base address -> page frame number -> struct page.
    page = pfn_to_page(PFN_DOWN(heap_data->base));
    size = heap_data->size;

    // Clear the whole region before any of it can be handed out.
    ret = ion_heap_pages_zero(page, size, pgprot_writecombine(PAGE_KERNEL));
    if (ret)
        return ERR_PTR(ret);

    carveout_heap = kzalloc(sizeof(*carveout_heap), GFP_KERNEL);
    if (!carveout_heap)
        return ERR_PTR(-ENOMEM);

    // One bitmap bit per page (PAGE_SHIFT); -1 means no particular node.
    carveout_heap->pool = gen_pool_create(PAGE_SHIFT, -1);
    if (!carveout_heap->pool) {
        ret = -ENOMEM;
        goto err_free_heap;
    }

    // The pool hands out addresses in [base, base + size).
    carveout_heap->base = heap_data->base;
    ret = gen_pool_add(carveout_heap->pool, carveout_heap->base, size, -1);
    if (ret)
        goto err_destroy_pool;    // do not return a heap with an empty pool

    carveout_heap->heap.ops = &carveout_heap_ops;
    carveout_heap->heap.type = ION_HEAP_TYPE_CARVEOUT;
    carveout_heap->heap.flags = ION_HEAP_FLAG_DEFER_FREE;

    return &carveout_heap->heap;

err_destroy_pool:
    gen_pool_destroy(carveout_heap->pool);
err_free_heap:
    kfree(carveout_heap);
    return ERR_PTR(ret);
}

函数体中的使用的函数和结构体定义：

1.ion_carveout_heap可以看成是ion_heap的具体实例，因为ion_carveout_heap的创建使用了genalloc模块，所以需要gen_pool来表示生成的pool

// Carveout-specific heap state wrapped around the generic ion_heap.
struct ion_carveout_heap {
    struct ion_heap heap;      // generic heap; container_of() on it recovers this struct
    struct gen_pool *pool;     // genalloc pool that hands out the reserved region
    phys_addr_t base;          // copied from ion_platform_heap.base at creation
};

gen_pool结构体定义如下：

/*
 *  General purpose special memory pool descriptor.
 */
struct gen_pool {
    spinlock_t lock;            /* taken while a chunk is added to @chunks */
    struct list_head chunks;    /* list of chunks in this pool */
    int min_alloc_order;        /* minimum allocation order */

    genpool_algo_t algo;        /* allocation function */
    void *data;                 /* passed through to @algo on each allocation */

    const char *name;           /* set to NULL by gen_pool_create() */
};

2.ion_heap_pages_zero()函数负责将需要分配给ion的物理内存清零

函数定义如下：
scatterlist是一个数据结构，用于描述一些分散的物理内存，以列表的形式组织起来。它可以作为不同地址映射空间（如虚拟地址、物理地址、设备地址等）的媒介，借助它，这些映射空间才能相互转换。

// Zero @size bytes of memory starting at @page.
// The range is described by a single scatterlist entry and handed to
// ion_heap_sglist_zero(), whose result is returned unchanged.
int ion_heap_pages_zero(struct page *page, size_t size, pgprot_t pgprot)
{
    struct scatterlist single;

    // One entry is enough: the range starts at @page and is @size bytes long.
    sg_init_table(&single, 1);
    // Point the entry at @page with length @size and offset 0.
    sg_set_page(&single, page, size, 0);

    // ion_heap_sglist_zero() is not shown here; presumably it maps the pages
    // with @pgprot and clears them -- confirm against its definition.
    return ion_heap_sglist_zero(&single, 1, pgprot);
}


// One scatter/gather entry; fill it in with sg_set_page().
struct scatterlist {
    unsigned long    page_link;      // page pointer; sg table info is encoded in its low bits
    unsigned int    offset;         // offset into the page
    unsigned int    length;         // length of the data
    dma_addr_t    dma_address;    // presumably the DMA address once mapped -- not set in the code shown here
#ifdef CONFIG_NEED_SG_DMA_LENGTH
    unsigned int    dma_length;
#endif
};

3.struct gen_pool *gen_pool_create(int min_alloc_order, int nid)函数

内核中有许多内存分配子系统，每一个都是针对特定的需求。然而，有时候，内核开发者需要为特定范围的特殊用途的内存实现一个新的分配器；通常这个内存位于某个设备上。该设备的驱动程序的作者当然可以写一个小的分配器来完成工作，但这是让内核充满几十个测试差劲的分配器的方法。早在2005年，Jes Sorensen从sym53c8xx_2驱动中提取了其中的一个分配器，并将其作为一个通用模块发布，用于创建特设的内存分配器。这段代码在2.6.13 版本中被合并；此后它被大大地修改了。

对gen_pool_create()的调用将创建一个内存池。分配的粒度由min_alloc_order设置；它是一个log-base-2（以2为底的对数）的数字，就像页面分配器使用的数字一样，但它指的是字节而不是页面。因此，如果min_alloc_order被传递为3，那么所有的分配将是8字节的倍数。增加min_alloc_order可以减少跟踪池中内存所需的内存。nid参数指定哪一个NUMA节点应该被用于分配内部管理用的（housekeeping）结构体；如果调用者不关心，它可以是-1。

内存池是一种内存管理技术，它可以提高内存分配的效率和性能。内存池是一块预先分配好的内存区域，它可以按照一定的规则将内存划分为多个小块，每个小块都有一个标志位表示是否被使用。当需要分配内存时，内存池可以直接从空闲的小块中返回一个地址，而不需要调用系统函数。


/**
 * gen_pool_create - create a new special memory pool
 * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents
 * @nid: node id of the node the pool structure should be allocated on, or -1
 *
 * Create a new special memory pool that can be used to manage special purpose
 * memory not managed by the regular kmalloc/kfree interface.
 */
struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
{
    struct gen_pool *pool = kmalloc_node(sizeof(*pool), GFP_KERNEL, nid);

    // Allocation failure is reported to the caller as NULL.
    if (!pool)
        return NULL;

    spin_lock_init(&pool->lock);
    INIT_LIST_HEAD(&pool->chunks);    // no memory in the pool yet
    pool->min_alloc_order = min_alloc_order;
    pool->algo = gen_pool_first_fit;  // default search strategy over the chunk bitmap
    pool->data = NULL;
    pool->name = NULL;

    return pool;
}
EXPORT_SYMBOL(gen_pool_create);

一个新创建的内存池没有内存可以分配。在这种状态下，它是相当无用的，所以首要任务之一通常是向内存池里添加内存。通过调用gen_pool_add()函数将在设备树中预留的内存加入到内存池中:

/**
 * gen_pool_add - add a new chunk of special memory to the pool
 * @pool: pool to add new memory chunk to
 * @addr: starting address of memory chunk to add to pool
 * @size: size in bytes of the memory chunk to add to pool
 * @nid: node id of the node the chunk structure and bitmap should be
 *       allocated on, or -1
 *
 * Add a new chunk of special memory to the specified pool.
 *
 * Returns 0 on success or a -ve errno on failure.
 */
static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
                   size_t size, int nid)
{
    // @addr is forwarded as the chunk's start address (the @virt argument);
    // -1 is passed as the physical address.
    return gen_pool_add_virt(pool, addr, -1, size, nid);
}

从下面的函数可以看出，参数addr实际传递给virt这个参数了，addr保存的实际是carveout heap的base addr，也就是在内存中的实际物理地址。然后将这个addr保存到了chunk中的start_addr。下面这个phys传的是-1,没有使用。个人的理解是chunk这个东西可以是实际的物理内存，也可以是一块虚拟内存，根据实际来选择传递这些参数。

/**
 * gen_pool_add_virt - add a new chunk of special memory to the pool
 * @pool: pool to add new memory chunk to
 * @virt: virtual starting address of memory chunk to add to pool
 * @phys: physical starting address of memory chunk to add to pool
 * @size: size in bytes of the memory chunk to add to pool
 * @nid: node id of the node the chunk structure and bitmap should be
 *       allocated on, or -1
 *
 * Add a new chunk of special memory to the specified pool.
 *
 * Returns 0 on success or a -ve errno on failure.
 */
int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
         size_t size, int nid)
{
    // Each bitmap bit stands for (1 << min_alloc_order) bytes of the chunk.
    const unsigned long bitmap_bits = size >> pool->min_alloc_order;
    // Descriptor plus the bitmap, with the bitmap rounded up to whole longs.
    const unsigned long alloc_bytes = sizeof(struct gen_pool_chunk) +
                BITS_TO_LONGS(bitmap_bits) * sizeof(long);
    // Zeroed allocation, so every bitmap bit starts out clear.
    struct gen_pool_chunk *chunk = vzalloc_node(alloc_bytes, nid);

    if (unlikely(!chunk))
        return -ENOMEM;

    // Record the range the chunk manages; end_addr is inclusive.
    chunk->start_addr = virt;
    chunk->end_addr = virt + size - 1;
    chunk->phys_addr = phys;
    atomic_long_set(&chunk->avail, size);

    // Publish the chunk on the pool's chunk list.
    spin_lock(&pool->lock);
    list_add_rcu(&chunk->next_chunk, &pool->chunks);
    spin_unlock(&pool->lock);

    return 0;
}
EXPORT_SYMBOL(gen_pool_add_virt);


/**
 * list_add_rcu - add a new entry to rcu-protected list
 * @new: new entry to be added
 * @head: list head to add it after
 *
 * Insert a new entry after the specified head.
 * This is good for implementing stacks.
 *
 * The caller must take whatever precautions are necessary
 * (such as holding appropriate locks) to avoid racing
 * with another list-mutation primitive, such as list_add_rcu()
 * or list_del_rcu(), running on this same list.
 * However, it is perfectly legal to run concurrently with
 * the _rcu list-traversal primitives, such as
 * list_for_each_entry_rcu().
 */
static inline void list_add_rcu(struct list_head *new, struct list_head *head)
{
    // Insert @new between @head and the entry currently following it.
    __list_add_rcu(new, head, head->next);
}

gen_pool_chunk结构体定义如下,chunk表示大块内存，为了和page作区分，可能包含多个page

/*
 *  General purpose special memory pool chunk descriptor.
 *
 *  The allocation bitmap is stored directly behind the descriptor, so the
 *  structure is allocated as sizeof(struct gen_pool_chunk) plus the bitmap
 *  size. @bits is a C99 flexible array member rather than a GNU zero-length
 *  array; sizeof() of the structure is unchanged.
 */
struct gen_pool_chunk {
    struct list_head next_chunk;    /* next chunk in pool */
    atomic_long_t avail;            /* bytes still available in this chunk */
    phys_addr_t phys_addr;        /* physical starting address of memory chunk */
    unsigned long start_addr;    /* start address of memory chunk */
    unsigned long end_addr;        /* end address of memory chunk (inclusive) */
    unsigned long bits[];        /* bitmap for allocating memory chunk */
};

3. carveout heap分配

在carveout_heap创建时，还会指定该类型heap的操作函数，当前定义了如下几种，只有allocate和free这里对该类型的heap做了定义，其余的三个用的都是ion自带的。下面主要分析下ion_carveout_heap_allocate()函数的内容

// Operations table installed on every carveout heap by
// ion_carveout_heap_create(). Only allocate/free use carveout-specific
// functions; the mapping callbacks use the ion_heap_* helpers.
static struct ion_heap_ops carveout_heap_ops = {
    .allocate = ion_carveout_heap_allocate,
    .free = ion_carveout_heap_free,
    .map_user = ion_heap_map_user,
    .map_kernel = ion_heap_map_kernel,
    .unmap_kernel = ion_heap_unmap_kernel,
};

在userspace层使用ion_alloc或者ioctl从carveout heap分配内存时，在驱动中实际会调用ion_carveout_heap_allocate()函数。

// Allocate @size bytes from the carveout heap and attach them to @buffer as
// a single-entry scatter/gather table.
// Returns 0 on success, -ENOMEM when the table or the carveout memory cannot
// be allocated, or the error reported by sg_alloc_table(). @flags is unused.
static int ion_carveout_heap_allocate(struct ion_heap *heap,
                      struct ion_buffer *buffer,
                      unsigned long size,
                      unsigned long flags)
{
    struct sg_table *sgt;
    phys_addr_t phys;
    int err;

    sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
    if (!sgt)
        return -ENOMEM;

    // The allocation is one contiguous range, so one entry suffices.
    err = sg_alloc_table(sgt, 1, GFP_KERNEL);
    if (err) {
        kfree(sgt);
        return err;
    }

    // The pool was populated with the heap's base address, so the value
    // returned here is used as a physical address.
    phys = ion_carveout_allocate(heap, size);
    if (phys == ION_CARVEOUT_ALLOCATE_FAIL) {
        sg_free_table(sgt);
        kfree(sgt);
        return -ENOMEM;
    }

    // Describe the allocated range in the table's only entry.
    sg_set_page(sgt->sgl, pfn_to_page(PFN_DOWN(phys)), size, 0);
    buffer->sg_table = sgt;

    return 0;
}

部分函数中的结构体定义：


// Header for a scatter/gather table; allocate the entries with sg_alloc_table().
struct sg_table {
    struct scatterlist *sgl;    /* the list */
    unsigned int nents;        /* number of mapped entries */
    unsigned int orig_nents;    /* original size of list */
};

sg_alloc_table函数的定义：

/**
 * sg_alloc_table - Allocate and initialize an sg table
 * @table:    The sg table header to use
 * @nents:    Number of entries in sg list
 * @gfp_mask:    GFP allocation mask
 *
 *  Description:
 *    Allocate and initialize an sg table. If @nents@ is larger than
 *    SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
 *
 **/
int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
    const int err = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
                     NULL, gfp_mask, sg_kmalloc);

    // On failure the table is torn down again before the error is returned.
    if (unlikely(err))
        __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);

    return err;
}
EXPORT_SYMBOL(sg_alloc_table);

接下来看下ion_carveout_heap_allocate()-->ion_carveout_allocate()函数

// Take @size bytes from the carveout heap's gen_pool.
// Returns the allocated address, or ION_CARVEOUT_ALLOCATE_FAIL when
// gen_pool_alloc() returns 0.
static phys_addr_t ion_carveout_allocate(struct ion_heap *heap,
                     unsigned long size)
{
    struct ion_carveout_heap *carveout_heap;
    unsigned long addr;

    // @heap is embedded in struct ion_carveout_heap; recover the container.
    carveout_heap = container_of(heap, struct ion_carveout_heap, heap);

    addr = gen_pool_alloc(carveout_heap->pool, size);
    if (addr)
        return addr;

    return ION_CARVEOUT_ALLOCATE_FAIL;
}

gen_pool_alloc函数的作用是从给定的内存池中分配一定大小的内存。内存池是一种用于管理特殊内存区域的数据结构。gen_pool_alloc函数返回分配的内存的地址，如果分配失败则返回0，关于gen_pool_alloc函数：

/**
 * gen_pool_alloc - allocate special memory from the pool
 * @pool: pool to allocate from
 * @size: number of bytes to allocate from the pool
 *
 * Allocate the requested number of bytes from the specified pool.
 * Uses the pool allocation function (with first-fit algorithm by default).
 * Can not be used in NMI handler on architectures without
 * NMI-safe cmpxchg implementation.
 */
unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
{
    // Allocate with the algorithm and data stored in the pool itself.
    return gen_pool_alloc_algo(pool, size, pool->algo, pool->data);
}
EXPORT_SYMBOL(gen_pool_alloc);

下面函数返回的就是一个物理地址，ion_carveout_allocate()->gen_pool_alloc()->gen_pool_alloc_algo()

/**
 * gen_pool_alloc_algo - allocate special memory from the pool
 * @pool: pool to allocate from
 * @size: number of bytes to allocate from the pool
 * @algo: algorithm passed from caller
 * @data: data passed to algorithm
 *
 * Allocate the requested number of bytes from the specified pool.
 * Uses the pool allocation function (with first-fit algorithm by default).
 * Can not be used in NMI handler on architectures without
 * NMI-safe cmpxchg implementation.
 */
unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
        genpool_algo_t algo, void *data)
{
    struct gen_pool_chunk *chunk;
    unsigned long addr = 0;
    int order = pool->min_alloc_order;
    unsigned long nbits, start_bit, end_bit, remain;

#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
    BUG_ON(in_nmi());
#endif

    if (size == 0)
        return 0;
    // Round the request up to whole allocation units; one bitmap bit
    // represents (1 << order) bytes.
    nbits = (size + (1UL << order) - 1) >> order;
    rcu_read_lock();
    // Walk the pool's chunks looking for one that can hold nbits
    // consecutive free bits.
    list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
        // Skip chunks that do not have enough bytes available.
        if (size > atomic_long_read(&chunk->avail))
            continue;
        // Search the whole bitmap: bits [0, end_bit).
        start_bit = 0;
        end_bit = chunk_size(chunk) >> order;
retry:
        // Ask the allocation algorithm (passed in by the caller) for a
        // starting bit; a result >= end_bit means nothing fits here.
        start_bit = algo(chunk->bits, end_bit, start_bit,
                 nbits, data, pool, chunk->start_addr);
        if (start_bit >= end_bit)
            continue;
        // Mark the bits as allocated. A non-zero result means not all bits
        // could be set (presumably a concurrent allocation took some of
        // them): undo the part that was set and search again.
        remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
        if (remain) {
            remain = bitmap_clear_ll(chunk->bits, start_bit,
                         nbits - remain);
            BUG_ON(remain);
            goto retry;
        }
        // Convert the bit index back to an address inside the chunk.
        addr = chunk->start_addr + ((unsigned long)start_bit << order);
        size = nbits << order;
        atomic_long_sub(size, &chunk->avail);
        break;
    }
    rcu_read_unlock();
    // 0 if no chunk could satisfy the request, otherwise the start address.
    return addr;
}
EXPORT_SYMBOL(gen_pool_alloc_algo);

从pool中分配好内存后，获取到的是一段内存的物理起始地址，还需要调用sg_set_page函数加入到sg_table中.

sg_set_page(table->sgl, pfn_to_page(PFN_DOWN(paddr)), size, 0);
buffer->sg_table = table;

sg_set_page函数的作用是把一个物理页及其长度、页内偏移写入分散/聚集列表（scatterlist）的一个条目。scatterlist是一种用于描述DMA（直接内存访问）数据块的数据结构。sg_set_page函数需要传入四个参数：scatterlist条目的指针，物理页的指针，长度和偏移量

/**
 * sg_set_page - Set sg entry to point at given page
 * @sg:         SG entry
 * @page:     The page
 * @len:     Length of data
 * @offset:     Offset into page
 *
 * Description:
 *   Use this function to set an sg entry pointing at a page, never assign
 *   the page directly. We encode sg table information in the lower bits
 *   of the page pointer. See sg_page() for looking up the page belonging
 *   to an sg entry.
 *
 **/
static inline void sg_set_page(struct scatterlist *sg, struct page *page,
                   unsigned int len, unsigned int offset)
{
    // Store the page via the helper; the page pointer is never assigned directly.
    sg_assign_page(sg, page);
    // Record the extent of the data within that page.
    sg->length = len;
    sg->offset = offset;
}

最终buffer->sg_table = table;将申请到的sg_table（分散/聚集表）赋值给ion_buffer结构体中的sg_table字段。

ION carveout heap简介

1. 简介

2. 创建

3. carveout heap分配

评论 (0)