内存管理中常用api

作者 by adtxl / 2022-04-18 / 暂无评论 / 299 个足迹

整理自《奔跑吧Linux内核,张天飞著》

内存管理错综复杂,不仅要从用户态的相关API来窥探和理解Linux内核内存是如何运作,还要总结Linux内核中常用的内存管理相关的API。前文中已经总结了内存管理相关的数据结构的关系,下面总结内存管理中内核常用的API。

1. 页表相关

页表相关的API可以概括为如下4类。

  • 查询页表
  • 判断页表项的状态位
  • 修改页表
  • page和pfn的关系

查询页表:

//[arch/arm/include/asm/pgtable.h]

#define pgd_offset_k(addr)  pgd_offset(&init_mm, addr)
#define pgd_index(addr)     ((addr) >> PGDIR_SHIFT)
#define pgd_offset(mm, addr)    ((mm)->pgd + pgd_index(addr))
#define pte_index(addr)     (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(pmd,addr) (pmd_page_vaddr(*(pmd)) + pte_index(addr))
#define pte_offset_map(pmd,addr)    (__pte_map(pmd) + pte_index(addr))
#define pte_unmap(pte)          __pte_unmap(pte)
#define pte_offset_map_lock(mm, pmd, address, ptlp)

判断页表项的状态位:

#define pte_none(pte)       (!pte_val(pte))
#define pte_present(pte)    (pte_isset((pte), L_PTE_PRESENT))
#define pte_valid(pte)      (pte_isset((pte), L_PTE_VALID))
#define pte_accessible(mm, pte) (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
#define pte_write(pte)      (pte_isclear((pte), L_PTE_RDONLY))
#define pte_dirty(pte)      (pte_isset((pte), L_PTE_DIRTY))
#define pte_young(pte)      (pte_isset((pte), L_PTE_YOUNG))
#define pte_exec(pte)       (pte_isclear((pte), L_PTE_XN))

修改页表

// [arch/arm/include/asm/pgtable.h]

#define mk_pte(page,prot)   pfn_pte(page_to_pfn(page), prot)
static inline pte_t pte_mkdirty(pte_t pte)
static inline pte_t pte_mkold(pte_t pte)
static inline pte_t pte_mkyoung(pte_t pte)
static inline pte_t pte_wrprotect(pte_t pte)
static inline pte_t pte_mkwrite(pte_t pte)
static inline pte_t pte_mkclean(pte_t pte)
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                  pte_t *ptep, pte_t pteval)
// [mm/pgtable-generic.c]
int ptep_set_access_flags(struct vm_area_struct *vma,
              unsigned long address, pte_t *ptep,
              pte_t entry, int dirty)

page和pfn的关系

// [arch/arm/include/asm/pgtable.h]

#define pte_pfn(pte)        ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
#define pfn_pte(pfn,prot)   __pte(__pfn_to_phys(pfn) | pgprot_val(prot))

2. 内存分配

内核中常用的内存分配API如下:

分配和释放页面:

//[include/linux/gfp.h]

static inline struct page * alloc_pages(gfp_t gfp_mask, unsigned int order)

unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)

struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
            struct zonelist *zonelist, nodemask_t *nodemask)

void free_pages(unsigned long addr, unsigned int order)
void __free_pages(struct page *page, unsigned int order)

slab分配器:

struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
          unsigned long flags, void (*ctor)(void *))
void kmem_cache_destroy(struct kmem_cache *);

void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
void kmem_cache_free(struct kmem_cache *cachep, void *objp)

void *kmalloc(size_t size, gfp_t flags)
void kfree(const void *);

vmalloc相关:

void *vmalloc(unsigned long size)
void vfree(const void *addr)

3. VMA操作相关

struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
struct vm_area_struct *
find_vma_prev(struct mm_struct *mm, unsigned long addr,
            struct vm_area_struct **pprev)

static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, 
            unsigned long start_addr, unsigned long end_addr)

int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)

static int find_vma_links(struct mm_struct *mm, unsigned long addr,
        unsigned long end, struct vm_area_struct **pprev,
        struct rb_node ***rb_link, struct rb_node **rb_parent)

4. 页面相关

内存管理的复杂之处是和页面相关的操作,内核中常用的API函数归纳如下。

  • PG_XXX标志位操作。
  • page引用计数操作。
  • 匿名页面和KSM页面
  • 页面操作
  • 页面映射
  • 缺页中断
  • LRU和页面回收

PG_XXX标志位操作:

// [include/linux/page-flags.h]
static inline int Page##uname(const struct page *page)
static inline void SetPage##uname(struct page *page)
static inline void ClearPage##uname(struct page *page)
static inline int TestSetPage##uname(struct page *page)
static inline int TestClearPage##uname(struct page *page)

void lock_page(struct page *page)
int trylock_page(struct page *page)
void wait_on_page_bit(struct page *page, int bit_nr)
void wake_up_page(struct page *page, int bit)

// include/linux/pagemap.h
static inline void wait_on_page_locked(struct page *page)
static inline void wait_on_page_writeback(struct page *page)

page引用计数操作:

static inline void get_page(struct page *page)
void put_page(struct page *page)

#define page_cache_get(page)        get_page(page)
#define page_cache_release(page)    put_page(page)

static inline int page_count(struct page *page)
static inline int page_mapcount(struct page *page)
static inline int page_mapped(struct page *page)
static inline int put_page_testzero(struct page *page)

匿名页面和KSM页面:

static inline int PageAnon(struct page *page)
static inline int PageKsm(struct page *page)
struct address_space *page_mapping(struct page *page)
void page_add_new_anon_rmap(struct page *page,
    struct vm_area_struct *vma, unsigned long address)

页面操作:

static inline struct page *follow_page(struct vm_area_struct *vma,
        unsigned long address, unsigned int foll_flags)
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
                pte_t pte)
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, unsigned long nr_pages, int write,
        int force, struct page **pages, struct vm_area_struct **vmas)

页面映射:

static void create_mapping_late(phys_addr_t phys, unsigned long virt,
                  phys_addr_t size, pgprot_t prot)

unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
    unsigned long len, unsigned long prot, unsigned long flags,
    unsigned long pgoff, unsigned long *populate);

int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
            unsigned long pfn, unsigned long size, pgprot_t prot)

缺页中断:

static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)

static int handle_pte_fault(struct mm_struct *mm,
             struct vm_area_struct *vma, unsigned long address,
             pte_t *pte, pmd_t *pmd, unsigned int flags)

static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long address, pte_t *page_table, pmd_t *pmd,
        unsigned int flags)

static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long address, pte_t *page_table, pmd_t *pmd,
        spinlock_t *ptl, pte_t orig_pte)

LRU和页面回收:

void lru_cache_add(struct page *page)
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
bool zone_watermark_ok(struct zone *z, unsigned int order,
        unsigned long mark, int classzone_idx, int alloc_flags);

独特见解