当前位置: 首页 > news >正文

蔡文胜做的个人网站天津网络关键词排名

蔡文胜做的个人网站,天津网络关键词排名,五星级酒店网站建设方案,湖北网站建设检修1. 引言 在用户进程发生缺页异常时,Linux内核需要分配所需物理页面以及建立页表映射,来维持进程的正常内存使用需求。而对于分配物理页面仅依赖于buddy系统,对于小order页面的分配效率较低。因此Linux通过在每个cpu维护一个page链表(…

1. 引言

在用户进程发生缺页异常时,Linux内核需要分配所需的物理页面并建立页表映射,以满足进程正常的内存使用需求。如果物理页面的分配仅依赖buddy系统,小order页面的分配效率较低。因此Linux在每个cpu上维护一个page链表(percpu page list,简称pageset),用来满足小order页面的分配请求,提高页面分配效率。
下面我们重点来看一下,pageset的原理是什么,以及在Linux内核中是怎样实现和使用的。

2. pageset定义

/*
 * Excerpt of struct zone showing only the members related to the per-CPU
 * pageset. "......" marks members left out of this excerpt.
 */
struct zone {
	......
	struct pglist_data	*zone_pgdat;
	/* Per-CPU page lists of this zone; allocated in setup_zone_pageset(). */
	struct per_cpu_pages	__percpu *per_cpu_pageset;
	/* Per-CPU zone statistics; allocated in setup_zone_pageset(). */
	struct per_cpu_zonestat	__percpu *per_cpu_zonestats;
	/*
	 * the high and batch values are copied to individual pagesets for
	 * faster access
	 */
	int pageset_high;
	int pageset_batch;
	......
};

pageset的定义是放在zone里,每个zone里有一个per_cpu_pageset成员,用于这个zone内小order页面的快速分配。

3. pageset的初始化流程

调用流程

start_kernel(void)
---> setup_per_cpu_pageset();

内核启动时,start_kernel()通过调用setup_per_cpu_pageset()函数完成per_cpu_pageset的初始化。

/*
 * Allocate per cpu pagesets and initialize them.
 * Before this call only boot pagesets were available.
 *
 * Called once from start_kernel(). Walks every populated zone, gives it a
 * per-CPU pageset, then allocates the per-CPU node statistics of every
 * online node.
 */
void __init setup_per_cpu_pageset(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int __maybe_unused cpu;

	/* Set up the pageset of every populated zone. */
	for_each_populated_zone(zone)
		setup_zone_pageset(zone);

#ifdef CONFIG_NUMA
	/*
	 * Unpopulated zones continue using the boot pagesets.
	 * The numa stats for these pagesets need to be reset.
	 * Otherwise, they will end up skewing the stats of
	 * the nodes these zones are associated with.
	 */
	for_each_possible_cpu(cpu) {
		struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu);

		memset(pzstats->vm_numa_event, 0,
		       sizeof(pzstats->vm_numa_event));
	}
#endif

	for_each_online_pgdat(pgdat)
		pgdat->per_cpu_nodestats =
			alloc_percpu(struct per_cpu_nodestat);
}

/*
 * Allocate and initialize the per-CPU pageset (and per-CPU zone statistics)
 * of @zone for every possible CPU.
 */
void __meminit setup_zone_pageset(struct zone *zone)
{
	int cpu;

	/* Size may be 0 on !SMP && !NUMA */
	if (sizeof(struct per_cpu_zonestat) > 0)
		zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat);

	/* Per-CPU storage for this zone's pageset. */
	zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages);

	/* Initialize the pageset instance of every possible CPU. */
	for_each_possible_cpu(cpu) {
		struct per_cpu_pages *pcp;
		struct per_cpu_zonestat *pzstats;

		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
		per_cpu_pages_init(pcp, pzstats);
	}

	/*
	 * per_cpu_pages_init() only installs boot-safe high/batch values.
	 * NOTE(review): presumably this call computes the real ones for the
	 * zone - confirm against zone_set_pageset_high_and_batch().
	 */
	zone_set_pageset_high_and_batch(zone, 0);
}

/*
 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
 * costly to service.  That is between allocation orders which should
 * coalesce naturally under reasonable reclaim pressure and those which
 * will not.
 */
/*
 * Also the highest "low" order served from the per-CPU page lists:
 * pcp_allowed_order() accepts orders 0..PAGE_ALLOC_COSTLY_ORDER (0-3).
 */
#define PAGE_ALLOC_COSTLY_ORDER 3

/*
 * Page migrate types. Only the types listed before MIGRATE_PCPTYPES get
 * their own per-CPU page lists. "......" marks enumerators left out of
 * this excerpt.
 */
enum migratetype {
	MIGRATE_UNMOVABLE,
	MIGRATE_MOVABLE,
	MIGRATE_RECLAIMABLE,
	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
	......
	MIGRATE_TYPES
};
/** One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list* for THP which will usually be GFP_MOVABLE. Even if it is another type,* it should not contribute to serious fragmentation causing THP allocation* failures.*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define NR_PCP_THP 1
#else
#define NR_PCP_THP 0
#endif
#define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
#define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats)
{int pindex;memset(pcp, 0, sizeof(*pcp));memset(pzstats, 0, sizeof(*pzstats));spin_lock_init(&pcp->lock);for (pindex = 0; pindex < NR_PCP_LISTS; pindex++) // 初始化pcp中不同迁移类型,不同order用来存放页面的链表INIT_LIST_HEAD(&pcp->lists[pindex]);/** Set batch and high values safe for a boot pageset. A true percpu* pageset's initialization will update them subsequently. Here we don't* need to be as careful as pageset_update() as nobody can access the* pageset yet.*/pcp->high = BOOT_PAGESET_HIGH;pcp->batch = BOOT_PAGESET_BATCH;pcp->free_factor = 0;
}

4. pageset的页面分配(用来分配order为[0-3]的页面;开启CONFIG_TRANSPARENT_HUGEPAGE时还包括order等于pageblock_order的页面)

调用流程

alloc_pages()
---> alloc_pages_node()
-------> __alloc_pages_node()
----------> __alloc_pages()
-------------> get_page_from_freelist()
-----------------> rmqueue()
/*
 * Allocate a page from the given zone.
 * Use pcplists for THP or "cheap" high-order allocations.
 *
 * Returns the allocated page, or NULL if neither the per-CPU lists nor the
 * buddy allocator could provide one.
 */

/*
 * Do not instrument rmqueue() with KMSAN. This function may call
 * __msan_poison_alloca() through a call to set_pfnblock_flags_mask().
 * If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
 * may call rmqueue() again, which will result in a deadlock.
 */
__no_sanitize_memory
static inline
struct page *rmqueue(struct zone *preferred_zone,
			struct zone *zone, unsigned int order,
			gfp_t gfp_flags, unsigned int alloc_flags,
			int migratetype)
{
	struct page *page;

	/*
	 * We most definitely don't want callers attempting to
	 * allocate greater than order-1 page units with __GFP_NOFAIL.
	 */
	WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));

	/* Fast path: only orders accepted by pcp_allowed_order(). */
	if (likely(pcp_allowed_order(order))) {
		/*
		 * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
		 * we need to skip it when CMA area isn't allowed.
		 */
		if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
				migratetype != MIGRATE_MOVABLE) {
			/* Try the per-CPU page lists first. */
			page = rmqueue_pcplist(preferred_zone, zone, order,
					migratetype, alloc_flags);
			if (likely(page))
				goto out;
		}
	}

	/* Slow path: take the page from the buddy allocator. */
	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
							migratetype);

out:
	/* Separate test+clear to avoid unnecessary atomics */
	if (unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) {
		clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
	}

	VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
	return page;
}

/*
 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
 * costly to service.  That is between allocation orders which should
 * coalesce naturally under reasonable reclaim pressure and those which
 * will not.
 */
#define PAGE_ALLOC_COSTLY_ORDER 3

/*
 * Tell whether pages of the given order may be served from / returned to
 * the per-CPU page lists.
 *
 * Returns true for orders 0..PAGE_ALLOC_COSTLY_ORDER (inclusive) and, with
 * CONFIG_TRANSPARENT_HUGEPAGE, also for order == pageblock_order; false for
 * every other order.
 */
static inline bool pcp_allowed_order(unsigned int order)
{
	/* Inclusive bound: order PAGE_ALLOC_COSTLY_ORDER itself is allowed. */
	if (order <= PAGE_ALLOC_COSTLY_ORDER)
		return true;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (order == pageblock_order)
		return true;
#endif
	return false;
}

接下来我们看一下rmqueue_pcplist()是如何从pageset中分配页面的

/* Lock and remove page from the per-cpu list */
static struct page *rmqueue_pcplist(struct zone *preferred_zone,struct zone *zone, unsigned int order,int migratetype, unsigned int alloc_flags)
{struct per_cpu_pages *pcp;struct list_head *list;struct page *page;unsigned long flags;unsigned long __maybe_unused UP_flags;/** spin_trylock may fail due to a parallel drain. In the future, the* trylock will also protect against IRQ reentrancy.*/pcp_trylock_prepare(UP_flags);pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); // 获取当前cpu上的per_cpu_pages对象if (!pcp) {pcp_trylock_finish(UP_flags);return NULL;}/** On allocation, reduce the number of pages that are batch freed.* See nr_pcp_free() where free_factor is increased for subsequent* frees.*/pcp->free_factor >>= 1;list = &pcp->lists[order_to_pindex(migratetype, order)]; // 根据迁移类型和order大小找寻要从哪个页面链表中摘取页面page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list); // 摘取页面pcp_spin_unlock_irqrestore(pcp, flags);pcp_trylock_finish(UP_flags);if (page) { // 如果分配页面成功,做一些统计__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);zone_statistics(preferred_zone, zone, 1);}return page; // 返回从pageset中分配到的页面
}static inline unsigned int order_to_pindex(int migratetype, int order) // 根据迁移类型和要分配的order计算要从哪条页面链表中摘取页面,这个计算index的逻辑和一开始pageset初始化时一致(看不明白,可以往前翻找一下)
{int base = order;#ifdef CONFIG_TRANSPARENT_HUGEPAGEif (order > PAGE_ALLOC_COSTLY_ORDER) {VM_BUG_ON(order != pageblock_order);return NR_LOWORDER_PCP_LISTS;}
#elseVM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
#endifreturn (MIGRATE_PCPTYPES * base) + migratetype;
}

接下来看看__rmqueue_pcplist()函数内部是如何实现的:

/* Remove page from the per-cpu list, caller must protect the list */
static inline
struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,int migratetype,unsigned int alloc_flags,struct per_cpu_pages *pcp,struct list_head *list)
{struct page *page;do {if (list_empty(list)) { // 如果当前list中没有页面,则需要从buddy系统中请求页面int batch = READ_ONCE(pcp->batch);int alloced;/** Scale batch relative to order if batch implies* free pages can be stored on the PCP. Batch can* be 1 for small zones or for boot pagesets which* should never store free pages as the pages may* belong to arbitrary zones.*/if (batch > 1)batch = max(batch >> order, 2);alloced = rmqueue_bulk(zone, order, // 从buddy中批量申请batch个order大小、migratetype类型的页面batch, list,migratetype, alloc_flags);pcp->count += alloced << order;if (unlikely(list_empty(list))) // 如果从buddy系统中申请不到页面,则返回NULLreturn NULL;}page = list_first_entry(list, struct page, pcp_list); // 从list中获取页面list_del(&page->pcp_list); // 删除页面pcp->count -= 1 << order; // pcp页面个数更新} while (check_new_pcp(page, order));return page;
}/** Obtain a specified number of elements from the buddy allocator, all under* a single hold of the lock, for efficiency.  Add them to the supplied list.* Returns the number of new pages which were placed at *list.*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,unsigned long count, struct list_head *list,int migratetype, unsigned int alloc_flags)
{int i, allocated = 0;/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */spin_lock(&zone->lock);for (i = 0; i < count; ++i) { // 重复count次struct page *page = __rmqueue(zone, order, migratetype, // 每次从zone的buddy系统中申请一个对应order和migratetype的页面alloc_flags);if (unlikely(page == NULL)) // 从buddy系统中申请不到内存,则退出,否则继续申请break;if (unlikely(check_pcp_refill(page, order)))continue;/** Split buddy pages returned by expand() are received here in* physical page order. The page is added to the tail of* caller's list. From the callers perspective, the linked list* is ordered by page number under some conditions. This is* useful for IO devices that can forward direction from the* head, thus also in the physical page order. This is useful* for IO devices that can merge IO requests if the physical* pages are ordered properly.*/list_add_tail(&page->pcp_list, list); // 将申请到的页面,挂载到pageset中的页面链表中allocated++; // 已分配的个数加一if (is_migrate_cma(get_pcppage_migratetype(page)))__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,-(1 << order));}/** i pages were removed from the buddy list even if some leak due* to check_pcp_refill failing so adjust NR_FREE_PAGES based* on i. Do not confuse with 'allocated' which is the number of* pages added to the pcp list.*/__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));spin_unlock(&zone->lock);return allocated; // 返回已分配页面个数
}

5. pageset的页面释放

调用流程

free_pages()
---> __free_pages()
------> free_the_page()
static inline void free_the_page(struct page *page, unsigned int order)
{if (pcp_allowed_order(order))		/* Via pcp? */ // 检查该order页面是否是从pageset中分配的free_unref_page(page, order); // 如果是的话,则释放到pageset中else__free_pages_ok(page, order, FPI_NONE);
}/** Free a pcp page*/
void free_unref_page(struct page *page, unsigned int order)
{unsigned long flags;unsigned long __maybe_unused UP_flags;struct per_cpu_pages *pcp;struct zone *zone;unsigned long pfn = page_to_pfn(page);int migratetype;if (!free_unref_page_prepare(page, pfn, order))return;/** We only track unmovable, reclaimable and movable on pcp lists.* Place ISOLATE pages on the isolated list because they are being* offlined but treat HIGHATOMIC as movable pages so we can get those* areas back if necessary. Otherwise, we may have to free* excessively into the page allocator*/migratetype = get_pcppage_migratetype(page);if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {if (unlikely(is_migrate_isolate(migratetype))) {free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);return;}migratetype = MIGRATE_MOVABLE;}zone = page_zone(page);pcp_trylock_prepare(UP_flags);pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); // 获取当前cpu的pageset对象if (pcp) {free_unref_page_commit(zone, pcp, page, migratetype, order); // 调用该函数将页面释放到pageset中pcp_spin_unlock_irqrestore(pcp, flags);} else {free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);}pcp_trylock_finish(UP_flags);
}static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,struct page *page, int migratetype,unsigned int order)
{int high;int pindex;bool free_high;__count_vm_events(PGFREE, 1 << order);pindex = order_to_pindex(migratetype, order); // 计算该migratetype和order应该对应pageset哪条页面链表list_add(&page->pcp_list, &pcp->lists[pindex]); // 将该页面重新挂载到该链表中,用于后续分配pcp->count += 1 << order; // 更新pageset页面个数/** As high-order pages other than THP's stored on PCP can contribute* to fragmentation, limit the number stored when PCP is heavily* freeing without allocation. The remainder after bulk freeing* stops will be drained from vmstat refresh context.*/free_high = (pcp->free_factor && order && order <= PAGE_ALLOC_COSTLY_ORDER);high = nr_pcp_high(pcp, zone, free_high);if (pcp->count >= high) { // 计算当前pageset保存的页面数量是否超过high值int batch = READ_ONCE(pcp->batch); // 如果超过,则需要将batch个页面返还给buddy系统free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch, free_high), pcp, pindex); // 将多余页面返还给buddy系统}
}/** Frees a number of pages from the PCP lists* Assumes all pages on list are in same zone.* count is the number of pages to free.*/
static void free_pcppages_bulk(struct zone *zone, int count,struct per_cpu_pages *pcp,int pindex)
{int min_pindex = 0;int max_pindex = NR_PCP_LISTS - 1;unsigned int order;bool isolated_pageblocks;struct page *page;/** Ensure proper count is passed which otherwise would stuck in the* below while (list_empty(list)) loop.*/count = min(pcp->count, count);/* Ensure requested pindex is drained first. */pindex = pindex - 1;/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */spin_lock(&zone->lock);isolated_pageblocks = has_isolate_pageblock(zone);while (count > 0) { // 不断地将页面返还给buddy系统struct list_head *list;int nr_pages;/* Remove pages from lists in a round-robin fashion. */do {if (++pindex > max_pindex)pindex = min_pindex;list = &pcp->lists[pindex]; // 获取到页面所在链表if (!list_empty(list)) // 如果链表不为空,则跳出循环break;if (pindex == max_pindex)max_pindex--;if (pindex == min_pindex)min_pindex++;} while (1);order = pindex_to_order(pindex);nr_pages = 1 << order;do {int mt;page = list_last_entry(list, struct page, pcp_list); // 获取当前list中最后一个页面mt = get_pcppage_migratetype(page); // 获取页面的迁移类型/* must delete to avoid corrupting pcp list */list_del(&page->pcp_list); // 将页面从list中删除count -= nr_pages; // 减少要释放到页面数量pcp->count -= nr_pages; // 更新pageset页面个数if (bulkfree_pcp_prepare(page))continue;/* MIGRATE_ISOLATE page should not go to pcplists */VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);/* Pageblock could have been isolated meanwhile */if (unlikely(isolated_pageblocks))mt = get_pageblock_migratetype(page);__free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE); // 释放页面trace_mm_page_pcpu_drain(page, order, mt);} while (count > 0 && !list_empty(list));}spin_unlock(&zone->lock);
}

至此Linux pageset初始化和使用流程介绍完毕,感谢各位读者浏览!


文章转载自:
http://goal.hkpn.cn
http://kapellmeister.hkpn.cn
http://rookling.hkpn.cn
http://bim.hkpn.cn
http://makeable.hkpn.cn
http://quadrivalence.hkpn.cn
http://dehydrate.hkpn.cn
http://magpie.hkpn.cn
http://ed.hkpn.cn
http://elegant.hkpn.cn
http://conventicle.hkpn.cn
http://synonymist.hkpn.cn
http://bronchotomy.hkpn.cn
http://matara.hkpn.cn
http://flashtube.hkpn.cn
http://laurentian.hkpn.cn
http://tessella.hkpn.cn
http://disadapt.hkpn.cn
http://coadapted.hkpn.cn
http://overroast.hkpn.cn
http://regulable.hkpn.cn
http://uhf.hkpn.cn
http://ideology.hkpn.cn
http://quagmiry.hkpn.cn
http://streetward.hkpn.cn
http://premiate.hkpn.cn
http://sobranje.hkpn.cn
http://bally.hkpn.cn
http://chausses.hkpn.cn
http://regerminate.hkpn.cn
http://dissociability.hkpn.cn
http://hyaloplasmic.hkpn.cn
http://buff.hkpn.cn
http://lcl.hkpn.cn
http://spooky.hkpn.cn
http://empiric.hkpn.cn
http://chlorate.hkpn.cn
http://decury.hkpn.cn
http://genevese.hkpn.cn
http://attractive.hkpn.cn
http://irish.hkpn.cn
http://polydactylous.hkpn.cn
http://redly.hkpn.cn
http://cert.hkpn.cn
http://goatherd.hkpn.cn
http://hermoupolis.hkpn.cn
http://overtype.hkpn.cn
http://cooperationist.hkpn.cn
http://explore.hkpn.cn
http://surreptitious.hkpn.cn
http://asyllabic.hkpn.cn
http://semilog.hkpn.cn
http://semiabstract.hkpn.cn
http://pally.hkpn.cn
http://rookling.hkpn.cn
http://ramulose.hkpn.cn
http://turnip.hkpn.cn
http://buccaneer.hkpn.cn
http://terpsichorean.hkpn.cn
http://gluten.hkpn.cn
http://normanise.hkpn.cn
http://higher.hkpn.cn
http://motocar.hkpn.cn
http://ecopornography.hkpn.cn
http://parthenon.hkpn.cn
http://chaperone.hkpn.cn
http://alitalia.hkpn.cn
http://kingpin.hkpn.cn
http://byword.hkpn.cn
http://isospore.hkpn.cn
http://bacteremic.hkpn.cn
http://catecholamine.hkpn.cn
http://impenitence.hkpn.cn
http://dreambox.hkpn.cn
http://unengaging.hkpn.cn
http://enframe.hkpn.cn
http://inflectional.hkpn.cn
http://banns.hkpn.cn
http://magellan.hkpn.cn
http://epndb.hkpn.cn
http://lip.hkpn.cn
http://forzando.hkpn.cn
http://jalor.hkpn.cn
http://designatum.hkpn.cn
http://proline.hkpn.cn
http://dash.hkpn.cn
http://preservatory.hkpn.cn
http://flatting.hkpn.cn
http://irrationalism.hkpn.cn
http://dex.hkpn.cn
http://spoliation.hkpn.cn
http://cocarboxylase.hkpn.cn
http://ruefulness.hkpn.cn
http://bilbao.hkpn.cn
http://ewer.hkpn.cn
http://zippy.hkpn.cn
http://lo.hkpn.cn
http://icterus.hkpn.cn
http://slanderer.hkpn.cn
http://emotionalize.hkpn.cn
http://www.hrbkazy.com/news/72742.html

相关文章:

  • ppt模板网站哪个免费推广官网
  • 微信小网站是怎么做的百度关键词分析工具
  • 网站 linux 服务器配置vue seo优化
  • 吉安网站建设优化服务软文广告代理平台
  • 西安莲湖区建设局网站网络营销专业学什么
  • 哪里购买域名seo国外推广软件
  • 西安建设网浙江关键词优化
  • 长沙做信息seo网站百度通用网址
  • ppt模板大全免费下载网站软文类型
  • 用dw可以做动态网站吗培训班管理系统 免费
  • 济南软件外包公司女生做sem还是seo
  • 吴中区企业网站制作哪家靠谱如何进行网络营销推广
  • 开县网站建设seo优化推广
  • 驻马店网站制作成都高薪seo
  • 广州做网站网络公司昆明百度关键词优化
  • 郑州网站服务外包公司seo搜索优化公司报价
  • 做企业推广去哪个网站比较好营销型网站建设的价格
  • 湖南新冠疫情最新情况爱采购seo
  • wordpress恢复数据库菜单不见广州seo运营
  • 湘潭大学迎新自助网站今日热点事件
  • 网站设计制作哪种快教育培训机构排名
  • 一级a做爰片免费网站 小说企业站seo案例分析
  • 网上去哪里找做网站的中国十大知名网站
  • 做网站电商云数据库有用吗互联网行业都有哪些工作
  • 贵州省建设银行网站电商线上推广
  • 德国网站域名后缀nba最新交易信息
  • 千图网官网免费图广州seo好找工作吗
  • 互联网趋势发展前景北京seo推广服务
  • 做网站都去哪里找模板财经新闻最新消息
  • 网站开发需要的技术人才广州百度竞价开户