Linux 中带权内存交错分配实现方式

mm/mempolicy.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/*
* Return a nodemask representing a mempolicy for filtering nodes for
* page allocation, together with preferred node id (or the input node id).
*
* @gfp: allocation flags; consulted only by the MPOL_BIND case (zone check
*       and the __GFP_THISNODE warning).
* @pol: memory policy whose mode, nodes and home_node are read.
* @ilx: interleave index; NO_INTERLEAVE_INDEX selects the *_nodes(pol)
*       helpers, any other value is passed on to the *_nid(pol, ilx) helpers.
* @nid: in/out node id; overwritten by the modes that choose a node, left
*       untouched otherwise.
*
* Returns &pol->nodes when the mode restricts allocation to the policy's
* nodes, or NULL when no nodemask filtering applies.
*/
static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol,
pgoff_t ilx, int *nid)
{
/* NULL means "no nodemask restriction" unless a case below sets it */
nodemask_t *nodemask = NULL;

switch (pol->mode) {
case MPOL_PREFERRED:
/* Override input node id */
*nid = first_node(pol->nodes);
break;
case MPOL_PREFERRED_MANY:
/* Always filter by the policy nodes; home_node, if set, replaces *nid */
nodemask = &pol->nodes;
if (pol->home_node != NUMA_NO_NODE)
*nid = pol->home_node;
break;
case MPOL_BIND:
/* Restrict to nodemask (but not on lower zones) */
if (apply_policy_zone(pol, gfp_zone(gfp)) &&
cpuset_nodemask_valid_mems_allowed(&pol->nodes))
nodemask = &pol->nodes;
/* home_node, if set, replaces *nid even when no nodemask was applied */
if (pol->home_node != NUMA_NO_NODE)
*nid = pol->home_node;
/*
* __GFP_THISNODE shouldn't even be used with the bind policy
* because we might easily break the expectation to stay on the
* requested node and not break the policy.
*/
WARN_ON_ONCE(gfp & __GFP_THISNODE);
break;
case MPOL_INTERLEAVE:
/* Override input node id */
*nid = (ilx == NO_INTERLEAVE_INDEX) ?
interleave_nodes(pol) : interleave_nid(pol, ilx);
break;
case MPOL_WEIGHTED_INTERLEAVE:
/*
* Override input node id via the weighted_interleave_* helpers:
* without an index use weighted_interleave_nodes(), otherwise
* weighted_interleave_nid() with the caller's ilx.
*/
*nid = (ilx == NO_INTERLEAVE_INDEX) ?
weighted_interleave_nodes(pol) :
weighted_interleave_nid(pol, ilx);
break;
/* Any other mode: nodemask stays NULL and *nid is left as passed in */
}

return nodemask;
}

当使用 MPOL_WEIGHTED_INTERLEAVE 时,节点的选择取决于 ilx 是否为 NO_INTERLEAVE_INDEX:若是,则调用 weighted_interleave_nodes(pol);否则调用 weighted_interleave_nid(pol, ilx)。

使用 eBPF(bpftrace)进行内核插桩

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Trace which weighted-interleave helper the kernel actually calls.
#
# Attaches a kprobe to weighted_interleave_nodes and to
# weighted_interleave_nid. Each hit is counted twice: once keyed by
# (kernel stack, user stack) and once keyed by (comm, pid).
# Every 5 seconds the script prints a HH:MM:SS timestamp, dumps the four
# maps (@nodes_stack, @nodes_proc, @nid_stack, @nid_proc) and then clears
# them, so each report covers only the preceding interval.
# Run via sudo, as written below.
sudo bpftrace -e '                                             
kprobe:weighted_interleave_nodes {
@nodes_stack[kstack, ustack] = count();
@nodes_proc[comm, pid] = count();
}

kprobe:weighted_interleave_nid {
@nid_stack[kstack, ustack] = count();
@nid_proc[comm, pid] = count();
}

interval:s:5 {
printf("\n=== %s ===\n", strftime("%H:%M:%S", nsecs));

printf("=== Nodes - By Call Stack ===\n");
print(@nodes_stack);
printf("=== Nodes - By Process ===\n");
print(@nodes_proc);

printf("=== Nid - By Call Stack ===\n");
print(@nid_stack);
printf("=== Nid - By Process ===\n");
print(@nid_proc);

clear(@nodes_stack);
clear(@nodes_proc);
clear(@nid_stack);
clear(@nid_proc);
}'

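发现 weighted_interleave_nodes 和 weighted_interleave_nid 两条路径都会被触发:前者对应 ilx 为 NO_INTERLEAVE_INDEX 的分配,后者对应带有交错索引 ilx 的分配。
weighted_interleave_nid 的触发调用栈为: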

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
  @nid_stack[
weighted_interleave_nid+1
policy_nodemask+74
alloc_pages_mpol+49
vma_alloc_folio_noprof+110
do_anonymous_page+810
__handle_mm_fault+2667
handle_mm_fault+170
do_user_addr_fault+529
exc_page_fault+116
asm_exc_page_fault+38
,
0x55555556ecd1
0
]: 1413

=== Nid - By Process ===
@nid_proc[numactl, 166213]: 1
@nid_proc[fuse, 166213]: 4
@nid_proc[lulesh2.0, 166213]: 45563

weighted_interleave_nodes 的触发调用栈为:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
@nodes_stack[
weighted_interleave_nodes+1
policy_nodemask+333
alloc_pages_mpol+49
alloc_pages_noprof+80
pte_alloc_one+26
do_huge_pmd_anonymous_page+207
__handle_mm_fault+2789
handle_mm_fault+170
do_user_addr_fault+529
exc_page_fault+116
asm_exc_page_fault+38
,
__memset_avx512_unaligned_erms+361
0
]: 8530
=== Nodes - By Process ===
@nodes_proc[lulesh2.0, 168848]: 8563

本站由 Zane Jiang 使用 Stellar 1.33.1 主题创建,一款很棒的 Hexo 主题!

总访问 次 || 本页访问
总访客 人 || 本页访客