最近在测试容器垂直扩容功能:通过探测容器的CPU使用率来判断该容器是否超过预设阈值,若超过则扩容、增加CPU limit来提升容器处理能力。于是对docker cgroup有了一定的了解和研究,记录下这些以便以后重温。
如果容器是通过k8s部署的,要找到其对应的cgroup目录并不那么容易,下面演示如何一步步定位。
[root@qa-k8s-2-node1 ~]# docker ps -a|grep noah-demo
a5741124c089 0cb09c6774bc "/sbin/vip_init" 22 hours ago Up 22 hours k8s_noah-demo-lj0qov_noah-demo-lj0qov-6bf497484b-dvllf_eric6_e5cbaea7-d62f-4909-8091-10bc63a7b0a0_0
de67a310fddb vipdocker-f9nub.vclound.com/google_containers/pause:0.8.0 "/pause" 22 hours ago Up 22 hours k8s_POD_noah-demo-lj0qov-6bf497484b-dvllf_eric6_e5cbaea7-d62f-4909-8091-10bc63a7b0a0_0
[root@qa-k8s-2-node1 ~]# docker inspect a5741124c089
[
{
"Id": "a5741124c089dc63e33b921c018eca1666bbe0e1114ac87123876a56a5e98c0e",
"Created": "2020-05-13T06:46:58.185455574Z",
"Path": "/sbin/vip_init",
"Args": [],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 43651,
"ExitCode": 0,
"Error": "",
"StartedAt": "2020-05-13T06:46:58.266310462Z",
"FinishedAt": "0001-01-01T00:00:00Z"
},
....... 略
"Isolation": "",
"CpuShares": 4096,
"Memory": 25769803776,
"NanoCpus": 0,
"CgroupParent": "/kubepods/pode5cbaea7-d62f-4909-8091-10bc63a7b0a0",
"BlkioWeight": 0,
"BlkioWeightDevice": null,
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 100000,
"CpuQuota": 400000,
"CpuRealtimePeriod": 0,
从上面得知CgroupParent=/kubepods/pode5cbaea7-d62f-4909-8091-10bc63a7b0a0。
由于CgroupParent本身已包含/kubepods前缀,宿主机上对应的cgroup目录为/sys/fs/cgroup/cpu${CgroupParent}
[root@qa-k8s-2-node1 pode5cbaea7-d62f-4909-8091-10bc63a7b0a0]# pwd
/sys/fs/cgroup/cpu/kubepods/pode5cbaea7-d62f-4909-8091-10bc63a7b0a0
[root@qa-k8s-2-node1 pode5cbaea7-d62f-4909-8091-10bc63a7b0a0]# ll
total 0
drwxr-xr-x 2 root root 0 May 13 14:46 a5741124c089dc63e33b921c018eca1666bbe0e1114ac87123876a56a5e98c0e
-rw-r--r-- 1 root root 0 May 13 14:46 cgroup.clone_children
--w--w--w- 1 root root 0 May 13 14:46 cgroup.event_control
-rw-r--r-- 1 root root 0 May 13 14:46 cgroup.procs
-r--r--r-- 1 root root 0 May 13 14:46 cpuacct.stat
-rw-r--r-- 1 root root 0 May 13 14:46 cpuacct.usage
-r--r--r-- 1 root root 0 May 13 14:46 cpuacct.usage_percpu
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.cfs_period_us
-rw-r--r-- 1 root root 0 May 13 19:55 cpu.cfs_quota_us
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.rt_period_us
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.rt_runtime_us
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.shares
-r--r--r-- 1 root root 0 May 13 14:46 cpu.stat
drwxr-xr-x 2 root root 0 May 13 14:46 de67a310fddbdb156f01be5fb930030adc81534bb52e83268c8fd70feb776e05
-rw-r--r-- 1 root root 0 May 13 14:46 notify_on_release
-rw-r--r-- 1 root root 0 May 13 14:46 tasks
上面为pod级别的cgroup文件。一个pod至少有两个容器:一个应用容器和一个pause容器,所以里面有两个子文件夹,文件夹名称即docker inspect xxxx输出中第一个字段"Id"的值。至此我们就找到了容器的cgroup文件夹了。
以上面的容器为例子:
[root@qa-k8s-2-node1 a5741124c089dc63e33b921c018eca1666bbe0e1114ac87123876a56a5e98c0e]# pwd
/sys/fs/cgroup/cpu/kubepods/pode5cbaea7-d62f-4909-8091-10bc63a7b0a0/a5741124c089dc63e33b921c018eca1666bbe0e1114ac87123876a56a5e98c0e
[root@qa-k8s-2-node1 a5741124c089dc63e33b921c018eca1666bbe0e1114ac87123876a56a5e98c0e]# ll
total 0
-rw-r--r-- 1 root root 0 May 13 14:46 cgroup.clone_children
--w--w--w- 1 root root 0 May 13 14:46 cgroup.event_control
-rw-r--r-- 1 root root 0 May 14 13:51 cgroup.procs
-r--r--r-- 1 root root 0 May 13 14:46 cpuacct.stat
-rw-r--r-- 1 root root 0 May 13 14:46 cpuacct.usage
-r--r--r-- 1 root root 0 May 13 14:46 cpuacct.usage_percpu
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.cfs_period_us
-rw-r--r-- 1 root root 0 May 13 19:55 cpu.cfs_quota_us
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.rt_period_us
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.rt_runtime_us
-rw-r--r-- 1 root root 0 May 13 14:46 cpu.shares
-r--r--r-- 1 root root 0 May 13 14:46 cpu.stat
-rw-r--r-- 1 root root 0 May 13 14:46 notify_on_release
-rw-r--r-- 1 root root 0 May 13 14:46 tasks
[root@qa-k8s-2-node1 4b0b72362cee918179d087abd3ad057fa0416b6f186c1572c8968b083d316cea]# cat cpu.cfs_quota_us cpu.cfs_period_us
400000
100000
上面这个例子说明container CPU limit = cpu.cfs_quota_us / cpu.cfs_period_us = 400000 / 100000 = 4核。
[root@qa-k8s-2-node1 4b0b72362cee918179d087abd3ad057fa0416b6f186c1572c8968b083d316cea]# cat cpu.stat
nr_periods 474029
nr_throttled 7
throttled_time 98324109
内存的cgroup文件路径与CPU类似,为/sys/fs/cgroup/memory${CgroupParent}/<容器Id>
[root@qa-k8s-2-node1 4b0b72362cee918179d087abd3ad057fa0416b6f186c1572c8968b083d316cea]# pwd
/sys/fs/cgroup/memory/kubepods/podcdab3d7c-fe83-4460-9d3a-f0756d594ab1/4b0b72362cee918179d087abd3ad057fa0416b6f186c1572c8968b083d316cea
[root@qa-k8s-2-node1 4b0b72362cee918179d087abd3ad057fa0416b6f186c1572c8968b083d316cea]# ll
total 0
-rw-r--r-- 1 root root 0 May 14 18:49 cgroup.clone_children
--w--w--w- 1 root root 0 May 14 18:49 cgroup.event_control
-rw-r--r-- 1 root root 0 May 15 09:28 cgroup.procs
-rw-r--r-- 1 root root 0 May 14 18:49 memory.failcnt
--w------- 1 root root 0 May 14 18:49 memory.force_empty
-rw-r--r-- 1 root root 0 May 14 18:49 memory.kmem.failcnt
-rw-r--r-- 1 root root 0 May 14 18:49 memory.kmem.limit_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.kmem.max_usage_in_bytes
-r--r--r-- 1 root root 0 May 14 18:49 memory.kmem.slabinfo
-rw-r--r-- 1 root root 0 May 14 18:49 memory.kmem.tcp.failcnt
-rw-r--r-- 1 root root 0 May 14 18:49 memory.kmem.tcp.limit_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.kmem.tcp.max_usage_in_bytes
-r--r--r-- 1 root root 0 May 14 18:49 memory.kmem.tcp.usage_in_bytes
-r--r--r-- 1 root root 0 May 14 18:49 memory.kmem.usage_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.limit_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.max_usage_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.memsw.failcnt
-rw-r--r-- 1 root root 0 May 14 18:49 memory.memsw.limit_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.memsw.max_usage_in_bytes
-r--r--r-- 1 root root 0 May 14 18:49 memory.memsw.usage_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.move_charge_at_immigrate
-r--r--r-- 1 root root 0 May 14 18:49 memory.numa_stat
-rw-r--r-- 1 root root 0 May 14 18:49 memory.oom_control
---------- 1 root root 0 May 14 18:49 memory.pressure_level
-rw-r--r-- 1 root root 0 May 14 18:49 memory.soft_limit_in_bytes
-r--r--r-- 1 root root 0 May 14 18:49 memory.stat
-rw-r--r-- 1 root root 0 May 14 18:49 memory.swappiness
-r--r--r-- 1 root root 0 May 14 18:49 memory.usage_in_bytes
-rw-r--r-- 1 root root 0 May 14 18:49 memory.use_hierarchy
-rw-r--r-- 1 root root 0 May 14 18:49 notify_on_release
-rw-r--r-- 1 root root 0 May 14 18:49 tasks
//todo 解释memory各文件含义。
原文:https://www.cnblogs.com/ericte/p/13613590.html