安装依赖:
# Install perl (interpreter for the FlameGraph *.pl scripts) and perf in a
# single transaction. The package is named without an architecture suffix so
# the same command also resolves on non-x86_64 hosts.
# NOTE(review): --nogpgcheck skips package signature verification; drop it
# once the repository GPG keys are installed.
yum -y install --nogpgcheck perl perf
# A shallow clone is sufficient: only the current scripts are used.
git clone --depth 1 https://github.com/brendangregg/FlameGraph.git
将FlameGraph加入PATH:
# Option 1: put the cloned FlameGraph directory on PATH for future login
# shells. The directory is quoted inside the generated file so a checkout
# path containing spaces still works.
echo "export PATH=\"$(pwd)/FlameGraph:\$PATH\"" > /etc/profile.d/perf.sh
# Option 2: copy the scripts into /usr/local/bin and make sure that directory
# is on PATH for the current shell.
cp -r -- "$(pwd)"/FlameGraph/* /usr/local/bin/
# Compare whole PATH entries (a plain grep would also accept e.g.
# /usr/local/bin2 and would print PATH to the terminal).
case ":$PATH:" in
  *:/usr/local/bin:*) ;;
  *) export PATH="$PATH:/usr/local/bin" ;;
esac
录制函数调用取样数据:
# Sample the cpu-clock event with call graphs (-g) from one process for as
# long as the trailing sleep runs; replace the three placeholders first.
perf record --event=cpu-clock --freq=每秒取样频次 --pid=进程ID -g -- sleep 取样秒数
也可以根据进程名查找进程ID后录制:
# Look the PID up by name. -p takes a comma-separated PID list, so all matches
# are joined with paste; unjoined, several matches would be word-split into
# stray arguments and break the perf command line.
perf record -e cpu-clock -F 每秒取样频次 -p "$(ps aux | grep '进程名' | grep -v 'grep' | awk '{print $2}' | paste -sd, -)" -g -- sleep 取样秒数
如果是多线程程序,也可以只录制指定的线程(指定线程ID):
# -t (--tid) selects an existing thread ID; -p expects a process ID.
perf record -e cpu-clock -F 每秒取样频次 -t 线程ID -g -- sleep 取样秒数
生成On-CPU火焰图:
得到采样数据perf.data后再进行以下步骤:
# Redirect stdout only. With "&>" any warning printed on stderr ends up inside
# the data files, and a POSIX sh (e.g. dash) parses "cmd &> file" as
# "cmd & > file": the command is backgrounded and the file is left empty.
perf script -i perf.data > perf.unfold
stackcollapse-perf.pl perf.unfold > perf.folded
flamegraph.pl perf.folded > perf.svg
将perf.svg用浏览器打开。
整理成脚本如下:
perf-record.sh
#!/bin/sh
# perf-record.sh - record an On-CPU profile of a running program and render
# it as a flame graph (oncpu.svg in the current directory).
#
# Usage: perf-record.sh prog_name [sleep_sec]
#   prog_name  pattern matched with grep against the output of `ps aux`
#   sleep_sec  sampling duration in seconds (default: 30)
#
# Requires perf, stackcollapse-perf.pl and flamegraph.pl on PATH.
# Intermediate files: perf.data, perf.unfold, perf.folded.

if [ -z "${1:-}" ]; then
  echo "usage: $0 prog_name [sleep_sec]"
  exit 1
fi
prog_name=$1
sleep_time=${2:-30}

# Resolve the target once and reuse it. Lines mentioning grep, this script
# ('perf-record') or gdb are dropped so only the program itself remains.
# All matches are joined with commas, the list format accepted by perf -p.
pids=$(ps aux | grep -- "$prog_name" | grep -v 'grep' \
  | grep -v 'perf-record' | grep -v 'gdb' | awk '{print $2}' | paste -sd, -)
if [ -z "$pids" ]; then
  echo "$0: no running process matches '$prog_name'" >&2
  exit 1
fi

echo "prog_name:$prog_name"
echo "pid:$pids"
echo "sleep_time:$sleep_time sec"

perf record -F 10000 -p "$pids" -g -- sleep "$sleep_time"
# stdout only: stderr must not be mixed into the stack data, and "&>" is not
# valid redirection syntax in a POSIX sh.
perf script -i perf.data > perf.unfold
stackcollapse-perf.pl perf.unfold > perf.folded
flamegraph.pl perf.folded > oncpu.svg
也可以通过perf命令实时查看top函数:
perf-top.sh
#!/bin/sh
# perf-top.sh - show the hottest functions of a running program live with
# `perf top`, including call graphs.
#
# Usage: perf-top.sh prog_name
#   prog_name  pattern matched with grep against the output of `ps aux`

if [ -z "${1:-}" ]; then
  echo "usage: $0 prog_name"
  exit 1
fi
prog_name=$1

# Resolve the target once and reuse it. Lines mentioning grep, this script
# ('perf-top') or gdb are dropped; all matches are joined with commas, the
# list format accepted by perf -p.
pids=$(ps aux | grep -- "$prog_name" | grep -v 'grep' \
  | grep -v 'perf-top' | grep -v 'gdb' | awk '{print $2}' | paste -sd, -)
if [ -z "$pids" ]; then
  echo "$0: no running process matches '$prog_name'" >&2
  exit 1
fi

echo "prog_name:$prog_name"
echo "pid:$pids"

perf top -g -F 10000 -p "$pids"
生成Off-CPU火焰图:
以 CentOS 8 为例,安装依赖:
# Install the debug kernel packages (yum asks for confirmation: no -y here).
yum install --nogpgcheck kernel-debug kernel-debug-devel
# Write 1 to the kernel.sched_schedstats switch via procfs.
printf '%s\n' 1 > /proc/sys/kernel/sched_schedstats
生成off-cpu脚本:
perf-offcpu-time.sh(带时间转换)
#!/bin/sh
# perf-offcpu-time.sh - record scheduler events of a running program and
# render an Off-CPU flame graph weighted by milliseconds (offcpu-time.svg).
#
# Usage: perf-offcpu-time.sh prog_name [sleep_sec]
#   prog_name  pattern matched with grep against the output of `ps aux`
#   sleep_sec  recording duration in seconds (default: 30)
#
# Requires perf, stackcollapse.pl and flamegraph.pl on PATH.
# Intermediate files: perf.data.raw, perf.data.

if [ -z "${1:-}" ]; then
  echo "usage: $0 prog_name [sleep_sec]"
  exit 1
fi
prog_name=$1
sleep_time=${2:-30}

# Lines mentioning grep or this script ('perf-offcpu') are dropped; all
# matches are joined with commas, the list format accepted by perf -p.
pid=$(ps aux | grep -- "$prog_name" | grep -v 'grep' \
  | grep -v 'perf-offcpu' | awk '{print $2}' | paste -sd, -)
if [ -z "$pid" ]; then
  echo "$0: no running process matches '$prog_name'" >&2
  exit 1
fi

echo "prog_name:$prog_name"
echo "pid:$pid"
echo "sleep_time:$sleep_time sec"

perf record -e sched:sched_stat_sleep -e sched:sched_switch \
  -e sched:sched_stat_iowait -e sched:sched_process_exit \
  -e sched:sched_stat_blocked -e sched:sched_stat_wait \
  -g -o perf.data.raw -p "$pid" -- sleep "$sleep_time"
perf inject -v -s -i perf.data.raw -o perf.data

# The awk program rewrites `perf script` output into the plain stack format
# read by stackcollapse.pl:
#   - a line with more than 4 fields starts an event: remember field 1 as the
#     command name and field 5 / 1000000 as the weight (presumably the period
#     in nanoseconds converted to ms - confirm against your perf version);
#   - a line with 2-4 fields is a stack frame: print its second field;
#   - a line with fewer than 2 fields ends the stack: print the command name
#     and the weight.
# Events whose weight rounds down to 0 ms are skipped.
perf script -i perf.data -F comm,pid,tid,cpu,time,period,event,ip,sym,dso,trace | awk '
NF > 4 { exec = $1; period_ms = int($5 / 1000000) }
NF > 1 && NF <= 4 && period_ms > 0 { print $2 }
NF < 2 && period_ms > 0 { printf "%s\n%d\n\n", exec, period_ms }' |
  stackcollapse.pl |
  flamegraph.pl --countname=ms --title="Off-CPU Time Flame Graph" --colors=io > offcpu-time.svg
注意:Off-CPU 时间是所有线程的阻塞时间之和。如果你的程序有 N 个并发线程同时阻塞,每个都阻塞了 t 秒,那么总的 off-CPU 时间就是 N × t。
perf-offcpu-samples.sh(不带时间转换)
#!/bin/sh
# perf-offcpu-samples.sh - record scheduler events of a running program and
# render an Off-CPU flame graph weighted by sample count, without converting
# periods to time (offcpu-samples.svg).
#
# Usage: perf-offcpu-samples.sh prog_name [sleep_sec]
#   prog_name  pattern matched with grep against the output of `ps aux`
#   sleep_sec  recording duration in seconds (default: 30)
#
# Requires perf, stackcollapse-perf.pl and flamegraph.pl on PATH.
# Intermediate files: perf.data.raw, perf.data.

if [ -z "${1:-}" ]; then
  echo "usage: $0 prog_name [sleep_sec]"
  exit 1
fi
prog_name=$1
sleep_time=${2:-30}

# Lines mentioning grep or this script ('perf-offcpu') are dropped; all
# matches are joined with commas, the list format accepted by perf -p.
pid=$(ps aux | grep -- "$prog_name" | grep -v 'grep' \
  | grep -v 'perf-offcpu' | awk '{print $2}' | paste -sd, -)
if [ -z "$pid" ]; then
  echo "$0: no running process matches '$prog_name'" >&2
  exit 1
fi

echo "prog_name:$prog_name"
echo "pid:$pid"
echo "sleep_time:$sleep_time sec"

perf record -e sched:sched_stat_sleep -e sched:sched_switch \
  -e sched:sched_stat_iowait -e sched:sched_process_exit \
  -e sched:sched_stat_blocked -e sched:sched_stat_wait \
  -g -o perf.data.raw -p "$pid" -- sleep "$sleep_time"
perf inject -v -s -i perf.data.raw -o perf.data
perf script -i perf.data | stackcollapse-perf.pl | flamegraph.pl --colors=io > offcpu-samples.svg
进程的Off-CPU时间统计:
cputime.sh:
#!/bin/sh
# cputime.sh - measure how much of a wall-clock interval one process spent
# on-CPU (user + system) and off-CPU, using two samples of /proc/PID/stat.
#
# Usage: cputime.sh prog_name [sleep_sec]
#   prog_name  pattern matched with grep against the output of `ps aux`;
#              must match exactly one process
#   sleep_sec  measurement interval in seconds (default: 30)
#
# Requires bc, getconf and a date(1) that supports %N (GNU coreutils).
# NOTE(review): for a multi-threaded process the /proc/PID/stat counters
# presumably cover all threads, so on-cpu can exceed the elapsed time and
# off-cpu can become negative - confirm against proc(5).

if [ -z "${1:-}" ]; then
  echo "usage: $0 prog_name [sleep_sec]"
  exit 1
fi
prog_name=$1
sleep_time=${2:-30}

# Lines mentioning grep, perf or this script ('cputime') are dropped.
PID=$(ps aux | grep -- "$prog_name" | grep -v 'grep' | grep -v 'perf' \
  | grep -v 'cputime' | awk '{print $2}')
case $PID in
  '')
    echo "$0: no running process matches '$prog_name'" >&2
    exit 1
    ;;
  *[!0-9]*)
    # More than one PID (newline separated): /proc/$PID/stat would be invalid.
    echo "$0: several processes match '$prog_name':" $PID >&2
    exit 1
    ;;
esac

echo "prog_name:$prog_name"
echo "pid:$PID"
echo "sleep_time:$sleep_time sec"

# Print utime + stime (fields 14 and 15 of /proc/PID/stat) in clock ticks.
# Field 2 is the command name in parentheses and may itself contain spaces,
# so everything up to the last ") " is removed first; the remaining line
# starts at field 3, which puts utime/stime at positions 12 and 13.
read_ticks() {
  sed 's/^.*) //' "/proc/$1/stat" | awk '{print $12 + $13}'
}

# First sample
ticks1=$(read_ticks "$PID")
ts1=$(date +%s%N)

# Wait
sleep "$sleep_time"

# Second sample
ticks2=$(read_ticks "$PID")
ts2=$(date +%s%N)

if [ -z "$ticks1" ] || [ -z "$ticks2" ]; then
  echo "$0: cannot read /proc/$PID/stat (process exited?)" >&2
  exit 1
fi

# Ticks per second as reported by the system instead of a hard-coded 100.
clk_tck=$(getconf CLK_TCK)

# Convert to seconds
elapsed_ns=$((ts2 - ts1))
on_ticks=$((ticks2 - ticks1))
elapsed=$(echo "scale=6; $elapsed_ns / 1000000000" | bc)
on_cpu=$(echo "scale=6; $on_ticks / $clk_tck" | bc)
off_cpu=$(echo "$elapsed - $on_cpu" | bc)

# Percentages: multiply before dividing, otherwise bc truncates the ratio to
# two decimals first and the result loses all fractional precision.
on_pct=$(echo "scale=2; $on_cpu * 100 / $elapsed" | bc)
off_pct=$(echo "scale=2; $off_cpu * 100 / $elapsed" | bc)

echo "elapsed:$elapsed sec"
echo "on-cpu:$on_cpu sec ($on_pct%)"
echo "off-cpu:$off_cpu sec ($off_pct%)"
遇到的问题:
ERROR: No stack counts found
原因可能是权限不足,也可能是采样率太低,可以改用root权限运行或提高采样率试试。
perf的采样率默认有上限,需要root权限修改系统配置:
临时修改:
# Raise the sample-rate cap first: once perf_cpu_time_max_percent is 0
# (dynamic throttling disabled), newer kernels may reject writes to
# perf_event_max_sample_rate with "Invalid argument".
echo 10000 > /proc/sys/kernel/perf_event_max_sample_rate
echo 0 > /proc/sys/kernel/perf_cpu_time_max_percent
持久修改:
root权限编辑 /etc/sysctl.conf 文件,添加:
# NOTE(review): -1 removes the perf access restrictions for unprivileged
# users; use a stricter level if that is not wanted.
kernel.perf_event_paranoid = -1
# Same sample-rate cap as the temporary fix. It is listed before
# perf_cpu_time_max_percent because entries are applied in file order and
# newer kernels may refuse to change the rate once throttling is disabled.
kernel.perf_event_max_sample_rate = 10000
kernel.perf_cpu_time_max_percent = 0
然后执行 sysctl -p,立刻生效。
--end--


3097

被折叠的 条评论
为什么被折叠?



