从Linux Kernel 2.6.31版本开始,Linux内核开始提供一个叫__NR_perf_counter_open(最新的版本里叫__NR_perf_event_open)的系统调用。使用这个系统调用我们可以像使用文件一样打开一个Performance Counter,通过设置不同的参数让这个Performance Counter统计不同的软件或硬件事件,然后就可以像读文件一样来读取这些事件的统计结果。比如我可以打开一个Performance Counter统计某一个进程的CPU Cache Miss次数。关于如何传递参数构造Performance Counter来统计不同的事件可以看这篇日志: http://tblog29.appspot.com/blog/1004
下面是我写的一个小程序,它为每个CPU和每个进程开一个Performance Counter,统计每个CPU上的Cache miss和每个进程上的Cache miss(不能统计每个进程在单个CPU上的事件,详见上边那篇日志)。本代码参考了 perf 的stat部分。运行需要CAP_SYS_ADMIN权限
1 eperf.h
/*
* eperf.h
*
* Created on: Jan 28, 2010
* Author: hchen
*/
#ifndef EPERF_H_
#define EPERF_H_
#include <time.h>
#include <asm/unistd.h>
#include "perf_event.h"
/* Size of the fd[] descriptor table in eperf.c (per-CPU plus per-process counters). */
#define MAX_COUNTERS 256
/* Upper bound on the CPU count; not referenced by the code shown alongside this header. */
#define MAX_NR_CPUS 32
/* Directory that main() scans for numeric (per-process) entries. */
#define PROC "/proc"
/*
 * Short fixed-size integer aliases.
 *
 * The 64-bit aliases are spelled as (unsigned) long long on every
 * architecture so that they can be passed to printf-style functions
 * with the ll/L length modifier without triggering format warnings.
 */

/* 64-bit */
typedef unsigned long long u64;
typedef long long s64;

/* 32-bit */
typedef unsigned int u32;
typedef int s32;

/* 16-bit */
typedef unsigned short u16;
typedef short s16;

/* 8-bit (plain char has implementation-defined signedness, so be explicit) */
typedef unsigned char u8;
typedef signed char s8;
/*
 * Open a performance counter through the raw system call.
 *
 * attr     - event description; attr->size is overwritten here with
 *            sizeof(*attr) before the call.
 * pid, cpu - target task / CPU, passed through to the kernel unchanged.
 * group_fd - passed through unchanged (callers in eperf.c pass -1).
 * flags    - passed through unchanged.
 *
 * Returns the value of syscall(): a file descriptor on success, -1 on
 * failure.
 *
 * The system call number was introduced as __NR_perf_counter_open and was
 * later renamed __NR_perf_event_open; whichever name the installed
 * <asm/unistd.h> provides is used, preferring the newer one.
 */
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
                    pid_t pid, int cpu, int group_fd,
                    unsigned long flags)
{
    attr->size = sizeof(*attr);
#if defined(__NR_perf_event_open)
    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
#else
    return syscall(__NR_perf_counter_open, attr, pid, cpu, group_fd, flags);
#endif
}
#endif /* EPERF_H_ */
2 eperf.c
#include <time.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <asm/unistd.h>
#include <dirent.h>
#include "eperf.h"
/* Verbosity level, set from argv[1] in main(); non-zero makes read_counter() print values. */
unsigned int verbose = 0;
/* Event template copied for every counter that is opened: hardware cache misses. */
static struct perf_event_attr attrs[] = {
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }
};
/* Number of per-process counter slots; assigned in main() after the /proc scan. */
int nr_counters = 0;
static unsigned int nr_cpus = 0; /* number of online CPUs, filled in by main() via sysconf() */
/* Copied into attr.inherit when a per-process counter is created. */
static int inherit = 1;
/*
 * When non-zero, counters are opened with the TOTAL_TIME_ENABLED and
 * TOTAL_TIME_RUNNING read-format flags and read_counter() reads three
 * u64 values instead of one.
 */
static int scale = 1;
/*
 * Counter file descriptors. Slots [0, nr_cpus) hold the per-CPU counters;
 * per-process counter n is stored at slot nr_cpus + n. Being static, the
 * array starts zero-filled, and the readers/closers treat any value <= 0
 * as "no counter in this slot".
 */
static int fd[MAX_COUNTERS];
/*
* Read out the results of a single counter:
*/
static void read_counter(int counter)
{
u64 single_count[3];
size_t res, nv;
if (fd[counter] <= 0)
return;
nv = scale ? 3 : 1;
res = read(fd[counter], single_count, nv * sizeof(u64));
if(res == nv * sizeof(u64)){
if(verbose)
printf("Counter %d: %llu\n", counter, single_count[0]);
}else{
fprintf(stderr, "Fail to read counter %d\n", counter);
}
}
void close_all_counters(){
int counter, tn;
tn = nr_cpus + nr_counters;
for (counter = 0; counter < tn; counter++){
if (fd[counter] <= 0)
continue;
close(fd[counter]);
fd[counter] = -1;
}
}
void run_perf_stat()
{
int counter, tn;
tn = nr_cpus + nr_counters;
for (counter = 0; counter < tn; counter++)
read_counter(counter);
}
static void create_perf_stat_counter(int counter, int pid, int system_wide)
{
struct perf_event_attr attr; //cache miss
memcpy(&attr, attrs, sizeof(struct perf_event_attr));
if (scale)
attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
if (system_wide) {
unsigned int cpu;
for (cpu = 0; cpu < nr_cpus; cpu++) {
fd[cpu] = sys_perf_event_open(&attr, -1, cpu, -1, 0);
}
} else {
attr.inherit = inherit;
attr.disabled = 0;
attr.enable_on_exec = 1;
fd[counter + nr_cpus] = sys_perf_event_open(&attr, pid, -1, -1, 0);
}
}
int main(int argc, const char **argv)
{
if(argc > 1)
verbose = atoi(argv[1]);
DIR *dir;
struct dirent *drp;
int run_count, p, pid;
struct timespec tim, tim2;
tim.tv_sec = 1; tim.tv_nsec = 0;
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);//the the number of CPU
int counter = 0;
/* Open /proc directory */
if ((dir = opendir(PROC)) == NULL) {
perror("opendir /proc error!");
exit(4);
}
//create counters for each CPU
create_perf_stat_counter(-1, 1, 1);
p = 0;
while ((p++) < 254) {
/* Get directory entries */
while ((drp = readdir(dir)) != NULL) {
if (isdigit(drp->d_name[0]))
break;
}
if (drp) {
pid = atoi(drp->d_name);
create_perf_stat_counter(counter, pid, 0);
if(fd[counter] != -1)
counter++;
}
}
nr_counters = counter - 1;
/* Close /proc directory */
closedir(dir);
run_count = 100;
// for (run_idx = 0; run_idx < run_count; run_idx++) {
while (1) {
nanosleep(&tim , &tim2);
run_perf_stat();
}
close_all_counters();
return 1;
}
分享到:
相关推荐
在计算机硬件优化领域,性能计数器(Performance Counter)扮演着至关重要的角色。它们是处理器内部的一种硬件机制,用于收集和记录特定事件的发生次数,如指令执行、缓存命中、分支预测等,从而帮助开发者深入理解...
在描述中提到的"Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation.",这里`Perf`代表Linux性能事件子系统,它是Linux内核中用于性能分析的一个工具。PMU(Performance Monitoring Unit)是硬件...
6. **性能计数器**:Windows 2000提供了丰富的性能计数器,开发者可以通过Performance Counter API来收集系统性能数据,如CPU使用率、内存使用情况、磁盘I/O等。 7. **权限和访问控制**:了解Windows的访问控制列表...
Intel的性能计数器(Performance Counter)是基于硬件级别的监控工具,它能够帮助开发者深入了解处理器的运行状态,进而优化软件性能。Intel自2006年起推出的酷睿(Core)系列处理器架构,包括Core Solo、Core Duo、...
7. **性能计数器(Performance Counter)**: .NET框架提供了`System.Diagnostics.PerformanceCounter`类,可以用来监控系统的各种性能指标,包括网络和磁盘I/O,这也能用于计算流量。 8. **线程安全(Thread Safety...
它允许软件开发者记录和分析应用程序、系统和服务中的事件,以调试问题、优化性能或理解系统行为。ETW提供了一种高效且低开销的方式来收集和分析系统事件。 ### 基本概念 1. **提供者(Provider)**: 提供者是ETW的...
6. **系统调用和API**:在某些情况下,BandWidthMonitor可能通过操作系统提供的API(如Windows的Performance Counter API或Linux的sysfs)来获取更精确的网络统计数据,而不是仅依赖于Java的网络套接字。 7. **事件...