Annotation of src/usr.bin/pctr/pctr.c, Revision 1.4
1.4 ! downsj 1: /* $OpenBSD: pctr.c,v 1.3 1998/05/27 02:26:07 downsj Exp $ */
1.1 dm 2:
3: /*
4: * Pentium performance counter control program for OpenBSD.
5: * Copyright 1996 David Mazieres <dm@lcs.mit.edu>.
6: *
7: * Modification and redistribution in source and binary forms is
8: * permitted provided that due credit is given to the author and the
9: * OpenBSD project (for instance by leaving this copyright notice
10: * intact).
11: */
12:
13: #include <stdio.h>
14: #include <stdlib.h>
15: #include <string.h>
1.4 ! downsj 16: #include <sys/param.h>
1.1 dm 17: #include <sys/types.h>
18: #include <sys/stat.h>
1.4 ! downsj 19: #include <sys/sysctl.h>
1.1 dm 20: #include <sys/ioctl.h>
1.4 ! downsj 21: #include <err.h>
1.1 dm 22: #include <fcntl.h>
23: #include <machine/cpu.h>
24: #include <machine/pctr.h>
1.4 ! downsj 25: #include <machine/specialreg.h>
1.1 dm 26:
/*
 * Capability bits used in the flags member of the counter-function
 * tables below.
 */
#define CFL_MESI	0x01	/* unit mask takes the M/E/S/I encoding */
#define CFL_SA		0x02	/* unit mask takes the self/any bit */
#define CFL_C0		0x04	/* usable on counter 0 only */
#define CFL_C1		0x08	/* usable on counter 1 only */
31:
/*
 * CPU identification as reported by the kernel.  main() fills these in
 * through sysctl before anything else looks at them.
 */
int	cpu_id;			/* read via CPU_CPUID */
int	cpu_feature;		/* read via CPU_CPUFEATURE */
char	cpu_vendor[16];		/* read via CPU_CPUVENDOR */

/* Non-zero when cpu_vendor compares equal to "GenuineIntel". */
int	pctr_isintel;

/* Bits 8-11 of cpu_id. */
#define cpufamily	((cpu_id >> 8) & 15)

/* Non-zero when the CPUID_TSC bit is set in the feature word. */
#define usetsc		(cpu_feature & CPUID_TSC)

/* Intel, family 5, and bits 4-7 of cpu_id are non-zero. */
#define usep5ctr	(pctr_isintel && (cpufamily == 5) && \
			    (((cpu_id >> 4) & 15) > 0))

/* Intel, family 6. */
#define usep6ctr	(pctr_isintel && (cpufamily == 6))

/* Program name; defined outside this file. */
extern char *__progname;
! 45:
/*
 * One row of a counter-function table.  A table is terminated by a row
 * whose name is NULL.
 */
struct ctrfn {
	u_int	 fn;		/* function (event select) number */
	int	 flags;		/* CFL_* capability bits */
	char	*name;		/* short name; NULL ends the table */
	char	*desc;		/* longer description, or NULL */
};
52:
53: struct ctrfn p5fn[] = {
54: {0x00, 0, "Data read", NULL},
55: {0x01, 0, "Data write", NULL},
56: {0x02, 0, "Data TLB miss", NULL},
57: {0x03, 0, "Data read miss", NULL},
58: {0x04, 0, "Data write miss", NULL},
59: {0x05, 0, "Write (hit) to M or E state lines", NULL},
60: {0x06, 0, "Data cache lines written back", NULL},
61: {0x07, 0, "Data cache snoops", NULL},
62: {0x08, 0, "Data cache snoop hits", NULL},
63: {0x09, 0, "Memory accesses in both pipes", NULL},
64: {0x0a, 0, "Bank conflicts", NULL},
65: {0x0b, 0, "Misaligned data memory references", NULL},
66: {0x0c, 0, "Code read", NULL},
67: {0x0d, 0, "Code TLB miss", NULL},
68: {0x0e, 0, "Code cache miss", NULL},
69: {0x0f, 0, "Any segment register load", NULL},
70: {0x12, 0, "Branches", NULL},
71: {0x13, 0, "BTB hits", NULL},
72: {0x14, 0, "Taken branch or BTB hit", NULL},
73: {0x15, 0, "Pipeline flushes", NULL},
74: {0x16, 0, "Instructions executed", NULL},
75: {0x17, 0, "Instructions executed in the V-pipe", NULL},
76: {0x18, 0, "Bus utilization (clocks)", NULL},
77: {0x19, 0, "Pipeline stalled by write backup", NULL},
78: {0x1a, 0, "Pipeline stalled by data memory read", NULL},
79: {0x1b, 0, "Pipeline stalled by write to E or M line", NULL},
80: {0x1c, 0, "Locked bus cycle", NULL},
81: {0x1d, 0, "I/O read or write cycle", NULL},
82: {0x1e, 0, "Noncacheable memory references", NULL},
83: {0x1f, 0, "AGI (Address Generation Interlock)", NULL},
84: {0x22, 0, "Floating-point operations", NULL},
85: {0x23, 0, "Breakpoint 0 match", NULL},
86: {0x24, 0, "Breakpoint 1 match", NULL},
87: {0x25, 0, "Breakpoint 2 match", NULL},
88: {0x26, 0, "Breakpoint 3 match", NULL},
89: {0x27, 0, "Hardware interupts", NULL},
90: {0x28, 0, "Data read or data write", NULL},
91: {0x29, 0, "Data read miss or data write miss", NULL},
92: {0x0, 0, NULL, NULL},
93: };
94:
95: struct ctrfn p6fn[] = {
96: {0x03, 0, "LD_BLOCKS",
97: "Number of store buffer blocks."},
98: {0x04, 0, "SB_DRAINS",
99: "Number of store buffer drain cycles."},
1.2 dm 100: {0x05, 0, "MISALIGN_MEM_REF",
1.1 dm 101: "Number of misaligned data memory references."},
102: {0x06, 0, "SEGMENT_REG_LOADS",
103: "Number of segment register loads."},
104: {0x10, CFL_C0, "FP_COMP_OPS_EXE",
105: "Number of computational floating-point operations executed."},
106: {0x11, CFL_C1, "FP_ASSIST",
107: "Number of floating-point exception cases handled by microcode."},
108: {0x12, CFL_C1, "MUL",
109: "Number of multiplies."},
110: {0x13, CFL_C1, "DIV",
111: "Number of divides."},
112: {0x14, CFL_C0, "CYCLES_DIV_BUSY",
113: "Number of cycles during which the divider is busy."},
114: {0x21, 0, "L2_ADS",
115: "Number of L2 address strobes."},
116: {0x22, 0, "L2_DBUS_BUSY",
117: "Number of cycles durring which the data bus was busy."},
118: {0x23, 0, "L2_DBUS_BUSY_RD",
119: "Number of cycles during which the data bus was busy transferring "
120: "data from L2 to the processor."},
121: {0x24, 0, "L2_LINES_IN",
122: "Number of lines allocated in the L2."},
123: {0x25, 0, "L2_M_LINES_INM",
124: "Number of modified lines allocated in the L2."},
125: {0x26, 0, "L2_LINES_OUT",
126: "Number of lines removed from the L2 for any reason."},
127: {0x27, 0, "L2_M_LINES_OUTM",
128: "Number of modified lines removed from the L2 for any reason."},
129: {0x28, CFL_MESI, "L2_IFETCH",
130: "Number of L2 instruction fetches."},
131: {0x29, CFL_MESI, "L2_LD",
132: "Number of L2 data loads."},
133: {0x2a, CFL_MESI, "L2_ST",
134: "Number of L2 data stores."},
135: {0x2e, CFL_MESI, "L2_RQSTS",
136: "Number of L2 requests."},
137: {0x43, 0, "DATA_MEM_REFS",
138: "All memory references, both cacheable and non-cacheable."},
139: {0x45, 0, "DCU_LINES_IN",
140: "Total lines allocated in the DCU."},
141: {0x46, 0, "DCU_M_LINES_IN",
142: "Number of M state lines allocated in the DCU."},
143: {0x47, 0, "DCU_M_LINES_OUT",
144: "Number of M state lines evicted from the DCU. "
1.2 dm 145: "This includes evictions via snoop HITM, intervention or replacement"},
1.1 dm 146: {0x48, 0, "DCU_MISS_OUTSTANDING",
147: "Weighted number of cycles while a DCU miss is outstanding."},
148: {0x60, 0, "BUS_REQ_OUTSTANDING",
149: "Number of bus requests outstanding."},
150: {0x61, 0, "BUS_BNR_DRV",
151: "Number of bus clock cycles during which the processor is "
152: "driving the BNR pin."},
153: {0x62, CFL_SA, "BUS_DRDY_CLOCKS",
154: "Number of clocks during which DRDY is asserted."},
155: {0x63, CFL_SA, "BUS_LOCK_CLOCKS",
156: "Number of clocks during which LOCK is asserted."},
157: {0x64, 0, "BUS_DATA_RCV",
158: "Number of bus clock cycles during which the processor is "
159: "receiving data."},
160: {0x65, CFL_SA, "BUS_TRAN_BRD",
161: "Number of burst read transactions."},
162: {0x66, CFL_SA, "BUS_TRAN_RFO",
163: "Number of read for ownership transactions."},
164: {0x67, CFL_SA, "BUS_TRANS_WB",
165: "Number of write back transactions."},
166: {0x68, CFL_SA, "BUS_TRAN_IFETCH",
167: "Number of instruction fetch transactions."},
168: {0x69, CFL_SA, "BUS_TRAN_INVAL",
169: "Number of invalidate transactions."},
170: {0x6a, CFL_SA, "BUS_TRAN_PWR",
171: "Number of partial write transactions."},
172: {0x6b, CFL_SA, "BUS_TRANS_P",
173: "Number of partial transactions."},
174: {0x6c, CFL_SA, "BUS_TRANS_IO",
175: "Number of I/O transactions."},
176: {0x6d, CFL_SA, "BUS_TRAN_DEF",
177: "Number of deferred transactions."},
178: {0x6e, CFL_SA, "BUS_TRAN_BURST",
179: "Number of burst transactions."},
180: {0x6f, CFL_SA, "BUS_TRAN_MEM",
181: "Number of memory transactions."},
182: {0x70, CFL_SA, "BUS_TRAN_ANY",
183: "Number of all transactions."},
184: {0x79, 0, "CPU_CLK_UNHALTED",
185: "Number of cycles during which the processor is not halted."},
186: {0x7a, 0, "BUS_HIT_DRV",
187: "Number of bus clock cycles during which the processor is "
188: "driving the HIT pin."},
189: {0x7b, 0, "BUS_HITM_DRV",
190: "Number of bus clock cycles during which the processor is "
191: "driving the HITM pin."},
192: {0x7e, 0, "BUS_SNOOP_STALL",
193: "Number of clock cycles during which the bus is snoop stalled."},
194: {0x80, 0, "IFU_IFETCH",
195: "Number of instruction fetches, both cacheable and non-cacheable."},
196: {0x81, 0, "IFU_IFETCH_MISS",
197: "Number of instruction fetch misses."},
198: {0x85, 0, "ITLB_MISS",
199: "Number of ITLB misses."},
200: {0x86, 0, "IFU_MEM_STALL",
201: "Number of cycles that the instruction fetch pipe stage is stalled, "
202: "including cache mises, ITLB misses, ITLB faults, "
203: "and victim cache evictions"},
204: {0x87, 0, "ILD_STALL",
205: "Number of cycles that the instruction length decoder is stalled"},
206: {0xa2, 0, "RESOURCE_STALLS",
207: "Number of cycles during which there are resource-related stalls."},
208: {0xc0, 0, "INST_RETIRED",
209: "Number of instructions retired."},
210: {0xc1, CFL_C0, "FLOPS",
211: "Number of computational floating-point operations retired."},
212: {0xc2, 0, "UOPS_RETIRED",
213: "Number of UOPs retired."},
214: {0xc4, 0, "BR_INST_RETIRED",
215: "Number of branch instructions retired."},
216: {0xc5, 0, "BR_MISS_PRED_RETIRED",
217: "Number of mispredicted branches retired."},
218: {0xc6, 0, "CYCLES_INT_MASKED",
219: "Number of processor cycles for which interrupts are disabled."},
220: {0xc7, 0, "CYCLES_INT_PENDING_AND_MASKED",
221: "Number of processor cycles for which interrupts are disabled "
222: "and interrupts are pending."},
223: {0xc8, 0, "HW_INT_RX",
224: "Number of hardware interrupts received."},
225: {0xc9, 0, "BR_TAKEN_RETIRED",
226: "Number of taken branches retired."},
227: {0xca, 0, "BR_MISS_PRED_TAKEN_RET",
228: "Number of taken mispredictioned branches retired."},
229: {0xd0, 0, "INST_DECODER",
230: "Number of instructions decoded."},
231: {0xd2, 0, "PARTIAL_RAT_STALLS",
232: "Number of cycles or events for partial stalls."},
233: {0xe0, 0, "BR_INST_DECODED",
234: "Number of branch instructions decoded."},
235: {0xe2, 0, "BTB_MISSES",
236: "Number of branches that miss the BTB."},
237: {0xe4, 0, "BR_BOGUS",
238: "Number of bogus branches."},
239: {0xe6, 0, "BACLEARS",
240: "Number of times BACLEAR is asserted."},
241: {0x0, 0, NULL, NULL},
242: };
1.3 downsj 243:
/*
 * Print a description on stdout, word-wrapped.
 *
 * Leading blanks are skipped before every output line.  Text shorter
 * than 70 characters is printed as the final line (nothing is printed
 * if it is empty).  Longer text is broken at the last blank found in
 * its first 72 characters, and trailing blanks are dropped from the
 * line.
 *
 * Fixes relative to the previous version:
 *  - text of exactly 70 or 71 characters made the backward scan start
 *    beyond the terminating NUL; the scan window is now clamped to the
 *    string length;
 *  - text with no blank inside the window made the scan run off the
 *    front of the buffer; such text is now hard-broken at the window;
 *  - the "%.*s" precision is passed as int, as printf requires.
 */
static void
printdesc(char *desc)
{
	size_t len, window;
	char *p;

	for (;;) {
		while (*desc == ' ')
			desc++;
		len = strlen(desc);
		if (len < 70) {
			if (*desc)
				printf(" %s\n", desc);
			return;
		}

		/* Never look beyond the terminating NUL. */
		window = len < 72 ? len : 72;
		p = desc + window;

		/* Find the last blank in the window, not passing desc. */
		while (p > desc && *--p != ' ')
			;
		if (p == desc) {
			/*
			 * desc[0] is not a blank here, so reaching desc
			 * means the window holds no blank: break hard.
			 */
			p = desc + window;
		} else {
			/* Drop the blanks preceding the break point. */
			while (*--p == ' ')
				;
			p++;
		}
		printf(" %.*s\n", (int)(p - desc), desc);
		desc = p;
	}
}
268:
269: /* Print all possible counter functions */
270: static void
271: list (int fam)
272: {
273: struct ctrfn *cfnp;
274:
275: if (fam == 5)
276: cfnp = p5fn;
277: else if (fam == 6)
278: cfnp = p6fn;
279: else {
280: fprintf (stderr, "Unknown CPU family %d\n", fam);
281: exit (1);
282: }
283: printf ("Hardware counter functions for the %s:\n\n",
284: fam == 5 ? "Pentium" : "Pentium Pro");
285: for (; cfnp->name; cfnp++) {
286: printf ("%02x %s", cfnp->fn, cfnp->name);
287: if (cfnp->flags & CFL_MESI)
288: printf ("/mesi");
289: else if (cfnp->flags & CFL_SA)
290: printf ("/a");
291: if (cfnp->flags & CFL_C0)
292: printf (" (ctr0 only)");
293: if (cfnp->flags & CFL_C1)
294: printf (" (ctr1 only)");
295: printf ("\n");
296: if (cfnp->desc)
297: printdesc (cfnp->desc);
298: }
299: }
300:
301: struct ctrfn *
302: fn2cfnp (u_int family, u_int sel)
303: {
304: struct ctrfn *cfnp;
305:
306: if (family == 6) {
307: cfnp = p6fn;
308: sel &= 0xff;
309: }
310: else {
311: cfnp = p5fn;
312: sel &= 0x3f;
313: }
314: for (; cfnp->name; cfnp++)
315: if (cfnp->fn == sel)
316: return (cfnp);
317: return (NULL);
318: }
319:
320: static char *
321: fn2str (int family, u_int sel)
322: {
323: static char buf[128];
324: char um[9] = "";
325: char cm[6] = "";
326: struct ctrfn *cfnp;
327: u_int fn;
328:
329: if (family == 5) {
330: fn = sel & 0x3f;
331: cfnp = fn2cfnp (family, fn);
332: sprintf (buf, "%c%c%c %02x %s",
333: sel & P5CTR_C ? 'c' : '-',
334: sel & P5CTR_U ? 'u' : '-',
335: sel & P5CTR_K ? 'k' : '-',
336: fn, cfnp ? cfnp->name : "unknown function");
337: }
338: else if (family == 6) {
339: fn = sel & 0xff;
340: cfnp = fn2cfnp (family, fn);
341: if (cfnp && cfnp->flags & CFL_MESI)
342: sprintf (um, "/%c%c%c%c",
343: sel & P6CTR_UM_M ? 'm' : '-',
344: sel & P6CTR_UM_E ? 'e' : '-',
345: sel & P6CTR_UM_S ? 's' : '-',
346: sel & P6CTR_UM_I ? 'i' : '-');
347: else if (cfnp && cfnp->flags & CFL_SA)
348: sprintf (um, "/%c", sel & P6CTR_UM_A ? 'a' : '-');
349: if (sel >> 24)
350: sprintf (cm, "+%d", sel >> 24);
351: sprintf (buf, "%c%c%c%c %02x%s%s%*s %s",
352: sel & P6CTR_I ? 'i' : '-',
353: sel & P6CTR_E ? 'e' : '-',
354: sel & P6CTR_K ? 'k' : '-',
355: sel & P6CTR_U ? 'u' : '-',
356: fn, cm, um, 7 - (strlen (cm) + strlen (um)), "",
357: cfnp ? cfnp->name : "unknown function");
358: }
359: else
360: return (NULL);
361: return (buf);
362: }
363:
364: /* Print status of counters */
365: static void
366: readst (void)
367: {
368: int fd, i;
369: struct pctrst st;
370:
371: fd = open (_PATH_PCTR, O_RDONLY);
372: if (fd < 0) {
373: perror (_PATH_PCTR);
374: exit (1);
375: }
376: if (ioctl (fd, PCIOCRD, &st) < 0) {
377: perror ("PCIOCRD");
378: exit (1);
379: }
380: close (fd);
381:
1.4 ! downsj 382: if (usep5ctr || usep6ctr) {
! 383: for (i = 0; i < PCTR_NUM; i++)
! 384: printf (" ctr%d = %16qd [%s]\n", i, st.pctr_hwc[i],
! 385: fn2str (cpufamily, st.pctr_fn[i]));
! 386: }
1.1 dm 387: printf (" tsc = %16qd\n idl = %16qd\n", st.pctr_tsc, st.pctr_idl);
388: }
389:
390: static void
391: setctr (int ctr, u_int val)
392: {
393: int fd;
394:
395: fd = open (_PATH_PCTR, O_WRONLY);
396: if (fd < 0) {
397: perror (_PATH_PCTR);
398: exit (1);
399: }
400: if (ioctl (fd, PCIOCS0 + ctr, &val) < 0) {
401: perror ("PCIOCSn");
402: exit (1);
403: }
404: close (fd);
405: }
406:
407: static void
408: usage (void)
409: {
410: fprintf (stderr,
411: "usage:\n"
412: " %s\n"
413: " Read the counters.\n"
414: " %s -l [5|6]\n"
415: " List all possible counter functions for P5/P6.\n",
1.4 ! downsj 416: __progname, __progname);
! 417: if (usep5ctr)
1.1 dm 418: fprintf (stderr,
419: " %s -s {0|1} [-[c][u][k]] function\n"
420: " Configure counter.\n"
421: " 0/1 - counter to configure\n"
422: " c - count cycles not events\n"
423: " u - count events in user mode (ring 3)\n"
424: " k - count events in kernel mode (rings 0-2)\n",
1.4 ! downsj 425: __progname);
! 426: else if (usep6ctr)
1.1 dm 427: fprintf (stderr,
428: " %s -s {0|1} [-[i][e][k][u]] "
429: "function[+cm][/{[m][e][s][i]|[a]}]\n"
430: " Configure counter.\n"
431: " 0/1 - counter number to configure\n"
432: " i - invert cm\n"
433: " e - edge detect\n"
434: " k - count events in kernel mode (rings 0-2)\n"
435: " u - count events in user mode (ring 3)\n"
436: " cm - # events/cycle required to bump ctr\n"
437: " mesi - Modified/Exclusive/Shared/Invalid in cache\n"
1.4 ! downsj 438: " s/a - self generated/all events\n", __progname);
1.1 dm 439: exit (1);
440: }
441:
442:
443: int
444: main (int argc, char **argv)
445: {
446: int fd;
447: u_int ctr;
448: char *cp;
449: u_int fn, fl = 0;
450: char **ap;
451: int ac;
452: struct ctrfn *cfnp;
1.4 ! downsj 453: int mib[2];
! 454: size_t len;
1.1 dm 455:
1.4 ! downsj 456: /* Get the kernel cpuid return values. */
! 457: mib[0] = CTL_MACHDEP;
! 458: mib[1] = CPU_CPUVENDOR;
! 459: if (sysctl(mib, 2, NULL, &len, NULL, 0) == -1)
! 460: err(1, "sysctl CPU_CPUVENDOR");
! 461: if (len > sizeof(cpu_vendor)) /* Shouldn't ever happen. */
! 462: err(1, "sysctl CPU_CPUVENDOR too big");
! 463: if (sysctl(mib, 2, cpu_vendor, &len, NULL, 0) == -1)
! 464: err(1, "sysctl CPU_CPUVENDOR");
! 465:
! 466: mib[1] = CPU_CPUID;
! 467: len = sizeof(cpu_id);
! 468: if (sysctl(mib, 2, &cpu_id, &len, NULL, 0) == -1)
! 469: err(1, "sysctl CPU_CPUID");
! 470:
! 471: mib[1] = CPU_CPUFEATURE;
! 472: len = sizeof(cpu_feature);
! 473: if (sysctl(mib, 2, &cpu_feature, &len, NULL, 0) == -1)
! 474: err(1, "sysctl CPU_CPUFEATURE");
1.1 dm 475:
1.4 ! downsj 476: pctr_isintel = (strcmp(cpu_vendor, "GenuineIntel") == 0);
1.1 dm 477:
478: if (argc <= 1)
479: readst ();
480: else if (argc == 2 && !strcmp (argv[1], "-l"))
481: list (cpufamily);
482: else if (argc == 3 && !strcmp (argv[1], "-l"))
483: list (atoi (argv[2]));
484: else if (!strcmp (argv[1], "-s") && argc >= 4) {
485: ctr = atoi (argv[2]);
486: if (ctr >= PCTR_NUM)
487: usage ();
488: ap = &argv[3];
489: ac = argc - 3;
490:
1.4 ! downsj 491: if (usep6ctr)
1.1 dm 492: fl |= P6CTR_EN;
493: if (**ap == '-') {
494: cp = *ap;
1.4 ! downsj 495: if (usep6ctr)
1.1 dm 496: while (*++cp)
497: switch (*cp) {
498: case 'i':
499: fl |= P6CTR_I;
500: break;
501: case 'e':
502: fl |= P6CTR_E;
503: break;
504: case 'k':
505: fl |= P6CTR_K;
506: break;
507: case 'u':
508: fl |= P6CTR_U;
509: break;
510: default:
511: usage ();
512: }
1.4 ! downsj 513: else if(usep5ctr)
1.1 dm 514: while (*++cp)
515: switch (*cp) {
516: case 'c':
517: fl |= P5CTR_C;
518: break;
519: case 'k':
520: fl |= P5CTR_K;
521: break;
522: case 'u':
523: fl |= P5CTR_U;
524: break;
525: default:
526: usage ();
527: }
528: ap++;
529: ac--;
530: }
531: else {
1.4 ! downsj 532: if (usep6ctr)
1.1 dm 533: fl |= P6CTR_U|P6CTR_K;
1.4 ! downsj 534: else if (usep5ctr)
1.1 dm 535: fl |= P5CTR_U|P5CTR_K;
536: }
537:
538: if (!ac)
539: usage ();
540:
541: fn = strtoul (*ap, NULL, 16);
1.4 ! downsj 542: if ((usep6ctr && (fn & ~0xff)) || (!usep6ctr && (fn & ~0x3f)))
1.1 dm 543: usage ();
544: fl |= fn;
1.4 ! downsj 545: if (usep6ctr && (cp = strchr (*ap, '+'))) {
1.1 dm 546: cp++;
547: fn = strtol (cp, NULL, 0);
548: if (fn & ~0xff)
549: usage ();
550: fl |= (fn << 24);
551: }
552: cfnp = fn2cfnp (6, fl);
1.4 ! downsj 553: if (usep6ctr && cfnp && (cp = strchr (*ap, '/'))) {
1.1 dm 554: if (cfnp->flags & CFL_MESI)
555: while (*++cp)
556: switch (*cp) {
557: case 'm':
558: fl |= P6CTR_UM_M;
559: break;
560: case 'e':
561: fl |= P6CTR_UM_E;
562: break;
563: case 's':
564: fl |= P6CTR_UM_S;
565: break;
566: case 'i':
567: fl |= P6CTR_UM_I;
568: break;
569: default:
570: usage ();
571: }
572: else if (cfnp->flags & CFL_SA)
573: while (*++cp)
574: switch (*cp) {
575: case 'a':
576: fl |= P6CTR_UM_A;
577: break;
578: default:
579: usage ();
580: }
581: else
582: usage ();
583: }
584: else if (cfnp && (cfnp->flags & CFL_MESI))
585: fl |= P6CTR_UM_MESI;
586: ap++;
587: ac--;
588:
589: if (ac)
590: usage ();
591:
1.4 ! downsj 592: if (usep6ctr && ! (fl & 0xff))
1.1 dm 593: fl = 0;
594: setctr (ctr, fl);
595: }
596: else
597: usage ();
598:
599: return 0;
600: }