Annotation of src/usr.bin/pctr/pctr.c, Revision 1.5
1.5 ! pvalchev 1: /* $OpenBSD: pctr.c,v 1.4 1998/08/30 22:35:37 downsj Exp $ */
1.1 dm 2:
3: /*
4: * Pentium performance counter control program for OpenBSD.
5: * Copyright 1996 David Mazieres <dm@lcs.mit.edu>.
6: *
7: * Modification and redistribution in source and binary forms is
8: * permitted provided that due credit is given to the author and the
1.5 ! pvalchev 9: * OpenBSD project by leaving this copyright notice intact.
1.1 dm 10: */
11:
12: #include <stdio.h>
13: #include <stdlib.h>
14: #include <string.h>
1.4 downsj 15: #include <sys/param.h>
1.1 dm 16: #include <sys/types.h>
17: #include <sys/stat.h>
1.4 downsj 18: #include <sys/sysctl.h>
1.1 dm 19: #include <sys/ioctl.h>
1.4 downsj 20: #include <err.h>
1.1 dm 21: #include <fcntl.h>
22: #include <machine/cpu.h>
23: #include <machine/pctr.h>
1.4 downsj 24: #include <machine/specialreg.h>
1.1 dm 25:
/*
 * Per-event capability bits kept in the flags member of struct ctrfn.
 * They are independent bits and may be tested individually.
 */
#define CFL_MESI	(1 << 0)	/* Unit mask accepts MESI encoding */
#define CFL_SA		(1 << 1)	/* Unit mask accepts Self/Any bit */
#define CFL_C0		(1 << 2)	/* Counter 0 only */
#define CFL_C1		(1 << 3)	/* Counter 1 only */
30:
/*
 * Kernel cpuid values.  main() fills these in through sysctl before any
 * of the macros below are evaluated.
 */
int cpu_id;
int cpu_feature;
char cpu_vendor[16];

/* Non-zero when cpu_vendor compares equal to "GenuineIntel". */
int pctr_isintel;

/* Family field: bits 8-11 of cpu_id. */
#define cpufamily	((cpu_id >> 8) & 0xf)

/* Time stamp counter is advertised in the feature word. */
#define usetsc		(cpu_feature & CPUID_TSC)

/* Intel family 5 whose bits 4-7 of cpu_id (the model field) are non-zero. */
#define usep5ctr	(pctr_isintel && (cpufamily == 5) && \
			 (((cpu_id >> 4) & 0xf) > 0))

/* Any Intel family 6 part. */
#define usep6ctr	(pctr_isintel && (cpufamily == 6))

extern char *__progname;
44:
/*
 * One selectable counter function.  The event tables are arrays of
 * these, terminated by an entry whose name is NULL.
 */
struct ctrfn {
  unsigned int fn;	/* function (event select) code */
  int flags;		/* CFL_* capability bits */
  char *name;		/* short name shown in listings; NULL ends a table */
  char *desc;		/* optional longer description, may be NULL */
};
51:
52: struct ctrfn p5fn[] = {
53: {0x00, 0, "Data read", NULL},
54: {0x01, 0, "Data write", NULL},
55: {0x02, 0, "Data TLB miss", NULL},
56: {0x03, 0, "Data read miss", NULL},
57: {0x04, 0, "Data write miss", NULL},
58: {0x05, 0, "Write (hit) to M or E state lines", NULL},
59: {0x06, 0, "Data cache lines written back", NULL},
60: {0x07, 0, "Data cache snoops", NULL},
61: {0x08, 0, "Data cache snoop hits", NULL},
62: {0x09, 0, "Memory accesses in both pipes", NULL},
63: {0x0a, 0, "Bank conflicts", NULL},
64: {0x0b, 0, "Misaligned data memory references", NULL},
65: {0x0c, 0, "Code read", NULL},
66: {0x0d, 0, "Code TLB miss", NULL},
67: {0x0e, 0, "Code cache miss", NULL},
68: {0x0f, 0, "Any segment register load", NULL},
69: {0x12, 0, "Branches", NULL},
70: {0x13, 0, "BTB hits", NULL},
71: {0x14, 0, "Taken branch or BTB hit", NULL},
72: {0x15, 0, "Pipeline flushes", NULL},
73: {0x16, 0, "Instructions executed", NULL},
74: {0x17, 0, "Instructions executed in the V-pipe", NULL},
75: {0x18, 0, "Bus utilization (clocks)", NULL},
76: {0x19, 0, "Pipeline stalled by write backup", NULL},
77: {0x1a, 0, "Pipeline stalled by data memory read", NULL},
78: {0x1b, 0, "Pipeline stalled by write to E or M line", NULL},
79: {0x1c, 0, "Locked bus cycle", NULL},
80: {0x1d, 0, "I/O read or write cycle", NULL},
81: {0x1e, 0, "Noncacheable memory references", NULL},
82: {0x1f, 0, "AGI (Address Generation Interlock)", NULL},
83: {0x22, 0, "Floating-point operations", NULL},
84: {0x23, 0, "Breakpoint 0 match", NULL},
85: {0x24, 0, "Breakpoint 1 match", NULL},
86: {0x25, 0, "Breakpoint 2 match", NULL},
87: {0x26, 0, "Breakpoint 3 match", NULL},
88: {0x27, 0, "Hardware interupts", NULL},
89: {0x28, 0, "Data read or data write", NULL},
90: {0x29, 0, "Data read miss or data write miss", NULL},
91: {0x0, 0, NULL, NULL},
92: };
93:
94: struct ctrfn p6fn[] = {
95: {0x03, 0, "LD_BLOCKS",
96: "Number of store buffer blocks."},
97: {0x04, 0, "SB_DRAINS",
98: "Number of store buffer drain cycles."},
1.2 dm 99: {0x05, 0, "MISALIGN_MEM_REF",
1.1 dm 100: "Number of misaligned data memory references."},
101: {0x06, 0, "SEGMENT_REG_LOADS",
102: "Number of segment register loads."},
103: {0x10, CFL_C0, "FP_COMP_OPS_EXE",
104: "Number of computational floating-point operations executed."},
105: {0x11, CFL_C1, "FP_ASSIST",
106: "Number of floating-point exception cases handled by microcode."},
107: {0x12, CFL_C1, "MUL",
108: "Number of multiplies."},
109: {0x13, CFL_C1, "DIV",
110: "Number of divides."},
111: {0x14, CFL_C0, "CYCLES_DIV_BUSY",
112: "Number of cycles during which the divider is busy."},
113: {0x21, 0, "L2_ADS",
114: "Number of L2 address strobes."},
115: {0x22, 0, "L2_DBUS_BUSY",
116: "Number of cycles durring which the data bus was busy."},
117: {0x23, 0, "L2_DBUS_BUSY_RD",
118: "Number of cycles during which the data bus was busy transferring "
119: "data from L2 to the processor."},
120: {0x24, 0, "L2_LINES_IN",
121: "Number of lines allocated in the L2."},
122: {0x25, 0, "L2_M_LINES_INM",
123: "Number of modified lines allocated in the L2."},
124: {0x26, 0, "L2_LINES_OUT",
125: "Number of lines removed from the L2 for any reason."},
126: {0x27, 0, "L2_M_LINES_OUTM",
127: "Number of modified lines removed from the L2 for any reason."},
128: {0x28, CFL_MESI, "L2_IFETCH",
129: "Number of L2 instruction fetches."},
130: {0x29, CFL_MESI, "L2_LD",
131: "Number of L2 data loads."},
132: {0x2a, CFL_MESI, "L2_ST",
133: "Number of L2 data stores."},
134: {0x2e, CFL_MESI, "L2_RQSTS",
135: "Number of L2 requests."},
136: {0x43, 0, "DATA_MEM_REFS",
137: "All memory references, both cacheable and non-cacheable."},
138: {0x45, 0, "DCU_LINES_IN",
139: "Total lines allocated in the DCU."},
140: {0x46, 0, "DCU_M_LINES_IN",
141: "Number of M state lines allocated in the DCU."},
142: {0x47, 0, "DCU_M_LINES_OUT",
143: "Number of M state lines evicted from the DCU. "
1.2 dm 144: "This includes evictions via snoop HITM, intervention or replacement"},
1.1 dm 145: {0x48, 0, "DCU_MISS_OUTSTANDING",
146: "Weighted number of cycles while a DCU miss is outstanding."},
147: {0x60, 0, "BUS_REQ_OUTSTANDING",
148: "Number of bus requests outstanding."},
149: {0x61, 0, "BUS_BNR_DRV",
150: "Number of bus clock cycles during which the processor is "
151: "driving the BNR pin."},
152: {0x62, CFL_SA, "BUS_DRDY_CLOCKS",
153: "Number of clocks during which DRDY is asserted."},
154: {0x63, CFL_SA, "BUS_LOCK_CLOCKS",
155: "Number of clocks during which LOCK is asserted."},
156: {0x64, 0, "BUS_DATA_RCV",
157: "Number of bus clock cycles during which the processor is "
158: "receiving data."},
159: {0x65, CFL_SA, "BUS_TRAN_BRD",
160: "Number of burst read transactions."},
161: {0x66, CFL_SA, "BUS_TRAN_RFO",
162: "Number of read for ownership transactions."},
163: {0x67, CFL_SA, "BUS_TRANS_WB",
164: "Number of write back transactions."},
165: {0x68, CFL_SA, "BUS_TRAN_IFETCH",
166: "Number of instruction fetch transactions."},
167: {0x69, CFL_SA, "BUS_TRAN_INVAL",
168: "Number of invalidate transactions."},
169: {0x6a, CFL_SA, "BUS_TRAN_PWR",
170: "Number of partial write transactions."},
171: {0x6b, CFL_SA, "BUS_TRANS_P",
172: "Number of partial transactions."},
173: {0x6c, CFL_SA, "BUS_TRANS_IO",
174: "Number of I/O transactions."},
175: {0x6d, CFL_SA, "BUS_TRAN_DEF",
176: "Number of deferred transactions."},
177: {0x6e, CFL_SA, "BUS_TRAN_BURST",
178: "Number of burst transactions."},
179: {0x6f, CFL_SA, "BUS_TRAN_MEM",
180: "Number of memory transactions."},
181: {0x70, CFL_SA, "BUS_TRAN_ANY",
182: "Number of all transactions."},
183: {0x79, 0, "CPU_CLK_UNHALTED",
184: "Number of cycles during which the processor is not halted."},
185: {0x7a, 0, "BUS_HIT_DRV",
186: "Number of bus clock cycles during which the processor is "
187: "driving the HIT pin."},
188: {0x7b, 0, "BUS_HITM_DRV",
189: "Number of bus clock cycles during which the processor is "
190: "driving the HITM pin."},
191: {0x7e, 0, "BUS_SNOOP_STALL",
192: "Number of clock cycles during which the bus is snoop stalled."},
193: {0x80, 0, "IFU_IFETCH",
194: "Number of instruction fetches, both cacheable and non-cacheable."},
195: {0x81, 0, "IFU_IFETCH_MISS",
196: "Number of instruction fetch misses."},
197: {0x85, 0, "ITLB_MISS",
198: "Number of ITLB misses."},
199: {0x86, 0, "IFU_MEM_STALL",
200: "Number of cycles that the instruction fetch pipe stage is stalled, "
201: "including cache mises, ITLB misses, ITLB faults, "
202: "and victim cache evictions"},
203: {0x87, 0, "ILD_STALL",
204: "Number of cycles that the instruction length decoder is stalled"},
205: {0xa2, 0, "RESOURCE_STALLS",
206: "Number of cycles during which there are resource-related stalls."},
207: {0xc0, 0, "INST_RETIRED",
208: "Number of instructions retired."},
209: {0xc1, CFL_C0, "FLOPS",
210: "Number of computational floating-point operations retired."},
211: {0xc2, 0, "UOPS_RETIRED",
212: "Number of UOPs retired."},
213: {0xc4, 0, "BR_INST_RETIRED",
214: "Number of branch instructions retired."},
215: {0xc5, 0, "BR_MISS_PRED_RETIRED",
216: "Number of mispredicted branches retired."},
217: {0xc6, 0, "CYCLES_INT_MASKED",
218: "Number of processor cycles for which interrupts are disabled."},
219: {0xc7, 0, "CYCLES_INT_PENDING_AND_MASKED",
220: "Number of processor cycles for which interrupts are disabled "
221: "and interrupts are pending."},
222: {0xc8, 0, "HW_INT_RX",
223: "Number of hardware interrupts received."},
224: {0xc9, 0, "BR_TAKEN_RETIRED",
225: "Number of taken branches retired."},
226: {0xca, 0, "BR_MISS_PRED_TAKEN_RET",
227: "Number of taken mispredictioned branches retired."},
228: {0xd0, 0, "INST_DECODER",
229: "Number of instructions decoded."},
230: {0xd2, 0, "PARTIAL_RAT_STALLS",
231: "Number of cycles or events for partial stalls."},
232: {0xe0, 0, "BR_INST_DECODED",
233: "Number of branch instructions decoded."},
234: {0xe2, 0, "BTB_MISSES",
235: "Number of branches that miss the BTB."},
236: {0xe4, 0, "BR_BOGUS",
237: "Number of bogus branches."},
238: {0xe6, 0, "BACLEARS",
239: "Number of times BACLEAR is asserted."},
240: {0x0, 0, NULL, NULL},
241: };
1.3 downsj 242:
/*
 * Print a description on stdout, word-wrapped.
 *
 * Leading blanks are skipped before every output line.  Once fewer than
 * 70 characters remain they are printed as the final line (nothing is
 * printed if none remain).  Otherwise the line is cut at the last blank
 * found at offset 71 or below, and any blanks just before the cut are
 * dropped as well.
 *
 * Two cases are handled explicitly so the scan never leaves the string:
 *  - the scan starts no further right than the terminating NUL;
 *  - if the window contains no blank at all, the line is cut hard at
 *    the start position instead of searching past the beginning.
 */
static void
printdesc (const char *desc)
{
  const char *brk;
  size_t len, start;

  for (;;) {
    while (*desc == ' ')
      desc++;
    len = strlen (desc);
    if (len < 70) {
      if (len > 0)
	printf (" %s\n", desc);
      return;
    }

    /* len >= 70 here, so start is 70 or 71 and always inside the string. */
    start = len < 71 ? len : 71;
    brk = desc + start;
    while (brk > desc && *brk != ' ')
      brk--;

    if (brk == desc) {
      /* desc[0] is never a blank here, so no blank was found: hard cut. */
      brk = desc + start;
    } else {
      /* Trim the run of blanks that precedes the break point. */
      while (brk > desc && brk[-1] == ' ')
	brk--;
    }

    /* The precision argument of %.*s must be an int. */
    printf (" %.*s\n", (int) (brk - desc), desc);
    desc = brk;
  }
}
267:
268: /* Print all possible counter functions */
269: static void
270: list (int fam)
271: {
272: struct ctrfn *cfnp;
273:
274: if (fam == 5)
275: cfnp = p5fn;
276: else if (fam == 6)
277: cfnp = p6fn;
278: else {
279: fprintf (stderr, "Unknown CPU family %d\n", fam);
280: exit (1);
281: }
282: printf ("Hardware counter functions for the %s:\n\n",
283: fam == 5 ? "Pentium" : "Pentium Pro");
284: for (; cfnp->name; cfnp++) {
285: printf ("%02x %s", cfnp->fn, cfnp->name);
286: if (cfnp->flags & CFL_MESI)
287: printf ("/mesi");
288: else if (cfnp->flags & CFL_SA)
289: printf ("/a");
290: if (cfnp->flags & CFL_C0)
291: printf (" (ctr0 only)");
292: if (cfnp->flags & CFL_C1)
293: printf (" (ctr1 only)");
294: printf ("\n");
295: if (cfnp->desc)
296: printdesc (cfnp->desc);
297: }
298: }
299:
300: struct ctrfn *
301: fn2cfnp (u_int family, u_int sel)
302: {
303: struct ctrfn *cfnp;
304:
305: if (family == 6) {
306: cfnp = p6fn;
307: sel &= 0xff;
308: }
309: else {
310: cfnp = p5fn;
311: sel &= 0x3f;
312: }
313: for (; cfnp->name; cfnp++)
314: if (cfnp->fn == sel)
315: return (cfnp);
316: return (NULL);
317: }
318:
319: static char *
320: fn2str (int family, u_int sel)
321: {
322: static char buf[128];
323: char um[9] = "";
324: char cm[6] = "";
325: struct ctrfn *cfnp;
326: u_int fn;
327:
328: if (family == 5) {
329: fn = sel & 0x3f;
330: cfnp = fn2cfnp (family, fn);
331: sprintf (buf, "%c%c%c %02x %s",
332: sel & P5CTR_C ? 'c' : '-',
333: sel & P5CTR_U ? 'u' : '-',
334: sel & P5CTR_K ? 'k' : '-',
335: fn, cfnp ? cfnp->name : "unknown function");
336: }
337: else if (family == 6) {
338: fn = sel & 0xff;
339: cfnp = fn2cfnp (family, fn);
340: if (cfnp && cfnp->flags & CFL_MESI)
341: sprintf (um, "/%c%c%c%c",
342: sel & P6CTR_UM_M ? 'm' : '-',
343: sel & P6CTR_UM_E ? 'e' : '-',
344: sel & P6CTR_UM_S ? 's' : '-',
345: sel & P6CTR_UM_I ? 'i' : '-');
346: else if (cfnp && cfnp->flags & CFL_SA)
347: sprintf (um, "/%c", sel & P6CTR_UM_A ? 'a' : '-');
348: if (sel >> 24)
349: sprintf (cm, "+%d", sel >> 24);
350: sprintf (buf, "%c%c%c%c %02x%s%s%*s %s",
351: sel & P6CTR_I ? 'i' : '-',
352: sel & P6CTR_E ? 'e' : '-',
353: sel & P6CTR_K ? 'k' : '-',
354: sel & P6CTR_U ? 'u' : '-',
355: fn, cm, um, 7 - (strlen (cm) + strlen (um)), "",
356: cfnp ? cfnp->name : "unknown function");
357: }
358: else
359: return (NULL);
360: return (buf);
361: }
362:
363: /* Print status of counters */
364: static void
365: readst (void)
366: {
367: int fd, i;
368: struct pctrst st;
369:
370: fd = open (_PATH_PCTR, O_RDONLY);
371: if (fd < 0) {
372: perror (_PATH_PCTR);
373: exit (1);
374: }
375: if (ioctl (fd, PCIOCRD, &st) < 0) {
376: perror ("PCIOCRD");
377: exit (1);
378: }
379: close (fd);
380:
1.4 downsj 381: if (usep5ctr || usep6ctr) {
382: for (i = 0; i < PCTR_NUM; i++)
383: printf (" ctr%d = %16qd [%s]\n", i, st.pctr_hwc[i],
384: fn2str (cpufamily, st.pctr_fn[i]));
385: }
1.1 dm 386: printf (" tsc = %16qd\n idl = %16qd\n", st.pctr_tsc, st.pctr_idl);
387: }
388:
389: static void
390: setctr (int ctr, u_int val)
391: {
392: int fd;
393:
394: fd = open (_PATH_PCTR, O_WRONLY);
395: if (fd < 0) {
396: perror (_PATH_PCTR);
397: exit (1);
398: }
399: if (ioctl (fd, PCIOCS0 + ctr, &val) < 0) {
400: perror ("PCIOCSn");
401: exit (1);
402: }
403: close (fd);
404: }
405:
406: static void
407: usage (void)
408: {
409: fprintf (stderr,
410: "usage:\n"
411: " %s\n"
412: " Read the counters.\n"
413: " %s -l [5|6]\n"
414: " List all possible counter functions for P5/P6.\n",
1.4 downsj 415: __progname, __progname);
416: if (usep5ctr)
1.1 dm 417: fprintf (stderr,
418: " %s -s {0|1} [-[c][u][k]] function\n"
419: " Configure counter.\n"
420: " 0/1 - counter to configure\n"
421: " c - count cycles not events\n"
422: " u - count events in user mode (ring 3)\n"
423: " k - count events in kernel mode (rings 0-2)\n",
1.4 downsj 424: __progname);
425: else if (usep6ctr)
1.1 dm 426: fprintf (stderr,
427: " %s -s {0|1} [-[i][e][k][u]] "
428: "function[+cm][/{[m][e][s][i]|[a]}]\n"
429: " Configure counter.\n"
430: " 0/1 - counter number to configure\n"
431: " i - invert cm\n"
432: " e - edge detect\n"
433: " k - count events in kernel mode (rings 0-2)\n"
434: " u - count events in user mode (ring 3)\n"
435: " cm - # events/cycle required to bump ctr\n"
436: " mesi - Modified/Exclusive/Shared/Invalid in cache\n"
1.4 downsj 437: " s/a - self generated/all events\n", __progname);
1.1 dm 438: exit (1);
439: }
440:
441:
442: int
443: main (int argc, char **argv)
444: {
445: int fd;
446: u_int ctr;
447: char *cp;
448: u_int fn, fl = 0;
449: char **ap;
450: int ac;
451: struct ctrfn *cfnp;
1.4 downsj 452: int mib[2];
453: size_t len;
1.1 dm 454:
1.4 downsj 455: /* Get the kernel cpuid return values. */
456: mib[0] = CTL_MACHDEP;
457: mib[1] = CPU_CPUVENDOR;
458: if (sysctl(mib, 2, NULL, &len, NULL, 0) == -1)
459: err(1, "sysctl CPU_CPUVENDOR");
460: if (len > sizeof(cpu_vendor)) /* Shouldn't ever happen. */
461: err(1, "sysctl CPU_CPUVENDOR too big");
462: if (sysctl(mib, 2, cpu_vendor, &len, NULL, 0) == -1)
463: err(1, "sysctl CPU_CPUVENDOR");
464:
465: mib[1] = CPU_CPUID;
466: len = sizeof(cpu_id);
467: if (sysctl(mib, 2, &cpu_id, &len, NULL, 0) == -1)
468: err(1, "sysctl CPU_CPUID");
469:
470: mib[1] = CPU_CPUFEATURE;
471: len = sizeof(cpu_feature);
472: if (sysctl(mib, 2, &cpu_feature, &len, NULL, 0) == -1)
473: err(1, "sysctl CPU_CPUFEATURE");
1.1 dm 474:
1.4 downsj 475: pctr_isintel = (strcmp(cpu_vendor, "GenuineIntel") == 0);
1.1 dm 476:
477: if (argc <= 1)
478: readst ();
479: else if (argc == 2 && !strcmp (argv[1], "-l"))
480: list (cpufamily);
481: else if (argc == 3 && !strcmp (argv[1], "-l"))
482: list (atoi (argv[2]));
483: else if (!strcmp (argv[1], "-s") && argc >= 4) {
484: ctr = atoi (argv[2]);
485: if (ctr >= PCTR_NUM)
486: usage ();
487: ap = &argv[3];
488: ac = argc - 3;
489:
1.4 downsj 490: if (usep6ctr)
1.1 dm 491: fl |= P6CTR_EN;
492: if (**ap == '-') {
493: cp = *ap;
1.4 downsj 494: if (usep6ctr)
1.1 dm 495: while (*++cp)
496: switch (*cp) {
497: case 'i':
498: fl |= P6CTR_I;
499: break;
500: case 'e':
501: fl |= P6CTR_E;
502: break;
503: case 'k':
504: fl |= P6CTR_K;
505: break;
506: case 'u':
507: fl |= P6CTR_U;
508: break;
509: default:
510: usage ();
511: }
1.4 downsj 512: else if(usep5ctr)
1.1 dm 513: while (*++cp)
514: switch (*cp) {
515: case 'c':
516: fl |= P5CTR_C;
517: break;
518: case 'k':
519: fl |= P5CTR_K;
520: break;
521: case 'u':
522: fl |= P5CTR_U;
523: break;
524: default:
525: usage ();
526: }
527: ap++;
528: ac--;
529: }
530: else {
1.4 downsj 531: if (usep6ctr)
1.1 dm 532: fl |= P6CTR_U|P6CTR_K;
1.4 downsj 533: else if (usep5ctr)
1.1 dm 534: fl |= P5CTR_U|P5CTR_K;
535: }
536:
537: if (!ac)
538: usage ();
539:
540: fn = strtoul (*ap, NULL, 16);
1.4 downsj 541: if ((usep6ctr && (fn & ~0xff)) || (!usep6ctr && (fn & ~0x3f)))
1.1 dm 542: usage ();
543: fl |= fn;
1.4 downsj 544: if (usep6ctr && (cp = strchr (*ap, '+'))) {
1.1 dm 545: cp++;
546: fn = strtol (cp, NULL, 0);
547: if (fn & ~0xff)
548: usage ();
549: fl |= (fn << 24);
550: }
551: cfnp = fn2cfnp (6, fl);
1.4 downsj 552: if (usep6ctr && cfnp && (cp = strchr (*ap, '/'))) {
1.1 dm 553: if (cfnp->flags & CFL_MESI)
554: while (*++cp)
555: switch (*cp) {
556: case 'm':
557: fl |= P6CTR_UM_M;
558: break;
559: case 'e':
560: fl |= P6CTR_UM_E;
561: break;
562: case 's':
563: fl |= P6CTR_UM_S;
564: break;
565: case 'i':
566: fl |= P6CTR_UM_I;
567: break;
568: default:
569: usage ();
570: }
571: else if (cfnp->flags & CFL_SA)
572: while (*++cp)
573: switch (*cp) {
574: case 'a':
575: fl |= P6CTR_UM_A;
576: break;
577: default:
578: usage ();
579: }
580: else
581: usage ();
582: }
583: else if (cfnp && (cfnp->flags & CFL_MESI))
584: fl |= P6CTR_UM_MESI;
585: ap++;
586: ac--;
587:
588: if (ac)
589: usage ();
590:
1.4 downsj 591: if (usep6ctr && ! (fl & 0xff))
1.1 dm 592: fl = 0;
593: setctr (ctr, fl);
594: }
595: else
596: usage ();
597:
598: return 0;
599: }