In [9]:
import ctypes
import os
import struct
import time

# Constants from the Linux kernel headers
PERF_TYPE_HARDWARE = 0
PERF_COUNT_HW_INSTRUCTIONS = 0x00
PERF_COUNT_HW_CPU_CYCLES = 0x01

PERF_EVENT_IOC_RESET = 0x24000001
PERF_EVENT_IOC_ENABLE = 0x24000000
PERF_EVENT_IOC_DISABLE = 0x24000002

class perf_event_attr(ctypes.Structure):
    _fields_ = [
        ("type", ctypes.c_uint),
        ("size", ctypes.c_uint),
        ("config", ctypes.c_ulonglong),
        ("sample_period", ctypes.c_ulonglong),
        ("sample_type", ctypes.c_ulonglong),
        ("read_format", ctypes.c_ulonglong),
        ("flags", ctypes.c_ulonglong),
        ("wakeup_events", ctypes.c_uint),
        ("bp_type", ctypes.c_uint),
        ("config1", ctypes.c_ulonglong),
        ("config2", ctypes.c_ulonglong),
        ("branch_sample_type", ctypes.c_ulonglong),
        ("sample_regs_user", ctypes.c_ulonglong),
        ("sample_stack_user", ctypes.c_uint),
        ("__reserved_2", ctypes.c_uint),
        ("__reserved_3", ctypes.c_ulonglong * 7),
    ]

libc = ctypes.CDLL('libc.so.6', use_errno=True)

# Define perf_event_open function
def perf_event_open(attr, pid, cpu, group_fd, flags):
    res = libc.syscall(298, ctypes.byref(attr), pid, cpu, group_fd, flags)
    if res == -1:
        errno = ctypes.get_errno()
        raise OSError(errno, os.strerror(errno))
    return res

# Create and configure perf_event_attr structures
pe_cycles = perf_event_attr()
pe_cycles.type = PERF_TYPE_HARDWARE
pe_cycles.size = ctypes.sizeof(perf_event_attr)
pe_cycles.config = PERF_COUNT_HW_CPU_CYCLES

pe_instructions = perf_event_attr()
pe_instructions.type = PERF_TYPE_HARDWARE
pe_instructions.size = ctypes.sizeof(perf_event_attr)
pe_instructions.config = PERF_COUNT_HW_INSTRUCTIONS

# Open performance counters
fd_cycles = perf_event_open(pe_cycles, 0, -1, -1, 0)
fd_instructions = perf_event_open(pe_instructions, 0, -1, -1, 0)

# Enable the counters
libc.ioctl(fd_cycles, PERF_EVENT_IOC_RESET, 0)
libc.ioctl(fd_instructions, PERF_EVENT_IOC_RESET, 0)
libc.ioctl(fd_cycles, PERF_EVENT_IOC_ENABLE, 0)
libc.ioctl(fd_instructions, PERF_EVENT_IOC_ENABLE, 0)

# Sleep for a short time to allow some instructions and cycles to occur
time.sleep(0.1)

# Disable the counters
libc.ioctl(fd_cycles, PERF_EVENT_IOC_DISABLE, 0)
libc.ioctl(fd_instructions, PERF_EVENT_IOC_DISABLE, 0)

# Measure the time taken to read and calculate IPC
start_time = time.time()

# Read the counters using os.read
instructions = os.read(fd_instructions, ctypes.sizeof(ctypes.c_longlong()))
cycles = os.read(fd_cycles, ctypes.sizeof(ctypes.c_longlong()))

# Unpack the values into Python integers
instructions = struct.unpack('q', instructions)[0]
cycles = struct.unpack('q', cycles)[0]

# Calculate IPC
ipc = instructions / cycles if cycles != 0 else 0

# End time measurement
end_time = time.time()

# Calculate the time taken in microseconds
elapsed_time_us = (end_time - start_time) * 1e6

# Output the results
print(f"Instructions: {instructions}")
print(f"Cycles: {cycles}")
print(f"IPC: {ipc}")
print(f"Time taken to read and calculate: {elapsed_time_us:.2f} microseconds")

# Close file descriptors
os.close(fd_cycles)
os.close(fd_instructions)


Instructions: 626168
Cycles: 1444175
IPC: 0.43358180275936087
Time taken to read and calculate: 463.96 microseconds


In [8]:
import subprocess
import time

# The perf command to measure IPC
perf_command = ['perf', 'stat', '-e', 'instructions,cycles', '-a', 'sleep', '0.1']

# Record the start time
start_time = time.time()

# Call perf using subprocess
result = subprocess.run(perf_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# Record the end time
end_time = time.time()

# Calculate the elapsed time in seconds
elapsed_time = end_time - start_time

# Print the output of the perf command
print("perf command output:")
print(result.stderr)

# Print the time taken to execute the perf command
print(f"Time taken to execute perf: {elapsed_time:.4f} seconds")


perf command output:

 Performance counter stats for 'system wide':

          18452978      instructions              #    1.24  insn per cycle         
          14837521      cycles                                                      

       0.108567427 seconds time elapsed


Time taken to execute perf: 0.2363 seconds
