In [None]:
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter

## Build XED
```
git clone https://github.com/intelxed/xed.git xed
git clone https://github.com/intelxed/mbuild.git mbuild
cd xed
./mfile.py
./mfile.py examples
./obj/wkit/bin/xed -version
```

## Mapping objdump address to run-time address
```
perf script --show-mmap-events
```

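For example, in the output below the executable segment of `all_in_one` is mapped at `0x55daaf6dd000` from file offset `0x22000`, so the objdump address `0x134266` corresponds to the run-time address `0x55daaf6dd000 - 0x22000 + 0x134266 = 0x55daaf7ef266`: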
```
      all_in_one 1307309 [-01] 1521131.637903: PERF_RECORD_MMAP2 1307309/1307309: [0x55daaf6dd000(0x1be000) @ 0x22000 103:02 26643848 1208969036]: r-xp /home/zixianc/hwgc_soft/builds/all_in_one
      all_in_one 1307309 [-01] 1521131.637915: PERF_RECORD_MMAP2 1307309/1307309: [0x7f5df5d03000(0x2a000) @ 0x2000 103:02 13384699 1313450352]: r-xp /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
      all_in_one 1307309 [-01] 1521131.637922: PERF_RECORD_MMAP2 1307309/1307309: [0x7fffaddb8000(0x2000) @ 0 00:00 0 0]: r-xp [vdso]
      all_in_one 1307309 [-01] 1521131.638009: PERF_RECORD_MMAP2 1307309/1307309: [0x7f5df5cd7000(0x17000) @ 0x3000 103:02 13386634 3117925035]: r-xp /usr/lib/x86_64-linux-gnu/libgcc_s.so.1
      all_in_one 1307309 [-01] 1521131.638037: PERF_RECORD_MMAP2 1307309/1307309: [0x7f5df5cd0000(0x1000) @ 0x1000 103:02 13384744 3170015033]: r-xp /usr/lib/x86_64-linux-gnu/libpthread.so.0
      all_in_one 1307309 [-01] 1521131.638075: PERF_RECORD_MMAP2 1307309/1307309: [0x7f5df5ccb000(0x1000) @ 0x1000 103:02 13384720 2853223792]: r-xp /usr/lib/x86_64-linux-gnu/libdl.so.2
      all_in_one 1307309 [-01] 1521131.638101: PERF_RECORD_MMAP2 1307309/1307309: [0x7f5df5a28000(0x195000) @ 0x28000 103:02 13384710 3585233712]: r-xp /usr/lib/x86_64-linux-gnu/libc.so.6
```


## Generate CSV
```
perf script -i ~/hwgc_soft/perf.data --no-demangle -F +brstackinsn  | ./obj/wkit/bin/xed -F insn: -A -64 > dump.txt
```
This pipes the branch-stack output of `perf script` through XED, which disassembles the raw instruction bytes. The result looks like this:
```
        _ZN9hwgc_soft4mark12trace_object17hd1e9edf31db3578aE:
        000055daaf7ef260                        movq  (%rdi), %rax
        000055daaf7ef263                        cmp %sil, %al
        000055daaf7ef266                        jnz 0x55daaf7ef26f                      # PRED 49 cycles [57] 0.06 IPC
        000055daaf7ef26f                        movzx %sil, %ecx
        000055daaf7ef273                        mov %rax, %rdx
        000055daaf7ef276                        and $0xffffffffffffff00, %rdx
        000055daaf7ef27d                        or %rcx, %rdx
        000055daaf7ef280                        movq  %rdx, (%rdi)
        000055daaf7ef283                        cmp %sil, %al
        000055daaf7ef286                        setnz %al
        000055daaf7ef289                        retq                            # PRED 4 cycles [61] 2.00 IPC
```

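Then `grep` for the run-time address of the branch instruction of interest, keeping only the lines that carry a prediction annotation (`PRED` or `MISPRED`; `grep PRED` matches both). Use the address computed for your own run: the one in the command below does not fall inside the mapping shown in the example above.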

```
grep "55b436519266" dump.txt|grep PRED > 134266.txt
```

```
        000055daaf7ef289                        retq                            # PRED 1 cycles [174] 8.00 IPC
        000055daaf7ef289                        retq                            # PRED 4 cycles [116] 2.00 IPC
        000055daaf7ef289                        retq                            # PRED 4 cycles [163] 2.00 IPC
        000055daaf7ef289                        retq                            # PRED 26 cycles [60] 0.31 IPC
        000055daaf7ef289                        retq                            # PRED 1 cycles [7] 8.00 IPC
        000055daaf7ef289                        retq                            # PRED 11 cycles [42] 0.73 IPC
        000055daaf7ef289                        retq                            # PRED 4 cycles [19] 2.00 IPC
        000055daaf7ef289                        retq                            # PRED 4 cycles [53] 2.00 IPC
        000055daaf7ef289                        retq                            # PRED 1 cycles [64] 8.00 IPC
        000055daaf7ef289                        retq                            # PRED 4 cycles [13] 2.00 IPC
        000055daaf7ef289                        retq                            # PRED 1 cycles [28] 8.00 IPC
```

Finally, turn it into a space-separated CSV with two columns and no header: the prediction outcome and the cycle count.
```
cat 134266.txt|tr -s " " | cut -d " " -f 3,4>134266.csv
```

```
PRED 11
PRED 4
PRED 1
PRED 1
PRED 4
PRED 1
PRED 1
```

In [None]:
def draw_insn(path):
    """Plot the distribution of cycle counts recorded for one instruction.

    Reads the space-separated, header-less, two-column file at ``path``
    (prediction outcome, cycle count) and draws one line per outcome showing
    which fraction of all rows in the file falls on each cycle count. The x
    axis uses a base-2 logarithmic scale with plain-number tick labels.

    Parameters
    ----------
    path
        Location of the CSV file; passed straight to ``pl.read_csv`` and also
        used as the plot title.

    Returns
    -------
    None
        The plot is drawn on a newly created matplotlib figure.
    """
    # Read first so that a bad path does not leave an empty figure behind.
    samples = pl.read_csv(
        source=path,
        separator=" ",
        has_header=False,
        new_columns=["pred", "cycles"],
    )

    # `pl.count()` without arguments is deprecated in favour of `pl.len()`;
    # fall back to the old spelling on polars releases that predate `pl.len`.
    group_size = pl.len() if hasattr(pl, "len") else pl.count()

    freq = (
        samples
        # Collapse the outcome to a boolean: True for "PRED", False otherwise.
        .with_columns(pl.col("pred") == "PRED")
        .group_by("cycles", "pred")
        .agg(group_size.alias("count"))
        # Normalise by the total row count, so the shares of both outcomes
        # together sum to 1 (not each outcome on its own).
        .with_columns((pl.col("count") / pl.col("count").sum()).alias("freq"))
    )

    _, ax = plt.subplots()
    sns.lineplot(data=freq, x="cycles", y="freq", hue="pred", ax=ax)
    ax.set_xscale("log", base=2)
    # Show tick labels as plain numbers (1, 2, 4, ...) instead of powers of two.
    ax.xaxis.set_major_formatter(ScalarFormatter())
    ax.set_xlabel("cycles")
    ax.set_ylabel("fraction of samples")
    # Identify the input so that several plots in one notebook can be told apart.
    ax.set_title(str(path))

```
0000000000134260 <hwgc_soft::mark::trace_object>:
  134260:	48 8b 07             	mov    (%rdi),%rax
  134263:	40 38 f0             	cmp    %sil,%al
  134266:	75 07                	jne    13426f <hwgc_soft::mark::trace_object+0xf>
  134268:	40 38 f0             	cmp    %sil,%al
  13426b:	0f 95 c0             	setne  %al
  13426e:	c3                   	ret
  13426f:	40 0f b6 ce          	movzbl %sil,%ecx
  134273:	48 89 c2             	mov    %rax,%rdx
  134276:	48 81 e2 00 ff ff ff 	and    $0xffffffffffffff00,%rdx
  13427d:	48 09 ca             	or     %rcx,%rdx
  134280:	48 89 17             	mov    %rdx,(%rdi)
  134283:	40 38 f0             	cmp    %sil,%al
  134286:	0f 95 c0             	setne  %al
  134289:	c3                   	ret
  13428a:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
```

In [None]:
# Cycle distribution for the `jne` at objdump address 0x134266
# (see the trace_object listing above).
# NOTE(review): absolute, machine-specific path; adjust to where the CSVs live.
draw_insn("/Users/caizixian/Downloads/lbr/134266.csv")

In [None]:
# Cycle distribution for the first `ret` of trace_object, at objdump address
# 0x13426e (see the listing above).
# NOTE(review): absolute, machine-specific path; adjust to where the CSVs live.
draw_insn("/Users/caizixian/Downloads/lbr/13426e.csv")

In [None]:
# Cycle distribution for the second `ret` of trace_object, at objdump address
# 0x134289 (see the listing above).
# NOTE(review): absolute, machine-specific path; adjust to where the CSVs live.
draw_insn("/Users/caizixian/Downloads/lbr/134289.csv")