# Analysis from the PCM monitor

This notebook currently analyses:
- IPC (Instructions per cycle)
- Cache misses and the cache hit/miss ratio

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
import pandas as pd
import sys
import warnings
from datetime import datetime
warnings.filterwarnings("ignore")

sys.path.append(os.path.abspath(os.path.join("..")))
from parsers.rpc_parser import RPCParser
from utils.utils import *

In [2]:
# Directory name of the experiment to analyse; it is handed to
# get_experiment_pcm_logfiles() to locate the PCM log file of each node.
EXPERIMENT_DIRNAME = "BuzzBlogBenchmark_2022-01-30-22-32-14"

## Log Parsing

Read the PCM CSV log of every node and drop the unnecessary "Unnamed: 594" column.

In [3]:
# Load one PCM log per node into a dict keyed by node name.
raw_csv_mult_nodes = {}
for node_name, logfiles in get_experiment_pcm_logfiles(EXPERIMENT_DIRNAME):
    # Register the frame first, then keep working on it through a short alias.
    node_frame = raw_csv_mult_nodes[node_name] = pd.read_csv(logfiles)
    # 'Unnamed: 594' carries no data of interest and is discarded.
    node_frame.drop(columns=['Unnamed: 594'], inplace=True)
    # Tag every row with the node it was recorded on.
    node_frame['Node Name'] = [node_name] * len(node_frame)
    print(node_frame.shape)


(37322, 595)
(37319, 595)
(37325, 595)
(37326, 595)
(37331, 595)
(37321, 595)
(37328, 595)
(37323, 595)
(37326, 595)
(37329, 595)
(37325, 595)
(37324, 595)


In [4]:
# Stack the per-node frames row-wise into a single table; the original row
# indices of each frame are kept as-is (pd.concat default).
node_frames = [frame for frame in raw_csv_mult_nodes.values()]
combined_csv = pd.concat(node_frames)
print(combined_csv.shape)

In [None]:
# Load the single-node PCM log and discard the 'Unnamed: 594' column in one step.
raw_csv = pd.read_csv("../data/pcm.csv").drop(columns=['Unnamed: 594'])

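Clean up the artifacts introduced by reading the CSV: merge the two header levels into single column names, drop the header row that was read as data, and combine the date and time columns into one timestamp.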

In [None]:
# The first data row holds the attribute name of every column, while the column
# names themselves hold the category (System, Core 0, ...).
header_row = raw_csv.iloc[0]
# Cut each column name at its first '.', which removes suffixes such as ".1", ".2".
old_cols_name = [name.split('.')[0] for name in header_row.index]
# List with all the attributes of data.
labels = list(header_row.values)
# New name = "<category> <attribute>".
new_cols_name = [category + " " + attribute for category, attribute in zip(old_cols_name, labels)]
column_change_dict = dict(zip(raw_csv.columns.values, new_cols_name))

raw_csv.rename(columns=column_change_dict, inplace=True)
# The attribute row is now part of the column names, so remove it from the data.
raw_csv.drop(index=raw_csv.index[0], inplace=True)

# Fuse date and time into one leading 'Timestamp' column and drop the two sources.
timestamp = raw_csv['System Date'] + " " + raw_csv['System Time']
raw_csv.drop(columns=['System Date', 'System Time'], inplace=True)
raw_csv.insert(0, 'Timestamp', timestamp)
print(raw_csv.columns.values)


In [None]:
# Keep the columns from the first one up to and including 'UncFREQ (Ghz) SKT0'.
# The explicit copy makes system_stats independent of raw_csv, so the column
# assignments below (and in later cells) never write into a slice of raw_csv.
system_stats = raw_csv.loc[:, :'UncFREQ (Ghz) SKT0'].copy()

# Take the first row by position; looking up the label 1 only works while the
# index still starts at 1 after the label row was dropped.
start_time = datetime.fromisoformat(system_stats['Timestamp'].iloc[0])

# Parse only the timestamp column (same parser as before) instead of running a
# Python lambda over every full row, then convert to seconds since start_time.
parsed_timestamps = pd.to_datetime(system_stats['Timestamp'].map(datetime.fromisoformat))
system_stats['Timestamp'] = (parsed_timestamps - start_time).dt.total_seconds()

# Index the frame by elapsed seconds so plots use it as the x axis.
system_stats.set_index('Timestamp', inplace=True)


## Plotting Graphs

### IPC

In [None]:
# Cast the IPC column to float with one column-wise conversion rather than a
# Python call per row of the whole frame.
system_stats['System IPC'] = system_stats['System IPC'].astype(float)

# One large figure with a single axes for the IPC time series.
fig, ax = plt.subplots(figsize=(24, 12))
df = system_stats['System IPC']
ax.grid(alpha=0.75)
# x values come from the frame index; the trailing ';' keeps the Axes repr out
# of the cell output.
df.plot(ax=ax, kind="line", title="IPC", xlabel="Seconds", ylabel="Instructions per cycle", color="blue", grid=True);
