# Correlation analysis
This Jupyter notebook can be used to analyze the measured data and calculate the correlation coefficients.<br>
**Input:** Pruning and inference data as CSV files. The inference data can be generated with trace_analysis.ipynb.<br>
**Output:** Complete dataset that contains pruning and inference information + correlation table.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

## Read input data

In [None]:
# Input CSV locations, grouped by pruning variant. Every file lives below
# src/mnist/train/models/<variant>/.

# --- Unstructured pruning ---
path_unstructured_pruning_training_data = "../../../src/mnist/train/models/unstructured_pruning/pruning.csv"
path_unstructured_pruning_inference_data = "../../../src/mnist/train/models/unstructured_pruning/inference.csv"

# --- Unstructured pruning (speedup) ---
# NOTE(review): the training path is the same pruning.csv as for plain
# unstructured pruning; only the inference file differs. Presumably both
# variants share one set of trained models -- confirm.
path_unstructured_pruning_speedup_training_data = "../../../src/mnist/train/models/unstructured_pruning/pruning.csv"
path_unstructured_pruning_speedup_inference_data = "../../../src/mnist/train/models/unstructured_pruning/inference_speedup.csv"

# --- Structured pruning ---
path_structured_pruning_training_data = "../../../src/mnist/train/models/structured_pruning/pruning.csv"
path_structured_pruning_inference_data = "../../../src/mnist/train/models/structured_pruning/inference.csv"

In [None]:
def _read_measurements(csv_path):
    """Load one semicolon-separated CSV file into a DataFrame."""
    return pd.read_csv(csv_path, sep=';')


# Pruning (training) results, one frame per pruning variant.
df_unstructured_pruning_training = _read_measurements(path_unstructured_pruning_training_data)
df_unstructured_pruning_speedup_training = _read_measurements(path_unstructured_pruning_speedup_training_data)
df_structured_pruning_training = _read_measurements(path_structured_pruning_training_data)

# Inference measurements, one frame per pruning variant.
df_unstructured_pruning_inference = _read_measurements(path_unstructured_pruning_inference_data)
df_unstructured_pruning_speedup_inference = _read_measurements(path_unstructured_pruning_speedup_inference_data)
df_structured_pruning_inference = _read_measurements(path_structured_pruning_inference_data)

## Merge training and inference data

In [None]:
# Measurement columns taken over from each inference table.
_inference_columns = ["energyJoule", "duration", "energyJouleStd", "durationStd"]


def _with_inference(training, inference):
    """Place the inference measurement columns to the right of the training columns."""
    # axis=1 pairs rows by index label; both frames come straight from
    # read_csv, so this pairs them by row position.
    # NOTE(review): assumes both CSVs list the models in the same order -- confirm.
    return pd.concat([training, inference[_inference_columns]], axis=1)


df_unstructured_pruning = _with_inference(
    df_unstructured_pruning_training,
    df_unstructured_pruning_inference,
)
df_unstructured_pruning_speedup = _with_inference(
    df_unstructured_pruning_speedup_training,
    df_unstructured_pruning_speedup_inference,
)
df_structured_pruning = _with_inference(
    df_structured_pruning_training,
    df_structured_pruning_inference,
)

In [None]:
def save_as_tex(df, path):
    df_latex = df[["CRPercent", "loss", "accuracy", "size", "parameters", "energyJoule", "duration"]].copy()
    df_latex.loc[:,'CRPercent'] *= 100
    df_latex.loc[:,'accuracy'] *= 100
    df_latex.loc[:,'energyJoule'] *= 1000
    df_latex.to_latex(buf=path, index=None, header=["CR [%]", "Loss", "Accuracy [%]", "Size [Bytes]", "Parameters", "Energy [mJ]", "Duration [s]"], float_format=lambda x: '%10.2f' % x)


def save_corr_as_tex(df_latex, path):
    df_latex.to_latex(buf=path, index=None, header=["CR [%]", "Energy [mJ]", "Duration [s]", "Loss", "Accuracy [%]"], float_format=lambda x: '%10.3f' % x)

In [None]:
# Show the first ten rows of the merged unstructured-pruning dataset.
df_unstructured_pruning.head(n=10)

In [None]:
# Store the merged unstructured-pruning dataset as CSV and as a LaTeX table.
_csv_target = "../../../src/mnist/train/models/unstructured_pruning/pruning_and_inference.csv"
_tex_target = "../../../src/mnist/train/models/unstructured_pruning/pruning_and_inference.tex"

df_unstructured_pruning.to_csv(path_or_buf=_csv_target, sep=';', index=None)
save_as_tex(df_unstructured_pruning, _tex_target)

In [None]:
# Show the first ten rows of the merged structured-pruning dataset.
df_structured_pruning.head(n=10)

In [None]:
# Store the merged structured-pruning dataset as CSV and as a LaTeX table.
_csv_target = "../../../src/mnist/train/models/structured_pruning/pruning_and_inference.csv"
_tex_target = "../../../src/mnist/train/models/structured_pruning/pruning_and_inference.tex"

df_structured_pruning.to_csv(path_or_buf=_csv_target, sep=';', index=None)
save_as_tex(df_structured_pruning, _tex_target)

In [None]:
# Show the first ten rows of the merged unstructured-pruning (speedup) dataset.
df_unstructured_pruning_speedup.head(n=10)

In [None]:
# Store the merged unstructured-pruning (speedup) dataset as CSV and as a LaTeX table.
_csv_target = "../../../src/mnist/train/models/unstructured_pruning/pruning_and_inference_speedup.csv"
_tex_target = "../../../src/mnist/train/models/unstructured_pruning/pruning_and_inference_speedup.tex"

df_unstructured_pruning_speedup.to_csv(path_or_buf=_csv_target, sep=';', index=None)
save_as_tex(df_unstructured_pruning_speedup, _tex_target)

# Visualization

In [None]:
def _plot_against_cr(column, legend_title, y_label, frames, scale=None):
    """Plot *column* over CRPercent for every dataframe in *frames* and show the figure.

    Args:
        column: Name of the dataframe column drawn on the y-axis.
        legend_title: Title shown above the legend entries.
        y_label: Unit label for the y-axis.
        frames: Mapping of legend label -> dataframe. Lines are drawn in
            mapping order so that they match the legend entries.
        scale: Optional factor applied to the y-values before plotting.
    """
    for frame in frames.values():
        y_values = frame[column] if scale is None else frame[column] * scale
        plt.plot(frame.CRPercent, y_values)
    plt.legend(list(frames), title = legend_title)
    plt.xlabel("CR")
    plt.ylabel(y_label)
    plt.show()


# Accuracy is only compared for the two pruning methods. save_as_tex multiplies
# accuracy by 100 for its "Accuracy [%]" column, so the raw column is scaled
# the same way here to make the "[%]" axis label correct.
_plot_against_cr(
    "accuracy",
    "Accuracy",
    "[%]",
    {
        'Unstructured': df_unstructured_pruning,
        'Structured': df_structured_pruning,
    },
    scale=100,
)

# Energy and duration additionally include the speedup variant.
_all_variants = {
    'Unstructured': df_unstructured_pruning,
    'Structured': df_structured_pruning,
    'Unstructured (speedup)': df_unstructured_pruning_speedup,
}
_plot_against_cr("energyJoule", "Energy consumption", "[J]", _all_variants)
_plot_against_cr("duration", "Duration", "[s]", _all_variants)

## Correlations

### Unstructured pruning

In [None]:
# Pearson correlation between compression rate, inference cost and model
# quality for unstructured pruning; stored as CSV and as a LaTeX table.
_corr_columns = ["CRPercent", "energyJoule", "duration", "loss", "accuracy"]

df_unstructured_with_baseline_corr = df_unstructured_pruning[_corr_columns].corr(method="pearson")
df_unstructured_with_baseline_corr.to_csv(
    path_or_buf="../../../src/mnist/train/models/unstructured_pruning/inference_corr.csv",
    sep=';',
)
save_corr_as_tex(
    df_unstructured_with_baseline_corr,
    "../../../src/mnist/train/models/unstructured_pruning/inference_corr.tex",
)
# Last expression of the cell: display the correlation table.
df_unstructured_with_baseline_corr.head()

### Structured pruning

In [None]:
# Pearson correlation between compression rate, inference cost and model
# quality for structured pruning; stored as CSV and as a LaTeX table.
_corr_columns = ["CRPercent", "energyJoule", "duration", "loss", "accuracy"]

df_structured_with_baseline_corr = df_structured_pruning[_corr_columns].corr(method="pearson")
df_structured_with_baseline_corr.to_csv(
    path_or_buf="../../../src/mnist/train/models/structured_pruning/inference_corr.csv",
    sep=';',
)
save_corr_as_tex(
    df_structured_with_baseline_corr,
    "../../../src/mnist/train/models/structured_pruning/inference_corr.tex",
)
# Last expression of the cell: display the correlation table.
df_structured_with_baseline_corr.head()

### Unstructured pruning (speedup)

In [None]:
# Pearson correlation between compression rate, inference cost and model
# quality for unstructured pruning (speedup); stored as CSV and as a LaTeX table.
_corr_columns = ["CRPercent", "energyJoule", "duration", "loss", "accuracy"]

df_unstructured_with_baseline_speedup_corr = df_unstructured_pruning_speedup[_corr_columns].corr(method="pearson")
df_unstructured_with_baseline_speedup_corr.to_csv(
    path_or_buf="../../../src/mnist/train/models/unstructured_pruning/inference_speedup_corr.csv",
    sep=';',
)
save_corr_as_tex(
    df_unstructured_with_baseline_speedup_corr,
    "../../../src/mnist/train/models/unstructured_pruning/inference_speedup_corr.tex",
)
# Last expression of the cell: display the correlation table.
df_unstructured_with_baseline_speedup_corr.head()