In [1]:
import pandas
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, PowerTransformer, Normalizer
import os
import glob
import re

## Checking up the idle current readings for reference

In [2]:
# Find the idle power consumption with and without Fan and no perf running
# i.e. # Idleworkload-MaxFan and 

def __get_stats__(path:str,field_prefix:str) -> pandas.DataFrame:
    freqs=[]
    ampere_mA_mean = []
    watt_mW_mean = []
    idle_datafname  = re.compile(r'.*CPUFreq\-([.\d]+)GHz')
    _idle_results_maxfan_bcore = glob.glob(path)
    ctr = 0
    for freqstepdata in _idle_results_maxfan_bcore:
        fname_match = idle_datafname.match(freqstepdata)
        if (fname_match):
            # print (freqstepdata + ': '+fname_match.group(1))
            freqidlepd = pandas.read_csv(freqstepdata+'/Idling.powdata.csv')
            freqidlepd.drop(columns=['utctime.1', 'ts_utc'], inplace=True)
            freqidlepd['dev_ippwr-ch1-volts_mV'] = freqidlepd['dev_ippwr-ch1-volts_mV'].apply(lambda x: float(x[2:-1]) if isinstance(x, str) and x.startswith("b'") else x)
            freqidlepd['dev_ippwr-ch1-ampere_mA'] = freqidlepd['dev_ippwr-ch1-ampere_mA'].apply(lambda x: float(x[2:-1]) if isinstance(x, str) and x.startswith("b'") else x)
            freqidlepd['dev_ippwr-ch1-watt_mW'] = freqidlepd['dev_ippwr-ch1-watt_mW'].apply(lambda x: float(x[2:-1]) if isinstance(x, str) and x.startswith("b'") else x)

            df2_mean = freqidlepd[["dev_ippwr-ch1-ampere_mA","dev_ippwr-ch1-watt_mW","therm_cpu0","therm_cpu1", "therm_cpu2", "therm_cpu4"]].mean()
            df2_median = freqidlepd[["dev_ippwr-ch1-ampere_mA","dev_ippwr-ch1-watt_mW","therm_cpu0","therm_cpu1", "therm_cpu2", "therm_cpu4"]].median()
            # display(freqidlepd)
            freqs.append(float(fname_match.group(1)))
            ampere_mA_mean.append(df2_mean["dev_ippwr-ch1-ampere_mA"])
            watt_mW_mean.append(df2_mean["dev_ippwr-ch1-watt_mW"])

            # print (str(freqs[ctr])+': '+str(ampere_mA_mean[ctr])+', '+str(ampere_mA_median[ctr])+', '+str(watt_mW_mean[ctr])+', '+str(watt_mW_median[ctr]))
            
        else:
            # print ('No match for: '+ freqstepdata)
            pass
        ctr += 1
    df = pandas.DataFrame(list(zip(freqs, ampere_mA_mean, watt_mW_mean)),
                        columns =['Frequency', field_prefix+'ampere_mA_mean', field_prefix+'watt_mW_mean'])
    return df    

def calculate_power_impacts():
    ###########################################################################
    # Compile the Big Core idling current data
    df_maxfan_bigcore = __get_stats__('combined_dataset-2/01-Simple-Idling/MaxFan/*-BigCore-*','MaxFan_')
    df_maxfan_bigcore.sort_values('Frequency', inplace=True)
    # df_maxfan.index= df_maxfan['Frequency']

    df_nofan_bigcore = __get_stats__('combined_dataset-2/01-Simple-Idling/NoFan/*-BigCore-*','NoFan_')
    df_nofan_bigcore.sort_values('Frequency', inplace=True)
    # df_nofan.index= df_nofan['Frequency']

    newdf_bigcore= pandas.merge(left=df_maxfan_bigcore, right=df_nofan_bigcore, on='Frequency' )
    newdf_bigcore.sort_values('Frequency', inplace=True)

    ###########################################################################
    # Compile the Lttle Core idling current data
    df_maxfan_littlecore = __get_stats__('combined_dataset/01-Simple-Idling/MaxFan/*-LittleCore-*','MaxFan_')
    df_maxfan_littlecore.sort_values('Frequency', inplace=True)
    # df_maxfan.index= df_maxfan['Frequency']

    df_nofan_littlecore = __get_stats__('combined_dataset/01-Simple-Idling/NoFan/*-LittleCore-*','NoFan_')
    df_nofan_littlecore.sort_values('Frequency', inplace=True)
    # df_nofan.index= df_nofan['Frequency']

    newdf_littlecore = pandas.merge(left=df_maxfan_littlecore, right=df_nofan_littlecore, on='Frequency' )
    newdf_littlecore.sort_values('Frequency', inplace=True)

    ###########################################################################
    # Calculate the power consumption by Fan
    df_bigcore_fandiff = newdf_bigcore['MaxFan_ampere_mA_mean'] - newdf_bigcore['NoFan_ampere_mA_mean']
    df_litcore_fandiff = newdf_littlecore['MaxFan_ampere_mA_mean'] - newdf_littlecore['NoFan_ampere_mA_mean']
    fan_delta = (df_bigcore_fandiff.describe()['mean'] + df_litcore_fandiff.describe()['mean'])/2
    print ('Current consumption impact on fan = '+ str(fan_delta) +' mA')

    df_bigcore_fandiff = newdf_bigcore['MaxFan_watt_mW_mean'] - newdf_bigcore['NoFan_watt_mW_mean']
    df_litcore_fandiff = newdf_littlecore['MaxFan_watt_mW_mean'] - newdf_littlecore['NoFan_watt_mW_mean']
    fan_delta = (df_bigcore_fandiff.describe()['mean'] + df_litcore_fandiff.describe()['mean'])/2
    print ('Power consumption impact on fan = '+ str(fan_delta) +' mW')
    ###########################################################################
    # Calculate the Power difference between each frequency ranges in idling
    # df_bigcore_freqdiff = newdf_bigcore.diff()
    display(newdf_bigcore[['Frequency','NoFan_watt_mW_mean']])


## Processing workloads data

In [3]:
def process_workload_data(filename:str):
    pd_bzip2_enwik8 = pandas.read_csv(filename)

    pd_bzip2_enwik8['dev_ippwr-ch1-volts_mV'] = pd_bzip2_enwik8['dev_ippwr-ch1-volts_mV'].apply(lambda x: float(x[2:-1]) if isinstance(x, str) and x.startswith("b'") else x)
    pd_bzip2_enwik8['dev_ippwr-ch1-ampere_mA'] = pd_bzip2_enwik8['dev_ippwr-ch1-ampere_mA'].apply(lambda x: float(x[2:-1]) if isinstance(x, str) and x.startswith("b'") else x)
    pd_bzip2_enwik8['dev_ippwr-ch1-watt_mW'] = pd_bzip2_enwik8['dev_ippwr-ch1-watt_mW'].apply(lambda x: float(x[2:-1]) if isinstance(x, str) and x.startswith("b'") else x)

    ### JUGAAD WARNING :) ### 
    # Jugaad method of deducing earlier power estimations of idle power at each frequency and fan consumption
    # pd_bzip2_enwik8['dev_ippwr-ch1-watt_mW'] = pd_bzip2_enwik8['dev_ippwr-ch1-watt_mW'] - 2761.181154 - 409.6397807531478

    pd_bzip2_enwik8.drop(pd_bzip2_enwik8.columns.difference([
        'utctime',
        'dev_ippwr-ch1-volts_mV','dev_ippwr-ch1-ampere_mA','dev_ippwr-ch1-watt_mW',
        'S0-D0-C0_cpu-cycles','S0-D0-C0_instructions','S0-D0-C0_cache-misses', 'S0-D0-C0_cache-references', 'S0-D0-C0_branch-instructions', 'S0-D0-C0_branch-misses', 'S0-D0-C0_branch-load-misses', 'S0-D0-C0_branch-loads',
        'S0-D0-C1_cpu-cycles','S0-D0-C1_instructions','S0-D0-C1_cache-misses', 'S0-D0-C1_cache-references', 'S0-D0-C1_branch-instructions', 'S0-D0-C1_branch-misses', 'S0-D0-C1_branch-load-misses', 'S0-D0-C1_branch-loads',
        'S0-D0-C2_cpu-cycles','S0-D0-C2_instructions','S0-D0-C2_cache-misses', 'S0-D0-C2_cache-references', 'S0-D0-C2_branch-instructions', 'S0-D0-C2_branch-misses', 'S0-D0-C2_branch-load-misses', 'S0-D0-C2_branch-loads',
        'S0-D0-C3_cpu-cycles','S0-D0-C3_instructions','S0-D0-C3_cache-misses', 'S0-D0-C3_cache-references', 'S0-D0-C3_branch-instructions', 'S0-D0-C3_branch-misses', 'S0-D0-C3_branch-load-misses', 'S0-D0-C3_branch-loads',
        'S1-D0-C0_cpu-cycles','S1-D0-C0_instructions','S1-D0-C0_cache-misses', 'S1-D0-C0_cache-references', 'S1-D0-C0_branch-instructions', 'S1-D0-C0_branch-misses', 'S1-D0-C0_branch-load-misses', 'S1-D0-C0_branch-loads',
        'S1-D0-C1_cpu-cycles','S1-D0-C1_instructions','S1-D0-C1_cache-misses', 'S1-D0-C1_cache-references', 'S1-D0-C1_branch-instructions', 'S1-D0-C1_branch-misses', 'S1-D0-C1_branch-load-misses', 'S1-D0-C1_branch-loads',
        'S1-D0-C2_cpu-cycles','S1-D0-C2_instructions','S1-D0-C2_cache-misses', 'S1-D0-C2_cache-references', 'S1-D0-C2_branch-instructions', 'S1-D0-C2_branch-misses', 'S1-D0-C2_branch-load-misses', 'S1-D0-C2_branch-loads',
        'S1-D0-C3_cpu-cycles','S1-D0-C3_instructions','S1-D0-C3_cache-misses', 'S1-D0-C3_cache-references', 'S1-D0-C3_branch-instructions', 'S1-D0-C3_branch-misses', 'S1-D0-C3_branch-load-misses', 'S1-D0-C3_branch-loads',
        ]), axis=1, inplace=True)

    ## Per core CPI/IPC & Misc. calculation for little cores
    # S0-D0-C0 - Little Core #1
    pd_bzip2_enwik8['S0-D0-C0_CPI'] = pd_bzip2_enwik8['S0-D0-C0_cpu-cycles']/pd_bzip2_enwik8['S0-D0-C0_instructions']  
    pd_bzip2_enwik8['S0-D0-C0_IPC'] = 1/pd_bzip2_enwik8['S0-D0-C0_CPI']
    pd_bzip2_enwik8['S0-D0-C0_CacheMissRatio'] = pd_bzip2_enwik8['S0-D0-C0_cache-misses']  / pd_bzip2_enwik8['S0-D0-C0_cache-references']

    # S0-D0-C1 - Little Core #2
    pd_bzip2_enwik8['S0-D0-C1_CPI'] = pd_bzip2_enwik8['S0-D0-C1_cpu-cycles']/pd_bzip2_enwik8['S0-D0-C1_instructions']
    pd_bzip2_enwik8['S0-D0-C1_IPC'] = 1/pd_bzip2_enwik8['S0-D0-C1_CPI']
    pd_bzip2_enwik8['S0-D0-C1_CacheMissRatio'] = pd_bzip2_enwik8['S0-D0-C1_cache-misses']  / pd_bzip2_enwik8['S0-D0-C1_cache-references']

    # S0-D0-C2 - Little Core #3
    pd_bzip2_enwik8['S0-D0-C2_CPI'] = pd_bzip2_enwik8['S0-D0-C2_cpu-cycles']/pd_bzip2_enwik8['S0-D0-C2_instructions']
    pd_bzip2_enwik8['S0-D0-C2_IPC'] = 1/pd_bzip2_enwik8['S0-D0-C2_CPI']
    pd_bzip2_enwik8['S0-D0-C2_CacheMissRatio'] = pd_bzip2_enwik8['S0-D0-C2_cache-misses']  / pd_bzip2_enwik8['S0-D0-C2_cache-references']

    # S0-D0-C3 - Little Core #4
    pd_bzip2_enwik8['S0-D0-C3_CPI'] = pd_bzip2_enwik8['S0-D0-C3_cpu-cycles']/pd_bzip2_enwik8['S0-D0-C3_instructions']
    pd_bzip2_enwik8['S0-D0-C3_IPC'] = 1/pd_bzip2_enwik8['S0-D0-C3_CPI']
    pd_bzip2_enwik8['S0-D0-C3_CacheMissRatio'] = pd_bzip2_enwik8['S0-D0-C3_cache-misses'] / pd_bzip2_enwik8['S0-D0-C3_cache-references']

    ## Per core CPI/IPC & Misc. calculation for Big cores
    # S1-D0-C0 - Big Core #1
    pd_bzip2_enwik8['S1-D0-C0_CPI'] = pd_bzip2_enwik8['S1-D0-C0_cpu-cycles']/pd_bzip2_enwik8['S1-D0-C0_instructions']
    pd_bzip2_enwik8['S1-D0-C0_IPC'] = 1/pd_bzip2_enwik8['S1-D0-C0_CPI']
    pd_bzip2_enwik8['S1-D0-C0_CacheMissRatio'] = pd_bzip2_enwik8['S1-D0-C0_cache-misses']  / pd_bzip2_enwik8['S1-D0-C0_cache-references']

    # S1-D0-C1 - Big Core #2
    pd_bzip2_enwik8['S1-D0-C1_CPI'] = pd_bzip2_enwik8['S1-D0-C1_cpu-cycles']/pd_bzip2_enwik8['S1-D0-C1_instructions']
    pd_bzip2_enwik8['S1-D0-C1_IPC'] = 1/pd_bzip2_enwik8['S1-D0-C1_CPI']
    pd_bzip2_enwik8['S1-D0-C1_CacheMissRatio'] = pd_bzip2_enwik8['S1-D0-C1_cache-misses']  / pd_bzip2_enwik8['S1-D0-C1_cache-references']

    # S1-D0-C2 - Big Core #3
    pd_bzip2_enwik8['S1-D0-C2_CPI'] = pd_bzip2_enwik8['S1-D0-C2_cpu-cycles']/pd_bzip2_enwik8['S1-D0-C2_instructions']
    pd_bzip2_enwik8['S1-D0-C2_IPC'] = 1/pd_bzip2_enwik8['S1-D0-C2_CPI']
    pd_bzip2_enwik8['S1-D0-C2_CacheMissRatio'] = pd_bzip2_enwik8['S1-D0-C2_cache-misses'] / pd_bzip2_enwik8['S1-D0-C2_cache-references']

    # S1-D0-C3 - Big Core #4
    pd_bzip2_enwik8['S1-D0-C3_CPI'] = pd_bzip2_enwik8['S1-D0-C3_cpu-cycles']/pd_bzip2_enwik8['S1-D0-C3_instructions']
    pd_bzip2_enwik8['S1-D0-C3_IPC'] = 1/pd_bzip2_enwik8['S1-D0-C3_CPI']
    pd_bzip2_enwik8['S1-D0-C3_CacheMissRatio'] = pd_bzip2_enwik8['S1-D0-C3_cache-misses'] / pd_bzip2_enwik8['S1-D0-C3_cache-references']

    # Drop all the NA data points
    pd_bzip2_enwik8 = pd_bzip2_enwik8.dropna()

    ## Obtain the aggregate numbers for cluster and chip for CPI
    collist_cpi = ['S0-D0-C0_CPI', 'S0-D0-C1_CPI', 'S0-D0-C2_CPI', 'S0-D0-C3_CPI']
    collist_ipc = ['S1-D0-C0_CPI', 'S1-D0-C1_CPI', 'S1-D0-C2_CPI', 'S1-D0-C3_CPI']
    pd_bzip2_enwik8['Aggregate_CPI-Little'] = pd_bzip2_enwik8[collist_cpi].sum(axis=1)
    pd_bzip2_enwik8['Aggregate_CPI-Big'] = pd_bzip2_enwik8[collist_ipc].sum(axis=1)
    pd_bzip2_enwik8['Aggregate_CPI'] = pd_bzip2_enwik8[['Aggregate_CPI-Little', 'Aggregate_CPI-Big']].sum(axis=1)

    ## Obtain the aggregate numbers for cluster and chip for IPC
    collist = ['S0-D0-C0_IPC', 'S0-D0-C1_IPC', 'S0-D0-C2_IPC', 'S0-D0-C3_IPC']
    collist = ['S1-D0-C0_IPC', 'S1-D0-C1_IPC', 'S1-D0-C2_IPC', 'S1-D0-C3_IPC']
    pd_bzip2_enwik8['Aggregate_IPC-Little'] = pd_bzip2_enwik8[collist].sum(axis=1)
    pd_bzip2_enwik8['Aggregate_IPC-Big'] = pd_bzip2_enwik8[collist].sum(axis=1)
    pd_bzip2_enwik8['Aggregate_IPC'] = pd_bzip2_enwik8[['Aggregate_IPC-Little','Aggregate_IPC-Big']].sum(axis=1)

    ## Obtain the aggregate numbers for cluster and chip for Cache miss ratio
    collist_cmrat_little = ['S1-D0-C0_CacheMissRatio','S1-D0-C1_CacheMissRatio', 'S1-D0-C2_CacheMissRatio','S1-D0-C3_CacheMissRatio']
    collist_cmrat_big = ['S0-D0-C0_CacheMissRatio','S0-D0-C1_CacheMissRatio', 'S0-D0-C2_CacheMissRatio','S0-D0-C3_CacheMissRatio']
    pd_bzip2_enwik8['Aggregate_CacheMissRatio-Big'] = pd_bzip2_enwik8[collist_cmrat_big].sum(axis=1)
    pd_bzip2_enwik8['Aggregate_CacheMissRatio-Little'] = pd_bzip2_enwik8[collist_cmrat_little].sum(axis=1)
    pd_bzip2_enwik8['Aggregate_CacheMissRatio'] = pd_bzip2_enwik8[['Aggregate_CacheMissRatio-Little','Aggregate_CacheMissRatio-Big']].sum(axis=1)

    # Compose final data frame with Aggregate IPC
    cols = ['dev_ippwr-ch1-watt_mW','Aggregate_IPC']
    __pd_bzip2_enwik8 = pd_bzip2_enwik8[cols]
    __pd_bzip2_enwik8= __pd_bzip2_enwik8.sort_values('dev_ippwr-ch1-watt_mW')

    # Normalize the final data frame to appropriately scale the values
    scaler = Normalizer()
    df_scaled = scaler.fit_transform(__pd_bzip2_enwik8.to_numpy())
    df_scaled = pandas.DataFrame(df_scaled, columns=cols)

    correlation = df_scaled.corrwith(df_scaled['dev_ippwr-ch1-watt_mW']).sort_values(ascending=False)
    # display (correlation["Aggregate_IPC"])
    return correlation["Aggregate_IPC"]


In [4]:

freq_regex  = re.compile(r'.*CPUFreq\-([.\d]+)GHz')
frequency=[]
testset=[]
correlation=[]
dirs_collection = glob.glob('combined_dataset-2/03-Workloads/*')
dirs_collection_len = len (dirs_collection)
ctr = 0
for dirs in dirs_collection:
    freqval_match = freq_regex.match(dirs)
    if (freqval_match):
        print ('Processing ('+str(ctr+1)+'/'+str(dirs_collection_len)+'): '+freqval_match.group(1)+'GHz data... ')
        dataset_collection = glob.glob(dirs+'/*.csv')
        for data in dataset_collection:
            frequency.append(freqval_match.group(1))
            testset.append(os.path.basename(data))
            corr = process_workload_data(data)
            correlation.append(corr)
            
    else:
        print ('Error in frequency identification for '+dirs)
    ctr += 1

df = pandas.DataFrame(list(zip(frequency, testset, correlation)),
                    columns =['Frequency(GHz)','Data', 'correlation'])
pandas.set_option('display.max_rows', df.shape[0]+1)
print ("Correlation Result summary:")
display(df.describe())

print ("Correlation Results:")
display(df)


Processing (1/32): 0.4GHz data... 
Processing (2/32): 1.5GHz data... 
Processing (3/32): 1.2GHz data... 
Processing (4/32): 0.5GHz data... 
Processing (5/32): 0.3GHz data... 
Processing (6/32): 1.4GHz data... 
Processing (7/32): 0.8GHz data... 
Processing (8/32): 0.4GHz data... 
Processing (9/32): 0.7GHz data... 
Processing (10/32): 2.0GHz data... 
Processing (11/32): 1.4GHz data... 
Processing (12/32): 1.1GHz data... 
Processing (13/32): 1.7GHz data... 
Processing (14/32): 0.7GHz data... 
Processing (15/32): 1.1GHz data... 
Processing (16/32): 1.9GHz data... 
Processing (17/32): 0.6GHz data... 
Processing (18/32): 1.3GHz data... 
Processing (19/32): 0.3GHz data... 
Processing (20/32): 0.9GHz data... 
Processing (21/32): 0.5GHz data... 
Processing (22/32): 0.8GHz data... 
Processing (23/32): 1.8GHz data... 
Processing (24/32): 1.3GHz data... 
Processing (25/32): 0.9GHz data... 
Processing (26/32): 1.2GHz data... 
Processing (27/32): 0.6GHz data... 
Processing (28/32): 1.0GHz data... 
P

Unnamed: 0,correlation
count,715.0
mean,-0.936316
std,0.043304
min,-0.997982
25%,-0.967652
50%,-0.938532
75%,-0.913957
max,-0.665649


Correlation Results:


Unnamed: 0,Frequency(GHz),Data,correlation
0,0.4,stress-io1-100s-1.prof.csv,-0.933979
1,0.4,bzip2-enwik8-1.prof.csv,-0.942343
2,0.4,stress-cpu1-100s-1.prof.csv,-0.902005
3,0.4,stress-cpu4-100s-1.prof.csv,-0.919271
4,0.4,stress-io3-100s-1.prof.csv,-0.927533
5,0.4,stress-io4-100s-1.prof.csv,-0.91144
6,0.4,gzip-enwik8-1.prof.csv,-0.954223
7,0.4,bzip2-webster-1.prof.csv,-0.956563
8,0.4,stress-io2-100s-1.prof.csv,-0.915228
9,0.4,stress-cpu3-100s-1.prof.csv,-0.864435
