In [27]:
import os
import re
import copy
import pandas as pd
emon_data='/home/yangkun/emon-2'

def parse_file_name(file_name, instance, flag=None):
    """
    type: eg: simd
    flag: eg: avx
    core: eg: 28
    freq: eg: 3.0Ghz
    parse file's name and return flog, core and so on values
    support
    
    用于demo，不同的指令集，当在不同的 core freq，不同的 Cores，其实际cpu freq, uncore freq, power 的关系
    数据集文件基于如下规则：
    <inst>_<cores>_<freq>.dat.xlsx
    <inst>: general(busy), simd_sse, simd_avx, simd_avx512, amx : Total 5
    <cores>: 4..56..4 : Total 14
    <freq>: 2.4..3.8..0.2 : Total 8
    Total files: 5 * 14 * 8 = 560
    Sample: simd_sse_56_3.6Ghz.dat.xls
    每个emon文件，取 "socket view" 的 Socket 0 数据，取3个值：
    "metric_CPU operating frequency (in GHz)": 标记运行时实际cpu frequency
    "metric_uncore frequency GHz": 标记 uncore frequency
    "metric_package power (watts)": 标记功耗
    """
    file_info = {}
    # print(instance)
    if instance == "busy":
        if re.search("^busy", file_name):
            file_gather = file_name.split('_')
            file_info['type'] = file_gather[0]
            file_info['core'] = file_gather[1]
            file_info['freq'] = re.findall('\d+\.\d+Ghz',file_gather[2])[0]
    elif instance == "simd_sse":
        if re.search("^simd_sse_[0-9]+", file_name) :
            file_gather = file_name.split('_')
            file_info['type'] = file_gather[0:1]
            file_info['core'] = file_gather[2]
            file_info['freq'] = re.findall('\d+\.\d+Ghz',file_gather[3])[0]
            
    # print(file_info)
    return file_info

def get_emon_data(file_path, sheet_name, sample, select_col):
    dict_s = {}
    df = pd.read_excel(file_path, index_col=0, header=0, sheet_name=sheet_name)
    sample_value = df.loc[sample, select_col]
    dict_s[sample] = sample_value
    return sample_value

sample_list=["metric_CPU operating frequency (in GHz)", "metric_uncore frequency GHz", 'metric_package power (watts)']

In [24]:

def save_df(dict_va):
    res = {}
    for d in dict_va:
        for k, v in d.items():
            res[k] = res.get(k, []) + v
    res = list(dict(sorted(res.items())).values())
    return res

def get_dataframe_data(emon_data, sample_list, instance, flag=None):
    # core_dict = {}
    core_info = []
    for file_name in os.listdir(emon_data):
        if re.search(".xlsx$", file_name) and re.search("^{}".format(instance), file_name):
        #if re.search(".xlsx$", file_name):
            # print(file_name)
            # print(emon_data)
            file_info = parse_file_name(file_name, instance, flag)
            samplev_list = []
            if os.path.isfile(os.path.join(emon_data,file_name)):
                file_path = os.path.join(emon_data, file_name) 
                for sample in sample_list: 
                    sample_value = get_emon_data(file_path, "socket view", sample, "socket 0")
                    samplev_list.append(sample_value)
            # core_dict[file_info['core']]=samplev_list
            core_info.append({file_info['core']: samplev_list})

    # print(core_l)
    result=save_df(core_info)
    return result
 


In [3]:
def save(df_data):
    multi_index = pd.MultiIndex.from_tuples([("4", ), ("8",), ("12",), ("16",), ("20",),
                                             ("24",), ("28",),("32",), ("36",), ("40", ),
                                             ("44",), ("48",),("52",), ("56",)], names=['core'])
                                       
             
    cols = pd.MultiIndex.from_tuples([("2.4", "metric_CPU"), ("2.4", "metric_uncore"), ("2.4", "power"), 
                                      ("2.6", "metric_CPU"), ("2.6", "metric_uncore"), ("2.6", "power"),
                                      ("2.8", "metric_CPU"), ("2.8", "metric_uncore"), ("2.8", "power"),
                                      ("3"  , "metric_CPU"), ("3",   "metric_uncore"), ("3",   "power"),          
                                      ("3.2", "metric_CPU"), ("3.2", "metric_uncore"), ("3.2", "power"),
                                      ("3.4", "metric_CPU"), ("3.4", "metric_uncore"), ("3.4", "power"),
                                      ("3.6", "metric_CPU"), ("3.6", "metric_uncore"), ("3.6", "power"),
                                      ("3.8", "metric_CPU"), ("3.8", "metric_uncore"), ("3.8", "power"),])
                                    
    df = pd.DataFrame(df_data, columns=cols, index=multi_index)
    
    return df 
    # with pd.ExcelWriter('1.xlsx') as writer:
    #     df.to_excel(writer, sheet_name='Sheet_name_1')
    #     df.to_excel(writer, sheet_name='Sheet_name_2')

In [43]:
inst = ["simd_sse", "busy"]
flag = ["avx", "avx_512", "amx"]

type_dict = {}
for i in inst:
    if i == "busy":
        print(i)
        df_data = get_dataframe_data(emon_data, sample_list, i)
        type_dict[i] = df_data
        print(type_dict)
    elif i == "simd_sse":
        df_data = get_dataframe_data(emon_data, sample_list, i)
        type_dict[i] = df_data
        print(type_dict)
    # else:
    #     for f in flag:
    #         # print(i, f)
    #         df_data = get_dataframe_data(emon_data, sample_list, i, f)
    #         type_dict["{}_{}".format(i,f)] = df_data
    #         print(type_dict)
        # save(df_data)
with pd.ExcelWriter('2.xlsx') as writer:
    for k, v in type_dict.items():
        print(k,v)
        save(v).to_excel(writer, sheet_name=k)

{'simd_sse': [[2.400162271372627, 2.500560522981281, 206.5110821013007, 2.600404644036958, 2.500528462826018, 212.1059291247158, 2.800448494130071, 2.50071915503357, 214.6533516928203, 3.000453738864781, 2.500547858383393, 217.9698277021694, 3.199609651152929, 2.500680909176584, 226.4088133757872, 3.399007293150188, 2.500522053996437, 229.4909392536392, 3.598261657616435, 2.5005105568754, 235.75580768074, 3.797720536683618, 2.500559070458916, 242.8524639484569], [2.400161544198649, 2.500566543697685, 217.1404970441229, 2.600420376778114, 2.50075892881563, 219.6957500234689, 2.800424567266395, 2.500515272016664, 227.2181784820684, 3.0004451719345, 2.500546187865052, 230.6600763153913, 3.199648980640758, 2.500494491791209, 237.8344934799509, 3.39925140666334, 2.500506673899131, 245.1552419406907, 3.598591444298643, 2.500538577500905, 252.7564419608507, 3.798055933264198, 2.500515555879588, 262.5822080051354], [2.40015757750832, 2.500546832030813, 224.2135383471404, 2.600391879811262, 2.5