In [1]:
import pandas as pd
import re
import json

In [19]:
import re
from collections import OrderedDict

def parse_log_content_fixed(file_content):
    """Parse an evaluation log into a nested, insertion-ordered mapping.

    Three kinds of lines are recognised; every other line is ignored:

    * ``========== Evaluating <dataset> ============`` opens a new dataset
      (a repeated header for the same name replaces the earlier entry).
    * ``begin timestamp : ...; tag : <name>; duration : ...; package_0 : ...;
      core_0 : ...; uncore_0 : ...`` is one function measurement. It is
      buffered until the next result line.
    * ``<dataset> <sub_dataset> : <accuracy> - <time> ms - db_size : <n>``
      closes a sub-dataset and attaches the buffered measurements to it.

    Numeric fields may be plain decimals or use exponent notation
    (e.g. ``9.5e-07``).

    Args:
        file_content: Full text of the log as a single string.

    Returns:
        OrderedDict mapping dataset name -> OrderedDict mapping sub-dataset
        name -> OrderedDict with the keys ``accuracy`` (float),
        ``similarity_computation_time_ms`` (float), ``db_size`` (int) and
        ``functions`` (list of dicts with the keys ``function_name``,
        ``duration``, ``package_0``, ``core_0`` and ``uncore_0``).

    Notes:
        Measurements that are not followed by a result line before the next
        dataset header (or the end of the text) are discarded. A result line
        that appears before any dataset header is filed under the dataset
        name found at the start of that line.
    """
    # Digits/dots with an optional exponent. This is a superset of the plain
    # ``[\d.]+`` form, so every previously accepted value is still accepted.
    number = r'[\d.]+(?:[eE][-+]?\d+)?'
    header_re = re.compile(r'========== Evaluating (.+) ============')
    measurement_re = re.compile(
        rf'begin timestamp : {number}; tag : ([^;]+); duration : ({number}); '
        rf'package_0 : ({number}); core_0 : ({number}); uncore_0 : ({number})'
    )
    result_re = re.compile(
        rf'(\S+) (.+) : ({number}) - ({number}) ms - db_size : (\d+)'
    )

    datasets = OrderedDict()
    current_dataset = None
    pending_functions = []  # measurements seen since the last header/result line

    for line in file_content.split("\n"):
        line = line.strip()

        # Dataset header: start a fresh dataset and drop unassigned measurements.
        header = header_re.match(line)
        if header:
            current_dataset = header.group(1)
            datasets[current_dataset] = OrderedDict()
            pending_functions = []
            continue

        # Function measurement: buffer it until its result line shows up.
        measurement = measurement_re.match(line)
        if measurement:
            tag, duration, package_0, core_0, uncore_0 = measurement.groups()
            pending_functions.append({
                'function_name': tag,
                'duration': float(duration),
                'package_0': float(package_0),
                'core_0': float(core_0),
                'uncore_0': float(uncore_0)
            })
            continue

        # Result line: identifies the sub-dataset the buffered measurements belong to.
        result = result_re.match(line)
        if result:
            line_dataset, sub_dataset, accuracy, sim_time, db_size = result.groups()

            # Without a preceding header there is no dataset to file under;
            # fall back to the name on the line instead of failing on a None key.
            if current_dataset is None:
                current_dataset = line_dataset
                datasets[current_dataset] = OrderedDict()

            # NOTE: str.strip(" - ") removes any leading/trailing spaces and
            # hyphens (a character set, not the literal " - " substring).
            sub_name = sub_dataset.strip(" - ")

            datasets[current_dataset][sub_name] = OrderedDict([
                ('accuracy', float(accuracy)),
                ('similarity_computation_time_ms', float(sim_time)),
                ('db_size', int(db_size)),
                ('functions', pending_functions),
            ])
            pending_functions = []
            continue

    return datasets

def print_parsed_data(datasets):
    """Print a parsed log structure back out as log-like text.

    For every dataset a header line is printed, then for each of its
    sub-datasets one ``begin timestamp`` line per recorded function (the
    timestamp is not stored, so the placeholder ``XXXX`` is printed) followed
    by the sub-dataset's result line.

    Args:
        datasets: Mapping of dataset name -> mapping of sub-dataset name ->
            mapping with the keys ``accuracy``,
            ``similarity_computation_time_ms``, ``db_size`` and ``functions``
            (an iterable of mappings with the keys ``function_name``,
            ``duration``, ``package_0``, ``core_0`` and ``uncore_0``).
    """
    for dataset_name in datasets:
        print(f'========== Evaluating {dataset_name} ============')

        for sub_name, info in datasets[dataset_name].items():
            # One line per measured function, in recorded order.
            for measurement in info['functions']:
                tag = measurement["function_name"]
                duration = measurement["duration"]
                package = measurement["package_0"]
                core = measurement["core_0"]
                uncore = measurement["uncore_0"]
                print(f'begin timestamp : XXXX; tag : {tag}; duration : {duration}; package_0 : {package}; core_0 : {core}; uncore_0 : {uncore}')

            # Result line closing the sub-dataset.
            accuracy = info["accuracy"]
            sim_time_ms = info["similarity_computation_time_ms"]
            db_size = info["db_size"]
            print(f'{dataset_name} {sub_name} : {accuracy} - {sim_time_ms} ms - db_size : {db_size}')


In [20]:
# Path of the log file to analyse.
file = "cpu_power_1.txt"

# Read the whole log into memory. The handle gets its own name so that `file`
# keeps referring to the path instead of being rebound to a closed file object.
with open(file, "r") as log_handle:
    file_content = log_handle.read()

In [21]:
# Parse the raw log text into a nested mapping:
# dataset -> sub-dataset -> {accuracy, similarity_computation_time_ms, db_size, functions}.
data = parse_log_content_fixed(file_content)
# Optional: re-emit the parsed structure as log-like text for a visual check.
# print_parsed_data(data)
# Bare last expression so the notebook displays the parsed structure.
data

OrderedDict([('GardensPointWalking',
              OrderedDict([('day_left - night_right',
                            OrderedDict([('accuracy', 0.3483199833406223),
                                         ('similarity_computation_time_ms',
                                          2.49),
                                         ('db_size', 200),
                                         ('functions',
                                          [{'function_name': 'multiple_run_dummy',
                                            'duration': 0.024197816848754883,
                                            'package_0': 527709.0,
                                            'core_0': 419921.0,
                                            'uncore_0': 21301.0},
                                           {'function_name': 'multiple_run',
                                            'duration': 2.4869487285614014,
                                            'package_0': 62893149.0,
               

In [12]:
# data

In [28]:
# Build power_data[dataset][sub_dataset] with three entries per sub-dataset:
#   "Static Power"  - package_0 / duration of the "multiple_run_dummy" measurement
#   "Total Power"   - package_0 / duration of the other (workload) measurement;
#                     if several are present, the last one wins
#   "Dynamic Power" - absolute difference between the two
# All three are finally divided by 1,000,000.
# NOTE(review): presumably package_0 is in microjoules and duration in seconds,
# which would make the results watts - confirm against the logger.
power_data = {}

for dataset in data:
    power_data[dataset] = {}
    for sub_dataset in data[dataset]:
        print(sub_dataset)
        dummy_power = None     # baseline measurement not seen yet
        workload_power = None  # workload measurement not seen yet

        for function in data[dataset][sub_dataset]['functions']:
            # print(function["function_name"])
            if function["duration"] == 0:
                # Fail with context instead of a bare ZeroDivisionError.
                raise ValueError(
                    f'{dataset} / {sub_dataset}: measurement '
                    f'{function["function_name"]!r} has zero duration'
                )
            power = function["package_0"] / function["duration"]
            if function["function_name"] == "multiple_run_dummy":
                dummy_power = power
            else:
                workload_power = power

        # Both measurements are required; report which sub-dataset is
        # incomplete rather than failing later on a missing dictionary key.
        if dummy_power is None or workload_power is None:
            raise ValueError(
                f'{dataset} / {sub_dataset}: expected a "multiple_run_dummy" '
                f'measurement and a workload measurement'
            )

        # NOTE(review): abs() hides the sign, so a baseline that exceeds the
        # workload still yields a positive "Dynamic Power" - confirm intended.
        dynamic_power = abs(workload_power - dummy_power)

        # The difference is taken before scaling, so the stored values are
        # numerically identical to scaling each entry afterwards.
        power_data[dataset][sub_dataset] = {
            "Static Power": dummy_power / 1000000,
            "Total Power": workload_power / 1000000,
            "Dynamic Power": dynamic_power / 1000000,
        }
        




day_left - night_right
day_right - night_right
day_right - day_left
20110421 - 20100901
20110421 - 20100915
20110421 - 20101221
20110421 - 20110202
dry - dusk
dry - jan
dry - wet
spring - winter
spring - summer
summer - winter
summer - fall
100909_0845 - 180809_1545
100909_1000 - 190809_1410
100909_1210 - 210809_1210


In [29]:
# Display the per-sub-dataset "Static Power" / "Total Power" / "Dynamic Power" values.
power_data

{'GardensPointWalking': {'day_left - night_right': {'Static Power': 21.80812439809642,
   'Total Power': 25.289282516242757,
   'Dynamic Power': 3.4811581181463374},
  'day_right - night_right': {'Static Power': 31.438578281926787,
   'Total Power': 22.47981866488981,
   'Dynamic Power': 8.95875961703698},
  'day_right - day_left': {'Static Power': 22.13403904676867,
   'Total Power': 21.00180604470379,
   'Dynamic Power': 1.1322330020648763}},
 'CMU': {'20110421 - 20100901': {'Static Power': 20.980164741899156,
   'Total Power': 17.4760027866193,
   'Dynamic Power': 3.5041619552798533},
  '20110421 - 20100915': {'Static Power': 19.756927427204197,
   'Total Power': 12.253474841561838,
   'Dynamic Power': 7.503452585642358},
  '20110421 - 20101221': {'Static Power': 20.240913536091085,
   'Total Power': 15.157587147656507,
   'Dynamic Power': 5.083326388434577},
  '20110421 - 20110202': {'Static Power': 19.973960458679255,
   'Total Power': 17.660965214790743,
   'Dynamic Power': 2.312