In [2]:
import os
import time
import config
from datetime import datetime
import ollama
from codecarbon import EmissionsTracker
from log_utils import read_log_messages, normalize_template, normalize_template_v1, normalize_template_v2, save_templates
from ollama_utils import start_ollama_server, stop_ollama_server, start_ollama_server_log
from evaluation import evaluate_and_save

In [3]:
# --- Experiment configuration ---
# All values below come from the project-local `config` module or are derived
# from it. Change DESIGN / the input file names here to set up a different run.
llm_config = config.LLM_CONFIG
task = config.TASK_PROMPT
sys_prompt_few_shot_single_log_parser = config.SYS_MSG_SINGLE_LOG_PARSER_FEW_SHOT
sys_prompt_three_shot_single_log_parser = config.SYS_MSG_SINGLE_LOG_PARSER_THREE_SHOT
sys_prompt_zero_shot_single_log_parser = config.SYS_MSG_SINGLE_LOG_PARSER_ZERO_SHOT

LOG_DIR = config.LOG_DIR
RESULT_DIR = config.RESULT_DIR
# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(RESULT_DIR, exist_ok=True)

input_log_file = "HDFS_200_sampled.log"
log_path = os.path.join(LOG_DIR, input_log_file)
ground_truth_file_path = os.path.join(LOG_DIR, "HDFS_200_sampled_log_structured.csv")

DESIGN = "NA-zero"

# `model_name` is the identifier sent to Ollama; `model` is the same string
# with ':' replaced by '-' and is only used to build `exp_name`.
model_name = llm_config["config_list"][0]["model"]
model = model_name.replace(":", "-")
temperature = llm_config["temperature"]
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
# Note: str.capitalize() lowercases everything after the first character,
# so "NA-zero" becomes "Na-zero" here.
project_name = DESIGN.capitalize()
exp_name = f"{project_name}_{model}_{timestamp}"

In [4]:
# --- Ollama Query Function ---
def ask_ollama(model, prompt):
    """Send one prompt to an Ollama model and return the generated text.

    The sampling temperature is taken from the notebook-level ``temperature``
    variable.

    Args:
        model: Name of the Ollama model to query.
        prompt: Full prompt text to send.

    Returns:
        The value stored under ``'response'`` in the reply, or ``None`` when
        that key is absent or when Ollama raised a ``ResponseError`` (the
        error text is printed in that case).
    """
    generation_options = {'temperature': temperature}
    try:
        reply = ollama.generate(model=model, prompt=prompt, options=generation_options)
    except ollama.ResponseError as err:
        print('Error:', err.error)
        return None
    else:
        return reply.get('response', None)

# --- Read Log Messages ---
# Load the messages from `log_path` once, up front; the inference cell below
# iterates over `logs` one message at a time.
logs = read_log_messages(log_path)

Run inference with CodeCarbon emissions tracking

In [5]:
# --- With CodeCarbon ---
def run_inference_with_emissions(logs, model_name, prompt_prefix, task, exp_name, result_dir):
    """Query the model once per log message while CodeCarbon tracks emissions.

    Each prompt is built as ``prompt_prefix + task + log_message`` and sent
    through ``ask_ollama``.

    Args:
        logs: Iterable of log message strings.
        model_name: Model identifier passed to ``ask_ollama``.
        prompt_prefix: System-style prompt text placed first in every prompt.
        task: Task instruction text placed between the prefix and the message.
        exp_name: Project name handed to the ``EmissionsTracker``.
        result_dir: Output directory handed to the ``EmissionsTracker``.

    Returns:
        A list with exactly one entry per input log, in input order. A log
        whose request produced no response is recorded as an empty string
        rather than dropped, so the list stays positionally aligned with the
        input (and with any per-line ground truth it is later compared to).
    """
    parsed_templates = []
    tracker = EmissionsTracker(project_name=exp_name, output_dir=result_dir, save_to_file=True)
    tracker.start()
    try:
        for i, log_message in enumerate(logs):
            prompt = prompt_prefix + task + log_message
            response = ask_ollama(model_name, prompt)
            if response is None:
                # Dropping the entry would shift every later template by one
                # position, so keep a placeholder instead.
                print(f"[Warning] No response for log {i} - recording an empty template.")
                response = ""
            parsed_templates.append(response)
    finally:
        # Stop the tracker even when a request raises, so it is never left running.
        emissions = tracker.stop()
    if emissions is None:
        print("Emissions: not available")
    else:
        print(f"Emissions: {emissions} kg CO2")
    return parsed_templates


In [6]:
# Launch the Ollama server, run the whole inference pass, persist the
# templates, then shut the server down again.
proc = start_ollama_server()
try:
    time.sleep(5) # Give it some time to initialize
    parsed_templates = run_inference_with_emissions(logs, model_name, sys_prompt_zero_shot_single_log_parser, task, exp_name, RESULT_DIR)
    save_templates(parsed_templates, llm_config, DESIGN, RESULT_DIR)
finally:
    # Always stop the server, even if inference or saving raised; otherwise
    # the process started above would be left running.
    stop_ollama_server(proc)

Starting Ollama server...


[codecarbon INFO @ 17:37:44] [setup] RAM Tracking...
[codecarbon INFO @ 17:37:44] [setup] CPU Tracking...
 Mac OS and ARM processor detected: Please enable PowerMetrics sudo to measure CPU

[codecarbon INFO @ 17:37:44] CPU Model on constant consumption mode: Apple M3 Max
[codecarbon INFO @ 17:37:44] [setup] GPU Tracking...
[codecarbon INFO @ 17:37:44] No GPU found.
[codecarbon INFO @ 17:37:44] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: Unspecified
            
[codecarbon INFO @ 17:37:44] >>> Tracker's metadata:
[codecarbon INFO @ 17:37:44]   Platform system: macOS-15.6.1-arm64-arm-64bit
[codecarbon INFO @ 17:37:44]   Python version: 3.12.11
[codecarbon INFO @ 17:37:44]   CodeCarbon version: 3.0.4
[codecarbon INFO @ 17:37:44]   Available RAM : 128.000 GB
[codecarbon INFO @ 17:37:44]   CPU count: 16 thread(s) in 1 physical CPU(s)
[co

Emissions: 0.0009330192790238688 kg CO2
Saved 200 raw and 200 normalized templates.
Stopping Ollama server...
Error stopping Ollama server: [Errno 3] No such process


In [7]:
# --- Print Results ---
#print("Templates:", parsed_templates)


Evaluate Accuracy

In [8]:
# Score the same parsed templates against the ground truth once per
# normalizer, in this order: normalize_template, normalize_template_v1,
# normalize_template_v2.
# NOTE(review): every call receives the same exp_name - confirm that
# evaluate_and_save does not overwrite the previous normalizer's saved output.
results, results_v1, results_v2 = [
    evaluate_and_save(normalizer, parsed_templates, ground_truth_file_path, exp_name)
    for normalizer in (normalize_template, normalize_template_v1, normalize_template_v2)
]

Log Line 1:
  Parsed:    Receiving block <blk_id> src: <src_ip>:<src_port> dest: <dest_ip>:<dest_port>
  Ground:    Receiving block <*> src: <*>:<*> dest: <*>:<*>
  Edit Dist: 36
  LCS:       41
--------------------------------------------------
Log Line 2:
  Parsed:    BLOCK* NameSystem.addStoredBlock: blockMap updated: <IP>:<PORT> is added to <BLOCK_ID> size <SIZE>
  Ground:    BLOCK* NameSystem.addStoredBlock: blockMap updated: <*>:<*> is added to <*> size <*>
  Edit Dist: 18
  LCS:       80
--------------------------------------------------
Log Line 3:
  Parsed:    BLOCK* NameSystem.addStoredBlock: blockMap updated: <IP>:<PORT> is added to blk_<BLOCK_ID> size <SIZE>
  Ground:    BLOCK* NameSystem.addStoredBlock: blockMap updated: <*>:<*> is added to <*> size <*>
  Edit Dist: 22
  LCS:       80
--------------------------------------------------
Log Line 4:
  Parsed:    BLOCK\* NameSystem\.delete: <blk_id> is added to invalidSet of <ip>:<port>
  Ground:    BLOCK* NameSystem.delete: <

In [9]:
#print("Results:", results)