In [1]:
import os
import subprocess
from tqdm import tqdm

# Read the template configuration
def read_config_template(template_path):
    """Load the configuration template as a list of lines.

    Args:
        template_path: Path of the template file to read.

    Returns:
        list[str]: The file's lines in order, each keeping its line ending.
    """
    with open(template_path, 'r') as template:
        return list(template)

# Modify the configuration
def modify_config(template_lines, changes):
    """Return the template lines with the requested settings overridden.

    A line is rewritten when it contains one of the keys in ``changes`` as a
    substring. The whole line is then replaced by ``<key>: <value>,`` (the
    value is formatted with an f-string), keeping the template line's leading
    whitespace. Lines that match no key are passed through untouched.

    Args:
        template_lines: Iterable of template lines.
        changes: Mapping from key text exactly as it appears in the template
            (e.g. ``"'roi_size'"``, quotes included) to the replacement value.

    Returns:
        list[str]: A new list of lines; ``template_lines`` is not modified.
    """
    modified_lines = []
    for line in template_lines:
        replacement = line
        # Leading whitespace of the template line, reused for the rewritten line.
        indent = line[:len(line) - len(line.lstrip())]
        for key, value in changes.items():
            # Match against the ORIGINAL template line only. Testing the
            # already-rewritten text would let a value that happens to contain
            # another key (e.g. {"'model'": "'output'", "'output'": [8]})
            # clobber the line that was just produced.
            if key in line:
                replacement = f"{indent}{key}: {value},\n"
        modified_lines.append(replacement)
    return modified_lines

# Write the modified configuration
def write_config(output_path, modified_lines):
    """Write the given lines to ``output_path``, replacing any existing file.

    Lines are written verbatim in order; no line endings are added.

    Args:
        output_path: Destination file path.
        modified_lines: Iterable of strings to write.
    """
    with open(output_path, 'w') as config_file:
        for text in modified_lines:
            config_file.write(text)

# Automate training and log handling
def automate_training(changes, log_name):
    """Generate ``config.py`` from the template, run ``train.py``, and file its log.

    Steps:
      1. Read ``config_template.py``, apply ``changes`` and write ``config.py``.
      2. Run ``train.py`` in a subprocess and wait for it to finish.
      3. If ``eval/logs/training_log.log`` exists afterwards, move it to
         ``eval/logs/<log_name>``; otherwise print a notice.

    Args:
        changes: Mapping of template key text to replacement value, passed to
            ``modify_config``.
        log_name: File name (inside ``eval/logs``) to give the training log.

    Raises:
        subprocess.CalledProcessError: If ``train.py`` exits with a non-zero
            status (``check=True``).
    """
    # Function-scope import so this cell does not depend on a separate import cell.
    import sys

    # Read and modify the config
    template_lines = read_config_template("config_template.py")
    modified_lines = modify_config(template_lines, changes)
    write_config("config.py", modified_lines)

    # Run the training script with the interpreter that runs this kernel, so
    # the subprocess sees the same environment/packages; fall back to whatever
    # "python" is on PATH if the interpreter path is unavailable.
    print(f"Starting training for {log_name}...")
    subprocess.run([sys.executable or "python", "train.py"], check=True)

    # Rename the log file
    log_path = os.path.join('eval/logs', 'training_log.log')
    new_log_path = os.path.join('eval/logs', log_name)
    if os.path.exists(log_path):
        # os.replace overwrites an existing target on every platform, whereas
        # os.rename raises FileExistsError on Windows when re-running a model
        # whose log already exists.
        os.replace(log_path, new_log_path)
        print(f"Log file renamed to {new_log_path}")
    else:
        print(f"Log file not found for {log_name}. Check if training completed successfully.")



In [11]:

# Ablation sweep: every entry pairs the log file name for a run with the
# config-template overrides ("changes") applied before training.
# Keys carry their own quotes because they are matched as raw template text.
models = [
    # Baseline: template used unchanged
    {"log_name": "default.log", "changes": {}},
    # CNN channel widths
    *[
        {"log_name": f"cnn-{tag}.log", "changes": {"'channels'": channels}}
        for tag, channels in (("s", [8, 16, 32]), ("l", [32, 64, 128]))
    ],
    # ROI size
    *[
        {"log_name": f"roi{size}.log", "changes": {"'roi_size'": size}}
        for size in (21, 31, 41)
    ],
    # GAT depth
    *[
        {"log_name": f"gat_depth-{depth}.log", "changes": {"'depth_gat'": depth}}
        for depth in (6, 7, 8, 4, 3, 2, 1, 0)
    ],
    # Number of attention heads
    *[
        {"log_name": f"num_heads-{heads}.log", "changes": {"'num_heads'": heads}}
        for heads in (5, 4, 2, 1)
    ],
    # Shared width for the hidden/output/GAT settings
    *[
        {
            "log_name": f"{dim}d.log",
            "changes": {
                "'hiddens'": [dim],
                "'output'": [dim],
                "'output_dense'": dim,
                "'input_gat'": dim,
                "'output_gat'": dim,
            },
        }
        for dim in (8, 32, 64, 128)
    ],
    # Feature-group subsets (an empty list overrides that group's setting with [])
    {
        "log_name": "c-pop_only.log",
        "changes": {"'employment'": [], "'landuse_poi'": []},
    },
    {
        "log_name": "c-emp_only.log",
        "changes": {"'population'": [], "'landuse_poi'": []},
    },
    {
        "log_name": "c-pop_emp_only.log",
        "changes": {"'landuse_poi'": []},
    },
    {
        "log_name": "c-landuse_poi_only.log",
        "changes": {"'population'": [], "'employment'": []},
    },
    {
        "log_name": "c-lv1strata.log",
        "changes": {
            "'population'": [*range(8)],
            "'employment'": [*range(18, 40)],
            "'landuse_poi'": [],
        },
    },
    {
        "log_name": "c-lv2strata.log",
        "changes": {
            "'population'": [0, *range(8, 18)],
            "'employment'": [18, *range(40, 94)],
            "'landuse_poi'": [],
        },
    },
    {
        "log_name": "c-lv1strata+landusepoi.log",
        "changes": {
            "'population'": [*range(8)],
            "'employment'": [*range(18, 40)],
        },
    },
    {
        "log_name": "c-lv2strata+landusepoi.log",
        "changes": {
            "'population'": [0, *range(8, 18)],
            "'employment'": [18, *range(40, 94)],
        },
    },
]


In [2]:

# Models to train in this sweep. Each entry gives the log file name for the run
# and the config-template overrides to apply. Commented-out entries are
# disabled and are skipped by the loops that iterate over `models`.
models = [
    # {"log_name": "best-no-emp.log", "changes": {"'employment'": [], "'epoch'": 26, "'step'":6}},
    # {"log_name": "best-no-pop.log", "changes": {"'population'": [], "'epoch'": 21, "'step'":5}},
    # {"log_name": "best-no-land_use.log", "changes": {"'landuse_poi'": list(range(4,12)), "'epoch'": 37, "'step'":7}},
    # {"log_name": "best-no-road_network.log", "changes": {"'landuse_poi'": list(range(4))+list(range(7,12)), "'epoch'": 21, "'step'":2}},
    # {"log_name": "best-no-poi.log", "changes": {"'landuse_poi'": list(range(7)), "'epoch'": 29, "'step'":5}},
    # {"log_name": "best-no-edge_features.log", "changes": {"'model'": "'model_no_edge_features'", "'epoch'": 42, "'step'":1}},
    # {"log_name": "c-pop_only2.log", "changes": {"'employment'": [], "'landuse_poi'": []}},
    # {"log_name": "c-emp_only2.log", "changes": {"'population'": [], "'landuse_poi'": []}},
    # {"log_name": "c-pop_emp_only2.log", "changes": {"'landuse_poi'": []}},
    {
        "log_name": "c-poplv1.log",
        "changes": {
            "'population'": [*range(1, 8)],
            "'employment'": [],
            "'landuse_poi'": [],
        },
    },
    # {"log_name": "c-poplv2.log", "changes": {"'population'": list(range(8,18)), "'employment'": [], "'landuse_poi'": []}},
    {
        "log_name": "c-emplv1.log",
        "changes": {
            "'population'": [],
            "'employment'": [*range(19, 40)],
            "'landuse_poi'": [],
        },
    },
    # {"log_name": "c-emplv2.log", "changes": {"'population'": [], "'employment'": list(range(40,94)), "'landuse_poi'": []}},
    {
        "log_name": "c-pop-emp-lv1.log",
        "changes": {
            "'population'": [*range(1, 8)],
            "'employment'": [*range(19, 40)],
            "'landuse_poi'": [],
        },
    },
    # {"log_name": "c-pop-emp-lv2.log", "changes": {"'population'": list(range(8,18)), "'employment'": list(range(40,94)), "'landuse_poi'": []}},
]


In [3]:

# Train every configured model in turn; tqdm shows one tick per finished run.
for model in tqdm(models, desc="Training Models"):
    log_name, changes = model["log_name"], model["changes"]
    automate_training(changes=changes, log_name=log_name)

Training Models:   0%|          | 0/3 [00:00<?, ?it/s]

Starting training for c-poplv1.log...


Training Models:  33%|███▎      | 1/3 [1:34:13<3:08:27, 5653.99s/it]

Log file renamed to eval/logs\c-poplv1.log
Starting training for c-emplv1.log...


Training Models:  67%|██████▋   | 2/3 [3:10:19<1:35:19, 5719.39s/it]

Log file renamed to eval/logs\c-emplv1.log
Starting training for c-pop-emp-lv1.log...


Training Models: 100%|██████████| 3/3 [4:47:22<00:00, 5747.36s/it]  

Log file renamed to eval/logs\c-pop-emp-lv1.log





In [22]:
from eval.evaluate_log import extract_metrics
import pandas as pd
import os

def update_results_csv(results_csv, log_name, min_valid_geh, min_valid_mae, changes, min_valid_geh_epoch, min_valid_geh_step):
    """Insert or replace one model's validation summary in the results CSV.

    If ``results_csv`` exists it is loaded and any row whose ``log_name``
    equals the given one is dropped; the new row is then appended at the end
    and the file is rewritten. If the file does not exist it is created.

    Args:
        results_csv: Path of the CSV file to create or update.
        log_name: Identifier of the run; acts as the row key.
        min_valid_geh: Value stored in the ``min_valid_geh`` column.
        min_valid_mae: Value stored in the ``min_valid_mae`` column.
        changes: Config overrides of the run; stored as ``str(changes)``.
        min_valid_geh_epoch: Value stored in the ``min_valid_geh_epoch`` column.
        min_valid_geh_step: Value stored in the ``min_valid_geh_step`` column.
    """
    new_row = pd.DataFrame([{
        "log_name": log_name,
        "min_valid_geh": min_valid_geh,
        "min_valid_mae": min_valid_mae,
        "changes": str(changes),  # Store changes as a string for reference
        "min_valid_geh_epoch": min_valid_geh_epoch,
        "min_valid_geh_step": min_valid_geh_step,
    }])

    # Check if results CSV exists
    if os.path.exists(results_csv):
        existing = pd.read_csv(results_csv)
        # Drop any previous row for this log so the new one replaces it
        existing = existing[existing['log_name'] != log_name]
    else:
        # Initialize with relevant columns
        existing = pd.DataFrame(columns=list(new_row.columns))

    if existing.empty:
        # Concatenating onto an empty frame raises a pandas FutureWarning
        # (empty entries will stop being ignored for dtype resolution), so use
        # the new row directly. Keep the column layout concat would have
        # produced: existing columns first, then any that only the row has.
        extra_columns = [col for col in new_row.columns if col not in existing.columns]
        results_df = new_row.reindex(columns=list(existing.columns) + extra_columns)
    else:
        results_df = pd.concat([existing, new_row], ignore_index=True)

    # Save to CSV
    results_df.to_csv(results_csv, index=False)

# CSV file that accumulates one summary row per log
results_csv = "validation_results.csv"

# Summarise each configured model whose log exists; report the others as missing
for model in models:
    log_name, changes = model["log_name"], model["changes"]
    log_path = "eval/logs/" + log_name  # Assuming log is named correctly during the run

    if not os.path.exists(log_path):
        print(f"Log file {log_name} not found. Make sure the training has generated the log.")
        continue

    df = extract_metrics(log_path)
    # df = df[df['step'] == 7] # only for complete epoch

    # Row label of the lowest validation GEH; epoch and step are read from that row.
    best_idx = df['valid_geh'].idxmin()
    min_valid_geh = df['valid_geh'].min()
    # The MAE minimum is taken over all rows, independently of the best-GEH row.
    min_valid_mae = df['valid_mae'].min()
    min_valid_geh_epoch = df.loc[best_idx, 'epoch']
    min_valid_geh_step = df.loc[best_idx, 'step']
    print(f"Log: {log_name}, Min Valid GEH: {min_valid_geh}, Min Valid MAE: {min_valid_mae}")

    # Update results CSV
    update_results_csv(results_csv, log_name, min_valid_geh, min_valid_mae, changes, min_valid_geh_epoch, min_valid_geh_step)

Log: best-no-emp.log, Min Valid GEH: 50.02, Min Valid MAE: 8627.86
Log: best-no-pop.log, Min Valid GEH: 52.05, Min Valid MAE: 9337.73
Log: best-no-land_use.log, Min Valid GEH: 55.05, Min Valid MAE: 9597.14
Log: best-no-road_network.log, Min Valid GEH: 51.74, Min Valid MAE: 9081.86
Log: best-no-poi.log, Min Valid GEH: 53.17, Min Valid MAE: 9085.83
Log: best-no-edge_features.log, Min Valid GEH: 52.8, Min Valid MAE: 9312.38


  results_df = pd.concat([results_df, pd.DataFrame([{
