# Imports

In [32]:
import sys
from pathlib import Path

# Put the parent of the current working directory first on the import path.
# The `src.*` imports in the following cell presumably rely on this (assumes
# the notebook is launched from a subdirectory of the project root).
parent_dir = Path.cwd().parent
sys.path.insert(0, str(parent_dir))

In [35]:
from src.data_prep import load_and_clean, build_avg_dataset,build_yearly_dataset
import pandas as pd

# Data Prep

In [36]:
# Raw input table; both derived datasets below are built from this frame.
df = pd.read_csv("../data/avtopati.csv")

# Dataset returned by build_avg_dataset for the raw table.
df_avg = build_avg_dataset(df)

# One dataset per year, keyed by the year itself: 2014 through 2023 inclusive
# (the upper bound of range() is exclusive).
yearly_dfs = dict(
    (yr, build_yearly_dataset(df, yr))
    for yr in range(2014, 2024)
)


In [37]:
# Write the averaged dataset next to the raw data, without the index column.
df_avg.to_csv('../data/avtopati_avg.csv', index=False)

# Write every per-year frame to its own CSV; the year is part of the file name.
for yr, frame in yearly_dfs.items():
    frame.to_csv(f"../data/avtopati_{yr}.csv", index=False)

# Feature importance - yearly datasets

In [38]:


from src.data_prep import load_and_clean_year
from src.calc_importance import compute_importance_precise
import os

# Directory (relative to the notebook's working directory) that receives the
# per-year importance tables; created on first run, left alone afterwards.
os.makedirs("outputs", exist_ok=True)

years = range(2014, 2025)

for year in years:
    csv_path = f"../data/avtopati_{year}.csv"
    target_col = f"Wi_{year}"

    try:
        X, y = load_and_clean_year(csv_path, target_col=target_col)
    except FileNotFoundError:
        # No CSV on disk for this year: report it and go on to the next one.
        print(f"[SKIP] {csv_path} not found")
    else:
        # Data loaded: compute the importances and store them as
        # outputs/feature_importance_<year>.csv.
        print(f"\n=== {year} ===")
        imp = compute_importance_precise(X, y)
        out_path = f"outputs/feature_importance_{year}.csv"
        imp.to_csv(out_path, index=False)
        print(f"Saved feature importances to {out_path}")



=== 2014 ===
CV R²: 0.055 ± 0.307
Saved feature importances to outputs/feature_importance_2014.csv

=== 2015 ===
CV R²: -0.313 ± 0.559
Saved feature importances to outputs/feature_importance_2015.csv

=== 2016 ===
CV R²: -0.132 ± 0.603
Saved feature importances to outputs/feature_importance_2016.csv

=== 2017 ===
CV R²: -0.021 ± 0.107
Saved feature importances to outputs/feature_importance_2017.csv

=== 2018 ===
CV R²: -0.527 ± 0.893
Saved feature importances to outputs/feature_importance_2018.csv

=== 2019 ===
CV R²: -0.010 ± 0.343
Saved feature importances to outputs/feature_importance_2019.csv

=== 2020 ===
CV R²: -0.445 ± 0.589
Saved feature importances to outputs/feature_importance_2020.csv

=== 2021 ===
CV R²: -0.198 ± 0.424
Saved feature importances to outputs/feature_importance_2021.csv

=== 2022 ===
CV R²: -0.812 ± 1.910
Saved feature importances to outputs/feature_importance_2022.csv

=== 2023 ===
CV R²: -0.247 ± 1.076
Saved feature importances to outputs/feature_importance_2023.csv

# Feature importance - 10-year average

In [39]:
# Compute and save feature importances for the averaged dataset.
csv_path = "../data/avtopati_avg.csv"
target_col = "Wi_AVG"

try:
    X, y = load_and_clean_year(csv_path, target_col=target_col)
except FileNotFoundError:
    print(f"[SKIP] {csv_path} not found")
else:
    # Only run when loading succeeded. Without this guard a missing file would
    # fall through and either raise NameError or silently reuse the X, y left
    # over from the per-year loop, saving one year's importances under the
    # "avg" file name.
    imp = compute_importance_precise(X, y)
    out_path = "outputs/feature_importance_avg.csv"
    imp.to_csv(out_path, index=False)
    print(f"Saved feature importances to {out_path}")

CV R²: 0.029 ± 0.375
Saved feature importances to outputs/feature_importance_avg.csv


# Export output

In [None]:
import json


def read_importance_table(csv_file):
    """Load a two-column CSV as a ``{first column: second column}`` dict.

    Parameters
    ----------
    csv_file : str
        Path to a CSV file with at least two columns. Only the first two
        columns are used, selected by position rather than by name.

    Returns
    -------
    dict
        Maps each value of the first column to the value in the same row of
        the second column. If a first-column value repeats, the last row wins.
    """
    table = pd.read_csv(csv_file)
    # .tolist() yields plain Python scalars, which json can serialise.
    return dict(zip(table.iloc[:, 0].tolist(), table.iloc[:, 1].tolist()))


os.makedirs("outputs/json", exist_ok=True)

# Collect every importance table that exists on disk. The tables are read
# through the helper so that this cell does not rebind `df` / `df_avg`, which
# hold the raw and averaged datasets created in the data-prep cells.
result = {}
years = range(2014, 2025)
for year in years:
    file_path = f"outputs/feature_importance_{year}.csv"
    if os.path.exists(file_path):
        # Integer year keys are written as strings by json.
        result[year] = read_importance_table(file_path)

avg_path = "outputs/feature_importance_avg.csv"
if os.path.exists(avg_path):
    result["average"] = read_importance_table(avg_path)

out_path = "outputs/json/feature_importances.json"
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(result, f, indent=4)
print(f"Saved JSON to {out_path}")


Saved JSON to outputs/json/feature_importances.json
