In [None]:
import sys
from pathlib import Path
import pandas as pd
# Project root is taken to be the parent of the current working directory
# (assumes the notebook is started from a direct subfolder of the project — TODO confirm).
rootPath = Path.cwd().parent
# Put the project root on sys.path so the `src` package imported below resolves.
# The membership check keeps repeated runs of this cell from stacking duplicate entries.
if str(rootPath) not in sys.path:
    sys.path.append(str(rootPath))
from src.timeseries_processing.improved_feature_groups import feature_groups
# Read the CSV stored under <project root>/data into a DataFrame.
df = pd.read_csv(f"{rootPath}/data/delay_analysis_improved_v2.csv")

In [None]:
# Flatten every feature group into a single list of unique column names.
# dict.fromkeys drops duplicates while keeping first-seen order. The earlier
# list(set(...)) gave an arbitrary order that could change between kernel
# sessions (string hashing is randomized per process), which reshuffled the
# columns of every table and plot built from this list.
feature_columns = list(dict.fromkeys(
    column
    for group in feature_groups.values()
    for column in group
))

In [None]:
# Keep only the feature columns (all rows), then preview the first rows.
df = df.loc[:, feature_columns]
df.head()

In [None]:
# Collect the labels of the columns stored as int64 or float64.
numeric_frame = df.select_dtypes(include=['int64', 'float64'])
numeric_cols = numeric_frame.columns

# Report how many such columns there are, followed by their names.
print(f"数値型の列数: {len(numeric_cols)}")
print(f"\n数値型の列:\n{list(numeric_cols)}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlation between the numeric columns (DataFrame.corr with its default settings).
numeric_frame = df[numeric_cols]
correlation_matrix = numeric_frame.corr()

# Draw the matrix as a heatmap on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(20, 16))
sns.heatmap(
    correlation_matrix,
    annot=False,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": 0.8},
    ax=ax,
)
ax.set_title('数値型列の相関行列', fontsize=16, pad=20)
fig.tight_layout()
plt.show()

In [None]:
# List the column pairs whose absolute correlation reaches the threshold.
criteria = 0.5
pair_columns = ['列1', '列2', '相関係数']
corr_labels = correlation_matrix.columns
# One ndarray lookup per cell is far cheaper than calling .iloc[i, j] in the loop.
corr_values = correlation_matrix.to_numpy()

high_corr = []
# Visit only the strict upper triangle (j > i): each unordered pair is seen
# once and the diagonal (a column against itself) is skipped.
for i in range(len(corr_labels)):
    for j in range(i + 1, len(corr_labels)):
        coefficient = corr_values[i, j]
        # A NaN coefficient fails this comparison and is therefore left out.
        if abs(coefficient) >= criteria:
            high_corr.append({
                '列1': corr_labels[i],
                '列2': corr_labels[j],
                '相関係数': coefficient
            })

# Declaring the columns keeps the frame's schema even when no pair qualifies;
# an empty list alone would produce a column-less frame and sort_values would
# raise KeyError on the missing column.
high_corr_df = pd.DataFrame(high_corr, columns=pair_columns)
if not high_corr_df.empty:
    # Strongest relationships first, ranked by magnitude regardless of sign.
    high_corr_df = high_corr_df.sort_values('相関係数', ascending=False, key=abs)
print(f"\n高い相関(|r| >= {criteria})を持つペア: {len(high_corr_df)}個")
high_corr_df